SmartScraperGraph is the most popular and versatile graph in ScrapeGraphAI. It automates the extraction of information from web pages by using a large language model (LLM) to interpret and answer natural-language prompts.
The SmartScraperGraph constructor accepts the following parameters:
SmartScraperGraph(
    prompt: str,   # Natural language description of what to extract
    source: str,   # URL or path to local HTML file
    config: dict,  # Configuration dictionary
    schema: Optional[BaseModel] = None  # Pydantic schema for structured output
)
# Basic usage: run a SmartScraperGraph against a URL using an OpenAI model.
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph

# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

# Define the configuration
graph_config = {
    "llm": {
        # os.getenv returns None when OPENAI_API_KEY is not set.
        "api_key": os.getenv("OPENAI_API_KEY"),
        "model": "openai/gpt-4o-mini",
    },
    "verbose": True,
    "headless": False,
}

# Create the SmartScraperGraph instance
smart_scraper_graph = SmartScraperGraph(
    prompt="Extract me the first article",
    source="https://www.wired.com",
    config=graph_config,
)

# Run the graph
result = smart_scraper_graph.run()
print(result)
# Local usage: run a SmartScraperGraph against a URL using a model served by
# a local Ollama instance, then print the graph's execution info.
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# Define the configuration for local Ollama
graph_config = {
    "llm": {
        "model": "ollama/llama3.2",
        "temperature": 0,
        "base_url": "http://localhost:11434",
        "model_tokens": 4096,
    },
    "verbose": True,
    "headless": False,
}

# Create the SmartScraperGraph instance
smart_scraper_graph = SmartScraperGraph(
    prompt="Find some information about the founders.",
    source="https://scrapegraphai.com/",
    config=graph_config,
)

# Run the graph
result = smart_scraper_graph.run()
print(result)

# Get execution info
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
# Error handling: report scraping failures instead of letting them propagate.
# Assumes `smart_scraper_graph` was created as in the examples above this
# snippet in the documentation.
try:
    # Keep the try body to the one call that performs the scraping.
    result = smart_scraper_graph.run()
except Exception as e:
    # Broad catch is deliberate here: this is the top-level boundary of the
    # example and the failure is reported rather than silently swallowed.
    print(f"Error during scraping: {e}")
else:
    # Success path: an empty/falsy result means nothing was extracted.
    if result:
        print("Extraction successful:", result)
    else:
        print("No data extracted")