# Pull the text of every quote on the page.
quotes = page.css('.quote .text::text').getall()
print(quotes)

# Pull the author name for every quote.
authors = page.css('.quote .author::text').getall()
print(authors)
The ::text pseudo-element extracts text content, similar to Scrapy/Parsel syntax.
For larger scraping projects, use Scrapling’s spider framework:
1
Create a spider class
from scrapling.spiders import Spider, Response


class QuotesSpider(Spider):
    """Spider that crawls quotes.toscrape.com."""

    # Identifier for this spider.
    name = "quotes"
    # URLs fetched when the crawl begins.
    start_urls = ["https://quotes.toscrape.com/"]
    # Maximum number of requests in flight at once.
    concurrent_requests = 10
2
Define the parse method
async def parse(self, response: Response):
    """Yield one item per quote, then queue the next page if present."""
    # One dict per quote element on the current page.
    for quote_el in response.css('.quote'):
        yield {
            "text": quote_el.css('.text::text').get(),
            "author": quote_el.css('.author::text').get(),
            "tags": quote_el.css('.tag::text').getall(),
        }

    # Follow pagination when a "next" link exists.
    next_href = response.css('.next a::attr(href)').get()
    if next_href:
        yield response.follow(next_href)
3
Run the spider
# Start the crawl and collect the scraped items.
result = QuotesSpider().start()
print(f"Scraped {len(result.items)} quotes")

# Persist the items as a JSON array...
result.items.to_json("quotes.json")
# ...or as newline-delimited JSON.
result.items.to_jsonl("quotes.jsonl")
Scrapling can automatically relocate elements when website structure changes:
1
Enable adaptive mode
from scrapling.fetchers import StealthyFetcher

# Switch on adaptive element tracking for all StealthyFetcher pages.
StealthyFetcher.adaptive = True
2
Save element locations
page = StealthyFetcher.fetch('https://example.com', headless=True)

# Record the matched elements' signatures so they can be
# relocated later if the site's markup changes.
products = page.css('.product', auto_save=True)
3
Relocate after changes
# Later, if website structure changes
page = StealthyFetcher.fetch('https://example.com', headless=True)

# Look the elements up again via the previously saved signatures.
products = page.css('.product', adaptive=True)
from urllib.parse import urljoin

from scrapling.fetchers import Fetcher

BASE_URL = 'https://quotes.toscrape.com/'

page = Fetcher.get(BASE_URL)
while True:
    # Extract data from current page
    quotes = page.css('.quote .text::text').getall()
    print(quotes)

    # Check for next page
    next_link = page.css('.next a::attr(href)').get()
    if not next_link:
        break

    # Fetch next page. urljoin resolves the href against the base,
    # which — unlike naive string concatenation — also works when the
    # link is absolute or lacks a leading slash.
    page = Fetcher.get(urljoin(BASE_URL, next_link))
Extracting Attributes
# Collect every link target on the page.
links = page.css('a::attr(href)').getall()

# Collect a custom data attribute from each product element.
product_ids = page.css('.product::attr(data-id)').getall()

# Read several attributes from the same element.
for anchor in page.css('a'):
    url = anchor.attrib.get('href')
    title = anchor.attrib.get('title')
JSON Data Extraction
# Grab the raw JSON payload embedded in a script tag.
json_data = page.css('script#data::text').get()

# Parse a JSON-valued attribute directly.
schema = page.css('[schema]').attrib['schema'].json()

# Assemble scraped fields into a JSON-ready dict.
data = {
    "title": page.css('h1::text').get(),
    "price": page.css('.price::text').get(),
    "description": page.css('.description::text').get(),
}
Error Handling
from scrapling.fetchers import Fetcher

try:
    # Bound the request so a dead host can't hang the script.
    page = Fetcher.get('https://example.com', timeout=10)
    if page.status != 200:
        # Non-OK response: report the status instead of parsing.
        print(f"Error: Status {page.status}")
    else:
        data = page.css('.content::text').get()
except Exception as e:
    # Network/transport failures (DNS, timeout, connection reset, ...).
    print(f"Failed to fetch: {e}")