The CrawlResult class represents the complete result from a spider run, including scraped items, statistics, and pause state.

Class Definition

from scrapling.spiders.result import CrawlResult

@dataclass
class CrawlResult:
    """Complete result from a spider run."""

Attributes

stats (CrawlStats, required): Detailed statistics about the crawl (requests, items, timing, etc.).

items (ItemList, required): List of scraped items with export capabilities.

paused (bool, default: False): Whether the crawl was paused (True) or completed normally (False).
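
For instance, each attribute can be read directly from the result object (a minimal sketch; assumes a MySpider class defined as in the Usage Examples below):
result = MySpider().start()
print(type(result.stats))    # CrawlStats with detailed metrics
print(len(result.items))     # ItemList of scraped dicts
print(result.paused)         # True if the crawl was paused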

Properties

completed

@property
def completed(self) -> bool
Returns True if the crawl completed normally (not paused). Returns: not self.paused
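
For example (sketch, assuming a finished run):
result = spider.start()
if result.completed:
    print("Crawl finished normally")
else:
    print("Crawl was paused before finishing")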

Special Methods

Length

def __len__(self) -> int
Returns the number of scraped items. Example:
result = spider.start()
print(f"Scraped {len(result)} items")  # Same as len(result.items)

Iteration

def __iter__(self) -> Iterator[dict[str, Any]]
Iterate over scraped items. Example:
result = spider.start()
for item in result:
    print(item)  # Same as iterating result.items

ItemList

The items attribute is an ItemList, a list subclass with export methods.
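
Because ItemList subclasses list, normal list operations work alongside the export helpers below (illustrative sketch; the "price" key is an assumed item field):
result = spider.start()
priced = [item for item in result.items if item.get("price")]  # plain list filtering
print(f"{len(priced)} of {len(result.items)} items have a price")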

to_json

def to_json(
    self,
    path: Union[str, Path],
    *,
    indent: bool = False
)
Export items to a JSON file.
path (str | Path, required): Path to the output file.

indent (bool, default: False): Pretty-print with 2-space indentation (slightly slower).
Example:
result = spider.start()
result.items.to_json("output.json", indent=True)

to_jsonl

def to_jsonl(self, path: Union[str, Path])
Export items as JSON Lines (one JSON object per line).
path (str | Path, required): Path to the output file.
Example:
result = spider.start()
result.items.to_jsonl("output.jsonl")

CrawlStats

The stats attribute contains detailed crawl metrics.

Attributes

requests_count (int, default: 0): Total number of successful requests made.

concurrent_requests (int, default: 0): Maximum concurrent requests setting.

concurrent_requests_per_domain (int, default: 0): Maximum concurrent requests per domain setting.

failed_requests_count (int, default: 0): Number of failed requests (exceptions during fetch).

offsite_requests_count (int, default: 0): Number of requests filtered due to allowed_domains.

blocked_requests_count (int, default: 0): Number of requests detected as blocked.

response_bytes (int, default: 0): Total bytes downloaded.

items_scraped (int, default: 0): Number of items successfully scraped.

items_dropped (int, default: 0): Number of items dropped by on_scraped_item().

start_time (float, default: 0.0): Timestamp when the crawl started.

end_time (float, default: 0.0): Timestamp when the crawl ended.

download_delay (float, default: 0.0): Download delay setting.

custom_stats (Dict, default: {}): User-defined custom statistics.

response_status_count (Dict, default: {}): Count of responses by status code (e.g., {"status_200": 42, "status_404": 3}).

domains_response_bytes (Dict, default: {}): Bytes downloaded per domain.

sessions_requests_count (Dict, default: {}): Requests made per session ID.

proxies (List[str | Dict | Tuple], default: []): List of proxies used during the crawl.

log_levels_counter (Dict, default: {}): Count of log messages by level (debug, info, warning, error, critical).
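
These counters are plain attributes, so they can be read directly after the crawl (minimal sketch; complements the fuller statistics examples below):
stats = result.stats
print(stats.offsite_requests_count)   # requests filtered by allowed_domains
print(stats.download_delay)           # delay setting used for the run
print(stats.proxies)                  # proxies used during the crawl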

Properties

elapsed_seconds

@property
def elapsed_seconds(self) -> float
Total crawl duration in seconds. Returns: end_time - start_time

requests_per_second

@property
def requests_per_second(self) -> float
Average request rate. Returns: requests_count / elapsed_seconds (or 0.0 if elapsed_seconds == 0)

Methods

to_dict

def to_dict(self) -> dict[str, Any]
Convert statistics to a dictionary. Returns: A dictionary with formatted statistics. Example:
result = spider.start()
stats_dict = result.stats.to_dict()
print(stats_dict)
# {
#     "items_scraped": 150,
#     "items_dropped": 5,
#     "elapsed_seconds": 12.34,
#     "requests_count": 200,
#     "requests_per_second": 16.21,
#     ...
# }

Usage Examples

Basic Usage

from scrapling.spiders import Spider

class MySpider(Spider):
    name = "example"
    start_urls = ["https://example.com"]
    
    async def parse(self, response):
        yield {"url": response.url}

# Run spider
spider = MySpider()
result = spider.start()

# Access results
print(f"Completed: {result.completed}")
print(f"Items: {len(result)}")
print(f"Duration: {result.stats.elapsed_seconds:.2f}s")
print(f"Rate: {result.stats.requests_per_second:.2f} req/s")

Export Results

result = spider.start()

# Export as JSON
result.items.to_json("output/items.json", indent=True)

# Export as JSONL
result.items.to_jsonl("output/items.jsonl")

print(f"Saved {len(result)} items")

Iterate Items

result = spider.start()

# Process each item
for item in result:
    print(f"Title: {item.get('title')}")
    print(f"URL: {item.get('url')}")

Detailed Statistics

result = spider.start()
stats = result.stats

print(f"Requests: {stats.requests_count}")
print(f"Failed: {stats.failed_requests_count}")
print(f"Blocked: {stats.blocked_requests_count}")
print(f"Items: {stats.items_scraped}")
print(f"Dropped: {stats.items_dropped}")
print(f"Bandwidth: {stats.response_bytes / 1024 / 1024:.2f} MB")
print(f"Status codes: {stats.response_status_count}")
print(f"Per domain: {stats.domains_response_bytes}")

Handle Paused Crawls

spider = MySpider(crawldir="./checkpoints")
result = spider.start()

if result.paused:
    print("Crawl was paused. Resume by running again.")
    print(f"Scraped {len(result)} items before pause")
else:
    print("Crawl completed successfully")
    result.items.to_json("final_results.json")

Custom Statistics

class MySpider(Spider):
    async def parse(self, response):
        # Track custom metrics
        self._engine.stats.custom_stats["pages_with_images"] = \
            self._engine.stats.custom_stats.get("pages_with_images", 0) + 1
        
        yield {"url": response.url}

result = spider.start()
print(result.stats.custom_stats)  # {"pages_with_images": 42}

Performance Analysis

result = spider.start()
stats = result.stats

# Calculate efficiency metrics
if stats.requests_count > 0:
    success_rate = (stats.requests_count - stats.failed_requests_count) / stats.requests_count * 100
    print(f"Success rate: {success_rate:.1f}%")

if stats.items_scraped > 0:
    avg_bytes_per_item = stats.response_bytes / stats.items_scraped
    print(f"Avg bytes per item: {avg_bytes_per_item:.0f}")

# Session performance
for session_id, count in stats.sessions_requests_count.items():
    print(f"Session '{session_id}': {count} requests")

Logging Analysis

result = spider.start()
logs = result.stats.log_levels_counter

print(f"Debug: {logs.get('debug', 0)}")
print(f"Info: {logs.get('info', 0)}")
print(f"Warnings: {logs.get('warning', 0)}")
print(f"Errors: {logs.get('error', 0)}")
print(f"Critical: {logs.get('critical', 0)}")

Complete Example

import json
from pathlib import Path
from scrapling.spiders import Spider

class ProductSpider(Spider):
    name = "products"
    start_urls = ["https://store.example.com/products"]
    concurrent_requests = 10
    
    async def parse(self, response):
        for product in response.css(".product"):
            yield {
                "name": product.css(".name::text").get(),
                "price": product.css(".price::text").get(),
            }

# Run spider with checkpoints
spider = ProductSpider(crawldir="./checkpoints")
result = spider.start(use_uvloop=True)

# Create output directory
output_dir = Path("output")
output_dir.mkdir(exist_ok=True)

# Save items
if result.completed:
    result.items.to_json(output_dir / "products.json", indent=True)
    result.items.to_jsonl(output_dir / "products.jsonl")
    
    # Save statistics
    with open(output_dir / "stats.json", "w") as f:
        json.dump(result.stats.to_dict(), f, indent=2)
    
    print(f"✓ Scraped {len(result)} products in {result.stats.elapsed_seconds:.1f}s")
    print(f"✓ Rate: {result.stats.requests_per_second:.1f} req/s")
else:
    print(f"✗ Crawl paused after scraping {len(result)} products")
    print("  Run again to resume")

See Also

  • Spider - Running spiders and getting results
  • CrawlerEngine - Understanding statistics collection
