Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/dlt-hub/dlt/llms.txt

Use this file to discover all available pages before exploring further.

Load data from any REST API using dlt’s declarative configuration. The REST API source handles pagination, authentication, and nested resources automatically.

Quick Start

Here’s a complete example loading GitHub issues and comments:
import dlt
from dlt.sources.rest_api import rest_api_source

# Client settings shared by every resource: base URL plus bearer-token auth.
github_client = {
    "base_url": "https://api.github.com/repos/dlt-hub/dlt/",
    "auth": {"type": "bearer", "token": dlt.secrets["github_token"]},
}

# Parent resource: the repository's issues endpoint.
issues_resource = {
    "name": "issues",
    "endpoint": {
        "path": "issues",
        "params": {"state": "open", "per_page": 100},
    },
}

# Child resource: the path placeholder references the parent issue's number,
# and "include_from_parent" carries that field into the comments data.
comments_resource = {
    "name": "comments",
    "endpoint": {"path": "issues/{resources.issues.number}/comments"},
    "include_from_parent": ["number"],
}

source = rest_api_source(
    {
        "client": github_client,
        "resources": [issues_resource, comments_resource],
    }
)

pipeline = dlt.pipeline(
    dataset_name="github_data",
    destination="duckdb",
    pipeline_name="github_api",
)

# Run the pipeline and print the resulting load info.
load_info = pipeline.run(source)
print(load_info)

Configuration

Using RESTAPIConfig

For better type hints and IDE support, use the RESTAPIConfig type:
from typing import Optional

from dlt.sources.rest_api import RESTAPIConfig, rest_api_resources

@dlt.source
def github_source(access_token: Optional[str] = dlt.secrets.value):
    """Yield the GitHub issues resources built from a declarative REST config.

    Args:
        access_token: GitHub token. It defaults to ``dlt.secrets.value`` so
            dlt injects it from secrets when the caller does not pass it.
            The ``Optional`` hint marks the secret as not required, so the
            source can still run without authentication when no token is
            configured; without the hint the ``else None`` fallback below
            would never be reached for a missing secret.

    Yields:
        The resources produced by ``rest_api_resources`` for the config.
    """
    config: RESTAPIConfig = {
        "client": {
            "base_url": "https://api.github.com/repos/dlt-hub/dlt/",
            # Add bearer auth only when a token is present.
            "auth": (
                {
                    "type": "bearer",
                    "token": access_token,
                }
                if access_token
                else None
            ),
        },
        # Settings shared by the resources listed below.
        "resource_defaults": {
            "primary_key": "id",
            "write_disposition": "merge",
            "endpoint": {
                "params": {
                    "per_page": 100,
                },
            },
        },
        "resources": [
            {
                "name": "issues",
                "endpoint": {
                    "path": "issues",
                    "params": {
                        "sort": "updated",
                        "direction": "desc",
                        "state": "open",
                    },
                },
            },
        ],
    }

    yield from rest_api_resources(config)

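Authentication

Configure authentication in the client's "auth" section. The example below uses a bearer token read from dlt secrets: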
# Bearer-token auth settings; the token value is read from dlt secrets.
bearer_auth = {
    "type": "bearer",
    "token": dlt.secrets["api_token"],
}

config = {
    "client": {
        "base_url": "https://api.example.com/",
        "auth": bearer_auth,
    },
}

Pagination

The REST API source automatically detects and handles pagination:
# Automatic pagination detection: the client has no "paginator" entry,
# so the paginator is automatically inferred.
pokeapi_config = {
    "client": {"base_url": "https://pokeapi.co/api/v2/"},
    "resources": ["pokemon", "berry"],
}
source = rest_api_source(pokeapi_config)
Or configure it explicitly:
# Explicit paginator settings, used instead of automatic detection.
json_link_paginator = {
    "type": "json_link",
    "next_url_path": "paging.next",
}

config = {
    "client": {
        "base_url": "https://api.example.com/",
        "paginator": json_link_paginator,
    },
}

Resource Relationships

Load nested resources by referencing parent resource fields:
# Parent resource, with the endpoint given as a plain path string.
issues_resource = {"name": "issues", "endpoint": "issues"}

issue_comments_resource = {
    "name": "issue_comments",
    # Use {resources.issues.number} to reference parent
    "endpoint": {"path": "issues/{resources.issues.number}/comments"},
    # Include parent fields in child table
    "include_from_parent": ["id", "number"],
}

config = {
    "resources": [issues_resource, issue_comments_resource],
}

Incremental Loading

Combine the REST API source with incremental loading:
from dlt.common.pendulum import pendulum

# Starting cursor value: 30 days before today, as an ISO 8601 string.
initial_cursor = pendulum.today().subtract(days=30).to_iso8601_string()

issues_endpoint = {
    "path": "issues",
    # The "since" parameter is filled from the incremental start value.
    "params": {"since": "{incremental.start_value}"},
    "incremental": {
        "cursor_path": "updated_at",
        "initial_value": initial_cursor,
    },
}

config: RESTAPIConfig = {
    "client": {"base_url": "https://api.github.com/repos/dlt-hub/dlt/"},
    "resources": [
        {"name": "issues", "endpoint": issues_endpoint},
    ],
}

Testing Connection

Verify your API configuration before running the pipeline:
from dlt.sources.rest_api import check_connection

source = rest_api_source(config)

# The result unpacks into a success flag and an error message.
can_connect, error_msg = check_connection(source, "issues")  # Test this endpoint

if can_connect:
    print("Connection successful!")
else:
    print(f"Connection failed: {error_msg}")

Complete Example: Pokemon API

import dlt
from dlt.sources.rest_api import rest_api_source

def load_pokemon():
    """Run a pipeline that loads three PokeAPI resources into DuckDB.

    Creates the ``pokemon_api`` pipeline, builds a REST API source for the
    ``pokemon``, ``berry`` and ``location`` resources, runs it, and prints
    the load info.
    """
    pipeline = dlt.pipeline(
        dataset_name="pokemon_data",
        destination="duckdb",
        pipeline_name="pokemon_api",
    )

    api_config = {
        "client": {"base_url": "https://pokeapi.co/api/v2/"},
        # The "limit" parameter is declared once under the resource defaults.
        "resource_defaults": {"endpoint": {"params": {"limit": 1000}}},
        "resources": ["pokemon", "berry", "location"],
    }
    source = rest_api_source(api_config)

    info = pipeline.run(source)
    print(info)

# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    load_pokemon()

Next Steps

Incremental Loading

Add incremental loading to track changes

Schema Evolution

Handle schema changes automatically

Build docs developers (and LLMs) love