PYTHON

Python Paginating External REST API

Learn to fetch all data from a paginated REST API in Python using either offset/limit or cursor-based pagination strategies, ensuring comprehensive data retrieval.

import requests
import time

def fetch_paginated_data(base_url, api_key, page_size=100, max_pages=None, pagination_type='offset', timeout=30):
    """
    Fetches all data from a paginated REST API.

    Pages are requested one at a time with GET and their items are accumulated.
    On a request error or an undecodable/unexpected response the error is
    printed and the items collected so far are returned (best-effort).

    :param base_url: The base URL of the API endpoint (e.g., 'https://api.example.com/items').
    :param api_key: Your API key for authentication (sent as a Bearer token).
    :param page_size: The number of items per page; must be greater than 0.
    :param max_pages: Optional maximum number of pages to fetch. None (or 0) means no limit.
    :param pagination_type: 'offset' or 'cursor'.
    :param timeout: Seconds to wait for each request before giving up
        (passed straight to ``requests.get``).
    :return: A list containing all fetched items.
    :raises ValueError: If pagination_type is not 'offset' or 'cursor', or if
        page_size is not positive.
    """
    # Validate up front: an unknown pagination type would otherwise re-request
    # the same first page forever, and a non-positive page size would make the
    # "short page" stop condition for offset pagination unreachable.
    if pagination_type not in ('offset', 'cursor'):
        raise ValueError(
            f"pagination_type must be 'offset' or 'cursor', got {pagination_type!r}"
        )
    if page_size <= 0:
        raise ValueError(f"page_size must be a positive number, got {page_size!r}")

    all_data = []
    page = 1
    offset = 0
    next_cursor = None

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    while True:
        params = {'limit': page_size}
        if pagination_type == 'offset':
            params['offset'] = offset
        elif next_cursor:
            # Cursor mode: the first request carries no cursor; later ones
            # carry the cursor returned by the previous page.
            params['cursor'] = next_cursor

        print(f"Fetching page {page} with params: {params}")
        try:
            # Without a timeout a stalled server would block this call forever.
            response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
            response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data: {e}")
            break
        except ValueError as e:
            print(f"Error decoding JSON: {e}")
            break

        if not isinstance(data, dict):
            # The keys below only make sense on a JSON object; stop instead of
            # crashing with AttributeError on e.g. a top-level JSON array.
            print(f"Unexpected response format: expected a JSON object, got {type(data).__name__}")
            break

        # Adjust 'items' key based on actual API response.
        # `or []` also covers an explicit `"items": null`.
        items = data.get('items') or []
        all_data.extend(items)

        if pagination_type == 'offset':
            # For offset pagination, assume API returns less than page_size when no more data
            if len(items) < page_size:
                break
            offset += page_size
        else:
            next_cursor = data.get('next_cursor') # Adjust 'next_cursor' key based on actual API response
            if not next_cursor:
                break # No more pages

        page += 1
        if max_pages and page > max_pages:
            print(f"Max pages ({max_pages}) reached. Stopping.")
            break

        # Implement a small delay to avoid hitting rate limits
        time.sleep(0.1)

    return all_data

# Example Usage:
# BASE_URL = 'https://api.example.com/v1/products'
# API_KEY = 'your_secret_api_key' # Replace with your actual key
# products = fetch_paginated_data(BASE_URL, API_KEY, page_size=50, max_pages=3, pagination_type='offset')
# print(f"Fetched {len(products)} products.")
# # print(products)
How it works: This Python snippet provides a function that fetches all available data from an external REST API using either offset/limit or cursor-based pagination. It requests one page at a time and accumulates the items, stopping when the API signals the end of the data: a page shorter than page_size for offset pagination, or a missing next_cursor for cursor pagination (or when the optional max_pages limit is reached). If a request fails or the response is not valid JSON, the function prints the error and returns whatever it has collected so far. A short delay between requests reduces the risk of hitting rate limits, which helps when pulling large datasets from external services.

Need help integrating this into your project?

Our team of expert developers can help you build your custom application from scratch.

Hire DigitalCodeLabs