PYTHON

Python Paginated API Consumption

Learn to efficiently consume data from paginated REST APIs in Python using the `requests` library, iterating through pages to fetch all available records.

import requests
import time

def fetch_all_paginated_data(base_url, initial_params=None, headers=None, page_param_name='page', per_page_param_name='per_page', delay_seconds=0.1, *, timeout=30, max_pages=None):
    """
    Fetch every item from a page-numbered REST API and return them as one list.

    Pages are requested sequentially, starting at page 1 and incrementing the
    page parameter by one each time, until a page yields no items. If any
    request fails (HTTP error status, network error, timeout, or a body that
    is not valid JSON) the problem is printed, fetching stops, and the items
    collected so far are returned; no exception is propagated for those cases.

    Args:
        base_url (str): The API endpoint URL (e.g., 'https://api.example.com/items').
        initial_params (dict, optional): Query parameters sent with every request.
            The dict is copied and never modified. A value stored under
            `page_param_name` is overwritten with the current page number.
        headers (dict, optional): Custom headers sent with every request.
        page_param_name (str): Name of the page-number query parameter. Defaults to 'page'.
        per_page_param_name (str): Name of the page-size query parameter. Defaults to
            'per_page'. If `initial_params` does not already set it, 100 is sent.
        delay_seconds (float): Pause between consecutive requests, to help stay under
            rate limits. Defaults to 0.1; 0 or None disables the pause.
        timeout (float or tuple, optional): Timeout passed to `requests.get` for each
            request. Defaults to 30 seconds. Pass None to wait indefinitely.
        max_pages (int, optional): Upper bound on the number of pages requested.
            Defaults to None (no bound). Useful as a safety net for APIs that
            ignore the page parameter and would otherwise never return an empty page.

    Returns:
        list: All items fetched, in page order. May be partial if an error
        occurred or `max_pages` was reached.
    """
    all_data = []
    page = 1

    while True:
        # Safety valve: stop before issuing a request beyond the caller's limit.
        if max_pages is not None and page > max_pages:
            print(f"Stopped after reaching max_pages={max_pages} with {len(all_data)} items.")
            break

        # Work on a copy so the caller's dict is never mutated.
        params = dict(initial_params) if initial_params else {}
        params[page_param_name] = page
        if per_page_param_name not in params:
            params[per_page_param_name] = 100  # Adjust as needed based on API limits

        print(f"Fetching page {page} from {base_url} with params: {params}")

        try:
            # Without a timeout a stalled server would block this loop forever.
            response = requests.get(base_url, params=params, headers=headers, timeout=timeout)
            response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        except requests.exceptions.HTTPError as e:
            print(f"HTTP error occurred: {e}")
            if e.response is not None:
                print(f"Response content: {e.response.text}")
            break
        except requests.exceptions.RequestException as e:
            print(f"Request error occurred: {e}")
            break

        # JSON decoding gets its own try block: in recent versions of requests the
        # decode error is both a ValueError and a RequestException, so sharing a
        # try block with the handlers above would hide this more specific message.
        try:
            data = response.json()
        except ValueError as e:
            print(f"JSON decoding error: {e}")
            print(f"Response content: {response.text}")
            break

        items = _extract_items(data)
        all_data.extend(items)

        # An empty page is treated as the end of the data set. If your API
        # exposes an explicit indicator (e.g. a 'has_more' flag or a next-page
        # URL), adapt this check accordingly.
        if not items:
            print(f"Finished fetching all {len(all_data)} items.")
            break

        page += 1
        if delay_seconds and delay_seconds > 0:
            time.sleep(delay_seconds)  # Respect API rate limits

    return all_data


def _extract_items(data):
    """Return the list of items contained in one decoded page, or [] if none is recognised.

    Recognised shapes: a bare JSON list, or an object holding the list under
    the 'data' or 'results' key. Add more cases here for other API layouts.
    """
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        for key in ('data', 'results'):
            candidate = data.get(key)
            if isinstance(candidate, list):
                return candidate
    return []

# --- Example Usage (using JSONPlaceholder for demonstration) ---
# This API returns a JSON list directly, so `items` will be `data`.
# It returns an empty list once the page number is past the last page, which ends the loop.
# Note: JSONPlaceholder does not use 'page'/'per_page'; it uses '_page' and '_limit',
# so both parameter names are overridden below.
# For a real API, adjust `page_param_name`, `per_page_param_name`, and the page size to match.

# Example 1: Simple list of posts
# posts_data = fetch_all_paginated_data(
#     base_url='https://jsonplaceholder.typicode.com/posts',
#     page_param_name='_page', # JSONPlaceholder uses _page
#     per_page_param_name='_limit', # JSONPlaceholder uses _limit
#     initial_params={'_limit': 10} # Fetch 10 items per page
# )
# print(f"Total posts fetched: {len(posts_data)}")
# if posts_data:
#     print("First post:", posts_data[0])
#     print("Last post:", posts_data[-1])

# Example 2: Users (an API that wraps the items in a 'data' array; 6 items per page requested here)
# real_api_users = fetch_all_paginated_data(
#     base_url='https://reqres.in/api/users', # A demo API with 'data' array and 'total_pages'
#     per_page_param_name='per_page',
#     initial_params={'per_page': 6}, # Default on reqres.in, but can be higher
#     delay_seconds=0.5 # A bit more delay
# )
# print(f"Total users fetched: {len(real_api_users)}")
# if real_api_users:
#     print("First user:", real_api_users[0])
#     print("Last user:", real_api_users[-1])

How it works: The Python function `fetch_all_paginated_data` provides a generic way to consume data from REST APIs that use page-number pagination. Many APIs limit the number of records returned per request, so clients must make several sequential calls to retrieve everything. This snippet automates that process: it increments a page parameter, sends each request with the `requests` library, aggregates the results, and stops when a page comes back with no items. If a request fails or the response is not valid JSON, it prints the error, stops, and returns whatever was collected up to that point. A configurable delay between requests helps respect API rate limits, which makes the function suitable for large data fetches.

Python Paginated API Consumption

Related PYTHON Snippets

Performing Efficient Set Operations in Python

Advanced Dictionary Merging and Update Strategies in Python

Efficient Data Transformation with Python List and Generator Expressions

Need help integrating this into your project?