PYTHON

Iterating Through Paginated API Results with Python

Learn to effectively fetch all data from paginated APIs using a loop, handling different pagination styles like next_page_url or offset/limit.

import time
from urllib.parse import parse_qs, urlsplit

import requests

_SUPPORTED_PAGINATION_STYLES = ('link_header', 'next_url_in_body', 'offset_limit', 'page_number')


def _params_missing_from_url(url, params):
    """Return the entries of ``params`` whose keys are absent from ``url``'s query string."""
    already_sent = parse_qs(urlsplit(url).query, keep_blank_values=True)
    return {key: value for key, value in params.items() if key not in already_sent}


def fetch_all_paginated_data(base_url, params=None, pagination_style='link_header', max_pages=None,
                             *, session=None, delay=0.5, timeout=30):
    """
    Fetches all data from a paginated API endpoint.

    Errors are handled on a best-effort basis: when a request or a response
    cannot be processed, the error is printed and the items collected so far
    are returned instead of raising.

    :param base_url: The base URL of the API endpoint.
    :param params: Initial query parameters as a dictionary. The dictionary is copied,
        never modified.
    :param pagination_style: 'link_header', 'next_url_in_body', 'offset_limit', or 'page_number'.
        An unsupported value is reported and yields an empty list without any request being made.
    :param max_pages: Optional maximum number of pages to fetch to prevent infinite loops.
        ``None`` (or 0) means no limit.
    :param session: Optional object with a ``requests``-compatible ``get(url, params=..., timeout=...)``
        method (e.g. a ``requests.Session`` carrying authentication). Defaults to the ``requests`` module.
    :param delay: Seconds to wait between two consecutive requests. No wait happens before the
        first request or after the last one.
    :param timeout: Timeout in seconds passed to every request; ``None`` disables it.
    :return: A list containing all fetched data items.
    """
    if pagination_style not in _SUPPORTED_PAGINATION_STYLES:
        # Reject an unknown style before spending a network request on it.
        print(f"Pagination style error: Unsupported pagination style: {pagination_style}")
        return []

    http = session if session is not None else requests
    all_data = []
    current_url = base_url
    current_params = dict(params) if params else {}
    page_count = 0

    while current_url:
        page_count += 1
        if max_pages and page_count > max_pages:
            print(f"Reached max_pages limit: {max_pages}")
            break
        if page_count > 1 and delay and delay > 0:
            time.sleep(delay)  # Be kind to the API, but only between requests

        try:
            print(f"Fetching page {page_count} from: {current_url} with params: {current_params}")
            response = http.get(current_url, params=current_params, timeout=timeout)
            response.raise_for_status()  # Raise an exception for HTTP errors
            data = response.json()

            if pagination_style == 'link_header':
                # Example: GitHub API uses Link header for pagination.
                # `response.links` is the Link header parsed by requests, keyed by `rel`;
                # it copes with any ordering/spacing of the individual links.
                all_data.extend(data)
                next_url = response.links.get('next', {}).get('url')
            elif pagination_style == 'next_url_in_body':
                # Example: APIs where a 'next' field in the response body holds the next URL
                all_data.extend(data.get('results', []))  # Adjust based on actual API response structure
                next_url = data.get('next')  # Adjust field name as needed
            elif pagination_style == 'offset_limit':
                # Example: APIs using offset and limit parameters
                items = data.get('items', [])  # Adjust based on actual API response structure
                all_data.extend(items)
                # int() so that values supplied as strings (e.g. from a config file) work too.
                limit = int(current_params.get('limit', 100))  # Assuming limit is 100 by default
                if not items or limit <= 0 or len(items) < limit:
                    # Empty/short page means the end; a non-positive limit could never advance.
                    next_url = None
                else:
                    current_params['offset'] = int(current_params.get('offset', 0)) + limit
                    next_url = base_url  # Same URL, only the params change
            else:
                # 'page_number' -- Example: APIs using a 'page' parameter
                items = data.get('data', [])  # Adjust based on actual API response structure
                all_data.extend(items)
                per_page = int(current_params.get('per_page', 100))
                if not items or len(items) < per_page:  # Last page returns less than per_page
                    next_url = None
                else:
                    current_params['page'] = int(current_params.get('page', 1)) + 1
                    next_url = base_url  # Same URL, only the params change

            if next_url and pagination_style in ('link_header', 'next_url_in_body'):
                # A followed URL usually carries its own query string. Only keep sending the
                # parameters it lacks (e.g. an API key) so that stale values such as the
                # initial page number are not appended a second time.
                current_params = _params_missing_from_url(next_url, current_params)

        except requests.exceptions.HTTPError as e:
            print(f"HTTP error occurred: {e}")
            break
        except requests.exceptions.RequestException as e:
            print(f"Request error occurred: {e}")
            break
        except ValueError as e:
            # E.g. a body that is not valid JSON or a non-numeric paging parameter.
            print(f"Value error occurred: {e}")
            break
        except Exception as e:
            # Deliberate catch-all: return the partial result rather than crash.
            print(f"An unexpected error occurred: {e}")
            break

        current_url = next_url

    return all_data

# --- Example Usage ---
# 1. Link Header Pagination (e.g., GitHub API - simplified example)
# github_data = fetch_all_paginated_data('https://api.github.com/users/octocat/repos', pagination_style='link_header', max_pages=2)
# print(f"Fetched {len(github_data)} GitHub repos.")

# 2. Next URL in Body Pagination (hypothetical example)
# dummy_api_url_next_in_body = 'https://reqres.in/api/users?page=1' # reqres.in paginates with a 'page' param, so it is NOT a true 'next URL in body' API
# # For reqres.in, the 'page_number' style (example 4 below) is the accurate choice.
# # The 'next_url_in_body' style is meant for APIs whose response body contains the URL
# # of the following page, for example:
# #     {"results": [...], "next": "https://api.example.com/items?page=2"}
# # If your API uses different field names (e.g. "data" instead of "results"),
# # adjust `data.get('results', [])` and `data.get('next')` in the function accordingly.

# 3. Offset/Limit Pagination (dummy example)
# # If an API endpoint was like 'https://api.example.com/items' and takes 'offset' and 'limit'
# # offset_limit_data = fetch_all_paginated_data(
# #     'https://api.example.com/items',
# #     params={'offset': 0, 'limit': 50},
# #     pagination_style='offset_limit',
# #     max_pages=3
# # )
# # print(f"Fetched {len(offset_limit_data)} items with offset/limit.")

# 4. Page Number Pagination (using reqres.in as a real example)
# page_number_data = fetch_all_paginated_data(
#     'https://reqres.in/api/users',
#     params={'page': 1, 'per_page': 3}, # start at page 1, 3 items per page
#     pagination_style='page_number',
#     max_pages=2
# )
# print(f"Fetched {len(page_number_data)} users with page number.")
# print(page_number_data)

How it works: This Python snippet provides a flexible function for retrieving all data from APIs that implement pagination. It supports four common pagination strategies: `Link` headers (as used by GitHub), a `next` URL in the response body, `offset`/`limit` parameters, and `page`/`per_page` parameters. The function requests pages in a loop until no further page is indicated or `max_pages` is reached, collecting the items from every page into a single list. If a request fails, it prints the error and returns whatever was collected up to that point. A short delay between requests helps stay within the API's rate limits.

Iterating Through Paginated API Results with Python

Related PYTHON Snippets

Efficient List Filtering and Transformation with List Comprehensions

Implement a Fixed-Size History or Cache with collections.deque

Manage Unique Items and Perform Fast Membership Checks with Sets

Need help integrating this into your project?