PYTHON
Fetch All Pages from a Paginated REST API
Efficiently retrieve all data from a paginated REST API endpoint using a recursive fetching strategy in Python, handling 'next page' links or page numbers.
import requests
import time
def fetch_paginated_data(base_url, params=None, all_data=None, page_key='page',
                         next_page_link_key='next', request_delay=0.5, timeout=30):
    """
    Fetch every page of a paginated JSON API and return the combined items.

    Pages are requested one after another in a loop (no recursion, so the
    number of pages is not limited by the interpreter's recursion depth).
    Two pagination styles are supported:

    * Next-link mode (default): after each page, the value stored under
      ``next_page_link_key`` in the JSON object is used as the URL of the
      next request. Paging stops when that value is missing or empty.
    * Page-number mode (``next_page_link_key`` is ``None`` or the string
      ``'None'``): the ``page_key`` query parameter is incremented after each
      page. Paging stops when a page contains no items. If the API ignores
      the page parameter and keeps returning data, paging never stops.

    Args:
        base_url (str): The URL of the API endpoint for the first request.
        params (dict, optional): Query parameters for the first request.
            The dict is copied and never modified. Defaults to None.
        all_data (list, optional): Existing list to append the fetched items
            to (extended in place). Defaults to a new empty list.
        page_key (str): Name of the page-number query parameter
            (e.g. 'page', '_page'). Only used in page-number mode.
        next_page_link_key (str or None): Key in the JSON response that holds
            the URL of the next page, or ``None`` / ``'None'`` to use
            page-number mode.
        request_delay (float): Seconds to sleep between consecutive requests.
        timeout (float): Timeout in seconds passed to ``requests.get``.

    Returns:
        list: All items collected from the pages that were fetched. If a
        request fails or a response is not valid JSON, the error is printed
        and the items collected so far are returned.

    Raises:
        ValueError: In page-number mode, if the current page value is a
            string that cannot be converted to an integer.
    """
    from urllib.parse import urljoin

    if all_data is None:
        all_data = []
    # Work on a copy so the caller's dict is never changed while paging.
    params = dict(params) if params else {}
    # The string 'None' is the historical sentinel for "the response has no
    # next link"; a real None is accepted as well.
    use_page_numbers = next_page_link_key is None or next_page_link_key == 'None'

    url = base_url
    visited_links = set()  # URLs already requested, to detect a looping 'next' link

    while True:
        print(f"Fetching: {url} with params: {params}")
        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers invalid JSON on requests releases whose JSON
            # decode error is not a RequestException subclass.
            print(f"An error occurred: {e}")
            return all_data  # Return what was fetched so far

        # The payload is either a JSON object (items under 'results', or the
        # object itself) or a bare JSON list of items.
        if isinstance(data, dict):
            items = data.get('results', data)  # Adjust 'results' based on your API's structure
            next_page_url = None if use_page_numbers else data.get(next_page_link_key)
        else:
            items = data
            next_page_url = None

        if isinstance(items, list):
            all_data.extend(items)
        else:
            # The API returned a single object for this page, or the structure is different.
            print("Warning: API response not a list. Check 'items' extraction logic.")
            all_data.append(items)

        if use_page_numbers:
            # Without a total-page count, an empty page is the only end signal.
            page_has_items = isinstance(items, (list, dict)) and len(items) > 0
            if not page_has_items:
                return all_data
            current_page = params.get(page_key, 1)
            if isinstance(current_page, str):
                # Query parameters are frequently supplied as strings ('1').
                current_page = int(current_page)
            params[page_key] = current_page + 1
        else:
            if not next_page_url:
                return all_data
            if not isinstance(next_page_url, str):
                print(f"Warning: next page link is not a URL string: {next_page_url!r}")
                return all_data
            visited_links.add(response.url)
            # Resolve relative links against the URL that was just fetched and
            # request the link as-is, so repeated query parameters survive.
            next_url = urljoin(response.url, next_page_url)
            if next_url in visited_links:
                print(f"Warning: next page link was already fetched, stopping: {next_url}")
                return all_data
            visited_links.add(next_url)
            url = next_url
            params = {}  # The next link already carries its own query string

        if request_delay:
            time.sleep(request_delay)  # Be kind to the API
# --- Example Usage ---
# Assuming an API like https://api.example.com/items?page=1 or with a 'next' link
# Example 1: API using 'page' query parameter
# total_items = fetch_paginated_data(
# 'https://jsonplaceholder.typicode.com/posts',
# params={'_limit': 10}, # Assuming an API that responds to _page and _limit for pagination
# page_key='_page',
# next_page_link_key='None' # Indicate no 'next' link in response, rely on page numbers
# )
# print(f"Fetched {len(total_items)} items.")
# print(total_items[0]) # First item
# print(total_items[-1]) # Last item
# Example 2: API that returns a 'next' link in the JSON response body
# Note: the real GitHub API exposes its `next` link in the HTTP `Link` header rather than
# in the JSON body, so it would need different handling than this function provides.
# The call below is conceptual: it targets an API whose JSON body contains a 'next'-style key.
# total_users = fetch_paginated_data(
# 'https://reqres.in/api/users', # A demo API with 'page' and 'next' properties
# params={'page': 1},
# page_key='page',
# next_page_link_key='next_page' # This is a conceptual key, adjust based on actual API
# )
# print(f"Fetched {len(total_users)} users.")
# if total_users:
# print(total_users[0])
How it works: This Python snippet demonstrates how to programmatically fetch all available data from a REST API that uses pagination. It requests one page at a time, either following the 'next page' link found in the JSON response body or incrementing a page-number query parameter until a page comes back with no data. (Next-page links delivered in HTTP headers are not inspected by this snippet.) This ensures that a complete dataset is retrieved, overcoming the limitations of single-page API calls. A short `time.sleep` between requests is included to avoid hammering the API.