PYTHON
Iterating Through Paginated API Results with Python
Learn to effectively fetch all data from paginated APIs using a loop, handling different pagination styles like next_page_url or offset/limit.
import time
from urllib.parse import parse_qs, urlsplit

import requests
def _params_missing_from_url(url, params):
    """Return the entries of ``params`` whose keys are not already in ``url``'s query string."""
    if not url or not params:
        return {}
    keys_in_url = parse_qs(urlsplit(url).query, keep_blank_values=True)
    return {key: value for key, value in params.items() if key not in keys_in_url}


def fetch_all_paginated_data(base_url, params=None, pagination_style='link_header', max_pages=None,
                             *, timeout=30, delay=0.5):
    """
    Fetches all data from a paginated API endpoint.

    Pages are requested one after another until the API signals that there are
    no more pages, ``max_pages`` is reached, or a request fails. Errors are
    printed rather than raised; whatever was collected before the error is
    returned.

    :param base_url: The base URL of the API endpoint.
    :param params: Initial query parameters as a dictionary. The caller's
        dictionary is copied, never modified.
    :param pagination_style: 'link_header', 'next_url_in_body', 'offset_limit', or 'page_number'.
    :param max_pages: Optional maximum number of pages to fetch to prevent infinite loops.
        ``None`` (or 0) means no limit.
    :param timeout: Seconds to wait for each request before giving up. ``None``
        disables the timeout.
    :param delay: Seconds to pause between consecutive page requests.
    :return: A list containing all fetched data items. An unsupported
        ``pagination_style`` is reported on stdout and yields an empty list.
    """
    supported_styles = ('link_header', 'next_url_in_body', 'offset_limit', 'page_number')
    if pagination_style not in supported_styles:
        # Reject a bad style before touching the network, while keeping the
        # function's "print and return what we have" error contract.
        print(f"Pagination style error: Unsupported pagination style: {pagination_style}")
        return []

    all_data = []
    current_url = base_url
    current_params = dict(params) if params else {}
    page_count = 0

    while current_url:
        page_count += 1
        if max_pages and page_count > max_pages:
            print(f"Reached max_pages limit: {max_pages}")
            break

        try:
            print(f"Fetching page {page_count} from: {current_url} with params: {current_params}")
            response = requests.get(current_url, params=current_params, timeout=timeout)
            response.raise_for_status()  # Raise an exception for HTTP errors
            data = response.json()

            if pagination_style == 'link_header':
                # Example: GitHub API uses Link header for pagination.
                # NOTE: assumes the body is a JSON array of items.
                all_data.extend(data)
                # response.links is requests' parsed view of the Link header;
                # it handles whitespace and ordering of the rel entries.
                current_url = response.links.get('next', {}).get('url')
                # The next URL carries its own query string; only keep caller
                # params it does not already contain to avoid duplicates.
                current_params = _params_missing_from_url(current_url, current_params)
            elif pagination_style == 'next_url_in_body':
                # Example: APIs where 'next_page' or 'next' field is in the response body
                all_data.extend(data.get('results', []))  # Adjust based on actual API response structure
                current_url = data.get('next')  # Adjust field name as needed
                current_params = _params_missing_from_url(current_url, current_params)
            elif pagination_style == 'offset_limit':
                # Example: APIs using offset and limit parameters
                items = data.get('items', [])  # Adjust based on actual API response structure
                all_data.extend(items)
                limit = current_params.get('limit', 100)  # Assuming limit is 100 by default
                if not items or len(items) < limit:
                    current_url = None  # Empty or short page: nothing left to fetch
                else:
                    # current_url stays base_url; only the offset advances
                    current_params['offset'] = current_params.get('offset', 0) + limit
            else:
                # 'page_number': APIs using a 'page' parameter
                items = data.get('data', [])  # Adjust based on actual API response structure
                all_data.extend(items)
                if not items or len(items) < current_params.get('per_page', 100):
                    current_url = None  # Empty or short page: nothing left to fetch
                else:
                    # current_url stays base_url; only the page number advances
                    current_params['page'] = current_params.get('page', 1) + 1
        except requests.exceptions.HTTPError as e:
            print(f"HTTP error occurred: {e}")
            break
        except requests.exceptions.RequestException as e:
            print(f"Request error occurred: {e}")
            break
        except ValueError as e:
            # Typically a body that is not valid JSON.
            print(f"Value error while processing response: {e}")
            break
        except Exception as e:
            # Best-effort contract: report and return what was collected so far.
            print(f"An unexpected error occurred: {e}")
            break

        if current_url and delay:
            time.sleep(delay)  # Be kind to the API, but only when another request follows

    return all_data
# --- Example Usage ---
# 1. Link Header Pagination (e.g., GitHub API - simplified example)
# github_data = fetch_all_paginated_data('https://api.github.com/users/octocat/repos', pagination_style='link_header', max_pages=2)
# print(f"Fetched {len(github_data)} GitHub repos.")
# 2. Next URL in Body Pagination (hypothetical example)
# # reqres.in (e.g. 'https://reqres.in/api/users?page=1') paginates with a 'page' parameter,
# # so it fits the 'page_number' style shown in example 4 rather than this one.
# # For an API whose response body carries the next-page URL, for example:
# #   {"data": [...], "next": "https://api.example.com/items?page=2"}
# # adjust the 'results' and 'next' keys read in the 'next_url_in_body' branch of the
# # function to match the API's field names (here 'data' and 'next'), then call:
# # next_in_body_data = fetch_all_paginated_data(
# # 'https://api.example.com/items',
# # pagination_style='next_url_in_body',
# # max_pages=3
# # )
# 3. Offset/Limit Pagination (dummy example)
# # If an API endpoint was like 'https://api.example.com/items' and takes 'offset' and 'limit'
# # offset_limit_data = fetch_all_paginated_data(
# # 'https://api.example.com/items',
# # params={'offset': 0, 'limit': 50},
# # pagination_style='offset_limit',
# # max_pages=3
# # )
# # print(f"Fetched {len(offset_limit_data)} items with offset/limit.")
# 4. Page Number Pagination (using reqres.in as a real example)
# page_number_data = fetch_all_paginated_data(
# 'https://reqres.in/api/users',
# params={'page': 1, 'per_page': 3}, # start at page 1, 3 items per page
# pagination_style='page_number',
# max_pages=2
# )
# print(f"Fetched {len(page_number_data)} users with page number.")
# print(page_number_data)
How it works: This Python snippet provides a flexible function that retrieves all data from a paginated API. It supports four common pagination strategies: `Link` headers (as used by GitHub), a next-page URL in the response body (read from the `next` field), `offset`/`limit` query parameters, and `page`/`per_page` query parameters. The function requests pages in a loop until the API indicates there are no more pages or the optional `max_pages` limit is reached, collecting the items from every page into a single list. If a request fails, the error is printed and the items collected so far are returned. A short delay between requests keeps the load on the API low and helps stay within its rate limits.