PYTHON
Recursively Fetch All Pages from a Paginated API
Learn to efficiently retrieve all data from a paginated API using a recursive Python function, iterating through pages until no more data is available.
import requests
import json
# Configuration for the paginated API.
# Example endpoint: the GitHub API repository listing for the user "octocat",
# which is paged with the 'page' and 'per_page' query parameters.
API_URL = "https://api.github.com/users/octocat/repos"
# Number of items requested per page. fetch_paginated_data also compares each
# page's length against this value to recognise the last page.
PER_PAGE = 30
def fetch_paginated_data(url, page=1, all_data=None, *, timeout=10):
    """
    Recursively fetch every page of a paginated API and return all items.

    Each call requests a single page using the 'page' and 'per_page' query
    parameters, appends the returned items to the accumulator, and recurses
    for the next page. Recursion stops when a page is empty or holds fewer
    than PER_PAGE items.

    Args:
        url: Endpoint to query.
        page: Page number to request on this call (defaults to 1).
        all_data: List of items collected so far; a new list is created when
            omitted. It is extended in place and returned.
        timeout: Seconds to wait for the server on each request before the
            request is abandoned.

    Returns:
        The list of all items collected. If a request fails, the response is
        not valid JSON, or the payload is not a list, the items gathered
        before the failure are returned instead of raising.

    Note:
        One stack frame is used per page, so an extremely long result set can
        hit Python's recursion limit.
    """
    if all_data is None:
        all_data = []

    params = {
        "page": page,
        "per_page": PER_PAGE,
    }
    print(f"Fetching page {page} from {url}...")

    try:
        # Without a timeout a stalled server would block the crawl forever.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        current_page_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error fetching data on page {page}: {e}")
        return all_data  # Return data collected so far on error

    if not current_page_data:
        # No more data, or API returns empty list on last page
        print(f"No more data found on page {page}.")
        return all_data

    if not isinstance(current_page_data, list):
        # Extending with a dict would silently add its keys to the results.
        print(
            f"Error fetching data on page {page}: "
            f"expected a JSON list, got {type(current_page_data).__name__}"
        )
        return all_data

    all_data.extend(current_page_data)

    if len(current_page_data) < PER_PAGE:
        # This is the last page if fewer items than 'per_page' are returned
        print(f"Reached the last page on page {page}.")
        return all_data

    # A full page was returned, so there may be more: fetch the next page.
    return fetch_paginated_data(url, page + 1, all_data, timeout=timeout)
# Script entry point: fetch every repository page and print a short summary.
if __name__ == "__main__":
    print("Starting to fetch all paginated data...")
    all_repositories = fetch_paginated_data(API_URL)

    if all_repositories:
        # The leading "\n" separates the summary from the per-page progress
        # output; it must be an escape sequence, not a literal line break.
        print(f"\nSuccessfully fetched {len(all_repositories)} repositories.")
        # Example: print names of the first few repositories
        for repo in all_repositories[:5]:
            print(f"- {repo.get('name')}")
        if len(all_repositories) > 5:
            # Signal that the list was truncated for display.
            print("...")
    else:
        print("No repositories fetched or an error occurred.")
How it works: This Python snippet demonstrates a recursive approach to fetching all data from an API that returns results in paginated chunks. The `fetch_paginated_data` function makes successive API calls, incrementing the `page` parameter with each recursive call. It accumulates all received data and stops when an API response is empty or contains fewer items than the `per_page` limit, indicating that the last page has been retrieved. If a request fails, the function prints the error and returns whatever data it has collected up to that point.