PYTHON
Fetch All Data from Cursor-Based Paginated API
A Python function that retrieves every item from an API that uses cursor-based pagination (e.g., a `next_cursor` or `after_id` token), following the cursor from page to page until no more data is available.
import requests
import time
# Placeholder configuration used by the example run at the bottom of this file,
# where both values are passed to fetch_all_paginated_data().
# NOTE(review): these are dummy values; avoid committing a real key here and
# consider loading it from the environment instead.
BASE_URL = 'https://api.example.com/v1/items'  # Replace with your API base URL
API_KEY = 'your_api_key'  # Replace with your API key or token
def fetch_all_paginated_data(base_url, api_key, limit=100, delay=1.0, timeout=10):
    """Collect every item from a cursor-paginated API endpoint.

    Repeatedly GETs ``base_url`` with a ``limit`` query parameter and, after
    the first page, the ``cursor`` taken from the previous response's
    ``paging.next_cursor`` field. Items are read from the ``data`` key of
    each JSON response.

    Args:
        base_url: URL of the paginated endpoint.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        limit: Page size sent as the ``limit`` query parameter.
        delay: Seconds to sleep between successive page requests; values
            ``<= 0`` disable the pause.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        A list with the items of every page fetched. Fetching stops when a
        page is empty or carries no next cursor. It also stops, returning
        whatever was collected so far, when a request fails, the body is not
        valid JSON, the payload is not a JSON object, or the server hands
        back the same cursor that was just sent.
    """
    all_items = []
    next_cursor = None  # No cursor is sent with the first request
    page_count = 0
    # The auth header never changes between pages, so build it once.
    headers = {'Authorization': f'Bearer {api_key}'}  # Or other auth headers

    while True:
        page_count += 1
        params = {'limit': limit}
        if next_cursor:
            params['cursor'] = next_cursor  # Or 'after_id', 'next_page_token', etc.
        print(f"Fetching page {page_count} with cursor: {next_cursor or 'N/A'}")

        try:
            response = requests.get(
                base_url, headers=headers, params=params, timeout=timeout
            )
            response.raise_for_status()  # Raise an exception for HTTP errors
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data: {e}")
            break

        # Decode in a separate try block: recent versions of requests raise a
        # JSON error that is both a RequestException and a ValueError, so a
        # shared try block would report a bad body as a fetch error.
        try:
            data = response.json()
        except ValueError:
            print(f"Error decoding JSON response: {response.text}")
            break

        if not isinstance(data, dict):
            print(f"Unexpected response payload (expected a JSON object): {data!r}")
            break

        items = data.get('data') or []  # Assumes items live under the 'data' key
        all_items.extend(items)

        # Find the next cursor. Common keys: 'next_cursor', 'next_page_token', 'after'
        previous_cursor = next_cursor
        paging = data.get('paging')
        # Tolerate "paging": null or a missing/odd paging section.
        next_cursor = paging.get('next_cursor') if isinstance(paging, dict) else None

        if not items or not next_cursor:  # Empty page or no cursor means last page
            print("Last page reached or no more items.")
            break

        if next_cursor == previous_cursor:
            # A cursor that does not advance would make this loop run forever.
            print(f"Cursor {next_cursor!r} did not advance; stopping to avoid an infinite loop.")
            break

        if delay > 0:
            time.sleep(delay)  # Be a good API citizen: pause between requests

    print(f"Finished fetching. Total items collected: {len(all_items)}")
    return all_items
# Demo-only stand-ins for a live API (not needed when calling a real endpoint).
# They are defined once at module level instead of being rebuilt on every call.
_MOCK_DATA_STORE = [{'id': i, 'name': f'Item {i}'} for i in range(1, 205)]


class MockResponse:
    """Minimal stand-in for a ``requests`` response object used by the demo."""

    def __init__(self, json_data, status_code=200):
        self._json_data = json_data
        self.status_code = status_code
        self.text = str(json_data)

    def json(self):
        """Return the payload this response was constructed with."""
        return self._json_data

    def raise_for_status(self):
        """Raise ``requests.exceptions.HTTPError`` when the status is >= 400."""
        if self.status_code >= 400:
            raise requests.exceptions.HTTPError(f"HTTP Error: {self.status_code}")


def mock_api_get(url, headers=None, params=None, timeout=None):
    """Simulate a cursor-paginated GET endpoint backed by 204 fake items.

    The cursor is the stringified index of the first item of the next page.
    A missing, non-numeric or negative cursor starts from the first item.

    Args:
        url: Ignored; accepted to mirror the ``requests.get`` call shape.
        headers: Ignored.
        params: Query parameters; ``cursor`` and ``limit`` (default 100) are read.
        timeout: Ignored.

    Returns:
        A ``MockResponse`` whose JSON has a ``data`` list and a ``paging`` dict
        that contains ``next_cursor`` only when more items remain.
    """
    params = params or {}
    print(f"Mock API call with params: {params}")
    current_cursor = params.get('cursor')
    limit = params.get('limit', 100)

    start_index = 0
    if current_cursor:
        try:
            # Clamp so a negative cursor cannot slice from the end of the store.
            start_index = max(int(current_cursor), 0)
        except (TypeError, ValueError):
            pass  # Fall back to 0 if the cursor is invalid

    end_index = start_index + limit
    response_data = {
        # Copy each item so callers cannot mutate the shared store.
        'data': [dict(item) for item in _MOCK_DATA_STORE[start_index:end_index]],
        'paging': {},
    }
    if end_index < len(_MOCK_DATA_STORE):
        response_data['paging']['next_cursor'] = str(end_index)
    return MockResponse(response_data)


if __name__ == "__main__":
    # Example usage:
    # BASE_URL and API_KEY must be valid to run against a live API. This demo
    # instead swaps requests.get for mock_api_get to exercise the pagination
    # loop without any network access.
    original_requests_get = requests.get
    requests.get = mock_api_get
    try:
        items = fetch_all_paginated_data(BASE_URL, API_KEY, limit=50)
        # print(f"First 5 collected items: {items[:5]}")
        # print(f"Last 5 collected items: {items[-5:]}")
    finally:
        # Always restore the real requests.get, even if the fetch raised.
        requests.get = original_requests_get
How it works: This Python snippet provides a function that fetches all available data from an API that uses cursor-based pagination. Unlike offset-limit pagination, cursor-based systems use an opaque string or ID (`next_cursor`, `after_id`, `next_page_token`) to identify the next set of results. The function makes requests in a loop, passing the `next_cursor` from the previous response, until the API returns an empty page or no further cursor. If a request fails or a response cannot be decoded as JSON, it prints the error, stops, and returns the items collected so far. It also pauses with `time.sleep` for one second between page requests to avoid overwhelming the API.