PYTHON

Fetch All Data from Cursor-Based Paginated API

Efficiently retrieve all available data from APIs using cursor-based pagination (e.g., `next_cursor` or `after_id`) with a Python function.

import requests
import time

# Placeholder endpoint used by the example; replace with your API base URL.
BASE_URL = 'https://api.example.com/v1/items'
# Placeholder credential, sent as a Bearer token by fetch_all_paginated_data;
# replace with your API key or token and keep real secrets out of version control.
API_KEY = 'your_api_key'

def fetch_all_paginated_data(base_url, api_key, limit=100, delay=1, session=None):
    """Collect every item from a cursor-paginated API endpoint.

    Pages are requested one after another, each request carrying the cursor
    returned by the previous page, until a page has no items or no next
    cursor. The function is best-effort: on a request error, an undecodable
    body, an unexpected payload, or a repeated cursor it prints a message,
    stops, and returns whatever was collected so far.

    The response is expected to be a JSON object with the items under
    ``data`` and the next cursor under ``paging.next_cursor``; adjust those
    keys (and the ``cursor`` query parameter) to match your API.

    Args:
        base_url: URL of the paginated endpoint.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        limit: Page size sent as the ``limit`` query parameter.
        delay: Seconds to sleep between successive page requests; a falsy
            or non-positive value disables the pause.
        session: Optional object exposing a ``requests``-compatible
            ``get(url, headers=..., params=..., timeout=...)`` method, such
            as a ``requests.Session``. When omitted, ``requests.get`` is used.

    Returns:
        A list holding the items of every page fetched, in request order.
    """
    all_items = []
    next_cursor = None  # No cursor is sent on the first request
    seen_cursors = set()  # Cursors already requested, to detect a server loop
    page_count = 0
    headers = {'Authorization': f'Bearer {api_key}'}  # Or other auth headers
    http_get = session.get if session is not None else requests.get

    while True:
        page_count += 1
        params = {'limit': limit}
        if next_cursor:
            params['cursor'] = next_cursor  # Or 'after_id', 'next_page_token', etc.

        print(f"Fetching page {page_count} with cursor: {next_cursor or 'N/A'}")

        try:
            response = http_get(base_url, headers=headers, params=params, timeout=10)
            response.raise_for_status()  # Raise an exception for HTTP errors
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data: {e}")
            break

        # Decode separately: newer requests versions raise a JSONDecodeError
        # that is also a RequestException, which would otherwise be reported
        # as a fetch error instead of a decoding error.
        try:
            data = response.json()
        except ValueError:
            print(f"Error decoding JSON response: {response.text}")
            break

        if not isinstance(data, dict):
            print(f"Unexpected response payload: {response.text}")
            break

        # `or` also covers keys that are present but null in the payload.
        items = data.get('data') or []  # Assumes data is under 'data' key
        all_items.extend(items)

        # Find the next cursor. Common keys: 'next_cursor', 'next_page_token', 'after'
        paging = data.get('paging') or {}
        next_cursor = paging.get('next_cursor')

        if not items or not next_cursor:  # No more items or no next cursor implies last page
            print("Last page reached or no more items.")
            break

        if next_cursor in seen_cursors:
            # A server handing back a cursor we already used would loop forever.
            print(f"Cursor {next_cursor!r} was already requested; stopping to avoid an infinite loop.")
            break
        seen_cursors.add(next_cursor)

        if delay and delay > 0:
            time.sleep(delay)  # Be a good API citizen: add a delay between requests

    print(f"Finished fetching. Total items collected: {len(all_items)}")
    return all_items

if __name__ == "__main__":
    # Example usage.
    # BASE_URL and API_KEY are placeholders, so this demo swaps requests.get
    # for an in-memory fake and exercises the pagination loop without a live
    # API. With a real endpoint, replace the placeholders and drop everything
    # below except the fetch_all_paginated_data call.

    class MockResponse:
        """Minimal stand-in for the response attributes the fetcher uses."""

        def __init__(self, json_data, status_code=200):
            self._json_data = json_data
            self.status_code = status_code
            self.text = str(json_data)

        def json(self):
            return self._json_data

        def raise_for_status(self):
            if self.status_code >= 400:
                raise requests.exceptions.HTTPError(f"HTTP Error: {self.status_code}")

    # 204 fake records, built once rather than on every mock request.
    mock_data_store = [{'id': i, 'name': f'Item {i}'} for i in range(1, 205)]

    def mock_api_get(url, headers=None, params=None, timeout=None):
        """Serve one page of mock_data_store; the cursor is the start index as a string."""
        params = params or {}
        print(f"Mock API call with params: {params}")
        current_cursor = params.get('cursor', None)
        limit = params.get('limit', 100)

        start_index = 0
        if current_cursor:
            try:
                start_index = int(current_cursor)
            except ValueError:
                pass  # Fallback to 0 if cursor invalid

        end_index = start_index + limit
        response_data = {
            'data': mock_data_store[start_index:end_index],
            'paging': {},
        }

        # Only advertise a next cursor while records remain past this page.
        if end_index < len(mock_data_store):
            response_data['paging']['next_cursor'] = str(end_index)

        return MockResponse(response_data)

    # Temporarily monkey-patch requests.get for demonstration; the finally
    # clause restores it even if the fetch raises.
    original_requests_get = requests.get
    requests.get = mock_api_get
    try:
        items = fetch_all_paginated_data(BASE_URL, API_KEY, limit=50)
    finally:
        requests.get = original_requests_get

    # `items` now holds every mock record; slice it (e.g. items[:5] or
    # items[-5:]) to inspect the first or last entries.
How it works: This Python snippet provides a function to fetch all available data from an API that uses cursor-based pagination. Unlike offset-limit pagination, cursor-based systems use an opaque string or ID (`next_cursor`, `after_id`, `next_page_token`) to determine the next set of results. The function iteratively makes requests, passing the `next_cursor` from the previous response, until the API returns no more items or no further cursor. It includes basic error handling: if a request fails or the response is not valid JSON, the loop stops and the items collected so far are returned. A `time.sleep` between requests prevents overwhelming the API.

Need help integrating this into your project?

Our team of expert developers can help you build your custom application from scratch.

Hire DigitalCodeLabs