PYTHON
Analyzing Frequencies with `collections.Counter`
Efficiently count the frequency of items, like tags, keywords, or error codes from web logs, using Python's `collections.Counter` for data analysis.
from collections import Counter
from typing import List
# Sample request paths standing in for entries pulled from a web server access log.
access_logs = [
    "/api/users",
    "/api/products",
    "/api/users",
    "/dashboard",
    "/api/products",
    "/api/orders",
    "/api/users",
    "/login",
    "/api/products",
    "/dashboard",
    "/api/users",
]
# Sample tags as users might attach them to blog posts; repeats are intentional.
blog_tags = [
    "python",
    "webdev",
    "django",
    "flask",
    "python",
    "api",
    "database",
    "webdev",
    "python",
    "javascript",
    "frontend",
    "django",
    "python",
]
def analyze_access_patterns(paths: List[str], *, top_n: int = 3) -> None:
    """Print the most frequently requested paths and the number of distinct paths.

    Args:
        paths: Requested paths, one entry per access (repeats are counted).
        top_n: How many of the most frequent paths to list. Defaults to 3.
    """
    path_counts = Counter(paths)

    print("--- Most Common Access Paths ---")
    # most_common() orders by descending count; equal counts keep first-seen order.
    for path, count in path_counts.most_common(top_n):
        print(f"Path: {path}, Count: {count}")
    # len() of a Counter is the number of distinct keys, not the sum of the counts.
    print(f"Total unique paths: {len(path_counts)}")
    print("-" * 30)
def generate_tag_cloud_data(tags: List[str], *, top_n: int = 5) -> None:
    """Print the most frequent tags followed by the least frequent tag.

    Args:
        tags: Tag names, one entry per use (repeats are counted).
        top_n: How many of the most frequent tags to list. Defaults to 5.
    """
    tag_counts = Counter(tags)
    # Rank once and reuse the result for both the top-N list and the least common tag.
    ranked = tag_counts.most_common()

    print(f"--- Top {top_n} Blog Tags ---")
    # Clamp at 0 so a negative top_n lists nothing instead of slicing from the end.
    for tag, count in ranked[:max(top_n, 0)]:
        print(f"Tag: {tag}, Frequency: {count}")
    # With no tags there is no least common entry; indexing [-1] would raise IndexError.
    if ranked:
        least_tag, least_count = ranked[-1]
        print(f"Least common tag: {least_tag} (Count: {least_count})")
    print("-" * 30)
# Run both reports against the sample data; each call prints its results to stdout.
analyze_access_patterns(access_logs)
generate_tag_cloud_data(blog_tags)
# Counts can also be accumulated step by step instead of from a ready-made sequence.
# Counter.update() adds to any existing count rather than overwriting it.
error_codes = Counter()
error_codes.update({'404': 5})
error_codes.update({'500': 2})
error_codes.update({'200': 10})
error_codes.update({'404': 1})

print("--- Error Code Frequencies ---")
print(error_codes)
print("-" * 30)
How it works: `collections.Counter` is a dictionary subclass for counting hashable objects. It is well suited to frequency analysis in web development, such as processing web server access logs to identify popular endpoints, analyzing user-generated tags for a tag cloud, or tracking occurrences of error codes. The `most_common(n)` method returns the `n` highest-frequency items as `(item, count)` pairs in descending order of count (or every item when called without an argument), making it easy to gain insights from raw data. Because a missing key reads as a count of zero, counts can also be incremented directly, as in `error_codes['404'] += 5`, without initializing the key first.