Files
slskd-stats/slskd-upload-stats.py
T
Player6734 4faba9073e done
2025-05-08 11:08:52 +02:00

208 lines
8.0 KiB
Python

#!/usr/bin/env python3
"""
SLSKD Transfer Size Analyzer
A script I created to analyze SLSKD HTML and calculate accurate transfer statistics.
After experimenting with different approaches, I found that checking the button text
directly gives more reliable results than relying on CSS classes.
I also added automatic conversion between MB and GB to make large numbers more readable.
Usage:
python slskd-upload-stats.py <html_file>
"""
import re
import sys
import os
from bs4 import BeautifulSoup
def format_size(size_mb: float) -> str:
    """
    Format a size given in megabytes as a human-readable string.

    Values of 1024 MB or more are shown in GB with two decimals
    (e.g. 2458.7 -> "2.40 GB"); smaller values stay in MB with one decimal
    (e.g. 512 -> "512.0 MB").

    Args:
        size_mb: Size in megabytes (1 GB is treated as 1024 MB).

    Returns:
        The formatted number followed by a space and its unit.
    """
    if size_mb >= 1024:
        return f"{size_mb / 1024:.2f} GB"
    return f"{size_mb:.1f} MB"
def analyze_html(html_content):
    """
    Parse SLSKD transfer HTML and aggregate transfer statistics.

    Each ``div`` whose class attribute is exactly
    ``ui raised card transfer-card`` is treated as one user's transfers; its
    ``div.header`` text is the username ("Unknown" when there is no header).
    Every table row with filename, progress and size cells plus a status
    button is inspected. The status is read from the button text
    ("Completed, Succeeded" / "Completed, Errored") rather than from CSS
    classes.

    Accounting rules:
      * succeeded rows add the *transferred* size to ``successful_mb``;
      * errored rows add the *target* size to ``failed_mb``, to reflect what
        should have been transferred;
      * every row with a parsable size adds its target size to ``total_mb``,
        whatever its status;
      * ``flac_files`` / ``mp3_files`` count only rows that were also counted
        as succeeded or errored, so they never exceed
        ``successful_files + failed_files``.

    Args:
        html_content: HTML markup of the SLSKD transfers page, as a string.

    Returns:
        dict with the keys ``successful_mb``, ``failed_mb``, ``total_mb``,
        ``successful_files``, ``failed_files``, ``flac_files``, ``mp3_files``
        and ``users``. ``users`` maps each username to a dict holding the
        same five size/file counters (without the file-type counters).
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    stats = {
        'successful_mb': 0,
        'failed_mb': 0,
        'total_mb': 0,
        'successful_files': 0,
        'failed_files': 0,
        'flac_files': 0,
        'mp3_files': 0,
        'users': {}
    }

    # One transfer card per user.
    for card in soup.find_all('div', class_='ui raised card transfer-card'):
        header = card.find('div', class_='header')
        username = header.text.strip() if header else "Unknown"

        # The same username may appear on several cards; reuse its entry.
        user_stats = stats['users'].setdefault(username, {
            'successful_mb': 0,
            'failed_mb': 0,
            'total_mb': 0,
            'successful_files': 0,
            'failed_files': 0
        })

        for row in card.find_all('tr'):
            # Header rows carry <th> cells and hold no transfer data.
            if row.find('th'):
                continue

            filename_cell = row.find('td', class_='transferlist-filename')
            progress_cell = row.find('td', class_='transferlist-progress')
            size_cell = row.find('td', class_='transferlist-size')
            if filename_cell is None or progress_cell is None or size_cell is None:
                continue

            button = progress_cell.find('button')
            if button is None:
                continue

            sizes = _parse_sizes_mb(size_cell.text.strip())
            if sizes is None:
                continue
            transferred, total = sizes

            button_text = button.text.strip()
            if 'Completed, Succeeded' in button_text:
                size_key, count_key, amount = 'successful_mb', 'successful_files', transferred
            elif 'Completed, Errored' in button_text:
                # Count the target size, not the partial transfer.
                size_key, count_key, amount = 'failed_mb', 'failed_files', total
            else:
                # Neither succeeded nor errored: only the total below applies.
                size_key = None

            if size_key is not None:
                for bucket in (stats, user_stats):
                    bucket[size_key] += amount
                    bucket[count_key] += 1

                # Track audio formats only for rows counted above, so the
                # type counters stay a subset of the finished-file count.
                filename = filename_cell.text.strip().lower()
                if filename.endswith('.flac'):
                    stats['flac_files'] += 1
                elif filename.endswith('.mp3'):
                    stats['mp3_files'] += 1

            stats['total_mb'] += total
            user_stats['total_mb'] += total

    return stats


# "X.X/Y.Y" optionally followed by a unit such as "MB".
_SIZE_PATTERN = re.compile(r'(\d+(?:\.\d+)?)/(\d+(?:\.\d+)?)\s*([A-Za-z]+)?')

# Multipliers that convert a value in the given unit to MB (1024-based).
_UNIT_TO_MB = {
    'b': 1 / (1024 * 1024),
    'byte': 1 / (1024 * 1024),
    'bytes': 1 / (1024 * 1024),
    'kb': 1 / 1024,
    'kib': 1 / 1024,
    'mb': 1,
    'mib': 1,
    'gb': 1024,
    'gib': 1024,
    'tb': 1024 * 1024,
    'tib': 1024 * 1024,
}


def _parse_sizes_mb(size_text):
    """Return ``(transferred_mb, total_mb)`` from a size cell, or None if unparsable."""
    match = _SIZE_PATTERN.search(size_text)
    if match is None:
        return None
    # A missing or unrecognised unit is treated as MB.
    # NOTE(review): assumes the trailing unit applies to both numbers -
    # confirm against real SLSKD output.
    factor = _UNIT_TO_MB.get((match.group(3) or 'mb').lower(), 1)
    return float(match.group(1)) * factor, float(match.group(2)) * factor
def print_report(stats):
    """
    Print a human-readable report of the transfer statistics to stdout.

    Sections, in order: overall totals (with a success rate when the total
    size is non-zero), file-type counts, the top five users by total volume,
    and the top five users by successfully transferred volume (users with
    nothing successful are omitted). Sizes are rendered with format_size().

    Args:
        stats: Statistics dict with the keys ``successful_mb``,
            ``failed_mb``, ``total_mb``, ``successful_files``,
            ``failed_files``, ``flac_files``, ``mp3_files`` and ``users``
            (username -> per-user dict with the size/file counters).
    """
    finished_files = stats['successful_files'] + stats['failed_files']

    print("\n===== SLSKD TRANSFER STATISTICS =====")

    print("\nOverall Statistics:")
    print(f"Successfully transferred: {format_size(stats['successful_mb'])} ({stats['successful_files']} files)")
    print(f"Failed transfers: {format_size(stats['failed_mb'])} ({stats['failed_files']} files)")
    print(f"Total size of all files: {format_size(stats['total_mb'])} ({finished_files} files)")
    # Guard against division by zero when nothing was parsed.
    if stats['total_mb'] > 0:
        success_rate = (stats['successful_mb'] / stats['total_mb']) * 100
        print(f"Success rate: {success_rate:.1f}%")

    print("\nFile Type Statistics:")
    print(f"FLAC files: {stats['flac_files']}")
    print(f"MP3 files: {stats['mp3_files']}")
    # Clamp at zero: the type counters may include files that are not part
    # of the finished-file count, which would otherwise print a negative.
    other_files = max(0, finished_files - stats['flac_files'] - stats['mp3_files'])
    print(f"Other files: {other_files}")

    # Top users by total volume
    print("\nTop Users by Total Transfer Volume:")
    sorted_users_total = sorted(stats['users'].items(), key=lambda x: x[1]['total_mb'], reverse=True)
    for i, (username, user_stats) in enumerate(sorted_users_total[:5], 1):
        print(f"{i}. {username}: {format_size(user_stats['total_mb'])} total")

    # Top users by successful transfers
    print("\nTop Users by Successful Transfers:")
    sorted_users_success = sorted(stats['users'].items(), key=lambda x: x[1]['successful_mb'], reverse=True)
    for i, (username, user_stats) in enumerate(sorted_users_success[:5], 1):
        # Sorted descending, so zero entries can only trail the list.
        if user_stats['successful_mb'] > 0:
            print(f"{i}. {username}: {format_size(user_stats['successful_mb'])} successful "
                  f"({user_stats['successful_files']} files)")

    print("\n=====================================")
def main():
    """
    Command-line entry point: analyze one HTML file and print the report.

    Expects exactly one argument, the path to a saved SLSKD HTML page.
    Usage and error messages are written to stderr.

    Raises:
        SystemExit: with status 1 when the argument count is wrong or the
            file cannot be read (missing, unreadable, or not valid UTF-8).
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <html_file>", file=sys.stderr)
        sys.exit(1)

    html_file = sys.argv[1]
    # Open directly instead of checking existence first: this also covers
    # directories, permission problems and files removed in between.
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
    except FileNotFoundError:
        print(f"Error: File {html_file} does not exist", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error: Could not read {html_file}: {exc}", file=sys.stderr)
        sys.exit(1)

    stats = analyze_html(html_content)
    print_report(stats)
# Run the analyzer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()