diff --git a/slskd-upload-stats.py b/slskd-upload-stats.py new file mode 100644 index 0000000..9d0f000 --- /dev/null +++ b/slskd-upload-stats.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 +""" +SLSKD Transfer Size Analyzer + +A script I created to analyze SLSKD HTML and calculate accurate transfer statistics. +After experimenting with different approaches, I found that checking the button text +directly gives more reliable results than relying on CSS classes. + +I also added automatic conversion between MB and GB to make large numbers more readable. + +Usage: + python slskd_analyzer_with_gb.py +""" + +import re +import sys +import os +from bs4 import BeautifulSoup + +def format_size(size_mb): + """ + Format file size in MB or GB depending on the size. + + I added this function to make large transfer sizes more readable. + After dealing with multi-GB transfers, seeing something like "2458.7 MB" + is less intuitive than "2.4 GB". + """ + if size_mb >= 1024: + return f"{size_mb/1024:.2f} GB" + else: + return f"{size_mb:.1f} MB" + +def analyze_html(html_content): + """ + Parse and analyze the SLSKD HTML content to extract transfer statistics. + + I initially tried looking only at CSS classes, but found inconsistencies in how + the success/failure states were represented. Checking the actual button text with + "Completed, Succeeded" proved more reliable and worked across different versions. + """ + soup = BeautifulSoup(html_content, 'html.parser') + + # Initialize statistics dictionary + stats = { + 'successful_mb': 0, + 'failed_mb': 0, + 'total_mb': 0, + 'successful_files': 0, + 'failed_files': 0, + 'flac_files': 0, + 'mp3_files': 0, + 'users': {} + } + + # Process each transfer card (one per user) + # I structured it this way to make it easier to attribute transfers to users + transfer_cards = soup.find_all('div', class_='ui raised card transfer-card') + + for card in transfer_cards: + # Extract the username from the header + header = card.find('div', class_='header') + username = header.text.strip() if header else "Unknown" + + # Initialize user statistics if not already present + if username not in stats['users']: + stats['users'][username] = { + 'successful_mb': 0, + 'failed_mb': 0, + 'total_mb': 0, + 'successful_files': 0, + 'failed_files': 0 + } + + # Process all file rows for this user + # I tried several approaches and found traversing the rows directly was most reliable + rows = card.find_all('tr') + + for row in rows: + # Skip header rows - they have th elements + if row.find('th'): + continue + + # Extract the relevant cells + filename_cell = row.find('td', class_='transferlist-filename') + progress_cell = row.find('td', class_='transferlist-progress') + size_cell = row.find('td', class_='transferlist-size') + + # Skip if any required cell is missing + if not all([filename_cell, progress_cell, size_cell]): + continue + + filename = filename_cell.text.strip() + button = progress_cell.find('button') + + if not button: + continue + + # Track file types - primarily interested in audio formats + if filename.lower().endswith('.flac'): + stats['flac_files'] += 1 + elif filename.lower().endswith('.mp3'): + stats['mp3_files'] += 1 + + # Check success/failure status based on button text + # This was a key insight - looking for specific text patterns rather than + # relying on CSS classes which can vary across SLSKD versions/themes + button_text = button.text.strip() + is_success = 'Completed, Succeeded' in button_text + is_failed = 'Completed, Errored' in button_text + + # Parse size information + # Format is typically "X.X/Y.Y MB" where X is transferred and Y is total + size_text = size_cell.text.strip() + size_match = re.search(r'(\d+(?:\.\d+)?)/(\d+(?:\.\d+)?)', size_text) + + if not size_match: + continue + + transferred = float(size_match.group(1)) + total = float(size_match.group(2)) + + # Update statistics based on transfer status + if is_success: + stats['successful_mb'] += transferred + stats['successful_files'] += 1 + stats['users'][username]['successful_mb'] += transferred + stats['users'][username]['successful_files'] += 1 + elif is_failed: + # For failed transfers, I count the target size rather than the partial transfer + # This gives a better sense of what "should have" transferred + stats['failed_mb'] += total + stats['failed_files'] += 1 + stats['users'][username]['failed_mb'] += total + stats['users'][username]['failed_files'] += 1 + + stats['total_mb'] += total + stats['users'][username]['total_mb'] += total + + return stats + +def print_report(stats): + """ + Format and print a comprehensive report of the transfer statistics. + + I organized this to present the most relevant information first (overall stats), + followed by file type breakdowns and user-specific information. + + For readability, I convert large values from MB to GB automatically. + """ + print("\n===== SLSKD TRANSFER STATISTICS =====") + + print(f"\nOverall Statistics:") + print(f"Successfully transferred: {format_size(stats['successful_mb'])} ({stats['successful_files']} files)") + print(f"Failed transfers: {format_size(stats['failed_mb'])} ({stats['failed_files']} files)") + print(f"Total size of all files: {format_size(stats['total_mb'])} ({stats['successful_files'] + stats['failed_files']} files)") + + if stats['total_mb'] > 0: + success_rate = (stats['successful_mb'] / stats['total_mb']) * 100 + print(f"Success rate: {success_rate:.1f}%") + + print(f"\nFile Type Statistics:") + print(f"FLAC files: {stats['flac_files']}") + print(f"MP3 files: {stats['mp3_files']}") + other_files = stats['successful_files'] + stats['failed_files'] - stats['flac_files'] - stats['mp3_files'] + print(f"Other files: {other_files}") + + # Top users by total volume + print("\nTop Users by Total Transfer Volume:") + sorted_users_total = sorted(stats['users'].items(), key=lambda x: x[1]['total_mb'], reverse=True) + for i, (username, user_stats) in enumerate(sorted_users_total[:5], 1): + print(f"{i}. {username}: {format_size(user_stats['total_mb'])} total") + + # Top users by successful transfers + print("\nTop Users by Successful Transfers:") + sorted_users_success = sorted(stats['users'].items(), key=lambda x: x[1]['successful_mb'], reverse=True) + for i, (username, user_stats) in enumerate(sorted_users_success[:5], 1): + if user_stats['successful_mb'] > 0: + print(f"{i}. {username}: {format_size(user_stats['successful_mb'])} successful " + f"({user_stats['successful_files']} files)") + + print("\n=====================================") + +def main(): + """ + Main entry point - handles command line arguments and file processing. + + I kept the interface simple - just provide the HTML file path as an argument. + This makes it easy to analyze different snapshots over time. + """ + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + + html_file = sys.argv[1] + + if not os.path.exists(html_file): + print(f"Error: File {html_file} does not exist") + sys.exit(1) + + with open(html_file, 'r', encoding='utf-8') as f: + html_content = f.read() + + stats = analyze_html(html_content) + print_report(stats) + +if __name__ == "__main__": + main()