done

2025-05-08 11:08:52 +02:00
parent 4e068fb26a
commit 4faba9073e
1 changed files with 207 additions and 0 deletions
@@ -0,0 +1,207 @@
+#!/usr/bin/env python3
+"""
+SLSKD Transfer Size Analyzer
+
+A script I created to analyze SLSKD HTML and calculate accurate transfer statistics.
+After experimenting with different approaches, I found that checking the button text
+directly gives more reliable results than relying on CSS classes.
+
+I also added automatic conversion between MB and GB to make large numbers more readable.
+
+Usage:
+  python slskd_analyzer_with_gb.py <html_file>
+"""
+
+import re
+import sys
+import os
+from bs4 import BeautifulSoup
+
+def format_size(size_mb):
+    """
+    Format file size in MB or GB depending on the size.
+    
+    I added this function to make large transfer sizes more readable.
+    After dealing with multi-GB transfers, seeing something like "2458.7 MB" 
+    is less intuitive than "2.4 GB".
+    """
+    if size_mb >= 1024:
+        return f"{size_mb/1024:.2f} GB"
+    else:
+        return f"{size_mb:.1f} MB"
+
+def analyze_html(html_content):
+    """
+    Parse and analyze the SLSKD HTML content to extract transfer statistics.
+    
+    I initially tried looking only at CSS classes, but found inconsistencies in how 
+    the success/failure states were represented. Checking the actual button text with
+    "Completed, Succeeded" proved more reliable and worked across different versions.
+    """
+    soup = BeautifulSoup(html_content, 'html.parser')
+    
+    # Initialize statistics dictionary
+    stats = {
+        'successful_mb': 0,
+        'failed_mb': 0,
+        'total_mb': 0,
+        'successful_files': 0,
+        'failed_files': 0,
+        'flac_files': 0,
+        'mp3_files': 0,
+        'users': {}
+    }
+    
+    # Process each transfer card (one per user)
+    # I structured it this way to make it easier to attribute transfers to users
+    transfer_cards = soup.find_all('div', class_='ui raised card transfer-card')
+    
+    for card in transfer_cards:
+        # Extract the username from the header
+        header = card.find('div', class_='header')
+        username = header.text.strip() if header else "Unknown"
+        
+        # Initialize user statistics if not already present
+        if username not in stats['users']:
+            stats['users'][username] = {
+                'successful_mb': 0,
+                'failed_mb': 0, 
+                'total_mb': 0,
+                'successful_files': 0,
+                'failed_files': 0
+            }
+        
+        # Process all file rows for this user
+        # I tried several approaches and found traversing the rows directly was most reliable
+        rows = card.find_all('tr')
+        
+        for row in rows:
+            # Skip header rows - they have th elements
+            if row.find('th'):
+                continue
+            
+            # Extract the relevant cells
+            filename_cell = row.find('td', class_='transferlist-filename')
+            progress_cell = row.find('td', class_='transferlist-progress')
+            size_cell = row.find('td', class_='transferlist-size')
+            
+            # Skip if any required cell is missing
+            if not all([filename_cell, progress_cell, size_cell]):
+                continue
+            
+            filename = filename_cell.text.strip()
+            button = progress_cell.find('button')
+            
+            if not button:
+                continue
+                
+            # Track file types - primarily interested in audio formats
+            if filename.lower().endswith('.flac'):
+                stats['flac_files'] += 1
+            elif filename.lower().endswith('.mp3'):
+                stats['mp3_files'] += 1
+            
+            # Check success/failure status based on button text
+            # This was a key insight - looking for specific text patterns rather than
+            # relying on CSS classes which can vary across SLSKD versions/themes
+            button_text = button.text.strip()
+            is_success = 'Completed, Succeeded' in button_text
+            is_failed = 'Completed, Errored' in button_text
+            
+            # Parse size information
+            # Format is typically "X.X/Y.Y MB" where X is transferred and Y is total
+            size_text = size_cell.text.strip()
+            size_match = re.search(r'(\d+(?:\.\d+)?)/(\d+(?:\.\d+)?)', size_text)
+            
+            if not size_match:
+                continue
+                
+            transferred = float(size_match.group(1))
+            total = float(size_match.group(2))
+            
+            # Update statistics based on transfer status
+            if is_success:
+                stats['successful_mb'] += transferred
+                stats['successful_files'] += 1
+                stats['users'][username]['successful_mb'] += transferred
+                stats['users'][username]['successful_files'] += 1
+            elif is_failed:
+                # For failed transfers, I count the target size rather than the partial transfer
+                # This gives a better sense of what "should have" transferred
+                stats['failed_mb'] += total
+                stats['failed_files'] += 1
+                stats['users'][username]['failed_mb'] += total
+                stats['users'][username]['failed_files'] += 1
+            
+            stats['total_mb'] += total
+            stats['users'][username]['total_mb'] += total
+    
+    return stats
+
+def print_report(stats):
+    """
+    Format and print a comprehensive report of the transfer statistics.
+    
+    I organized this to present the most relevant information first (overall stats),
+    followed by file type breakdowns and user-specific information.
+    
+    For readability, I convert large values from MB to GB automatically.
+    """
+    print("\n===== SLSKD TRANSFER STATISTICS =====")
+    
+    print(f"\nOverall Statistics:")
+    print(f"Successfully transferred: {format_size(stats['successful_mb'])} ({stats['successful_files']} files)")
+    print(f"Failed transfers: {format_size(stats['failed_mb'])} ({stats['failed_files']} files)")
+    print(f"Total size of all files: {format_size(stats['total_mb'])} ({stats['successful_files'] + stats['failed_files']} files)")
+    
+    if stats['total_mb'] > 0:
+        success_rate = (stats['successful_mb'] / stats['total_mb']) * 100
+        print(f"Success rate: {success_rate:.1f}%")
+    
+    print(f"\nFile Type Statistics:")
+    print(f"FLAC files: {stats['flac_files']}")
+    print(f"MP3 files: {stats['mp3_files']}")
+    other_files = stats['successful_files'] + stats['failed_files'] - stats['flac_files'] - stats['mp3_files']
+    print(f"Other files: {other_files}")
+    
+    # Top users by total volume
+    print("\nTop Users by Total Transfer Volume:")
+    sorted_users_total = sorted(stats['users'].items(), key=lambda x: x[1]['total_mb'], reverse=True)
+    for i, (username, user_stats) in enumerate(sorted_users_total[:5], 1):
+        print(f"{i}. {username}: {format_size(user_stats['total_mb'])} total")
+    
+    # Top users by successful transfers
+    print("\nTop Users by Successful Transfers:")
+    sorted_users_success = sorted(stats['users'].items(), key=lambda x: x[1]['successful_mb'], reverse=True)
+    for i, (username, user_stats) in enumerate(sorted_users_success[:5], 1):
+        if user_stats['successful_mb'] > 0:
+            print(f"{i}. {username}: {format_size(user_stats['successful_mb'])} successful "
+                  f"({user_stats['successful_files']} files)")
+    
+    print("\n=====================================")
+
+def main():
+    """
+    Main entry point - handles command line arguments and file processing.
+    
+    I kept the interface simple - just provide the HTML file path as an argument.
+    This makes it easy to analyze different snapshots over time.
+    """
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <html_file>")
+        sys.exit(1)
+    
+    html_file = sys.argv[1]
+    
+    if not os.path.exists(html_file):
+        print(f"Error: File {html_file} does not exist")
+        sys.exit(1)
+    
+    with open(html_file, 'r', encoding='utf-8') as f:
+        html_content = f.read()
+    
+    stats = analyze_html(html_content)
+    print_report(stats)
+
+if __name__ == "__main__":
+    main()