#!/usr/bin/env python3
"""
SLSKD Transfer Size Analyzer

A script that analyzes saved SLSKD HTML and calculates transfer statistics.
After experimenting with different approaches, checking the button text
directly turned out to give more reliable results than relying on CSS classes.

Sizes are converted automatically between MB and GB to keep large numbers
readable.

Usage:
    python slskd_analyzer_with_gb.py <html_file>
"""

import os
import re
import sys

from bs4 import BeautifulSoup


def format_size(size_mb):
    """
    Format a size given in megabytes as a human-readable MB or GB string.

    Sizes that display as less than 1024 MB are shown in MB with one decimal
    place; anything larger is shown in GB with two decimal places, because
    "2.40 GB" is easier to read than "2458.7 MB".

    Args:
        size_mb: Size in megabytes (int or float).

    Returns:
        A string such as "512.0 MB" or "2.40 GB".
    """
    mb_text = f"{size_mb:.1f}"
    # Choose the unit from the *rounded* MB value: 1023.99 would otherwise be
    # printed as the awkward "1024.0 MB" instead of "1.00 GB".
    if float(mb_text) >= 1024:
        return f"{size_mb / 1024:.2f} GB"
    return f"{mb_text} MB"


# Matches "<transferred>[ unit]/<total>[ unit]", e.g. "1.5/3.2 MB".
# Each unit is optional so that both "1.5/3.2 MB" and "0 B/3.2 MB" parse.
_SIZE_PATTERN = re.compile(
    r'(\d+(?:\.\d+)?)\s*([KMGT]?i?B)?\s*/\s*(\d+(?:\.\d+)?)\s*([KMGT]?i?B)?',
    re.IGNORECASE,
)

# Conversion factors from a (normalized) unit suffix to megabytes.
# 1024-based, matching the MB -> GB conversion used when sizes are formatted.
_MB_PER_UNIT = {
    'B': 1.0 / (1024 * 1024),
    'KB': 1.0 / 1024,
    'MB': 1.0,
    'GB': 1024.0,
    'TB': 1024.0 * 1024,
}


def _new_totals():
    """Return a fresh set of size/file counters (used globally and per user)."""
    return {
        'successful_mb': 0,
        'failed_mb': 0,
        'total_mb': 0,
        'successful_files': 0,
        'failed_files': 0,
        'total_files': 0,
    }


def _unit_factor(unit, default):
    """Return the MB multiplier for a unit suffix, or `default` if it is absent."""
    if not unit:
        return default
    # "KiB" / "kb" / "KB" are all treated as the same 1024-based unit.
    return _MB_PER_UNIT[unit.upper().replace('I', '')]


def _parse_size_mb(size_text):
    """Parse a size cell into (transferred_mb, total_mb); None if unparseable."""
    match = _SIZE_PATTERN.search(size_text)
    if match is None:
        return None
    done_value, done_unit, total_value, total_unit = match.groups()
    # A bare "X/Y" is taken to be in MB; a unit that follows only the total
    # ("X/Y MB") applies to both numbers.
    total_factor = _unit_factor(total_unit, 1.0)
    done_factor = _unit_factor(done_unit, total_factor)
    return float(done_value) * done_factor, float(total_value) * total_factor


def analyze_html(html_content):
    """
    Parse SLSKD HTML content and extract transfer statistics.

    Success and failure are detected from the status button text
    ("Completed, Succeeded" / "Completed, Errored") rather than from CSS
    classes. Rows without a filename, progress or size cell, without a status
    button, or without a parseable size are ignored entirely.

    Sizes are normalized to MB. The size cell is expected to look like
    "X.X/Y.Y MB"; B/KB/GB/TB suffixes are converted, and a missing suffix is
    treated as MB.
    NOTE(review): the non-MB suffix handling assumes SLSKD renders small/large
    files with those suffixes - confirm against a real page.

    Args:
        html_content: The HTML document as a string.

    Returns:
        A dict with the keys 'successful_mb', 'failed_mb', 'total_mb',
        'successful_files', 'failed_files', 'total_files', 'flac_files',
        'mp3_files' and 'users'. 'users' maps each username to a dict with
        the same size/file counters (without the file-type counts).
        'total_mb' / 'total_files' cover every counted row, including rows
        that are neither succeeded nor errored (e.g. queued transfers).
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    stats = _new_totals()
    stats.update({'flac_files': 0, 'mp3_files': 0, 'users': {}})

    # One transfer card per user. Matching on the single distinguishing class
    # keeps working if the other classes on the element change or are reordered
    # (a multi-class string only matches that exact attribute value).
    for card in soup.find_all('div', class_='transfer-card'):
        header = card.find('div', class_='header')
        username = header.text.strip() if header else "Unknown"
        user_stats = stats['users'].setdefault(username, _new_totals())
        buckets = (stats, user_stats)

        for row in card.find_all('tr'):
            # Header rows contain <th> cells and carry no transfer data.
            if row.find('th'):
                continue

            filename_cell = row.find('td', class_='transferlist-filename')
            progress_cell = row.find('td', class_='transferlist-progress')
            size_cell = row.find('td', class_='transferlist-size')
            if filename_cell is None or progress_cell is None or size_cell is None:
                continue

            # The status is read from the button; rows without one are skipped.
            button = progress_cell.find('button')
            if button is None:
                continue

            sizes = _parse_size_mb(size_cell.text.strip())
            if sizes is None:
                continue
            transferred, total = sizes

            # File types are counted only for rows that also reach the totals,
            # so the type breakdown can never exceed 'total_files'.
            filename = filename_cell.text.strip().lower()
            if filename.endswith('.flac'):
                stats['flac_files'] += 1
            elif filename.endswith('.mp3'):
                stats['mp3_files'] += 1

            status = button.text.strip()
            if 'Completed, Succeeded' in status:
                for bucket in buckets:
                    bucket['successful_mb'] += transferred
                    bucket['successful_files'] += 1
            elif 'Completed, Errored' in status:
                # For failed transfers the target size is counted rather than
                # the partial transfer: it shows what "should have" arrived.
                for bucket in buckets:
                    bucket['failed_mb'] += total
                    bucket['failed_files'] += 1

            for bucket in buckets:
                bucket['total_mb'] += total
                bucket['total_files'] += 1

    return stats


def print_report(stats):
    """
    Print a report of the transfer statistics to stdout.

    The report lists overall figures first, then the file-type breakdown,
    then the top five users by total volume and by successful volume.
    Sizes are rendered with format_size (MB or GB as appropriate).

    Args:
        stats: Statistics dict with the keys 'successful_mb', 'failed_mb',
            'total_mb', 'successful_files', 'failed_files', 'flac_files',
            'mp3_files' and 'users' (username -> per-user counters).
            An optional 'total_files' key, when present, is used as the
            number of files behind 'total_mb'; otherwise the sum of
            successful and failed files is used.
    """
    completed_files = stats['successful_files'] + stats['failed_files']
    total_files = stats.get('total_files', completed_files)

    print("\n===== SLSKD TRANSFER STATISTICS =====")

    print("\nOverall Statistics:")
    print(f"Successfully transferred: {format_size(stats['successful_mb'])} ({stats['successful_files']} files)")
    print(f"Failed transfers: {format_size(stats['failed_mb'])} ({stats['failed_files']} files)")
    print(f"Total size of all files: {format_size(stats['total_mb'])} ({total_files} files)")

    # Guard against division by zero when nothing was parsed.
    if stats['total_mb'] > 0:
        success_rate = (stats['successful_mb'] / stats['total_mb']) * 100
        print(f"Success rate: {success_rate:.1f}%")

    print("\nFile Type Statistics:")
    print(f"FLAC files: {stats['flac_files']}")
    print(f"MP3 files: {stats['mp3_files']}")
    # The type counters may include files that are neither succeeded nor
    # failed (e.g. queued), so clamp instead of printing a negative count.
    other_files = max(0, total_files - stats['flac_files'] - stats['mp3_files'])
    print(f"Other files: {other_files}")

    users = list(stats['users'].items())

    print("\nTop Users by Total Transfer Volume:")
    by_total = sorted(users, key=lambda item: item[1]['total_mb'], reverse=True)
    for rank, (username, user_stats) in enumerate(by_total[:5], 1):
        print(f"{rank}. {username}: {format_size(user_stats['total_mb'])} total")

    print("\nTop Users by Successful Transfers:")
    by_success = sorted(users, key=lambda item: item[1]['successful_mb'], reverse=True)
    for rank, (username, user_stats) in enumerate(by_success[:5], 1):
        # Users without any successful transfer are not worth listing.
        if user_stats['successful_mb'] > 0:
            print(f"{rank}. {username}: {format_size(user_stats['successful_mb'])} successful "
                  f"({user_stats['successful_files']} files)")

    print("\n=====================================")


def main():
    """
    Command-line entry point: analyze one HTML file and print the report.

    Expects exactly one argument, the path of the saved SLSKD HTML page.
    Usage and file errors are written to stderr and terminate the process
    with exit status 1.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <html_file>", file=sys.stderr)
        sys.exit(1)

    html_file = sys.argv[1]

    # Open first and handle failure, instead of checking existence up front:
    # this also covers directories and unreadable files, which an exists()
    # check lets through to an uncaught traceback.
    try:
        # errors='replace' keeps a stray non-UTF-8 byte in the saved page
        # from aborting the whole analysis.
        with open(html_file, 'r', encoding='utf-8', errors='replace') as f:
            html_content = f.read()
    except FileNotFoundError:
        print(f"Error: File {html_file} does not exist", file=sys.stderr)
        sys.exit(1)
    except OSError as exc:
        print(f"Error: Could not read {html_file}: {exc}", file=sys.stderr)
        sys.exit(1)

    stats = analyze_html(html_content)
    print_report(stats)


# Run the analyzer only when executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())