From 7ebb1372f44ec440905f4b69ea3509b79dbf7740 Mon Sep 17 00:00:00 2001 From: Alec Date: Mon, 1 Sep 2025 12:38:33 +0200 Subject: [PATCH] Fix popularity stats and enhance path parsing for download-based analysis - Fix database format compatibility for popularity functions - Change popularity metrics from uploads to downloads (tracks user demand) - Implement enhanced left-to-right path parsing algorithm - Add support for diverse library structures (/music/, \Artists\, etc.) - Improve artist name removal from album titles - Update UI labels to reflect download-based popularity --- README.md | 7 +- slskd_stats_gui.py | 267 +++++++++++++++++++++++++++++---------------- 2 files changed, 177 insertions(+), 97 deletions(-) diff --git a/README.md b/README.md index 5a75d7f..06c25ab 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,9 @@ A GUI tool to analyze upload and download statistics from your slskd transfers d - Lists top users by data transferred - Shows statistics by file type - Filter statistics by time period (All time, Last month, Last year) -- **NEW**: Artist and album popularity statistics based on successful uploads -- Smart album name cleaning (removes redundant artist names from folder names) +- **NEW**: Artist and album popularity statistics based on user download demand +- Smart path parsing with enhanced library structure detection +- Intelligent album name cleaning (removes redundant artist names from folder names) - User-friendly graphical interface with summary and detailed tables ## Requirements @@ -43,7 +44,7 @@ With the GUI, you can: - View upload and download statistics side-by-side - See summary statistics and detailed tables for users and file types - **NEW**: Visual time series graphs showing transfer trends over time -- **NEW**: Analyze artist and album popularity with interactive charts and tables +- **NEW**: Analyze artist and album popularity based on download demand with interactive charts and tables ## Screenshots diff --git a/slskd_stats_gui.py b/slskd_stats_gui.py index d074259..c7c97a4 100644 --- a/slskd_stats_gui.py +++ b/slskd_stats_gui.py @@ -643,12 +643,12 @@ class MainWindow(QMainWindow): popularitySplitter = QSplitter(Qt.Horizontal) # Create artists section - artistsGroup = QGroupBox("Top Artists by Uploads") + artistsGroup = QGroupBox("Top Artists by Downloads") artistsLayout = QVBoxLayout() self.artistsTable = QTableWidget() self.artistsTable.setColumnCount(3) - self.artistsTable.setHorizontalHeaderLabels(["Artist", "Uploads", "Total Data"]) + self.artistsTable.setHorizontalHeaderLabels(["Artist", "Downloads", "Total Data"]) self.artistsTable.horizontalHeader().setStretchLastSection(True) self.artistsTable.setAlternatingRowColors(True) self.artistsTable.setSortingEnabled(True) @@ -664,12 +664,12 @@ class MainWindow(QMainWindow): popularitySplitter.addWidget(artistsGroup) # Create albums section - albumsGroup = QGroupBox("Top Albums by Uploads") + albumsGroup = QGroupBox("Top Albums by Downloads") albumsLayout = QVBoxLayout() self.albumsTable = QTableWidget() self.albumsTable.setColumnCount(4) - self.albumsTable.setHorizontalHeaderLabels(["Artist", "Album", "Uploads", "Total Data"]) + self.albumsTable.setHorizontalHeaderLabels(["Artist", "Album", "Downloads", "Total Data"]) self.albumsTable.horizontalHeader().setStretchLastSection(True) self.albumsTable.setAlternatingRowColors(True) self.albumsTable.setSortingEnabled(True) @@ -1011,7 +1011,7 @@ class MainWindow(QMainWindow): # Check if we have data and good format compatibility if not artist_stats and not album_stats: - self.showPopularityError("No successful upload transfers found.", format_info) + self.showPopularityError("No successful download transfers found.", format_info) return elif format_info['match_percentage'] < 50: self.showPopularityWarning(format_info) @@ -1060,8 +1060,8 @@ class MainWindow(QMainWindow): bars = ax.barh(range(len(truncated_artists)), counts) ax.set_yticks(range(len(truncated_artists))) ax.set_yticklabels(truncated_artists, fontsize=8) - ax.set_xlabel('Uploads') - ax.set_title(f'Top {len(artists)} Artists by Uploads') + ax.set_xlabel('Downloads') + ax.set_title(f'Top {len(artists)} Artists by Downloads') # Add value labels on bars for i, (bar, count) in enumerate(zip(bars, counts)): @@ -1118,8 +1118,8 @@ class MainWindow(QMainWindow): bars = ax.barh(range(len(truncated_labels)), counts) ax.set_yticks(range(len(truncated_labels))) ax.set_yticklabels(truncated_labels, fontsize=7) - ax.set_xlabel('Uploads') - ax.set_title(f'Top {len(album_labels)} Albums by Uploads') + ax.set_xlabel('Downloads') + ax.set_title(f'Top {len(album_labels)} Albums by Downloads') # Add value labels on bars for i, (bar, count) in enumerate(zip(bars, counts)): @@ -1151,7 +1151,7 @@ class MainWindow(QMainWindow): for i, bar in enumerate(bars): if bar.contains(event)[0]: # Show tooltip with full artist name and count - tooltip_text = f"{artists[i]}\n{counts[i]} uploads" + tooltip_text = f"{artists[i]}\n{counts[i]} downloads" self.artistsCanvas.setToolTip(tooltip_text) return @@ -1167,7 +1167,7 @@ class MainWindow(QMainWindow): for i, bar in enumerate(bars): if bar.contains(event)[0]: # Show tooltip with full album name and count - tooltip_text = f"{album_labels[i]}\n{counts[i]} uploads" + tooltip_text = f"{album_labels[i]}\n{counts[i]} downloads" self.albumsCanvas.setToolTip(tooltip_text) return @@ -1202,25 +1202,29 @@ class MainWindow(QMainWindow): {message} How it works: -• Analyzes successful upload transfers only -• Extracts artist and album from file paths -• Expected format: /path/Music/Artist/Album/Track.ext +• Analyzes successful download transfers (what users want) +• Smart left-to-right path parsing +• Detects media folders (/music/, \\Artists\\, etc.) +• Removes artist name prefixes from album titles Library Analysis: • Total files analyzed: {format_info['total_files']} • Compatible files: {format_info['matching_files']} ({format_info['match_percentage']:.1f}%) -Example paths from your library:""" +Parsing Examples:""" - if format_info['sample_paths']: - explanation_text += "\n\n" + "\n".join(f"• {path}" for path in format_info['sample_paths'][:5]) + if format_info.get('format_examples'): + explanation_text += "\n\n" + "\n".join( + f"• {ex['artist']} → {ex['album']}" + for ex in format_info['format_examples'][:5] + ) if format_info['match_percentage'] < 50: explanation_text += f""" ⚠️ Low compatibility detected ({format_info['match_percentage']:.1f}%) -Your music library structure may not match the expected format. -Consider organizing music files as: /Music/Artist/Album/Track.ext""" +The smart parser couldn't extract artist/album info from most files. +Check if your files are in media folders like /music/ or /audiobooks/""" ax.text(0.05, 0.95, explanation_text, transform=ax.transAxes, fontsize=9, verticalalignment='top', fontfamily='monospace', @@ -1229,22 +1233,31 @@ Consider organizing music files as: /Music/Artist/Album/Track.ext""" canvas.draw() def analyze_library_format(db_paths): - """Analyze the library format to determine if it matches expected structure""" + """Analyze the library format using smart left-to-right parsing""" total_files = 0 matching_files = 0 sample_paths = [] + format_examples = [] for db_path in db_paths: try: conn = sqlite3.connect(db_path) cursor = conn.cursor() + # Detect database format + db_format = check_database_format(db_path) + + if db_format == 'new': + success_condition = "StateDescription='Completed, Succeeded'" + else: + success_condition = "State LIKE 'Completed, Succeeded'" + # Get a sample of successful upload filenames - cursor.execute(""" + cursor.execute(f""" SELECT Filename FROM Transfers - WHERE State = 48 AND Direction = 'Upload' AND Filename IS NOT NULL - LIMIT 100 + WHERE {success_condition} AND Direction = 'Download' AND Filename IS NOT NULL + LIMIT 200 """) rows = cursor.fetchall() @@ -1252,17 +1265,17 @@ def analyze_library_format(db_paths): total_files += 1 sample_paths.append(filename) - # Check if it matches expected format: /path/Music/{Artist}/{Album}/{Track} - if filename and '/Music/' in filename: - try: - parts = filename.split('/Music/') - if len(parts) > 1: - music_path = parts[1] - path_parts = music_path.split('/') - if len(path_parts) >= 2 and path_parts[0] and path_parts[1]: - matching_files += 1 - except: - continue + # Use smart parsing to extract artist/album + artist, album = parse_media_path(filename) + if artist and album: + matching_files += 1 + # Keep some examples for display + if len(format_examples) < 10: + format_examples.append({ + 'path': filename, + 'artist': artist, + 'album': album + }) conn.close() @@ -1274,48 +1287,121 @@ def analyze_library_format(db_paths): 'total_files': total_files, 'matching_files': matching_files, 'match_percentage': match_percentage, - 'sample_paths': sample_paths[:10] # Keep first 10 as examples + 'sample_paths': sample_paths[:10], + 'format_examples': format_examples } +def parse_media_path(filepath): + """Smart left-to-right analysis of media file paths to extract artist and album""" + if not filepath: + return None, None + + # Normalize path separators (handle both single and double backslashes) + normalized_path = filepath.replace('\\\\', '/').replace('\\', '/') + lower_path = normalized_path.lower() + + # Find potential media indicators (case insensitive) + media_indicators = [ + '/music/', '/audiobooks/', '/audio/', '/media/', + '/artists/', '/musica/', '/jazz/', '/rock/', '/electronic/', + 'music/', 'artists/', 'musica/', 'jazz/', 'albums/' + ] + + path_parts = [] + + # Try to find a media root + media_start_idx = -1 + for indicator in media_indicators: + idx = lower_path.find(indicator) + if idx >= 0: + media_start_idx = idx + len(indicator) + break + + if media_start_idx >= 0: + # Extract from media root + media_path = normalized_path[media_start_idx:] + path_parts = [part for part in media_path.split('/') if part] + else: + # No clear media indicator - use heuristic approach + # Look for Artist/Album pattern in the path structure + all_parts = [part for part in normalized_path.split('/') if part] + + # Filter out common system/user prefixes + filtered_parts = [] + skip_patterns = ['@@', '!', '#', 'my files', 'downloads', 'shared', 'soulseek', 'main'] + + for part in all_parts: + part_lower = part.lower() + should_skip = False + for pattern in skip_patterns: + if part_lower.startswith(pattern): + should_skip = True + break + # Also skip parts that look like disk/volume identifiers + if len(part) <= 2 or part.isdigit() or (len(part) < 8 and any(c in part for c in '-_0123456789')): + should_skip = True + if not should_skip: + filtered_parts.append(part) + + # Take meaningful parts (likely Artist/Album/File or Genre/Artist/Album/File) + if len(filtered_parts) >= 3: + # Assume last 3 are Genre/Artist/Album or Artist/Album/File + # If last part looks like a file, take the two before it + if '.' in filtered_parts[-1]: + path_parts = filtered_parts[-3:-1] # Artist and Album + else: + path_parts = filtered_parts[-2:] # Artist and Album + elif len(filtered_parts) >= 2: + path_parts = filtered_parts[-2:] # Assume Artist/Album + + # Need at least 2 parts: Artist/Album + if len(path_parts) < 2: + return None, None + + artist = path_parts[0] + raw_album = path_parts[1] + + # Smart album cleaning + cleaned_album = clean_album_name(artist, raw_album) + + return artist, cleaned_album + def clean_album_name(artist, album): - """Smart cleaning of album names to remove redundant artist information""" + """Enhanced album name cleaning with common prefix removal""" if not artist or not album: return album artist_lower = artist.lower().strip() album_lower = album.lower().strip() - # Common patterns to clean - patterns_to_try = [ - # Pattern: "Artist - Album Title" - f"{artist_lower} - ", - f"{artist_lower} – ", # em dash - f"{artist_lower} — ", # em dash variant - - # Pattern: "Artist: Album Title" - f"{artist_lower}: ", - f"{artist_lower} : ", - - # Pattern: "Artist_ Album Title" or similar separators - f"{artist_lower}_ ", - f"{artist_lower} _ ", - ] + # Enhanced patterns to clean (more comprehensive) + separators = [' - ', ' – ', ' — ', ': ', ' : ', '_ ', ' _ ', ' | ', ' / '] cleaned_album = album - for pattern in patterns_to_try: + for sep in separators: + pattern = f"{artist_lower}{sep}" if album_lower.startswith(pattern): cleaned_album = album[len(pattern):] break - # Additional cleaning: remove extra whitespace and dashes - cleaned_album = cleaned_album.strip(' -–—_') + # Additional cleanup patterns + if cleaned_album == album: # No separator match, try other patterns + # Pattern: "ArtistName AlbumTitle" (space separated) + if album_lower.startswith(artist_lower + ' ') and len(album) > len(artist) + 1: + potential_clean = album[len(artist) + 1:] + # Only use if the remaining part looks like an album title + if len(potential_clean) > 3 and not potential_clean[0].islower(): + cleaned_album = potential_clean - # If cleaning resulted in empty string, return original - if not cleaned_album: + # Final cleaning: remove extra whitespace and punctuation + cleaned_album = cleaned_album.strip(' -–—_:|/') + + # Validation: don't return empty or too-short results + if not cleaned_album or len(cleaned_album) < 2: return album - - # If cleaning removed too much (less than 3 chars), return original - if len(cleaned_album) < 3: + + # Don't clean if it removes more than 70% of the original + if len(cleaned_album) < len(album) * 0.3: return album return cleaned_album @@ -1325,20 +1411,28 @@ def get_popularity_stats(db_paths, days=None): artist_stats = defaultdict(lambda: {'count': 0, 'bytes': 0}) album_stats = defaultdict(lambda: {'count': 0, 'bytes': 0}) - # Create WHERE clause for time filtering - where_clause = "WHERE State = 48 AND Direction = 'Upload'" # Only successful uploads - params = [] - - if days is not None: - where_clause += " AND RequestedAt >= ?" - cutoff_date = (datetime.datetime.now() - datetime.timedelta(days=days)).isoformat() - params.append(cutoff_date) - for db_path in db_paths: try: conn = sqlite3.connect(db_path) cursor = conn.cursor() + # Detect database format + db_format = check_database_format(db_path) + + if db_format == 'new': + success_condition = "StateDescription='Completed, Succeeded'" + else: + success_condition = "State LIKE 'Completed, Succeeded'" + + # Create WHERE clause for time filtering + where_clause = f"WHERE {success_condition} AND Direction = 'Download'" # Track what users download + params = [] + + if days is not None: + where_clause += " AND RequestedAt >= ?" + cutoff_date = (datetime.datetime.now() - datetime.timedelta(days=days)).isoformat() + params.append(cutoff_date) + query = f""" SELECT Filename, Size FROM Transfers @@ -1349,32 +1443,17 @@ def get_popularity_stats(db_paths, days=None): rows = cursor.fetchall() for filename, size in rows: - # Parse filename to extract artist and album - # Expected format: /data/Music/{Artist}/{Album}/{Track} - if filename and '/Music/' in filename: - try: - parts = filename.split('/Music/') - if len(parts) > 1: - music_path = parts[1] - path_parts = music_path.split('/') - if len(path_parts) >= 2: - artist = path_parts[0] - raw_album = path_parts[1] - - # Clean album name to remove redundant artist info - cleaned_album = clean_album_name(artist, raw_album) - - # Update artist stats - artist_stats[artist]['count'] += 1 - artist_stats[artist]['bytes'] += size - - # Update album stats using cleaned album name - album_key = (artist, cleaned_album) - album_stats[album_key]['count'] += 1 - album_stats[album_key]['bytes'] += size - except: - # Skip files that don't match expected format - continue + # Use smart left-to-right parsing to extract artist and album + artist, album = parse_media_path(filename) + if artist and album: + # Update artist stats + artist_stats[artist]['count'] += 1 + artist_stats[artist]['bytes'] += size + + # Update album stats + album_key = (artist, album) + album_stats[album_key]['count'] += 1 + album_stats[album_key]['bytes'] += size conn.close()