diff --git a/.gitignore b/.gitignore
index 71ef3ad..ba30730 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,4 +13,5 @@ src/lib/services/initFirebase.js
*.log
/prescraped-data-*/
/prescraped-*.json
-/scraping-data/
\ No newline at end of file
+# Test files with secrets
+test-*.js
diff --git a/documentation/CACHE_STRATEGY_README.md b/CACHE_STRATEGY_README.md
similarity index 100%
rename from documentation/CACHE_STRATEGY_README.md
rename to CACHE_STRATEGY_README.md
diff --git a/documentation/CACHE_TESTING_README.md b/CACHE_TESTING_README.md
similarity index 100%
rename from documentation/CACHE_TESTING_README.md
rename to CACHE_TESTING_README.md
diff --git a/documentation/FIRESTORE_UPLOAD_README.md b/FIRESTORE_UPLOAD_README.md
similarity index 100%
rename from documentation/FIRESTORE_UPLOAD_README.md
rename to FIRESTORE_UPLOAD_README.md
diff --git a/documentation/FIX_NULL_LYRICS_README.md b/FIX_NULL_LYRICS_README.md
similarity index 89%
rename from documentation/FIX_NULL_LYRICS_README.md
rename to FIX_NULL_LYRICS_README.md
index b2583d3..29c3d0f 100644
--- a/documentation/FIX_NULL_LYRICS_README.md
+++ b/FIX_NULL_LYRICS_README.md
@@ -40,17 +40,17 @@ npm run fix-lyrics-cached
1. **Fix songs for a specific artist** (FASTEST):
```bash
-node scripts/fix-null-lyrics.js --artist grace-petrie --dry-run
+node fix-null-lyrics.js --artist grace-petrie --dry-run
```
2. **Fix only cached songs** (FAST):
```bash
-node scripts/fix-null-lyrics.js --check-cached-only --dry-run
+node fix-null-lyrics.js --check-cached-only --dry-run
```
3. **Scan limited songs** (SLOW):
```bash
-node scripts/fix-null-lyrics.js --max-songs 100 --dry-run
+node fix-null-lyrics.js --max-songs 100 --dry-run
```
### Filter by Artist (Recommended)
@@ -58,12 +58,12 @@ node scripts/fix-null-lyrics.js --max-songs 100 --dry-run
Fix only songs from a specific artist - this is the fastest method:
```bash
# Dry run - the script will search for the artist automatically
-node scripts/fix-null-lyrics.js --artist "grace petrie" --dry-run
-node scripts/fix-null-lyrics.js --artist "Grace Petrie" --dry-run
-node scripts/fix-null-lyrics.js --artist grace-petrie --dry-run
+node fix-null-lyrics.js --artist "grace petrie" --dry-run
+node fix-null-lyrics.js --artist "Grace Petrie" --dry-run
+node fix-null-lyrics.js --artist grace-petrie --dry-run
# Actually fix
-node scripts/fix-null-lyrics.js --artist "kendrick lamar"
+node fix-null-lyrics.js --artist "kendrick lamar"
```
**The script now smartly searches for artists!** You can use:
@@ -82,7 +82,7 @@ This method:
Only process songs that are in artists' `cachedSongIds` arrays:
```bash
-node scripts/fix-null-lyrics.js --check-cached-only
+node fix-null-lyrics.js --check-cached-only
```
**This is the recommended approach** as it:
@@ -95,12 +95,12 @@ node scripts/fix-null-lyrics.js --check-cached-only
Process songs in smaller batches:
```bash
-node scripts/fix-null-lyrics.js --batch-size 5
+node fix-null-lyrics.js --batch-size 5
```
Limit total number of songs to process:
```bash
-node scripts/fix-null-lyrics.js --max-songs 50
+node fix-null-lyrics.js --max-songs 50
```
**Note:** When scanning ALL songs (without `--artist` or `--check-cached-only`), the script defaults to a maximum of 10,000 songs to prevent runaway scans. Use `--max-songs` to adjust this limit.
@@ -109,14 +109,14 @@ node scripts/fix-null-lyrics.js --max-songs 50
See detailed information about each song:
```bash
-node scripts/fix-null-lyrics.js --verbose
+node fix-null-lyrics.js --verbose
```
### Combine Options
```bash
-node scripts/fix-null-lyrics.js --artist baby-jey --dry-run --verbose
-node scripts/fix-null-lyrics.js --check-cached-only --batch-size 3 --max-songs 20
+node fix-null-lyrics.js --artist baby-jey --dry-run --verbose
+node fix-null-lyrics.js --check-cached-only --batch-size 3 --max-songs 20
```
## What It Does
@@ -279,9 +279,9 @@ For permanently failed songs:
**Solutions:**
1. Try different name formats:
```bash
- node scripts/fix-null-lyrics.js --artist "grace petrie" --dry-run
- node scripts/fix-null-lyrics.js --artist "Grace Petrie" --dry-run
- node scripts/fix-null-lyrics.js --artist grace-petrie --dry-run
+ node fix-null-lyrics.js --artist "grace petrie" --dry-run
+ node fix-null-lyrics.js --artist "Grace Petrie" --dry-run
+ node fix-null-lyrics.js --artist grace-petrie --dry-run
```
2. The script will search using:
@@ -299,7 +299,7 @@ For permanently failed songs:
**Solution:** Use the `--check-cached-only` flag:
```bash
-node scripts/fix-null-lyrics.js --check-cached-only --dry-run
+node fix-null-lyrics.js --check-cached-only --dry-run
```
This targets only songs in `cachedSongIds` arrays (songs that should have lyrics) and fetches them one at a time instead of all at once, avoiding timeouts.
@@ -326,7 +326,7 @@ If you see HTTP 429 errors, the script is hitting Genius too fast. Try:
### Firebase Connection Issues
-The script uses the same Firebase configuration as your other scripts (`scripts/firebase-uploader.js`, etc.). If those work, this will too!
+The script uses the same Firebase configuration as your other scripts (`firebase-uploader.js`, etc.). If those work, this will too!
If you encounter connection issues:
1. Check that your Firebase config in `src/lib/services/initFirebase.js` is correct
diff --git a/documentation/IMAGE_RENDERING_FIXES.md b/IMAGE_RENDERING_FIXES.md
similarity index 100%
rename from documentation/IMAGE_RENDERING_FIXES.md
rename to IMAGE_RENDERING_FIXES.md
diff --git a/documentation/IMAGE_TEST_TOOL_README.md b/IMAGE_TEST_TOOL_README.md
similarity index 100%
rename from documentation/IMAGE_TEST_TOOL_README.md
rename to IMAGE_TEST_TOOL_README.md
diff --git a/Most Popular J Artists on Genius.html b/Most Popular J Artists on Genius.html
new file mode 100644
index 0000000..ffba41f
--- /dev/null
+++ b/Most Popular J Artists on Genius.html
@@ -0,0 +1,4006 @@
+
+
+
+
+ Most Popular J Artists on Genius
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/documentation/NULL_LYRICS_HANDLING.md b/NULL_LYRICS_HANDLING.md
similarity index 96%
rename from documentation/NULL_LYRICS_HANDLING.md
rename to NULL_LYRICS_HANDLING.md
index 9a40b69..c8f915d 100644
--- a/documentation/NULL_LYRICS_HANDLING.md
+++ b/NULL_LYRICS_HANDLING.md
@@ -99,23 +99,23 @@ This prevents them from being treated as "cached" in the future and cluttering t
A bulk scraper utility has been created to proactively fix all songs with null lyrics across your entire database!
### Location
-- **Script**: `scripts/fix-null-lyrics.js`
+- **Script**: `fix-null-lyrics.js`
- **Documentation**: `FIX_NULL_LYRICS_README.md`
### Quick Start
```bash
# Dry run to see what would be fixed
-node scripts/fix-null-lyrics.js --dry-run
+node fix-null-lyrics.js --dry-run
# Fix all songs with null lyrics
-node scripts/fix-null-lyrics.js
+node fix-null-lyrics.js
# Fix only songs from a specific artist
-node scripts/fix-null-lyrics.js --artist baby-jey
+node fix-null-lyrics.js --artist baby-jey
# Fix only songs that are supposed to be cached
-node scripts/fix-null-lyrics.js --check-cached-only
+node fix-null-lyrics.js --check-cached-only
```
### Features
diff --git a/documentation/PRECISE_LYRICS_SCRAPER_SUMMARY.md b/PRECISE_LYRICS_SCRAPER_SUMMARY.md
similarity index 100%
rename from documentation/PRECISE_LYRICS_SCRAPER_SUMMARY.md
rename to PRECISE_LYRICS_SCRAPER_SUMMARY.md
diff --git a/documentation/PRESCRAPER_README.md b/PRESCRAPER_README.md
similarity index 92%
rename from documentation/PRESCRAPER_README.md
rename to PRESCRAPER_README.md
index 8f67606..bd2ca4c 100644
--- a/documentation/PRESCRAPER_README.md
+++ b/PRESCRAPER_README.md
@@ -6,14 +6,14 @@ A comprehensive bulk scraping and upload system for LyricType that fetches artis
The prescraper system consists of two main components:
-1. **`scripts/prescraper.js`** - Scrapes artist songs and lyrics from Genius API
-2. **`scripts/firebase-uploader.js`** - Uploads prescraped data to Firebase Firestore
+1. **`prescraper.js`** - Scrapes artist songs and lyrics from Genius API
+2. **`firebase-uploader.js`** - Uploads prescraped data to Firebase Firestore
This system allows you to bulk-populate your database with artist data, song metadata, and lyrics for a better user experience.
## Features
-### 🚀 Prescraper (`scripts/prescraper.js`)
+### 🚀 Prescraper (`prescraper.js`)
- ✅ Loads artists from existing `genius-artists-*.json` files
- ✅ Fetches complete song lists for each artist (up to 1000 songs)
- ✅ Scrapes lyrics for configurable number of top songs per artist
@@ -24,7 +24,7 @@ This system allows you to bulk-populate your database with artist data, song met
- ✅ Detailed logging and statistics
- ✅ Configurable via CLI arguments
-### 🔥 Firebase Uploader (`scripts/firebase-uploader.js`)
+### 🔥 Firebase Uploader (`firebase-uploader.js`)
- ✅ Uploads artists, songs, and lyrics to Firestore
- ✅ Batch operations for efficiency
- ✅ Duplicate detection and skip existing data
@@ -81,7 +81,7 @@ Either:
#### Basic Usage
```bash
# Scrape 10 songs per artist for all letters
-node scripts/prescraper.js
+node prescraper.js
# Or use npm script
npm start
@@ -90,13 +90,13 @@ npm start
#### Advanced Options
```bash
# Test with limited data
-node scripts/prescraper.js --test 5 --letters a,b --songs 3
+node prescraper.js --test 5 --letters a,b --songs 3
# Scrape specific letters only
-node scripts/prescraper.js --letters j,k,l --songs 15
+node prescraper.js --letters j,k,l --songs 15
# Help
-node scripts/prescraper.js --help
+node prescraper.js --help
```
#### CLI Options
@@ -110,7 +110,7 @@ node scripts/prescraper.js --help
#### Basic Usage
```bash
# Upload latest prescraped data
-node scripts/firebase-uploader.js
+node firebase-uploader.js
# Or use npm script
npm run upload
@@ -119,17 +119,17 @@ npm run upload
#### Advanced Options
```bash
# Dry run (test without uploading)
-node scripts/firebase-uploader.js --dry-run
+node firebase-uploader.js --dry-run
npm run upload-dry
# Upload specific directory
-node scripts/firebase-uploader.js --dir ./prescraped-data-2025-09-14/
+node firebase-uploader.js --dir ./prescraped-data-2025-09-14/
# Force overwrite existing data
-node scripts/firebase-uploader.js --force
+node firebase-uploader.js --force
# Help
-node scripts/firebase-uploader.js --help
+node firebase-uploader.js --help
```
#### CLI Options
@@ -334,10 +334,10 @@ The prescraper creates partial files as it works. To resume:
The uploader skips existing artists by default:
```bash
# Skip existing data (default)
-node scripts/firebase-uploader.js
+node firebase-uploader.js
# Or force overwrite
-node scripts/firebase-uploader.js --force
+node firebase-uploader.js --force
```
## Advanced Configuration
diff --git a/documentation/QUEUE_INTEGRATION_SUMMARY.md b/QUEUE_INTEGRATION_SUMMARY.md
similarity index 100%
rename from documentation/QUEUE_INTEGRATION_SUMMARY.md
rename to QUEUE_INTEGRATION_SUMMARY.md
diff --git a/documentation/QUICK_START_NULL_LYRICS.md b/QUICK_START_NULL_LYRICS.md
similarity index 91%
rename from documentation/QUICK_START_NULL_LYRICS.md
rename to QUICK_START_NULL_LYRICS.md
index e8d38f9..685ec01 100644
--- a/documentation/QUICK_START_NULL_LYRICS.md
+++ b/QUICK_START_NULL_LYRICS.md
@@ -54,13 +54,13 @@ Fix null lyrics for a single artist (much faster than scanning all songs):
```bash
# Dry run for specific artist - try any of these formats!
-node scripts/fix-null-lyrics.js --artist "grace petrie" --dry-run
-node scripts/fix-null-lyrics.js --artist "Grace Petrie" --dry-run
-node scripts/fix-null-lyrics.js --artist grace-petrie --dry-run
+node fix-null-lyrics.js --artist "grace petrie" --dry-run
+node fix-null-lyrics.js --artist "Grace Petrie" --dry-run
+node fix-null-lyrics.js --artist grace-petrie --dry-run
# Fix for specific artist
-node scripts/fix-null-lyrics.js --artist "kendrick lamar"
-node scripts/fix-null-lyrics.js --artist "Kendrick Lamar"
+node fix-null-lyrics.js --artist "kendrick lamar"
+node fix-null-lyrics.js --artist "Kendrick Lamar"
```
**Smart artist search!** The script will find artists using:
@@ -79,22 +79,22 @@ The artist filter:
Process songs in smaller batches (slower but safer):
```bash
-node scripts/fix-null-lyrics.js --batch-size 3
+node fix-null-lyrics.js --batch-size 3
```
Limit how many songs to process:
```bash
-node scripts/fix-null-lyrics.js --max-songs 20
+node fix-null-lyrics.js --max-songs 20
```
See detailed info about each song:
```bash
-node scripts/fix-null-lyrics.js --verbose
+node fix-null-lyrics.js --verbose
```
Combine options:
```bash
-node scripts/fix-null-lyrics.js --artist drake --batch-size 5 --verbose
+node fix-null-lyrics.js --artist drake --batch-size 5 --verbose
```
## How the Automatic System Works
diff --git a/documentation/SCRAPER_README.md b/SCRAPER_README.md
similarity index 96%
rename from documentation/SCRAPER_README.md
rename to SCRAPER_README.md
index 3ec8c6b..a8873ce 100644
--- a/documentation/SCRAPER_README.md
+++ b/SCRAPER_README.md
@@ -25,25 +25,25 @@ A Node.js script to scrape artist links from Genius.com artist index pages.
#### Single Letter Scraping
```bash
# Scrape artists for letter 'j' with IDs (default, slower)
-node scripts/genius-scraper.js
+node genius-scraper.js
# Scrape artists for a specific letter with IDs
-node scripts/genius-scraper.js a
-node scripts/genius-scraper.js k
-node scripts/genius-scraper.js z
+node genius-scraper.js a
+node genius-scraper.js k
+node genius-scraper.js z
# Fast mode: Skip ID extraction for quicker results
-node scripts/genius-scraper.js j --no-ids
-node scripts/genius-scraper.js a --no-ids
+node genius-scraper.js j --no-ids
+node genius-scraper.js a --no-ids
```
#### Bulk Scraping (All Letters A-Z)
```bash
# Scrape ALL letters with IDs (very slow - several hours!)
-node scripts/genius-scraper.js all
+node genius-scraper.js all
# Bulk scrape ALL letters without IDs (much faster - ~30 minutes)
-node scripts/genius-scraper.js all --no-ids
+node genius-scraper.js all --no-ids
```
### Example Output (with ID extraction)
diff --git a/documentation/SCRAPING_TEST_RESULTS.md b/SCRAPING_TEST_RESULTS.md
similarity index 100%
rename from documentation/SCRAPING_TEST_RESULTS.md
rename to SCRAPING_TEST_RESULTS.md
diff --git a/SSR_REMOVAL_PLAN.md b/SSR_REMOVAL_PLAN.md
new file mode 100644
index 0000000..9e8a76f
--- /dev/null
+++ b/SSR_REMOVAL_PLAN.md
@@ -0,0 +1,289 @@
+# SSR Removal & Image Processing Optimization Plan
+
+## Overview
+This document outlines the comprehensive plan to remove Server-Side Rendering (SSR) from LyricType and implement an optimized image processing system using server-side dithering with client-side WebGL color mapping.
+
+## Current State Analysis
+
+### Current Architecture
+- **SSR Function**: Handles all `/api/**` routes including image proxying
+- **Image Processing**: Client-side dithering using Canvas API
+- **Function Invocations**: Every image load = 1 function call
+- **Cost Structure**: High due to repeated processing of same images
+
+### Current Image Flow
+```
+Genius Image URL → Firebase Function Proxy → Client Download → Client Dithering → Display
+```
+
+### Identified Problems
+1. **High Function Costs**: Every image request hits Firebase Functions
+2. **Repeated Processing**: Same images dithered multiple times
+3. **Performance**: Client-side dithering blocks UI thread
+4. **Scalability**: Processing cost scales linearly with users
+5. **Network**: Full-color images are downloaded even though the displayed output is 1-bit (binary)
+
+## Target Architecture
+
+### New Image Flow
+```
+Genius Image URL → [One-time] Server Dither → Binary Storage → Client WebGL Coloring → Display
+```
+
+### Key Principles
+1. **Process Once, Use Forever**: Dither images server-side once
+2. **Store Binary Data**: Only 1 bit per pixel is needed
+3. **Client Coloring**: Real-time theme application via WebGL
+4. **Theme Decoupling**: Backend agnostic to frontend themes
+5. **Graceful Fallback**: Maintain compatibility during transition
+
+## Implementation Phases
+
+### Phase 1: Foundation & Testing (Week 1)
+#### 1.1 Binary Format Implementation
+- [ ] Modify image proxy to return binary dithered data
+- [ ] Implement server-side Atkinson dithering algorithm
+- [ ] Add binary data logging for verification
+- [ ] Test binary format compression ratios
+
+#### 1.2 WebGL Renderer Development
+- [ ] Create WebGL shader for binary→color mapping
+- [ ] Implement fallback for WebGL-unsupported browsers
+- [ ] Performance testing and optimization
+- [ ] Integration with existing component architecture
+
+#### 1.3 Testing Infrastructure
+- [ ] Unit tests for binary conversion
+- [ ] Visual regression tests for dithering accuracy
+- [ ] Performance benchmarks (WebGL vs Canvas)
+- [ ] Browser compatibility testing
+
+### Phase 2: Storage & Caching (Week 2)
+#### 2.1 Database Schema Design
+```javascript
+// Firestore document structure
+{
+ imageId: "hash_of_original_url",
+ originalUrl: "https://genius.com/...",
+ binaryData: "compressed_binary_string", // or blob reference
+ width: 200,
+ height: 200,
+ processedAt: timestamp,
+ compressionFormat: "gzip" | "lz4" | "custom",
+ processingVersion: "1.0" // for future algorithm updates
+}
+```
+
+#### 2.2 Caching Strategy
+- [ ] Implement cache-first lookup in client
+- [ ] Add cache warming for popular artists
+- [ ] Implement cache invalidation strategy
+- [ ] Add metrics for cache hit rates
+
+#### 2.3 Background Processing
+- [ ] Create background function for batch processing
+- [ ] Implement queue system for new image processing
+- [ ] Add retry logic for failed processing
+- [ ] Create admin tools for cache management
+
+### Phase 3: Migration & Optimization (Week 3)
+#### 3.1 Gradual Migration
+- [ ] Implement feature flag for new vs old system
+- [ ] A/B testing infrastructure
+- [ ] User preference storage
+- [ ] Rollback mechanisms
+
+#### 3.2 Performance Optimization
+- [ ] Implement image prefetching for popular artists
+- [ ] Add service worker caching
+- [ ] Optimize WebGL shader performance
+- [ ] Implement lazy loading for large artist lists
+
+#### 3.3 SSR Removal
+- [ ] Audit all SSR usage points
+- [ ] Migrate remaining functionality to client-side
+- [ ] Update Firebase hosting configuration
+- [ ] Remove SSR function and dependencies
+
+### Phase 4: Production & Monitoring (Week 4)
+#### 4.1 Production Deployment
+- [ ] Blue-green deployment strategy
+- [ ] Production monitoring setup
+- [ ] Error tracking and alerting
+- [ ] Performance monitoring dashboard
+
+#### 4.2 Cost Analysis
+- [ ] Function invocation tracking
+- [ ] Storage cost monitoring
+- [ ] Performance metrics collection
+- [ ] ROI calculation and reporting
+
+## Technical Specifications
+
+### Binary Format Design
+```javascript
+// Proposed binary format
+{
+ header: {
+ width: uint16, // 2 bytes
+ height: uint16, // 2 bytes
+ version: uint8, // 1 byte
+ compression: uint8, // 1 byte
+ checksum: uint32 // 4 bytes
+ },
+ data: compressed_binary_array // 1 bit per pixel, compressed
+}
+```
+
+### WebGL Shader Specifications
+```glsl
+// Vertex Shader
+attribute vec2 a_position;
+attribute vec2 a_texCoord;
+varying vec2 v_texCoord;
+
+void main() {
+ gl_Position = vec4(a_position, 0.0, 1.0);
+ v_texCoord = a_texCoord;
+}
+
+// Fragment Shader
+precision mediump float;
+uniform vec3 u_primaryColor;
+uniform vec3 u_secondaryColor;
+uniform sampler2D u_texture;
+varying vec2 v_texCoord;
+
+void main() {
+ float value = texture2D(u_texture, v_texCoord).r;
+ vec3 color = mix(u_primaryColor, u_secondaryColor, value);
+ gl_FragColor = vec4(color, 1.0);
+}
+```
+
+### API Design
+```javascript
+// New image service API
+class ImageService {
+ async getDitheredImage(originalUrl, options = {}) {
+ // 1. Check cache first
+ // 2. Fallback to processing
+ // 3. Return binary data
+ }
+
+ renderWithTheme(binaryData, primaryColor, secondaryColor) {
+ // WebGL rendering with theme colors
+ }
+
+ prefetchImages(urls) {
+ // Background prefetching
+ }
+}
+```
+
+## Performance Targets
+
+### Function Invocation Reduction
+- **Current**: 1 invocation per image load
+- **Target**: 1 invocation per unique image (lifetime)
+- **Expected Reduction**: 95%+
+
+### Image Loading Performance
+- **Current**: 500-2000ms (download + dither)
+- **Target**: 50-200ms (cache lookup + WebGL render)
+- **Expected Improvement**: 5-10x faster
+
+### Storage Efficiency
+- **Current**: ~40KB per 200x200 color image
+- **Target**: ~2-5KB per dithered binary image
+- **Expected Reduction**: ~88-95% (from ~40KB down to ~2-5KB)
+
+### Theme Switching
+- **Current**: Re-download and re-process all images
+- **Target**: Instant WebGL re-rendering
+- **Expected Improvement**: Near-instantaneous
+
+## Risk Assessment
+
+### High Risk
+1. **WebGL Compatibility**: Some older browsers may not support WebGL
+ - **Mitigation**: Canvas fallback implementation
+
+2. **Binary Format Changes**: Future algorithm updates may require format changes
+ - **Mitigation**: Versioned format with migration tools
+
+### Medium Risk
+1. **Storage Costs**: Large number of cached images
+ - **Mitigation**: Compression and LRU eviction
+
+2. **Processing Queue Bottlenecks**: High demand for new image processing
+ - **Mitigation**: Horizontal scaling and priority queues
+
+### Low Risk
+1. **Visual Quality Differences**: Minor differences in dithering output
+ - **Mitigation**: Extensive visual testing and user feedback
+
+## Success Metrics
+
+### Primary KPIs
+- Function invocation count reduction
+- Image loading performance improvement
+- User experience metrics (bounce rate, engagement)
+- Cost reduction percentage
+
+### Secondary KPIs
+- Cache hit rate
+- WebGL vs Canvas usage ratio
+- Storage utilization
+- Processing queue performance
+
+## Rollback Strategy
+
+### Immediate Rollback
+- Feature flag to disable new system
+- Automatic fallback to existing proxy
+- No data loss or corruption risk
+
+### Gradual Rollback
+- Percentage-based traffic routing
+- User-specific opt-out mechanism
+- Detailed monitoring during transition
+
+## Post-Implementation Optimizations
+
+### Future Enhancements
+1. **Machine Learning**: Predictive image prefetching based on user behavior
+2. **Progressive Enhancement**: Higher quality images for high-DPI displays
+3. **Batch Processing**: Bulk image processing for new artist imports
+4. **Edge Computing**: Regional processing for global performance
+5. **Advanced Compression**: Custom compression algorithms for binary data
+
+### Monitoring & Analytics
+1. **Real-time Dashboards**: Function costs, cache performance, user experience
+2. **Automated Alerts**: Performance degradation, error rates, cost spikes
+3. **A/B Testing Framework**: Continuous optimization and feature testing
+
+## Timeline Summary
+
+| Week | Focus Area | Key Deliverables |
+|------|------------|------------------|
+| 1 | Foundation | Binary format, WebGL renderer, testing |
+| 2 | Storage | Database schema, caching, background processing |
+| 3 | Migration | Feature flags, optimization, SSR removal |
+| 4 | Production | Deployment, monitoring, cost analysis |
+
+## Dependencies
+
+### Internal
+- Firebase Functions runtime compatibility
+- Firestore storage limits and pricing
+- SvelteKit client-side architecture
+
+### External
+- Browser WebGL support levels
+- Genius.com image availability and formats
+- Third-party monitoring and analytics tools
+
+---
+
+This plan provides a comprehensive roadmap for eliminating SSR while dramatically improving performance and reducing costs through intelligent caching and modern web technologies.
diff --git a/scripts/add-search-tokens.js b/add-search-tokens.js
similarity index 99%
rename from scripts/add-search-tokens.js
rename to add-search-tokens.js
index 8672adf..58017ba 100644
--- a/scripts/add-search-tokens.js
+++ b/add-search-tokens.js
@@ -1,14 +1,12 @@
#!/usr/bin/env node
-//TODO: REMOVE THIS SCRIPT, PUT THE INDIVIDUAL FUNCTIONALITY INTO THE UPLOAD ARTISTS SCRIPT
-
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import { initializeApp } from 'firebase/app';
import { getFirestore, collection, doc, getDoc, updateDoc, query, orderBy, limit, startAfter, getDocs } from 'firebase/firestore';
import unidecode from 'unidecode';
-import { firebaseConfig } from '../src/lib/services/initFirebase.js';
+import { firebaseConfig } from './src/lib/services/initFirebase.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
diff --git a/scripts/check-database.js b/check-database.js
similarity index 100%
rename from scripts/check-database.js
rename to check-database.js
diff --git a/cloudflare-image-proxy/package.json b/cloudflare-image-proxy/package.json
deleted file mode 100644
index 015ff4a..0000000
--- a/cloudflare-image-proxy/package.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- "name": "lyrictype-image-proxy",
- "version": "1.0.0",
- "description": "Cloudflare Worker to proxy Genius image requests",
- "main": "worker.js",
- "scripts": {
- "deploy": "wrangler deploy",
- "dev": "wrangler dev"
- },
- "keywords": ["cloudflare", "worker", "proxy", "image"],
- "author": "",
- "license": "MIT"
-}
-
diff --git a/cloudflare-image-proxy/wrangler.toml b/cloudflare-image-proxy/wrangler.toml
deleted file mode 100644
index 96225ae..0000000
--- a/cloudflare-image-proxy/wrangler.toml
+++ /dev/null
@@ -1,12 +0,0 @@
-name = "lyrictype-image-proxy"
-main = "worker.js"
-compatibility_date = "2024-01-01"
-
-# After deploying, set this environment variable in Cloudflare dashboard
-# [vars]
-# AUTH_KEY = "your-secret-key-here"
-
-# For production deployment
-# [[env.production]]
-# name = "lyrictype-image-proxy"
-
diff --git a/documentation/ARTIST_UPDATE_SYSTEM_PLAN.md b/documentation/ARTIST_UPDATE_SYSTEM_PLAN.md
deleted file mode 100644
index 3f7247d..0000000
--- a/documentation/ARTIST_UPDATE_SYSTEM_PLAN.md
+++ /dev/null
@@ -1,936 +0,0 @@
-# Artist Update System - Implementation Plan
-
-## 📋 Overview
-
-A comprehensive system for periodically updating the artist database by:
-1. Scraping the latest artist list from Genius
-2. Identifying new artists not in our database
-3. Prescraping song data for only new artists
-4. Manually uploading new data with popular flag updates
-
-**Run Frequency:** ~Once per month (manual execution)
-
----
-
-## 🗂️ Data Organization Structure
-
-All scraped data will be stored locally under `scraping-data/` with timestamp-based organization:
-
-```
-scraping-data/
-├── artist-lists/
-│ └── 2026-01-04-18-30/
-│ ├── artists-0.json # Numbers/symbols
-│ ├── artists-a.json
-│ ├── artists-b.json
-│ ├── ...
-│ ├── artists-z.json
-│ ├── summary.json # Totals, timestamp, metadata
-│ └── .complete # Marker file indicating completion
-│
-├── new-artists/
-│ └── 2026-01-04-18-30/
-│ ├── new-artists-0.json # Filtered: only new artists
-│ ├── new-artists-a.json
-│ ├── ...
-│ ├── new-artists-z.json
-│ ├── comparison-report.json # Details on what's new vs existing
-│ └── .complete
-│
-└── song-data/
- └── 2026-01-04-18-30/
- ├── songs-0.json # Prescraped songs for new artists
- ├── songs-a.json
- ├── ...
- ├── songs-z.json
- ├── scraping-summary.json # Stats on lyrics scraped
- └── .complete
-```
-
----
-
-## 🔄 Complete Workflow
-
-### **Phase 1: Scrape Artist Lists**
-Fetch current artist data from Genius for all letters.
-
-**Script:** `scripts/scrape-artists.js`
-
-**Input:** None
-**Output:** `scraping-data/artist-lists/{timestamp}/`
-
-**Data Structure (per file):**
-```json
-{
- "letter": "a",
- "scrapedAt": "2026-01-04T18:30:00.000Z",
- "totalArtists": 1245,
- "artists": {
- "popular": [
- {
- "name": "Artist Name",
- "url": "https://genius.com/artists/Artist-name",
- "id": "123456",
- "type": "popular"
- }
- ],
- "regular": [
- {
- "name": "Regular Artist",
- "url": "https://genius.com/artists/Regular-artist",
- "id": "789012",
- "type": "regular"
- }
- ]
- }
-}
-```
-
-### **Phase 2: Compare with Database**
-Identify which artists are new vs. already in Firestore.
-
-**Script:** `scripts/compare-artists.js`
-
-**Input:**
-- Latest artist list from Phase 1
-- Current Firestore artists collection
-
-**Output:** `scraping-data/new-artists/{timestamp}/`
-
-**Comparison Report Structure:**
-```json
-{
- "timestamp": "2026-01-04T18:30:00.000Z",
- "sourceDirectory": "scraping-data/artist-lists/2026-01-04-18-30",
- "statistics": {
- "totalGeniusArtists": 50000,
- "totalFirestoreArtists": 48500,
- "newArtists": 1500,
- "existingArtists": 48500,
- "popularChanges": {
- "addedToPopular": 15,
- "removedFromPopular": 12,
- "unchangedPopular": 505
- }
- },
- "perLetter": {
- "a": {
- "geniusTotal": 2000,
- "firestoreTotal": 1950,
- "newCount": 50,
- "popularInGenius": 20,
- "popularInFirestore": 20
- }
- },
- "newArtistsByLetter": {
- "a": 50,
- "b": 45,
- "...": "..."
- }
-}
-```
-
-**New Artists File Structure (per letter):**
-```json
-{
- "letter": "a",
- "comparisonDate": "2026-01-04T18:30:00.000Z",
- "newArtists": [
- {
- "name": "New Artist",
- "url": "https://genius.com/artists/New-artist",
- "id": "999999",
- "type": "regular",
- "isNew": true,
- "reason": "not_in_firestore"
- }
- ],
- "popularUpdates": [
- {
- "name": "Existing Artist",
- "id": "111111",
- "action": "add_popular",
- "reason": "now_in_genius_popular_top_20"
- },
- {
- "name": "Another Artist",
- "id": "222222",
- "action": "remove_popular",
- "reason": "no_longer_in_genius_popular_top_20"
- }
- ]
-}
-```
-
-### **Phase 3: Prescrape Songs**
-Scrape songs and lyrics for ONLY new artists.
-
-**Script:** `scripts/prescrape-new-artists.js`
-
-**Input:**
-- New artist lists from Phase 2
-- Configuration (songs per artist, delays, etc.)
-
-**Output:** `scraping-data/song-data/{timestamp}/`
-
-**Song Data Structure (matches current prescraper format):**
-```json
-{
- "letter": "a",
- "scrapedAt": "2026-01-04T19:15:00.000Z",
- "artists": [
- {
- "name": "New Artist",
- "urlKey": "new-artist",
- "url": "https://genius.com/artists/New-artist",
- "geniusId": "999999",
- "totalSongs": 25,
- "allSongs": [...],
- "scrapedSongs": [...],
- "processingStats": {
- "totalSongs": 25,
- "songsAttempted": 10,
- "lyricsScraped": 8,
- "lyricsFailed": 2
- }
- }
- ],
- "summary": {
- "totalArtists": 50,
- "totalSongs": 1250,
- "totalLyrics": 890
- }
-}
-```
-
-### **Phase 4: Upload to Database**
-Manual step to upload new data after inspection.
-
-**Script:** `scripts/upload-update.js`
-
-**Input:**
-- Song data from Phase 3
-- Comparison report from Phase 2
-
-**Actions:**
-1. **Clear all popular flags** from Firestore artists
-2. **Upload new artists** with songs, lyrics, and search tokens
-3. **Update popular flags** for exactly 20 artists per letter (from Genius)
-4. **Update metadata** (lastUpdated timestamps)
-
-**Output:** Updated Firestore database
-
----
-
-## 🛠️ Script Details
-
-### **1. scripts/scrape-artists.js**
-
-**Purpose:** Scrape all artist lists from Genius
-
-**Features:**
-- Scrapes all 27 letters (0, a-z)
-- Saves to timestamped directory
-- Includes artist IDs (via iOS app link extraction)
-- Separates popular vs regular artists
-- Creates summary.json with totals
-- Creates .complete marker when done
-- **TUI with progress bar and real-time statistics**
-- Error handling: continues on failures, logs to errors.json
-
-**CLI Options:**
-```bash
-node scripts/scrape-artists.js # Full scrape
-node scripts/scrape-artists.js --letters j,k # Specific letters only
-node scripts/scrape-artists.js --no-ids # Skip ID extraction (faster)
-node scripts/scrape-artists.js --output-dir ./custom/path # Custom output
-node scripts/scrape-artists.js --quiet # Minimal output (no TUI)
-```
-
-**Based on:** Current `genius-scraper.js` (refactored)
-
-**TUI Display:**
-- Current letter being scraped
-- Artists processed / total artists
-- Current artist being processed
-- Error counts by type
-- Estimated time remaining
-
----
-
-### **2. scripts/compare-artists.js**
-
-**Purpose:** Compare Genius artists with Firestore to identify new artists
-
-**Features:**
-- Reads latest (or specified) artist list
-- Queries Firestore for existing artists
-- Identifies new artists not in database
-- Detects popular status changes
-- Generates filtered lists of only new artists
-- Creates detailed comparison report
-- **TUI with progress bar for Firestore queries**
-- Error handling: continues on failures, logs to errors.json
-
-**CLI Options:**
-```bash
-node scripts/compare-artists.js # Use latest artist list
-node scripts/compare-artists.js --date 2026-01-04-18-30 # Specific timestamp
-node scripts/compare-artists.js --dry-run # Preview only
-node scripts/compare-artists.js --quiet # Minimal output
-```
-
-**Logic:**
-```javascript
-// Pseudo-code
-const geniusArtists = loadArtistLists(timestamp);
-const firestoreArtists = await fetchAllFirestoreArtists();
-
-const newArtists = geniusArtists.filter(artist =>
- !firestoreArtists.some(fa => fa.geniusId === artist.id)
-);
-
-const popularUpdates = calculatePopularChanges(
- geniusArtists.popular,
- firestoreArtists.filter(fa => fa.type === 'popular')
-);
-```
-
-**TUI Display:**
-- Loading progress for Firestore queries
-- Artists compared / total artists
-- Current letter being compared
-- New artists found count
-- Popular changes detected
-
----
-
-### **3. scripts/prescrape-new-artists.js**
-
-**Purpose:** Scrape songs and lyrics for only new artists
-
-**Features:**
-- Reads new-artists lists from Phase 2
-- Uses same scraping logic as current prescraper
-- Configurable songs per artist (default: 10)
-- Rate limiting and retries
-- Progress tracking per letter
-- Saves results in timestamp-matching directory
-- **TUI with detailed progress and error tracking**
-- Error handling: continues on failures, logs to errors.json
-
-**CLI Options:**
-```bash
-node scripts/prescrape-new-artists.js # Use latest comparison
-node scripts/prescrape-new-artists.js --date 2026-01-04-18-30
-node scripts/prescrape-new-artists.js --songs 15 # Scrape 15 songs per artist
-node scripts/prescrape-new-artists.js --letters j,k # Only specific letters
-node scripts/prescrape-new-artists.js --quiet # Minimal output
-```
-
-**Based on:** Current `prescraper.js` (adapted for artist list input)
-
-**TUI Display:**
-- Overall progress (artists processed / total)
-- Current letter being processed
-- Current artist and song being scraped
-- Songs scraped / lyrics found counts
-- Error counts by type (network, parsing, rate limit)
-- Processing speed (songs/second)
-- Estimated time remaining
-
----
-
-### **4. scripts/upload-update.js**
-
-**Purpose:** Upload new data to Firestore (manual inspection step)
-
-**Features:**
-- Reads song data and comparison report
-- Shows preview of changes before upload
-- Clears all popular flags first
-- Uploads new artists with search tokens
-- Sets popular flags for top 20 per letter
-- Batch uploads with smart rate limiting
-- **TUI with upload progress tracking**
-- Error handling: continues with partial, logs to errors.json
-
-**CLI Options:**
-```bash
-node scripts/upload-update.js # Use latest data
-node scripts/upload-update.js --date 2026-01-04-18-30 # Specific timestamp
-node scripts/upload-update.js --dry-run # Preview changes only
-node scripts/upload-update.js --skip-popular # Don't update popular flags
-node scripts/upload-update.js --letters j,k # Only specific letters
-node scripts/upload-update.js --batch-size 50 # Slower batching
-node scripts/upload-update.js --quiet # Minimal output
-node scripts/upload-update.js --yes # Skip confirmation prompt
-```
-
-**Upload Steps:**
-1. Load comparison report and song data
-2. **Preview Mode** - Show what will be uploaded:
- - X new artists to add
- - Y popular flags to add
- - Z popular flags to remove
-3. Confirm with user (Y/n) - unless --yes flag
-4. Clear all popular flags in Firestore
-5. Upload new artists (with search tokens)
-6. Upload new songs
-7. Set popular flags (exactly 20 per letter)
-8. Display summary
-
-**TUI Display:**
-- Current upload phase (artists / songs / flags)
-- Items uploaded / total items
-- Current batch being uploaded
-- Upload speed (items/second)
-- Error counts by type
-- Estimated time remaining
-
----
-
-### **5. scripts/artist-uploader.js** (Consolidated Utility)
-
-**Purpose:** Core upload functionality (used by upload-update.js)
-
-**Features:**
-- Merges `upload-to-firestore.js` + `upload-remaining-artists.js`
-- Generates search tokens automatically
-- Smart batching (auto-adjusts on rate limits)
-- Skip existing artists option
-- Update vs create modes
-- Validation and sanitization
-
-**Exports functions used by other scripts**
-
----
-
-## 📝 NPM Scripts (package.json)
-
-```json
-{
- "scripts": {
- "update:scrape-artists": "node scripts/scrape-artists.js",
- "update:compare": "node scripts/compare-artists.js",
- "update:prescrape": "node scripts/prescrape-new-artists.js",
- "update:upload": "node scripts/upload-update.js",
- "update:all": "npm run update:scrape-artists && npm run update:compare && npm run update:prescrape",
- "update:latest": "node scripts/upload-update.js"
- }
-}
-```
-
-**Typical Monthly Workflow:**
-```bash
-# Step 1: Scrape latest artists from Genius (~10 minutes)
-npm run update:scrape-artists
-# TUI shows real-time progress, no emojis
-
-# Step 2: Compare with database and identify new artists (~2 minutes)
-npm run update:compare
-# TUI shows comparison progress and results
-
-# Step 3: Prescrape songs for new artists only (~2 hours)
-npm run update:prescrape
-# TUI shows detailed progress: current artist, song, errors
-
-# Step 4: Inspect data in scraping-data/ directories (manual)
-# Review comparison-report.json and errors.json files
-
-# Step 5: Upload to database (after manual inspection, ~5 minutes)
-npm run update:upload
-# Shows preview, asks for confirmation, then uploads with TUI
-
-# Or run all scraping steps at once (Steps 1-3):
-npm run update:all
-# Then inspect and upload separately
-```
-
----
-
-## 🔧 Implementation Order
-
-### **Week 1: Data Structure & Core Utilities**
-- [ ] Create `scraping-data/` directory structure
-- [ ] Create timestamp utility functions
-- [ ] Set up TUI libraries (`cli-progress`, `chalk` for subtle colors)
-- [ ] Create shared TUI module for progress bars
-- [ ] Create shared error logging module (errors.json)
-- [ ] Refactor `artist-uploader.js` (merge upload scripts)
-- [ ] Add search token generation to upload process
-- [ ] Test upload with rate limiting
-
-### **Week 2: Artist Scraping**
-- [ ] Refactor `genius-scraper.js` → `scrape-artists.js`
-- [ ] Add TUI with progress bar and statistics
-- [ ] Update to save in new directory structure
-- [ ] Add summary.json generation
-- [ ] Add errors.json generation
-- [ ] Add .complete marker creation
-- [ ] Remove all emojis from output
-- [ ] Test full artist scraping workflow
-
-### **Week 3: Comparison Logic**
-- [ ] Create `compare-artists.js`
-- [ ] Add TUI with progress tracking
-- [ ] Implement Firestore artist fetching
-- [ ] Build comparison logic (new vs existing)
-- [ ] Implement popular status change detection
-- [ ] Generate filtered new-artists lists
-- [ ] Create detailed comparison report
-- [ ] Add error handling and logging
-- [ ] Remove all emojis from output
-- [ ] Test with real data
-
-### **Week 4: Prescraper Adaptation**
-- [ ] Create `prescrape-new-artists.js`
-- [ ] Add comprehensive TUI with detailed progress
-- [ ] Adapt prescraper to read artist lists
-- [ ] Update to use new directory structure
-- [ ] Add error handling (continue on failures)
-- [ ] Add errors.json generation
-- [ ] Remove all emojis from output
-- [ ] Test with small artist list
-- [ ] Test with full new artists list
-
-### **Week 5: Upload System**
-- [ ] Create `upload-update.js`
-- [ ] Add TUI with upload progress
-- [ ] Implement preview mode
-- [ ] Add popular flag clearing logic
-- [ ] Add popular flag setting (top 20 per letter)
-- [ ] Implement confirmation prompts
-- [ ] Add error handling (continue with partial)
-- [ ] Add errors.json generation
-- [ ] Remove all emojis from output
-- [ ] Test with dry-run mode
-- [ ] Test full upload workflow
-
-### **Week 6: Integration & Testing**
-- [ ] End-to-end testing of full workflow
-- [ ] Verify all TUIs work correctly
-- [ ] Verify no emojis in any output
-- [ ] Test error handling (graceful degradation)
-- [ ] Test errors.json generation
-- [ ] Create comprehensive documentation
-- [ ] Update README with new workflow
-- [ ] Archive old scripts to `scripts/archived/`
-- [ ] Create migration guide
-- [ ] Document TUI libraries and dependencies
-
----
-
-## 🎯 Key Features
-
-### **1. Incremental Updates**
-- Only process new artists (saves hours of scraping)
-- Preserves existing data (no overwriting)
-- Only updates popular flags (no re-uploads)
-
-### **2. Data Safety**
-- Everything saved locally first
-- Manual inspection before upload
-- Dry-run modes for all scripts
-- Rollback capability
-
-### **3. Transparency**
-- Detailed comparison reports
-- Clear summary of changes
-- Progress tracking throughout
-- Comprehensive logs
-
-### **4. Flexibility**
-- Can run full workflow or individual steps
-- Can target specific letters
-- Can reprocess data from any timestamp
-- Configurable batch sizes for rate limiting
-
-### **5. Popular Artists Management**
-- Clear all flags before update (ensures exactly 20 per letter)
-- Based on Genius's current popular list
-- Automatic detection of status changes
-- Detailed logging of changes
-
----
-
-## Example Complete Run
-
-```bash
-# January 4, 2026 - Monthly update
-
-# 1. Scrape latest artist lists from Genius
-$ npm run update:scrape-artists
-
-================================================================================
-ARTIST LIST SCRAPER
-================================================================================
-
-Progress: [████████████████████████] 100% (27/27 letters) | Time: 8m 42s
-
-Statistics:
- Popular Artists: 540
- Regular Artists: 49,460
- Total Artists: 50,000
-
-Errors:
- Network errors: 2
- ID extraction failed: 18
-
-Output: scraping-data/artist-lists/2026-01-04-18-30/
-[SUCCESS] Artist scraping complete
-================================================================================
-
-# 2. Compare with database
-$ npm run update:compare
-
-================================================================================
-ARTIST COMPARISON
-================================================================================
-
-Progress: [████████████████████████] 100% | Time: 1m 23s
-
-Results:
- Genius artists: 50,000
- Firestore artists: 48,753
- New artists found: 1,247
- Popular changes: 23
-
-Output: scraping-data/new-artists/2026-01-04-18-30/
-[SUCCESS] Comparison complete
-================================================================================
-
-# 3. Prescrape songs for new artists only
-$ npm run update:prescrape
-
-================================================================================
-SONG PRESCRAPER - New Artists Only
-================================================================================
-
-Progress: [████████████████████████] 100% (1247/1247) | Time: 1h 47m
-
-Current: Letter J - Artist: John Doe - Song: Example Song
-
-Statistics:
- Artists processed: 1,247
- Songs scraped: 12,470
- Lyrics found: 9,856
- Processing speed: 1.9 songs/sec
-
-Errors:
- Network timeout: 45
- Lyrics not found: 2,569
- Parsing failed: 12
-
-Output: scraping-data/song-data/2026-01-04-18-30/
-[SUCCESS] Prescraping complete
-================================================================================
-
-# 4. Inspect data (manual step)
-$ cat scraping-data/new-artists/2026-01-04-18-30/comparison-report.json
-# Review: New artists look good, popular changes make sense
-
-# 5. Upload to database
-$ npm run update:upload
-
-================================================================================
-UPLOAD PREVIEW
-================================================================================
-
-Data Source: scraping-data/song-data/2026-01-04-18-30/
-
-Changes to be made:
- New artists: 1,247
- New songs: 12,470
- Popular flags to add: 540 (20 per letter x 27)
- Popular flags to remove: 540
-
-Proceed with upload? (Y/n): y
-
-================================================================================
-UPLOADING TO FIRESTORE
-================================================================================
-
-Progress: [████████████████████████] 100% | Time: 4m 12s
-
-Phase: Uploading Songs (Batch 125/125)
-
-Statistics:
- Artists uploaded: 1,247
- Songs uploaded: 12,470
- Popular flags set: 540
- Upload speed: 3.2 items/sec
-
-Errors:
- Rate limit (retried): 3
- Write failed: 0
-
-[SUCCESS] Database update complete
-================================================================================
-```
-
----
-
-## 📊 Data Flow Diagram
-
-```
-┌─────────────────────┐
-│ Genius Website │
-└──────────┬──────────┘
- │
- ▼
-┌─────────────────────────────────────────────────┐
-│ Step 1: scrape-artists.js │
-│ Output: scraping-data/artist-lists/{timestamp} │
-└──────────┬──────────────────────────────────────┘
- │
- ▼
-┌─────────────────────────────────────────────────┐
-│ Step 2: compare-artists.js │
-│ Input: Artist lists + Firestore artists │
-│ Output: scraping-data/new-artists/{timestamp} │
-│ (filtered: only new + popular changes) │
-└──────────┬──────────────────────────────────────┘
- │
- ▼
-┌─────────────────────────────────────────────────┐
-│ Step 3: prescrape-new-artists.js │
-│ Input: New artist lists │
-│ Output: scraping-data/song-data/{timestamp} │
-│ (songs + lyrics for new artists only) │
-└──────────┬──────────────────────────────────────┘
- │
- ▼
- [Manual Inspection]
- │
- ▼
-┌─────────────────────────────────────────────────┐
-│ Step 4: upload-update.js │
-│ Input: Song data + Comparison report │
-│ Actions: │
-│ 1. Clear all popular flags │
-│ 2. Upload new artists (with search tokens) │
-│ 3. Upload new songs │
-│ 4. Set popular flags (top 20 per letter) │
-│ Output: Updated Firestore database │
-└─────────────────────────────────────────────────┘
-```
-
----
-
-## 🗑️ Script Cleanup Plan
-
-### **Scripts to Keep (Maintain/Refactor)**
-- `genius-scraper.js` → refactor to `scrape-artists.js`
-- `prescraper.js` → adapt to `prescrape-new-artists.js`
-- `firebase-uploader.js` → keep for backward compatibility with manual prescraping
-- `fix-null-lyrics.js` → keep (unrelated to update system)
-- `search-songs-by-id.js` → keep (utility)
-- `check-database.js` → keep (utility)
-
-### **Scripts to Consolidate**
-- `upload-to-firestore.js` + `upload-remaining-artists.js` → merge into `artist-uploader.js`
-- `add-search-tokens.js` → integrate into upload process (no separate step)
-
-### **Scripts to Archive**
-Move to `scripts/archived/` for reference:
-- `upload-to-firestore.js` (after merging)
-- `upload-remaining-artists.js` (after merging)
-- `add-search-tokens.js` (after integration)
-
----
-
-## Documentation Updates Needed
-
-1. **Update README.md** - Add new workflow section
-2. **Create ARTIST_UPDATE_GUIDE.md** - User guide for monthly updates
-3. **Update documentation/** - Update references to old scripts
-4. **Create migration guide** - For transitioning from old to new system
-5. **Document TUI usage** - How to interpret progress displays
-6. **Document error handling** - Where to find errors.json files
-
-## Dependencies to Add
-
-Add to package.json:
-```json
-{
- "dependencies": {
- "cli-progress": "^3.12.0",
- "chalk": "^5.3.0"
- }
-}
-```
-
----
-
-## Success Criteria
-
-- [ ] Can scrape all artists from Genius in <15 minutes
-- [ ] Can identify new artists in <5 minutes
-- [ ] Can prescrape only new artists (saves 90%+ time vs full scrape)
-- [ ] Can upload with manual inspection step
-- [ ] Popular flags maintained at exactly 20 per letter
-- [ ] Search tokens automatically included
-- [ ] All data preserved locally with timestamps
-- [ ] Can rerun any step independently
-- [ ] TUI shows real-time progress and statistics
-- [ ] No emojis in any script output or logs
-- [ ] Errors logged to errors.json with clean console output
-- [ ] Handles rate limits gracefully (continues with partial results)
-- [ ] All phases show estimated time remaining
-- [ ] Error counts displayed by type during execution
-
-
----
-
-## 📋 Implementation Decisions
-
-### **Configuration Settings**
-- **Songs per artist:** 10 (configurable via CLI)
-- **Old data archival:** Manual deletion only (no automated cleanup)
-- **Error handling:** Continue with partial results, document in summary.json
-- **Rollback support:** Not needed
-- **Logging style:** NO EMOJIS in any scripts or logs
-
-### **TUI Requirements**
-
-All main scraping scripts must include a Terminal User Interface (TUI) with:
-
-#### **Progress Tracking**
-- Progress bar showing % completion of current phase
-- Estimated time remaining for current phase
-- Current letter being processed (e.g., "Processing: Letter J")
-- Current item being processed (artist name or song title)
-
-#### **Statistics Display**
-- Items processed / total items (e.g., "Artists: 45/1247")
-- Success count
-- Error count by type:
- - Network errors
- - Parsing errors
- - Rate limit errors
- - Other errors
-- Items per second (processing speed)
-
-#### **Real-time Updates**
-- Updates every 100ms for smooth progress bar
-- Current action description (e.g., "Scraping: Artist Name - Song Title")
-- No emoji characters in any output
-- Clean, professional terminal output
-
-#### **Example TUI Layout**
-```
-================================================================================
-ARTIST LIST SCRAPER - Phase 1/3
-================================================================================
-
-Progress: [████████████████░░░░░░░░] 65.3% (17/26 letters) | ETA: 3m 15s
-
-Current Letter: Q
-Current Artist: Queen
-Action: Extracting artist ID from page
-
-Statistics:
- Popular Artists: 340 | Regular Artists: 12,450
- Total Artists: 12,790
-
-Errors:
- Network errors: 3
- ID extraction failed: 12
- Other errors: 0
-
-Processing Speed: 2.3 artists/sec
-================================================================================
-```
-
-#### **TUI Libraries**
-Consider using:
-- `cli-progress` - Progress bars
-- `ora` - Spinners (if needed)
-- `chalk` - Colors (optional, subtle use)
-- `boxen` - Bordered boxes (optional)
-
----
-
-## 🎨 Code Style Guidelines
-
-### **Logging Standards**
-- **NO EMOJIS** in any script output or logs
-- Use simple prefixes: `[INFO]`, `[WARN]`, `[ERROR]`, `[SUCCESS]`
-- Keep logs concise and machine-readable
-- Timestamps in ISO format: `2026-01-04T18:30:00.000Z`
-- Errors logged to summary.json, not verbose console output
-
-### **Error Handling**
-- Scripts must continue on errors (graceful degradation)
-- Collect all errors during processing
-- Write error summary to `errors.json` in output directory
-- Console shows error counts only, not full error messages
-- Critical errors only halt execution
-
-### **Example Error Summary (errors.json)**
-```json
-{
- "phase": "prescraping",
- "timestamp": "2026-01-04T19:15:00.000Z",
- "totalErrors": 15,
- "errorsByType": {
- "network_timeout": 3,
- "parsing_failed": 7,
- "rate_limit": 2,
- "lyrics_not_found": 3
- },
- "errors": [
- {
- "type": "network_timeout",
- "artist": "Artist Name",
- "song": "Song Title",
- "url": "https://genius.com/...",
- "timestamp": "2026-01-04T19:15:23.000Z",
- "message": "Request timeout after 10000ms"
- }
- ]
-}
-```
-
----
-
-## Summary of Key Requirements
-
-### **User Experience**
-- Terminal User Interface (TUI) with progress bars on all main scripts
-- Real-time statistics and error tracking
-- Estimated time remaining for all operations
-- Clean, professional output with NO EMOJIS
-- Simple log prefixes: [INFO], [WARN], [ERROR], [SUCCESS]
-
-### **Error Handling**
-- Continue processing on errors (graceful degradation)
-- Errors logged to `errors.json` in each output directory
-- Console shows only error counts, not verbose messages
-- Summary includes error breakdown by type
-
-### **Data Management**
-- 10 songs per artist (configurable)
-- All data saved locally in `scraping-data/` with timestamps
-- No automated archival (manual deletion when needed)
-- Timestamped directories for tracking and re-processing
-
-### **Popular Artists**
-- Determined by Genius's current popular list
-- Exactly 20 popular artists per letter
-- Clear all flags before update to ensure accuracy
-- Changes tracked in comparison report
-
-### **Workflow**
-- 4 separate phases: Scrape → Compare → Prescrape → Upload
-- Manual inspection before upload
-- Can process specific letters only
-- Can reprocess from any timestamp
-
----
-
-**Last Updated:** 2026-01-04
-**Status:** Planning Phase - Requirements Finalized
-**Next Step:** Begin Week 1 implementation (TUI setup and core utilities)
-
diff --git a/firebase-uploader.js b/firebase-uploader.js
new file mode 100644
index 0000000..eed1be8
--- /dev/null
+++ b/firebase-uploader.js
@@ -0,0 +1,575 @@
+#!/usr/bin/env node
+
+import fs from 'fs/promises';
+import path from 'path';
+import { fileURLToPath } from 'url';
+import { initializeApp } from 'firebase/app';
+import { getFirestore, collection, writeBatch, doc, getDoc, setDoc } from 'firebase/firestore';
+import { firebaseConfig } from './src/lib/services/initFirebase.js';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+/**
+ * Firebase Uploader Configuration (module-level defaults read by the functions below)
+ */
+const config = {
+ // Input configuration
+ input: {
+ directory: null, // Will be set via CLI or default to latest
+ pattern: 'prescraped-*.json', // File pattern to match. NOTE(review): loadPrescrapedData matches with its own regex, not this value
+ skipExisting: true // Skip artists that already exist in Firestore
+ },
+
+ // Upload configuration
+ upload: {
+ batchSize: 100, // Number of song writes grouped into one batch
+ delayBetweenBatches: 1000, // Delay between batches (ms)
+ collections: { // Firestore collection names
+ artists: 'artists',
+ songs: 'songs',
+ albumArt: 'albumArt'
+ }
+ },
+
+ // Processing options
+ processing: {
+ uploadArtistImages: false, // Skip artist image processing for now
+ uploadAlbumArt: false, // Skip album art processing for now
+ dryRun: false // If true, the Firestore writes are skipped (nothing is uploaded)
+ },
+
+ // Filtering options
+ filtering: {
+ startLetter: null, // Inclusive lower bound of the sorting-letter range (a-z); 'a' is used when only endLetter is set
+ endLetter: null, // Inclusive upper bound of the sorting-letter range (a-z); 'z' is used when only startLetter is set
+ maxArtists: null // Limit number of artists to process (for testing)
+ }
+};
+
+/**
+ * Run-wide counters and timing, mutated by the upload functions as they progress
+ */
+const state = {
+ uploaded: { // Items counted as uploaded (dry runs count here too)
+ artists: 0,
+ songs: 0,
+ albumArt: 0
+ },
+ skipped: { // Items left untouched because they already exist
+ artists: 0,
+ songs: 0,
+ albumArt: 0
+ },
+ errors: { // Items whose upload failed
+ artists: 0,
+ songs: 0,
+ albumArt: 0
+ },
+ startTime: null // Set to Date.now() at the start of main()
+};
+
+// Firestore handle shared by the helpers below; assigned by initializeFirebase()
+let db = null;
+
+/**
+ * Initialize Firebase connection
+ */
+async function initializeFirebase() {
+ try {
+ console.log('🔧 Initializing Firebase...');
+
+ // Initialize Firebase using centralized config (same as working script)
+ const app = initializeApp(firebaseConfig);
+ db = getFirestore(app);
+
+ // Check if we should use emulator
+ if (process.env.FIRESTORE_EMULATOR_HOST) {
+ console.log('🧪 Using Firestore emulator at', process.env.FIRESTORE_EMULATOR_HOST);
+ }
+
+ console.log('✅ Firebase initialized successfully');
+
+ return db;
+ } catch (error) {
+ console.error('❌ Error initializing Firebase:', error);
+ throw error;
+ }
+}
+
+/**
+ * Load prescraped data files
+ */
+async function loadPrescrapedData(inputDir) {
+ console.log(`📂 Loading prescraped data from: ${inputDir}`);
+
+ try {
+ const files = await fs.readdir(inputDir);
+ const jsonFiles = files.filter(file => file.match(/^prescraped-.+\.json$/));
+
+ if (jsonFiles.length === 0) {
+ throw new Error(`No prescraped files found in ${inputDir}`);
+ }
+
+ console.log(`Found ${jsonFiles.length} prescraped files: ${jsonFiles.join(', ')}`);
+
+ const allData = [];
+
+ for (const file of jsonFiles) {
+ const filePath = path.join(inputDir, file);
+ const content = await fs.readFile(filePath, 'utf8');
+ const data = JSON.parse(content);
+
+ console.log(`📄 Loaded ${file}: ${data.artists.length} artists, ${data.summary.totalSongs} songs, ${data.summary.totalLyrics} lyrics`);
+ allData.push(data);
+ }
+
+ // Flatten all artists from all files
+ const allArtists = allData.flatMap(data => data.artists);
+ console.log(`✅ Total loaded: ${allArtists.length} artists`);
+
+ return allArtists;
+
+ } catch (error) {
+ console.error('❌ Error loading prescraped data:', error);
+ throw error;
+ }
+}
+
+/**
+ * Check if artist already exists in Firestore
+ */
+async function checkArtistExists(urlKey) {
+ try {
+ const artistRef = doc(db, config.upload.collections.artists, urlKey);
+ const docSnap = await getDoc(artistRef);
+ return docSnap.exists();
+ } catch (error) {
+ console.error(`❌ Error checking if artist exists (${urlKey}):`, error);
+ return false; // Assume doesn't exist if we can't check
+ }
+}
+
+/**
+ * Upload artist to Firestore
+ */
+async function uploadArtist(artistData) {
+ const urlKey = artistData.urlKey;
+
+ try {
+ // Check if already exists and skip if configured to do so
+ if (config.input.skipExisting) {
+ const exists = await checkArtistExists(urlKey);
+ if (exists) {
+ console.log(` ⏭️ Artist ${artistData.name} already exists, skipping`);
+ state.skipped.artists++;
+ return { skipped: true };
+ }
+ }
+
+ // Prepare artist document data (matching your Firebase Functions structure)
+ const artistDoc = {
+ name: artistData.name,
+ geniusId: parseInt(artistData.geniusId, 10),
+ url: artistData.url,
+ imageUrl: artistData.imageUrl || null,
+ totalSongs: artistData.totalSongs,
+ songIds: artistData.allSongs.map(song => song.id),
+ cachedSongIds: artistData.scrapedSongs.filter(song => song.lyrics).map(song => song.id),
+ songsLastUpdated: new Date(),
+ lyricsScraped: artistData.processingStats.lyricsScraped,
+ isFullyCached: true, // We've fetched all available songs
+ cacheVersion: 1,
+ createdAt: new Date(),
+ // Prescraped metadata
+ prescrapedAt: new Date(artistData.processedAt),
+ prescrapedStats: artistData.processingStats
+ };
+
+ if (!config.processing.dryRun) {
+ const artistRef = doc(db, config.upload.collections.artists, urlKey);
+ await setDoc(artistRef, artistDoc);
+ }
+
+ console.log(` ✅ Uploaded artist: ${artistData.name} (${artistData.totalSongs} songs, ${artistData.processingStats.lyricsScraped} lyrics)`);
+ state.uploaded.artists++;
+
+ return { uploaded: true, songIds: artistDoc.songIds, cachedSongIds: artistDoc.cachedSongIds };
+
+ } catch (error) {
+ console.error(` ❌ Error uploading artist ${artistData.name}:`, error);
+ state.errors.artists++;
+ return { error: error.message };
+ }
+}
+
+/**
+ * Upload songs to Firestore in batches
+ */
+async function uploadSongs(artistData) {
+ console.log(` 📚 Uploading ${artistData.allSongs.length} songs...`);
+
+ try {
+ // Prepare all songs (both with and without lyrics)
+ const songsToUpload = [];
+
+ // Create a map of scraped songs for quick lookup
+ const scrapedSongsMap = new Map();
+ artistData.scrapedSongs.forEach(song => {
+ scrapedSongsMap.set(song.id, song);
+ });
+
+ // Process all songs
+ for (const song of artistData.allSongs) {
+ const scrapedSong = scrapedSongsMap.get(song.id);
+
+ const songDoc = {
+ title: song.title,
+ url: song.url,
+ songArtImageUrl: song.songArtImageUrl,
+ artistNames: song.artistNames,
+ primaryArtist: song.primaryArtist,
+ albumArtId: song.albumArtId,
+ addedAt: new Date(),
+ // Lyrics data (if available)
+ lyrics: scrapedSong?.lyrics || null,
+ lyricsScrapedAt: scrapedSong?.lyrics ? new Date(scrapedSong.scrapedAt) : null,
+ scrapingAttempts: scrapedSong?.lyrics ? 1 : 0,
+ scrapingError: scrapedSong?.scrapingError || null,
+ scrapingStatus: scrapedSong?.lyrics ? 'completed' : (scrapedSong?.scrapingError ? 'failed' : 'pending'),
+ scrapingDuration: scrapedSong?.scrapingDuration || null
+ };
+
+ songsToUpload.push({ id: song.id, data: songDoc });
+ }
+
+ // Upload in batches
+ const batchSize = config.upload.batchSize;
+ let uploaded = 0;
+
+ for (let i = 0; i < songsToUpload.length; i += batchSize) {
+ const batch = writeBatch(db);
+ const batchSongs = songsToUpload.slice(i, i + batchSize);
+
+ for (const song of batchSongs) {
+ if (!config.processing.dryRun) {
+ const songRef = doc(db, config.upload.collections.songs, song.id);
+ batch.set(songRef, song.data);
+ }
+ }
+
+ if (!config.processing.dryRun) {
+ await batch.commit();
+ }
+
+ uploaded += batchSongs.length;
+ console.log(` 📄 Uploaded batch: ${uploaded}/${songsToUpload.length} songs`);
+
+ // Delay between batches
+ if (i + batchSize < songsToUpload.length) {
+ await new Promise(resolve => setTimeout(resolve, config.upload.delayBetweenBatches));
+ }
+ }
+
+ state.uploaded.songs += uploaded;
+ console.log(` ✅ Completed song upload: ${uploaded} songs`);
+
+ return { uploaded: uploaded };
+
+ } catch (error) {
+ console.error(` ❌ Error uploading songs for ${artistData.name}:`, error);
+ state.errors.songs += artistData.allSongs.length;
+ return { error: error.message };
+ }
+}
+
+/**
+ * Process a single artist: upload artist and songs
+ */
+async function processArtist(artistData, artistIndex, totalArtists) {
+ console.log(`\n[${artistIndex + 1}/${totalArtists}] 🎨 Processing: ${artistData.name}`);
+
+ try {
+ // Upload artist document
+ const artistResult = await uploadArtist(artistData);
+
+ if (artistResult.skipped) {
+ console.log(` ⏭️ Skipped artist and songs`);
+ return;
+ }
+
+ if (artistResult.error) {
+ console.log(` ❌ Skipping songs due to artist upload error`);
+ return;
+ }
+
+ // Upload songs
+ const songsResult = await uploadSongs(artistData);
+
+ if (songsResult.error) {
+ console.log(` ⚠️ Artist uploaded but songs failed`);
+ }
+
+ } catch (error) {
+ console.error(` 💥 Critical error processing ${artistData.name}:`, error);
+ state.errors.artists++;
+ }
+}
+
+/**
+ * Get the sorting letter for an artist name (ignores common articles)
+ */
+function getSortingLetter(artistName) {
+ const name = artistName.toLowerCase();
+ const articles = ['the ', 'a ', 'an '];
+
+ for (const article of articles) {
+ if (name.startsWith(article)) {
+ return name.charAt(article.length);
+ }
+ }
+
+ return name.charAt(0);
+}
+
+/**
+ * Filter artists by letter range and/or limit count
+ */
+function filterArtists(artists) {
+ let filtered = [...artists];
+
+ // Apply letter filtering
+ if (config.filtering.startLetter || config.filtering.endLetter) {
+ const startLetter = config.filtering.startLetter?.toLowerCase() || 'a';
+ const endLetter = config.filtering.endLetter?.toLowerCase() || 'z';
+
+ filtered = filtered.filter(artist => {
+ const sortingChar = getSortingLetter(artist.name);
+ return sortingChar >= startLetter && sortingChar <= endLetter;
+ });
+
+ console.log(`🔤 Filtered by letters ${startLetter.toUpperCase()}-${endLetter.toUpperCase()}: ${filtered.length}/${artists.length} artists`);
+ }
+
+ // Apply count limit
+ if (config.filtering.maxArtists && config.filtering.maxArtists > 0) {
+ const originalCount = filtered.length;
+ filtered = filtered.slice(0, config.filtering.maxArtists);
+ console.log(`🔢 Limited to ${config.filtering.maxArtists} artists: ${filtered.length}/${originalCount} selected`);
+ }
+
+ return filtered;
+}
+
+/**
+ * Find the latest prescraped directory next to this script.
+ * "Latest" is the `prescraped-data-*` directory name that sorts last.
+ *
+ * @returns {Promise<string>} The selected directory joined onto __dirname
+ * @throws {Error} When no matching directory exists or the listing fails
+ *   (the error is logged before being rethrown)
+ */
+async function findLatestPrescrapedDir() {
+  try {
+    const entries = await fs.readdir(__dirname, { withFileTypes: true });
+
+    const candidates = [];
+    for (const entry of entries) {
+      if (entry.isDirectory() && entry.name.startsWith('prescraped-data-')) {
+        candidates.push(entry.name);
+      }
+    }
+
+    if (candidates.length === 0) {
+      throw new Error('No prescraped data directories found');
+    }
+
+    // Default sort is ascending, so the last element is the latest
+    candidates.sort();
+    const latest = candidates[candidates.length - 1];
+    return path.join(__dirname, latest);
+  } catch (error) {
+    console.error('❌ Error finding prescraped directories:', error);
+    throw error;
+  }
+}
+
+/**
+ * Print the startup banner followed by the effective settings in `config`.
+ * Optional lines (letter filter, artist limit, dry-run warning) appear
+ * only when the corresponding setting is active.
+ */
+function printConfig() {
+  const rule = '='.repeat(60);
+  const { input, upload, processing, filtering } = config;
+
+  const report = [
+    '\n' + rule,
+    '🚀 LYRICTYPE FIREBASE UPLOADER STARTING',
+    rule,
+    `📋 Configuration:`,
+    ` Input directory: ${input.directory || 'auto-detect latest'}`,
+    ` Skip existing: ${input.skipExisting}`,
+    ` Batch size: ${upload.batchSize}`,
+    ` Dry run: ${processing.dryRun}`,
+  ];
+
+  if (filtering.startLetter || filtering.endLetter) {
+    const from = filtering.startLetter?.toUpperCase() || 'A';
+    const to = filtering.endLetter?.toUpperCase() || 'Z';
+    report.push(` Letter filter: ${from}-${to}`);
+  }
+  if (filtering.maxArtists) {
+    report.push(` Max artists: ${filtering.maxArtists}`);
+  }
+  if (processing.dryRun) {
+    report.push(' ⚠️ DRY RUN MODE: No data will be uploaded');
+  }
+  report.push(rule + '\n');
+
+  for (const line of report) {
+    console.log(line);
+  }
+}
+
+/**
+ * Read the value that follows a command line flag.
+ * Exits with status 1 when the flag is present but no value follows it, so
+ * a forgotten value can never silently widen what gets uploaded.
+ *
+ * @param {string} flag - Flag name, e.g. '--dir'
+ * @returns {string|undefined} The value, or undefined when the flag is absent
+ */
+function readFlagValue(flag) {
+  const index = process.argv.indexOf(flag);
+  if (index === -1) {
+    return undefined;
+  }
+
+  const value = process.argv[index + 1];
+  if (!value || value.startsWith('--')) {
+    console.error(`❌ ${flag} requires a value`);
+    process.exit(1);
+  }
+  return value;
+}
+
+/**
+ * Read a flag whose value must be a single letter.
+ * Exits with status 1 when the value is not exactly one of a-z.
+ *
+ * @param {string} flag - Flag name, e.g. '--start-letter'
+ * @returns {string|undefined} Lower-cased letter, or undefined when absent
+ */
+function readLetterFlag(flag) {
+  const value = readFlagValue(flag);
+  if (value === undefined) {
+    return undefined;
+  }
+
+  const letter = value.toLowerCase();
+  if (!/^[a-z]$/.test(letter)) {
+    console.error(`❌ ${flag} must be a single letter (a-z)`);
+    process.exit(1);
+  }
+  return letter;
+}
+
+/**
+ * Main execution function.
+ *
+ * Parses the command line into `config`, prints the effective
+ * configuration, resolves the input directory, initializes Firebase,
+ * loads and filters the prescraped artists, uploads them one at a time
+ * and prints a summary. Any uncaught error is logged and the process exits
+ * with status 1.
+ *
+ * @returns {Promise<void>}
+ */
+async function main() {
+  try {
+    state.startTime = Date.now();
+
+    // Help is answered before anything else so it prints without the banner
+    if (process.argv.includes('--help') || process.argv.includes('-h')) {
+      console.log(`
+Usage: node firebase-uploader.js [options]
+
+Options:
+ --dir Directory containing prescraped JSON files
+ --start-letter Only process artists starting with this letter (a-z)
+ --end-letter Only process artists up to this letter (a-z)
+ --max-artists Limit number of artists to process (for testing)
+ --dry-run Don't actually upload, just show what would be done
+ --force Upload even if artists already exist
+ --emulator Use local Firestore emulator (requires firebase emulators:start)
+ --help, -h Show this help message
+
+Examples:
+ node firebase-uploader.js --start-letter n --end-letter z # Upload artists N-Z
+ node firebase-uploader.js --start-letter n --max-artists 5 --dry-run # Test with 5 artists starting with N
+ node firebase-uploader.js --dir ./prescraped-data-2025-09-14/
+ node firebase-uploader.js --dry-run # Test run without uploading
+ node firebase-uploader.js --force # Overwrite existing artists
+ node firebase-uploader.js --emulator # Use local emulator for testing
+ `);
+      return;
+    }
+
+    // Parse CLI arguments
+    const directory = readFlagValue('--dir');
+    if (directory !== undefined) {
+      config.input.directory = directory;
+    }
+
+    const startLetter = readLetterFlag('--start-letter');
+    if (startLetter !== undefined) {
+      config.filtering.startLetter = startLetter;
+    }
+
+    const endLetter = readLetterFlag('--end-letter');
+    if (endLetter !== undefined) {
+      config.filtering.endLetter = endLetter;
+    }
+
+    // An inverted range can never match; fail loudly instead of reporting
+    // "no artists match" after the data has been loaded
+    const rangeStart = config.filtering.startLetter?.toLowerCase();
+    const rangeEnd = config.filtering.endLetter?.toLowerCase();
+    if (rangeStart && rangeEnd && rangeStart > rangeEnd) {
+      console.error('❌ --start-letter must not come after --end-letter');
+      process.exit(1);
+    }
+
+    const maxArtists = readFlagValue('--max-artists');
+    if (maxArtists !== undefined) {
+      // Number() rejects trailing garbage such as "5abc", which parseInt accepts
+      const count = Number(maxArtists);
+      if (Number.isInteger(count) && count > 0) {
+        config.filtering.maxArtists = count;
+      } else {
+        console.error('❌ --max-artists must be a positive number');
+        process.exit(1);
+      }
+    }
+
+    if (process.argv.includes('--dry-run')) {
+      config.processing.dryRun = true;
+      console.log('🧪 DRY RUN MODE: No data will be uploaded');
+    }
+
+    if (process.argv.includes('--force')) {
+      config.input.skipExisting = false;
+      console.log('💪 FORCE MODE: Will overwrite existing artists');
+    }
+
+    if (process.argv.includes('--emulator')) {
+      process.env.FIRESTORE_EMULATOR_HOST = 'localhost:8080';
+      console.log('🧪 EMULATOR MODE: Using local Firestore emulator');
+    }
+
+    // Printed only after the flags are applied, so the banner reflects the
+    // settings this run will actually use
+    printConfig();
+
+    // Determine input directory
+    if (!config.input.directory) {
+      config.input.directory = await findLatestPrescrapedDir();
+      console.log(`📁 Auto-detected input directory: ${config.input.directory}`);
+    }
+
+    // Initialize Firebase
+    await initializeFirebase();
+
+    // Load prescraped data
+    const allArtists = await loadPrescrapedData(config.input.directory);
+
+    if (allArtists.length === 0) {
+      console.log('⚠️ No artists found in prescraped data');
+      return;
+    }
+
+    // Apply filtering
+    const artists = filterArtists(allArtists);
+
+    if (artists.length === 0) {
+      console.log('⚠️ No artists match the specified filters');
+      return;
+    }
+
+    console.log(`\n🎯 Uploading ${artists.length} artists to Firestore...`);
+    console.log('Press Ctrl+C to stop gracefully\n');
+
+    // Process each artist sequentially
+    for (let i = 0; i < artists.length; i++) {
+      const artist = artists[i];
+      await processArtist(artist, i, artists.length);
+
+      // Small delay between artists to be gentle on Firestore
+      if (i < artists.length - 1) {
+        await new Promise(resolve => setTimeout(resolve, 100));
+      }
+    }
+
+    // Final summary
+    const totalTime = Date.now() - state.startTime;
+    console.log(`\n${'='.repeat(60)}`);
+    console.log('🎉 FIREBASE UPLOAD COMPLETED!');
+    console.log('='.repeat(60));
+    console.log(`⏱️ Total time: ${Math.round(totalTime / 1000)}s`);
+    console.log(`📊 Final stats:`);
+    console.log(` Artists uploaded: ${state.uploaded.artists}, skipped: ${state.skipped.artists}, errors: ${state.errors.artists}`);
+    console.log(` Songs uploaded: ${state.uploaded.songs}, errors: ${state.errors.songs}`);
+    console.log(` Total operations: ${state.uploaded.artists + state.uploaded.songs}`);
+    if (config.processing.dryRun) {
+      console.log(` ⚠️ DRY RUN: No actual data was uploaded`);
+    }
+    console.log('='.repeat(60));
+
+  } catch (error) {
+    console.error('💥 Fatal error:', error);
+    process.exit(1);
+  }
+}
+
+// On Ctrl+C, report the progress made so far and exit with status 0
+process.on('SIGINT', () => {
+  const { uploaded, errors } = state;
+  const summary = [
+    '\n\n🛑 Received SIGINT, shutting down gracefully...',
+    `📊 Final stats:`,
+    ` Artists uploaded: ${uploaded.artists}`,
+    ` Songs uploaded: ${uploaded.songs}`,
+    ` Total errors: ${errors.artists + errors.songs}`,
+  ];
+
+  for (const line of summary) {
+    console.log(line);
+  }
+  process.exit(0);
+});
+
+// Run the script only when it is executed directly, not when imported.
+// pathToFileURL applies the same percent-encoding and platform path rules
+// that Node uses for import.meta.url, so paths containing spaces or Windows
+// drive letters still match; a hand-built `file://` string does not.
+// The import is aliased to avoid clashing with any existing top-level name.
+const { pathToFileURL: toFileUrl } = await import('node:url');
+if (process.argv[1] && import.meta.url === toFileUrl(process.argv[1]).href) {
+  main();
+}
diff --git a/scripts/fix-null-lyrics.js b/fix-null-lyrics.js
similarity index 99%
rename from scripts/fix-null-lyrics.js
rename to fix-null-lyrics.js
index 99a466c..63e9b33 100644
--- a/scripts/fix-null-lyrics.js
+++ b/fix-null-lyrics.js
@@ -19,7 +19,7 @@ import { getFirestore, doc, getDoc, updateDoc, arrayRemove, arrayUnion, incremen
import { fileURLToPath } from 'url';
import { dirname } from 'path';
import * as cheerio from 'cheerio';
-import { firebaseConfig } from '../src/lib/services/initFirebase.js';
+import { firebaseConfig } from './src/lib/services/initFirebase.js';
// Note: fetch is built-in for Node.js 18+, no need to import
diff --git a/functions/.gitignore b/functions/.gitignore
index 918c24d..d225730 100644
--- a/functions/.gitignore
+++ b/functions/.gitignore
@@ -3,4 +3,5 @@ local-config.json
build
*.log
/prescraped-data-*/
-/prescraped-*.json
\ No newline at end of file
+/prescraped-*.json
+test-*.js
\ No newline at end of file
diff --git a/functions/check-database.js b/functions/check-database.js
new file mode 100644
index 0000000..22bb47c
--- /dev/null
+++ b/functions/check-database.js
@@ -0,0 +1,49 @@
+import { initializeApp } from 'firebase-admin/app';
+import { getFirestore } from 'firebase-admin/firestore';
+
+const app = initializeApp();
+const db = getFirestore(app);
+
+/**
+ * Check a few songs of one artist to see their structure.
+ *
+ * Reads `artists/{artistId}`, then prints title and lyrics details for the
+ * first `sampleSize` IDs in its `songIds` array. Exits the process with 0
+ * after the check and 1 on error; returns without exiting when the artist
+ * document does not exist.
+ *
+ * @param {string} [artistId] - Artist document ID; defaults to the first
+ *   command line argument, then to 'Demi-lovato'
+ * @param {number} [sampleSize=3] - Maximum number of songs to inspect
+ * @returns {Promise<void>}
+ */
+async function checkSongs(artistId = process.argv[2] || 'Demi-lovato', sampleSize = 3) {
+  try {
+    // Get the artist document first
+    const artistDoc = await db.collection('artists').doc(artistId).get();
+    if (!artistDoc.exists) {
+      console.log(`❌ Artist ${artistId} not found`);
+      return;
+    }
+
+    const artistData = artistDoc.data();
+    const songIds = artistData.songIds || [];
+    console.log(`✅ Artist ${artistId} has ${songIds.length} songs`);
+
+    // Check the first few songs, in order, so the output stays readable
+    const sample = songIds.slice(0, sampleSize);
+    for (const [index, songId] of sample.entries()) {
+      console.log(`\n📝 Checking song ${index + 1}: ${songId}`);
+
+      const songDoc = await db.collection('songs').doc(songId).get();
+      if (!songDoc.exists) {
+        console.log(` ❌ Song document not found!`);
+        continue;
+      }
+
+      const songData = songDoc.data();
+      console.log(` Title: ${songData.title || 'N/A'}`);
+      console.log(` Has lyrics: ${!!songData.lyrics}`);
+      console.log(` Lyrics length: ${songData.lyrics ? songData.lyrics.length : 0}`);
+      if (songData.lyrics) {
+        // String() keeps the preview from throwing if lyrics is not a string
+        console.log(` First 100 chars: ${String(songData.lyrics).substring(0, 100)}`);
+      } else {
+        console.log(` No lyrics found!`);
+      }
+    }
+
+    process.exit(0);
+  } catch (error) {
+    console.error('Error:', error);
+    process.exit(1);
+  }
+}
+
+checkSongs();
diff --git a/functions/index.js b/functions/index.js
index bbd3358..eace090 100644
--- a/functions/index.js
+++ b/functions/index.js
@@ -7,7 +7,7 @@ import pako from 'pako';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'url';
-import { HttpsProxyAgent } from 'https-proxy-agent';
+import HttpsProxyAgent from 'https-proxy-agent';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
@@ -308,10 +308,9 @@ async function processAndStoreArtistImage(imageUrl, artistUrlKey) {
const sharp = (await import('sharp')).default;
let image = sharp(Buffer.from(imageBuffer));
- // Get image metadata
- const metadata = await image.metadata();
- const nativeWidth = metadata.width;
- const nativeHeight = metadata.height;
+ // Use native image dimensions
+ const nativeWidth = img.naturalWidth || img.width;
+ const nativeHeight = img.naturalHeight || img.height;
console.log(`📐 Native resolution: ${nativeWidth}x${nativeHeight} (${metadata.format})`);
@@ -331,18 +330,12 @@ async function processAndStoreArtistImage(imageUrl, artistUrlKey) {
image = image.resize(finalWidth, finalHeight, { fit: 'inside' });
}
- // Convert to raw RGBA pixels
- const { data, info } = await image
- .raw()
- .ensureAlpha()
- .toBuffer({ resolveWithObject: true });
-
- // Create ImageData-like object for convertToGrayscale
- const imageData = {
- data: data,
- width: info.width,
- height: info.height
- };
+ // Create canvas with native size
+ const canvas = createCanvas(nativeWidth, nativeHeight);
+ const ctx = canvas.getContext('2d');
+ ctx.drawImage(img, 0, 0, nativeWidth, nativeHeight);
+
+ const imageData = ctx.getImageData(0, 0, nativeWidth, nativeHeight);
// Convert to 8-bit grayscale
const grayscaleData = convertToGrayscale(imageData);
@@ -532,10 +525,9 @@ async function processAndStoreAlbumArt(imageUrl, albumArtId) {
const sharp = (await import('sharp')).default;
let image = sharp(Buffer.from(imageBuffer));
- // Get image metadata
- const metadata = await image.metadata();
- const nativeWidth = metadata.width;
- const nativeHeight = metadata.height;
+ // Use native image dimensions
+ const nativeWidth = img.naturalWidth || img.width;
+ const nativeHeight = img.naturalHeight || img.height;
console.log(`📐 Native resolution: ${nativeWidth}x${nativeHeight} (${metadata.format})`);
@@ -555,18 +547,12 @@ async function processAndStoreAlbumArt(imageUrl, albumArtId) {
image = image.resize(finalWidth, finalHeight, { fit: 'inside' });
}
- // Convert to raw RGBA pixels
- const { data, info } = await image
- .raw()
- .ensureAlpha()
- .toBuffer({ resolveWithObject: true });
-
- // Create ImageData-like object for convertToGrayscale
- const imageData = {
- data: data,
- width: info.width,
- height: info.height
- };
+ // Create canvas with native size
+ const canvas = createCanvas(nativeWidth, nativeHeight);
+ const ctx = canvas.getContext('2d');
+ ctx.drawImage(img, 0, 0, nativeWidth, nativeHeight);
+
+ const imageData = ctx.getImageData(0, 0, nativeWidth, nativeHeight);
// Convert to 8-bit grayscale
const grayscaleData = convertToGrayscale(imageData);
diff --git a/functions/index.js.backup b/functions/index.js.backup
new file mode 100644
index 0000000..1318bcf
--- /dev/null
+++ b/functions/index.js.backup
@@ -0,0 +1,1970 @@
+import { onRequest, onCall, HttpsError } from 'firebase-functions/v2/https';
+import { defineString } from 'firebase-functions/params';
+import { initializeApp } from 'firebase-admin/app';
+import { getFirestore, FieldValue } from 'firebase-admin/firestore';
+import * as cheerio from 'cheerio';
+import pako from 'pako';
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'url';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+// Initialize Firebase Admin SDK
+const app = initializeApp();
+const db = getFirestore(app);
+
+const geniusApiKeyParam = defineString('GENIUS_KEY');
+
+// Use the global fetch that comes with Node.js 18+ instead of node-fetch
+// This is more compatible with Firebase Functions environment
+//
+// Wraps fetch with an abort timer: the request is aborted after
+// options.timeout milliseconds (10000 when unset or 0) and the resulting
+// AbortError is reported as "Request timeout after <ms>ms". Any other
+// failure is rethrown unchanged.
+const fetchWithTimeout = async (url, options = {}) => {
+  const timeoutMs = options.timeout || 10000;
+  const abortController = new AbortController();
+  const timer = setTimeout(() => abortController.abort(), timeoutMs);
+
+  try {
+    return await fetch(url, { ...options, signal: abortController.signal });
+  } catch (error) {
+    if (error.name !== 'AbortError') {
+      throw error;
+    }
+    throw new Error(`Request timeout after ${timeoutMs}ms`);
+  } finally {
+    // Runs on success and failure alike, so the timer never outlives the call
+    clearTimeout(timer);
+  }
+};
+
+// SSR server removed - now using pure static hosting with optimized binary image system
+// All image processing is now done via dedicated Firebase Functions with Firestore caching
+
+// Keep existing health check: always answers 200 with the body "OK"
+export const healthCheck = onRequest(
+  { region: 'us-central1', timeoutSeconds: 10 },
+  (_request, response) => {
+    response.status(200).send('OK');
+  }
+);
+
+/**
+ * Server-side Atkinson dithering algorithm
+ *
+ * Converts RGBA pixels to grayscale, thresholds each pixel at 128 and
+ * diffuses 1/8 of the quantization error to each of six neighbours. Bits
+ * are packed row by row, most significant bit first; 1 = light, 0 = dark.
+ * The caller's pixel data is not modified.
+ *
+ * @param {object} imageData - Canvas ImageData object
+ * @returns {Uint8Array} Binary array (1 bit per pixel, packed into bytes)
+ */
+function atkinsonDitherToBinary(imageData) {
+  const { width, height } = imageData;
+
+  // Private copy: error diffusion overwrites pixel values, and the clamped
+  // array keeps every stored value within 0..255
+  const pixels = new Uint8ClampedArray(imageData.data);
+
+  // Grayscale is kept in the red slot only; nothing below reads green or blue
+  for (let p = 0; p < pixels.length; p += 4) {
+    pixels[p] = pixels[p] * 0.299 + pixels[p + 1] * 0.587 + pixels[p + 2] * 0.114;
+  }
+
+  // Atkinson kernel as [dx, dy] offsets from the current pixel
+  const neighbours = [[1, 0], [2, 0], [-1, 1], [0, 1], [1, 1], [0, 2]];
+
+  // 1 bit per pixel, packed into bytes
+  const packed = new Uint8Array(Math.ceil((width * height) / 8));
+
+  let bitIndex = 0; // always equals y * width + x
+  for (let y = 0; y < height; y++) {
+    for (let x = 0; x < width; x++, bitIndex++) {
+      const value = pixels[bitIndex * 4];
+      const isLight = !(value < 128);
+
+      if (isLight) {
+        packed[bitIndex >>> 3] |= 0x80 >> (bitIndex & 7);
+      }
+
+      // Each neighbour receives 1/8 of the error; 2/8 is deliberately dropped
+      const share = (value - (isLight ? 255 : 0)) / 8;
+      for (const [dx, dy] of neighbours) {
+        const nx = x + dx;
+        const ny = y + dy;
+        if (ny < height && nx >= 0 && nx < width) {
+          pixels[(ny * width + nx) * 4] += share;
+        }
+      }
+    }
+  }
+
+  return packed;
+}
+
+/**
+ * Analyze binary dithered data for compression and statistics
+ *
+ * @param {Uint8Array} binaryData - Packed 1-bit pixels
+ * @param {number} width - Image width in pixels
+ * @param {number} height - Image height in pixels
+ * @returns {object} Pixel/byte counts as numbers; ratios and percentages as
+ *   fixed-precision strings (3 and 1 decimals respectively)
+ */
+function analyzeBinaryData(binaryData, width, height) {
+  const totalPixels = width * height;
+  const totalBytes = binaryData.length;
+  const originalSize = totalPixels * 4; // RGBA: 4 bytes per pixel
+  const packedRatio = totalBytes / originalSize;
+
+  // Count set bits (white pixels) by clearing the lowest set bit until none remain
+  let setBits = 0;
+  for (const byte of binaryData) {
+    for (let remaining = byte; remaining !== 0; remaining &= remaining - 1) {
+      setBits++;
+    }
+  }
+  const whiteShare = setBits / totalPixels;
+
+  return {
+    totalPixels,
+    totalBytes,
+    originalSize,
+    compressionRatio: packedRatio.toFixed(3),
+    compressionPercent: ((1 - packedRatio) * 100).toFixed(1),
+    setBits,
+    whiteFraction: whiteShare.toFixed(3),
+    whitePercent: (whiteShare * 100).toFixed(1),
+  };
+}
+
+/**
+ * Process artist image to binary format and store in database
+ * Fast response - client gets binary data ASAP
+ *
+ * The image is downloaded, drawn onto a targetSize x targetSize canvas
+ * (the source aspect ratio is not preserved), dithered to 1 bit per pixel,
+ * deflated with pako and stored base64-encoded on `artists/{artistUrlKey}`.
+ * The artist document is created when it does not exist yet.
+ *
+ * @param {string} imageUrl - Source image URL
+ * @param {string} artistUrlKey - Document ID in the `artists` collection
+ * @param {number} [targetSize=200] - Output width and height in pixels
+ * @returns {Promise<object>} `{ success, binaryData, metadata }`, where
+ *   binaryData is the base64-encoded compressed bitmap
+ * @throws {Error} On download, decode or Firestore failure (logged, then rethrown)
+ */
+async function processAndStoreArtistImage(imageUrl, artistUrlKey, targetSize = 200) {
+  try {
+    console.log(`🚀 FAST processing artist image: ${imageUrl}`);
+    const startTime = Date.now();
+
+    // Fetch the image
+    const imageResponse = await fetchWithTimeout(imageUrl, { timeout: 8000 });
+    if (!imageResponse.ok) {
+      throw new Error(`Failed to fetch image: ${imageResponse.status}`);
+    }
+
+    const imageBuffer = await imageResponse.arrayBuffer();
+    console.log(`📦 Downloaded: ${imageBuffer.byteLength} bytes in ${Date.now() - startTime}ms`);
+
+    // Process with canvas (imported lazily, only when an image is processed)
+    const { createCanvas, loadImage } = await import('canvas');
+    const img = await loadImage(Buffer.from(imageBuffer));
+
+    // Create canvas with target size
+    const canvas = createCanvas(targetSize, targetSize);
+    const ctx = canvas.getContext('2d');
+    ctx.drawImage(img, 0, 0, targetSize, targetSize);
+
+    const imageData = ctx.getImageData(0, 0, targetSize, targetSize);
+
+    // Apply dithering and get binary data
+    const binaryData = atkinsonDitherToBinary(imageData);
+    const analysis = analyzeBinaryData(binaryData, targetSize, targetSize);
+
+    console.log(`⚡ Binary processed in ${Date.now() - startTime}ms: ${analysis.totalBytes} bytes (${analysis.compressionPercent}% compression)`);
+
+    // Compress with Pako
+    const compressedData = pako.deflate(binaryData);
+    console.log(`🗜️ Pako compressed: ${binaryData.length} → ${compressedData.length} bytes (${((1 - compressedData.length / binaryData.length) * 100).toFixed(1)}% reduction)`);
+
+    // Ratios shared by the stored document and the response
+    const pakoCompressionRatio = compressedData.length / binaryData.length;
+    const totalCompressionRatio = compressedData.length / imageBuffer.byteLength;
+
+    // Store in artist document
+    const base64Binary = Buffer.from(compressedData).toString('base64');
+    const imageMetadata = {
+      binaryImageData: base64Binary,
+      imageWidth: targetSize,
+      imageHeight: targetSize,
+      originalImageUrl: imageUrl,
+      originalSize: imageBuffer.byteLength,
+      binarySize: analysis.totalBytes,
+      compressedSize: compressedData.length,
+      base64Size: base64Binary.length,
+      compressionRatio: analysis.compressionRatio,
+      pakoCompressionRatio,
+      totalCompressionRatio,
+      processedAt: new Date(),
+      processingVersion: '1.1-pako',
+      compressionMethod: 'pako-deflate'
+    };
+
+    // Update or create artist document with binary data
+    const artistRef = db.collection('artists').doc(artistUrlKey);
+    try {
+      await artistRef.update({
+        imageUrl: imageUrl,
+        ...imageMetadata
+      });
+    } catch (updateError) {
+      // The Admin SDK reports a missing document as gRPC status 5
+      // (NOT_FOUND); the string form is kept for compatibility with the
+      // previous check. Matching only the string meant the create path
+      // below was never taken.
+      const isNotFound = updateError.code === 5 || updateError.code === 'not-found';
+      if (!isNotFound) {
+        throw updateError;
+      }
+
+      // Document doesn't exist, create it
+      console.log(`📝 Creating new artist document: ${artistUrlKey}`);
+      await artistRef.set({
+        imageUrl: imageUrl,
+        ...imageMetadata,
+        createdAt: new Date()
+      });
+    }
+
+    console.log(`💾 Stored binary data for artist ${artistUrlKey} in ${Date.now() - startTime}ms total`);
+
+    return {
+      success: true,
+      binaryData: base64Binary,
+      metadata: {
+        width: targetSize,
+        height: targetSize,
+        originalSize: imageBuffer.byteLength,
+        binarySize: analysis.totalBytes,
+        compressedSize: compressedData.length,
+        base64Size: base64Binary.length,
+        compressionRatio: analysis.compressionRatio,
+        pakoCompressionRatio,
+        totalCompressionRatio,
+        compressionPercent: analysis.compressionPercent,
+        pakoCompressionPercent: ((1 - pakoCompressionRatio) * 100).toFixed(1),
+        totalCompressionPercent: ((1 - totalCompressionRatio) * 100).toFixed(1),
+        whitePixelPercent: analysis.whitePercent,
+        processingTimeMs: Date.now() - startTime,
+        compressionMethod: 'pako-deflate'
+      }
+    };
+
+  } catch (error) {
+    console.error(`❌ Error processing artist image:`, error);
+    throw error;
+  }
+}
+
+// Fast Artist Image Processing - prioritizes speed for client response
+//
+// HTTP endpoint. Reads `url`, `artistKey` and optional `size` (default 200)
+// from the query string or JSON body, answers 400 for missing or invalid
+// input, 500 when processing fails, and otherwise returns the JSON result
+// of processAndStoreArtistImage.
+//
+// NOTE(review): the endpoint is public and fetches a caller-supplied URL,
+// then writes to a caller-supplied artist document - confirm this exposure
+// is intended.
+export const processArtistImageBinary = onRequest({
+  timeoutSeconds: 15,
+  minInstances: 0,
+  maxInstances: 20,
+  region: 'us-central1',
+  invoker: 'public'
+}, async (req, res) => {
+  // Enable CORS
+  res.set('Access-Control-Allow-Origin', '*');
+  res.set('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
+  res.set('Access-Control-Allow-Headers', 'Content-Type');
+
+  if (req.method === 'OPTIONS') {
+    res.status(200).send('');
+    return;
+  }
+
+  const imageUrl = req.query.url || req.body?.url;
+  const artistUrlKey = req.query.artistKey || req.body?.artistKey;
+  const targetSize = Number.parseInt(req.query.size || req.body?.size || '200', 10);
+
+  if (!imageUrl) {
+    res.status(400).json({ error: 'No image URL provided' });
+    return;
+  }
+
+  if (!artistUrlKey) {
+    res.status(400).json({ error: 'No artist key provided' });
+    return;
+  }
+
+  // `size` comes straight from the request and sets the canvas dimensions,
+  // so NaN, non-positive and oversized values are rejected up front instead
+  // of failing (or allocating a huge canvas) deep inside image processing.
+  const maxTargetSize = 2048;
+  if (!Number.isInteger(targetSize) || targetSize < 1 || targetSize > maxTargetSize) {
+    res.status(400).json({ error: `Invalid size: expected an integer between 1 and ${maxTargetSize}` });
+    return;
+  }
+
+  try {
+    const result = await processAndStoreArtistImage(imageUrl, artistUrlKey, targetSize);
+    res.json(result);
+  } catch (error) {
+    console.error('❌ Error in processArtistImageBinary:', error);
+    res.status(500).json({
+      error: 'Failed to process artist image',
+      details: error.message
+    });
+  }
+});
+
+/**
+ * Try to convert unsupported image formats to supported ones
+ *
+ * Only rewrites the URL string; no request is made here.
+ *
+ * @param {string} imageUrl - Original image URL
+ * @returns {string} Modified URL that might work better, or the input
+ *   unchanged when neither rewrite applies
+ */
+function tryAlternativeImageFormat(imageUrl) {
+  // Genius often has both formats, so swap the first ".webp" for ".jpg"
+  const webpAt = imageUrl.indexOf('.webp');
+  if (webpAt !== -1) {
+    const jpgUrl = `${imageUrl.slice(0, webpAt)}.jpg${imageUrl.slice(webpAt + '.webp'.length)}`;
+    console.log(`🔄 Trying alternative format: ${jpgUrl}`);
+    return jpgUrl;
+  }
+
+  // Otherwise collapse a trailing size specification plus extension into
+  // ".jpg", e.g. file.464x464x1.png -> file.jpg
+  const sizedSuffix = /\.\d+x\d+x?\d*\.(png|gif|webp)$/i;
+  if (!sizedSuffix.test(imageUrl)) {
+    return imageUrl;
+  }
+
+  const baseUrl = imageUrl.replace(sizedSuffix, '.jpg');
+  console.log(`🔄 Trying simplified format: ${baseUrl}`);
+  return baseUrl;
+}
+
+/**
+ * Process album art to binary format and store in database
+ * Similar to artist processing but stores in albumArt collection
+ *
+ * Up to three URLs are attempted in order: the original, the result of
+ * tryAlternativeImageFormat, and the original with its size segments
+ * removed. The first one that downloads and decodes is dithered,
+ * deflated and written to `albumArt/{albumArtId}`.
+ *
+ * @param {string} imageUrl - Source image URL
+ * @param {string} albumArtId - Document ID in the `albumArt` collection
+ * @param {number} [targetSize=800] - Output width and height in pixels
+ * @returns {Promise<object>} `{ success, binaryData, metadata }`
+ * @throws {Error} The error from the last attempt when every URL fails
+ */
+async function processAndStoreAlbumArt(imageUrl, albumArtId, targetSize = 800) {
+  const startTime = Date.now();
+
+  // Candidate URLs in priority order, original first
+  const urlsToTry = [imageUrl];
+
+  // Alternative format, if the original might be problematic
+  const altUrl = tryAlternativeImageFormat(imageUrl);
+  if (altUrl !== imageUrl) {
+    urlsToTry.push(altUrl);
+  }
+
+  // Backup: the original without any ".<W>x<H>[x<N>]." size specification
+  const simpleUrl = imageUrl.replace(/\.\d+x\d+x?\d*\./g, '.');
+  if (simpleUrl !== imageUrl && !urlsToTry.includes(simpleUrl)) {
+    urlsToTry.push(simpleUrl);
+  }
+
+  const lastAttempt = urlsToTry.length - 1;
+  let lastError = null;
+
+  for (const [attempt, currentUrl] of urlsToTry.entries()) {
+    const usedFallbackUrl = attempt > 0;
+
+    try {
+      const attemptTag = usedFallbackUrl ? ` [attempt ${attempt + 1}]` : '';
+      console.log(`🚀 FAST processing album art: ${currentUrl} (ID: ${albumArtId})${attemptTag}`);
+
+      // Fetch the image
+      const imageResponse = await fetchWithTimeout(currentUrl, { timeout: 8000 });
+      if (!imageResponse.ok) {
+        throw new Error(`Failed to fetch image: ${imageResponse.status}`);
+      }
+
+      const imageBuffer = await imageResponse.arrayBuffer();
+      console.log(`📦 Downloaded: ${imageBuffer.byteLength} bytes in ${Date.now() - startTime}ms`);
+
+      // Draw onto a square canvas of the target size
+      const { createCanvas, loadImage } = await import('canvas');
+      const img = await loadImage(Buffer.from(imageBuffer));
+      const canvas = createCanvas(targetSize, targetSize);
+      const ctx = canvas.getContext('2d');
+      ctx.drawImage(img, 0, 0, targetSize, targetSize);
+      const imageData = ctx.getImageData(0, 0, targetSize, targetSize);
+
+      // Apply dithering and get binary data
+      const binaryData = atkinsonDitherToBinary(imageData);
+      const analysis = analyzeBinaryData(binaryData, targetSize, targetSize);
+
+      console.log(`⚡ Binary processed in ${Date.now() - startTime}ms: ${analysis.totalBytes} bytes (${analysis.compressionPercent}% compression)`);
+
+      // Compress with Pako
+      const compressedData = pako.deflate(binaryData);
+      console.log(`🗜️ Pako compressed: ${binaryData.length} → ${compressedData.length} bytes (${((1 - compressedData.length / binaryData.length) * 100).toFixed(1)}% reduction)`);
+
+      // Store in albumArt collection using the provided ID
+      const base64Binary = Buffer.from(compressedData).toString('base64');
+      await db.collection('albumArt').doc(albumArtId).set({
+        binaryImageData: base64Binary,
+        imageWidth: targetSize,
+        imageHeight: targetSize,
+        originalImageUrl: imageUrl, // Original URL, kept for reference
+        processedImageUrl: currentUrl, // URL that actually worked
+        processedAt: new Date(),
+        processingVersion: '1.1-pako',
+        compressionMethod: 'pako-deflate'
+      });
+
+      const fallbackNote = usedFallbackUrl ? ` (used fallback URL)` : '';
+      console.log(`💾 Stored album art binary data for ${albumArtId} in ${Date.now() - startTime}ms total${fallbackNote}`);
+
+      return {
+        success: true,
+        binaryData: base64Binary,
+        metadata: {
+          albumArtId: albumArtId,
+          width: targetSize,
+          height: targetSize,
+          originalSize: imageBuffer.byteLength,
+          binarySize: analysis.totalBytes,
+          compressedSize: compressedData.length,
+          compressionRatio: analysis.compressionRatio,
+          pakoCompressionRatio: compressedData.length / binaryData.length,
+          totalCompressionRatio: compressedData.length / imageBuffer.byteLength,
+          compressionPercent: analysis.compressionPercent,
+          pakoCompressionPercent: ((1 - compressedData.length / binaryData.length) * 100).toFixed(1),
+          totalCompressionPercent: ((1 - compressedData.length / imageBuffer.byteLength) * 100).toFixed(1),
+          whitePixelPercent: analysis.whitePercent,
+          processingTimeMs: Date.now() - startTime,
+          compressionMethod: 'pako-deflate',
+          usedFallbackUrl
+        }
+      };
+
+    } catch (error) {
+      lastError = error;
+      const isUnsupportedFormat = error.message.includes('Unsupported image type');
+
+      if (attempt === lastAttempt) {
+        console.error(`❌ Error processing album art ${albumArtId} (all ${urlsToTry.length} URLs failed):`, error.message);
+        break; // Give up after all attempts
+      }
+
+      // More candidates remain: log why this one failed and move on
+      if (isUnsupportedFormat) {
+        console.log(`⚠️ Format not supported for ${currentUrl}, trying alternative...`);
+      } else {
+        console.log(`⚠️ Error with ${currentUrl}, trying alternative:`, error.message);
+      }
+    }
+  }
+
+  // If we get here, all attempts failed
+  throw lastError || new Error('All image format attempts failed');
+}
+
+// Fast Album Art Processing - prioritizes speed for client response
+//
+// HTTP endpoint. Reads `url`, `albumArtId` and optional `size` from the
+// query string or JSON body, answers 400 for missing or invalid input,
+// 500 when processing fails, and otherwise returns the JSON result of
+// processAndStoreAlbumArt.
+//
+// NOTE(review): `size` defaults to 200 here, while processAndStoreAlbumArt
+// itself defaults to 800 - confirm which default is intended.
+// NOTE(review): the endpoint is public and fetches a caller-supplied URL -
+// confirm this exposure is intended.
+export const processAlbumArtBinary = onRequest({
+  timeoutSeconds: 15,
+  minInstances: 0,
+  maxInstances: 20,
+  region: 'us-central1',
+  invoker: 'public'
+}, async (req, res) => {
+  // Enable CORS
+  res.set('Access-Control-Allow-Origin', '*');
+  res.set('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
+  res.set('Access-Control-Allow-Headers', 'Content-Type');
+
+  if (req.method === 'OPTIONS') {
+    res.status(200).send('');
+    return;
+  }
+
+  const imageUrl = req.query.url || req.body?.url;
+  const albumArtId = req.query.albumArtId || req.body?.albumArtId;
+  const targetSize = Number.parseInt(req.query.size || req.body?.size || '200', 10);
+
+  if (!imageUrl) {
+    res.status(400).json({ error: 'No image URL provided' });
+    return;
+  }
+
+  if (!albumArtId) {
+    res.status(400).json({ error: 'No album art ID provided' });
+    return;
+  }
+
+  // `size` comes straight from the request and sets the canvas dimensions,
+  // so NaN, non-positive and oversized values are rejected up front instead
+  // of failing (or allocating a huge canvas) deep inside image processing.
+  const maxTargetSize = 2048;
+  if (!Number.isInteger(targetSize) || targetSize < 1 || targetSize > maxTargetSize) {
+    res.status(400).json({ error: `Invalid size: expected an integer between 1 and ${maxTargetSize}` });
+    return;
+  }
+
+  try {
+    const result = await processAndStoreAlbumArt(imageUrl, albumArtId, targetSize);
+    res.json(result);
+  } catch (error) {
+    console.error('❌ Error in processAlbumArtBinary:', error);
+    res.status(500).json({
+      error: 'Failed to process album art',
+      details: error.message
+    });
+  }
+});
+
+// Binary Image Processing Function
+//
+// HTTP endpoint. Downloads the image at `url`, scales it onto a square
+// canvas of `size` pixels (default 200) and returns either:
+//   - binary=true: JSON with the base64 of the dithered 1-bit bitmap
+//     (not deflated) plus size statistics, or
+//   - otherwise: the scaled image as a PNG.
+// `log=true` additionally writes the statistics to the function log.
+// Nothing is stored in the database by this endpoint.
+//
+// NOTE(review): the endpoint is public and fetches a caller-supplied URL -
+// confirm this exposure is intended.
+export const processImageBinary = onRequest({
+  timeoutSeconds: 30,
+  minInstances: 0,
+  maxInstances: 10,
+  region: 'us-central1',
+  invoker: 'public'
+}, async (req, res) => {
+  // Enable CORS
+  res.set('Access-Control-Allow-Origin', '*');
+  res.set('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
+  res.set('Access-Control-Allow-Headers', 'Content-Type');
+
+  if (req.method === 'OPTIONS') {
+    res.status(200).send('');
+    return;
+  }
+
+  const imageUrl = req.query.url || req.body?.url;
+  const targetSize = Number.parseInt(req.query.size || req.body?.size || '200', 10);
+  const returnBinary = req.query.binary === 'true' || req.body?.binary === true;
+  const logAnalysis = req.query.log === 'true' || req.body?.log === true;
+
+  if (!imageUrl) {
+    res.status(400).json({ error: 'No image URL provided' });
+    return;
+  }
+
+  // `size` comes straight from the request and sets the canvas dimensions,
+  // so NaN, non-positive and oversized values are rejected up front instead
+  // of failing (or allocating a huge canvas) deep inside image processing.
+  const maxTargetSize = 2048;
+  if (!Number.isInteger(targetSize) || targetSize < 1 || targetSize > maxTargetSize) {
+    res.status(400).json({ error: `Invalid size: expected an integer between 1 and ${maxTargetSize}` });
+    return;
+  }
+
+  try {
+    console.log(`🎨 Processing image: ${imageUrl} (size: ${targetSize}, binary: ${returnBinary})`);
+
+    // Fetch the image
+    const imageResponse = await fetchWithTimeout(imageUrl, { timeout: 10000 });
+    if (!imageResponse.ok) {
+      throw new Error(`Failed to fetch image: ${imageResponse.status}`);
+    }
+
+    const imageBuffer = await imageResponse.arrayBuffer();
+    console.log(`📦 Downloaded image: ${imageBuffer.byteLength} bytes`);
+
+    // Process with canvas
+    const { createCanvas, loadImage } = await import('canvas');
+    const img = await loadImage(Buffer.from(imageBuffer));
+
+    // Create canvas with target size
+    const canvas = createCanvas(targetSize, targetSize);
+    const ctx = canvas.getContext('2d');
+    ctx.drawImage(img, 0, 0, targetSize, targetSize);
+
+    const imageData = ctx.getImageData(0, 0, targetSize, targetSize);
+    console.log(`🖼️ Got image data: ${imageData.width}x${imageData.height}`);
+
+    if (!returnBinary) {
+      // Return the scaled image as PNG
+      const buffer = canvas.toBuffer('image/png');
+      res.set('Content-Type', 'image/png');
+      res.send(buffer);
+      return;
+    }
+
+    // Apply dithering and get binary data
+    const binaryData = atkinsonDitherToBinary(imageData);
+    const analysis = analyzeBinaryData(binaryData, targetSize, targetSize);
+
+    if (logAnalysis) {
+      console.log('🔍 BINARY IMAGE ANALYSIS:');
+      console.log(`📊 Image: ${targetSize}x${targetSize} pixels`);
+      console.log(`📦 Original size: ${imageBuffer.byteLength} bytes`);
+      console.log(`🗜️ Binary size: ${analysis.totalBytes} bytes`);
+      console.log(`📉 Compression: ${analysis.compressionPercent}% reduction (${analysis.compressionRatio}x)`);
+      console.log(`⚫ White pixels: ${analysis.whitePercent}% (${analysis.setBits}/${analysis.totalPixels})`);
+    }
+
+    // Return binary data with metadata
+    const base64Binary = Buffer.from(binaryData).toString('base64');
+
+    res.json({
+      success: true,
+      format: 'binary',
+      data: base64Binary,
+      metadata: {
+        width: targetSize,
+        height: targetSize,
+        originalSize: imageBuffer.byteLength,
+        binarySize: analysis.totalBytes,
+        compressionRatio: analysis.compressionRatio,
+        compressionPercent: analysis.compressionPercent,
+        whitePixelPercent: analysis.whitePercent
+      }
+    });
+
+  } catch (error) {
+    console.error('❌ Error processing image:', error);
+    res.status(500).json({
+      error: 'Failed to process image',
+      details: error.message
+    });
+  }
+});
+
+// ========================================
+// NEW CACHE STRATEGY FUNCTIONS
+// ========================================
+
+/**
+ * Helper function to get Genius API key with fallback
+ *
+ * Uses the GENIUS_KEY parameter when it has a value; otherwise reads
+ * `genius.key` from local-config.json next to this file.
+ *
+ * @returns {Promise<string>} The Genius API key
+ * @throws {Error} When neither source provides a key
+ */
+async function getGeniusApiKey() {
+  const configuredKey = geniusApiKeyParam.value();
+  if (configuredKey) {
+    return configuredKey;
+  }
+
+  let localKey;
+  try {
+    const localConfigPath = path.join(__dirname, 'local-config.json');
+    const localConfig = JSON.parse(fs.readFileSync(localConfigPath, 'utf8'));
+    localKey = localConfig?.genius?.key;
+  } catch (error) {
+    console.error('Error loading local config:', error);
+    // Keep the underlying read/parse failure attached for debugging
+    throw new Error('API key not found. Please configure your API key.', { cause: error });
+  }
+
+  // A config file without genius.key must fail here rather than hand
+  // `undefined` to callers as if it were a key
+  if (!localKey) {
+    throw new Error('API key not found. Please configure your API key.');
+  }
+
+  return localKey;
+}
+
+/**
+ * Decide whether an artist's cached song list should be fetched again.
+ *
+ * @param {Date|string|number|{toDate: Function}|null|undefined} songsLastUpdated -
+ *   Last update time. Objects exposing `toDate()` (presumably Firestore
+ *   Timestamps read back from the artist document - confirm against the SDK
+ *   in use) are converted through that method.
+ * @param {boolean} [isFullyCached] - When strictly `false`, a refresh is always
+ *   needed because more pages remain to be fetched.
+ * @returns {boolean} True when the list is incomplete, has no usable timestamp,
+ *   or was last updated more than 7 days ago
+ */
+function needsRefresh(songsLastUpdated, isFullyCached) {
+  // If we know the artist is NOT fully cached, always continue fetching pages
+  if (isFullyCached === false) return true;
+
+  if (!songsLastUpdated) return true;
+
+  const lastUpdated = typeof songsLastUpdated.toDate === 'function'
+    ? songsLastUpdated.toDate()
+    : new Date(songsLastUpdated);
+
+  // An unparseable timestamp used to compare as "not older" (NaN < x is false),
+  // which silently disabled refreshing; treat it as stale instead.
+  if (!(lastUpdated instanceof Date) || Number.isNaN(lastUpdated.getTime())) {
+    return true;
+  }
+
+  const oneWeekAgo = new Date();
+  oneWeekAgo.setDate(oneWeekAgo.getDate() - 7);
+  return lastUpdated < oneWeekAgo;
+}
+
+/**
+ * Find an artist's image URL inside raw Genius song objects.
+ *
+ * Looks at the first `maxSongsToCheck` songs in order. For each song the
+ * primary artist is tested first, then the featured artists; the first entry
+ * whose id equals the target and whose `image_url` is truthy wins.
+ *
+ * @param {Object[]} songs - Array of song objects from Genius API
+ * @param {number|string} targetArtistId - The artist ID to search for (strings are parsed as base-10 integers)
+ * @param {number} maxSongsToCheck - Maximum number of songs to check (default: 11)
+ * @returns {string|null} Artist image URL or null if not found
+ */
+function extractArtistImageUrl(songs, targetArtistId, maxSongsToCheck = 11) {
+  console.log(`Searching for image URL for artist ID ${targetArtistId} in ${Math.min(songs.length, maxSongsToCheck)} songs`);
+
+  // API ids are numeric, so normalise a string id before strict comparison
+  const wantedId = typeof targetArtistId === 'string' ? parseInt(targetArtistId, 10) : targetArtistId;
+  const candidates = songs.slice(0, maxSongsToCheck);
+
+  for (const candidate of candidates) {
+    const primary = candidate.primary_artist;
+    if (primary && primary.id === wantedId && primary.image_url) {
+      console.log(`Found artist image URL in primary artist: ${primary.image_url}`);
+      return primary.image_url;
+    }
+
+    const featured = candidate.featured_artists;
+    if (Array.isArray(featured)) {
+      const match = featured.find((artist) => artist.id === wantedId && artist.image_url);
+      if (match) {
+        console.log(`Found artist image URL in featured artists: ${match.image_url}`);
+        return match.image_url;
+      }
+    }
+  }
+
+  console.log(`No image URL found for artist ID ${wantedId} in ${candidates.length} songs`);
+  return null;
+}
+
+/**
+ * Fetch one page of song metadata for an artist from the Genius API.
+ *
+ * Songs are requested 50 per page, sorted by popularity, and mapped to the
+ * document shape stored in the `songs` collection (lyrics fields start empty,
+ * `scrapingStatus` starts as 'pending').
+ *
+ * @param {number|string} artistId - Genius artist ID
+ * @param {number} page - Page number (1-based)
+ * @returns {Promise<Object>} `{ songs, rawSongs, hasMore, totalSongs, pageNumber }`
+ *   where `songs` are the transformed songs, `rawSongs` the untouched API
+ *   objects, `hasMore` whether another page is expected, `totalSongs` the
+ *   number of songs on THIS page only, and `pageNumber` the requested page
+ * @throws {Error} On a non-OK HTTP status, an unexpected response structure,
+ *   or any error raised by the key lookup / fetch (logged, then re-thrown)
+ */
+async function getSongsByArtist(artistId, page = 1) {
+  const songsPerPage = 50;
+  console.log(`Fetching songs for artist ${artistId}, page ${page}`);
+
+  try {
+    const geniusApiKey = await getGeniusApiKey();
+    const headers = { "Authorization": `Bearer ${geniusApiKey}` };
+
+    // Fetch one page of songs, sorted by popularity
+    const response = await fetchWithTimeout(
+      `https://api.genius.com/artists/${encodeURIComponent(artistId)}/songs?per_page=${songsPerPage}&page=${encodeURIComponent(page)}&sort=popularity`,
+      { headers }
+    );
+
+    if (!response.ok) {
+      throw new Error(`Genius API error: ${response.status} ${response.statusText}`);
+    }
+
+    const data = await response.json();
+
+    if (!data.response || !Array.isArray(data.response.songs)) {
+      throw new Error('Invalid API response structure');
+    }
+
+    const { songs, next_page: nextPage } = data.response;
+    console.log(`Fetched ${songs.length} songs for artist ${artistId}, page ${page}`);
+
+    // Transform songs to our schema format
+    const addedAt = new Date();
+    const transformedSongs = songs.map((song) => ({
+      id: song.id.toString(), // Use as Firestore document ID
+      title: song.title,
+      url: song.url,
+      songArtImageUrl: song.song_art_image_url,
+      artistNames: song.artist_names,
+      primaryArtist: {
+        id: song.primary_artist.id,
+        name: song.primary_artist.name,
+        url: song.primary_artist.url
+      },
+      // Lyrics fields - initially null
+      lyrics: null,
+      lyricsScrapedAt: null,
+      scrapingAttempts: 0,
+      scrapingError: null,
+      // Metadata
+      addedAt,
+      scrapingStatus: 'pending'
+    }));
+
+    // Prefer the API's own pagination marker (`next_page` is null on the last
+    // page); fall back to "a full page probably means more" when it is absent.
+    const hasMore = nextPage !== undefined
+      ? nextPage !== null
+      : songs.length === songsPerPage;
+
+    return {
+      songs: transformedSongs,
+      rawSongs: songs, // Include raw API response for image URL extraction
+      hasMore,
+      totalSongs: songs.length, // Count for the current page only
+      pageNumber: page
+    };
+
+  } catch (error) {
+    console.error(`Error fetching songs for artist ${artistId}, page ${page}:`, error);
+    throw error;
+  }
+}
+
+/**
+ * Derive a deduplication ID for album art from a Genius image URL.
+ *
+ * The ID is the part of the URL's last path segment before its first dot when
+ * that part is a 32-character hex string (returned lower-cased). Otherwise the
+ * whole last segment is used, with every non-alphanumeric character replaced
+ * by '-' and lower-cased, and a warning is logged.
+ *
+ * Example: https://images.genius.com/bda1518357007cbd7ab978c4a6764e26.711x711x1.jpg
+ * Returns: bda1518357007cbd7ab978c4a6764e26
+ *
+ * @param {string} imageUrl - Image URL to inspect
+ * @returns {string|null} The derived ID, or null for a falsy input or when
+ *   extraction throws (e.g. a non-string input)
+ */
+function extractGeniusImageHash(imageUrl) {
+  const hexHashPattern = /^[a-f0-9]{32}$/i;
+
+  try {
+    if (!imageUrl) {
+      return null;
+    }
+
+    // Last path segment, then everything before its first dot
+    const segments = imageUrl.split('/');
+    const filename = segments[segments.length - 1];
+    const [candidate] = filename.split('.');
+
+    if (candidate && hexHashPattern.test(candidate)) {
+      return candidate.toLowerCase();
+    }
+
+    // Not the expected hash-style name: fall back to a sanitised filename
+    console.warn(`Unexpected Genius URL format: ${imageUrl}, using filename as ID`);
+    const sanitised = filename.replace(/[^a-zA-Z0-9]/g, '-');
+    return sanitised.toLowerCase();
+  } catch (error) {
+    console.error('Error extracting hash from Genius URL:', error);
+    return null;
+  }
+}
+
+/**
+ * Store song documents in the Firestore `songs` collection.
+ *
+ * Songs whose document already exists are left untouched so existing data
+ * (e.g. scraped lyrics) is never overwritten. New songs are written in one
+ * batch, without their `id` field (it is the document ID) and with an
+ * `albumArtId` derived from `songArtImageUrl` (null when there is no URL).
+ *
+ * @param {Object[]} songs - Array of song objects; each needs a string `id`
+ * @returns {Promise<string[]>} IDs of all songs now available in Firestore,
+ *   i.e. newly written AND already existing ones, in input order
+ * @throws {Error} Any Firestore read/commit error (logged, then re-thrown)
+ */
+async function storeSongsInFirestore(songs) {
+  console.log(`Storing ${songs.length} songs in Firestore`);
+
+  try {
+    const songsCollection = db.collection('songs');
+    const songRefs = songs.map((song) => songsCollection.doc(song.id));
+
+    // The existence checks are independent, so issue them together instead of
+    // awaiting one read per song in sequence.
+    const snapshots = await Promise.all(songRefs.map((songRef) => songRef.get()));
+
+    const batch = db.batch();
+    const storedSongIds = [];
+    let queuedWrites = 0;
+
+    songs.forEach((song, index) => {
+      // Existing songs are still reported: they are available to the caller
+      storedSongIds.push(song.id);
+
+      if (snapshots[index].exists) {
+        console.log(`Song ${song.id} already exists, skipping: ${song.title}`);
+        return;
+      }
+
+      // Extract album art ID for future processing (but don't process yet)
+      const albumArtId = song.songArtImageUrl
+        ? extractGeniusImageHash(song.songArtImageUrl)
+        : null;
+
+      // Drop the id from the document data since it's used as the document ID
+      const { id, ...songData } = song;
+
+      batch.set(songRefs[index], { ...songData, albumArtId });
+      queuedWrites += 1;
+      console.log(`Queued song ${id} for storage: ${song.title}${albumArtId ? ` (album art ID: ${albumArtId})` : ''}`);
+    });
+
+    // Only commit (and only claim success) when something was actually queued
+    if (queuedWrites > 0) {
+      await batch.commit();
+      console.log(`Successfully stored ${queuedWrites} new songs in Firestore (${storedSongIds.length - queuedWrites} already existed)`);
+    } else {
+      console.log('No new songs to store');
+    }
+
+    return storedSongIds;
+
+  } catch (error) {
+    console.error('Error storing songs in Firestore:', error);
+    throw error;
+  }
+}
+
+/**
+ * Make sure an `albumArt` document exists for the given ID, creating it via
+ * processAndStoreAlbumArt() (called with size 800) when it does not.
+ *
+ * Never throws: every failure is logged and reported as `false`.
+ *
+ * @param {string} imageUrl - Original album art URL
+ * @param {string} albumArtId - Extracted hash ID (document ID in `albumArt`)
+ * @returns {Promise<boolean>} True if the art already existed or was processed, false on error
+ */
+async function checkAndProcessAlbumArt(imageUrl, albumArtId) {
+  try {
+    const snapshot = await db.collection('albumArt').doc(albumArtId).get();
+
+    // Only art that has not been stored yet needs processing
+    if (!snapshot.exists) {
+      console.log(`🎨 Processing new album art: ${albumArtId}`);
+      await processAndStoreAlbumArt(imageUrl, albumArtId, 800);
+      console.log(`✅ Successfully processed album art: ${albumArtId}`);
+    }
+
+    return true;
+  } catch (error) {
+    console.error(`❌ Error processing album art ${albumArtId}:`, error);
+    return false;
+  }
+}
+
+/**
+ * Update an artist document with new song IDs and cache metadata.
+ *
+ * The metadata (`songsFetched`, `totalSongs`, `songsLastUpdated`,
+ * `isFullyCached`, `cacheVersion`) is written on every call, even when none of
+ * the IDs are new. Skipping the write in that case left `songsLastUpdated` and
+ * `isFullyCached` stale, so a refresh that found no new songs was repeated on
+ * every subsequent request.
+ *
+ * @param {string} artistUrlKey - Artist document ID (URL slug)
+ * @param {string[]} newSongIds - Song IDs to add; IDs already listed are ignored
+ * @param {Object} metadata - Additional metadata to update
+ * @param {number} metadata.songsFetched - Stored as `songsFetched`
+ * @param {number} metadata.totalSongs - Stored as `totalSongs`
+ * @param {boolean} [metadata.isFullyCached] - Stored as `isFullyCached` (falsy values become false)
+ * @returns {Promise<void>}
+ * @throws {Error} If the artist document does not exist or Firestore fails
+ *   (logged, then re-thrown)
+ */
+async function updateArtistSongList(artistUrlKey, newSongIds, metadata) {
+  console.log(`Updating artist ${artistUrlKey} with ${newSongIds.length} new songs`);
+
+  try {
+    const artistRef = db.collection('artists').doc(artistUrlKey);
+
+    // Read the current document to find out which IDs are already listed
+    const artistDoc = await artistRef.get();
+    if (!artistDoc.exists) {
+      throw new Error(`Artist document not found: ${artistUrlKey}`);
+    }
+
+    const existingSongIds = new Set(artistDoc.data().songIds || []);
+    const trulyNewSongIds = newSongIds.filter((id) => !existingSongIds.has(id));
+
+    const updateData = {
+      songsFetched: metadata.songsFetched,
+      totalSongs: metadata.totalSongs,
+      songsLastUpdated: new Date(),
+      isFullyCached: metadata.isFullyCached || false,
+      cacheVersion: 1
+    };
+
+    if (trulyNewSongIds.length > 0) {
+      updateData.songIds = FieldValue.arrayUnion(...trulyNewSongIds);
+    } else {
+      console.log('No new song IDs to add to artist document');
+    }
+
+    await artistRef.update(updateData);
+    console.log(`Successfully updated artist ${artistUrlKey} with ${trulyNewSongIds.length} new song IDs`);
+
+  } catch (error) {
+    console.error(`Error updating artist song list for ${artistUrlKey}:`, error);
+    throw error;
+  }
+}
+
+/**
+ * Request an artist's image URL directly from the Genius `/artists/:id` endpoint.
+ * @param {number|string} artistId - Genius artist ID
+ * @returns {Promise<string|null>} The artist's `image_url`, or null when the
+ *   response is not OK or carries no artist image
+ */
+async function fetchArtistImageUrlFromGenius(artistId) {
+  const geniusApiKey = await getGeniusApiKey();
+  const headers = { "Authorization": `Bearer ${geniusApiKey}` };
+
+  const artistResponse = await fetchWithTimeout(
+    `https://api.genius.com/artists/${artistId}`,
+    { headers }
+  );
+  if (!artistResponse.ok) {
+    return null;
+  }
+
+  const artistApiData = await artistResponse.json();
+  const artist = artistApiData.response && artistApiData.response.artist;
+  return (artist && artist.image_url) || null;
+}
+
+/**
+ * Store an artist image: processed binary form first, plain URL as a fallback.
+ * @param {string} artistUrlKey - Artist document ID
+ * @param {string} imageUrl - Image URL to process/store
+ * @returns {Promise<boolean>} True when either form was stored; never throws
+ */
+async function persistArtistImage(artistUrlKey, imageUrl) {
+  try {
+    console.log(`🖼️ Found artist image, processing to binary format...`);
+    await processAndStoreArtistImage(imageUrl, artistUrlKey, 200);
+    console.log(`✅ Successfully processed and stored artist image binary data`);
+    return true;
+  } catch (imageUpdateError) {
+    console.error('Error processing/storing artist image:', imageUpdateError);
+  }
+
+  // Fallback: store just the URL if binary processing fails
+  try {
+    await db.collection('artists').doc(artistUrlKey).update({ imageUrl });
+    console.log(`⚠️ Stored URL only due to processing error: ${imageUrl}`);
+    return true;
+  } catch (urlFallbackError) {
+    console.error('Error storing artist image URL fallback:', urlFallbackError);
+    return false;
+  }
+}
+
+/**
+ * Core logic for populating artist songs (without Firebase Functions wrapper).
+ *
+ * Reads the artist document and returns early when needsRefresh() reports the
+ * cache as current (back-filling a never-attempted artist image on the way).
+ * Otherwise fetches song pages from Genius until the list ends, 1000 songs
+ * have been fetched, or the first page is done (`onlyFirstPage`), storing each
+ * page and updating the artist document after every page.
+ *
+ * An `imageUrl` of `undefined` on the artist document means "never attempted";
+ * `null` means "attempted, nothing found" and is not retried.
+ *
+ * @param {string} artistUrlKey - Artist document ID
+ * @param {Object} [options]
+ * @param {boolean} [options.onlyFirstPage=false] - Stop after the first page
+ * @returns {Promise<Object>} For an up-to-date artist
+ *   `{ success, message, totalSongs, isUpToDate }`; otherwise
+ *   `{ success, totalSongs, newSongs, isFullyCached, pagesProcessed }`.
+ *   `newSongs` is the number of songs fetched in this run (not only unseen ones).
+ * @throws {Error} If the artist document is missing or has no Genius ID;
+ *   errors from fetching or storing a page propagate as well
+ */
+async function populateArtistSongsCore(artistUrlKey, { onlyFirstPage = false } = {}) {
+  console.log(`Starting song population for artist: ${artistUrlKey}`);
+
+  // Get artist document from Firestore
+  const artistRef = db.collection('artists').doc(artistUrlKey);
+  const artistDoc = await artistRef.get();
+  if (!artistDoc.exists) {
+    throw new Error('Artist not found');
+  }
+
+  const artistData = artistDoc.data();
+  console.log(`Found artist: ${artistData.name} (Genius ID: ${artistData.geniusId})`);
+
+  const artistId = artistData.geniusId;
+  if (!artistId) {
+    throw new Error('Artist does not have a Genius ID');
+  }
+
+  // Check if refresh is needed
+  if (!needsRefresh(artistData.songsLastUpdated, artistData.isFullyCached)) {
+    console.log('Artist songs are up to date, no refresh needed');
+
+    // Only attempt the image when it was never attempted (undefined), not when
+    // an earlier attempt already recorded null
+    if (artistData.imageUrl === undefined && artistData.songIds && artistData.songIds.length > 0) {
+      console.log('Songs up to date but missing image URL - fetching it from the Genius API...');
+
+      try {
+        // Stored song documents do not carry the artist image, so one direct
+        // artist request is the only lookup needed here
+        const foundImageUrl = await fetchArtistImageUrlFromGenius(artistId);
+
+        if (foundImageUrl) {
+          console.log(`Found image URL via direct artist API call: ${foundImageUrl}`);
+          await persistArtistImage(artistUrlKey, foundImageUrl);
+        } else {
+          await artistRef.update({ imageUrl: null });
+          console.log('No image URL found, stored null');
+        }
+      } catch (imageError) {
+        // Don't fail the entire operation if image extraction fails; imageUrl
+        // stays undefined so a later call can try again
+        console.error('Error extracting image URL for up-to-date artist:', imageError);
+      }
+    }
+
+    return {
+      success: true,
+      message: 'Songs already up to date',
+      totalSongs: (artistData.songIds || []).length,
+      isUpToDate: true
+    };
+  }
+
+  const maxSongs = 1000;
+  const maxSongsForImageProbe = 11;
+  const songsPerPage = 50;
+
+  const allSongIds = [...(artistData.songIds || [])];
+  const knownSongIds = new Set(allSongIds);
+  let page = 1;
+  let totalFetched = 0;
+  let pagesProcessed = 0;
+  let reachedEnd = false; // True once Genius reported that no further songs exist
+  let artistImageResolved = artistData.imageUrl !== undefined;
+
+  console.log(`Starting with ${allSongIds.length} existing songs`);
+
+  // Fetch songs page by page
+  while (totalFetched < maxSongs) {
+    console.log(`Fetching page ${page}...`);
+
+    const result = await getSongsByArtist(artistId, page);
+
+    if (result.songs.length === 0) {
+      console.log('No more songs available');
+      reachedEnd = true;
+
+      // No earlier page recorded the list as complete (otherwise the loop
+      // would have stopped there), so record it now; without this the artist
+      // document keeps isFullyCached: false and is re-fetched on every call
+      try {
+        await artistRef.update({ isFullyCached: true, songsLastUpdated: new Date() });
+      } catch (completionError) {
+        console.error('Error marking artist as fully cached:', completionError);
+      }
+      break;
+    }
+
+    // Extract and store artist image URL (only once, from the first few songs)
+    if (!artistImageResolved) {
+      console.log('Attempting to extract artist image URL from songs data...');
+
+      // Check up to 11 songs in total across pages
+      const songsCheckedSoFar = (page - 1) * songsPerPage;
+      const maxSongsToCheckThisPage = Math.max(0, maxSongsForImageProbe - songsCheckedSoFar);
+      let imageUrlFound = false;
+
+      if (maxSongsToCheckThisPage > 0) {
+        // Use raw songs data from API response for image URL extraction
+        const artistImageUrl = extractArtistImageUrl(result.rawSongs, artistId, maxSongsToCheckThisPage);
+
+        if (artistImageUrl) {
+          imageUrlFound = true;
+          artistImageResolved = await persistArtistImage(artistUrlKey, artistImageUrl);
+        }
+      }
+
+      const probeExhausted = songsCheckedSoFar + result.songs.length >= maxSongsForImageProbe;
+      if (!artistImageResolved && probeExhausted) {
+        // Record null only when no URL was found at all; a URL that merely
+        // failed to store stays undefined so a later run can retry it
+        if (!imageUrlFound) {
+          try {
+            await artistRef.update({ imageUrl: null });
+            console.log('No artist image URL found after checking 11 songs, stored null');
+          } catch (imageUpdateError) {
+            console.error('Error updating artist image URL to null:', imageUpdateError);
+          }
+        }
+        artistImageResolved = true;
+      }
+    }
+
+    // Store songs in Firestore
+    const storedSongIds = await storeSongsInFirestore(result.songs);
+
+    // Add IDs not seen before to our running list
+    const newSongIds = [];
+    for (const id of storedSongIds) {
+      if (!knownSongIds.has(id)) {
+        knownSongIds.add(id);
+        newSongIds.push(id);
+      }
+    }
+    allSongIds.push(...newSongIds);
+
+    totalFetched += result.songs.length;
+    pagesProcessed += 1;
+    if (!result.hasMore) {
+      reachedEnd = true;
+    }
+
+    // Update artist document with progress
+    await updateArtistSongList(artistUrlKey, storedSongIds, {
+      songsFetched: allSongIds.length,
+      totalSongs: allSongIds.length,
+      isFullyCached: !result.hasMore
+    });
+
+    console.log(`Page ${page} complete: ${newSongIds.length} new songs, ${allSongIds.length} total`);
+
+    if (page === 1) {
+      console.log(`✅ First 50 songs cached for ${artistUrlKey}. Triggering queue build on client...`);
+    }
+
+    // Early exit after first page if requested
+    if (onlyFirstPage) {
+      console.log(`⏹️ Early return after first page for ${artistUrlKey}`);
+      break;
+    }
+
+    // Break if no more pages
+    if (!result.hasMore) {
+      console.log('Reached end of songs for artist');
+      break;
+    }
+
+    page++;
+
+    // Small delay to be respectful to the API
+    await new Promise(resolve => setTimeout(resolve, 200));
+  }
+
+  console.log(`Completed song population for ${artistUrlKey}: ${allSongIds.length} total songs`);
+
+  return {
+    success: true,
+    totalSongs: allSongIds.length,
+    newSongs: totalFetched,
+    // True only when the end of the list was actually reached; an early
+    // onlyFirstPage exit or hitting the 1000-song cap is not "fully cached"
+    isFullyCached: reachedEnd,
+    pagesProcessed
+  };
+}
+
+/**
+ * Callable wrapper around populateArtistSongsCore(): fetches all songs for an
+ * artist (up to 1000 songs).
+ * Called when client finds empty songIds array or when refresh is needed.
+ *
+ * Request data: `{ artistUrlKey: string, onlyFirstPage?: boolean }`.
+ * Throws HttpsError 'invalid-argument' when `artistUrlKey` is missing or not a
+ * string, and 'internal' for any other failure (an HttpsError raised further
+ * down is passed through unchanged).
+ */
+export const populateArtistSongs = onCall({
+  timeoutSeconds: 300, // 5 minutes for large artists
+  minInstances: 0,
+  maxInstances: 10,
+  region: 'us-central1'
+}, async (request) => {
+  // `request.data` may be null when the client sends no payload; destructuring
+  // it directly would throw before the argument check below can answer
+  const { artistUrlKey, onlyFirstPage = false } = request.data || {};
+
+  if (!artistUrlKey || typeof artistUrlKey !== 'string') {
+    throw new HttpsError('invalid-argument', 'Artist URL key is required');
+  }
+
+  try {
+    return await populateArtistSongsCore(artistUrlKey, { onlyFirstPage });
+  } catch (error) {
+    console.error(`Error populating songs for artist ${artistUrlKey}:`, error);
+    if (error instanceof HttpsError) {
+      throw error; // Re-throw HttpsError as-is
+    }
+    throw new HttpsError('internal', `Failed to populate artist songs: ${error.message}`);
+  }
+});
+
+/**
+ * Core logic for scraping song lyrics (without Firebase Functions wrapper).
+ *
+ * Songs are processed one at a time with a 300 ms pause after each attempt.
+ * For every song: skip it when it already has lyrics or is marked 'failed',
+ * give up when `scrapingAttempts` is 2 or more, otherwise scrape its `url`,
+ * process its album art (best effort), save the lyrics and add the song to the
+ * artist's `cachedSongIds`.
+ *
+ * A failure of one song never aborts the batch.
+ *
+ * NOTE(review): any error marks the song 'failed', and 'failed' songs are
+ * skipped before the attempt limit is consulted, so the limit appears to matter
+ * only for songs left in another status (e.g. an interrupted 'scraping') -
+ * confirm whether a second attempt after a failure is intended.
+ *
+ * @param {string[]} songIds - Array of song IDs to scrape
+ * @param {string} artistUrlKey - Artist document ID
+ * @returns {Promise<Object>} `{ success: true, results, scrapedCount }` where
+ *   `results` is `{ successful: string[], failed: {songId, error}[], skipped: string[] }`
+ */
+async function scrapeSongLyricsCore(songIds, artistUrlKey) {
+  console.log(`Starting lyrics scraping for ${songIds.length} songs`);
+
+  const results = {
+    successful: [],
+    failed: [],
+    skipped: []
+  };
+
+  for (const songId of songIds) {
+    try {
+      console.log(`Scraping lyrics for song ${songId}`);
+
+      // Get song document from Firestore
+      const songRef = db.collection('songs').doc(songId);
+      const songDoc = await songRef.get();
+      if (!songDoc.exists) {
+        results.failed.push({ songId, error: 'Song not found' });
+        continue;
+      }
+
+      const songData = songDoc.data();
+
+      // Skip if already has lyrics or failed permanently
+      if (songData.lyrics || songData.scrapingStatus === 'failed') {
+        console.log(`Skipping song ${songId}: already processed`);
+        results.skipped.push(songId);
+        continue;
+      }
+
+      // Check retry limit
+      if (songData.scrapingAttempts >= 2) {
+        console.log(`Skipping song ${songId}: max retries exceeded`);
+        results.failed.push({ songId, error: 'Max retries exceeded' });
+        continue;
+      }
+
+      // Update status to 'scraping'
+      await songRef.update({
+        scrapingStatus: 'scraping',
+        scrapingAttempts: (songData.scrapingAttempts || 0) + 1
+      });
+
+      // Use existing lyrics scraping logic
+      const lyrics = await scrapeLyricsFromUrl(songData.url);
+      if (!lyrics || lyrics.trim().length === 0) {
+        throw new Error('No lyrics found or empty lyrics');
+      }
+
+      // Process album art now that we know this song will be used
+      if (songData.albumArtId && songData.songArtImageUrl) {
+        try {
+          console.log(`🎨 Processing album art for scraped song: ${songData.albumArtId}`);
+          const albumArtReady = await checkAndProcessAlbumArt(songData.songArtImageUrl, songData.albumArtId);
+          if (albumArtReady) {
+            console.log(`✅ Album art processed for song ${songId}`);
+          } else {
+            console.warn(`⚠️ Album art processing failed for song ${songId}`);
+          }
+        } catch (albumArtError) {
+          console.warn(`⚠️ Album art processing failed for song ${songId}: ${albumArtError.message}`);
+          // Don't fail lyric scraping if album art processing fails
+        }
+      }
+
+      // Update song document with lyrics
+      await songRef.update({
+        lyrics: lyrics,
+        lyricsScrapedAt: new Date(),
+        scrapingStatus: 'completed',
+        scrapingError: null
+      });
+
+      results.successful.push(songId);
+      console.log(`Successfully scraped lyrics for song ${songId}: ${songData.title}`);
+
+      // Update artist cachedSongIds immediately for real-time access. This has
+      // its own error handling: the lyrics are already saved, so a failure here
+      // must not flip the song to 'failed' or report it as failed as well.
+      try {
+        await db.collection('artists').doc(artistUrlKey).update({
+          cachedSongIds: FieldValue.arrayUnion(songId),
+          lyricsScraped: FieldValue.increment(1)
+        });
+      } catch (artistUpdateError) {
+        console.error(`Lyrics saved for song ${songId} but updating artist ${artistUrlKey} failed:`, artistUpdateError);
+      }
+
+    } catch (error) {
+      console.error(`Error scraping song ${songId}:`, error);
+
+      // Record the error on the song; if even that write fails, keep going so
+      // the remaining songs are still processed
+      try {
+        await db.collection('songs').doc(songId).update({
+          scrapingStatus: 'failed',
+          scrapingError: error.message
+        });
+      } catch (statusError) {
+        console.error(`Could not record scraping failure for song ${songId}:`, statusError);
+      }
+
+      results.failed.push({ songId, error: error.message });
+    }
+
+    // Small delay between songs
+    await new Promise(resolve => setTimeout(resolve, 300));
+  }
+
+  console.log(`Lyrics scraping completed: ${results.successful.length} successful, ${results.failed.length} failed, ${results.skipped.length} skipped`);
+
+  return {
+    success: true,
+    results: results,
+    scrapedCount: results.successful.length
+  };
+}
+
+/**
+ * Callable wrapper around scrapeSongLyricsCore(): scrapes lyrics for the given
+ * songs of one artist in a single batch call.
+ *
+ * Request data: `{ songIds: string[], artistUrlKey: string }`.
+ * Throws HttpsError 'invalid-argument' when `songIds` is not a non-empty array
+ * or `artistUrlKey` is missing or not a string, and 'internal' for any other
+ * failure (an HttpsError raised further down is passed through unchanged).
+ */
+export const scrapeSongLyrics = onCall({
+  timeoutSeconds: 300, // 5 minutes for batch scraping
+  minInstances: 0,
+  maxInstances: 20,
+  region: 'us-central1'
+}, async (request) => {
+  // `request.data` may be null when the client sends no payload; destructuring
+  // it directly would throw before the argument checks below can answer
+  const { songIds, artistUrlKey } = request.data || {};
+
+  if (!Array.isArray(songIds) || songIds.length === 0) {
+    throw new HttpsError('invalid-argument', 'Song IDs array is required');
+  }
+
+  if (!artistUrlKey || typeof artistUrlKey !== 'string') {
+    throw new HttpsError('invalid-argument', 'Artist URL key is required');
+  }
+
+  try {
+    return await scrapeSongLyricsCore(songIds, artistUrlKey);
+  } catch (error) {
+    console.error('Error in scrapeSongLyrics:', error);
+    if (error instanceof HttpsError) {
+      throw error; // Re-throw HttpsError as-is
+    }
+    throw new HttpsError('internal', `Failed to scrape lyrics: ${error.message}`);
+  }
+});
+
+// Note: scrapeLyricsFromUrl (defined below) scrapes the complete lyrics for each song
+
+/**
+ * Convert the inner HTML of one lyrics container into plain lyric lines.
+ *
+ * `<br>` tags become line breaks, all other tags are dropped, HTML entities
+ * are decoded, and blank lines as well as section-header lines are removed.
+ *
+ * @param {string} containerHtml - Inner HTML of a lyrics container
+ * @returns {string} Lyric lines joined with '\n' (may be empty)
+ */
+function lyricsContainerHtmlToText(containerHtml) {
+  const namedEntities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ' };
+
+  return containerHtml
+    // Convert <br> tags to newlines
+    .replace(/<br\s*\/?>/gi, '\n')
+    // Remove all remaining HTML tags completely
+    .replace(/<[^>]*>/g, '')
+    // Decode HTML entities in ONE pass so that e.g. "&amp;lt;" becomes the
+    // text "&lt;" and is not decoded a second time
+    .replace(/&(#x[0-9a-f]+|#\d+|amp|lt|gt|quot|apos|nbsp);/gi, (entity, body) => {
+      if (body[0] !== '#') {
+        return namedEntities[body.toLowerCase()];
+      }
+      const isHex = body[1] === 'x' || body[1] === 'X';
+      const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
+      if (!(codePoint > 0 && codePoint <= 0x10FFFF)) {
+        return entity; // Not a valid code point: leave the text untouched
+      }
+      return String.fromCodePoint(codePoint);
+    })
+    // Non-breaking spaces (literal or decoded) count as ordinary spaces
+    .replace(/\u00a0/g, ' ')
+    // Clean up whitespace
+    .split('\n')
+    .map(line => line.trim())
+    .filter(line => {
+      // Filter out section headers and empty lines
+      if (!line) return false;
+      if (line.match(/^\[.*\]$/)) return false; // Remove [Intro], [Verse], etc.
+      // NOTE(review): this also drops real lyric lines that merely START with
+      // one of these words followed by whitespace - confirm that is acceptable
+      if (line.match(/^(Intro|Verse|Chorus|Bridge|Outro|Pre-Chorus|Post-Chorus|Hook|Refrain)(\s|\d|$)/i)) return false;
+      return true;
+    })
+    .join('\n');
+}
+
+/**
+ * Scrape only the actual lyrics from a Genius song URL.
+ * This function extracts ONLY the actual song lyrics, avoiding annotations,
+ * descriptions, and other non-lyrical content from the start.
+ *
+ * The page is fetched with a browser-like User-Agent; every
+ * `div[data-lyrics-container="true"]` is cleaned and converted to text, and
+ * the pieces are joined into one newline-separated string without blank lines.
+ *
+ * @param {string} songUrl - The Genius song URL (host must be genius.com or a subdomain)
+ * @returns {Promise<string>} The complete extracted lyrics (at least 10 characters)
+ * @throws {Error} Always prefixed with "Failed to scrape lyrics from <url>:" -
+ *   for an invalid or non-Genius URL, a non-OK HTTP status, an empty page,
+ *   missing lyrics containers, or lyrics that are empty or too short
+ */
+async function scrapeLyricsFromUrl(songUrl) {
+  try {
+    console.log(`Attempting to scrape lyrics from: ${songUrl}`);
+
+    // Validate URL format
+    if (!songUrl || typeof songUrl !== 'string') {
+      throw new Error(`Invalid song URL: ${songUrl}`);
+    }
+
+    // Check the parsed host rather than a substring, so that a URL which only
+    // mentions "genius.com" in its path or query is rejected
+    let hostname;
+    try {
+      hostname = new URL(songUrl).hostname;
+    } catch (parseError) {
+      throw new Error(`Invalid song URL: ${songUrl}`);
+    }
+    if (hostname !== 'genius.com' && !hostname.endsWith('.genius.com')) {
+      throw new Error(`URL does not appear to be a Genius URL: ${songUrl}`);
+    }
+
+    // Fetch the song page with proper error handling
+    const songPageResponse = await fetchWithTimeout(songUrl, {
+      headers: {
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
+      }
+    });
+
+    if (!songPageResponse.ok) {
+      throw new Error(`HTTP ${songPageResponse.status}: ${songPageResponse.statusText} for URL: ${songUrl}`);
+    }
+
+    const songPageHtml = await songPageResponse.text();
+
+    if (!songPageHtml || songPageHtml.length < 100) {
+      throw new Error('Received empty or invalid HTML response');
+    }
+
+    // Parse the page with cheerio
+    const $ = cheerio.load(songPageHtml);
+
+    // Target ALL lyrics containers - Genius often splits lyrics across multiple divs
+    const lyricsContainers = $('div[data-lyrics-container="true"]');
+
+    if (lyricsContainers.length === 0) {
+      throw new Error('No lyrics containers found');
+    }
+
+    console.log(`Found ${lyricsContainers.length} lyrics container(s)`);
+
+    const containerTexts = [];
+
+    // Process each lyrics container
+    lyricsContainers.each((index, container) => {
+      const $container = $(container);
+
+      // Remove elements that should be excluded from lyrics
+      $container.find('[data-exclude-from-selection="true"]').remove();
+
+      // Remove headers, footers, and annotation elements
+      $container.find('.LyricsHeader__Container, .LyricsFooter__Container').remove();
+      $container.find('a[href*="/annotations/"]').remove();
+
+      const containerText = lyricsContainerHtmlToText($container.html() || '').trim();
+      if (containerText) {
+        containerTexts.push(containerText);
+      }
+    });
+
+    // Final cleanup
+    const lyrics = containerTexts
+      .join('\n\n')
+      // Remove any remaining section markers that might have slipped through
+      .replace(/^\[.*\]$/gm, '')
+      // Drop blank lines and stray whitespace
+      .split('\n')
+      .map(line => line.trim())
+      .filter(line => line.length > 0)
+      .join('\n')
+      .trim();
+
+    if (!lyrics || lyrics.length < 10) {
+      throw new Error('Extracted lyrics are too short or empty');
+    }
+
+    console.log(`Successfully scraped ${lyrics.length} characters of clean lyrics`);
+    return lyrics;
+
+  } catch (error) {
+    console.error(`Error scraping lyrics from ${songUrl}:`, error);
+    // Include more context in the error for debugging
+    throw new Error(`Failed to scrape lyrics from ${songUrl}: ${error.message}`);
+  }
+}
+
+//TODO: Push songs to db when they are scraped instead of waiting for all songs to be scraped
+/**
+ * Core logic for loading songs around a specific position (without Firebase Functions wrapper).
+ *
+ * Finds `songId` in the artist's `songIds`, takes a window of up to
+ * `rangeSize` songs that starts (forward) or ends (reverse) at that song,
+ * scrapes lyrics for window songs missing from `cachedSongIds`, and returns
+ * the window's song documents.
+ *
+ * @param {string|number} songId - The song ID to start from
+ * @param {boolean} shouldReverse - Whether to load previous songs
+ * @param {string} artistUrlKey - Artist document ID
+ * @param {number} [rangeSize=10] - Window size; non-numeric values fall back to
+ *   10, fractions are rounded down, and the minimum is 1
+ * @returns {Promise<Object>} `{ success, queuePosition, loadedSongs, scrapingResults,
+ *   targetRange: { start, end }, songsScraped, songsLoaded, totalTargetSongs }`;
+ *   `loadedSongs` maps song ID to document data, `scrapingResults` is null
+ *   when nothing needed scraping, `targetRange.end` is exclusive
+ * @throws {Error} If the artist document does not exist or the song is not in
+ *   the artist's song list
+ */
+async function loadStartingFromIdCore(songId, shouldReverse = false, artistUrlKey, rangeSize = 10) {
+  console.log(`Loading songs starting from ${songId} for artist ${artistUrlKey}, reverse: ${shouldReverse}, rangeSize: ${rangeSize}`);
+
+  // Get artist document to find songIds array
+  const artistDoc = await db.collection('artists').doc(artistUrlKey).get();
+  if (!artistDoc.exists) {
+    throw new Error('Artist not found');
+  }
+
+  const artistData = artistDoc.data();
+  const songIds = artistData.songIds || [];
+  const cachedSongIds = new Set(artistData.cachedSongIds || []);
+
+  console.log(`Artist has ${songIds.length} total songs, ${cachedSongIds.size} cached`);
+
+  // Find position of songId in the songIds array
+  const currentPosition = songIds.indexOf(String(songId));
+  if (currentPosition === -1) {
+    throw new Error('Song not found in artist song list');
+  }
+
+  console.log(`Found song at position ${currentPosition}`);
+
+  // Determine target range (configurable number of songs in the specified
+  // direction). Rounding down keeps both directions the same size for
+  // fractional input.
+  const windowSize = Math.max(1, Math.floor(Number(rangeSize) || 10));
+  let startPos, endPos;
+  if (shouldReverse) {
+    // Load the current song and up to (windowSize-1) previous songs
+    startPos = Math.max(0, currentPosition - (windowSize - 1));
+    endPos = currentPosition + 1; // end is non-inclusive, so +1 to include current
+  } else {
+    // Load the current song and up to (windowSize-1) next songs
+    startPos = currentPosition;
+    endPos = Math.min(songIds.length, currentPosition + windowSize);
+  }
+
+  const targetSongIds = songIds.slice(startPos, endPos);
+
+  // Filter out songs that already have lyrics cached
+  const songsNeedingLyrics = targetSongIds.filter(id => !cachedSongIds.has(id));
+
+  console.log(`Found ${songsNeedingLyrics.length} songs needing lyrics out of ${targetSongIds.length} target songs`);
+
+  // Scrape missing lyrics if any
+  let scrapingResults = null;
+  if (songsNeedingLyrics.length > 0) {
+    try {
+      console.log(`Attempting to scrape lyrics for ${songsNeedingLyrics.length} songs`);
+
+      // Call core scraping function directly
+      const scrapingResponse = await scrapeSongLyricsCore(songsNeedingLyrics, artistUrlKey);
+      scrapingResults = scrapingResponse.results;
+
+      console.log(`Scraping completed: ${scrapingResults.successful.length} successful, ${scrapingResults.failed.length} failed`);
+
+    } catch (scrapingError) {
+      console.error('Error during lyrics scraping:', scrapingError);
+      // Don't fail the entire function if scraping fails
+      // Just log the error and continue with what we have
+      scrapingResults = {
+        successful: [],
+        failed: songsNeedingLyrics.map(id => ({ songId: id, error: scrapingError.message })),
+        skipped: []
+      };
+    }
+  } else {
+    console.log('All target songs already have cached lyrics');
+  }
+
+  // Fetch the window's song documents. The reads are independent, so they run
+  // concurrently; each one handles its own failure so a single bad document
+  // cannot sink the rest.
+  const songsCollection = db.collection('songs');
+  const loadedEntries = await Promise.all(targetSongIds.map(async (targetSongId) => {
+    try {
+      const songDoc = await songsCollection.doc(targetSongId).get();
+      if (!songDoc.exists) {
+        console.warn(`Song document not found for ID: ${targetSongId}`);
+        return null;
+      }
+      return [targetSongId, { id: targetSongId, ...songDoc.data() }];
+    } catch (songError) {
+      console.error(`Error loading song ${targetSongId}:`, songError);
+      // Continue with other songs even if one fails
+      return null;
+    }
+  }));
+
+  const loadedSongs = {};
+  let songsLoadedCount = 0;
+  for (const entry of loadedEntries) {
+    if (entry) {
+      loadedSongs[entry[0]] = entry[1];
+      songsLoadedCount++;
+    }
+  }
+
+  console.log(`Successfully loaded ${songsLoadedCount} songs`);
+
+  return {
+    success: true,
+    queuePosition: currentPosition,
+    loadedSongs: loadedSongs,
+    scrapingResults: scrapingResults,
+    targetRange: { start: startPos, end: endPos },
+    songsScraped: scrapingResults ? scrapingResults.successful.length : 0,
+    songsLoaded: songsLoadedCount,
+    totalTargetSongs: targetSongIds.length
+  };
+}
+
+/**
+ * Callable wrapper around loadStartingFromIdCore(): loads the songs around a
+ * specific position in the queue, forward or backward.
+ *
+ * Request data: `{ songId, artistUrlKey, shouldReverse?: boolean, rangeSize?: number }`.
+ * Throws HttpsError 'invalid-argument' when `songId` or `artistUrlKey` is
+ * missing, and 'internal' (with the request details attached) for any other
+ * failure; an HttpsError raised further down is passed through unchanged.
+ */
+export const loadStartingFromId = onCall({
+  timeoutSeconds: 120,
+  minInstances: 0,
+  maxInstances: 20,
+  region: 'us-central1'
+}, async (request) => {
+  // `request.data` may be null when the client sends no payload; destructuring
+  // it directly would throw before the argument check below can answer
+  const { songId, shouldReverse = false, artistUrlKey, rangeSize = 10 } = request.data || {};
+
+  if (!songId || !artistUrlKey) {
+    throw new HttpsError('invalid-argument', 'Song ID and artist URL key are required');
+  }
+
+  try {
+    return await loadStartingFromIdCore(songId, shouldReverse, artistUrlKey, rangeSize);
+  } catch (error) {
+    console.error(`Error in loadStartingFromId for song ${songId}:`, error);
+
+    if (error instanceof HttpsError) {
+      throw error; // Re-throw HttpsError as-is
+    }
+
+    // Provide more detailed error information
+    throw new HttpsError('internal', `Failed to load songs: ${error.message}`, {
+      songId,
+      artistUrlKey,
+      shouldReverse,
+      originalError: error.message
+    });
+  }
+});
+
+// ========================================
+// LEGACY FUNCTIONS (Keep for backward compatibility during transition)
+// ========================================
+
+/**
+ * Legacy function - keep for backward compatibility
+ * TODO: Gradually migrate clients to use new system
+ */
+export const initialArtistSearch = onCall({
+ timeoutSeconds: 60,
+ minInstances: 0,
+ maxInstances: 100,
+ region: 'us-central1'
+}, async (request, context) => {
+ // TODO: Implement legacy compatibility or redirect to new system
+ console.log('Legacy initialArtistSearch called - consider migrating to new system');
+ throw new HttpsError('unimplemented', 'This function is being migrated to the new caching system');
+});
+
+/**
+ * Legacy function - keep for backward compatibility
+ * TODO: Gradually migrate clients to use new system
+ */
+export const searchByArtistId = onCall({
+ timeoutSeconds: 60,
+ minInstances: 0,
+ maxInstances: 100,
+ region: 'us-central1'
+}, async (request, context) => {
+ // TODO: Implement legacy compatibility or redirect to new system
+ console.log('Legacy searchByArtistId called - consider migrating to new system');
+ throw new HttpsError('unimplemented', 'This function is being migrated to the new caching system');
+});
+
+// ========================================
+// UTILITY FUNCTIONS FOR TESTING
+// ========================================
+
+/**
+ * Test function to validate the new caching system
+ */
+export const testCacheSystem = onCall({
+ timeoutSeconds: 120,
+ region: 'us-central1'
+}, async (request, context) => {
+ const { artistUrlKey, testType = 'populate' } = request.data;
+
+ if (!artistUrlKey) {
+ throw new HttpsError('invalid-argument', 'Artist URL key is required for testing');
+ }
+
+ // Initialize results at the beginning to avoid reference errors
+ const results = {
+ testType: testType,
+ artistUrlKey: artistUrlKey,
+ timestamp: new Date(),
+ steps: []
+ };
+
+ try {
+ console.log(`Running cache system test for artist: ${artistUrlKey}`);
+
+ // Test song population
+ if (testType === 'full' || testType === 'populate') {
+ console.log('Testing song population...');
+ try {
+ const populateResult = await populateArtistSongsCore(artistUrlKey);
+ results.steps.push({
+ step: 'populate',
+ success: populateResult.success,
+ data: populateResult
+ });
+ } catch (error) {
+ console.error('Error in populate step:', error);
+ results.steps.push({
+ step: 'populate',
+ success: false,
+ error: error.message
+ });
+ }
+ }
+
+ // Test lyrics scraping
+ if (testType === 'full' || testType === 'scrape') {
+ console.log('Testing lyrics scraping...');
+
+ try {
+ // Get first few song IDs from artist for testing
+ const artistDoc = await db.collection('artists').doc(artistUrlKey).get();
+ if (artistDoc.exists) {
+ const artistData = artistDoc.data();
+ const testSongIds = (artistData.songIds || []).slice(0, 2); // Test with first 2 songs
+
+ if (testSongIds.length > 0) {
+ const scrapeResult = await scrapeSongLyricsCore(testSongIds, artistUrlKey);
+ results.steps.push({
+ step: 'scrape',
+ success: scrapeResult.success,
+ data: scrapeResult
+ });
+ } else {
+ results.steps.push({
+ step: 'scrape',
+ success: false,
+ error: 'No songs available for testing'
+ });
+ }
+ } else {
+ results.steps.push({
+ step: 'scrape',
+ success: false,
+ error: 'Artist not found'
+ });
+ }
+ } catch (error) {
+ console.error('Error in scrape step:', error);
+ results.steps.push({
+ step: 'scrape',
+ success: false,
+ error: error.message
+ });
+ }
+ }
+
+ // Test smart loading
+ if (testType === 'full' || testType === 'load') {
+ console.log('Testing smart loading...');
+
+ try {
+ const artistDoc = await db.collection('artists').doc(artistUrlKey).get();
+ if (artistDoc.exists) {
+ const artistData = artistDoc.data();
+ const songIds = artistData.songIds || [];
+
+ if (songIds.length > 5) {
+ const testSongId = songIds[2]; // Test with 3rd song
+ const loadResult = await loadStartingFromIdCore(testSongId, false, artistUrlKey);
+ results.steps.push({
+ step: 'load',
+ success: loadResult.success,
+ data: loadResult
+ });
+ } else {
+ results.steps.push({
+ step: 'load',
+ success: false,
+ error: 'Not enough songs for testing smart loading'
+ });
+ }
+ } else {
+ results.steps.push({
+ step: 'load',
+ success: false,
+ error: 'Artist not found'
+ });
+ }
+ } catch (error) {
+ console.error('Error in load step:', error);
+ results.steps.push({
+ step: 'load',
+ success: false,
+ error: error.message
+ });
+ }
+ }
+
+ // Determine overall success
+ const allStepsSuccessful = results.steps.every(step => step.success);
+
+ return {
+ success: allStepsSuccessful,
+ testResults: results,
+ summary: {
+ totalSteps: results.steps.length,
+ successfulSteps: results.steps.filter(step => step.success).length,
+ failedSteps: results.steps.filter(step => !step.success).length
+ }
+ };
+
+ } catch (error) {
+ console.error('Error in cache system test:', error);
+
+ // Add the error as a failed step
+ results.steps.push({
+ step: 'error',
+ success: false,
+ error: error.message
+ });
+
+ return {
+ success: false,
+ error: error.message,
+ testResults: results,
+ summary: {
+ totalSteps: results.steps.length,
+ successfulSteps: results.steps.filter(step => step.success).length,
+ failedSteps: results.steps.filter(step => !step.success).length
+ }
+ };
+ }
+});
+
+/**
+ * Helper function to get artist information for testing
+ */
+export const getArtistInfo = onCall({
+ timeoutSeconds: 30,
+ region: 'us-central1'
+}, async (request, context) => {
+ const { artistUrlKey } = request.data;
+
+ if (!artistUrlKey) {
+ throw new HttpsError('invalid-argument', 'Artist URL key is required');
+ }
+
+ try {
+ const artistDoc = await db.collection('artists').doc(artistUrlKey).get();
+ if (!artistDoc.exists) {
+ throw new HttpsError('not-found', 'Artist not found');
+ }
+
+ const artistData = artistDoc.data();
+
+ return {
+ success: true,
+ artist: {
+ name: artistData.name,
+ geniusId: artistData.geniusId,
+ urlKey: artistUrlKey, // Add the URL key to the response
+ totalSongs: (artistData.songIds || []).length,
+ cachedSongs: (artistData.cachedSongIds || []).length,
+ songIds: artistData.songIds || [], // Include songIds array
+ lastUpdated: artistData.songsLastUpdated,
+ isFullyCached: artistData.isFullyCached || false
+ }
+ };
+
+ } catch (error) {
+ console.error(`Error getting artist info for ${artistUrlKey}:`, error);
+ throw new HttpsError('internal', `Failed to get artist info: ${error.message}`);
+ }
+});
+
+/**
+ * Diagnostic function to inspect song data and URLs
+ */
+export const diagnoseSongData = onCall({
+ timeoutSeconds: 60,
+ region: 'us-central1'
+}, async (request, context) => {
+ const { artistUrlKey, songId } = request.data;
+
+ if (!artistUrlKey) {
+ throw new HttpsError('invalid-argument', 'Artist URL key is required');
+ }
+
+ try {
+ const results = {
+ timestamp: new Date(),
+ artistUrlKey: artistUrlKey,
+ diagnostics: {}
+ };
+
+ // Get artist info
+ const artistDoc = await db.collection('artists').doc(artistUrlKey).get();
+ if (!artistDoc.exists) {
+ throw new HttpsError('not-found', 'Artist not found');
+ }
+
+ const artistData = artistDoc.data();
+ results.diagnostics.artist = {
+ name: artistData.name,
+ totalSongs: (artistData.songIds || []).length,
+ cachedSongs: (artistData.cachedSongIds || []).length,
+ firstFewSongIds: (artistData.songIds || []).slice(0, 5)
+ };
+
+ // If specific song ID provided, examine it
+ if (songId) {
+ const songDoc = await db.collection('songs').doc(songId).get();
+ if (songDoc.exists) {
+ const songData = songDoc.data();
+ results.diagnostics.specificSong = {
+ id: songId,
+ title: songData.title,
+ url: songData.url,
+ urlValid: songData.url && songData.url.includes('genius.com'),
+ hasLyrics: !!songData.lyrics,
+ scrapingStatus: songData.scrapingStatus,
+ scrapingAttempts: songData.scrapingAttempts,
+ scrapingError: songData.scrapingError
+ };
+ } else {
+ results.diagnostics.specificSong = {
+ id: songId,
+ exists: false
+ };
+ }
+ } else {
+ // Examine first few songs
+ const songIds = (artistData.songIds || []).slice(0, 3);
+ results.diagnostics.sampleSongs = [];
+
+ for (const id of songIds) {
+ const songDoc = await db.collection('songs').doc(id).get();
+ if (songDoc.exists) {
+ const songData = songDoc.data();
+ results.diagnostics.sampleSongs.push({
+ id: id,
+ title: songData.title,
+ url: songData.url,
+ urlValid: songData.url && songData.url.includes('genius.com'),
+ hasLyrics: !!songData.lyrics,
+ scrapingStatus: songData.scrapingStatus
+ });
+ } else {
+ results.diagnostics.sampleSongs.push({
+ id: id,
+ exists: false
+ });
+ }
+ }
+ }
+
+ return {
+ success: true,
+ diagnostics: results
+ };
+
+ } catch (error) {
+ console.error(`Error in diagnoseSongData:`, error);
+ throw new HttpsError('internal', `Failed to diagnose song data: ${error.message}`);
+ }
+});
+
+/**
+ * Test lyrics scraping for a specific song URL
+ */
+export const testLyricsScraping = onCall({
+ timeoutSeconds: 60,
+ region: 'us-central1'
+}, async (request, context) => {
+ const { songUrl, songId } = request.data;
+
+ if (!songUrl && !songId) {
+ throw new HttpsError('invalid-argument', 'Either songUrl or songId is required');
+ }
+
+ try {
+ let testUrl = songUrl;
+
+ // If songId provided, get URL from database
+ if (songId && !songUrl) {
+ const songDoc = await db.collection('songs').doc(songId).get();
+ if (!songDoc.exists) {
+ throw new HttpsError('not-found', 'Song not found');
+ }
+ testUrl = songDoc.data().url;
+ }
+
+ console.log(`Testing lyrics scraping for URL: ${testUrl}`);
+
+ const startTime = Date.now();
+ const lyrics = await scrapeLyricsFromUrl(testUrl);
+ const duration = Date.now() - startTime;
+
+ return {
+ success: true,
+ url: testUrl,
+ lyrics: lyrics,
+ lyricsLength: lyrics.length,
+ lyricsLines: lyrics.split('\n').length,
+ scrapingDuration: duration
+ };
+
+ } catch (error) {
+ console.error(`Error testing lyrics scraping:`, error);
+ return {
+ success: false,
+ url: songUrl || 'unknown',
+ error: error.message,
+ errorType: error.constructor.name
+ };
+ }
+});
diff --git a/functions/package-lock.json b/functions/package-lock.json
index 43ff98c..afe181b 100644
--- a/functions/package-lock.json
+++ b/functions/package-lock.json
@@ -716,11 +716,13 @@
}
},
"node_modules/@emnapi/runtime": {
- "version": "1.8.1",
- "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.8.1.tgz",
- "integrity": "sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==",
+ "version": "1.5.0",
+ "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.5.0.tgz",
+ "integrity": "sha512-97/BJ3iXHww3djw6hYIfErCZFee7qCtrneuLa20UXFCOTCfBM2cvQHjWJ2EG0s0MtdNwInarqCTz35i4wWXHsQ==",
+ "dev": true,
"license": "MIT",
"optional": true,
+ "peer": true,
"dependencies": {
"tslib": "^2.4.0"
}
@@ -1116,471 +1118,6 @@
"node": ">=6"
}
},
- "node_modules/@img/colour": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz",
- "integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==",
- "license": "MIT",
- "engines": {
- "node": ">=18"
- }
- },
- "node_modules/@img/sharp-darwin-arm64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz",
- "integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==",
- "cpu": [
- "arm64"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "darwin"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-darwin-arm64": "1.2.4"
- }
- },
- "node_modules/@img/sharp-darwin-x64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz",
- "integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==",
- "cpu": [
- "x64"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "darwin"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-darwin-x64": "1.2.4"
- }
- },
- "node_modules/@img/sharp-libvips-darwin-arm64": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz",
- "integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==",
- "cpu": [
- "arm64"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "darwin"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-darwin-x64": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz",
- "integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==",
- "cpu": [
- "x64"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "darwin"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-linux-arm": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz",
- "integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==",
- "cpu": [
- "arm"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "linux"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-linux-arm64": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz",
- "integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==",
- "cpu": [
- "arm64"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "linux"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-linux-ppc64": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz",
- "integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==",
- "cpu": [
- "ppc64"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "linux"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-linux-riscv64": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz",
- "integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==",
- "cpu": [
- "riscv64"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "linux"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-linux-s390x": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz",
- "integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==",
- "cpu": [
- "s390x"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "linux"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-linux-x64": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz",
- "integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==",
- "cpu": [
- "x64"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "linux"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-linuxmusl-arm64": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz",
- "integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==",
- "cpu": [
- "arm64"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "linux"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-libvips-linuxmusl-x64": {
- "version": "1.2.4",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz",
- "integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==",
- "cpu": [
- "x64"
- ],
- "license": "LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "linux"
- ],
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-linux-arm": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz",
- "integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==",
- "cpu": [
- "arm"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-linux-arm": "1.2.4"
- }
- },
- "node_modules/@img/sharp-linux-arm64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz",
- "integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==",
- "cpu": [
- "arm64"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-linux-arm64": "1.2.4"
- }
- },
- "node_modules/@img/sharp-linux-ppc64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz",
- "integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==",
- "cpu": [
- "ppc64"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-linux-ppc64": "1.2.4"
- }
- },
- "node_modules/@img/sharp-linux-riscv64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz",
- "integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==",
- "cpu": [
- "riscv64"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-linux-riscv64": "1.2.4"
- }
- },
- "node_modules/@img/sharp-linux-s390x": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz",
- "integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==",
- "cpu": [
- "s390x"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-linux-s390x": "1.2.4"
- }
- },
- "node_modules/@img/sharp-linux-x64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz",
- "integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==",
- "cpu": [
- "x64"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-linux-x64": "1.2.4"
- }
- },
- "node_modules/@img/sharp-linuxmusl-arm64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz",
- "integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==",
- "cpu": [
- "arm64"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-linuxmusl-arm64": "1.2.4"
- }
- },
- "node_modules/@img/sharp-linuxmusl-x64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz",
- "integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==",
- "cpu": [
- "x64"
- ],
- "license": "Apache-2.0",
- "optional": true,
- "os": [
- "linux"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-libvips-linuxmusl-x64": "1.2.4"
- }
- },
- "node_modules/@img/sharp-wasm32": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz",
- "integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==",
- "cpu": [
- "wasm32"
- ],
- "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
- "optional": true,
- "dependencies": {
- "@emnapi/runtime": "^1.7.0"
- },
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-win32-arm64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz",
- "integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==",
- "cpu": [
- "arm64"
- ],
- "license": "Apache-2.0 AND LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "win32"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-win32-ia32": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz",
- "integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==",
- "cpu": [
- "ia32"
- ],
- "license": "Apache-2.0 AND LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "win32"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
- "node_modules/@img/sharp-win32-x64": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz",
- "integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==",
- "cpu": [
- "x64"
- ],
- "license": "Apache-2.0 AND LGPL-3.0-or-later",
- "optional": true,
- "os": [
- "win32"
- ],
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- }
- },
"node_modules/@inquirer/checkbox": {
"version": "4.2.2",
"resolved": "https://registry.npmjs.org/@inquirer/checkbox/-/checkbox-4.2.2.tgz",
@@ -6202,9 +5739,9 @@
}
},
"node_modules/detect-libc": {
- "version": "2.1.2",
- "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
- "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
+ "version": "2.0.4",
+ "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.4.tgz",
+ "integrity": "sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==",
"license": "Apache-2.0",
"engines": {
"node": ">=8"
@@ -12398,9 +11935,9 @@
"license": "MIT"
},
"node_modules/semver": {
- "version": "7.7.3",
- "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz",
- "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==",
+ "version": "7.7.2",
+ "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz",
+ "integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==",
"license": "ISC",
"bin": {
"semver": "bin/semver.js"
@@ -12510,50 +12047,6 @@
"integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==",
"license": "ISC"
},
- "node_modules/sharp": {
- "version": "0.34.5",
- "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz",
- "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==",
- "hasInstallScript": true,
- "license": "Apache-2.0",
- "dependencies": {
- "@img/colour": "^1.0.0",
- "detect-libc": "^2.1.2",
- "semver": "^7.7.3"
- },
- "engines": {
- "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
- },
- "funding": {
- "url": "https://opencollective.com/libvips"
- },
- "optionalDependencies": {
- "@img/sharp-darwin-arm64": "0.34.5",
- "@img/sharp-darwin-x64": "0.34.5",
- "@img/sharp-libvips-darwin-arm64": "1.2.4",
- "@img/sharp-libvips-darwin-x64": "1.2.4",
- "@img/sharp-libvips-linux-arm": "1.2.4",
- "@img/sharp-libvips-linux-arm64": "1.2.4",
- "@img/sharp-libvips-linux-ppc64": "1.2.4",
- "@img/sharp-libvips-linux-riscv64": "1.2.4",
- "@img/sharp-libvips-linux-s390x": "1.2.4",
- "@img/sharp-libvips-linux-x64": "1.2.4",
- "@img/sharp-libvips-linuxmusl-arm64": "1.2.4",
- "@img/sharp-libvips-linuxmusl-x64": "1.2.4",
- "@img/sharp-linux-arm": "0.34.5",
- "@img/sharp-linux-arm64": "0.34.5",
- "@img/sharp-linux-ppc64": "0.34.5",
- "@img/sharp-linux-riscv64": "0.34.5",
- "@img/sharp-linux-s390x": "0.34.5",
- "@img/sharp-linux-x64": "0.34.5",
- "@img/sharp-linuxmusl-arm64": "0.34.5",
- "@img/sharp-linuxmusl-x64": "0.34.5",
- "@img/sharp-wasm32": "0.34.5",
- "@img/sharp-win32-arm64": "0.34.5",
- "@img/sharp-win32-ia32": "0.34.5",
- "@img/sharp-win32-x64": "0.34.5"
- }
- },
"node_modules/shebang-command": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
diff --git a/genius-scraper.js b/genius-scraper.js
new file mode 100644
index 0000000..313446e
--- /dev/null
+++ b/genius-scraper.js
@@ -0,0 +1,468 @@
+import axios from 'axios';
+import * as cheerio from 'cheerio';
+import fs from 'fs';
+import path from 'path';
+
+class GeniusArtistScraper { // Scrapes artist names/URLs (and optionally numeric IDs) from the Genius artists index
+ constructor() {
+ this.baseUrl = 'https://genius.com/artists-index/'; // the lowercased letter is appended to form each index page URL
+ this.results = {
+ popularArtists: [],
+ regularArtists: []
+ };
+ this.requestDelay = 500; // ms to wait before each artist-page request (used by extractArtistId)
+ }
+
+ /**
+ * Fetch an artist page and extract the numeric artist ID from its iOS app link.
+ * @param {string} artistUrl - The URL of the artist page
+ * @returns {Promise<string|null>} The artist ID, or null if not found or if the request failed
+ */
+ async extractArtistId(artistUrl) {
+ try {
+ // Wait this.requestDelay ms before every request to rate-limit ourselves
+ await new Promise(resolve => setTimeout(resolve, this.requestDelay));
+
+ const response = await axios.get(artistUrl, {
+ headers: {
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
+ }
+ });
+
+ const $ = cheerio.load(response.data);
+
+ // Look for the iOS app link: a link[rel="alternate"] element whose href contains "ios-app://"
+ const iosAppLink = $('link[rel="alternate"][href*="ios-app://"]').attr('href');
+
+ if (iosAppLink) {
+ // Extract the ID from the end of the URL: ios-app://709482991/genius/artists/673285
+ const match = iosAppLink.match(/\/artists\/(\d+)$/);
+ if (match) {
+ return match[1];
+ }
+ }
+
+ return null;
+ } catch (error) { // Best-effort: a failed page is logged as a warning and reported as "no ID"
+ console.warn(`Failed to extract ID for ${artistUrl}:`, error.message);
+ return null;
+ }
+ }
+
+ /**
+ * Write an in-place 20-segment progress bar to stdout (one segment per 5%); ends the line once current === total
+ */
+ logProgress(current, total, type) {
+ const percentage = ((current / total) * 100).toFixed(1); // toFixed returns a string; the divisions below coerce it back to a number
+ const progressBar = '█'.repeat(Math.floor(percentage / 5)) + '░'.repeat(20 - Math.floor(percentage / 5));
+ process.stdout.write(`\r${type}: [${progressBar}] ${percentage}% (${current}/${total})`);
+ if (current === total) console.log(); // New line when complete
+ }
+
+ /**
+ * Scrape artist links from a specific letter page
+ * @param {string} letter - The letter to scrape (e.g., 'j', 'a', 'b')
+ * @param {boolean} includeIds - Whether to fetch artist IDs (slower)
+ * @returns {Promise<Object>} this.results: popularArtists and regularArtists arrays of { name, url, type, id }
+ */
+ async scrapeArtistsByLetter(letter, includeIds = true) {
+ try {
+ console.log(`Scraping artists for letter: ${letter.toUpperCase()}`);
+
+ const url = `${this.baseUrl}${letter.toLowerCase()}`;
+ const response = await axios.get(url, {
+ headers: {
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
+ }
+ });
+
+ const $ = cheerio.load(response.data);
+
+ // Reset results for this letter (this.results is replaced, so the instance only holds the latest letter)
+ this.results = {
+ popularArtists: [],
+ regularArtists: []
+ };
+
+ // Extract popular artists
+ console.log('Extracting popular artists...');
+ $('li.artists_index_list-popular_artist').each((index, element) => {
+ const artistLink = $(element).find('a.artists_index_list-artist_name');
+ const name = artistLink.text().trim();
+ const url = artistLink.attr('href'); // shadows the index-page url declared above
+
+ if (name && url) {
+ this.results.popularArtists.push({
+ name: name,
+ url: url,
+ type: 'popular',
+ id: null // Will be populated later if includeIds is true
+ });
+ }
+ });
+
+ // Extract regular artists
+ console.log('Extracting regular artists...');
+ // Select every ul.artists_index_list that does not contain a popular-artist list item
+ const regularArtistLists = $('ul.artists_index_list').not(':has(.artists_index_list-popular_artist)');
+
+ regularArtistLists.each((listIndex, listElement) => {
+ $(listElement).find('li').each((index, element) => {
+ const artistLink = $(element).find('a').first();
+ const name = artistLink.text().trim();
+ const url = artistLink.attr('href');
+
+ // Only include links whose href contains '/artists/'
+ if (name && url && url.includes('/artists/')) {
+ this.results.regularArtists.push({
+ name: name,
+ url: url,
+ type: 'regular',
+ id: null // Will be populated later if includeIds is true
+ });
+ }
+ });
+ });
+
+ console.log(`Found ${this.results.popularArtists.length} popular artists`);
+ console.log(`Found ${this.results.regularArtists.length} regular artists`);
+
+ // Extract artist IDs if requested (one sequential HTTP request per artist)
+ if (includeIds) {
+ console.log('\n🔍 Extracting artist IDs from individual pages...');
+ console.log('⚠️ This may take several minutes due to rate limiting');
+
+ // Process popular artists
+ if (this.results.popularArtists.length > 0) {
+ console.log('\nFetching popular artist IDs:');
+ for (let i = 0; i < this.results.popularArtists.length; i++) {
+ const artist = this.results.popularArtists[i];
+ this.logProgress(i + 1, this.results.popularArtists.length, 'Popular Artists');
+ artist.id = await this.extractArtistId(artist.url);
+ }
+ }
+
+ // Process regular artists
+ if (this.results.regularArtists.length > 0) {
+ console.log('\nFetching regular artist IDs:');
+ for (let i = 0; i < this.results.regularArtists.length; i++) {
+ const artist = this.results.regularArtists[i];
+ this.logProgress(i + 1, this.results.regularArtists.length, 'Regular Artists');
+ artist.id = await this.extractArtistId(artist.url);
+ }
+ }
+
+ // Count successful ID extractions
+ const popularWithIds = this.results.popularArtists.filter(a => a.id !== null).length;
+ const regularWithIds = this.results.regularArtists.filter(a => a.id !== null).length;
+
+ console.log(`\n✅ Successfully extracted ${popularWithIds}/${this.results.popularArtists.length} popular artist IDs`);
+ console.log(`✅ Successfully extracted ${regularWithIds}/${this.results.regularArtists.length} regular artist IDs`);
+ }
+
+ return this.results;
+
+ } catch (error) { // Log which letter failed, then rethrow so the caller decides how to handle it
+ console.error(`Error scraping letter ${letter}:`, error.message);
+ throw error;
+ }
+ }
+
+ /**
+ * Save results to genius-artists-<letter>.json (written synchronously) and return the file path
+ * @param {string} letter - The letter that was scraped
+ * @param {Object} data - The scraped data
+ * @param {string} outputDir - Directory to save the file (optional, defaults to '.')
+ */
+ saveToFile(letter, data, outputDir = '.') {
+ const filename = `genius-artists-${letter.toLowerCase()}.json`;
+ const filepath = path.join(outputDir, filename);
+ const output = {
+ letter: letter.toUpperCase(),
+ timestamp: new Date().toISOString(),
+ totalArtists: data.popularArtists.length + data.regularArtists.length,
+ popularCount: data.popularArtists.length,
+ regularCount: data.regularArtists.length,
+ artists: {
+ popular: data.popularArtists,
+ regular: data.regularArtists
+ }
+ };
+
+ fs.writeFileSync(filepath, JSON.stringify(output, null, 2));
+ console.log(`Results saved to ${filepath}`);
+ return filepath;
+ }
+
+ /**
+ * Print artist counts, the ID extraction rate, and the first five artists of each type to the console
+ */
+ displaySummary(data) {
+ console.log('\n=== SCRAPING SUMMARY ===');
+ console.log(`Popular Artists: ${data.popularArtists.length}`);
+ console.log(`Regular Artists: ${data.regularArtists.length}`);
+ console.log(`Total Artists: ${data.popularArtists.length + data.regularArtists.length}`);
+
+ // Show ID extraction summary if IDs were fetched
+ const popularWithIds = data.popularArtists.filter(a => a.id !== null).length;
+ const regularWithIds = data.regularArtists.filter(a => a.id !== null).length;
+ const totalWithIds = popularWithIds + regularWithIds;
+
+ if (data.popularArtists.length > 0 && data.popularArtists[0].id !== undefined) { // NOTE(review): scrapeArtistsByLetter always sets id (null or a string), so this is true whenever a popular artist exists, even with --no-ids
+ console.log(`\nArtist IDs Extracted: ${totalWithIds}/${data.popularArtists.length + data.regularArtists.length} (${((totalWithIds / (data.popularArtists.length + data.regularArtists.length)) * 100).toFixed(1)}%)`);
+ }
+
+ if (data.popularArtists.length > 0) {
+ console.log('\nFirst 5 Popular Artists:');
+ data.popularArtists.slice(0, 5).forEach((artist, index) => {
+ const idDisplay = artist.id ? ` (ID: ${artist.id})` : '';
+ console.log(` ${index + 1}. ${artist.name}${idDisplay} - ${artist.url}`);
+ });
+ }
+
+ if (data.regularArtists.length > 0) {
+ console.log('\nFirst 5 Regular Artists:');
+ data.regularArtists.slice(0, 5).forEach((artist, index) => {
+ const idDisplay = artist.id ? ` (ID: ${artist.id})` : '';
+ console.log(` ${index + 1}. ${artist.name}${idDisplay} - ${artist.url}`);
+ });
+ }
+ }
+
+ /*
+ * Detached documentation for prepareForFirestore(letter, data), defined at the end of this class: prepare data for Firebase Firestore (for future use)
+ * @param {string} letter - The letter that was scraped
+ * @param {Object} data - The scraped data
+ * @returns {Array} Array of artist documents ready for Firestore
+ */
+ /**
+ * Create (if missing) an output directory named genius-artists-YYYY-MM-DD, relative to the current working directory
+ * @returns {string} The created directory path
+ */
+ createOutputDirectory() {
+ const timestamp = new Date().toISOString().replace(/[:.]/g, '-').split('T')[0]; // only the date part survives the split, so the replace() has no visible effect
+ const dirname = `genius-artists-${timestamp}`;
+
+ if (!fs.existsSync(dirname)) {
+ fs.mkdirSync(dirname, { recursive: true });
+ }
+
+ return dirname;
+ }
+
+ /**
+ * Scrape all letters (a-z) one after another and save one JSON file per letter plus scraping-summary.json
+ * @param {boolean} includeIds - Whether to fetch artist IDs
+ * @returns {Promise<Object>} Summary of bulk scraping results
+ */
+ async scrapeAllLetters(includeIds = true) {
+ console.log('🎵 Starting bulk scraping for all letters (A-Z)...\n');
+
+ const outputDir = this.createOutputDirectory();
+ console.log(`📁 Results will be saved to: ${outputDir}/\n`);
+
+ const letters = 'abcdefghijklmnopqrstuvwxyz'.split('');
+ const results = {
+ successful: [],
+ failed: [],
+ totalArtists: 0,
+ totalPopular: 0,
+ totalRegular: 0,
+ totalWithIds: 0,
+ startTime: new Date(),
+ outputDirectory: outputDir
+ };
+
+ for (let i = 0; i < letters.length; i++) {
+ const letter = letters[i];
+ const progress = `[${i + 1}/${letters.length}]`;
+
+ try {
+ console.log(`\n${progress} 🔄 Processing letter: ${letter.toUpperCase()}`);
+
+ const letterResults = await this.scrapeArtistsByLetter(letter, includeIds);
+
+ // Save this letter's results to its own file inside outputDir
+ const filepath = this.saveToFile(letter, letterResults, outputDir);
+
+ // Update summary stats
+ const letterTotal = letterResults.popularArtists.length + letterResults.regularArtists.length;
+ const letterWithIds = includeIds ?
+ letterResults.popularArtists.filter(a => a.id !== null).length +
+ letterResults.regularArtists.filter(a => a.id !== null).length : 0;
+
+ results.successful.push({
+ letter: letter.toUpperCase(),
+ popular: letterResults.popularArtists.length,
+ regular: letterResults.regularArtists.length,
+ total: letterTotal,
+ withIds: letterWithIds,
+ filepath: filepath
+ });
+
+ results.totalArtists += letterTotal;
+ results.totalPopular += letterResults.popularArtists.length;
+ results.totalRegular += letterResults.regularArtists.length;
+ results.totalWithIds += letterWithIds;
+
+ console.log(`✅ Letter ${letter.toUpperCase()}: ${letterTotal} artists processed`);
+
+ } catch (error) { // A failed letter is recorded and the loop moves on to the next one
+ console.error(`❌ Failed to process letter ${letter.toUpperCase()}:`, error.message);
+ results.failed.push({
+ letter: letter.toUpperCase(),
+ error: error.message
+ });
+ }
+
+ // Pause 1 second between letters (skipped after the last one)
+ if (i < letters.length - 1) {
+ await new Promise(resolve => setTimeout(resolve, 1000));
+ }
+ }
+
+ results.endTime = new Date();
+ results.duration = Math.round((results.endTime - results.startTime) / 1000 / 60); // minutes
+
+ // Save summary file (the Date fields are serialized as ISO strings by JSON.stringify)
+ const summaryPath = path.join(outputDir, 'scraping-summary.json');
+ fs.writeFileSync(summaryPath, JSON.stringify(results, null, 2));
+
+ return results;
+ }
+
+ /**
+ * Print the summary object returned by scrapeAllLetters: totals, duration, output directory and failed letters
+ */
+ displayBulkSummary(results) {
+ console.log('\n' + '='.repeat(60));
+ console.log('🎯 BULK SCRAPING COMPLETE');
+ console.log('='.repeat(60));
+
+ console.log(`📊 Overall Statistics:`);
+ console.log(` • Letters processed: ${results.successful.length}/${results.successful.length + results.failed.length}`);
+ console.log(` • Total artists: ${results.totalArtists.toLocaleString()}`);
+ console.log(` • Popular artists: ${results.totalPopular.toLocaleString()}`);
+ console.log(` • Regular artists: ${results.totalRegular.toLocaleString()}`);
+
+ if (results.totalWithIds > 0) { // the ID line is omitted entirely when no IDs were collected
+ const idSuccessRate = ((results.totalWithIds / results.totalArtists) * 100).toFixed(1);
+ console.log(` • Artists with IDs: ${results.totalWithIds.toLocaleString()} (${idSuccessRate}%)`);
+ }
+
+ console.log(` • Duration: ${results.duration} minutes`);
+ console.log(` • Output directory: ${results.outputDirectory}/`);
+
+ if (results.failed.length > 0) {
+ console.log(`\n❌ Failed letters (${results.failed.length}):`);
+ results.failed.forEach(fail => {
+ console.log(` • ${fail.letter}: ${fail.error}`);
+ });
+ }
+
+ console.log(`\n✅ Results saved to: ${results.outputDirectory}/`);
+ console.log(`📄 Summary saved to: ${path.join(results.outputDirectory, 'scraping-summary.json')}`);
+ }
+
+ prepareForFirestore(letter, data) { // Returns one flat document per artist: { name, url, id, type, letter, scrapedAt }
+ const firestoreData = [];
+
+ // Add popular artists
+ data.popularArtists.forEach(artist => {
+ firestoreData.push({
+ name: artist.name,
+ url: artist.url,
+ id: artist.id,
+ type: 'popular',
+ letter: letter.toLowerCase(),
+ scrapedAt: new Date()
+ });
+ });
+
+ // Add regular artists
+ data.regularArtists.forEach(artist => {
+ firestoreData.push({
+ name: artist.name,
+ url: artist.url,
+ id: artist.id,
+ type: 'regular',
+ letter: letter.toLowerCase(),
+ scrapedAt: new Date()
+ });
+ });
+
+ return firestoreData;
+ }
+}
+
+// CLI entry point: node genius-scraper.js [letter|all] [--no-ids]
+async function main() {
+ const scraper = new GeniusArtistScraper();
+
+ // Parse command line arguments
+ const args = process.argv.slice(2);
+ const firstArg = args[0] || 'j'; // defaults to letter "j"; NOTE(review): if "--no-ids" is passed as the first argument it is used as the letter
+ const includeIds = !args.includes('--no-ids'); // Include IDs by default, unless --no-ids flag is passed
+
+ try {
+ console.log('🎵 Genius Artist Scraper Starting...\n');
+
+ if (!includeIds) {
+ console.log('⚡ Fast mode: Skipping artist ID extraction\n');
+ }
+
+ // Check if bulk scraping all letters
+ if (firstArg.toLowerCase() === 'all') {
+ console.log('🌟 Bulk mode: Scraping all letters A-Z');
+
+ if (includeIds) {
+ console.log('⚠️ This will take several hours with ID extraction enabled');
+ console.log('💡 Consider using --no-ids flag for much faster bulk scraping\n');
+ }
+
+ // Perform bulk scraping
+ const bulkResults = await scraper.scrapeAllLetters(includeIds);
+
+ // Display bulk summary
+ scraper.displayBulkSummary(bulkResults);
+
+ console.log('\n🎉 Bulk scraping completed successfully!');
+
+ } else {
+ // Single letter scraping (original functionality)
+ const letter = firstArg;
+
+ // Scrape the specified letter
+ const results = await scraper.scrapeArtistsByLetter(letter, includeIds);
+
+ // Display summary
+ scraper.displaySummary(results);
+
+ // Save to file (default output directory '.', i.e. the current working directory)
+ const filename = scraper.saveToFile(letter, results);
+
+ // Prepare for Firestore (just showing the structure for now; nothing is uploaded here)
+ const firestoreData = scraper.prepareForFirestore(letter, results);
+ console.log(`\nPrepared ${firestoreData.length} documents for Firestore`);
+
+ console.log('\n✅ Scraping completed successfully!');
+ console.log(`📁 Data saved to: ${filename}`);
+
+ if (includeIds) {
+ console.log('\n💡 Tip: Use --no-ids flag for faster scraping without artist IDs');
+ console.log('💡 Tip: Use "all" to scrape all letters A-Z at once');
+ }
+ }
+
+ } catch (error) { // Any error from either mode ends the process with exit code 1
+ console.error('❌ Scraping failed:', error.message);
+ process.exit(1);
+ }
+}
+
+// Run the scraper only when this file is executed directly (not when it is imported as a module)
+if (import.meta.url === `file://${process.argv[1]}`) { // NOTE(review): plain string comparison; presumably fails for Windows paths or paths that need URL-encoding (confirm)
+ main();
+}
+
+export default GeniusArtistScraper;
\ No newline at end of file
diff --git a/lookup-song.js b/lookup-song.js
new file mode 100644
index 0000000..9d19fc5
--- /dev/null
+++ b/lookup-song.js
@@ -0,0 +1,112 @@
+/**
+ * Quick Song Lookup by ID
+ * Usage: node lookup-song.js <song-id> [song-id2] [song-id3] ...
+ */
+
+import { initializeApp } from 'firebase/app';
+import { getFirestore, doc, getDoc } from 'firebase/firestore';
+import { firebaseConfig } from './src/lib/services/initFirebase.js';
+
+// Initialize Firebase with the project config and get the Firestore handle used by lookupSong
+const app = initializeApp(firebaseConfig);
+const db = getFirestore(app);
+
+async function lookupSong(songId) { // Fetch the 'songs/<songId>' document from Firestore and print its metadata, a lyrics preview and its scraping status
+ try {
+ const songIdStr = songId.toString().trim();
+
+ console.log(`\n🔍 Looking up song ID: ${songIdStr}...`);
+
+ const songRef = doc(db, 'songs', songIdStr);
+ const songSnap = await getDoc(songRef);
+
+ if (!songSnap.exists()) {
+ console.log(`❌ Song not found in database\n`);
+ return;
+ }
+
+ const songData = songSnap.data();
+
+ console.log(`\n${'='.repeat(80)}`);
+ console.log(`✅ SONG FOUND`);
+ console.log(`${'='.repeat(80)}\n`);
+
+ console.log(`ID: ${songIdStr}`);
+ console.log(`Title: "${songData.title || 'Unknown'}"`);
+ console.log(`Artist: ${songData.artistNames || songData.primaryArtist?.name || 'Unknown'}`);
+ console.log(`URL: ${songData.url || 'N/A'}`);
+
+ if (songData.albumName) {
+ console.log(`Album: ${songData.albumName}`);
+ }
+
+ // Lyrics count as present only for a non-blank string other than the literal text 'null'
+ const hasLyrics = songData.lyrics &&
+ songData.lyrics !== 'null' &&
+ songData.lyrics !== null &&
+ typeof songData.lyrics === 'string' &&
+ songData.lyrics.trim().length > 0;
+
+ console.log(`\n🎵 Lyrics:`);
+ if (hasLyrics) {
+ const lyricsLength = songData.lyrics.length;
+ const wordCount = songData.lyrics.split(/\s+/).length; // approximate: counts whitespace-separated chunks
+
+ console.log(` ✅ HAS LYRICS (${lyricsLength.toLocaleString()} chars, ~${wordCount} words)`);
+
+ // Show the first three non-blank lines
+ const lines = songData.lyrics.split('\n').filter(l => l.trim()).slice(0, 3);
+ console.log(` First lines:`);
+ lines.forEach(line => console.log(` ${line}`));
+ if (songData.lyrics.split('\n').length > 3) { // this count includes blank lines, unlike the preview above
+ console.log(` ...`);
+ }
+ } else {
+ console.log(` ❌ NO LYRICS`);
+ }
+
+ console.log(`\n🔧 Status: ${songData.scrapingStatus || 'none'} (${songData.scrapingAttempts || 0} attempts)`);
+
+ if (songData.scrapingError) {
+ console.log(`⚠️ Error: ${songData.scrapingError}`);
+ }
+
+ console.log(`\n${'='.repeat(80)}\n`);
+
+ } catch (error) { // Errors are logged and not rethrown, so the caller's loop continues with the next ID
+ console.error(`❌ Error: ${error.message}\n`);
+ }
+}
+
+async function main() { // CLI driver: looks up every song ID given on the command line, one at a time
+ const songIds = process.argv.slice(2);
+
+ if (songIds.length === 0) { // no IDs given: print usage and exit with status 0
+ console.log(`\n${'='.repeat(80)}`);
+ console.log(`🔍 FIRESTORE SONG LOOKUP`);
+ console.log(`${'='.repeat(80)}\n`);
+ console.log(`Usage: node lookup-song.js [song-id2] [song-id3] ...\n`); // NOTE(review): the usage text appears to omit the required first song ID
+ console.log(`Examples:`);
+ console.log(` node lookup-song.js 10000344`);
+ console.log(` node lookup-song.js 10000344 9592352 7470207\n`);
+ process.exit(0);
+ }
+
+ console.log(`\n🔍 Searching for ${songIds.length} song(s)...`);
+
+ for (let i = 0; i < songIds.length; i++) {
+ await lookupSong(songIds[i]);
+
+ if (i < songIds.length - 1) { // separator line between results, not after the last one
+ console.log(`${'─'.repeat(80)}\n`);
+ }
+ }
+
+ console.log(`✅ Done!\n`);
+}
+
+main().catch(error => { // runs on import/execution; any rejection escaping main() is fatal
+ console.error('❌ Fatal error:', error);
+ process.exit(1);
+});
+
diff --git a/lyrictypesk@0.0.1 b/lyrictypesk@0.0.1
new file mode 100644
index 0000000..e69de29
diff --git a/package-lock.json b/package-lock.json
index f17fc9d..624af3c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,10 +9,7 @@
"version": "0.0.1",
"license": "MIT",
"dependencies": {
- "axios": "^1.13.2",
- "chalk": "^5.6.2",
- "cheerio": "^1.1.2",
- "cli-progress": "^3.12.0",
+ "cheerio": "^1.0.0-rc.12",
"firebase": "^12.2.1",
"firebase-admin": "^12.1.0",
"js-cookie": "^3.0.5",
@@ -4946,35 +4943,9 @@
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
+ "devOptional": true,
"license": "MIT"
},
- "node_modules/axios": {
- "version": "1.13.2",
- "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.2.tgz",
- "integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==",
- "license": "MIT",
- "dependencies": {
- "follow-redirects": "^1.15.6",
- "form-data": "^4.0.4",
- "proxy-from-env": "^1.1.0"
- }
- },
- "node_modules/axios/node_modules/form-data": {
- "version": "4.0.5",
- "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
- "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
- "license": "MIT",
- "dependencies": {
- "asynckit": "^0.4.0",
- "combined-stream": "^1.0.8",
- "es-set-tostringtag": "^2.1.0",
- "hasown": "^2.0.2",
- "mime-types": "^2.1.12"
- },
- "engines": {
- "node": ">= 6"
- }
- },
"node_modules/axobject-query": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-4.1.0.tgz",
@@ -5403,6 +5374,7 @@
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
@@ -5453,6 +5425,7 @@
"version": "5.6.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz",
"integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==",
+ "dev": true,
"license": "MIT",
"engines": {
"node": "^12.17.0 || ^14.13 || >=16.0.0"
@@ -5706,18 +5679,6 @@
"node": ">=10"
}
},
- "node_modules/cli-progress": {
- "version": "3.12.0",
- "resolved": "https://registry.npmjs.org/cli-progress/-/cli-progress-3.12.0.tgz",
- "integrity": "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A==",
- "license": "MIT",
- "dependencies": {
- "string-width": "^4.2.3"
- },
- "engines": {
- "node": ">=4"
- }
- },
"node_modules/cli-spinners": {
"version": "2.9.2",
"resolved": "https://registry.npmjs.org/cli-spinners/-/cli-spinners-2.9.2.tgz",
@@ -5884,6 +5845,7 @@
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"delayed-stream": "~1.0.0"
@@ -6482,6 +6444,7 @@
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
+ "devOptional": true,
"license": "MIT",
"engines": {
"node": ">=0.4.0"
@@ -6614,6 +6577,7 @@
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
@@ -6772,6 +6736,7 @@
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
+ "devOptional": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@@ -6781,6 +6746,7 @@
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
+ "devOptional": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@@ -6790,6 +6756,7 @@
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
@@ -6802,6 +6769,7 @@
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
@@ -7776,26 +7744,6 @@
"dev": true,
"license": "MIT"
},
- "node_modules/follow-redirects": {
- "version": "1.15.11",
- "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
- "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==",
- "funding": [
- {
- "type": "individual",
- "url": "https://github.com/sponsors/RubenVerborgh"
- }
- ],
- "license": "MIT",
- "engines": {
- "node": ">=4.0"
- },
- "peerDependenciesMeta": {
- "debug": {
- "optional": true
- }
- }
- },
"node_modules/foreground-child": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz",
@@ -7919,6 +7867,7 @@
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
+ "devOptional": true,
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
@@ -7999,6 +7948,7 @@
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
@@ -8023,6 +7973,7 @@
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
@@ -8370,6 +8321,7 @@
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
+ "devOptional": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@@ -8413,6 +8365,7 @@
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
+ "devOptional": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@@ -8425,6 +8378,7 @@
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
@@ -8450,6 +8404,7 @@
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
@@ -9714,6 +9669,7 @@
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
+ "devOptional": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@@ -9773,6 +9729,7 @@
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
+ "devOptional": true,
"license": "MIT",
"engines": {
"node": ">= 0.6"
@@ -9782,6 +9739,7 @@
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
+ "devOptional": true,
"license": "MIT",
"dependencies": {
"mime-db": "1.52.0"
@@ -11197,6 +11155,7 @@
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
+ "dev": true,
"license": "MIT"
},
"node_modules/pupa": {
diff --git a/package.json b/package.json
index a3f9dec..d2034b1 100644
--- a/package.json
+++ b/package.json
@@ -5,8 +5,6 @@
"scripts": {
"build": "vite build",
"dev": "vite dev",
- "scrape": "npm run update:scrape && npm run update:compare && npm run update:prescrape && npm run update:upload-artists",
- "upload": "npm run update:upload-songs && npm run update:update-popular",
"dev:vite": "vite dev",
"preview": "vite preview",
"check": "svelte-kit sync && svelte-check --tsconfig ./jsconfig.json",
@@ -43,10 +41,7 @@
"vite": "^5.0.3"
},
"dependencies": {
- "axios": "^1.13.2",
- "chalk": "^5.6.2",
- "cheerio": "^1.1.2",
- "cli-progress": "^3.12.0",
+ "cheerio": "^1.0.0-rc.12",
"firebase": "^12.2.1",
"firebase-admin": "^12.1.0",
"js-cookie": "^3.0.5",
diff --git a/prescraper.js b/prescraper.js
new file mode 100644
index 0000000..849fd82
--- /dev/null
+++ b/prescraper.js
@@ -0,0 +1,920 @@
+#!/usr/bin/env node
+
+import fs from 'fs/promises';
+import path from 'path';
+import { fileURLToPath } from 'url';
+import * as cheerio from 'cheerio';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+/**
+ * Prescraper Configuration
+ *
+ * Module-level settings object shared by every function in this script.
+ * main() overwrites maxSongsToScrape, artistFilters.letters and
+ * artistFilters.maxArtistsPerLetter from the --songs, --letters and --test
+ * CLI flags before any work starts.
+ *
+ * NOTE(review): artistFilters.skipExisting and output.filePerLetter are not
+ * read anywhere in prescraper.js, and output.resumable is only printed by
+ * printConfig() - no skip/resume logic is implemented in this file.
+ */
+const config = {
+ // Number of songs to scrape lyrics for per artist (configurable via CLI)
+ maxSongsToScrape: 10,
+
+ // Which artists to process
+ artistFilters: {
+ letters: ['all'], // Specific letters like ['a', 'b'] or 'all'
+ types: ['popular', 'regular'], // Include popular, regular, or both
+ maxArtistsPerLetter: null, // Limit artists per letter (for testing)
+ skipExisting: true // Skip artists already in output files
+ },
+
+ // Rate limiting to be respectful to Genius
+ delays: {
+ betweenArtists: 1000, // 1 second
+ betweenSongs: 500, // 0.5 seconds
+ betweenPages: 200 // 0.2 seconds
+ },
+
+ // Output configuration
+ output: {
+ directory: `./prescraped-data-${new Date().toISOString().split('T')[0]}/`, // date part of the ISO (UTC) timestamp, taken once at module load
+ filePerLetter: true, // One file per letter vs one big file
+ resumable: true // Save progress and allow resuming
+ },
+
+ // API configuration
+ api: {
+ timeout: 10000, // 10 second timeout
+ maxRetries: 3,
+ userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
+ }
+};
+
+/**
+ * Global state tracking
+ *
+ * Running counters reported in the final summary and by the SIGINT handler.
+ * - processed.artists: incremented by processArtist() for each artist it completes
+ * - processed.songs: increased by processArtist() by the number of songs found
+ * - processed.lyrics: incremented by scrapeLyricsForArtist() per successful scrape
+ * - errors.artists: artists with no songs or whose processing failed
+ * - errors.songs: song-list pages that failed to load in getAllSongsForArtist()
+ * - errors.lyrics: individual lyric scrapes that failed
+ * - startTime: Date.now() value recorded at the start of main()
+ */
+const state = {
+ processed: {
+ artists: 0,
+ songs: 0,
+ lyrics: 0
+ },
+ errors: {
+ artists: 0,
+ songs: 0,
+ lyrics: 0
+ },
+ startTime: null
+};
+
+/**
+ * Pause helper: resolves (with no value) once `ms` milliseconds have elapsed.
+ */
+const delay = (ms) => new Promise((wake) => {
+  setTimeout(wake, ms);
+});
+
+/**
+ * Fetch a URL with a per-attempt timeout and retries with exponential backoff.
+ *
+ * Every attempt gets its own AbortController and timer. An AbortSignal stays
+ * aborted once it fires, so sharing one controller across attempts would make
+ * every retry after a timeout fail immediately, and clearing a shared timer
+ * would leave later attempts without any timeout.
+ *
+ * Network failures, timeouts and HTTP 5xx responses are retried; any other
+ * response (including 4xx) is returned to the caller unchanged. The timer only
+ * covers the wait for the response, not reading the body afterwards.
+ *
+ * @param {string} url - Address to request.
+ * @param {object} [options] - fetch() options. `timeout` (ms) overrides
+ *   config.api.timeout; `headers` are merged over the default User-Agent.
+ * @returns {Promise<Response>} The first response that is not a 5xx.
+ * @throws {Error} The last error seen once config.api.maxRetries attempts failed.
+ */
+async function fetchWithTimeout(url, options = {}) {
+  const timeoutMs = options.timeout || config.api.timeout;
+  let lastError;
+
+  for (let attempt = 1; attempt <= config.api.maxRetries; attempt++) {
+    // Fresh controller and timer for this attempt only
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
+
+    try {
+      let response;
+      try {
+        response = await fetch(url, {
+          ...options,
+          signal: controller.signal,
+          headers: {
+            'User-Agent': config.api.userAgent,
+            ...options.headers
+          }
+        });
+      } finally {
+        // Stop the timer before any backoff wait so it cannot fire late
+        clearTimeout(timeoutId);
+      }
+
+      if (!response.ok && response.status >= 500) {
+        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+      }
+
+      return response;
+    } catch (error) {
+      lastError = error;
+
+      if (error.name === 'AbortError') {
+        console.warn(`⏰ Request timeout (attempt ${attempt}/${config.api.maxRetries}): ${url}`);
+      } else {
+        console.warn(`🔄 Request failed (attempt ${attempt}/${config.api.maxRetries}): ${error.message}`);
+      }
+
+      if (attempt < config.api.maxRetries) {
+        const delayMs = Math.pow(2, attempt) * 1000; // Exponential backoff
+        console.log(`⏳ Retrying in ${delayMs}ms...`);
+        await delay(delayMs);
+      }
+    }
+  }
+
+  throw lastError || new Error(`Failed after ${config.api.maxRetries} attempts`);
+}
+
+/**
+ * Read the genius-artists-<letter>.json files and build the list of artists to process.
+ *
+ * Files are taken from the genius-artists-2025-07-11 directory next to this
+ * script. The letter, type and per-letter limit filters in config.artistFilters
+ * are applied, and each selected artist object is tagged in place with its
+ * `letter` and a `urlKey` (last path segment of its URL).
+ *
+ * @returns {Promise<object[]>} Selected artists, in file order.
+ * @throws Re-throws any read/parse error after logging it.
+ */
+async function loadArtistData() {
+  console.log('📂 Loading artist data from genius-artists files...');
+
+  const sourceDir = path.join(__dirname, 'genius-artists-2025-07-11');
+  const collected = [];
+
+  try {
+    const { letters: wantedLetters, types: wantedTypes, maxArtistsPerLetter } = config.artistFilters;
+    const takeEveryLetter = wantedLetters[0] === 'all';
+
+    const dirEntries = await fs.readdir(sourceDir);
+    const artistFiles = dirEntries.filter(
+      (entry) => entry.startsWith('genius-artists-') && entry.endsWith('.json')
+    );
+    console.log(`Found ${artistFiles.length} artist files`);
+
+    for (const fileName of artistFiles) {
+      // The letter is whatever remains after removing the prefix and extension
+      const letter = fileName.replace('genius-artists-', '').replace('.json', '');
+      if (!takeEveryLetter && !wantedLetters.includes(letter)) {
+        continue;
+      }
+
+      const rawJson = await fs.readFile(path.join(sourceDir, fileName), 'utf8');
+      const parsed = JSON.parse(rawJson);
+
+      // Popular artists first, then regular ones, limited to the requested types
+      const pool = ['popular', 'regular']
+        .filter((kind) => wantedTypes.includes(kind))
+        .flatMap((kind) => [...(parsed.artists[kind] || [])]);
+
+      const selected = maxArtistsPerLetter ? pool.slice(0, maxArtistsPerLetter) : pool;
+
+      for (const artist of selected) {
+        artist.letter = letter;
+        artist.urlKey = artist.url.split('/').pop(); // URL key for Firebase compatibility
+      }
+
+      collected.push(...selected);
+      console.log(`📄 Loaded ${selected.length} artists from ${fileName}`);
+    }
+
+    console.log(`✅ Total artists loaded: ${collected.length}`);
+    return collected;
+  } catch (error) {
+    console.error('❌ Error loading artist data:', error);
+    throw error;
+  }
+}
+
+/**
+ * Read the Genius API key from functions/local-config.json (field `genius.key`).
+ *
+ * @returns {Promise<string>} The configured key.
+ * @throws Re-throws the read/parse error after printing a hint about the config file.
+ */
+async function getGeniusApiKey() {
+  try {
+    const configPath = path.join(__dirname, 'functions', 'local-config.json');
+    const rawConfig = await fs.readFile(configPath, 'utf8');
+    const { genius } = JSON.parse(rawConfig);
+    return genius.key;
+  } catch (error) {
+    console.error('❌ Error loading Genius API key from functions/local-config.json');
+    console.error('Please ensure you have a valid local-config.json file with your Genius API key');
+    throw error;
+  }
+}
+
+/**
+ * Request one page of an artist's songs from the Genius API.
+ * (Ported from Firebase Functions)
+ *
+ * A page holds up to 50 songs sorted by popularity. Every song is reduced to
+ * the fields this script stores; the untouched API objects are returned too.
+ *
+ * @param {number|string} artistId - Genius artist id.
+ * @param {string} geniusApiKey - Bearer token for api.genius.com.
+ * @param {number} [page=1] - Page number to request.
+ * @returns {Promise<{songs: object[], rawSongs: object[], hasMore: boolean, pageNumber: number}>}
+ * @throws {Error} On a non-OK response or unexpected payload (logged, then re-thrown).
+ */
+async function getSongsByArtist(artistId, geniusApiKey, page = 1) {
+  const PAGE_SIZE = 50;
+  console.log(` 📀 Fetching songs for artist ${artistId}, page ${page}`);
+
+  // Map a raw Genius song object onto our schema
+  const toSchema = (song) => ({
+    id: song.id.toString(),
+    title: cleanUnicodeText(song.title),
+    url: song.url,
+    songArtImageUrl: song.song_art_image_url,
+    artistNames: cleanUnicodeText(song.artist_names),
+    primaryArtist: {
+      id: song.primary_artist.id,
+      name: cleanUnicodeText(song.primary_artist.name),
+      url: song.primary_artist.url
+    },
+    // Album art ID extraction (same logic as Firebase Functions)
+    albumArtId: extractGeniusImageHash(song.song_art_image_url)
+  });
+
+  try {
+    const endpoint = `https://api.genius.com/artists/${artistId}/songs?per_page=${PAGE_SIZE}&page=${page}&sort=popularity`;
+    const response = await fetchWithTimeout(endpoint, {
+      headers: { "Authorization": `Bearer ${geniusApiKey}` }
+    });
+
+    if (!response.ok) {
+      throw new Error(`Genius API error: ${response.status} ${response.statusText}`);
+    }
+
+    const payload = await response.json();
+    const rawSongs = payload.response && payload.response.songs;
+    if (!rawSongs) {
+      throw new Error('Invalid API response structure');
+    }
+
+    return {
+      songs: rawSongs.map(toSchema),
+      rawSongs, // Raw API objects, used later for image URL extraction
+      hasMore: rawSongs.length === PAGE_SIZE, // A full page means there may be more
+      pageNumber: page
+    };
+  } catch (error) {
+    console.error(`❌ Error fetching songs for artist ${artistId}, page ${page}:`, error);
+    throw error;
+  }
+}
+
+/**
+ * Derive an album-art ID from a Genius image URL, used for deduplication.
+ * (Ported from Firebase Functions)
+ *
+ * The ID is the file name up to its first dot when that part is a 32-character
+ * hex string (returned lower-cased). Otherwise a warning is logged and the whole
+ * file name is used, lower-cased, with non-alphanumerics replaced by '-'.
+ *
+ * @param {string} imageUrl - Image URL from the Genius API.
+ * @returns {string|null} The ID, or null for a missing URL or on any error.
+ */
+function extractGeniusImageHash(imageUrl) {
+  if (!imageUrl) {
+    return null;
+  }
+
+  try {
+    const fileName = imageUrl.split('/').pop();
+    const [stem] = fileName.split('.');
+
+    const looksLikeHash = /^[a-f0-9]{32}$/i.test(stem);
+    if (looksLikeHash) {
+      return stem.toLowerCase();
+    }
+
+    // Unknown naming scheme: fall back to a sanitized version of the file name
+    console.warn(`Unexpected Genius URL format: ${imageUrl}, using filename as ID`);
+    return fileName.replace(/[^a-zA-Z0-9]/g, '-').toLowerCase();
+  } catch (error) {
+    console.error('Error extracting hash from Genius URL:', error);
+    return null;
+  }
+}
+
+/**
+ * Find an image URL for the target artist inside raw Genius song objects.
+ * (Ported from Firebase Functions)
+ *
+ * Songs are scanned in order; within a song the primary artist is checked
+ * before the featured artists. The first entry whose id matches and that has
+ * a non-empty image_url wins.
+ *
+ * @param {object[]} songs - Raw song objects from the Genius API.
+ * @param {number|string} targetArtistId - Artist id; strings are parsed as base-10 integers.
+ * @param {number} [maxSongsToCheck=11] - How many leading songs to inspect.
+ * @returns {string|null} The image URL, or null when none is found.
+ */
+function extractArtistImageUrl(songs, targetArtistId, maxSongsToCheck = 11) {
+  const wantedId = typeof targetArtistId === 'string' ? parseInt(targetArtistId, 10) : targetArtistId;
+
+  for (const song of songs.slice(0, maxSongsToCheck)) {
+    // Primary artist first, then any featured artists, in listed order
+    const candidates = [];
+    if (song.primary_artist) {
+      candidates.push(song.primary_artist);
+    }
+    if (Array.isArray(song.featured_artists)) {
+      candidates.push(...song.featured_artists);
+    }
+
+    const match = candidates.find((person) => person.id === wantedId && person.image_url);
+    if (match) {
+      return match.image_url;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Collect an artist's songs page by page (stops once 1000 are gathered).
+ * (Ported from Firebase Functions populateArtistSongsCore logic)
+ *
+ * Paging ends on an empty page, a page reporting no more results, or the first
+ * page that fails to load; a failure is counted in state.errors.songs and the
+ * songs collected so far are still returned. The artist image URL is looked up
+ * in the raw songs of the first page only.
+ *
+ * @param {object} artist - Artist record; `id` and `name` are used.
+ * @param {string} geniusApiKey - Genius API token.
+ * @returns {Promise<{songs: object[], artistImageUrl: string|null}>}
+ */
+async function getAllSongsForArtist(artist, geniusApiKey) {
+  console.log(`🎵 Fetching all songs for: ${artist.name}`);
+
+  const SONG_CAP = 1000;
+  const collected = [];
+  let imageUrl = null;
+
+  for (let page = 1; collected.length < SONG_CAP; page++) {
+    try {
+      const batch = await getSongsByArtist(artist.id, geniusApiKey, page);
+
+      if (batch.songs.length === 0) {
+        console.log(` ✅ No more songs available (${collected.length} total)`);
+        break;
+      }
+
+      // Only the first page is searched for the artist image
+      if (page === 1 && !imageUrl) {
+        imageUrl = extractArtistImageUrl(batch.rawSongs, artist.id);
+      }
+
+      collected.push(...batch.songs);
+      console.log(` 📄 Page ${page}: ${batch.songs.length} songs (${collected.length} total)`);
+
+      if (!batch.hasMore) {
+        console.log(` ✅ Reached end of songs (${collected.length} total)`);
+        break;
+      }
+
+      // Rate limiting before the next page
+      await delay(config.delays.betweenPages);
+    } catch (error) {
+      console.error(` ❌ Error fetching page ${page}:`, error);
+      state.errors.songs++;
+      break;
+    }
+  }
+
+  return {
+    songs: collected,
+    artistImageUrl: imageUrl
+  };
+}
+
+/**
+ * Turn a --letters argument into a list of letters to process.
+ * Accepts 'a,b,c', 'a-c', 'a c', '0' (numbers), 'all', or any mix of these.
+ *
+ * Invalid parts are reported with a warning and skipped. A missing input, or
+ * 'all' appearing anywhere, yields ['all'].
+ *
+ * @param {string} input - Raw CLI value.
+ * @returns {string[]} ['all'], or the unique letters sorted with '0' first.
+ */
+function parseLetterRange(input) {
+  if (!input || input.toLowerCase() === 'all') {
+    return ['all'];
+  }
+
+  const isLowerLetter = (ch) => ch.length === 1 && ch >= 'a' && ch <= 'z';
+  const picked = new Set();
+  const tokens = input.toLowerCase().split(/[,\s]+/).filter(Boolean);
+
+  for (const token of tokens) {
+    if (token === 'all') {
+      return ['all'];
+    }
+
+    // Plain token: a single letter or the '0' bucket for numbers
+    if (!token.includes('-')) {
+      if (isLowerLetter(token) || token === '0') {
+        picked.add(token);
+      } else {
+        console.warn(`⚠️ Invalid letter: ${token} (must be a-z or 0 for numbers)`);
+      }
+      continue;
+    }
+
+    // Range token such as 'a-g'
+    const [first, last] = token.split('-');
+    const wellFormed = Boolean(first) && Boolean(last) && first.length === 1 && last.length === 1;
+    if (!wellFormed) {
+      console.warn(`⚠️ Invalid range format: ${token} (use format like 'a-g')`);
+      continue;
+    }
+    if (!isLowerLetter(first) || !isLowerLetter(last)) {
+      console.warn(`⚠️ Invalid letter range: ${token} (must be a-z)`);
+      continue;
+    }
+
+    const lastCode = last.charCodeAt(0);
+    for (let code = first.charCodeAt(0); code <= lastCode; code++) {
+      picked.add(String.fromCharCode(code));
+    }
+  }
+
+  // '0' sorts ahead of every letter
+  return [...picked].sort((left, right) => {
+    if (left === '0') return -1;
+    return right === '0' ? 1 : left.localeCompare(right);
+  });
+}
+
+/**
+ * Log the start-up banner and the effective configuration.
+ * The per-letter artist limit is only listed when one is set.
+ */
+function printConfig() {
+  const rule = '='.repeat(60);
+  const { letters, types, maxArtistsPerLetter } = config.artistFilters;
+  const letterDisplay = letters[0] === 'all' ? 'all (0, a-z)' : letters.join(', ');
+
+  const lines = [
+    '\n' + rule,
+    '🚀 LYRICTYPE PRESCRAPER STARTING',
+    rule,
+    `📋 Configuration:`,
+    ` Max songs to scrape per artist: ${config.maxSongsToScrape}`,
+    ` Target letters: ${letterDisplay}`,
+    ` Artist types: ${types.join(', ')}`
+  ];
+
+  if (maxArtistsPerLetter) {
+    lines.push(` Max artists per letter: ${maxArtistsPerLetter}`);
+  }
+
+  lines.push(
+    ` Output directory: ${config.output.directory}`,
+    ` Resumable: ${config.output.resumable}`,
+    rule + '\n'
+  );
+
+  for (const line of lines) {
+    console.log(line);
+  }
+}
+
+/**
+ * Strip or normalise Unicode characters that cause trouble in stored text.
+ *
+ * Invisible characters and the ASCII separator controls are removed,
+ * line-breaking characters become '\n', and runs of three or more newlines
+ * are collapsed to two. Empty or non-string values are returned untouched.
+ *
+ * @param {*} text - Value to clean.
+ * @returns {*} The cleaned string, or the input itself when it is not a non-empty string.
+ */
+function cleanUnicodeText(text) {
+  if (typeof text !== 'string' || !text) {
+    return text;
+  }
+
+  // Removed outright:
+  //   U+200B-U+200F zero-width space/joiners and direction marks
+  //   U+00AD soft hyphen, U+FEFF byte order mark
+  //   U+001C-U+001F file/group/record/unit separators
+  const invisible = /[\u200B-\u200F\u00AD\uFEFF\u001C-\u001F]/g;
+
+  // Replaced with a regular newline:
+  //   U+2028 line separator, U+2029 paragraph separator, U+0085 next line,
+  //   U+000B vertical tab, U+000C form feed
+  const lineBreakers = /[\u2028\u2029\u0085\u000B\u000C]/g;
+
+  return text
+    .replace(invisible, '')
+    .replace(lineBreakers, '\n')
+    .replace(/\n{3,}/g, '\n\n'); // keep at most one blank line in a row
+}
+
+/**
+ * Convert the inner HTML of one Genius lyrics container into plain lyric lines.
+ *
+ * <br> tags become newlines, every other tag is dropped, common HTML entities
+ * are decoded, lines are trimmed, and blank lines and section headers
+ * ("[Chorus]", "Verse 2", ...) are removed.
+ *
+ * NOTE(review): the header filter also drops genuine lyric lines that start
+ * with one of the listed words followed by whitespace (e.g. "Bridge over ...").
+ */
+function lyricsHtmlToText(html) {
+  return html
+    // Line breaks in the markup are the only line structure we keep
+    .replace(/<br\s*\/?>/gi, '\n')
+    // Remove all remaining HTML tags (formatting, links, section wrappers, ...)
+    .replace(/<[^>]*>/g, '')
+    // Decode HTML entities
+    .replace(/&lt;/g, '<')
+    .replace(/&gt;/g, '>')
+    .replace(/&quot;/g, '"')
+    .replace(/&(?:#x27|#39|apos);/gi, "'")
+    .replace(/&nbsp;/g, ' ')
+    // &amp; goes last so that "&amp;lt;" ends up as the literal text "&lt;"
+    .replace(/&amp;/g, '&')
+    .split('\n')
+    .map(line => line.trim())
+    .filter(line => {
+      if (!line) return false;
+      if (line.match(/^\[.*\]$/)) return false; // [Intro], [Verse], etc.
+      if (line.match(/^(Intro|Verse|Chorus|Bridge|Outro|Pre-Chorus|Post-Chorus|Hook|Refrain)(\s|\d|$)/i)) return false;
+      return true;
+    })
+    .join('\n');
+}
+
+/**
+ * Scrape lyrics from a Genius song URL
+ * (Ported from Firebase Functions scrapeLyricsFromUrl)
+ *
+ * Downloads the song page, extracts every div[data-lyrics-container="true"]
+ * (Genius often splits lyrics across several), strips non-lyric elements and
+ * returns the lyrics as newline-separated lines with no blank lines.
+ *
+ * @param {string} songUrl - Song page URL; must contain 'genius.com'.
+ * @returns {Promise<string>} Cleaned lyrics text.
+ * @throws {Error} For an invalid URL, a non-OK response, a page without lyrics
+ *   containers, or lyrics shorter than 10 characters (logged, then re-thrown).
+ */
+async function scrapeLyricsFromUrl(songUrl) {
+  try {
+    // Validate URL format
+    if (!songUrl || typeof songUrl !== 'string') {
+      throw new Error(`Invalid song URL: ${songUrl}`);
+    }
+
+    if (!songUrl.includes('genius.com')) {
+      throw new Error(`URL does not appear to be a Genius URL: ${songUrl}`);
+    }
+
+    // Fetch the song page
+    const songPageResponse = await fetchWithTimeout(songUrl);
+
+    if (!songPageResponse.ok) {
+      throw new Error(`HTTP ${songPageResponse.status}: ${songPageResponse.statusText} for URL: ${songUrl}`);
+    }
+
+    const songPageHtml = await songPageResponse.text();
+
+    if (!songPageHtml || songPageHtml.length < 100) {
+      throw new Error('Received empty or invalid HTML response');
+    }
+
+    // Parse the page with cheerio
+    const $ = cheerio.load(songPageHtml);
+    const lyricsContainers = $('div[data-lyrics-container="true"]');
+
+    if (lyricsContainers.length === 0) {
+      throw new Error('No lyrics containers found');
+    }
+
+    const sections = [];
+
+    lyricsContainers.each((index, container) => {
+      const $container = $(container);
+
+      // Drop elements that are not part of the lyrics themselves
+      $container.find('[data-exclude-from-selection="true"]').remove();
+      $container.find('.LyricsHeader__Container, .LyricsFooter__Container').remove();
+      $container.find('a[href*="/annotations/"]').remove();
+
+      const sectionText = lyricsHtmlToText($container.html() || '');
+      if (sectionText) {
+        sections.push(sectionText);
+      }
+    });
+
+    // Every section is already trimmed and free of blank lines and headers,
+    // so joining with single newlines gives the final layout directly
+    const lyrics = sections.join('\n');
+
+    if (!lyrics || lyrics.length < 10) {
+      throw new Error('Extracted lyrics are too short or empty');
+    }
+
+    // Clean problematic Unicode characters
+    return cleanUnicodeText(lyrics);
+
+  } catch (error) {
+    console.error(` ❌ Error scraping lyrics from ${songUrl}:`, error.message);
+    throw error;
+  }
+}
+
+/**
+ * Scrape lyrics for the first `maxSongs` songs of an artist.
+ *
+ * Songs are processed one at a time. A failed scrape does not stop the run:
+ * the song is still returned with `lyrics: null` and a `scrapingError` message.
+ * The between-song delay is applied after failures as well as successes, so a
+ * run of errors (for example rate-limit responses) is not followed by
+ * back-to-back requests.
+ *
+ * @param {object} artist - Artist record (not used by the scraping itself).
+ * @param {object[]} songs - Song metadata; `url` and `title` are used.
+ * @param {number} maxSongs - How many leading songs to scrape.
+ * @returns {Promise<object[]>} One entry per attempted song, in input order.
+ */
+async function scrapeLyricsForArtist(artist, songs, maxSongs) {
+  console.log(` 🎤 Scraping lyrics for top ${Math.min(maxSongs, songs.length)} songs...`);
+
+  const songsToScrape = songs.slice(0, maxSongs);
+  const scrapedSongs = [];
+
+  for (let i = 0; i < songsToScrape.length; i++) {
+    const song = songsToScrape[i];
+
+    try {
+      console.log(` 📝 [${i + 1}/${songsToScrape.length}] ${song.title}`);
+      const startTime = Date.now();
+
+      const lyrics = await scrapeLyricsFromUrl(song.url);
+      const scrapingDuration = Date.now() - startTime;
+
+      scrapedSongs.push({
+        ...song,
+        lyrics,
+        scrapedAt: new Date().toISOString(),
+        scrapingDuration
+      });
+
+      state.processed.lyrics++;
+      console.log(` ✅ Success (${scrapingDuration}ms, ${lyrics.length} chars)`);
+    } catch (error) {
+      console.error(` ❌ Failed to scrape "${song.title}": ${error.message}`);
+      state.errors.lyrics++;
+
+      // Still add the song without lyrics for metadata completeness
+      scrapedSongs.push({
+        ...song,
+        lyrics: null,
+        scrapingError: error.message,
+        scrapedAt: new Date().toISOString()
+      });
+    }
+
+    // Rate limiting between songs, whatever the outcome of this one
+    if (i < songsToScrape.length - 1) {
+      await delay(config.delays.betweenSongs);
+    }
+  }
+
+  return scrapedSongs;
+}
+
+/**
+ * Handle one artist end to end: load the song list, then scrape lyrics for
+ * the first config.maxSongsToScrape songs.
+ *
+ * Never throws. An artist without songs, or any failure along the way, is
+ * logged, counted in state.errors.artists and reported as null.
+ *
+ * @param {object} artist - Artist record from loadArtistData().
+ * @param {string} geniusApiKey - Genius API token.
+ * @param {number} artistIndex - Zero-based position, used for progress output.
+ * @param {number} totalArtists - Size of the current batch, used for progress output.
+ * @returns {Promise<object|null>} Artist data with songs, scraped lyrics and stats, or null.
+ */
+async function processArtist(artist, geniusApiKey, artistIndex, totalArtists) {
+  const startedAt = Date.now();
+
+  console.log(`\n[${artistIndex + 1}/${totalArtists}] 🎨 Processing: ${artist.name}`);
+  console.log(` 🔗 URL: ${artist.url}`);
+  console.log(` 🆔 Genius ID: ${artist.id}`);
+
+  try {
+    const catalogue = await getAllSongsForArtist(artist, geniusApiKey);
+    const allSongs = catalogue.songs;
+
+    if (allSongs.length === 0) {
+      console.log(` ⚠️ No songs found for ${artist.name}`);
+      state.errors.artists++;
+      return null;
+    }
+
+    console.log(` 📚 Found ${allSongs.length} songs total`);
+    state.processed.songs += allSongs.length;
+
+    // Lyrics are scraped for the leading songs only
+    const scraped = await scrapeLyricsForArtist(artist, allSongs, config.maxSongsToScrape);
+
+    const elapsedMs = Date.now() - startedAt;
+    const withLyrics = scraped.filter((entry) => entry.lyrics).length;
+
+    console.log(` ✅ Completed: ${withLyrics}/${scraped.length} lyrics scraped (${elapsedMs}ms)`);
+    state.processed.artists++;
+
+    const processingStats = {
+      totalSongsFound: allSongs.length,
+      songsScraped: scraped.length,
+      lyricsScraped: withLyrics,
+      scrapingErrors: scraped.length - withLyrics,
+      processingTime: elapsedMs
+    };
+
+    return {
+      // Original artist data
+      name: artist.name,
+      geniusId: artist.id,
+      url: artist.url,
+      urlKey: artist.urlKey,
+      letter: artist.letter,
+      type: artist.type,
+
+      // Scraped metadata
+      imageUrl: catalogue.artistImageUrl,
+      totalSongs: allSongs.length,
+
+      // Complete song list (metadata only) and the songs with scraped lyrics
+      allSongs,
+      scrapedSongs: scraped,
+
+      // Processing metadata
+      processingStats,
+      processedAt: new Date().toISOString()
+    };
+  } catch (error) {
+    console.error(` 💥 Failed to process ${artist.name}: ${error.message}`);
+    state.errors.artists++;
+    return null;
+  }
+}
+
+/**
+ * Write one letter's results to prescraped-<letter>.json in the output directory.
+ *
+ * Null entries (failed artists) are left out of the `artists` array but are
+ * counted in the summary.
+ *
+ * @param {Array<object|null>} data - processArtist() results for the letter.
+ * @param {string} letter - Letter the results belong to.
+ * @returns {Promise<object>} The object that was written to disk.
+ */
+async function saveToFile(data, letter) {
+  const filename = `prescraped-${letter}.json`;
+  const filepath = path.join(config.output.directory, filename);
+
+  const succeeded = data.filter((entry) => entry !== null);
+  const failedCount = data.filter((entry) => entry === null).length;
+
+  let songTotal = 0;
+  let lyricTotal = 0;
+  for (const entry of data) {
+    songTotal += entry?.totalSongs || 0;
+    lyricTotal += entry?.processingStats?.lyricsScraped || 0;
+  }
+
+  const output = {
+    letter,
+    processedAt: new Date().toISOString(),
+    artists: succeeded,
+    summary: {
+      totalProcessed: succeeded.length,
+      totalFailed: failedCount,
+      totalSongs: songTotal,
+      totalLyrics: lyricTotal
+    }
+  };
+
+  await fs.writeFile(filepath, JSON.stringify(output, null, 2));
+  console.log(`💾 Saved results to: ${filename}`);
+
+  return output;
+}
+
+/**
+ * Read an integer-valued CLI flag from process.argv.
+ *
+ * Returns null when the flag is absent or has no value after it. Throws when
+ * the value is not an integer >= min, so a typo cannot silently become NaN
+ * (which would disable the --test limit or make --songs scrape nothing).
+ */
+function readIntFlag(flag, min) {
+  const flagIndex = process.argv.indexOf(flag);
+  const rawValue = flagIndex === -1 ? undefined : process.argv[flagIndex + 1];
+  if (!rawValue) {
+    return null;
+  }
+
+  const value = Number.parseInt(rawValue, 10);
+  if (Number.isNaN(value) || value < min) {
+    throw new Error(`Invalid value for ${flag}: "${rawValue}" (expected an integer >= ${min})`);
+  }
+  return value;
+}
+
+/**
+ * Main execution function
+ *
+ * Parses the CLI flags (--help, --songs, --letters, --test), loads the API key
+ * and the artist lists, then processes the artists letter by letter and writes
+ * one JSON file per letter. Artists are handled sequentially with the
+ * configured delay between them; a failing artist or a failing save is logged
+ * and the run continues. Any other error is reported as fatal and the process
+ * exits with code 1.
+ */
+async function main() {
+  try {
+    state.startTime = Date.now();
+
+    // Parse command line arguments
+    if (process.argv.includes('--help') || process.argv.includes('-h')) {
+      console.log(`
+Usage: node prescraper.js [options]
+
+Options:
+  --songs <n>        Number of songs to scrape per artist (default: ${config.maxSongsToScrape})
+  --letters <list>   Letters/ranges to process (default: all)
+  --test <n>         Test mode: limit artists per letter
+  --help, -h         Show this help message
+
+Letter Formats:
+  all          Process all letters (0, a-z)
+  0            Process artists starting with numbers
+  a,b,c        Process specific letters
+  a-g          Process range from a to g
+  c g          Process letters c and g (space-separated)
+  0,a-c,x,z    Mixed: numbers, range a-c, plus letters x and z
+
+Examples:
+  node prescraper.js --songs 5 --letters a,b,c
+  node prescraper.js --letters 0          # Process artists starting with numbers
+  node prescraper.js --letters a-g        # Process letters a through g
+  node prescraper.js --letters "c g"      # Process letters c and g
+  node prescraper.js --letters 0,a-c,x-z  # Process numbers, a-c and x-z ranges
+  node prescraper.js --test 2             # Process only 2 artists per letter
+ `);
+      return;
+    }
+
+    // Parse CLI arguments
+    const songsArg = readIntFlag('--songs', 0);
+    if (songsArg !== null) {
+      config.maxSongsToScrape = songsArg;
+    }
+
+    const lettersIndex = process.argv.indexOf('--letters');
+    if (lettersIndex !== -1 && process.argv[lettersIndex + 1]) {
+      const lettersArg = process.argv[lettersIndex + 1];
+      config.artistFilters.letters = parseLetterRange(lettersArg);
+      console.log(`📝 Parsed letters: ${lettersArg} → ${config.artistFilters.letters.join(', ')}`);
+    }
+
+    const testArg = readIntFlag('--test', 1);
+    if (testArg !== null) {
+      config.artistFilters.maxArtistsPerLetter = testArg;
+      console.log(`🧪 TEST MODE: Processing max ${config.artistFilters.maxArtistsPerLetter} artists per letter`);
+    }
+
+    // Show configuration after parsing CLI arguments
+    printConfig();
+
+    // Load Genius API key
+    console.log('🔑 Loading Genius API key...');
+    const geniusApiKey = await getGeniusApiKey();
+    console.log('✅ API key loaded');
+
+    // Load artist data
+    const artists = await loadArtistData();
+
+    if (artists.length === 0) {
+      console.log('⚠️ No artists found matching criteria');
+      return;
+    }
+
+    // Create output directory
+    await fs.mkdir(config.output.directory, { recursive: true });
+    console.log(`📁 Created output directory: ${config.output.directory}`);
+
+    console.log(`\n🎯 Processing ${artists.length} artists...`);
+    console.log('Press Ctrl+C to stop gracefully\n');
+
+    // Group artists by letter for organized processing
+    const artistsByLetter = artists.reduce((acc, artist) => {
+      if (!acc[artist.letter]) acc[artist.letter] = [];
+      acc[artist.letter].push(artist);
+      return acc;
+    }, {});
+
+    const letters = Object.keys(artistsByLetter).sort();
+    console.log(`📋 Processing ${letters.length} letters: ${letters.join(', ')}`);
+
+    // Process each letter
+    for (const letter of letters) {
+      const letterArtists = artistsByLetter[letter];
+      console.log(`\n${'='.repeat(50)}`);
+      console.log(`📖 LETTER: ${letter.toUpperCase()} (${letterArtists.length} artists)`);
+      console.log('='.repeat(50));
+
+      const letterResults = [];
+
+      // Process each artist in the letter
+      for (let i = 0; i < letterArtists.length; i++) {
+        const artist = letterArtists[i];
+
+        try {
+          const result = await processArtist(artist, geniusApiKey, i, letterArtists.length);
+          letterResults.push(result);
+
+          // Rate limiting between artists
+          if (i < letterArtists.length - 1) {
+            await delay(config.delays.betweenArtists);
+          }
+
+        } catch (error) {
+          console.error(`💥 Critical error processing ${artist.name}:`, error);
+          letterResults.push(null);
+          state.errors.artists++;
+        }
+      }
+
+      // Save results for this letter; a failed save must not abort the remaining letters
+      try {
+        await saveToFile(letterResults, letter);
+
+        const successful = letterResults.filter(r => r !== null).length;
+        const failed = letterResults.filter(r => r === null).length;
+        const totalLyrics = letterResults.reduce((sum, r) => sum + (r?.processingStats?.lyricsScraped || 0), 0);
+
+        console.log(`\n✅ Letter ${letter.toUpperCase()} completed:`);
+        console.log(` Artists: ${successful} successful, ${failed} failed`);
+        console.log(` Lyrics scraped: ${totalLyrics}`);
+
+      } catch (saveError) {
+        console.error(`❌ Error saving results for letter ${letter}:`, saveError);
+      }
+    }
+
+    // Final summary
+    const totalTime = Date.now() - state.startTime;
+    console.log(`\n${'='.repeat(60)}`);
+    console.log('🎉 PRESCRAPING COMPLETED!');
+    console.log('='.repeat(60));
+    console.log(`⏱️ Total time: ${Math.round(totalTime / 1000)}s`);
+    console.log(`📊 Final stats:`);
+    console.log(` Artists processed: ${state.processed.artists}`);
+    console.log(` Songs fetched: ${state.processed.songs}`);
+    console.log(` Lyrics scraped: ${state.processed.lyrics}`);
+    console.log(` Total errors: ${state.errors.artists + state.errors.songs + state.errors.lyrics}`);
+    console.log(`📁 Output saved to: ${config.output.directory}`);
+    console.log('='.repeat(60));
+
+  } catch (error) {
+    console.error('💥 Fatal error:', error);
+    process.exit(1);
+  }
+}
+
+// Ctrl+C: print the counters gathered so far, then exit with status 0.
+// The exit is immediate - results of the letter in progress are not written.
+process.on('SIGINT', () => {
+  const { processed, errors } = state;
+  const errorTotal = errors.artists + errors.songs + errors.lyrics;
+
+  const report = [
+    '\n\n🛑 Received SIGINT, shutting down gracefully...',
+    `📊 Final stats:`,
+    ` Artists processed: ${processed.artists}`,
+    ` Songs processed: ${processed.songs}`,
+    ` Lyrics scraped: ${processed.lyrics}`,
+    ` Errors: ${errorTotal}`
+  ];
+
+  for (const line of report) {
+    console.log(line);
+  }
+  process.exit(0);
+});
+
+// Run the script only when this file is executed directly, not when it is imported.
+// Filesystem paths are compared instead of a hand-built `file://` URL: the URL
+// form percent-encodes spaces and special characters and has a different shape
+// on Windows, so the string comparison failed there and main() never ran.
+// The argv[1] check covers invocations without a script path.
+if (process.argv[1] && __filename === path.resolve(process.argv[1])) {
+  main();
+}
+
+export {
+ config,
+ loadArtistData,
+ getAllSongsForArtist,
+ extractGeniusImageHash,
+ extractArtistImageUrl
+};
diff --git a/scripts/ARTIST_UPDATE_SYSTEM_README.md b/scripts/ARTIST_UPDATE_SYSTEM_README.md
deleted file mode 100644
index 8e2836d..0000000
--- a/scripts/ARTIST_UPDATE_SYSTEM_README.md
+++ /dev/null
@@ -1,562 +0,0 @@
-# Artist Update System
-
-A comprehensive, automated system for scraping, comparing, and uploading artist and song data from Genius to Firestore.
-
-## Quick Start
-
-### Test the System (2 minutes)
-```bash
-npm run test:all
-```
-
-### Full Update (30-60 minutes)
-```bash
-# Scrape and prepare data
-npm run scrape
-
-# Review data in scraping-data/, then upload
-npm run upload
-```
-
-## Overview
-
-This system provides a complete workflow for:
-1. **Scraping** artist lists from Genius
-2. **Comparing** with existing Firestore data
-3. **Prescraping** songs for new artists
-4. **Uploading** new artists and songs to Firestore
-5. **Updating** popular artist flags
-
-### Key Features
-
-- ✅ **Incremental Updates**: Only processes new/changed artists
-- ✅ **Safe by Default**: Dry-run mode, skip existing data
-- ✅ **Testing Built-In**: `--limit` option on all scripts
-- ✅ **Progress Tracking**: TUI with progress bars and ETAs
-- ✅ **Workflow Timer**: Cumulative elapsed time tracked across all scripts
-- ✅ **Error Handling**: Comprehensive error logging
-- ✅ **Manual Control**: Each step is a separate command
-- ✅ **Timestamped Data**: All data saved with timestamps
-- ✅ **Search Tokens**: Automatic generation for autocomplete
-
-### Two-Phase Workflow
-
-The system is split into two main commands for safety:
-
-1. **`npm run scrape`** - Data gathering phase (20-45 min)
- - Scrapes artist lists from Genius
- - Compares with Firestore to find new artists
- - Scrapes songs for new artists
- - Uploads new artist documents (with search tokens)
- - **Stops and prompts you to review data**
-
-2. **`npm run upload`** - Data upload phase (2-6 min)
- - Uploads songs to Firestore
- - Updates popular artist flags
-
-This separation allows you to:
-- Review scraped data before committing to Firestore
-- Catch any issues before modifying song data
-- Re-run the upload phase if needed without re-scraping
-
-## Scripts
-
-| Script | Purpose | Input | Output |
-|--------|---------|-------|--------|
-| `scrape-artists.js` | Scrape Genius artist lists | Genius website | `artist-lists/` |
-| `compare-artists.js` | Identify new artists | `artist-lists/` + Firestore | `new-artists/` |
-| `prescrape-new-artists.js` | Scrape songs for new artists | `new-artists/` | `song-data/` |
-| `upload-artists.js` | Upload artists to Firestore | `new-artists/` | Firestore `artists` |
-| `upload-songs.js` | Upload songs to Firestore | `song-data/` | Firestore `songs` |
-| `update-popular-flags.js` | Update popular flags | `new-artists/` | Firestore `artists` |
-
-## NPM Commands
-
-### Production Workflow
-
-```bash
-# Recommended: Two-phase workflow
-npm run scrape # 20-45 min: Scrape, compare, prescrape, upload artists
-npm run upload # 2-6 min: Upload songs and update flags
-
-# Individual steps (for debugging/manual control)
-npm run update:scrape # 15-30 min: Scrape all letters
-npm run update:compare # ~1 min: Compare with Firestore
-npm run update:prescrape # 5-15 min: Scrape songs
-npm run update:upload-artists # ~1 min: Upload artists
-npm run update:upload-songs # 1-5 min: Upload songs
-npm run update:update-popular # ~1 min: Update flags
-```
-
-### Test Workflow
-
-```bash
-# Individual tests
-npm run test:scrape # 30 sec: 10 artists from letter J
-npm run test:compare # 15 sec: Compare
-npm run test:prescrape # 10 sec: 5 artists, 2 songs each
-npm run test:upload-artists # 5 sec: Dry-run 5 artists
-npm run test:upload-songs # 3 sec: Dry-run 5 songs
-npm run test:update-popular # 2 sec: Dry-run flag updates
-
-# Full test
-npm run test:all # ~2 min: Complete workflow test
-```
-
-## Workflow Details
-
-### Step 1: Scrape Artists
-
-**Command**: `npm run update:scrape`
-**Script**: `scripts/scrape-artists.js`
-**Time**: 15-30 minutes (all letters)
-
-Scrapes artist lists from Genius by letter (0, a-z), including:
-- Popular artists (top 20 per letter)
-- Regular artists (all others)
-- Genius IDs (extracted from iOS app links)
-
-**Output**:
-```
-scraping-data/artist-lists/2026-01-04-20-22/
-├── artists-0.json
-├── artists-a.json
-├── ...
-├── artists-z.json
-├── summary.json
-├── errors.json
-└── .complete
-```
-
-**Options**:
-```bash
-# Specific letters
-node scripts/scrape-artists.js --letters a,b,c
-
-# Test with limited data
-node scripts/scrape-artists.js --letters j --limit 10
-
-# Skip ID extraction (faster)
-node scripts/scrape-artists.js --no-ids
-```
-
-### Step 2: Compare with Firestore
-
-**Command**: `npm run update:compare`
-**Script**: `scripts/compare-artists.js`
-**Time**: ~1 minute
-
-Compares scraped artists with existing Firestore data to identify:
-- New artists (not in Firestore)
-- Popular status changes (add/remove)
-
-**Output**:
-```
-scraping-data/new-artists/2026-01-04-21-01/
-├── new-artists-0.json
-├── new-artists-a.json
-├── ...
-├── new-artists-z.json
-├── comparison-report.json ← Summary of changes
-├── errors.json
-└── .complete
-```
-
-**Options**:
-```bash
-# Use specific timestamp
-node scripts/compare-artists.js --date 2026-01-04-20-22
-
-# Preview only
-node scripts/compare-artists.js --dry-run
-```
-
-### Step 3: Prescrape Songs
-
-**Command**: `npm run update:prescrape`
-**Script**: `scripts/prescrape-new-artists.js`
-**Time**: 5-15 minutes (depends on new artist count)
-
-Scrapes songs and lyrics for newly identified artists:
-- Up to 10 songs per artist (configurable)
-- Full lyrics extraction
-- Robust error handling with retries
-
-**Output**:
-```
-scraping-data/song-data/2026-01-04-20-48/
-├── songs-0.json
-├── songs-a.json
-├── ...
-├── songs-z.json
-├── prescrape-summary.json
-├── errors.json
-└── .complete
-```
-
-**Options**:
-```bash
-# Test with limited data
-node scripts/prescrape-new-artists.js --limit 5 --max-songs 2
-
-# Specific letters
-node scripts/prescrape-new-artists.js --letters a,b,c
-
-# More songs per artist
-node scripts/prescrape-new-artists.js --max-songs 20
-```
-
-### Step 4: Upload Artists
-
-**Command**: `npm run update:upload-artists`
-**Script**: `scripts/upload-artists.js`
-**Time**: ~1 minute
-
-Uploads new artists to Firestore with:
-- Search tokens for autocomplete
-- Sanitized field values
-- Validated slugs
-- Batch processing (500 per batch)
-
-**Options**:
-```bash
-# Dry run (recommended first)
-node scripts/upload-artists.js --dry-run
-
-# Test with limited data
-node scripts/upload-artists.js --limit 10 --dry-run
-
-# Overwrite existing (careful!)
-node scripts/upload-artists.js --no-skip
-```
-
-### Step 5: Upload Songs
-
-**Command**: `npm run update:upload-songs`
-**Script**: `scripts/upload-songs.js`
-**Time**: 1-5 minutes (depends on song count)
-
-Uploads songs to Firestore with:
-- Song ID extraction
-- Artist slug linking
-- Sanitized lyrics
-- Batch processing (500 per batch)
-
-**Options**:
-```bash
-# Dry run
-node scripts/upload-songs.js --dry-run
-
-# Test with limited data
-node scripts/upload-songs.js --limit 10 --dry-run
-
-# Overwrite existing
-node scripts/upload-songs.js --no-skip
-```
-
-### Step 6: Update Popular Flags
-
-**Command**: `npm run update:update-popular`
-**Script**: `scripts/update-popular-flags.js`
-**Time**: ~1 minute
-
-Updates popular artist flags based on comparison:
-- Adds popular flag to new popular artists
-- Removes popular flag from artists no longer popular
-- Maintains exactly 20 popular artists per letter
-
-**Options**:
-```bash
-# Dry run
-node scripts/update-popular-flags.js --dry-run
-
-# Use specific comparison
-node scripts/update-popular-flags.js --date 2026-01-04-21-01
-```
-
-## Data Structure
-
-### Directory Layout
-
-```
-scraping-data/
-├── artist-lists/ ← Step 1: Scraped artist lists
-│ └── YYYY-MM-DD-HH-MM/
-├── new-artists/ ← Step 2: Comparison results
-│ └── YYYY-MM-DD-HH-MM/
-├── song-data/ ← Step 3: Prescraped songs
-│ └── YYYY-MM-DD-HH-MM/
-└── upload-data/ ← Steps 4-6: Upload summaries
- └── YYYY-MM-DD-HH-MM/
-```
-
-### Firestore Collections
-
-#### `artists` Collection
-
-```javascript
-// Document ID: artist-slug (e.g., "kendrick-lamar")
-{
- name: "Kendrick Lamar",
- url: "https://genius.com/artists/Kendrick-lamar",
- geniusId: 1421,
- type: "popular", // or "regular"
- isPopular: true,
- searchTokens: ["k", "ke", "ken", "kend", ...],
- nameForSorting: "kendrick lamar",
- firstLetter: "k",
- uploadedAt: "2026-01-04T21:10:00.000Z",
- updatedAt: "2026-01-04T21:20:00.000Z"
-}
-```
-
-#### `songs` Collection
-
-```javascript
-// Document ID: song-slug (e.g., "kendrick-lamar-humble-lyrics")
-{
- title: "HUMBLE.",
- url: "https://genius.com/Kendrick-lamar-humble-lyrics",
- artist: "Kendrick Lamar",
- artistSlug: "kendrick-lamar",
- lyrics: "Nobody pray for me...",
- uploadedAt: "2026-01-04T21:15:00.000Z",
- scrapedAt: "2026-01-04T20:48:00.000Z"
-}
-```
-
-## Common Workflows
-
-### Monthly Update
-
-```bash
-# 1. Scrape latest data (30 min)
-npm run update:scrape
-
-# 2. Compare with database (1 min)
-npm run update:compare
-
-# 3. Review comparison-report.json
-cat scraping-data/new-artists/$(ls -t scraping-data/new-artists | head -1)/comparison-report.json
-
-# 4. Prescrape songs (10 min)
-npm run update:prescrape
-
-# 5. Upload everything (5 min)
-npm run update:upload-artists
-npm run update:upload-songs
-npm run update:update-popular
-```
-
-### Quick Test Before Production
-
-```bash
-# Test entire workflow with limited data
-npm run test:all
-
-# If successful, run production
-npm run update:all
-```
-
-### Scrape Specific Letters Only
-
-```bash
-# Scrape only letters J and K
-node scripts/scrape-artists.js --letters j,k
-
-# Compare (will only process these letters)
-npm run update:compare
-
-# Prescrape only these letters
-node scripts/prescrape-new-artists.js --letters j,k
-
-# Upload
-npm run update:upload-artists
-npm run update:upload-songs
-npm run update:update-popular
-```
-
-### Re-upload from Existing Data
-
-```bash
-# Upload from specific timestamp (no re-scraping)
-node scripts/upload-artists.js --date 2026-01-04-21-01 --dry-run
-node scripts/upload-songs.js --date 2026-01-04-20-48 --dry-run
-
-# If looks good, remove --dry-run
-node scripts/upload-artists.js --date 2026-01-04-21-01
-node scripts/upload-songs.js --date 2026-01-04-20-48
-```
-
-## Error Handling
-
-### Error Logging
-
-All scripts log errors to `errors.json` files:
-
-```json
-{
- "totalErrors": 15,
- "errorsByType": {
- "network_timeout": 8,
- "invalid_slug": 5,
- "sanitization_failed": 2
- },
- "detailedErrors": [
- {
- "timestamp": "2026-01-04T20:22:15.123Z",
- "type": "network_timeout",
- "message": "Request timeout",
- "details": {
- "artist": "Artist Name",
- "url": "https://..."
- }
- }
- ]
-}
-```
-
-### Handling Errors
-
-1. **Check error counts** in summary files
-2. **Review errors.json** for details
-3. **Re-run specific letters** if needed
-4. **Most errors are graceful** - process continues
-
-### Common Issues
-
-**Too many network timeouts?**
-- Increase delays in script config
-- Run specific letters separately
-- Use `--no-ids` to skip ID extraction
-
-**Slug extraction failures?**
-- Review invalid URLs in errors.json
-- Usually rare, won't affect most artists
-
-**Upload failures?**
-- Check Firestore permissions
-- Verify Firebase config
-- Check network connection
-
-## Best Practices
-
-### Safety
-
-1. **Always test first**: `npm run test:all`
-2. **Use dry-run**: `--dry-run` on upload commands
-3. **Review reports**: Check comparison-report.json
-4. **Check errors**: Review error counts in summaries
-5. **Manual steps**: Upload is separate from scraping
-
-### Performance
-
-1. **Batch processing**: Default 500 items per batch
-2. **Rate limiting**: Built-in delays between requests
-3. **Skip existing**: Default behavior, use `--no-skip` carefully
-4. **Parallel testing**: Test multiple letters at once
-
-### Monitoring
-
-1. **Progress bars**: Real-time status during execution
-2. **Summary files**: Statistics for each run
-3. **Error logs**: Categorized error tracking
-4. **Timestamps**: Easy to track runs over time
-
-## Troubleshooting
-
-### Script fails to find latest timestamp
-
-**Problem**: "No new-artists data found"
-**Solution**: Run the previous step first or specify `--date`
-
-### Firestore permission denied
-
-**Problem**: Upload fails with permission error
-**Solution**: Check Firebase config and Firestore rules
-
-### Rate limited by Genius
-
-**Problem**: Many 429 errors
-**Solution**: Increase delays or run specific letters
-
-### Out of memory
-
-**Problem**: Node runs out of memory
-**Solution**: Reduce batch size or process fewer letters
-
-## Development
-
-### Adding New Features
-
-1. **Utility modules**: Add to `scripts/utils/`
-2. **Error types**: Use `errorLogger.logError(type, details, message)`
-3. **TUI**: Use `tui.printInfo`, `tui.createProgressBar`, etc.
-4. **Testing**: Add `--limit` and `--dry-run` options
-
-### Code Style
-
-- No emojis in logs or comments
-- Categorized error logging
-- Progress bars for long operations
-- Helpful CLI help messages
-- Timestamped output directories
-
-## Workflow Timer
-
-The system tracks elapsed time across all scripts using the `artist-lists` directory timestamp:
-
-- **Automatic Tracking**: Uses directory creation time from first script
-- **No Extra Files**: Leverages existing timestamped directories
-- **Live Display**: Progress bars show "Total: 2m 34s" that updates every second
-- **Per-Script Summary**: Each script displays current elapsed time upon completion
-- **Final Total**: `upload-artists.js` (last script) shows the total workflow time
-
-Example output:
-```
-Workflow Elapsed Time: 2m 34s
-────────────────────────────────────────────────────────
-TOTAL WORKFLOW TIME: 23m 47s
-────────────────────────────────────────────────────────
-```
-
-The timer helps you:
-- Track actual processing time across multiple scripts
-- Estimate how long future runs will take
-- Identify bottlenecks in the workflow
-
-## Implementation Timeline
-
-- **Week 1**: Utility modules (timestamp, paths, TUI, error logging)
-- **Week 2-3**: Scraping scripts (scrape, compare, prescrape)
-- **Week 4**: Upload scripts (artists, songs, popular flags)
-- **Total**: 4 weeks of implementation
-
-## System Requirements
-
-- Node.js >= 18
-- Firebase project with Firestore
-- Network access to Genius.com
-- ~1GB disk space for cached data
-
-## Dependencies
-
-- `firebase`: Firestore integration
-- `cheerio`: HTML parsing
-- `axios`: HTTP requests
-- `unidecode`: Text normalization
-- `cli-progress`: Progress bars
-- `chalk`: Colored terminal output
-
-## License
-
-MIT
-
-## Support
-
-For issues, questions, or contributions, please refer to the implementation plan and week-specific completion documents:
-- `ARTIST_UPDATE_SYSTEM_PLAN.md`
-- `WEEK_1_COMPLETE.md`
-- `WEEK_2_3_COMPLETE.md`
-- `WEEK_4_COMPLETE.md`
-
diff --git a/scripts/compare-artists.js b/scripts/compare-artists.js
deleted file mode 100644
index a932156..0000000
--- a/scripts/compare-artists.js
+++ /dev/null
@@ -1,492 +0,0 @@
-#!/usr/bin/env node
-
-/**
- * Artist Comparison Script
- * Compares scraped artist lists with Firestore to identify new artists
- */
-
-import fs from 'fs/promises';
-import { initializeApp } from 'firebase/app';
-import { getFirestore, collection, getDocs, query, orderBy, limit, startAfter } from 'firebase/firestore';
-import { firebaseConfig } from '../src/lib/services/initFirebase.js';
-import * as tui from './utils/tui.js';
-import * as paths from './utils/paths.js';
-import { generateTimestamp, getCurrentISO, calculateETA, getWorkflowElapsed } from './utils/timestamp.js';
-import { createErrorLogger } from './utils/error-logger.js';
-
-class ArtistComparator {
- constructor(options = {}) {
- this.inputTimestamp = options.timestamp || null;
- this.outputDir = options.outputDir || null;
- this.errorLogger = createErrorLogger('artist-comparison');
- this.stats = {
- geniusTotal: 0,
- firestoreTotal: 0,
- newArtists: 0,
- existingArtists: 0,
- popularInGenius: 0,
- popularInFirestore: 0,
- popularToAdd: 0,
- popularToRemove: 0
- };
- this.firestoreArtists = new Map(); // Map of slug -> artist data
- }
-
- /**
- * Extract slug from Genius artist URL
- * @param {string} url - The Genius artist URL
- * @returns {string|null} The artist slug
- */
- extractSlug(url) {
- if (!url) return null;
- const match = url.match(/\/artists\/([^/?#]+)/);
- return match ? match[1].toLowerCase() : null;
- }
-
- /**
- * Load artist lists from scraping-data
- */
- async loadArtistLists() {
- const timestamp = this.inputTimestamp || await paths.findLatestTimestamp('artist-lists');
-
- if (!timestamp) {
- throw new Error('No artist lists found. Run scrape-artists.js first.');
- }
-
- const inputDir = paths.getArtistListsDir(timestamp);
- const isComplete = await paths.isDirectoryComplete(inputDir);
-
- if (!isComplete) {
- tui.printWarning(`Artist list directory not marked complete: ${inputDir}`);
- }
-
- tui.printInfo(`Loading artist lists from: ${timestamp}`);
-
- const allArtists = [];
- const letters = paths.getAllLetters();
-
- for (const letter of letters) {
- const filePath = paths.getLetterFilePath(inputDir, letter, 'artists');
-
- try {
- const content = await fs.readFile(filePath, 'utf8');
- const data = JSON.parse(content);
-
- // Add artists with letter metadata
- if (data.artists && data.artists.popular) {
- data.artists.popular.forEach(artist => {
- allArtists.push({ ...artist, letter, source: 'popular' });
- });
- }
-
- if (data.artists && data.artists.regular) {
- data.artists.regular.forEach(artist => {
- allArtists.push({ ...artist, letter, source: 'regular' });
- });
- }
- } catch (error) {
- if (error.code !== 'ENOENT') {
- this.errorLogger.logError('file_read_error', {
- file: filePath,
- letter
- }, error.message);
- }
- }
- }
-
- this.stats.geniusTotal = allArtists.length;
- this.stats.popularInGenius = allArtists.filter(a => a.type === 'popular').length;
-
- tui.printInfo(`Loaded ${tui.formatNumber(allArtists.length)} artists from Genius`);
-
- return { allArtists, timestamp, inputDir };
- }
-
- /**
- * Fetch all artists from Firestore
- */
- async fetchFirestoreArtists() {
- tui.printInfo('Fetching artists from Firestore...');
-
- const app = initializeApp(firebaseConfig);
- const db = getFirestore(app);
-
- const artistsRef = collection(db, 'artists');
- let lastDoc = null;
- let batchCount = 0;
- const batchSize = 1000;
-
- const progressBar = tui.createProgressBar('Fetching from Firestore', 100);
-
- while (true) {
- batchCount++;
-
- let q = query(artistsRef, orderBy('name'), limit(batchSize));
- if (lastDoc) {
- q = query(artistsRef, orderBy('name'), startAfter(lastDoc), limit(batchSize));
- }
-
- try {
- const snapshot = await getDocs(q);
-
- if (snapshot.empty) {
- break;
- }
-
- snapshot.docs.forEach(doc => {
- const data = doc.data();
- // Use document ID (slug) as the key for reliable comparison
- this.firestoreArtists.set(doc.id.toLowerCase(), {
- slug: doc.id,
- name: data.name,
- geniusId: data.geniusId || null,
- type: data.type || 'regular',
- url: data.url || null
- });
- });
-
- lastDoc = snapshot.docs[snapshot.docs.length - 1];
-
- // Update progress (estimate based on batch count)
- const progress = Math.min(95, batchCount * 5);
- progressBar.update(progress);
-
- } catch (error) {
- this.errorLogger.logError('firestore_fetch_error', {
- batch: batchCount
- }, error.message);
- break;
- }
- }
-
- progressBar.update(100);
- progressBar.stop();
-
- this.stats.firestoreTotal = this.firestoreArtists.size;
- this.stats.popularInFirestore = Array.from(this.firestoreArtists.values())
- .filter(a => a.type === 'popular').length;
-
- tui.printInfo(`Loaded ${tui.formatNumber(this.firestoreArtists.size)} artists from Firestore`);
- }
-
- /**
- * Compare and identify new artists
- */
- compareArtists(geniusArtists) {
- tui.printInfo('Comparing artist lists...');
-
- const newArtists = [];
- const existingArtists = [];
- const popularUpdates = {
- toAdd: [],
- toRemove: []
- };
-
- // Identify new artists by comparing slugs
- for (const artist of geniusArtists) {
- const slug = this.extractSlug(artist.url);
-
- if (!slug) {
- // Can't extract slug, treat as new
- newArtists.push(artist);
- continue;
- }
-
- if (this.firestoreArtists.has(slug.toLowerCase())) {
- existingArtists.push(artist);
- } else {
- newArtists.push(artist);
- }
- }
-
- // Identify popular status changes by comparing slugs
- // First, find all current popular artists in Firestore (by slug)
- const currentPopularSlugs = new Set(
- Array.from(this.firestoreArtists.entries())
- .filter(([slug, data]) => data.type === 'popular')
- .map(([slug, data]) => slug.toLowerCase())
- );
-
- // Find artists that should be popular (from Genius, by slug)
- const shouldBePopularSlugs = new Set();
- for (const artist of geniusArtists) {
- if (artist.type === 'popular') {
- const slug = this.extractSlug(artist.url);
- if (slug) {
- shouldBePopularSlugs.add(slug.toLowerCase());
- }
- }
- }
-
- // Artists to add popular flag (in Genius popular but not in Firestore popular)
- for (const artist of geniusArtists) {
- if (artist.type === 'popular') {
- const slug = this.extractSlug(artist.url);
- if (slug) {
- const slugLower = slug.toLowerCase();
- if (this.firestoreArtists.has(slugLower) && !currentPopularSlugs.has(slugLower)) {
- popularUpdates.toAdd.push({
- name: artist.name,
- slug: slug,
- action: 'add_popular',
- reason: 'now_in_genius_popular_top_20'
- });
- }
- }
- }
- }
-
- // Artists to remove popular flag (in Firestore popular but not in Genius popular)
- for (const [slug, firestoreArtist] of this.firestoreArtists.entries()) {
- if (firestoreArtist.type === 'popular' && !shouldBePopularSlugs.has(slug.toLowerCase())) {
- popularUpdates.toRemove.push({
- name: firestoreArtist.name,
- slug: slug,
- action: 'remove_popular',
- reason: 'no_longer_in_genius_popular_top_20'
- });
- }
- }
-
- this.stats.newArtists = newArtists.length;
- this.stats.existingArtists = existingArtists.length;
- this.stats.popularToAdd = popularUpdates.toAdd.length;
- this.stats.popularToRemove = popularUpdates.toRemove.length;
-
- return { newArtists, popularUpdates };
- }
-
- /**
- * Save new artists by letter
- */
- async saveNewArtistsByLetter(newArtists, outputDir, sourceTimestamp) {
- const artistsByLetter = {};
-
- // Group by letter
- for (const artist of newArtists) {
- const letter = artist.letter || '0';
- if (!artistsByLetter[letter]) {
- artistsByLetter[letter] = [];
- }
- artistsByLetter[letter].push(artist);
- }
-
- // Save each letter file
- const letters = paths.getAllLetters();
- for (const letter of letters) {
- const artists = artistsByLetter[letter] || [];
- const filePath = paths.getLetterFilePath(outputDir, letter, 'new-artists');
-
- const data = {
- letter: letter.toUpperCase(),
- comparisonDate: getCurrentISO(),
- sourceTimestamp,
- newArtists: artists,
- count: artists.length
- };
-
- await fs.writeFile(filePath, JSON.stringify(data, null, 2));
- }
- }
-
- /**
- * Save comparison report
- */
- async saveComparisonReport(outputDir, sourceTimestamp, popularUpdates) {
- const perLetter = {};
- const newArtistsByLetter = {};
-
- // Calculate per-letter statistics
- const letters = paths.getAllLetters();
- for (const letter of letters) {
- const filePath = paths.getLetterFilePath(outputDir, letter, 'new-artists');
-
- try {
- const content = await fs.readFile(filePath, 'utf8');
- const data = JSON.parse(content);
- newArtistsByLetter[letter] = data.count;
- perLetter[letter] = {
- newCount: data.count
- };
- } catch {
- newArtistsByLetter[letter] = 0;
- }
- }
-
- const report = {
- timestamp: getCurrentISO(),
- sourceDirectory: `scraping-data/artist-lists/${sourceTimestamp}`,
- statistics: {
- totalGeniusArtists: this.stats.geniusTotal,
- totalFirestoreArtists: this.stats.firestoreTotal,
- newArtists: this.stats.newArtists,
- existingArtists: this.stats.existingArtists,
- popularChanges: {
- addedToPopular: this.stats.popularToAdd,
- removedFromPopular: this.stats.popularToRemove,
- totalChanges: this.stats.popularToAdd + this.stats.popularToRemove
- }
- },
- newArtistsByLetter,
- popularUpdates,
- errors: this.errorLogger.getSummary()
- };
-
- const reportPath = `${outputDir}/comparison-report.json`;
- await fs.writeFile(reportPath, JSON.stringify(report, null, 2));
- }
-
- /**
- * Run comparison
- */
- async compare() {
- tui.printHeader('ARTIST COMPARISON');
-
- // Load Genius artists
- const { allArtists, timestamp: sourceTimestamp, inputDir } = await this.loadArtistLists();
-
- // Fetch Firestore artists
- await this.fetchFirestoreArtists();
-
- // Compare
- const { newArtists, popularUpdates } = this.compareArtists(allArtists);
-
- // Prepare output directory
- const outputTimestamp = generateTimestamp();
- const outputDir = this.outputDir || await paths.createTimestampedDir('new-artists', outputTimestamp);
-
- tui.printInfo(`Saving results to: ${outputTimestamp}`);
-
- // Save filtered lists
- await this.saveNewArtistsByLetter(newArtists, outputDir, sourceTimestamp);
-
- // Save comparison report
- await this.saveComparisonReport(outputDir, sourceTimestamp, popularUpdates);
-
- // Save errors if any
- await this.errorLogger.saveToFile(outputDir);
-
- // Mark complete
- await paths.markDirectoryComplete(outputDir);
-
- return { outputDir, outputTimestamp };
- }
-
- /**
- * Display results
- */
- displayResults() {
- console.log('');
- tui.printStats('Comparison Results', {
- 'Genius Artists': tui.formatNumber(this.stats.geniusTotal),
- 'Firestore Artists': tui.formatNumber(this.stats.firestoreTotal),
- 'New Artists': tui.formatNumber(this.stats.newArtists),
- 'Existing Artists': tui.formatNumber(this.stats.existingArtists)
- });
-
- if (this.stats.popularToAdd > 0 || this.stats.popularToRemove > 0) {
- console.log('');
- tui.printStats('Popular Status Changes', {
- 'To Add': tui.formatNumber(this.stats.popularToAdd),
- 'To Remove': tui.formatNumber(this.stats.popularToRemove),
- 'Total Changes': tui.formatNumber(this.stats.popularToAdd + this.stats.popularToRemove)
- });
- }
-
- if (this.errorLogger.hasErrors()) {
- tui.printErrorSummary(this.errorLogger.getErrorCounts());
- }
- }
-}
-
-/**
- * Parse CLI arguments
- */
-function parseArgs() {
- const args = process.argv.slice(2);
- const options = {
- timestamp: null,
- outputDir: null,
- dryRun: false,
- quiet: false
- };
-
- for (let i = 0; i < args.length; i++) {
- const arg = args[i];
-
- if (arg === '--date' && args[i + 1]) {
- options.timestamp = args[i + 1];
- i++;
- } else if (arg === '--output-dir' && args[i + 1]) {
- options.outputDir = args[i + 1];
- i++;
- } else if (arg === '--dry-run') {
- options.dryRun = true;
- } else if (arg === '--quiet') {
- options.quiet = true;
- } else if (arg === '--help' || arg === '-h') {
- console.log(`
-Usage: node scripts/compare-artists.js [options]
-
-Options:
- --date Use specific artist list timestamp (YYYY-MM-DD-HH-MM)
- Default: use latest
- --output-dir Custom output directory
- --dry-run Preview only, don't save files
- --quiet Minimal output
- --help, -h Show this help message
-
-Examples:
- node scripts/compare-artists.js
- node scripts/compare-artists.js --date 2026-01-04-18-30
- node scripts/compare-artists.js --dry-run
-`);
- process.exit(0);
- }
- }
-
- return options;
-}
-
-/**
- * Main execution
- */
-async function main() {
- const options = parseArgs();
-
- if (options.dryRun) {
- tui.printWarning('DRY RUN MODE: No files will be saved');
- }
-
- const comparator = new ArtistComparator({
- timestamp: options.timestamp,
- outputDir: options.outputDir
- });
-
- try {
- const { outputDir, outputTimestamp } = await comparator.compare();
-
- comparator.displayResults();
-
- const workflowElapsed = await getWorkflowElapsed();
- if (workflowElapsed !== null) {
- tui.printWorkflowTime(workflowElapsed);
- }
-
- tui.printSuccess('Comparison complete!');
- tui.printInfo(`Output: ${outputDir}`);
- tui.printFooter();
-
- } catch (error) {
- tui.printError(`Comparison failed: ${error.message}`);
- console.error(error);
- process.exit(1);
- }
-}
-
-if (import.meta.url === `file://${process.argv[1]}`) {
- main();
-}
-
-export default ArtistComparator;
-
diff --git a/scripts/prescrape-new-artists.js b/scripts/prescrape-new-artists.js
deleted file mode 100644
index f296bc0..0000000
--- a/scripts/prescrape-new-artists.js
+++ /dev/null
@@ -1,551 +0,0 @@
-#!/usr/bin/env node
-
-/**
- * Prescrape New Artists
- * Scrapes song data for newly identified artists from comparison step
- */
-
-import fs from 'fs/promises';
-import * as cheerio from 'cheerio';
-import * as tui from './utils/tui.js';
-import * as paths from './utils/paths.js';
-import { generateTimestamp, getCurrentISO, getWorkflowElapsed } from './utils/timestamp.js';
-import { createErrorLogger } from './utils/error-logger.js';
-
-class NewArtistPrescraper {
- constructor(options = {}) {
- this.inputTimestamp = options.timestamp || null;
- this.outputDir = options.outputDir || null;
- this.maxSongsPerArtist = options.maxSongsPerArtist || 10;
- this.maxArtists = options.maxArtists || null; // Limit total artists for testing
- this.letters = options.letters || paths.getAllLetters();
- this.dryRun = options.dryRun || false;
- this.delays = {
- betweenArtists: options.delayBetweenArtists || 1000,
- betweenSongs: options.delayBetweenSongs || 500,
- betweenPages: options.delayBetweenPages || 200
- };
- this.api = {
- timeout: options.timeout || 10000,
- maxRetries: options.maxRetries || 3,
- userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
- };
- this.errorLogger = createErrorLogger('prescraper');
- this.stats = {
- totalArtists: 0,
- processedArtists: 0,
- skippedArtists: 0,
- totalSongs: 0,
- processedSongs: 0,
- scrapedLyrics: 0,
- failedLyrics: 0
- };
- this.currentProgress = {
- letter: '',
- artist: '',
- song: ''
- };
- }
-
- /**
- * Delay utility
- */
- delay(ms) {
- return new Promise(resolve => setTimeout(resolve, ms));
- }
-
- /**
- * Fetch with timeout and retries
- */
- async fetchWithTimeout(url, options = {}) {
- const controller = new AbortController();
- const timeoutId = setTimeout(() => controller.abort(), this.api.timeout);
-
- let lastError;
- for (let attempt = 1; attempt <= this.api.maxRetries; attempt++) {
- try {
- const response = await fetch(url, {
- ...options,
- signal: controller.signal,
- headers: {
- 'User-Agent': this.api.userAgent,
- ...options.headers
- }
- });
-
- clearTimeout(timeoutId);
-
- if (!response.ok && response.status >= 500) {
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
- }
-
- return response;
- } catch (error) {
- lastError = error;
- clearTimeout(timeoutId);
-
- if (attempt < this.api.maxRetries) {
- const delayMs = Math.pow(2, attempt) * 1000;
- await this.delay(delayMs);
- }
- }
- }
-
- throw lastError || new Error(`Failed after ${this.api.maxRetries} attempts`);
- }
-
- /**
- * Load new artists from comparison output
- */
- async loadNewArtists() {
- const timestamp = this.inputTimestamp || await paths.findLatestTimestamp('new-artists');
-
- if (!timestamp) {
- throw new Error('No new-artists data found. Run compare-artists.js first.');
- }
-
- const inputDir = paths.getNewArtistsDir(timestamp);
- const isComplete = await paths.isDirectoryComplete(inputDir);
-
- if (!isComplete) {
- tui.printWarning(`New-artists directory not marked complete: ${inputDir}`);
- }
-
- tui.printInfo(`Loading new artists from: ${timestamp}`);
-
- const artistsByLetter = {};
- let totalArtists = 0;
-
- for (const letter of this.letters) {
- const filePath = paths.getLetterFilePath(inputDir, letter, 'new-artists');
-
- try {
- const content = await fs.readFile(filePath, 'utf8');
- const data = JSON.parse(content);
-
- if (data.newArtists && data.newArtists.length > 0) {
- artistsByLetter[letter] = data.newArtists;
- totalArtists += data.newArtists.length;
- }
- } catch (error) {
- if (error.code !== 'ENOENT') {
- this.errorLogger.logError('file_read_error', {
- file: filePath,
- letter
- }, error.message);
- }
- }
- }
-
- this.stats.totalArtists = totalArtists;
- // Apply limit if specified
- if (this.maxArtists && this.maxArtists < totalArtists) {
- tui.printWarning(`Limiting to first ${this.maxArtists} artists for testing`);
- let remainingLimit = this.maxArtists;
-
- for (const letter of Object.keys(artistsByLetter)) {
- if (remainingLimit <= 0) {
- delete artistsByLetter[letter];
- } else if (artistsByLetter[letter].length > remainingLimit) {
- artistsByLetter[letter] = artistsByLetter[letter].slice(0, remainingLimit);
- remainingLimit = 0;
- } else {
- remainingLimit -= artistsByLetter[letter].length;
- }
- }
-
- // Recalculate total
- totalArtists = Object.values(artistsByLetter).reduce((sum, arr) => sum + arr.length, 0);
- this.stats.totalArtists = totalArtists;
- }
-
- tui.printInfo(`Loaded ${tui.formatNumber(totalArtists)} new artists across ${Object.keys(artistsByLetter).length} letters`);
-
- return { artistsByLetter, timestamp };
- }
-
- /**
- * Scrape songs for a single artist
- */
- async scrapeArtistSongs(artist) {
- this.currentProgress.artist = artist.name;
-
- try {
- const response = await this.fetchWithTimeout(artist.url);
- const html = await response.text();
- const $ = cheerio.load(html);
-
- const songs = [];
- const seenUrls = new Set();
-
- // Find all links containing "-lyrics" in the href
- $('a[href*="-lyrics"]').each((i, el) => {
- if (songs.length >= this.maxSongsPerArtist) return false; // Stop when we have enough
-
- const songUrl = $(el).attr('href');
- if (!songUrl) return;
-
- // Build full URL
- const fullUrl = songUrl.startsWith('http') ? songUrl : `https://genius.com${songUrl}`;
-
- // Skip if we've already seen this URL
- if (seenUrls.has(fullUrl)) return;
- seenUrls.add(fullUrl);
-
- // Extract title - try multiple methods
- let title = $(el).text().trim();
-
- // If no text, try getting it from the URL
- if (!title || title.length === 0) {
- const urlMatch = fullUrl.match(/genius\.com\/(.+)-lyrics/);
- if (urlMatch) {
- title = urlMatch[1].replace(/-/g, ' ');
- }
- }
-
- // Only add if we have a valid title and URL looks like a song page
- if (title && title.length > 0 && fullUrl.includes('genius.com/') && fullUrl.includes('-lyrics')) {
- songs.push({
- title,
- url: fullUrl,
- artist: artist.name,
- artistUrl: artist.url
- });
- }
- });
-
- return songs;
- } catch (error) {
- this.errorLogger.logError('artist_scrape_failed', {
- artist: artist.name,
- url: artist.url
- }, error.message);
- return [];
- }
- }
-
- /**
- * Scrape lyrics for a single song
- */
- async scrapeSongLyrics(song) {
- this.currentProgress.song = song.title;
-
- try {
- await this.delay(this.delays.betweenSongs);
- const response = await this.fetchWithTimeout(song.url);
- const html = await response.text();
- const $ = cheerio.load(html);
-
- let lyrics = '';
- const lyricsContainers = $('[data-lyrics-container="true"]');
-
- if (lyricsContainers.length > 0) {
- lyricsContainers.each((i, container) => {
- const text = $(container).text().trim();
- if (text) {
- lyrics += text + '\n\n';
- }
- });
- lyrics = lyrics.trim();
- }
-
- if (!lyrics || lyrics.length === 0) {
- this.errorLogger.logError('empty_lyrics', {
- song: song.title,
- artist: song.artist,
- url: song.url
- }, 'No lyrics found');
- this.stats.failedLyrics++;
- return null;
- }
-
- this.stats.scrapedLyrics++;
- return lyrics;
- } catch (error) {
- this.errorLogger.logError('lyrics_scrape_failed', {
- song: song.title,
- artist: song.artist,
- url: song.url
- }, error.message);
- this.stats.failedLyrics++;
- return null;
- }
- }
-
- /**
- * Process a single letter
- */
- async processLetter(letter, artists, outputDir, progressBar) {
- this.currentProgress.letter = letter.toUpperCase();
- const processedSongs = [];
-
- for (const artist of artists) {
- this.currentProgress.artist = artist.name;
- this.currentProgress.song = '';
-
- // Update progress
- progressBar.increment({
- status: `Letter ${letter.toUpperCase()}: ${artist.name}`
- });
-
- // Scrape artist's songs
- const songs = await this.scrapeArtistSongs(artist);
- this.stats.processedArtists++;
-
- if (songs.length === 0) {
- this.stats.skippedArtists++;
- await this.delay(this.delays.betweenArtists);
- continue;
- }
-
- this.stats.totalSongs += songs.length;
-
- // Scrape lyrics for each song
- for (const song of songs) {
- const lyrics = await this.scrapeSongLyrics(song);
-
- if (lyrics) {
- processedSongs.push({
- ...song,
- lyrics,
- scrapedAt: getCurrentISO()
- });
- }
-
- this.stats.processedSongs++;
-
- // Update progress with song info
- progressBar.update({
- status: `Letter ${letter.toUpperCase()}: ${artist.name} - ${song.title}`
- });
- }
-
- await this.delay(this.delays.betweenArtists);
- }
-
- // Save letter file
- if (!this.dryRun) {
- const filePath = paths.getLetterFilePath(outputDir, letter, 'songs');
- const data = {
- letter: letter.toUpperCase(),
- scrapedAt: getCurrentISO(),
- totalSongs: processedSongs.length,
- songs: processedSongs
- };
-
- await fs.writeFile(filePath, JSON.stringify(data, null, 2));
- }
-
- return processedSongs.length;
- }
-
- /**
- * Run prescraping
- */
- async prescrape() {
- tui.printHeader('PRESCRAPE NEW ARTISTS');
-
- if (this.dryRun) {
- tui.printWarning('DRY RUN MODE: No files will be saved');
- }
-
- // Load new artists
- const { artistsByLetter, timestamp: sourceTimestamp } = await this.loadNewArtists();
-
- if (this.stats.totalArtists === 0) {
- tui.printInfo('No new artists to prescrape!');
- return null;
- }
-
- // Prepare output directory
- const outputTimestamp = generateTimestamp();
- const outputDir = this.outputDir || await paths.createTimestampedDir('song-data', outputTimestamp);
-
- tui.printInfo(`Saving results to: ${outputTimestamp}`);
- tui.printInfo(`Max songs per artist: ${this.maxSongsPerArtist}`);
-
- // Create progress bar
- const progressBar = tui.createProgressBar(
- 'Prescraping',
- this.stats.totalArtists,
- 'Initializing...'
- );
-
- // Process each letter
- for (const letter of this.letters) {
- const artists = artistsByLetter[letter];
- if (!artists || artists.length === 0) continue;
-
- await this.processLetter(letter, artists, outputDir, progressBar);
- }
-
- progressBar.stop();
-
- // Save summary
- if (!this.dryRun) {
- const summaryPath = `${outputDir}/prescrape-summary.json`;
- const summary = {
- timestamp: getCurrentISO(),
- sourceDirectory: `scraping-data/new-artists/${sourceTimestamp}`,
- configuration: {
- maxSongsPerArtist: this.maxSongsPerArtist,
- letters: this.letters
- },
- statistics: {
- totalArtists: this.stats.totalArtists,
- processedArtists: this.stats.processedArtists,
- skippedArtists: this.stats.skippedArtists,
- totalSongs: this.stats.totalSongs,
- processedSongs: this.stats.processedSongs,
- scrapedLyrics: this.stats.scrapedLyrics,
- failedLyrics: this.stats.failedLyrics
- },
- errors: this.errorLogger.getSummary()
- };
-
- await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));
-
- // Save errors
- await this.errorLogger.saveToFile(outputDir);
-
- // Mark complete
- await paths.markDirectoryComplete(outputDir);
- }
-
- return { outputDir, outputTimestamp };
- }
-
- /**
- * Display results
- */
- displayResults() {
- console.log('');
- tui.printStats('Prescraping Results', {
- 'Total Artists': tui.formatNumber(this.stats.totalArtists),
- 'Processed Artists': tui.formatNumber(this.stats.processedArtists),
- 'Skipped Artists': tui.formatNumber(this.stats.skippedArtists),
- 'Total Songs': tui.formatNumber(this.stats.totalSongs),
- 'Scraped Lyrics': tui.formatNumber(this.stats.scrapedLyrics),
- 'Failed Lyrics': tui.formatNumber(this.stats.failedLyrics),
- 'Success Rate': `${((this.stats.scrapedLyrics / this.stats.totalSongs) * 100).toFixed(1)}%`
- });
-
- if (this.errorLogger.hasErrors()) {
- tui.printErrorSummary(this.errorLogger.getErrorCounts());
- }
- }
-}
-
-/**
- * Parse CLI arguments
- */
-function parseArgs() {
- const args = process.argv.slice(2);
- const options = {
- timestamp: null,
- outputDir: null,
- maxSongsPerArtist: 10,
- maxArtists: null,
- letters: null,
- dryRun: false,
- quiet: false
- };
-
- for (let i = 0; i < args.length; i++) {
- const arg = args[i];
-
- if (arg === '--date' && args[i + 1]) {
- options.timestamp = args[i + 1];
- i++;
- } else if (arg === '--output-dir' && args[i + 1]) {
- options.outputDir = args[i + 1];
- i++;
- } else if (arg === '--max-songs' && args[i + 1]) {
- options.maxSongsPerArtist = parseInt(args[i + 1], 10);
- i++;
- } else if (arg === '--limit' && args[i + 1]) {
- options.maxArtists = parseInt(args[i + 1], 10);
- i++;
- } else if (arg === '--letters' && args[i + 1]) {
- options.letters = args[i + 1].split(',').map(l => l.trim().toLowerCase());
- i++;
- } else if (arg === '--dry-run') {
- options.dryRun = true;
- } else if (arg === '--quiet') {
- options.quiet = true;
- } else if (arg === '--help' || arg === '-h') {
- console.log(`
-Usage: node scripts/prescrape-new-artists.js [options]
-
-Options:
- --date Use specific new-artists data (YYYY-MM-DD-HH-MM)
- Default: use latest
- --output-dir Custom output directory
- --max-songs Max songs per artist (default: 10)
- --limit Max artists to process (for testing)
- --letters Comma-separated letters to process (e.g., 'a,b,c')
- Default: all letters
- --dry-run Preview only, don't save files
- --quiet Minimal output
- --help, -h Show this help message
-
-Examples:
- node scripts/prescrape-new-artists.js
- node scripts/prescrape-new-artists.js --limit 10 --max-songs 2
- node scripts/prescrape-new-artists.js --max-songs 20
- node scripts/prescrape-new-artists.js --letters a,b,c
- node scripts/prescrape-new-artists.js --date 2026-01-04-20-26
-`);
- process.exit(0);
- }
- }
-
- return options;
-}
-
-/**
- * Main execution
- */
-async function main() {
- const options = parseArgs();
-
- const prescraper = new NewArtistPrescraper({
- timestamp: options.timestamp,
- outputDir: options.outputDir,
- maxSongsPerArtist: options.maxSongsPerArtist,
- maxArtists: options.maxArtists,
- letters: options.letters,
- dryRun: options.dryRun
- });
-
- try {
- const result = await prescraper.prescrape();
-
- if (result) {
- prescraper.displayResults();
-
- const workflowElapsed = await getWorkflowElapsed();
- if (workflowElapsed !== null) {
- tui.printWorkflowTime(workflowElapsed);
- }
-
- tui.printSuccess('Prescraping complete!');
- tui.printInfo(`Output: ${result.outputDir}`);
- } else {
- tui.printInfo('No work to do.');
- }
-
- tui.printFooter();
- } catch (error) {
- tui.printError(`Prescraping failed: ${error.message}`);
- console.error(error);
- process.exit(1);
- }
-}
-
-if (import.meta.url === `file://${process.argv[1]}`) {
- main();
-}
-
-export default NewArtistPrescraper;
-
diff --git a/scripts/scrape-artists.js b/scripts/scrape-artists.js
deleted file mode 100644
index 1be4afb..0000000
--- a/scripts/scrape-artists.js
+++ /dev/null
@@ -1,413 +0,0 @@
-#!/usr/bin/env node
-
-/**
- * Artist List Scraper
- * Scrapes artist lists from Genius and saves to timestamped directories
- */
-
-import axios from 'axios';
-import * as cheerio from 'cheerio';
-import fs from 'fs/promises';
-import * as tui from './utils/tui.js';
-import * as paths from './utils/paths.js';
-import { generateTimestamp, getCurrentISO, calculateETA, getWorkflowElapsed } from './utils/timestamp.js';
-import { createErrorLogger } from './utils/error-logger.js';
-
-class ArtistScraper {
- constructor(options = {}) {
- this.baseUrl = 'https://genius.com/artists-index/';
- this.requestDelay = options.requestDelay || 500;
- this.includeIds = options.includeIds !== false;
- this.maxArtistsPerLetter = options.maxArtistsPerLetter || null; // Limit for testing
- this.outputDir = options.outputDir || null;
- this.errorLogger = createErrorLogger('artist-scraping');
- this.stats = {
- totalArtists: 0,
- popularArtists: 0,
- regularArtists: 0,
- artistsWithIds: 0,
- idExtractionFailed: 0,
- networkErrors: 0
- };
- }
-
- /**
- * Extract artist ID from iOS app link
- */
- async extractArtistId(artistUrl, artistName) {
- try {
- await new Promise(resolve => setTimeout(resolve, this.requestDelay));
-
- const response = await axios.get(artistUrl, {
- headers: {
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
- },
- timeout: 10000
- });
-
- const $ = cheerio.load(response.data);
- const iosAppLink = $('link[rel="alternate"][href*="ios-app://"]').attr('href');
-
- if (iosAppLink) {
- const match = iosAppLink.match(/\/artists\/(\d+)$/);
- if (match) {
- this.stats.artistsWithIds++;
- return match[1];
- }
- }
-
- this.stats.idExtractionFailed++;
- this.errorLogger.logError('id_extraction_failed', {
- artist: artistName,
- url: artistUrl
- }, 'iOS app link not found');
-
- return null;
- } catch (error) {
- this.stats.idExtractionFailed++;
-
- if (error.code === 'ECONNABORTED' || error.code === 'ETIMEDOUT') {
- this.errorLogger.logError('network_timeout', {
- artist: artistName,
- url: artistUrl
- }, `Request timeout: ${error.message}`);
- } else {
- this.errorLogger.logError('network_error', {
- artist: artistName,
- url: artistUrl
- }, error.message);
- }
-
- return null;
- }
- }
-
- /**
- * Scrape artists for a specific letter
- */
- async scrapeArtistsByLetter(letter) {
- const url = `${this.baseUrl}${letter.toLowerCase()}`;
-
- try {
- const response = await axios.get(url, {
- headers: {
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
- },
- timeout: 15000
- });
-
- const $ = cheerio.load(response.data);
- const results = {
- popular: [],
- regular: []
- };
-
- // Extract popular artists
- $('li.artists_index_list-popular_artist').each((index, element) => {
- const artistLink = $(element).find('a.artists_index_list-artist_name');
- const name = artistLink.text().trim();
- const url = artistLink.attr('href');
-
- if (name && url) {
- results.popular.push({
- name,
- url,
- type: 'popular',
- id: null
- });
- }
- });
-
- // Extract regular artists
- const regularArtistLists = $('ul.artists_index_list').not(':has(.artists_index_list-popular_artist)');
- regularArtistLists.each((listIndex, listElement) => {
- $(listElement).find('li').each((index, element) => {
- const artistLink = $(element).find('a').first();
- const name = artistLink.text().trim();
- const url = artistLink.attr('href');
-
- if (name && url && url.includes('/artists/')) {
- results.regular.push({
- name,
- url,
- type: 'regular',
- id: null
- });
- }
- });
- });
-
- // Apply limit if specified (for testing)
- if (this.maxArtistsPerLetter) {
- const totalArtists = results.popular.length + results.regular.length;
- if (totalArtists > this.maxArtistsPerLetter) {
- // Prioritize popular artists, then regular
- if (results.popular.length >= this.maxArtistsPerLetter) {
- results.popular = results.popular.slice(0, this.maxArtistsPerLetter);
- results.regular = [];
- } else {
- const remainingSlots = this.maxArtistsPerLetter - results.popular.length;
- results.regular = results.regular.slice(0, remainingSlots);
- }
- }
- }
-
- this.stats.popularArtists += results.popular.length;
- this.stats.regularArtists += results.regular.length;
- this.stats.totalArtists += results.popular.length + results.regular.length;
-
- return results;
-
- } catch (error) {
- this.stats.networkErrors++;
- this.errorLogger.logError('network_error', {
- letter,
- url
- }, error.message);
-
- return { popular: [], regular: [] };
- }
- }
-
- /**
- * Extract IDs for all artists in results
- */
- async extractIds(results, letter, progressBar) {
- const allArtists = [...results.popular, ...results.regular];
- const total = allArtists.length;
-
- for (let i = 0; i < total; i++) {
- const artist = allArtists[i];
- artist.id = await this.extractArtistId(artist.url, artist.name);
-
- if (progressBar) {
- progressBar.update(i + 1, {
- info: `Letter ${letter.toUpperCase()} - ${artist.name.substring(0, 30)}`
- });
- }
- }
- }
-
- /**
- * Save letter results to file
- */
- async saveLetterFile(letter, results, outputDir) {
- const filePath = paths.getLetterFilePath(outputDir, letter, 'artists');
- const data = {
- letter: letter.toUpperCase(),
- scrapedAt: getCurrentISO(),
- totalArtists: results.popular.length + results.regular.length,
- popularCount: results.popular.length,
- regularCount: results.regular.length,
- artists: results
- };
-
- await fs.writeFile(filePath, JSON.stringify(data, null, 2));
- }
-
- /**
- * Scrape all letters
- */
- async scrapeAll(lettersToScrape = null) {
- const timestamp = generateTimestamp();
- const outputDir = this.outputDir || await paths.createTimestampedDir('artist-lists', timestamp);
-
- tui.printHeader('ARTIST LIST SCRAPER');
- tui.printInfo(`Output directory: ${outputDir}`);
- tui.printInfo(`Include IDs: ${this.includeIds ? 'Yes' : 'No (faster)'}`);
-
- if (this.maxArtistsPerLetter) {
- tui.printWarning(`Limiting to ${this.maxArtistsPerLetter} artists per letter (testing mode)`);
- }
-
- const allLetters = paths.getAllLetters();
- const letters = lettersToScrape || allLetters;
-
- tui.printInfo(`Letters to scrape: ${letters.length} (${letters.join(', ')})`);
- console.log('');
-
- const startTime = Date.now();
- const progressBar = tui.createProgressBar('Scraping Progress', letters.length);
-
- for (let i = 0; i < letters.length; i++) {
- const letter = letters[i];
- const displayLetter = letter === '0' ? 'Numbers' : letter.toUpperCase();
-
- progressBar.update(i, {
- info: `Current: Letter ${displayLetter}`
- });
-
- // Scrape artists for this letter
- const results = await this.scrapeArtistsByLetter(letter);
-
- // Extract IDs if requested
- if (this.includeIds && (results.popular.length > 0 || results.regular.length > 0)) {
- const idBar = tui.createProgressBar(` Extracting IDs (${displayLetter})`,
- results.popular.length + results.regular.length);
- await this.extractIds(results, letter, idBar);
- idBar.stop();
- }
-
- // Save to file
- await this.saveLetterFile(letter, results, outputDir);
-
- // Update progress
- const elapsed = (Date.now() - startTime) / 1000;
- const eta = calculateETA(i + 1, letters.length, elapsed);
- progressBar.update(i + 1, {
- info: `Completed: ${displayLetter} | ETA: ${eta}`
- });
-
- // Delay between letters
- if (i < letters.length - 1) {
- await new Promise(resolve => setTimeout(resolve, 1000));
- }
- }
-
- progressBar.stop();
-
- // Save summary and errors
- await this.saveSummary(outputDir, timestamp, letters);
- await this.errorLogger.saveToFile(outputDir);
- await paths.markDirectoryComplete(outputDir);
-
- return { outputDir, timestamp };
- }
-
- /**
- * Save summary.json
- */
- async saveSummary(outputDir, timestamp, letters) {
- const summary = {
- timestamp: getCurrentISO(),
- timestampDir: timestamp,
- lettersScraped: letters,
- statistics: {
- totalArtists: this.stats.totalArtists,
- popularArtists: this.stats.popularArtists,
- regularArtists: this.stats.regularArtists,
- artistsWithIds: this.stats.artistsWithIds,
- idExtractionFailed: this.stats.idExtractionFailed
- },
- errors: this.errorLogger.getSummary()
- };
-
- const summaryPath = `${outputDir}/summary.json`;
- await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));
- }
-
- /**
- * Display final results
- */
- displayResults() {
- console.log('');
- tui.printStats('Statistics', {
- 'Total Artists': tui.formatNumber(this.stats.totalArtists),
- 'Popular Artists': tui.formatNumber(this.stats.popularArtists),
- 'Regular Artists': tui.formatNumber(this.stats.regularArtists),
- 'Artists with IDs': tui.formatNumber(this.stats.artistsWithIds),
- 'ID Extraction Failed': tui.formatNumber(this.stats.idExtractionFailed)
- });
-
- if (this.errorLogger.hasErrors()) {
- tui.printErrorSummary(this.errorLogger.getErrorCounts());
- }
- }
-}
-
-/**
- * Parse CLI arguments
- */
-function parseArgs() {
- const args = process.argv.slice(2);
- const options = {
- letters: null,
- includeIds: true,
- maxArtistsPerLetter: null,
- outputDir: null,
- quiet: false
- };
-
- for (let i = 0; i < args.length; i++) {
- const arg = args[i];
-
- if (arg === '--letters' && args[i + 1]) {
- const letterArg = args[i + 1];
- options.letters = letterArg.split(',').map(l => l.trim().toLowerCase());
- i++;
- } else if (arg === '--no-ids') {
- options.includeIds = false;
- } else if (arg === '--limit' && args[i + 1]) {
- options.maxArtistsPerLetter = parseInt(args[i + 1], 10);
- i++;
- } else if (arg === '--output-dir' && args[i + 1]) {
- options.outputDir = args[i + 1];
- i++;
- } else if (arg === '--quiet') {
- options.quiet = true;
- } else if (arg === '--help' || arg === '-h') {
- console.log(`
-Usage: node scripts/scrape-artists.js [options]
-
-Options:
- --letters Comma-separated letters to scrape (e.g., "a,b,c" or "j,k")
- Default: all letters (0, a-z)
- --no-ids Skip artist ID extraction (much faster)
- --limit Max artists per letter (for testing)
- --output-dir Custom output directory
- --quiet Minimal output (no TUI)
- --help, -h Show this help message
-
-Examples:
- node scripts/scrape-artists.js
- node scripts/scrape-artists.js --letters j --limit 10
- node scripts/scrape-artists.js --letters j,k
- node scripts/scrape-artists.js --no-ids
- node scripts/scrape-artists.js --letters a,b,c --no-ids
-`);
- process.exit(0);
- }
- }
-
- return options;
-}
-
-/**
- * Main execution
- */
-async function main() {
- const options = parseArgs();
-
- const scraper = new ArtistScraper({
- includeIds: options.includeIds,
- maxArtistsPerLetter: options.maxArtistsPerLetter,
- outputDir: options.outputDir
- });
-
- try {
- const { outputDir, timestamp } = await scraper.scrapeAll(options.letters);
-
- scraper.displayResults();
-
- const workflowElapsed = await getWorkflowElapsed();
- if (workflowElapsed !== null) {
- tui.printWorkflowTime(workflowElapsed);
- }
-
- tui.printSuccess(`Scraping complete!`);
- tui.printInfo(`Output: ${outputDir}`);
- tui.printFooter();
-
- } catch (error) {
- tui.printError(`Scraping failed: ${error.message}`);
- console.error(error);
- process.exit(1);
- }
-}
-
-if (import.meta.url === `file://${process.argv[1]}`) {
- main();
-}
-
-export default ArtistScraper;
-
diff --git a/scripts/utils/error-logger.js b/scripts/utils/error-logger.js
deleted file mode 100644
index 4d51493..0000000
--- a/scripts/utils/error-logger.js
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * Error Logger
- * Handles error collection and logging to errors.json
- */
-
-import fs from 'fs/promises';
-import path from 'path';
-import { getCurrentISO } from './timestamp.js';
-
-/**
- * Error Logger class
- */
-export class ErrorLogger {
- constructor(phase) {
- this.phase = phase;
- this.errors = [];
- this.errorCounts = {};
- }
-
- /**
- * Log an error
- * @param {string} type - Error type (e.g., 'network_timeout', 'parsing_failed')
- * @param {object} context - Error context (artist, song, url, etc.)
- * @param {string} message - Error message
- */
- logError(type, context, message) {
- const error = {
- type,
- timestamp: getCurrentISO(),
- message,
- ...context
- };
-
- this.errors.push(error);
-
- // Update counts
- this.errorCounts[type] = (this.errorCounts[type] || 0) + 1;
- }
-
- /**
- * Get total error count
- * @returns {number} Total errors
- */
- getTotalErrors() {
- return this.errors.length;
- }
-
- /**
- * Get error counts by type
- * @returns {object} Error counts
- */
- getErrorCounts() {
- return { ...this.errorCounts };
- }
-
- /**
- * Get all errors
- * @returns {array} All error objects
- */
- getAllErrors() {
- return [...this.errors];
- }
-
- /**
- * Check if there are any errors
- * @returns {boolean} True if errors exist
- */
- hasErrors() {
- return this.errors.length > 0;
- }
-
- /**
- * Save errors to errors.json file
- * @param {string} outputDir - Directory to save errors.json
- * @returns {Promise}
- */
- async saveToFile(outputDir) {
- if (!this.hasErrors()) {
- return;
- }
-
- const errorData = {
- phase: this.phase,
- timestamp: getCurrentISO(),
- totalErrors: this.getTotalErrors(),
- errorsByType: this.getErrorCounts(),
- errors: this.getAllErrors()
- };
-
- const filePath = path.join(outputDir, 'errors.json');
- await fs.writeFile(filePath, JSON.stringify(errorData, null, 2));
- }
-
- /**
- * Create a summary object
- * @returns {object} Error summary
- */
- getSummary() {
- return {
- totalErrors: this.getTotalErrors(),
- errorsByType: this.getErrorCounts()
- };
- }
-
- /**
- * Clear all errors
- */
- clear() {
- this.errors = [];
- this.errorCounts = {};
- }
-}
-
-/**
- * Create a new error logger
- * @param {string} phase - Phase name (e.g., 'scraping', 'prescraping', 'uploading')
- * @returns {ErrorLogger} Error logger instance
- */
-export function createErrorLogger(phase) {
- return new ErrorLogger(phase);
-}
-
diff --git a/scripts/utils/timestamp.js b/scripts/utils/timestamp.js
deleted file mode 100644
index 49236ff..0000000
--- a/scripts/utils/timestamp.js
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- * Timestamp Utilities
- * Handles timestamp generation and parsing for directory naming
- */
-
-import fs from 'fs/promises';
-import path from 'path';
-import { fileURLToPath } from 'url';
-
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = path.dirname(__filename);
-const WORKSPACE_ROOT = path.resolve(__dirname, '../..');
-
-/**
- * Generate a timestamp string for directory naming
- * Format: YYYY-MM-DD-HH-MM
- * @returns {string} Timestamp string
- */
-export function generateTimestamp() {
- const now = new Date();
- const year = now.getFullYear();
- const month = String(now.getMonth() + 1).padStart(2, '0');
- const day = String(now.getDate()).padStart(2, '0');
- const hours = String(now.getHours()).padStart(2, '0');
- const minutes = String(now.getMinutes()).padStart(2, '0');
-
- return `${year}-${month}-${day}-${hours}-${minutes}`;
-}
-
-/**
- * Parse a timestamp string into a Date object
- * @param {string} timestamp - Timestamp in format YYYY-MM-DD-HH-MM
- * @returns {Date} Date object
- */
-export function parseTimestamp(timestamp) {
- const parts = timestamp.split('-');
- if (parts.length !== 5) {
- throw new Error(`Invalid timestamp format: ${timestamp}. Expected YYYY-MM-DD-HH-MM`);
- }
-
- const [year, month, day, hours, minutes] = parts.map(Number);
- return new Date(year, month - 1, day, hours, minutes);
-}
-
-/**
- * Validate a timestamp string
- * @param {string} timestamp - Timestamp to validate
- * @returns {boolean} True if valid
- */
-export function isValidTimestamp(timestamp) {
- try {
- const parts = timestamp.split('-');
- if (parts.length !== 5) return false;
-
- const date = parseTimestamp(timestamp);
- return !isNaN(date.getTime());
- } catch {
- return false;
- }
-}
-
-/**
- * Format a Date object as ISO string
- * @param {Date} date - Date to format
- * @returns {string} ISO string
- */
-export function toISOString(date) {
- return date.toISOString();
-}
-
-/**
- * Get current ISO timestamp
- * @returns {string} Current time as ISO string
- */
-export function getCurrentISO() {
- return new Date().toISOString();
-}
-
-/**
- * Format duration in seconds to human readable string
- * @param {number} seconds - Duration in seconds
- * @returns {string} Formatted duration (e.g., "2h 15m 30s")
- */
-export function formatDuration(seconds) {
- if (seconds < 60) {
- return `${Math.round(seconds)}s`;
- }
-
- const hours = Math.floor(seconds / 3600);
- const minutes = Math.floor((seconds % 3600) / 60);
- const secs = Math.floor(seconds % 60);
-
- const parts = [];
- if (hours > 0) parts.push(`${hours}h`);
- if (minutes > 0) parts.push(`${minutes}m`);
- if (secs > 0 || parts.length === 0) parts.push(`${secs}s`);
-
- return parts.join(' ');
-}
-
-/**
- * Calculate estimated time remaining
- * @param {number} completed - Items completed
- * @param {number} total - Total items
- * @param {number} elapsedSeconds - Elapsed time in seconds
- * @returns {string} Formatted ETA
- */
-export function calculateETA(completed, total, elapsedSeconds) {
- if (completed === 0) return 'Calculating...';
- if (completed >= total) return '0s';
-
- const rate = completed / elapsedSeconds;
- const remaining = total - completed;
- const etaSeconds = remaining / rate;
-
- return formatDuration(etaSeconds);
-}
-
-/**
- * Get elapsed time from a directory's creation timestamp
- * @param {string} dirPath - Path to the directory
- * @returns {Promise} Elapsed seconds, or null if directory doesn't exist
- */
-export async function getWorkflowElapsedFromDir(dirPath) {
- try {
- const stats = await fs.stat(dirPath);
- const elapsed = (Date.now() - stats.birthtimeMs) / 1000;
- return elapsed;
- } catch (error) {
- return null;
- }
-}
-
-/**
- * Get elapsed time from the latest artist-lists directory
- * @returns {Promise} Elapsed seconds, or null if no directory exists
- */
-export async function getWorkflowElapsed() {
- try {
- const artistListsBase = path.join(WORKSPACE_ROOT, 'scraping-data', 'artist-lists');
- const entries = await fs.readdir(artistListsBase, { withFileTypes: true });
-
- // Find all timestamp directories
- const timestampDirs = entries
- .filter(entry => entry.isDirectory() && isValidTimestamp(entry.name))
- .map(entry => ({
- name: entry.name,
- path: path.join(artistListsBase, entry.name)
- }))
- .sort((a, b) => b.name.localeCompare(a.name)); // Most recent first
-
- if (timestampDirs.length === 0) {
- return null;
- }
-
- // Use the most recent directory
- const latestDir = timestampDirs[0];
- return await getWorkflowElapsedFromDir(latestDir.path);
- } catch (error) {
- return null;
- }
-}
-
diff --git a/scripts/utils/tui.js b/scripts/utils/tui.js
deleted file mode 100644
index 9e45dc7..0000000
--- a/scripts/utils/tui.js
+++ /dev/null
@@ -1,256 +0,0 @@
-/**
- * TUI (Terminal User Interface) Utilities
- * Provides progress bars and formatted output
- * NO EMOJIS - Clean professional output only
- */
-
-import cliProgress from 'cli-progress';
-import chalk from 'chalk';
-import * as timestamp from './timestamp.js';
-
-/**
- * Create a new progress bar
- * @param {string} title - Title for the progress bar
- * @param {number} total - Total items
- * @param {boolean} showWorkflowTime - Show workflow elapsed time
- * @returns {object} Progress bar instance
- */
-export function createProgressBar(title, total, showWorkflowTime = true) {
- const format = showWorkflowTime
- ? `${title}: [{bar}] {percentage}% | {value}/{total} | ETA: {eta_formatted} | Total: {workflow_time}`
- : `${title}: [{bar}] {percentage}% | {value}/{total} | ETA: {eta_formatted}`;
-
- const bar = new cliProgress.SingleBar({
- format,
- barCompleteChar: '\u2588',
- barIncompleteChar: '\u2591',
- hideCursor: true,
- clearOnComplete: false,
- stopOnComplete: true
- });
-
- bar.start(total, 0, {
- eta_formatted: 'Calculating...',
- workflow_time: '0s'
- });
-
- // Override update to include workflow time
- if (showWorkflowTime) {
- const originalUpdate = bar.update.bind(bar);
- bar.update = async function(value, payload = {}) {
- const elapsed = await timestamp.getWorkflowElapsed();
- if (elapsed !== null) {
- payload.workflow_time = timestamp.formatDuration(elapsed);
- }
- originalUpdate(value, payload);
- };
- }
-
- return bar;
-}
-
-/**
- * Create a multi-bar progress container
- * @returns {object} MultiBar instance
- */
-export function createMultiBar() {
- return new cliProgress.MultiBar({
- clearOnComplete: false,
- hideCursor: true,
- format: '{title}: [{bar}] {percentage}% | {value}/{total} | {info}'
- });
-}
-
-/**
- * Print section header
- * @param {string} title - Header title
- */
-export function printHeader(title) {
- const width = 80;
- const line = '='.repeat(width);
- console.log('\n' + line);
- console.log(title);
- console.log(line + '\n');
-}
-
-/**
- * Print section footer
- */
-export function printFooter() {
- const width = 80;
- console.log('='.repeat(width) + '\n');
-}
-
-/**
- * Print info message
- * @param {string} message - Message to print
- */
-export function printInfo(message) {
- console.log(`[INFO] ${message}`);
-}
-
-/**
- * Print success message
- * @param {string} message - Message to print
- */
-export function printSuccess(message) {
- console.log(chalk.green(`[SUCCESS] ${message}`));
-}
-
-/**
- * Print warning message
- * @param {string} message - Message to print
- */
-export function printWarning(message) {
- console.log(chalk.yellow(`[WARN] ${message}`));
-}
-
-/**
- * Print error message
- * @param {string} message - Message to print
- */
-export function printError(message) {
- console.log(chalk.red(`[ERROR] ${message}`));
-}
-
-/**
- * Print statistics table
- * @param {string} title - Table title
- * @param {object} stats - Statistics object
- */
-export function printStats(title, stats) {
- console.log(`\n${title}:`);
- for (const [key, value] of Object.entries(stats)) {
- const formattedKey = key.replace(/([A-Z])/g, ' $1').trim();
- const capitalizedKey = formattedKey.charAt(0).toUpperCase() + formattedKey.slice(1);
- console.log(` ${capitalizedKey}: ${value}`);
- }
-}
-
-/**
- * Print error summary
- * @param {object} errorCounts - Error counts by type
- */
-export function printErrorSummary(errorCounts) {
- if (Object.keys(errorCounts).length === 0) {
- return;
- }
-
- console.log('\nErrors:');
- for (const [type, count] of Object.entries(errorCounts)) {
- if (count > 0) {
- const formattedType = type.replace(/_/g, ' ');
- console.log(` ${formattedType}: ${count}`);
- }
- }
-}
-
-/**
- * Format number with commas
- * @param {number} num - Number to format
- * @returns {string} Formatted number
- */
-export function formatNumber(num) {
- return num.toLocaleString();
-}
-
-/**
- * Create a status display that updates in place
- * @returns {object} Status display object
- */
-export function createStatusDisplay() {
- let lastLine = '';
-
- return {
- /**
- * Update the status line
- * @param {string} message - Status message
- */
- update(message) {
- if (lastLine) {
- process.stdout.write('\r' + ' '.repeat(lastLine.length) + '\r');
- }
- process.stdout.write(message);
- lastLine = message;
- },
-
- /**
- * Clear the status line
- */
- clear() {
- if (lastLine) {
- process.stdout.write('\r' + ' '.repeat(lastLine.length) + '\r');
- lastLine = '';
- }
- },
-
- /**
- * Finish with a newline
- */
- finish() {
- if (lastLine) {
- process.stdout.write('\n');
- lastLine = '';
- }
- }
- };
-}
-
-/**
- * Ask for user confirmation
- * @param {string} question - Question to ask
- * @returns {Promise} True if user confirms
- */
-export async function confirm(question) {
- const readline = await import('readline');
- const rl = readline.createInterface({
- input: process.stdin,
- output: process.stdout
- });
-
- return new Promise((resolve) => {
- rl.question(`${question} (Y/n): `, (answer) => {
- rl.close();
- const normalized = answer.toLowerCase().trim();
- resolve(normalized === 'y' || normalized === 'yes' || normalized === '');
- });
- });
-}
-
-/**
- * Print progress info (current item being processed)
- * @param {string} label - Label (e.g., "Current Letter")
- * @param {string} value - Value to display
- */
-export function printProgressInfo(label, value) {
- console.log(`${label}: ${value}`);
-}
-
-/**
- * Clear console (use sparingly)
- */
-export function clearConsole() {
- console.clear();
-}
-
-/**
- * Print workflow elapsed time
- * @param {number} seconds - Elapsed seconds
- * @param {string} label - Optional label
- */
-export function printWorkflowTime(seconds, label = 'Workflow Elapsed Time') {
- console.log(`${label}: ${chalk.cyan(timestamp.formatDuration(seconds))}`);
-}
-
-/**
- * Print total workflow time at completion
- * @param {number} seconds - Total elapsed seconds
- */
-export async function printTotalWorkflowTime(seconds) {
- console.log('');
- console.log(chalk.bold.cyan('─'.repeat(60)));
- console.log(chalk.bold.cyan(`TOTAL WORKFLOW TIME: ${timestamp.formatDuration(seconds)}`));
- console.log(chalk.bold.cyan('─'.repeat(60)));
- console.log('');
-}
-
diff --git a/scripts/search-songs-by-id.js b/search-songs-by-id.js
similarity index 98%
rename from scripts/search-songs-by-id.js
rename to search-songs-by-id.js
index 851516c..e5c7bd2 100644
--- a/scripts/search-songs-by-id.js
+++ b/search-songs-by-id.js
@@ -5,7 +5,7 @@
import { initializeApp } from 'firebase/app';
import { getFirestore, doc, getDoc } from 'firebase/firestore';
-import { firebaseConfig } from '../src/lib/services/initFirebase.js';
+import { firebaseConfig } from './src/lib/services/initFirebase.js';
import * as readline from 'readline';
// Initialize Firebase
diff --git a/src/lib/components/GrayscaleImageRenderer.svelte b/src/lib/components/GrayscaleImageRenderer.svelte
index 763975a..33d1e96 100644
--- a/src/lib/components/GrayscaleImageRenderer.svelte
+++ b/src/lib/components/GrayscaleImageRenderer.svelte
@@ -1,24 +1,7 @@
{#if selectedSong.imageUrl}
-
+ {#key selectedSong.imageUrl}
+ {#if $ditherImages && grayscaleImageData && imageMetadata && !useFallback}
+
+ {:else}
+
+ {/if}
+ {/key}
{:else}
@@ -274,6 +400,18 @@
Completed: {new Date(selectedSong.completedAt).toLocaleDateString()}
+
+
+
+
+
+
+ Replay Song
+
{:else}
@@ -580,8 +718,31 @@
.completion-date {
text-align: center;
opacity: 0.6;
+ color: var(--primary-color);
+ }
+
+ .replay-button {
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ gap: 6px;
margin-top: auto;
+ padding: 8px 16px;
+ background-color: var(--secondary-color);
color: var(--primary-color);
+ border: 1px solid var(--primary-color);
+ cursor: pointer;
+ font-family: inherit;
+ transition: background-color 0.15s ease;
+ }
+
+ .replay-button:hover {
+ background-color: var(--primary-color);
+ color: var(--secondary-color);
+ }
+
+ .replay-button:hover svg path {
+ fill: var(--secondary-color);
}
.no-selection {
diff --git a/src/lib/components/TypingTest.svelte b/src/lib/components/TypingTest.svelte
index 927f695..9760c1e 100644
--- a/src/lib/components/TypingTest.svelte
+++ b/src/lib/components/TypingTest.svelte
@@ -1,5 +1,5 @@
+