diff --git a/.env.example b/.env.example index 61d7e03..529ee3e 100644 --- a/.env.example +++ b/.env.example @@ -22,6 +22,11 @@ DATABASE_URL="file:./prisma/dev.db" # Optional: custom API base URL (proxy or local model server) # ANTHROPIC_BASE_URL= +# ── Twitter/X Live Import (optional) ────────────────────────────────── +# Required only for the /api/import/twitter Live Import endpoint. +# This is the public app-only bearer token from the Twitter web client. +# X_BEARER_TOKEN=your-twitter-bearer-token-here + # ── Access control (optional) ──────────────────────────────────────── # Set BOTH to enable HTTP Basic Auth on the entire app. diff --git a/.gitignore b/.gitignore index f9fa46a..a37e458 100644 --- a/.gitignore +++ b/.gitignore @@ -46,14 +46,19 @@ next-env.d.ts # generated prisma client /app/generated/prisma -# Database files — never commit these (contain your private bookmarks) +# Database files — never commit these (contain your private tweets) prisma/dev.db prisma/dev.db-journal +prisma/dev.db.backup-* dev.db dev.db-journal *.db *.db-journal +# User data exports — never commit +bookmarks.json +likes.json + # OS .DS_Store Thumbs.db diff --git a/CLAUDE.md b/CLAUDE.md index 29041ca..7df9bc2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,6 +1,6 @@ # Siftly -Self-hosted Twitter/X bookmark manager with AI-powered categorization, search, and visualization. +Self-hosted Twitter/X bookmark & likes manager with AI-powered categorization, search, and visualization. 
## Quick Setup @@ -34,6 +34,7 @@ To verify it's working, hit: `GET /api/settings/cli-status` ```bash npx next dev # Start dev server (port 3000) npx tsc --noEmit # Type check +npm test # Run parser tests (vitest) npx prisma studio # Database GUI npx prisma db push # Apply schema changes to DB npm run build # Production build @@ -45,7 +46,8 @@ npm run build # Production build app/ api/ categorize/ # 4-stage AI pipeline (start/stop/status via SSE) - import/ # Bookmark JSON import + dedup + import/ # Multi-format import (JSON, .js, .zip) with dedup + update-on-reimport + twitter/ # Live Import via Twitter GraphQL API (bookmarks + likes) search/ai/ # FTS5 + Claude semantic search settings/ cli-status/ # GET — returns Claude CLI auth status @@ -68,7 +70,7 @@ lib/ vision-analyzer.ts # Image vision + semantic tagging fts.ts # SQLite FTS5 full-text search rawjson-extractor.ts # Entity extraction from tweet JSON - parser.ts # Multi-format bookmark JSON parser + parser.ts # Multi-format parser (bookmarklet, console, Twitter archive .js/.zip) exporter.ts # CSV / JSON / ZIP export prisma/schema.prisma # SQLite schema (Bookmark, Category, MediaItem, Setting, ImportJob) diff --git a/README.md b/README.md index 14f36f9..d627307 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@

Siftly

-

Self-hosted Twitter/X bookmark manager with AI-powered organization

+

Self-hosted Twitter/X bookmark & likes manager with AI-powered organization

Import · Analyze · Categorize · Search · Explore

@@ -20,26 +20,26 @@ ## What is Siftly? -Siftly turns your Twitter/X bookmarks into a **searchable, categorized, visual knowledge base** — running entirely on your machine. No cloud, no subscriptions, no browser extensions required. Everything stays local except the AI API calls you configure. +Siftly turns your Twitter/X **bookmarks and likes** into a **searchable, categorized, visual knowledge base** — running entirely on your machine. No cloud, no subscriptions, no browser extensions required. Everything stays local except the AI API calls you configure. -It runs a **4-stage AI pipeline** on your bookmarks: +It runs a **4-stage AI pipeline** on your tweets: ``` -📥 Import (built-in bookmarklet or console script — no extensions needed) +📥 Import (bookmarklet, file upload, Twitter archive, or Live Import API) ↓ 🏷️ Entity Extraction — mines hashtags, URLs, mentions, and 100+ known tools from raw tweet data (free, zero API calls) ↓ 👁️ Vision Analysis — reads text, objects, and context from every image/GIF/video thumbnail (30–40 visual tags per image) ↓ -🧠 Semantic Tagging — generates 25–35 searchable tags per bookmark for AI-powered search +🧠 Semantic Tagging — generates 25–35 searchable tags per tweet for AI-powered search ↓ -📂 Categorization — assigns each bookmark to 1–3 categories with confidence scores +📂 Categorization — assigns each tweet to 1–3 categories with confidence scores ``` After the pipeline runs, you get: -- **AI search** — find bookmarks by meaning, not just keywords (*"funny meme about crypto crashing"*) -- **Interactive mindmap** — explore your entire bookmark graph visually -- **Filtered browsing** — grid or list view, filter by category, media type, and date +- **AI search** — find tweets by meaning, not just keywords (*"funny meme about crypto crashing"*) +- **Interactive mindmap** — explore your entire tweet graph visually +- **Filtered browsing** — grid or list view, filter by category, source (bookmarks/likes), media type, and date - 
**Export tools** — download media, export as CSV / JSON / ZIP --- @@ -120,33 +120,55 @@ New accounts include $5 free credit — enough for thousands of bookmarks at Hai --- -## Importing Your Bookmarks +## Importing Your Tweets -Siftly has **built-in import tools** — no browser extensions required. Go to the **Import** page and choose either method: +Siftly supports **4 import methods** for both bookmarks and likes — no browser extensions required. ### Method A — Bookmarklet *(Recommended)* 1. Go to **Import** in the Siftly sidebar 2. Drag the **"Export X Bookmarks"** link to your browser's bookmark bar - *(or right-click the bookmark bar → Add Bookmark → paste the URL)* -3. Go to [x.com/i/bookmarks](https://x.com/i/bookmarks) while logged in to X -4. Click **"Export X Bookmarks"** in your bookmark bar — a purple button appears on the page -5. Click **"▶ Auto-scroll"** — the tool scrolls through and captures all your bookmarks automatically -6. When complete, click the purple **"Export N bookmarks"** button — `bookmarks.json` downloads -7. Back in Siftly → **Import** → drop or upload the file +3. Navigate to [x.com/i/bookmarks](https://x.com/i/bookmarks) (or your likes page) while logged in +4. Click the bookmarklet — a purple capture button appears on the page +5. Click **"▶ Auto-scroll"** — captures all visible tweets automatically +6. Export → upload to Siftly + +The bookmarklet also captures **quoted tweet content** for more accurate categorization. ### Method B — Browser Console Script -1. Go to [x.com/i/bookmarks](https://x.com/i/bookmarks) while logged in to X -2. Open DevTools: press `F12` (Windows/Linux) or `⌘⌥J` (Mac), then go to the **Console** tab -3. Copy the console script from the Siftly Import page, paste it into the console, and press Enter -4. Click **"▶ Auto-scroll"** and wait for all bookmarks to be captured -5. Click the export button — `bookmarks.json` downloads automatically -6. Back in Siftly → **Import** → upload the file +1. 
Open [x.com/i/bookmarks](https://x.com/i/bookmarks) with DevTools Console (`F12` or `⌘⌥J`) +2. Paste the console script from the Import page, press Enter +3. Auto-scroll → export → upload + +### Method C — Twitter Data Archive + +1. Request your data archive from Twitter: Settings → Your Account → Download an archive +2. Extract the ZIP and find `like.js` or `bookmark.js` +3. Upload the `.js` file (or the whole `.zip`) directly to Siftly + +Siftly auto-detects the source (bookmarks vs likes) from the archive file's contents. + +### Method D — Live Import API + +For direct API access using your Twitter session cookies: + +```bash +curl -X POST http://localhost:3000/api/import/twitter \ + -H 'Content-Type: application/json' \ + -d '{"authToken":"YOUR_AUTH_TOKEN","ct0":"YOUR_CT0","source":"bookmark"}' +``` + +For likes, set `"source":"like"` and add `"userId":"YOUR_USER_ID"`. + +> Requires `X_BEARER_TOKEN` in your `.env` file — see `.env.example`. -### Re-importing +### Smart Re-importing -Re-import anytime — Siftly automatically skips duplicates and only adds new bookmarks. 
+Re-import anytime — Siftly handles deduplication intelligently: +- **Skips** tweets that already exist with equal or better data +- **Updates** existing tweets when incoming data is richer (longer text from quoted tweets, real author handles replacing `@unknown`, new media) +- Updated tweets are automatically re-queued for AI categorization --- @@ -155,7 +177,7 @@ Re-import anytime — Siftly automatically skips duplicates and only adds new bo **Categorization starts automatically as soon as you import.** You can also trigger it manually from: - The **Import** page (after upload) -- The **Mindmap** page (when bookmarks are uncategorized) +- The **Mindmap** page (when tweets are uncategorized) - The **Categorize** page in the sidebar ### The 4-Stage Pipeline @@ -164,10 +186,14 @@ Re-import anytime — Siftly automatically skips duplicates and only adds new bo |-------|-------------| | **Entity Extraction** | Mines hashtags, URLs, @mentions, and 100+ known tool/product names from stored tweet JSON — free, zero API calls | | **Vision Analysis** | Analyzes every image, GIF, and video thumbnail — OCR text, objects, scene, mood, meme templates, 30–40 visual tags per image | -| **Semantic Tagging** | Generates 25–35 precise search tags per bookmark by combining tweet text + image context. Also extracts sentiment, people, and company names. | -| **Categorization** | Assigns 1–3 categories per bookmark with confidence scores using all enriched data | +| **Semantic Tagging** | Generates 25–35 precise search tags per tweet by combining tweet text + image context. Also extracts sentiment, people, and company names. | +| **Categorization** | Assigns 1–3 categories per tweet with confidence scores using all enriched data | -The pipeline is **incremental** — if interrupted, it picks up where it left off. Use **"Re-run everything (force all)"** to re-analyze bookmarks that were already processed. +The pipeline is **incremental** — if interrupted, it picks up where it left off. 
Use **"Re-run everything (force all)"** to re-analyze tweets that were already processed. + +### Quote Tweets + +Siftly extracts quoted tweet content and appends it to the parent tweet text for better categorization. A tweet that says "The sleep debt is real" but quotes a thread about Claude Code will correctly be categorized as tech — not health. --- @@ -175,7 +201,7 @@ The pipeline is **incremental** — if interrupted, it picks up where it left of ### 🔍 AI Search -Natural language queries across all bookmark data: +Natural language queries across all tweet data: - *"funny meme about crypto crashing"* - *"react hooks tutorial"* @@ -186,17 +212,17 @@ Searches tweet text, image OCR, visual tags, semantic tags, and categories simul ### 🗺️ Mindmap -Interactive force-directed graph showing all bookmarks organized by category: +Interactive force-directed graph showing all tweets organized by category: -- Expand/collapse any category to reveal its bookmarks -- Click a bookmark node to open the original tweet on X +- Expand/collapse any category to reveal its tweets +- Click a tweet node to open the original on X - Color-coded legend by category -- If bookmarks aren't categorized yet, an inline **AI Categorize** button starts the pipeline without leaving the page +- If tweets aren't categorized yet, an inline **AI Categorize** button starts the pipeline without leaving the page ### 📚 Browse & Filter - **Grid view** (masonry layout) or **List view** -- Filter by category, media type (photo / video), or search text +- Filter by category, source (bookmarks / likes), media type (photo / video), or search text - Sort by newest or oldest - Pagination with 24 items per page - Active filter chips — removable individually or all at once @@ -241,6 +267,7 @@ All settings are manageable in the **Settings** page at `/settings` or via envir | API Base URL | `ANTHROPIC_BASE_URL` | Custom endpoint for proxies or local Anthropic-compatible models | | AI Model | Settings page only | Haiku 
4.5 (default, fastest/cheapest), Sonnet 4.6, Opus 4.6 | | OpenAI Key | Settings page only | Alternative provider if no Anthropic key is set | +| X Bearer Token | `X_BEARER_TOKEN` | Required for Live Import API only (see `.env.example`) | | Database | `DATABASE_URL` | SQLite file path (default: `file:./prisma/dev.db`) | ### Custom API Endpoint @@ -266,9 +293,9 @@ siftly/ │ │ │ └── [slug]/ # Individual category operations │ │ ├── categorize/ # 4-stage AI pipeline (start, status, stop) │ │ ├── export/ # CSV, JSON, ZIP export -│ │ ├── import/ # JSON file import with dedup + auto-pipeline trigger +│ │ ├── import/ # Multi-format import with dedup + update-on-reimport │ │ │ ├── bookmarklet/ # Bookmarklet-specific import endpoint -│ │ │ └── twitter/ # Twitter-specific import endpoint +│ │ │ └── twitter/ # Live Import via Twitter GraphQL API (bookmarks + likes) │ │ ├── link-preview/ # Server-side OG metadata scraper │ │ ├── media/ # Media proxy/download endpoint │ │ ├── mindmap/ # Graph nodes + edges for visualization @@ -306,7 +333,7 @@ siftly/ │ ├── image-context.ts # Shared image context builder │ ├── fts.ts # SQLite FTS5 full-text search index │ ├── rawjson-extractor.ts # Entity extraction from raw tweet JSON -│ ├── parser.ts # Multi-format JSON parser +│ ├── parser.ts # Multi-format JSON parser (bookmarklet, console, Twitter archive) │ ├── exporter.ts # CSV, JSON, ZIP export │ ├── types.ts # Shared TypeScript types │ └── db.ts # Prisma client singleton @@ -321,7 +348,7 @@ siftly/ ### Database Schema ``` -Bookmark — tweet text, author, date, raw JSON, semantic tags, enrichment metadata +Bookmark — tweet text, author, date, source (bookmark/like), raw JSON, semantic tags, enrichment metadata ├── MediaItem — images / videos / GIFs with AI-generated image tags └── BookmarkCategory — category assignments with confidence scores (0–1) @@ -377,6 +404,9 @@ npx next dev # Type check npx tsc --noEmit +# Run tests +npm test + # Open database GUI npx prisma studio @@ -410,7 +440,7 
@@ Add domain strings to `KNOWN_TOOL_DOMAINS` in `lib/rawjson-extractor.ts` to have - All data is stored **locally** in a SQLite file on your machine - The only external calls are to the AI provider you configure (tweet text + image data) - No telemetry, no tracking, no accounts required -- Your bookmarks never touch any third-party server except your configured AI endpoint +- Your tweets never touch any third-party server except your configured AI endpoint --- diff --git a/app/ai-search/page.tsx b/app/ai-search/page.tsx index e716e80..da23fb7 100644 --- a/app/ai-search/page.tsx +++ b/app/ai-search/page.tsx @@ -222,7 +222,7 @@ export default function AISearchPage() { {searched && !loading && results.length === 0 && !error && (
-

No bookmarks matched that description. Try different words.

+

No tweets matched that description. Try different words.

)} diff --git a/app/api/import/route.ts b/app/api/import/route.ts index 9842712..fb76ed4 100644 --- a/app/api/import/route.ts +++ b/app/api/import/route.ts @@ -1,6 +1,48 @@ import { NextRequest, NextResponse } from 'next/server' import prisma from '@/lib/db' -import { parseBookmarksJson } from '@/lib/parser' +import { parseTweetsJson, parseTweetsWithMeta } from '@/lib/parser' +import type { ParsedTweet } from '@/lib/parser' +import { upsertTweets, type IncomingTweet } from '@/lib/upsert-tweet' +import JSZip from 'jszip' + +async function extractArchiveFiles(zipBuffer: ArrayBuffer): Promise<{ filename: string; content: string }[]> { + const zip = await JSZip.loadAsync(zipBuffer) + const results: { filename: string; content: string }[] = [] + for (const [path, entry] of Object.entries(zip.files)) { + if (entry.dir) continue + const name = path.split('/').pop() ?? '' + // Match like.js, bookmark.js, and partitioned files like like.part0.js + if (/^(like|bookmark)(\.part\d+)?\.js$/.test(name)) { + const content = await entry.async('string') + results.push({ filename: name, content }) + } + } + return results +} + +function toIncomingTweets(bookmarks: ParsedTweet[], source: string): IncomingTweet[] { + return bookmarks.map((b) => ({ + tweetId: b.tweetId, + text: b.text, + authorHandle: b.authorHandle, + authorName: b.authorName, + tweetCreatedAt: b.tweetCreatedAt, + rawJson: b.rawJson, + source, + media: b.media, + })) +} + +function resolveSource( + sourceParam: string | undefined, + detectedSource: 'like' | 'bookmark' | undefined, + jsonSource: string | undefined, +): string { + if (detectedSource) return detectedSource + if (sourceParam === 'like' || sourceParam === 'bookmark') return sourceParam + if (jsonSource === 'like') return 'like' + return 'bookmark' +} export async function POST(request: NextRequest): Promise { let formData: FormData @@ -21,13 +63,7 @@ export async function POST(request: NextRequest): Promise { const filename = file instanceof File ? 
file.name : 'bookmarks.json' - - let jsonString: string - try { - jsonString = await file.text() - } catch { - return NextResponse.json({ error: 'Failed to read file content' }, { status: 400 }) - } + const ext = filename.split('.').pop()?.toLowerCase() ?? '' // Create an import job to track progress const importJob = await prisma.importJob.create({ @@ -39,9 +75,98 @@ export async function POST(request: NextRequest): Promise { }, }) - let parsedBookmarks try { - parsedBookmarks = parseBookmarksJson(jsonString) + let totalParsed = 0 + let importedCount = 0 + let skippedCount = 0 + let updatedCount = 0 + let erroredCount = 0 + + if (ext === 'zip') { + // --- ZIP archive: extract like.js / bookmark.js files --- + const zipBuffer = await file.arrayBuffer() + const archiveFiles = await extractArchiveFiles(zipBuffer) + + if (archiveFiles.length === 0) { + await prisma.importJob.update({ + where: { id: importJob.id }, + data: { status: 'error', errorMessage: 'No like.js or bookmark.js files found in ZIP' }, + }) + return NextResponse.json( + { error: 'No like.js or bookmark.js files found in ZIP' }, + { status: 422 } + ) + } + + for (const archiveFile of archiveFiles) { + const { tweets, detectedSource } = parseTweetsWithMeta(archiveFile.content) + const source = resolveSource(sourceParam ?? undefined, detectedSource, undefined) + const result = await upsertTweets(toIncomingTweets(tweets, source)) + totalParsed += tweets.length + importedCount += result.imported + skippedCount += result.skipped + updatedCount += result.updated + erroredCount += result.errored + } + } else if (ext === 'js') { + // --- Archive .js file (like.js, bookmark.js) --- + const content = await file.text() + const { tweets, detectedSource } = parseTweetsWithMeta(content) + const source = resolveSource(sourceParam ?? 
undefined, detectedSource, undefined) + totalParsed = tweets.length + const result = await upsertTweets(toIncomingTweets(tweets, source)) + importedCount = result.imported + skippedCount = result.skipped + updatedCount = result.updated + erroredCount = result.errored + } else { + // --- JSON file --- + let jsonString: string + try { + jsonString = await file.text() + } catch { + await prisma.importJob.update({ + where: { id: importJob.id }, + data: { status: 'error', errorMessage: 'Failed to read file content' }, + }) + return NextResponse.json({ error: 'Failed to read file content' }, { status: 400 }) + } + + const parsedTweets = parseTweetsJson(jsonString) + + // Detect source from JSON payload + let jsonSource: string | undefined + try { + const parsed = JSON.parse(jsonString) + if (typeof parsed?.source === 'string') jsonSource = parsed.source + } catch { /* already parsed above */ } + const source = resolveSource(sourceParam ?? undefined, undefined, jsonSource) + + totalParsed = parsedTweets.length + const result = await upsertTweets(toIncomingTweets(parsedTweets, source)) + importedCount = result.imported + skippedCount = result.skipped + updatedCount = result.updated + erroredCount = result.errored + } + + await prisma.importJob.update({ + where: { id: importJob.id }, + data: { + status: 'done', + totalCount: totalParsed, + processedCount: importedCount, + }, + }) + + return NextResponse.json({ + jobId: importJob.id, + imported: importedCount, + skipped: skippedCount, + updated: updatedCount, + errored: erroredCount, + parsed: totalParsed, + }) } catch (err) { await prisma.importJob.update({ where: { id: importJob.id }, @@ -51,83 +176,8 @@ export async function POST(request: NextRequest): Promise { }, }) return NextResponse.json( - { error: `Failed to parse bookmarks JSON: ${err instanceof Error ? err.message : String(err)}` }, + { error: `Import failed: ${err instanceof Error ? 
err.message : String(err)}` }, { status: 422 } ) } - - // Determine source: formData param > JSON field > default "bookmark" - let jsonSource: string | undefined - try { - const parsed = JSON.parse(jsonString) - if (typeof parsed?.source === 'string') jsonSource = parsed.source - } catch { /* already parsed above */ } - const source = (sourceParam === 'like' || sourceParam === 'bookmark') - ? sourceParam - : (jsonSource === 'like' ? 'like' : 'bookmark') - - await prisma.importJob.update({ - where: { id: importJob.id }, - data: { totalCount: parsedBookmarks.length }, - }) - - let importedCount = 0 - let skippedCount = 0 - - for (const bookmark of parsedBookmarks) { - try { - const existing = await prisma.bookmark.findUnique({ - where: { tweetId: bookmark.tweetId }, - select: { id: true }, - }) - - if (existing) { - skippedCount++ - continue - } - - const created = await prisma.bookmark.create({ - data: { - tweetId: bookmark.tweetId, - text: bookmark.text, - authorHandle: bookmark.authorHandle, - authorName: bookmark.authorName, - tweetCreatedAt: bookmark.tweetCreatedAt, - rawJson: bookmark.rawJson, - source, - }, - }) - - if (bookmark.media.length > 0) { - await prisma.mediaItem.createMany({ - data: bookmark.media.map((m) => ({ - bookmarkId: created.id, - type: m.type, - url: m.url, - thumbnailUrl: m.thumbnailUrl ?? 
null, - })), - }) - } - - importedCount++ - } catch (err) { - console.error(`Failed to import tweet ${bookmark.tweetId}:`, err) - skippedCount++ - } - } - - await prisma.importJob.update({ - where: { id: importJob.id }, - data: { - status: 'done', - processedCount: importedCount, - }, - }) - - return NextResponse.json({ - jobId: importJob.id, - imported: importedCount, - skipped: skippedCount, - parsed: parsedBookmarks.length, - }) } diff --git a/app/api/import/twitter/route.ts b/app/api/import/twitter/route.ts index 3154484..84aa008 100644 --- a/app/api/import/twitter/route.ts +++ b/app/api/import/twitter/route.ts @@ -1,7 +1,7 @@ import { NextRequest, NextResponse } from 'next/server' -import prisma from '@/lib/db' +import { upsertTweets, type IncomingTweet } from '@/lib/upsert-tweet' -const BEARER = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I%2BxMb1nYFAA%3DUognEfK4ZPxYowpr4nMskopkC%2FDO' +const BEARER = process.env.X_BEARER_TOKEN ?? '' const FEATURES = JSON.stringify({ graphql_timeline_v2_bookmark_timeline: true, @@ -32,7 +32,7 @@ const FEATURES = JSON.stringify({ // filter by "graphql", find the "Likes" request, and grab the ID from the URL path. const ENDPOINTS = { bookmark: { - queryId: 'j5KExFXy1niL_uGnBhHNxA', + queryId: 'BBxBluh79axE_HJzZPcBDw', operationName: 'Bookmarks', referer: 'https://x.com/i/bookmarks', getInstructions: (d: Record): unknown[] => @@ -40,14 +40,14 @@ const ENDPOINTS = { (d as any)?.data?.bookmark_timeline_v2?.timeline?.instructions ?? [], }, like: { - // PLACEHOLDER — you must replace this with the real query ID from x.com Network tab - queryId: 'REPLACE_ME', + queryId: 'zPJ36q7-jHyvvHcmx8yymg', operationName: 'Likes', referer: 'https://x.com', getInstructions: (d: Record): unknown[] => { // eslint-disable-next-line @typescript-eslint/no-explicit-any const a = d as any - return a?.data?.user?.result?.timeline_v2?.timeline?.instructions + return a?.data?.user?.result?.timeline?.timeline?.instructions + ?? 
a?.data?.user?.result?.timeline_v2?.timeline?.instructions ?? a?.data?.liked_tweets_timeline?.timeline?.instructions ?? [] }, @@ -94,16 +94,29 @@ interface ArticleResult { content_state?: { blocks?: ArticleBlock[] } } +interface UserResult { + legacy?: UserLegacy + core?: UserLegacy // New Twitter structure puts screen_name/name here +} + interface TweetResult { __typename?: string rest_id?: string legacy?: TweetLegacy - core?: { user_results?: { result?: { legacy?: UserLegacy } } } + core?: { user_results?: { result?: UserResult } } note_tweet?: { note_tweet_results?: { result?: { text?: string } } } article?: { article_results?: { result?: ArticleResult } } + quoted_status_result?: { result?: TweetResult } tweet?: TweetResult } +function getUserInfo(userResult?: UserResult): { screen_name: string; name: string } { + return { + screen_name: userResult?.legacy?.screen_name ?? userResult?.core?.screen_name ?? 'unknown', + name: userResult?.legacy?.name ?? userResult?.core?.name ?? 'Unknown', + } +} + async function fetchPage(authToken: string, ct0: string, source: Source, cursor?: string, userId?: string) { const cfg = ENDPOINTS[source] const variables = JSON.stringify({ @@ -193,24 +206,42 @@ function articleBlocksText(article: ArticleResult): string { } function tweetFullText(tweet: TweetResult): string { + let text: string if (tweet.note_tweet?.note_tweet_results?.result?.text) { - return decodeHtmlEntities(tweet.note_tweet.note_tweet_results.result.text) - } - const article = tweet.article?.article_results?.result - if (article) { - const parts: string[] = [] - if (article.title) parts.push(article.title) - if (article.content) parts.push(article.content) - - // Fallback: some X articles ship content in content_state.blocks - if (parts.length === 0) { - const blocks = articleBlocksText(article) - if (blocks) parts.push(blocks) + text = decodeHtmlEntities(tweet.note_tweet.note_tweet_results.result.text) + } else { + const article = 
tweet.article?.article_results?.result + if (article) { + const parts: string[] = [] + if (article.title) parts.push(article.title) + if (article.content) parts.push(article.content) + + // Fallback: some X articles ship content in content_state.blocks + if (parts.length === 0) { + const blocks = articleBlocksText(article) + if (blocks) parts.push(blocks) + } + + text = parts.length > 0 ? decodeHtmlEntities(parts.join('\n\n')) : decodeHtmlEntities(tweet.legacy?.full_text ?? '') + } else { + text = decodeHtmlEntities(tweet.legacy?.full_text ?? '') } + } - if (parts.length > 0) return decodeHtmlEntities(parts.join('\n\n')) + // Append quoted tweet content for better categorization + let qt = tweet.quoted_status_result?.result + if (qt?.__typename === 'TweetWithVisibilityResults' && qt.tweet) { + qt = qt.tweet } - return decodeHtmlEntities(tweet.legacy?.full_text ?? '') + if (qt) { + const qtText = qt.legacy?.full_text || qt.note_tweet?.note_tweet_results?.result?.text + const qtAuthor = getUserInfo(qt.core?.user_results?.result).screen_name + if (qtText) { + text += `\n\n[Quote @${qtAuthor}]: ${decodeHtmlEntities(qtText)}` + } + } + + return text } function extractMedia(tweet: TweetResult) { @@ -254,6 +285,10 @@ export async function POST(request: NextRequest): Promise { const source: Source = body.source === 'like' ? 'like' : 'bookmark' const userId = body.userId?.trim() + if (!BEARER) { + return NextResponse.json({ error: 'X_BEARER_TOKEN is not configured. Add it to your .env file.' 
}, { status: 500 }) + } + if (!authToken?.trim() || !ct0?.trim()) { return NextResponse.json({ error: 'authToken and ct0 are required' }, { status: 400 }) } @@ -264,6 +299,8 @@ export async function POST(request: NextRequest): Promise { let imported = 0 let skipped = 0 + let updated = 0 + let errored = 0 let cursor: string | undefined try { @@ -271,49 +308,30 @@ export async function POST(request: NextRequest): Promise { const data = await fetchPage(authToken.trim(), ct0.trim(), source, cursor, userId) const { tweets, nextCursor } = parsePage(data, source) - for (const tweet of tweets) { - if (!tweet.rest_id) continue - - const exists = await prisma.bookmark.findUnique({ - where: { tweetId: tweet.rest_id }, - select: { id: true }, - }) - - if (exists) { - skipped++ - continue - } - - const media = extractMedia(tweet) - const userLegacy = tweet.core?.user_results?.result?.legacy ?? {} - - const created = await prisma.bookmark.create({ - data: { - tweetId: tweet.rest_id, + // Convert raw tweets to IncomingTweet format for the shared upsert + const incoming: IncomingTweet[] = tweets + .filter((t) => t.rest_id) + .map((tweet) => { + const { screen_name, name } = getUserInfo(tweet.core?.user_results?.result) + return { + tweetId: tweet.rest_id!, text: tweetFullText(tweet), - authorHandle: userLegacy.screen_name ?? 'unknown', - authorName: userLegacy.name ?? 'Unknown', + authorHandle: screen_name, + authorName: name, tweetCreatedAt: tweet.legacy?.created_at ? new Date(tweet.legacy.created_at) : null, rawJson: JSON.stringify(tweet), source, - }, + media: extractMedia(tweet), + } }) - if (media.length > 0) { - await prisma.mediaItem.createMany({ - data: media.map((m) => ({ - bookmarkId: created.id, - type: m.type, - url: m.url, - thumbnailUrl: m.thumbnailUrl ?? 
null, - })), - }) - } - - imported++ - } + const result = await upsertTweets(incoming) + imported += result.imported + skipped += result.skipped + updated += result.updated + errored += result.errored if (!nextCursor || tweets.length === 0) break cursor = nextCursor @@ -325,5 +343,5 @@ export async function POST(request: NextRequest): Promise { ) } - return NextResponse.json({ imported, skipped }) + return NextResponse.json({ imported, skipped, updated, errored }) } diff --git a/app/bookmarks/page.tsx b/app/bookmarks/page.tsx index 260fa5e..b318bce 100644 --- a/app/bookmarks/page.tsx +++ b/app/bookmarks/page.tsx @@ -398,11 +398,11 @@ function BookmarksPageInner() { {total > 0 ? ( <> {total.toLocaleString()} - {' '}bookmark{total !== 1 ? 's' : ''} + {' '}tweet{total !== 1 ? 's' : ''} {filters.q && for "{filters.q}"} ) : ( - 'No bookmarks found' + 'No tweets found' )}

@@ -421,7 +421,7 @@ function BookmarksPageInner() {
-

No bookmarks match your filters

+

No tweets match your filters

Try adjusting your search or removing some filters.

diff --git a/app/categories/[slug]/page.tsx b/app/categories/[slug]/page.tsx index f2b99d0..4784541 100644 --- a/app/categories/[slug]/page.tsx +++ b/app/categories/[slug]/page.tsx @@ -130,7 +130,7 @@ export default function CategoryPage() { {category.description && (

{category.description}

)} -

{total.toLocaleString()} bookmark{total !== 1 ? 's' : ''}

+

{total.toLocaleString()} tweet{total !== 1 ? 's' : ''}

+ {method === 'live' ? ( + ) : method === 'archive' ? ( + ) : method === 'bookmarklet' ? ( ) : ( @@ -1204,14 +1320,14 @@ function CategorizeStep({ importedCount, force = false }: { importedCount: numbe

Already up to date

-

All bookmarks in this file were already imported

+

All tweets in this file were already imported

- View your bookmarks + View your tweets