Document a few details about the final dataset

flesler · Jul 6, 2023 · 194de9b · 194de9b
1 parent 0ec96ab
commit 194de9b
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -12,22 +12,29 @@ In addition, the final JSON includes several questions generated by GPT-4, they
 
 I aimed to keep the original dataset intact, so all modifications are recorded in [inputs/overrides.json](inputs/overrides.json), and even those don't change the original text, they just rearrange it. It still needs some more cleaning up, but I won't be doing that in the short term.
 
-You can view the dataset here: https://flesler.github.io/satoshi-data/
+You can view the dataset as JSON [here](docs/qa.json) and visually here: https://flesler.github.io/satoshi-data/
 
 ## Setup
 
 ```
 npm install
 ```
 
-## Regenerate the JSON file
+### Regenerate the JSON file
 
 ```
 npm run start
 ```
 
-## Preview the JSON file locally
+### Preview the JSON file locally
 
 ```
 npm run serve
 ```
+
+## More about the dataset
+
+The output is a JSON file containing an array of objects, sorted chronologically. Many of the objects have a `type`, which is one of:
+1. `gpt-4`: Entries generated with GPT-4
+1. `favorite`: Entries I personally liked the most
+1. `ignore`: Entries I personally didn't find useful for my particular needs
diff --git a/docs/index.html b/docs/index.html
@@ -1,20 +1,20 @@
 <!DOCTYPE html>
 <html>
-	<head>
-		<title>Q&As of Satoshi Nakamoto</title>
-		<style type="text/css">
-			body { margin: 0; }
-			.qa { border-bottom: 1px solid black; padding: 10px; }
-			.ignore { background-color: #EEE; opacity: 0.7; }
-			.ignore::before { content: "❌"; }
-			.hide-ignored .ignore { display: none; }
-			.favorite { background-color: lightyellow; }
-			.favorite::before { content: "⭐️"; }
-			.gpt-4::before { content: "🤖"; }
-			p { margin: 10px 0 0 0; }
-		</style>
-	</head>
-	<body class="hide-ignored">
+  <head>
+    <title>Q&As of Satoshi Nakamoto</title>
+    <style type="text/css">
+      body { margin: 0; }
+      .qa { border-bottom: 1px solid black; padding: 10px; }
+      .ignore { background-color: #EEE; opacity: 0.7; }
+      .ignore::before { content: "❌"; }
+      .hide-ignored .ignore { display: none; }
+      .favorite { background-color: lightyellow; }
+      .favorite::before { content: "⭐️"; }
+      .gpt-4::before { content: "🤖"; }
+      p { margin: 10px 0 0 0; }
+    </style>
+  </head>
+  <body class="hide-ignored">
     <label id="show-ignored">
       <input type="checkbox" /> Show Ignored
     </label>
@@ -46,5 +46,5 @@
         document.body.className = check.checked ? '' : 'hide-ignored'
       }
     </script>
-	</body>
+  </body>
 </html>
diff --git a/parse.ts b/parse.ts
@@ -48,7 +48,7 @@ const splitEmail = (email: string | undefined) => {
     .replace(/> >/g, '>>')
     .split('\n')
     .map(l => l.trim())
-  
+
   const signature = lines.indexOf('---------------------------------------------------------------------')
   if (signature !== -1) {
     let len = signature - 1
@@ -114,7 +114,7 @@ const parseEmails = () => {
       }
     }
   }
-  
+
   return out
 }
 
@@ -167,7 +167,7 @@ const parsePosts = () => {
       delete post.satoshi_id
       continue
     }
-    
+
     const [first, ...parts] = overrides[post.url]?.parts || splitPost(post.content)
     if (first) {
       // Should be using nested_level for some, but seems like Satoshi replied without nesting correctly (?)
@@ -189,14 +189,14 @@ const parsePosts = () => {
       }
     }
   }
-  
+
   return out
 }
 
 const qas = parsePosts().concat(parseEmails())
   .map((qa) => ({
     type: overrides[qa.src]?.type, ...qa,
-    date: new Date(qa.date + ' UTC').toISOString().split('.')[0].replace('T', ' '), 
+    date: new Date(qa.date + ' UTC').toISOString().split('.')[0].replace('T', ' '),
   }))
   .sort((a, b) => a.date > b.date ? 1 : a.date < b.date ? -1 : 0)
   .map((qa, i) => ({ id: i + 1, ...qa }))