chore: add data integrity validation test (#125)

kkweon · web-flow · commit 5ec13e9b4543 · 2021-06-24T20:29:32.000-07:00
* chore: add data integrity validation test

* chore: update database
diff --git a/server/internal/data/data_test.go b/server/internal/data/data_test.go
@@ -0,0 +1,36 @@
+package data
+
+import (
+	"regexp"
+	"strconv"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestValidData validates data integrity of the database.
+func TestValidData(t *testing.T) {
+	r := regexp.MustCompile(`pr-?(\d+)`)
+
+	for prID, video := range DB.GetPrIdToVideo() {
+		// Every video should have PR ID set.
+		assert.Equal(t, prID, video.GetPrId())
+
+		title := strings.ToLower(video.GetVideo().GetVideoTitle())
+		if title == "" {
+			continue
+		}
+		// A title without a PR number yields no submatch; skip it so ret[1] cannot panic.
+		ret := r.FindStringSubmatch(title)
+		if !assert.Len(t, ret, 2, "title %q has no PR number", title) {
+			continue
+		}
+
+		// Every video's title should contain PR-XXX that matches PR-ID.
+		// WARNING: it's fragile. The title format may change in the future.
+		if id, err := strconv.ParseInt(ret[1], 10, 32); assert.NoError(t, err) {
+			assert.Equal(t, prID, int32(id))
+		}
+	}
+}
diff --git a/server/internal/data/database.pbtxt b/server/internal/data/database.pbtxt
@@ -4432,7 +4432,7 @@ pr_id_to_video: {
         url: "https://github.com/CompVis/taming-transformers"
         owner: "CompVis"
         framework: FRAMEWORK_PYTORCH
-        number_of_stars: 1455
+        number_of_stars: 1457
         description: "Taming Transformers for High-Resolution Image Synthesis"
       }
       repositories: {
@@ -5095,7 +5095,7 @@ pr_id_to_video: {
       video_id: "iCgT8G4PkqI"
       video_title: "PR-051: Conditional Generative Adversarial Nets"
       number_of_likes: 24
-      number_of_views: 3459
+      number_of_views: 3460
       published_date: {
         seconds: 1512310569
       }
@@ -15635,6 +15635,16 @@ pr_id_to_video: {
         description: "Repo for my Master Thesis at ULiège in 2019 (Machine learning under resource constraints)"
       }
     }
+    video: {
+      video_id: "vUNAJsO2G98"
+      video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks"
+      number_of_likes: 4
+      number_of_views: 509
+      published_date: {
+        seconds: 1565712056
+      }
+      uploader: "Sunghoon Joo"
+    }
   }
 }
 pr_id_to_video: {
@@ -15659,16 +15669,6 @@ pr_id_to_video: {
         description: "**MAML**, or **Model-Agnostic Meta-Learning**, is a model and task-agnostic algorithm for meta-learning that trains a model’s parameters such that a small number of gradient updates will lead to fast learning on a new task.\r\n\r\nConsider a model represented by a parametrized function $f\\_{\\theta}$ with parameters $\\theta$. When adapting to a new task $\\mathcal{T}\\_{i}$, the model’s parameters $\\theta$ become $\\theta'\\_{i}$. With MAML, the updated parameter vector $\\theta'\\_{i}$ is computed using one or more gradient descent updates on task $\\mathcal{T}\\_{i}$. For example, when using one gradient update,\r\n\r\n$$ \\theta'\\_{i} = \\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right) $$\r\n\r\nThe step size $\\alpha$ may be fixed as a hyperparameter or metalearned. The model parameters are trained by optimizing for the performance of $f\\_{\\theta'\\_{i}}$ with respect to $\\theta$ across tasks sampled from $p\\left(\\mathcal{T}\\_{i}\\right)$. More concretely the meta-objective is as follows:\r\n\r\n$$ \\min\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right) = \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right)}\\right) $$\r\n\r\nNote that the meta-optimization is performed over the model parameters $\\theta$, whereas the objective is computed using the updated model parameters $\\theta'$. In effect MAML aims to optimize the model parameters such that one or a small number of gradient steps on a new task will produce maximally effective behavior on that task. 
The meta-optimization across tasks is performed via stochastic gradient descent (SGD), such that the model parameters $\\theta$ are updated as follows:\r\n\r\n$$ \\theta \\leftarrow \\theta - \\beta\\nabla\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right)$$\r\n\r\nwhere $\\beta$ is the meta step size."
       }
     }
-    video: {
-      video_id: "vUNAJsO2G98"
-      video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks"
-      number_of_likes: 4
-      number_of_views: 509
-      published_date: {
-        seconds: 1565712056
-      }
-      uploader: "Sunghoon Joo"
-    }
   }
 }
 pr_id_to_video: {
diff --git a/server/internal/data/mapping_table.pbtxt b/server/internal/data/mapping_table.pbtxt
@@ -932,7 +932,6 @@ rows:  {
 rows:  {
   pr_id:  188
   paper_arxiv_id:  "1902.08438"
-  youtube_video_id:  "vUNAJsO2G98"
 }
 rows:  {
   pr_id:  189

Original file line number	Diff line number	Diff line change
`@@ -4432,7 +4432,7 @@ pr_id_to_video: {`
`4432`	`4432`	`url: "https://github.com/CompVis/taming-transformers"`
`4433`	`4433`	`owner: "CompVis"`
`4434`	`4434`	`framework: FRAMEWORK_PYTORCH`
`4435`		`- number_of_stars: 1455`
	`4435`	`+ number_of_stars: 1457`
`4436`	`4436`	`description: "Taming Transformers for High-Resolution Image Synthesis"`
`4437`	`4437`	`}`
`4438`	`4438`	`repositories: {`
`@@ -5095,7 +5095,7 @@ pr_id_to_video: {`
`5095`	`5095`	`video_id: "iCgT8G4PkqI"`
`5096`	`5096`	`video_title: "PR-051: Conditional Generative Adversarial Nets"`
`5097`	`5097`	`number_of_likes: 24`
`5098`		`- number_of_views: 3459`
	`5098`	`+ number_of_views: 3460`
`5099`	`5099`	`published_date: {`
`5100`	`5100`	`seconds: 1512310569`
`5101`	`5101`	`}`
`@@ -15635,6 +15635,16 @@ pr_id_to_video: {`
`15635`	`15635`	`description: "Repo for my Master Thesis at ULiège in 2019 (Machine learning under resource constraints)"`
`15636`	`15636`	`}`
`15637`	`15637`	`}`
	`15638`	`+ video: {`
	`15639`	`+ video_id: "vUNAJsO2G98"`
	`15640`	`+ video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks"`
	`15641`	`+ number_of_likes: 4`
	`15642`	`+ number_of_views: 509`
	`15643`	`+ published_date: {`
	`15644`	`+ seconds: 1565712056`
	`15645`	`+ }`
	`15646`	`+ uploader: "Sunghoon Joo"`
	`15647`	`+ }`
`15638`	`15648`	`}`
`15639`	`15649`	`}`
`15640`	`15650`	`pr_id_to_video: {`
`@@ -15659,16 +15669,6 @@ pr_id_to_video: {`
`15659`	`15669`	description: "MAML, or Model-Agnostic Meta-Learning, is a model and task-agnostic algorithm for meta-learning that trains a model’s parameters such that a small number of gradient updates will lead to fast learning on a new task.\r\n\r\nConsider a model represented by a parametrized function $f\\_{\\theta}$ with parameters $\\theta$. When adapting to a new task $\\mathcal{T}\\_{i}$, the model’s parameters $\\theta$ become $\\theta'\\_{i}$. With MAML, the updated parameter vector $\\theta'\\_{i}$ is computed using one or more gradient descent updates on task $\\mathcal{T}\\_{i}$. For example, when using one gradient update,\r\n\r\n$$ \\theta'\\_{i} = \\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right) $$\r\n\r\nThe step size $\\alpha$ may be fixed as a hyperparameter or metalearned. The model parameters are trained by optimizing for the performance of $f\\_{\\theta'\\_{i}}$ with respect to $\\theta$ across tasks sampled from $p\\left(\\mathcal{T}\\_{i}\\right)$. More concretely the meta-objective is as follows:\r\n\r\n$$ \\min\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right) = \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right)}\\right) $$\r\n\r\nNote that the meta-optimization is performed over the model parameters $\\theta$, whereas the objective is computed using the updated model parameters $\\theta'$. In effect MAML aims to optimize the model parameters such that one or a small number of gradient steps on a new task will produce maximally effective behavior on that task. 
The meta-optimization across tasks is performed via stochastic gradient descent (SGD), such that the model parameters $\\theta$ are updated as follows:\r\n\r\n$$ \\theta \\leftarrow \\theta - \\beta\\nabla\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right)$$\r\n\r\nwhere $\\beta$ is the meta step size."
`15660`	`15670`	`}`
`15661`	`15671`	`}`
`15662`		`- video: {`
`15663`		`- video_id: "vUNAJsO2G98"`
`15664`		`- video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks"`
`15665`		`- number_of_likes: 4`
`15666`		`- number_of_views: 509`
`15667`		`- published_date: {`
`15668`		`- seconds: 1565712056`
`15669`		`- }`
`15670`		`- uploader: "Sunghoon Joo"`
`15671`		`- }`
`15672`	`15672`	`}`
`15673`	`15673`	`}`
`15674`	`15674`	`pr_id_to_video: {`
Original file line number	Diff line number	Diff line change
`@@ -932,7 +932,6 @@ rows: {`
`932`	`932`	`rows: {`
`933`	`933`	`pr_id: 188`
`934`	`934`	`paper_arxiv_id: "1902.08438"`
`935`		`- youtube_video_id: "vUNAJsO2G98"`
`936`	`935`	`}`
`937`	`936`	`rows: {`
`938`	`937`	`pr_id: 189`