Skip to content

Commit 5ec13e9

Browse files
authored
chore: add data integrity validation test (#125)
* chore: add data integrity validation test
* chore: update database
1 parent 7423acb commit 5ec13e9

File tree

3 files changed

+48
-13
lines changed

3 files changed

+48
-13
lines changed

server/internal/data/data_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package data
2+
3+
import (
4+
"regexp"
5+
"strconv"
6+
"strings"
7+
"testing"
8+
9+
"github.com/stretchr/testify/assert"
10+
)
11+
12+
// TestValidData validates data integrity of the database.
13+
func TestValidData(t *testing.T) {
14+
r := regexp.MustCompile(`pr-?(\d+)`)
15+
16+
for prID, video := range DB.GetPrIdToVideo() {
17+
// Every video should have PR ID set.
18+
assert.Equal(t, prID, video.GetPrId())
19+
20+
title := strings.ToLower(video.GetVideo().GetVideoTitle())
21+
if title != "" {
22+
ret := r.FindStringSubmatch(title)
23+
assert.Equal(t, len(ret), 2)
24+
25+
// nolint: gosec
26+
atoi, err := strconv.Atoi(ret[1])
27+
if err != nil {
28+
assert.NoError(t, err)
29+
}
30+
31+
// Every video's title should contain PR-XXX that matches PR-ID.
32+
// WARNING: it's fragile. The title format may change in the future.
33+
assert.Equal(t, prID, int32(atoi))
34+
}
35+
}
36+
}

server/internal/data/database.pbtxt

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4432,7 +4432,7 @@ pr_id_to_video: {
44324432
url: "https://github.com/CompVis/taming-transformers"
44334433
owner: "CompVis"
44344434
framework: FRAMEWORK_PYTORCH
4435-
number_of_stars: 1455
4435+
number_of_stars: 1457
44364436
description: "Taming Transformers for High-Resolution Image Synthesis"
44374437
}
44384438
repositories: {
@@ -5095,7 +5095,7 @@ pr_id_to_video: {
50955095
video_id: "iCgT8G4PkqI"
50965096
video_title: "PR-051: Conditional Generative Adversarial Nets"
50975097
number_of_likes: 24
5098-
number_of_views: 3459
5098+
number_of_views: 3460
50995099
published_date: {
51005100
seconds: 1512310569
51015101
}
@@ -15635,6 +15635,16 @@ pr_id_to_video: {
1563515635
description: "Repo for my Master Thesis at ULiège in 2019 (Machine learning under resource constraints)"
1563615636
}
1563715637
}
15638+
video: {
15639+
video_id: "vUNAJsO2G98"
15640+
video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks"
15641+
number_of_likes: 4
15642+
number_of_views: 509
15643+
published_date: {
15644+
seconds: 1565712056
15645+
}
15646+
uploader: "Sunghoon Joo"
15647+
}
1563815648
}
1563915649
}
1564015650
pr_id_to_video: {
@@ -15659,16 +15669,6 @@ pr_id_to_video: {
1565915669
description: "**MAML**, or **Model-Agnostic Meta-Learning**, is a model and task-agnostic algorithm for meta-learning that trains a model’s parameters such that a small number of gradient updates will lead to fast learning on a new task.\r\n\r\nConsider a model represented by a parametrized function $f\\_{\\theta}$ with parameters $\\theta$. When adapting to a new task $\\mathcal{T}\\_{i}$, the model’s parameters $\\theta$ become $\\theta'\\_{i}$. With MAML, the updated parameter vector $\\theta'\\_{i}$ is computed using one or more gradient descent updates on task $\\mathcal{T}\\_{i}$. For example, when using one gradient update,\r\n\r\n$$ \\theta'\\_{i} = \\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right) $$\r\n\r\nThe step size $\\alpha$ may be fixed as a hyperparameter or metalearned. The model parameters are trained by optimizing for the performance of $f\\_{\\theta'\\_{i}}$ with respect to $\\theta$ across tasks sampled from $p\\left(\\mathcal{T}\\_{i}\\right)$. More concretely the meta-objective is as follows:\r\n\r\n$$ \\min\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right) = \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta - \\alpha\\nabla\\_{\\theta}\\mathcal{L}\\_{\\mathcal{T}\\_{i}}\\left(f\\_{\\theta}\\right)}\\right) $$\r\n\r\nNote that the meta-optimization is performed over the model parameters $\\theta$, whereas the objective is computed using the updated model parameters $\\theta'$. In effect MAML aims to optimize the model parameters such that one or a small number of gradient steps on a new task will produce maximally effective behavior on that task. 
The meta-optimization across tasks is performed via stochastic gradient descent (SGD), such that the model parameters $\\theta$ are updated as follows:\r\n\r\n$$ \\theta \\leftarrow \\theta - \\beta\\nabla\\_{\\theta} \\sum\\_{\\mathcal{T}\\_{i} \\sim p\\left(\\mathcal{T}\\right)} \\mathcal{L}\\_{\\mathcal{T\\_{i}}}\\left(f\\_{\\theta'\\_{i}}\\right)$$\r\n\r\nwhere $\\beta$ is the meta step size."
1566015670
}
1566115671
}
15662-
video: {
15663-
video_id: "vUNAJsO2G98"
15664-
video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks"
15665-
number_of_likes: 4
15666-
number_of_views: 509
15667-
published_date: {
15668-
seconds: 1565712056
15669-
}
15670-
uploader: "Sunghoon Joo"
15671-
}
1567215672
}
1567315673
}
1567415674
pr_id_to_video: {

server/internal/data/mapping_table.pbtxt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -932,7 +932,6 @@ rows: {
932932
rows: {
933933
pr_id: 188
934934
paper_arxiv_id: "1902.08438"
935-
youtube_video_id: "vUNAJsO2G98"
936935
}
937936
rows: {
938937
pr_id: 189

0 commit comments

Comments
 (0)