Skip to content

Commit 558a01e

Browse files
authored
feat: add helpers functions (#127)
- ExtractArxivIDFromURL - ExtractPaperIDs - ExtractPRID These will be used to generate mapping_table.pbtxt from YouTube Playlist.
1 parent 2589aff commit 558a01e

File tree

5 files changed

+206
-2
lines changed

5 files changed

+206
-2
lines changed

.golangci.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,7 @@ linters:
3030
- gomoddirectives
3131
- wrapcheck
3232
- funlen
33-
- goimports # False positive with paperswithcode_go
34-
- lll # long line lint
33+
- goimports # False positive with paperswithcode_go.
34+
- lll # I want to use a long line.
35+
- goerr113 # I want to use dynamic error.
36+
- gochecknoinits # I want to use init().

metadata-manager/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ require (
66
github.com/codingpot/paperswithcode-go/v2 v2.1.3
77
github.com/codingpot/pr12er/server v0.0.0-20210618171448-3868874e9b7a
88
github.com/google/go-cmp v0.5.6
9+
github.com/rocketlaunchr/google-search v1.1.2
910
github.com/sirupsen/logrus v1.8.1
1011
github.com/spf13/cobra v1.1.3
1112
github.com/spf13/viper v1.7.0

metadata-manager/go.sum

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
4545
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
4646
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
4747
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
48+
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
49+
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
4850
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
4951
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
5052
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
@@ -54,6 +56,14 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy
5456
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
5557
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
5658
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
59+
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
60+
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
61+
github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA=
62+
github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
63+
github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM=
64+
github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
65+
github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk=
66+
github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
5767
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
5868
github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
5969
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
@@ -132,6 +142,10 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V
132142
github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
133143
github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
134144
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
145+
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
146+
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
147+
github.com/gocolly/colly/v2 v2.0.1 h1:GGPzBEdrEsavhzVK00FQXMMHBHRpwrbbCCcEKM/0Evw=
148+
github.com/gocolly/colly/v2 v2.0.1/go.mod h1:ePrRZlJcLTU2C/f8pJzXfkdBtBDHL5hOaKLcBoiJcq8=
135149
github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
136150
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
137151
github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
@@ -255,6 +269,7 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:
255269
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
256270
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
257271
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
272+
github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg=
258273
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
259274
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
260275
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
@@ -268,6 +283,8 @@ github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7
268283
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
269284
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
270285
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
286+
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
287+
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
271288
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
272289
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
273290
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
@@ -373,11 +390,15 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O
373390
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
374391
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
375392
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
393+
github.com/rocketlaunchr/google-search v1.1.2 h1:eWHvh41hINPGZvoxX5DRIuwE2H7NAZ2L79ef5seLDT0=
394+
github.com/rocketlaunchr/google-search v1.1.2/go.mod h1:6WRqswVvv6PJtvGCB1sEVxt726NolmA0QTl6xSEXel0=
376395
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
377396
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
378397
github.com/rogpeppe/go-internal v1.6.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
379398
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
380399
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
400+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
401+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
381402
github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E=
382403
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
383404
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
@@ -413,6 +434,7 @@ github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3
413434
github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI=
414435
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
415436
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
437+
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
416438
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
417439
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
418440
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
@@ -422,6 +444,8 @@ github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5Cc
422444
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
423445
github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
424446
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
447+
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
448+
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
425449
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
426450
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
427451
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
@@ -496,6 +520,7 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
496520
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
497521
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
498522
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
523+
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
499524
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
500525
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
501526
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -638,6 +663,8 @@ golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxb
638663
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
639664
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
640665
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
666+
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324 h1:Hir2P/De0WpUhtrKGGjvSb2YxUgyZ7EFOSLIcSSpiwE=
667+
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
641668
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
642669
golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
643670
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

metadata-manager/internal/transform/transform.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,19 @@
22
package transform
33

44
import (
5+
"context"
6+
"fmt"
7+
"regexp"
8+
"strconv"
59
"strings"
610

711
"github.com/codingpot/paperswithcode-go/v2/models"
812
"github.com/codingpot/pr12er/server/pkg/pr12er"
13+
"github.com/rocketlaunchr/google-search"
914
)
1015

16+
var prIDRegexp = regexp.MustCompile(`pr-?(\d+)`)
17+
1118
func frameworkToEnum(paperFramework string) pr12er.Framework {
1219
switch paperFramework {
1320
case "tf":
@@ -60,3 +67,63 @@ func Methods(methods []*models.Method) []*pr12er.Method {
6067
}
6168
return pr12erMethods
6269
}
70+
71+
// ExtractPaperIDs Google Search with the title and gets ArxivIDs.
72+
//
73+
// For example,
74+
// "PR-274: On mutual information maximization for representation learning"
75+
// => []string{"1907.13625", "2103.04537", "1910.08350"}.
76+
func ExtractPaperIDs(title string) ([]string, error) {
77+
searchTerm := prIDRegexp.ReplaceAllString(strings.ToLower(title), "") + " site:arxiv.org"
78+
search, err := googlesearch.Search(context.Background(), searchTerm)
79+
if err != nil {
80+
return nil, err
81+
}
82+
maxLen := 3
83+
if len(search) < maxLen {
84+
maxLen = len(search)
85+
}
86+
87+
var paperIDs []string
88+
89+
for i := 0; i < maxLen; i++ {
90+
arxivID, _ := ExtractArxivIDFromURL(search[i].URL)
91+
paperIDs = append(paperIDs, arxivID)
92+
}
93+
94+
return paperIDs, nil
95+
}
96+
97+
// ExtractArxivIDFromURL extracts ArxivID from the URL.
98+
//
99+
// For example,
100+
// https://arxiv.org/abs/2102.03732 => 2102.03732
101+
func ExtractArxivIDFromURL(url string) (string, error) {
102+
if !strings.Contains(url, "arxiv.org") {
103+
return "", fmt.Errorf("%s does not contain arxiv.org", url)
104+
}
105+
splits := strings.Split(url, "/")
106+
if len(splits) == 0 {
107+
return "", fmt.Errorf("%s does not contain any ArxivID", url)
108+
}
109+
return splits[len(splits)-1], nil
110+
}
111+
112+
// ExtractPRID extracts PR ID from the title.
113+
//
114+
// For example,
115+
//
116+
// PR-274: On mutual information maximization for representation learning
117+
// => 274.
118+
func ExtractPRID(title string) (int32, error) {
119+
submatch := prIDRegexp.FindStringSubmatch(strings.ToLower(title))
120+
if len(submatch) < 2 {
121+
return 0, fmt.Errorf("expected PR-XXX but got %s", title)
122+
}
123+
//nolint:gosec
124+
atoi, err := strconv.Atoi(submatch[1])
125+
if err != nil {
126+
return 0, err
127+
}
128+
return int32(atoi), nil
129+
}

metadata-manager/internal/transform/transform_test.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,110 @@ func Test_ownerFromURL(t *testing.T) {
9090
})
9191
}
9292
}
93+
94+
func TestExtractPRID(t *testing.T) {
95+
tests := []struct {
96+
name string
97+
title string
98+
want int32
99+
wantErr bool
100+
}{
101+
{
102+
name: "PR-274 should return 274",
103+
title: "PR-274: On mutual information maximization for representation learning",
104+
want: 274,
105+
wantErr: false,
106+
},
107+
{
108+
name: "PR274 should return 274",
109+
title: "PR274: On mutual information maximization for representation learning",
110+
want: 274,
111+
wantErr: false,
112+
},
113+
{
114+
name: "PR0 should return 0",
115+
title: "PR0: On mutual information maximization for representation learning",
116+
want: 0,
117+
wantErr: false,
118+
},
119+
{
120+
name: "Invalid name should return an error",
121+
title: "This is not a valid PR Video",
122+
wantErr: true,
123+
},
124+
}
125+
126+
for _, tt := range tests {
127+
t.Run(tt.name, func(t *testing.T) {
128+
got, err := ExtractPRID(tt.title)
129+
if tt.wantErr {
130+
assert.Error(t, err)
131+
} else {
132+
assert.NoError(t, err)
133+
assert.Equal(t, tt.want, got)
134+
}
135+
})
136+
}
137+
}
138+
139+
func TestExtractPaperIDs(t *testing.T) {
140+
t.Skip("Skip this function as Google can block you")
141+
142+
tests := []struct {
143+
name string
144+
title string
145+
want []string
146+
wantErr bool
147+
}{
148+
{
149+
name: "It returns up to 3 papers by inferring from the title",
150+
title: "PR-274: On mutual information maximization for representation learning",
151+
want: []string{"1907.13625", "2103.04537", "1910.08350"},
152+
wantErr: false,
153+
},
154+
}
155+
156+
for _, tt := range tests {
157+
t.Run(tt.name, func(t *testing.T) {
158+
got, err := ExtractPaperIDs(tt.title)
159+
if tt.wantErr {
160+
assert.Error(t, err)
161+
} else {
162+
assert.NoError(t, err)
163+
assert.Equal(t, tt.want, got)
164+
}
165+
})
166+
}
167+
}
168+
169+
func TestExtractArxivIDFromURL(t *testing.T) {
170+
tests := []struct {
171+
name string
172+
url string
173+
want string
174+
wantErr bool
175+
}{
176+
{
177+
name: "Valid URL returns the arxivID",
178+
url: "https://arxiv.org/abs/2103.04537",
179+
want: "2103.04537",
180+
wantErr: false,
181+
},
182+
{
183+
name: "Invalid URL returns an error",
184+
url: "https://gabs/2103.04537",
185+
wantErr: true,
186+
},
187+
}
188+
for _, tt := range tests {
189+
t.Run(tt.name, func(t *testing.T) {
190+
got, err := ExtractArxivIDFromURL(tt.url)
191+
if tt.wantErr {
192+
assert.Error(t, err)
193+
} else {
194+
assert.NoError(t, err)
195+
assert.Equal(t, tt.want, got)
196+
}
197+
})
198+
}
199+
}

0 commit comments

Comments
 (0)