Skip to content

Commit e4631e8

Browse files
author
Mark MacGillivray
committed
Merge remote-tracking branch 'origin/develop'
2 parents 1558091 + 1694ba0 commit e4631e8

File tree

7 files changed

+273
-161
lines changed

7 files changed

+273
-161
lines changed

worker/dist/worker.js

Lines changed: 164 additions & 91 deletions
Large diffs are not rendered by default.

worker/dist/worker.min.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

worker/src/api.coffee

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ P = () ->
235235
p[k]._cache ?= false if nd.startsWith 'auth'
236236
p[k]._index ?= true if p[k]._sheet
237237
if p[k]._index # add index functions to index endpoints
238-
for ik in ['keys', 'terms', 'suggest', 'count', 'percent', 'min', 'max', 'range', 'sum', 'mapping', '_for', '_each', '_bulk', '_refresh'] # of P.index
238+
for ik in ['keys', 'terms', 'suggest', 'count', 'percent', 'min', 'max', 'range', 'sum', 'average', 'mapping', '_for', '_each', '_bulk', '_refresh'] # of P.index
239239
p[k][ik] ?= {_indexed: ik, _auth: (if ik.startsWith('_') then 'system' else p[k]._auth)}
240240
if typeof p[k] is 'function' and not p[k]._index and not p[k]._indexed and not p[k]._kv and not p[k]._bg and (not nd.includes('.') or n.startsWith('index') or nd.split('.').pop().startsWith '_')
241241
a[k] = p[k].bind @
@@ -367,7 +367,7 @@ P._response = (res, fn) ->
367367
res = res.replace /\>\</g, '>\n<'
368368
if not res.includes('<html') and not @params.partial
369369
ret = '<!DOCTYPE html><html dir="ltr" lang="en">\n<head>\n'
370-
ret += '<meta charset="utf-8">\n<meta name="viewport" content="width=device-width, initial-scale=1.0">\n';
370+
ret += '<meta charset="utf-8">\n<meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
371371
if res.includes '<title'
372372
[pt, tt] = res.split '<title'
373373
[tt, at] = tt.split '</title>'
@@ -517,6 +517,8 @@ P._wrapper = (f, n) -> # the function to wrap and the string name of the functio
517517
else
518518
res = status: 404
519519
nfeml = @params.email
520+
pfs = @params.funders
521+
delete qry.funders
520522
delete qry.email
521523
delete @params.size
522524
delete qry.size
@@ -560,44 +562,65 @@ P._wrapper = (f, n) -> # the function to wrap and the string name of the functio
560562
ks = ks.splice(ex, 1) if ex isnt -1
561563
if nfeml
562564
await @mail to: nfeml, subject: 'Your export has started (ref: ' + flid + '.csv)', text: 'Your export has started. You can download the file any time, it will keep growing until it is complete, when you will get another notification.<br><br><a href="' + eurl + '">Download csv</a><br><br>Thanks'
563-
_makecsv = (rt, qry, out, keys, notify, eurl, pfs) =>
  # Background csv writer: appends a header row and then one row per record matched by
  # qry on route rt to the file at path out, and finally mails notify (when set) a
  # download link (eurl).
  #   keys   - column names for the generic export; dotted keys are read from the flattened record
  #   notify - address for the completion email; also selects the record cap used below
  #   pfs    - when truthy (the caller passes the funders param) the columns are fixed to
  #            DOI / funder.name / funder.award, with one ;-separated entry per funder
  # Every cell is written inside double quotes, so quotes and newlines are stripped from
  # string values before they are appended.
  _clean = (v) ->
    return '' if not v?
    v = String(v) if typeof v isnt 'string'
    v.replace(/"/g, '').replace(/\n/g, '').replace(/\s\s+/g, ' ')

  keys = ['DOI', 'funder.name', 'funder.award'] if pfs

  # header row
  first = true
  for key in keys
    await fs.appendFile out, (if not first then ',"' else '"') + key + '"'
    first = false

  for await blr from @index._for rt, qry, {scroll: '5m', max: if notify is 'joe@oa.works' then 100000 else 30000}
    await fs.appendFile out, '\n'
    if pfs
      # one name and one award entry per funder, kept in the same order so the two
      # ;-separated cells line up position by position
      names = []
      awards = []
      for funder in (if Array.isArray(blr.funder) then blr.funder else [])
        continue if not funder?
        award = funder.award ? ''
        # only arrays are joined - a plain string award previously hit .join and threw
        award = award.join(' ') if Array.isArray award
        names.push _clean funder.name
        awards.push _clean(award).replace /;/g, '' # ; is the separator between funders
      # a missing DOI becomes an empty cell rather than the text "undefined"
      await fs.appendFile out, '"' + _clean(blr.DOI) + '","' + names.join(';') + '","' + awards.join(';') + '"'
    else
      first = true
      for k in keys
        if k.includes '.'
          try
            blfl = await @flatten blr
          catch
            blfl = undefined
        else
          blfl = blr
        if Array.isArray blfl
          # a list of objects is reduced to the list of their values at k
          if blfl.length and typeof blfl[0] is 'object'
            st = []
            for blp in blfl
              st.push(blp[k]) if blp?[k]?
            blfl = st
          nar = {}
          nar[k] = blfl
          blfl = nar
        if not blfl? or not blfl[k]?
          val = ''
        else if typeof blfl[k] is 'object'
          blfl[k] = blfl[k].join(';') if Array.isArray blfl[k]
          val = JSON.stringify blfl[k]
        else
          val = blfl[k]
        val = val.replace(/"/g, '').replace(/\n/g, '').replace(/\s\s+/g, ' ') if typeof val is 'string'
        await fs.appendFile out, (if not first then ',"' else '"') + val + '"'
        first = false

  if notify
    await @mail to: notify, subject: 'Your export is complete (ref: ' + out.split('/').pop() + ')', text: 'Your export is complete. This link will expire in approximately 2 days.<br><br><a href="' + eurl + '">Download csv</a>\n\nThanks'
600-
@waitUntil _makecsv rt, qry, out, ks, nfeml, eurl
623+
@waitUntil _makecsv rt, qry, out, ks, nfeml, eurl, pfs
601624
delete @format
602625
#if nfeml
603626
res = eurl

worker/src/report.coffee

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ P.report.orgs.supplement = (sheetname, orgname, max, changed, reload, xref, olx)
127127
try rr.supplements[0].apc_cost = parseInt row[hp]
128128
else
129129
rr.supplements[0][h] = if not row[hp] then undefined else if row[hp].trim().toLowerCase() in ['true', 'yes'] then true else if row[hp].trim().toLowerCase() in ['false', 'no'] then false else if h.toLowerCase() in ['grant_id', 'ror'] then row[hp].replace(/\//g, ',').replace(/ /g, '').split(',') else row[hp]
130+
rr.supplements[0][h] = rr.supplements[0][h].split(';') if typeof rr.supplements[0][h] is 'string' and rr.supplements[0][h].includes ';'
130131
rr.DOI ?= rr.doi
131132
if rr.DOI and rr.DOI.startsWith 'http'
132133
try rr.DOI = '10.' + rr.DOI.split('/10.')[1] # avoid dirty inputs that are full URLs
@@ -145,7 +146,6 @@ P.report.orgs.supplement = (sheetname, orgname, max, changed, reload, xref, olx)
145146
dois[changed.DOI] = changed
146147

147148
batch = []
148-
noxref = 0
149149
for d of dois
150150
loaded += 1
151151
console.log loaded
@@ -168,8 +168,9 @@ P.report.orgs.supplement = (sheetname, orgname, max, changed, reload, xref, olx)
168168

169169
if not wrr?.title or xref? or olx? #xref and olx are passed from the changes check for paid records
170170
cr = xref ? await @src.crossref.works rr.DOI # ? await @src.crossref.works.doi rr.DOI
171-
noxref += 1 if not cr?
172-
if cr? and processed = await @report.works._process cr, olx
171+
ol = olx ? await @src.openalex.works 'ids.doi:"https://doi.org/' + rr.DOI + '"', 1
172+
ol ?= await @src.openalex.works.doi rr.DOI
173+
if (cr? or ol?) and processed = await @report.works._process cr, ol
173174
if processed.is_paratext or processed.is_retracted
174175
rr = undefined
175176
else
@@ -237,7 +238,6 @@ P.report.orgs.supplement = (sheetname, orgname, max, changed, reload, xref, olx)
237238
for ss of orgsheets[os]
238239
text += ss + orgsheets[os][ss] + ' DOIs\n'
239240
text += '\n\n' + doid + ' DOIs were found in the sheets\n\n'
240-
text += noxref + ' were not found in crossref\n\n'
241241

242242
@mail
243243
to: ['mark@oa.works', 'joe@oa.works']
@@ -342,11 +342,11 @@ P.report.works.load = (timestamp, crossref, openalex, supplement, qry, oaqry, no
342342
crossref ?= @params.crossref
343343
openalex ?= @params.openalex
344344
supplement ?= false #@params.supplement
345+
overwrite = @params.overwrite ? true
345346

346347
if @params.clear
347348
await @report.works ''
348349

349-
present = []
350350
batch = []
351351

352352
if @params.year
@@ -370,23 +370,23 @@ P.report.works.load = (timestamp, crossref, openalex, supplement, qry, oaqry, no
370370
if crossref isnt false
371371
console.log 'Starting OA report works loading from crossref'
372372
for await cr from @index._for 'src_crossref_works', qry, scroll: '30m', max: @params.max, include: ['DOI', 'subject', 'title', 'subtitle', 'volume', 'issue', 'year', 'publisher', 'published', 'funder', 'license', 'is_oa']
373-
total += 1
374-
#console.log total
375-
present.push cr.DOI.toLowerCase()
376-
prc = await @report.works._process cr
377-
if prc? and not prc.is_retracted and not prc.is_paratext
378-
delete prc.is_retracted
379-
delete prc.is_paratext
380-
dt = await @date()
381-
if dt.includes(year) and timestamp
382-
exists = await @report.works cr.DOI
383-
if exists?.supplements?
384-
prc[e] ?= exists[e] for e in ['supplements', 'orgs', 'author_email_name', 'pmc_checked', 'paid', 'PMCID', 'epmc_licence', 'pmc_has_data_availability_statement']
385-
batch.push prc
386-
if batch.length is 10000
387-
await @report.works batch
388-
console.log 'OA report works loading', total, Math.ceil ((await @epoch()) - started)/60000
389-
batch = []
373+
if overwrite is true or not await @report.works cr.DOI
374+
total += 1
375+
console.log('report works load xref', total) if not timestamp and total % 20 is 0
376+
prc = await @report.works._process cr
377+
if prc? and not prc.is_retracted and not prc.is_paratext
378+
delete prc.is_retracted
379+
delete prc.is_paratext
380+
dt = await @date()
381+
if dt.includes(year) and timestamp
382+
exists = await @report.works cr.DOI
383+
if exists?.supplements?
384+
prc[e] ?= exists[e] for e in ['supplements', 'orgs', 'author_email_name', 'pmc_checked', 'paid', 'PMCID', 'epmc_licence', 'pmc_has_data_availability_statement']
385+
batch.push prc
386+
if batch.length is 10000
387+
await @report.works batch
388+
console.log 'OA report works loading', total, Math.ceil ((await @epoch()) - started)/60000
389+
batch = []
390390

391391
oaqry ?= 'authorships.institutions.display_name:* AND publication_year:' + year
392392
oaqry = '(' + oaqry + ') AND publication_year:' + year if year and not oaqry.includes ':' + year
@@ -397,18 +397,20 @@ P.report.works.load = (timestamp, crossref, openalex, supplement, qry, oaqry, no
397397
if openalex isnt false
398398
console.log 'Starting OA report works loading from openalex'
399399
for await ol from @index._for 'src_openalex_works', oaqry, scroll: '30m', max: @params.max
400-
if not ol.ids?.doi or ol.ids.doi.toLowerCase() not in present
401-
total += 1
402-
#console.log total
403-
prc = await @report.works._process undefined, ol
404-
if prc? and not prc.is_retracted and not prc.is_paratext
405-
delete prc.is_retracted
406-
delete prc.is_paratext
407-
batch.push prc
408-
if batch.length is 10000
409-
await @report.works batch
410-
console.log 'OA report works loading from openalex', total, Math.ceil ((await @epoch()) - started)/60000
411-
batch = []
400+
if not ol.ids?.doi
401+
#exists = await @report.works ol.ids.doi.split('doi.org/')[1].toLowerCase()
402+
if true #(overwrite is true or not exists?) and not exists?.authorships?
403+
total += 1
404+
console.log('report works load oalx', total) if not timestamp and total % 20 is 0
405+
prc = await @report.works._process undefined, ol
406+
if prc? and not prc.is_retracted and not prc.is_paratext
407+
delete prc.is_retracted
408+
delete prc.is_paratext
409+
batch.push prc
410+
if batch.length is 10000
411+
await @report.works batch
412+
console.log 'OA report works loading from openalex', total, Math.ceil ((await @epoch()) - started)/60000
413+
batch = []
412414

413415
if batch.length
414416
await @report.works batch

worker/src/sources/openalex.coffee

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ P.src.openalex.institutions = _index: true, _prefix: false
1717
P.src.openalex.concepts = _index: true, _prefix: false
1818
P.src.openalex.venues = _index: true, _prefix: false
1919

20+
P.src.openalex.works.doi = (doi) ->
  # Look up one OpenAlex work by DOI.
  #   doi - bare DOI string; defaults to the doi request param
  # Tries @src.openalex.works first with an ids.doi query and, when that finds nothing,
  # fetches the work from the OpenAlex API and hands it to @src.openalex.works in the
  # background (presumably saving it under the lowercased DOI - confirm against the
  # index endpoint). Returns the record found, or a falsy value when neither lookup
  # produced one.
  doi ?= @params.doi
  # without a DOI both lookups would be run against "https://doi.org/undefined"
  return undefined if typeof doi isnt 'string' or not doi.length
  found = await @src.openalex.works 'ids.doi:"https://doi.org/' + doi + '"', 1
  if not found
    found = await @fetch 'https://api.openalex.org/works/https://doi.org/' + doi
    if found
      @waitUntil @src.openalex.works doi.toLowerCase(), found
  return found
26+
2027
P.src.openalex.load = (what, changes, clear, sync, last) ->
2128
what ?= @params.load ? @params.openalex
2229
return false if what not in ['works', 'venues', 'authors', 'institutions', 'concepts']

worker/src/utilities/index.coffee

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,12 +320,14 @@ P.index.min = (route, key, qry, end='min') ->
320320
if route.indexOf('/') isnt -1
321321
[route, key] = route.split '/'
322322
cq = @copy @params
323-
delete cq[k] for k in ['index', 'route', 'min', 'max', 'key', 'sum']
323+
delete cq[k] for k in ['index', 'route', 'min', 'max', 'key', 'sum', 'average']
324324
qry ?= await @index.translate cq
325325
query = if typeof key is 'object' then key else if qry? then qry else query: bool: must: [], filter: [exists: field: key]
326326
query.size = 0
327327
if end is 'sum'
328328
query.aggs = sum: sum: field: key
329+
else if end is 'average'
330+
query.aggs = average: avg: field: key
329331
else
330332
query.aggs = {}
331333
query.aggs.min = {min: {field: key}} if end in ['min', 'range']
@@ -336,6 +338,7 @@ P.index.min = (route, key, qry, end='min') ->
336338
# Thin wrappers around P.index.min: each passes its arguments straight through and
# picks the aggregation with the fourth (end) argument.
P.index.max = (route, key, qry) ->
  @index.min route, key, qry, 'max'
P.index.range = (route, key, qry) ->
  @index.min route, key, qry, 'range'
P.index.sum = (route, key, qry) ->
  @index.min route, key, qry, 'sum'
P.index.average = (route, key, qry) ->
  @index.min route, key, qry, 'average'
339342

340343
P.index.mapping = (route) ->
341344
route = route.replace /^\//, '' # remove any leading /

worker/src/utilities/object.coffee

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ P.dot = (o, k, v, d) ->
4747

4848
P.flatten = (obj, arrayed) ->
4949
arrayed ?= @params.arrayed ? false # arrayed puts objects in arrays at keys like author.0.name Whereas not arrayed shoves them all in one author.name (which means some that don't have the value could cause position mismatch in lists)
50-
#obj = await @src.crossref.works '10.1016/j.mee.2015.04.018'
51-
#obj = await @report.works '10.1016/j.socnet.2021.02.007'
5250
if not obj?
5351
obj = @params
5452
delete obj.arrayed
@@ -87,6 +85,12 @@ P.flatten = (obj, arrayed) ->
8785
await _flatten obj
8886
return res
8987

88+
#P.flatest = () ->
89+
# res = original: await @src.openalex.works 'doi.keyword:"https://doi.org/10.1016/j.mee.2015.04.018"', 1 #@src.crossref.works '10.1016/j.mee.2015.04.018' #@report.works '10.1016/j.socnet.2021.02.007'
90+
# res.flat = await @flatten res.original
91+
# res.arrayed = await @flatten res.original, true
92+
# return [res.arrayed]
93+
9094
P.keys = (obj) ->
9195
try obj ?= @params
9296
keys = []

0 commit comments

Comments
 (0)