diff --git a/worker/dist/worker.js b/worker/dist/worker.js index 92f0c2b..24f4932 100644 --- a/worker/dist/worker.js +++ b/worker/dist/worker.js @@ -2558,7 +2558,7 @@ P.metadata = async function(doi) { }; P.find = async function(options, metadata = {}, content) { - var _ill, _metadata, _permissions, _searches, bct, bong, dd, dps, epmc, i, len, mag, mct, ref, ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8, res, uo; + var _ill, _metadata, _permissions, _searches, dd, dps, epmc, i, len, mag, ref, ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8, res, uo; res = {}; _metadata = async(input) => { var ct, k; @@ -2781,28 +2781,18 @@ P.find = async function(options, metadata = {}, content) { }; await _searches(); // if nothing useful can be found and still only have title try using bing - or drop this ability? - if (mag !== false && !metadata.doi && !content && !options.url && !epmc && metadata.title && metadata.title.length > 8 && metadata.title.split(' ').length > 1) { - mct = metadata.title.toLowerCase().replace(/[^a-z0-9 ]+/g, " ").replace(/\s\s+/g, ' '); // this previously had a unidecode on it... - bong = (await this.src.microsoft.bing(mct)); - if ((bong != null ? bong.data : void 0) && bong.data.length) { - bct = bong.data[0].name.toLowerCase().replace('(pdf)', '').replace(/[^a-z0-9 ]+/g, ' ').replace(/\s\s+/g, ' '); // this had unidecode to match to above... - if (mct.replace(/ /g, '').startsWith(bct.replace(/ /g, ''))) { //and not await @blacklist bong.data[0].url - // if the URL is usable and tidy bing title is a partial match to the start of the provided title, try using it - options.url = bong.data[0].url.replace(/"/g, ''); - if (typeof options.url === 'string' && options.url.includes('pubmed.ncbi')) { - metadata.pmid = options.url.replace(/\/$/, '').split('/').pop(); - } - if (typeof options.url === 'string' && options.url.includes('/10.')) { - if (metadata.doi == null) { - metadata.doi = '10.' + options.url.split('/10.')[1]; - } - } - } - } - if (metadata.doi || metadata.pmid || options.url) { - await _searches(); // run again if anything more useful found - } - } + // if mag isnt false and not metadata.doi and not content and not options.url and not epmc and metadata.title and metadata.title.length > 8 and metadata.title.split(' ').length > 1 + // mct = metadata.title.toLowerCase().replace(/[^a-z0-9 ]+/g, " ").replace(/\s\s+/g, ' ') # this previously had a unidecode on it... + // bong = await @src.microsoft.bing mct + // if bong?.data and bong.data.length + // bct = bong.data[0].name.toLowerCase().replace('(pdf)', '').replace(/[^a-z0-9 ]+/g, ' ').replace(/\s\s+/g, ' ') # this had unidecode to match to above... + // if mct.replace(/ /g, '').startsWith bct.replace(/ /g, '') #and not await @blacklist bong.data[0].url + // # if the URL is usable and tidy bing title is a partial match to the start of the provided title, try using it + // options.url = bong.data[0].url.replace /"/g, '' + // metadata.pmid = options.url.replace(/\/$/,'').split('/').pop() if typeof options.url is 'string' and options.url.includes 'pubmed.ncbi' + // metadata.doi ?= '10.' + options.url.split('/10.')[1] if typeof options.url is 'string' and options.url.includes '/10.' + // if metadata.doi or metadata.pmid or options.url + // await _searches() # run again if anything more useful found _ill = async() => { var ref8; if ((metadata.doi || (metadata.title && metadata.title.length > 8 && metadata.title.split(' ').length > 1)) && (options.from || (options.config != null)) && (options.plugin === 'instantill' || options.ill === true)) { @@ -4889,6 +4879,39 @@ P.report = function() { return 'OA.Works report'; }; +P.report.fixtypes = async function() { + var checked, cr, fixed, ref, rr; + checked = 0; + fixed = 0; + ref = this.index._for((this.S.dev ? 'paradigm_b_' : 'paradigm_') + 'report_works', 'NOT type.keyword:"journal-article" AND NOT type.keyword:"posted-content"', { + scroll: '30m', + include: ['DOI', 'type'] + }); + for await (rr of ref) { + checked += 1; + if (cr = (await this.src.crossref.works(cr.DOI))) { + if (cr.type !== rr.type) { + fixed += 1; + rr.type = cr.type; + await this.report.works(rr); + } + } + console.log('fixing report works types', checked, fixed); + } + this.mail({ + to: ['mark@oa.works'], + subject: 'OA report works types fixed ' + fixed, + text: checked + ' checked and fixed ' + fixed + }); + return fixed; +}; + +P.report.fixtypes._async = true; + +P.report.fixtypes._bg = true; + +P.report.fixtypes._auth = 'root'; + P.report.dev2live = async function(reverse) { var batch, counter, f, ref, rm, t; if (!reverse) { @@ -5204,7 +5227,7 @@ P.report.orgs.supplement = async function(sheetname, orgname, max, changed, relo } } } - if (!(wrr != null ? wrr.title : void 0) || (xref != null) || (olx != null)) { //xref and olx are passed from the changes check for paid records + if (!(wrr != null ? wrr.title : void 0) || ((wrr != null ? wrr.authorships : void 0) == null) || (xref != null) || (olx != null)) { //xref and olx are passed from the changes check for paid records cr = xref != null ? xref : (await this.src.crossref.works(rr.DOI)); // ? await @src.crossref.works.doi rr.DOI ol = olx != null ? olx : (await this.src.openalex.works('ids.doi:"https://doi.org/' + rr.DOI + '"', 1)); if (ol == null) { @@ -5309,7 +5332,7 @@ P.report.orgs.supplement = async function(sheetname, orgname, max, changed, relo batch = []; } if (changed == null) { - text = 'https://bg.beta.oa.works/report/works?q=supplements.orgs:*\n\n'; + text = 'https://bg.' + (this.S.dev ? 'beta' : 'api') + '.oa.works/report/works?q=supplements.orgs:*\n\n'; for (os in orgsheets) { text += os + '\n'; for (ss in orgsheets[os]) { @@ -5538,7 +5561,7 @@ P.report.works.load = async function(timestamp, crossref, openalex, supplement, batch = []; } if (qry == null) { - qry = 'type.keyword:("journal-article" OR "posted-content") AND (funder.name:* OR author.affiliation.name:*) AND year.keyword:' + year; + qry = '(type.keyword:"journal-article" OR type.keyword:"posted-content") AND (funder.name:* OR author.affiliation.name:*) AND year.keyword:' + year; } if (year && !qry.includes(':' + year)) { qry = '(' + qry + ') AND year.keyword:' + year; @@ -5640,9 +5663,9 @@ P.report.works.load = async function(timestamp, crossref, openalex, supplement, console.log('OA report done loading after ' + took + ' minutes'); if (notify !== false) { this.mail({ - to: ['mark@oa.works'], + to: ['mark@oa.works', 'joe@oa.works'], subject: 'OA report works loaded ' + total + ' in ' + took + ' minutes' + (timestamp ? ' for ' + ((await this.date(timestamp))) : '') + ', ' + crcount + ' crosref, ' + alexcount + ' openalex', - text: 'https://bg.beta.oa.works/report/works' + text: 'https://bg.' + (this.S.dev ? 'beta' : 'api') + '.oa.works/report/works' }); } return total; @@ -6333,7 +6356,7 @@ P.report.check = async function(ror, reload) { console.log('OA check done after ' + took + ' minutes', reload); if (!reload) { this.mail({ - to: ['mark@oa.works', 'joe@oa.works', 'sarah@oa.works'], + to: ['joe@oa.works', 'sarah@oa.works'], subject: 'Gates OA check done ' + counter + ' in ' + took + ' minutes', text: 'https://static.oa.works/report/' + (out != null ? out : '').split('/report/').pop() }); @@ -8677,12 +8700,26 @@ P.src.openalex.venues = { }; P.src.openalex.works.doi = async function(doi) { - var found; + var abs, found, i, len, n, ref, word; if (doi == null) { doi = this.params.doi; } if (!(found = (await this.src.openalex.works('ids.doi:"https://doi.org/' + doi + '"', 1)))) { if (found = (await this.fetch('https://api.openalex.org/works/https://doi.org/' + doi))) { + if (found.abstract_inverted_index != null) { + abs = []; + for (word in found.abstract_inverted_index) { + ref = found.abstract_inverted_index[word]; + for (i = 0, len = ref.length; i < len; i++) { + n = ref[i]; + abs[n] = word; + } + } + if (abs.length) { + found.abstract = abs.join(' '); + } + delete found.abstract_inverted_index; + } this.waitUntil(this.src.openalex.works(doi.toLowerCase(), found)); } } @@ -14319,7 +14356,7 @@ P.decode = async function(content) { }; -S.built = "Thu Nov 17 2022 07:31:19 GMT+0000"; +S.built = "Fri Nov 18 2022 07:39:53 GMT+0000"; P.convert.docxtest = {_bg: true}// added by constructor P.convert.doc2txt = {_bg: true}// added by constructor diff --git a/worker/dist/worker.min.js b/worker/dist/worker.min.js index f5a68d9..5b942be 100644 --- a/worker/dist/worker.min.js +++ b/worker/dist/worker.min.js @@ -1,4 +1,4 @@ -!function(e){var t={};function i(s){if(t[s])return t[s].exports;var r=t[s]={i:s,l:!1,exports:{}};return e[s].call(r.exports,r,r.exports,i),r.l=!0,r.exports}i.m=e,i.c=t,i.d=function(e,t,s){i.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:s})},i.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.t=function(e,t){if(1&t&&(e=i(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var s=Object.create(null);if(i.r(s),Object.defineProperty(s,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var r in e)i.d(s,r,function(t){return e[t]}.bind(null,r));return s},i.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return i.d(t,"a",t),t},i.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},i.p="",i(i.s=2)}([function(e,t){var i;i=function(){return this}();try{i=i||new Function("return this")()}catch(e){"object"==typeof window&&(i=window)}e.exports=i},function(e,t,i){"use strict";i.d(t,"a",(function(){return r}));let s=e=>crypto.getRandomValues(new Uint8Array(e)),r=(e,t)=>((e,t,i)=>{let s=(2<{let a="";for(;;){let l=i(r),n=r;for(;n--;)if(a+=e[l[n]&s]||"",a.length===t)return a}}})(e,t,s)},function(e,t,i){"use strict";i.r(t),function(e,t){var s,r,a,l,n,o=i(1),u=[].indexOf;try{r=JSON.parse(SECRETS_SETTINGS)}catch(e){}try{for(n in a=JSON.parse(SECRETS_SERVER))r[n]=a[n]}catch(e){}null==r&&(r={}),null==r.name&&(r.name="OA.Works"),null==r.kv&&(r.kv="oaworks"),null==r.version&&(r.version="6.1.0"),r.pass=["docs","client",".well-known"],null==r.dev&&(r.dev=!0),null==r.headers&&(r.headers={"Access-Control-Allow-Methods":"HEAD, GET, PUT, POST, DELETE, OPTIONS","Access-Control-Allow-Origin":"*","Access-Control-Allow-Headers":"X-apikey, X-id, Origin, X-Requested-With, Content-Type, Content-Disposition, Accept, DNT, Keep-Alive, User-Agent, If-Modified-Since, Cache-Control","Permissions-Policy":"interest-cohort=()"}),null==r.formats&&(r.formats=["html","csv","json"]),null==r.svc&&(r.svc={}),null==r.src&&(r.src={});try{addEventListener("fetch",(function(e){return!1!==r.pass&&e.passThroughOnException(),e.respondWith(s.call(e))}))}catch(e){}l={},(s=async function(){var t,i,a,o,c,h,d,p,f,m,g,y,_,v,b,w,x,k,O,S,j,A,I,C,D,L,N,P,R,E,T,q,U,M,W,z,J,B,Y,F,V,X,G,$,H,Q,Z,K,ee,te,ie,se,re,ae,le,ne,oe,ue,ce,he,de;this.started=Date.now();try{null==(a=r.headers)[C="x-"+r.name.toLowerCase()]&&(a[C]=(r.version?"v"+r.version:"")+(r.built?" built "+r.built:""))}catch(e){}if(this.S=JSON.parse(JSON.stringify(r)),"function"!=typeof this.waitUntil?(null!=this.S.bg&&"string"!=typeof this.S.bg||(this.S.bg=!0),null==(o=this.S).cache&&(o.cache=!1),this.waitUntil=function(e){return!0}):this.S.kv||(this.S.kv=this.S.name.replace(/\s/g,""),e[this.S.kv]||delete this.S.kv),null==this.params&&(this.params={}),null!=this.request.url&&this.request.url.includes("?"))for(R="",b=0,S=(U=this.request.url.split("?")[1].split("&")).length;b=0&&(this.format=L,this.parts[this.parts.length-1]=this.parts[this.parts.length-1].replace("."+L,""))),"string"==typeof this.S.bg&&Array.isArray(this.S.pass)&&this.parts.length&&(ne=this.parts[0],u.call(this.S.pass,ne)>=0))throw new Error;if(he="x-"+this.S.name.toLowerCase()+"-system",this.S.name&&this.S.system&&this.headers[he]===this.S.system&&(delete this.headers[he],this.system=!0),this._logs=[],this.nolog=!1,this.params._nolog&&(this.nolog=this.S.nolog&&this.params._nolog===this.S.nolog,delete this.params._nolog),this.route=this.parts.join("/"),this.routes=[],this.fn="",""===this.route)return"HEAD"===(W=this.request.method)||"OPTIONS"===W?s._response.call(this,""):s._response.call(this,{name:null!=(z=this.S.name)?z:"OA.Works API",version:this.S.version,base:this.S.dev?this.base:void 0,built:this.S.built,user:null!=(J=null!=(B=this.user)?B.email:void 0)?J:void 0});for(y=void 0,T=[...this.parts],N=void 0,E=[],(t=(e,i,r,a,o)=>{var c,h,d,p,f,m,g,_,v,b,w,x,k,O,S,j,A;if(N&&this.fn.startsWith(r))for(;T.length&&null==e[T[0]];)this.params[N]=(this.params[N]?this.params[N]+"/":"")+T.shift(),u.call(E,N)<0&&E.push(N);for(n in S=[],e)if("function"!=(k=typeof e[n])&&"object"!==k)S.push(i[n]=e[n]);else if(null!=e[n]){if(w=r+(r?".":"")+n,"object"!=typeof e[n]||e[n]._index||e[n]._indexed||e[n]._sheet||e[n]._kv||e[n]._bg){if(null==(d=e[n])._auth&&(d._auth=null!=(p=e[n])._auths?p._auths:p._auths=a),Array.isArray(e[n]._auths)&&0===e[n]._auths.length&&(e[n]._auths=w.split(".")),Array.isArray(e[n]._auth)&&0===e[n]._auth.length&&(e[n]._auth=w.split(".")),null==(f=e[n])._cache&&(f._cache=null!=(m=e[n])._caches?m._caches:m._caches=o),w.startsWith("auth")&&null==(g=e[n])._cache&&(g._cache=!1),e[n]._sheet&&null==(_=e[n])._index&&(_._index=!0),e[n]._index)for(x=0,b=(O=["keys","terms","suggest","count","percent","min","max","range","sum","average","mapping","_for","_each","_bulk","_refresh"]).length;xasync()=>{var t,i;console.log("scheduled task",e,this.datetime()),l[e].last=await this.datetime(),delete l[e].error;try{t=l[e].fn._sheet?await this._loadsheet(l[e].fn,l[e].fn._name.replace(/\./g,"_")):await l[e].fn(l[e].fn._args);try{l[e].result=JSON.stringify(t).substr(0,200)}catch(e){}return l[e].success=!0,console.log("scheduled task result",t)}catch(t){i=t,l[e].success=!1;try{return l[e].error=JSON.stringify(i)}catch(e){}}},cron.schedule(i[n]._schedule,j(w))),n.startsWith("_")||(T.length&&T[0]===n&&this.fn.startsWith(r)&&(N=T.shift(),this.fn+=(""===this.fn?"":".")+N,"function"!=typeof i[n]||r.includes("._")||(y=i[n])),"function"==typeof i[n]&&1===w.replace("svc.","").replace("src.","").split(".").length&&this.routes.push(w.replace(/\./g,"/"))),Array.isArray(e[n])||n.startsWith("_")&&"function"!=typeof i[n]?S.push(void 0):S.push(t(e[n],i[n],w,null!=a?a:e[n]._auths,null!=o?o:e[n]._caches))}else S.push(void 0);return S})(s,this,""),N&&T.length&&(this.params[N]=this.params[N]?this.params[N]+"/"+T.join("/"):T.join("/")),D=0,I=E.length;D300&&e.status<600||e.headers)){if(null!=e.headers){for(o in e.headers)this.S.headers[o]=e.headers[o];delete e.headers}S=null!=(v=e.status)?v:200,delete e.status,0===(f=this.keys(e)).length?e=S:1===f.length&&(e=e[f[0]])}else S=200;if(!this.S.headers["Content-Type"]&&!this.S.headers["content-type"]){if(this.format&&(b=this.format,u.call(this.S.formats,b)>=0)){if("string"!=typeof e)try{e=await this.convert["json2"+this.format](e)}catch(e){}if("string"==typeof e&&"html"===this.format&&!(e=e.replace(/\>\\n<")).includes("\n\n',k+='\n\n',e.includes(""),k+="\n",e=_+r):e.includes('id="title"')&&(k+=""+e.split('id="title"')[1].split(">")[1].split("<")[0]+"\n"),d=0,m=(w=["")[0],e=e.replace(O,""),k+=O+"\n";e.includes("")&&([y,h]=e.split(""),[h,s]=h.split(""),k+=h,e=y+s),k.includes("icon")||(k+=''),k+="\n\n",k+=e.includes("\n"+e+"\n\n",e=k+"\n"}this.S.headers["Content-Type"]="html"===this.format?"text/html; charset=UTF-8":"text/csv; charset=UTF-8"}if("string"!=typeof e)try{e=JSON.stringify(e,"",2)}catch(e){}null==(l=this.S.headers)["Content-Type"]&&(l["Content-Type"]="application/json; charset=UTF-8")}try{null==(n=this.S.headers)["Content-Length"]&&(n["Content-Length"]=t.byteLength(e))}catch(e){}try{this.S.headers["x-"+this.S.name.toLowerCase()+"-took"]=Date.now()-this.started}catch(e){}try{this.cached&&(this.S.headers["x-"+this.S.name.toLowerCase()+"-cached"]=this.cached)}catch(e){}try{return new Response(e,{status:S,headers:this.S.headers})}catch(t){return{status:S,headers:this.S.headers,body:e}}},s._loadsheet=async function(e,t){var i,s,r,a,l,n;if(e._sheet.startsWith("http")&&e._sheet.includes("csv")?l=await this.convert.csv2json(e._sheet):e._sheet.startsWith("http")&&e._sheet.includes("json")?(l=await this.fetch(e._sheet))&&!Array.isArray(l)&&(l=[l]):l=await this.src.google.sheets(e._sheet),Array.isArray(l)&&l.length){if("function"==typeof e._format&&(l=await e._format.apply(this,[l])),e._key)for(i=0,s=l.length;i2e5||!(null!=(F=this.S.static)?F.folder:void 0))$={status:401};else{p=(this.fn?this.fn.replace(/\./g,"_"):"")+"_"+this.uid(),c=this.S.static.url+"/export/"+p+".csv",K>1e5&&await this.mail({to:null!=(V=null!=(X=this.S.log)?X.notify:void 0)?V:"mark@oa.works",text:"Someone is creating a large csv of size "+K+"\n\n"+c+("joe@oa.works"===O?"":"\n\nbut they are not the user who is allowed, so it should get capped")}),j=this.S.static.folder+"/export";try{(d=(await fs.readdir(j)).length)>500&&d%20==0&&this.mail({to:null!=(G=null!=(P=this.S.log)?P.notify:void 0)?G:"mark@oa.works",text:"Warning, export file count is "+d})}catch(e){await fs.mkdir(j),d=0}if(d>1e3)$={status:401};else{if(j+="/"+p+".csv",await fs.appendFile(j,""),null!=this.params.includes&&(this.params.include=this.params.includes,delete this.params.includes),null!=this.params.excludes&&(this.params.exclude=this.params.excludes,delete this.params.excludes),null!=this.params.include)g=this.params.include.split(",");else for(g=[],m=0,v=(R=await this.index.keys(H)).length;m
Download csv

Thanks'}),s=async(e,t,i,s,r,a,l)=>{var o,u,c,h,d,p,f,m,g,y,_,v,b,w,x,k,O,S,j,A,I;for(d=!0,l&&(s=["DOI","funder.name","funder.award"]),w=0,m=s.length;w
Download csv\n\nThanks'})},this.waitUntil(s(H,C,j,g,O,c,A)),delete this.format,$=c}}}else $=await this.index(H+(x.key?"/"+x.key:""),null!=D?D:C);if(null!=$||x.key&&null!=D||(await this.index(H,"object"!=typeof e._index?{}:{settings:e._index.settings,mappings:null!=(T=e._index.mappings)?T:e._index.mapping,aliases:e._index.aliases}),""!==D&&($=await this.index(H+(x.key?"/"+x.key:""),null!=D?D:x.key?void 0:C))),null==$&&null==D&&x.key&&"string"==typeof arguments[0]&&(C=await this.index.translate(arguments[0],arguments[1]))&&1===(null!=(I=await this.index(H,C))&&null!=(q=I.hits)?q.total:void 0))for(S=0,w=(U=await this.keys(I.hits.hits[0]._source)).length;S=0)){null==($=I.hits.hits[0]._source)._id&&($._id=I.hits.hits[0]._id);break}1===(null!=C?C.size:void 0)&&"object"==typeof $&&null!=(null!=(W=$.hits)?W.hits:void 0)&&($.hits.hits.length?(null==(a=$.hits.hits[0]._source)._id&&(a._id=$.hits.hits[0]._id),$=$.hits.hits[0]._source):$=void 0)}null!=C&&(x.qry=JSON.stringify(C)),null==$||null!=D||x.cached||(x.cached="index"),x.cached&&this.fn===t&&(this.cached=x.cached)}return null==$&&(e._bg||e._sheet)&&"string"==typeof this.S.bg&&this.S.bg.startsWith("http")&&(l={headers:{},body:D,params:this.copy(this.params)},this.refresh&&(l.params.refresh=!0),l.headers["x-"+this.S.name.toLowerCase()+"-rid"]=this.rid,$=await this.fetch(this.S.bg+"/"+H.replace(/\_/g,"/"),l),x.bg=!0),null==$&&e._sheet&&""!==D&&(this.refresh&&this.fn===t||!await this.index(H))&&($=await this._loadsheet(e,H),(arguments.length||"{}"!==JSON.stringify(this.params))&&($=void 0)),null!=$||e._index&&""===D||"function"!=typeof e||(i=async(e,t,i,s,r)=>{var a,l,n,u,c,h,d,p;if(t._limit&&(a=!0===t._limit?86400:t._limit,await this.kv("limit/"+r,Q+a,a)),"object"==typeof(c=await t.apply(this,i))&&(t._kv||t._index)&&null==c.took&&null==c.hits){if(t._key&&Array.isArray(c)&&c.length&&null==c[0]._id&&null!=c[0][t._key])for(u=0,n=c.length;u",d+='