diff --git a/database/009-create-mention-table.sql b/database/009-create-mention-table.sql index 77de89d3f..2ea751246 100644 --- a/database/009-create-mention-table.sql +++ b/database/009-create-mention-table.sql @@ -40,12 +40,15 @@ CREATE TABLE mention ( page VARCHAR(50), image_url VARCHAR(500) CHECK (image_url ~ '^https?://'), mention_type mention_type NOT NULL, + external_id VARCHAR(500), /* NOTE(review): presumably the identifier of this record at the external provider named in source; confirm with the scraper */ source VARCHAR(50) NOT NULL, version VARCHAR(100), note VARCHAR(500), scraped_at TIMESTAMPTZ, + citations_scraped_at TIMESTAMPTZ, /* nullable; returned per reference paper by reference_papers_to_scrape() */ created_at TIMESTAMPTZ NOT NULL, - updated_at TIMESTAMPTZ NOT NULL + updated_at TIMESTAMPTZ NOT NULL, + UNIQUE(external_id, source) /* rows whose external_id is NULL are not restricted by this constraint: PostgreSQL treats NULLs as distinct in UNIQUE constraints by default */ ); CREATE FUNCTION sanitise_insert_mention() RETURNS TRIGGER LANGUAGE plpgsql AS @@ -81,6 +84,20 @@ CREATE TABLE mention_for_software ( ); +CREATE TABLE reference_paper_for_software ( /* junction table: links a software row to a mention row registered as one of its reference papers */ + mention UUID REFERENCES mention (id), + software UUID REFERENCES software (id), + PRIMARY KEY (mention, software) /* one link per (mention, software) pair; primary key columns are implicitly NOT NULL */ +); + + +CREATE TABLE citation_for_mention ( /* junction table: the mention row in column citation is recorded as a citation of the mention row in column mention; both reference mention (id) */ + mention UUID REFERENCES mention (id), + citation UUID REFERENCES mention (id), + PRIMARY KEY (mention, citation) /* one link per (mention, citation) pair */ +); + + CREATE FUNCTION search_mentions_for_software(software_id UUID, search_text VARCHAR) RETURNS SETOF mention STABLE LANGUAGE plpgsql AS $$ BEGIN diff --git a/database/020-row-level-security.sql b/database/020-row-level-security.sql index 37cc0baa7..163bbf6a8 100644 --- a/database/020-row-level-security.sql +++ b/database/020-row-level-security.sql @@ -446,17 +446,9 @@ CREATE POLICY admin_all_rights ON research_domain_for_project TO rsd_admin -- mentions --- TODO: not sure what to do here, --- should a mention only be visible if you can see at least one software or project for which it relates? 
ALTER TABLE mention ENABLE ROW LEVEL SECURITY; CREATE POLICY anyone_can_read ON mention FOR SELECT TO rsd_web_anon, rsd_user - USING (id IN (SELECT mention FROM mention_for_software) - OR id IN (SELECT mention FROM output_for_project) - OR id IN (SELECT mention FROM impact_for_project) - OR id IN (SELECT mention_id FROM release_version)); - -CREATE POLICY maintainer_can_read ON mention FOR SELECT TO rsd_user USING (TRUE); CREATE POLICY maintainer_can_delete ON mention FOR DELETE TO rsd_user @@ -484,6 +476,30 @@ CREATE POLICY admin_all_rights ON mention_for_software TO rsd_admin WITH CHECK (TRUE); +ALTER TABLE reference_paper_for_software ENABLE ROW LEVEL SECURITY; /* policies for the software-to-reference-paper link table follow */ + +CREATE POLICY anyone_can_read ON reference_paper_for_software FOR SELECT TO rsd_web_anon, rsd_user + USING (software IN (SELECT id FROM software)); /* NOTE(review): a link is readable when its software id is returned by this subquery; presumably the policies on table software limit which ids the caller sees; confirm */ + +CREATE POLICY maintainer_all_rights ON reference_paper_for_software TO rsd_user /* no FOR clause, so the policy applies to all commands */ + USING (software IN (SELECT * FROM software_of_current_maintainer())) + WITH CHECK (software IN (SELECT * FROM software_of_current_maintainer())); /* existing and newly written rows must both belong to software returned by software_of_current_maintainer() */ + +CREATE POLICY admin_all_rights ON reference_paper_for_software TO rsd_admin /* unrestricted access for rsd_admin */ + USING (TRUE) + WITH CHECK (TRUE); + + +ALTER TABLE citation_for_mention ENABLE ROW LEVEL SECURITY; /* only a read policy and the rsd_admin policy are created below; no write policy is defined here for rsd_user */ + +CREATE POLICY anyone_can_read ON citation_for_mention FOR SELECT TO rsd_web_anon, rsd_user + USING (mention IN (SELECT id FROM mention)); /* a citation link is readable when the cited mention id is returned by this subquery */ + +CREATE POLICY admin_all_rights ON citation_for_mention TO rsd_admin /* unrestricted access for rsd_admin */ + USING (TRUE) + WITH CHECK (TRUE); + + ALTER TABLE output_for_project ENABLE ROW LEVEL SECURITY; CREATE POLICY anyone_can_read ON output_for_project FOR SELECT TO rsd_web_anon, rsd_user diff --git a/database/109-mention-views.sql b/database/109-mention-views.sql new file mode 100644 index 000000000..5e078b144 --- /dev/null +++ b/database/109-mention-views.sql @@ -0,0 +1,82 @@ +-- SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +-- SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) +-- 
SPDX-FileCopyrightText: 2023 Netherlands eScience Center +-- +-- SPDX-License-Identifier: Apache-2.0 + +CREATE FUNCTION reference_papers_to_scrape() /* returns one row per mention that appears in reference_paper_for_software, together with the DOIs of the citations already linked to it through citation_for_mention */ +RETURNS TABLE ( + id UUID, + doi CITEXT, + citations_scraped_at TIMESTAMPTZ, + known_dois CITEXT[] /* DOIs of already linked citations; NULL DOIs are removed, so a paper without citations yields an empty array (the joins are LEFT JOINs) */ +) +LANGUAGE sql STABLE AS +$$ + SELECT mention.id, mention.doi, mention.citations_scraped_at, ARRAY_REMOVE(ARRAY_AGG(citation.doi), NULL) + FROM mention + LEFT JOIN citation_for_mention ON mention.id = citation_for_mention.mention + LEFT JOIN mention AS citation ON citation_for_mention.citation = citation.id + WHERE mention.id IN ( + SELECT mention FROM reference_paper_for_software + ) + GROUP BY mention.id +$$; + +-- UNIQUE CITATIONS BY SOFTWARE ID +CREATE FUNCTION citation_by_software() RETURNS TABLE ( /* returns one row per (software, citing mention) pair, found by joining reference_paper_for_software to citation_for_mention; software without reference papers or without citations produces no rows (INNER JOINs). NOTE(review): the query aliases its last column as reference_paper while the declared output column is reference_papers; presumably cosmetic because the declared name is what callers see; confirm */ + software UUID, + id UUID, + doi CITEXT, + url VARCHAR, + title VARCHAR, + authors VARCHAR, + publisher VARCHAR, + publication_year SMALLINT, + journal VARCHAR, + page VARCHAR, + image_url VARCHAR, + mention_type mention_type, + source VARCHAR, + reference_papers UUID[] /* ids of the reference papers of this software that the citing mention is linked to; element order is not specified by the query */ +)LANGUAGE sql STABLE AS +$$ +SELECT + reference_paper_for_software.software, + mention.id, + mention.doi, + mention.url, + mention.title, + mention.authors, + mention.publisher, + mention.publication_year, + mention.journal, + mention.page, + mention.image_url, + mention.mention_type, + mention.source, + ARRAY_AGG( + reference_paper_for_software.mention + ) AS reference_paper +FROM + reference_paper_for_software +INNER JOIN + citation_for_mention ON citation_for_mention.mention = reference_paper_for_software.mention +INNER JOIN + mention ON mention.id = citation_for_mention.citation +GROUP BY + reference_paper_for_software.software, + mention.id, + mention.doi, + mention.url, + mention.title, + mention.authors, + mention.publisher, + mention.publication_year, + mention.journal, + mention.page, + mention.image_url, + mention.mention_type, + mention.source +; +$$; diff --git a/frontend/components/mention/MentionItemBase.tsx 
b/frontend/components/mention/MentionItemBase.tsx index 40d73ce2d..265d5f532 100644 --- a/frontend/components/mention/MentionItemBase.tsx +++ b/frontend/components/mention/MentionItemBase.tsx @@ -75,7 +75,7 @@ export default function MentionItemBase({item,pos,nav,type,role='find'}:MentionI className="text-sm" role={role} /> - + ) } diff --git a/frontend/components/mention/MentionViewItem.tsx b/frontend/components/mention/MentionViewItem.tsx index 61eba4570..7a1abc8e7 100644 --- a/frontend/components/mention/MentionViewItem.tsx +++ b/frontend/components/mention/MentionViewItem.tsx @@ -43,7 +43,7 @@ export default function MentionViewItem({item, pos}: {item: MentionItemProps, po doi={item?.doi} className="text-sm" /> - +
{item?.url ? : null} diff --git a/frontend/components/mention/useEditMentionReducer.tsx b/frontend/components/mention/useEditMentionReducer.tsx index cd8674aa8..679a6cb65 100644 --- a/frontend/components/mention/useEditMentionReducer.tsx +++ b/frontend/components/mention/useEditMentionReducer.tsx @@ -1,9 +1,11 @@ // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 dv4all +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 -import {useContext} from 'react' +import {useCallback, useContext} from 'react' import {EditMentionActionType} from './editMentionReducer' import EditMentionContext from './editMentionContext' import {MentionItemProps} from '~/types/Mention' @@ -11,58 +13,60 @@ import {MentionItemProps} from '~/types/Mention' export default function useEditMentionReducer() { const {state, dispatch} = useContext(EditMentionContext) - function setLoading(loading: boolean) { + const setLoading = useCallback((loading:boolean)=>{ dispatch({ type: EditMentionActionType.SET_LOADING, payload: loading }) - } + // eslint-disable-next-line react-hooks/exhaustive-deps + },[]) - function setMentions(mentions: MentionItemProps[]) { + const setMentions = useCallback((mentions: MentionItemProps[])=>{ dispatch({ type: EditMentionActionType.SET_MENTIONS, payload: mentions }) - } + // eslint-disable-next-line react-hooks/exhaustive-deps + },[]) - function onAdd(item: MentionItemProps) { + const onAdd = useCallback((item: MentionItemProps)=>{ dispatch({ type: EditMentionActionType.ON_ADD, payload: item }) - } + },[dispatch]) - function onNewItem() { + const onNewItem = useCallback(()=>{ dispatch({ type: EditMentionActionType.SET_EDIT_MODAL, payload: { open:true } }) - } + },[dispatch]) - function onSubmit(item:MentionItemProps) { + const onSubmit = useCallback((item:MentionItemProps)=>{ dispatch({ type: 
EditMentionActionType.ON_SUBMIT, payload: item }) - } + },[dispatch]) - function onUpdate(item:MentionItemProps) { + const onUpdate = useCallback((item:MentionItemProps)=>{ dispatch({ type: EditMentionActionType.ON_UPDATE, payload: item }) - } + },[dispatch]) - function onDelete(item:MentionItemProps) { + const onDelete = useCallback((item:MentionItemProps)=>{ dispatch({ type: EditMentionActionType.ON_DELETE, payload: item }) - } + },[dispatch]) - function confirmDelete(item?: MentionItemProps) { + const confirmDelete = useCallback((item?: MentionItemProps)=>{ if (item) { // open modal dispatch({ @@ -81,9 +85,9 @@ export default function useEditMentionReducer() { } }) } - } + },[dispatch]) - function setEditModal(item?: MentionItemProps) { + const setEditModal = useCallback((item?: MentionItemProps)=>{ if (item) { // show modal when item provided dispatch({ @@ -102,9 +106,9 @@ export default function useEditMentionReducer() { } }) } - } + },[dispatch]) - // console.group('useOutputContext') + // console.group('useEditMentionReducer') // console.log('state...', state) // console.groupEnd() diff --git a/frontend/components/software/edit/editSoftwarePages.tsx b/frontend/components/software/edit/editSoftwarePages.tsx index 32006c06c..d0441139d 100644 --- a/frontend/components/software/edit/editSoftwarePages.tsx +++ b/frontend/components/software/edit/editSoftwarePages.tsx @@ -18,6 +18,7 @@ import PersonAddIcon from '@mui/icons-material/PersonAdd' import ContentLoader from '~/components/layout/ContentLoader' import HomeRepairServiceIcon from '@mui/icons-material/HomeRepairService' import PendingActionsIcon from '@mui/icons-material/PendingActions' +import PostAddIcon from '@mui/icons-material/PostAdd' // import SoftwareInformation from './information' // import SoftwareContributors from './contributors' @@ -28,22 +29,25 @@ import PendingActionsIcon from '@mui/icons-material/PendingActions' // import SoftwareMaintainers from './maintainers' // use dynamic imports instead 
+const SoftwareContributors = dynamic(() => import('./contributors'),{ + loading: ()=> +}) const SoftwareInformation = dynamic(() => import('./information'),{ loading: ()=> }) -const SoftwareContributors = dynamic(() => import('./contributors'),{ +const SoftwareMaintainers = dynamic(() => import('./maintainers'),{ loading: ()=> }) -const SoftwareOgranisations = dynamic(() => import('./organisations'),{ +const SoftwareMentions = dynamic(() => import('./mentions'),{ loading: ()=> }) -const PackageManagers = dynamic(() => import('./package-managers'),{ +const SoftwareOgranisations = dynamic(() => import('./organisations'),{ loading: ()=> }) -const SoftwareMentions = dynamic(() => import('./mentions'),{ +const PackageManagers = dynamic(() => import('./package-managers'),{ loading: ()=> }) -const SoftwareTestimonials = dynamic(() => import('./testimonials'),{ +const ReferencePapers = dynamic(() => import('./reference-papers'),{ loading: ()=> }) const RelatedSoftware = dynamic(() => import('./related-software'),{ @@ -52,10 +56,11 @@ const RelatedSoftware = dynamic(() => import('./related-software'),{ const RelatedProjects = dynamic(() => import('./related-projects'),{ loading: ()=> }) -const SoftwareMaintainers = dynamic(() => import('./maintainers'),{ +const SoftwareTestimonials = dynamic(() => import('./testimonials'),{ loading: ()=> }) + export type EditSoftwarePageProps = { id: string status: string, @@ -82,6 +87,12 @@ export const editSoftwarePage:EditSoftwarePageProps[] = [{ icon: , render: () => , status: 'Optional information' +},{ + id: 'reference-paper', + label: 'Reference papers', + icon: , + render: () => , + status: 'Optional information' },{ id: 'mentions', label: 'Mentions', diff --git a/frontend/components/software/edit/mentions/EditMentionsProvider.tsx b/frontend/components/software/edit/mentions/EditMentionsProvider.tsx index c302c7082..da11313f1 100644 --- a/frontend/components/software/edit/mentions/EditMentionsProvider.tsx +++ 
b/frontend/components/software/edit/mentions/EditMentionsProvider.tsx @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) (dv4all) // SPDX-FileCopyrightText: 2022 dv4all +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -112,7 +114,7 @@ export default function EditMentionsProvider(props: any) { } } if (item.id) { - // existing RSD mention item to be added to project + // existing RSD mention item to be added to software const resp = await addToMentionForSoftware({ software, mention: item.id, diff --git a/frontend/components/software/edit/mentions/utils.ts b/frontend/components/software/edit/mentions/utils.ts index c81fcd162..c33f970cc 100644 --- a/frontend/components/software/edit/mentions/utils.ts +++ b/frontend/components/software/edit/mentions/utils.ts @@ -1,3 +1,4 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) // SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2023 Netherlands eScience Center @@ -11,8 +12,8 @@ export type SearchTermInfo = { term: string, type: 'doi' | 'title' } - export function extractSearchTerm(query: string): SearchTermInfo{ + const doiRegexMatch = query.match(doiRegex) if (doiRegexMatch != null) { return {term: doiRegexMatch[0], type: 'doi'} diff --git a/frontend/components/software/edit/reference-papers/CitationItem.tsx b/frontend/components/software/edit/reference-papers/CitationItem.tsx new file mode 100644 index 000000000..c6b464e98 --- /dev/null +++ b/frontend/components/software/edit/reference-papers/CitationItem.tsx @@ -0,0 +1,96 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// 
SPDX-License-Identifier: Apache-2.0 + +import {MentionItemProps} from '~/types/Mention' +import {getMentionType} from '~/components/mention/config' +import MentionAuthors from '~/components/mention/MentionAuthors' +import MentionDoi from '~/components/mention/MentionDoi' +import {MentionTitle} from '~/components/mention/MentionItemBase' +import MentionPublisherItem from '~/components/mention/MentionPublisherItem' +import useEditMentionReducer from '~/components/mention/useEditMentionReducer' + +export type CitationItemProps = MentionItemProps & { + reference_papers: string[] +} + +function ReferencePapers({papers}:{papers:Readonly}){ + const {mentions} = useEditMentionReducer() + const reference:{doi:string,url:string,title:string|null}[] =[] + + if (papers?.length===0 || mentions?.length===0){ + return ( +
+ Referenced paper: +
+ ) + } + + papers.forEach(uuid=>{ + const found = mentions.find(item=>item.id === uuid) + if (found?.doi && found.url) reference.push({doi:found.doi, url:found.url, title: found.title}) + }) + + if (reference.length === 0){ + return ( +
+ Referenced: None +
+ ) + } + + return( +
+ Referenced: { + reference.map(item=>{ + return ( + + {item.doi} + + ) + }) + } +
+ ) +} + + +export default function CitationItem({item}:{item:Readonly}){ + const title = getMentionType(item?.mention_type,'singular') + return ( +
+
+ {title} +
+ + + + + +
+ ) +} diff --git a/frontend/components/software/edit/reference-papers/CitationsBySoftware.tsx b/frontend/components/software/edit/reference-papers/CitationsBySoftware.tsx new file mode 100644 index 000000000..8bf53415d --- /dev/null +++ b/frontend/components/software/edit/reference-papers/CitationsBySoftware.tsx @@ -0,0 +1,98 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import {MouseEvent,ChangeEvent, useState, useEffect} from 'react' +import TablePagination from '@mui/material/TablePagination' + +import {rowsPerPageOptions} from '~/config/pagination' +import Searchbox from '~/components/form/Searchbox' +import useCitationsForSoftware from './useCitationsBySoftware' +import CitationItem from './CitationItem' +import {useCitationCnt} from './TabCountsProvider' +import NoCitations from './NoCitations' + +export default function CitationBySoftware() { + const [query,setQuery] = useState({ + search: '', + rows: 12, + page: 0 + }) + const {citations,count,loading} = useCitationsForSoftware(query) + const {setCitationCnt} = useCitationCnt() + + useEffect(()=>{ + if (count) { + setCitationCnt(count) + } + },[count,setCitationCnt]) + + function handleTablePageChange( + event: MouseEvent | null, + newPage: number, + ) { + // Pagination component starts counting from 0, but we need to start from 1 + // handleQueryChange('page',(newPage + 1).toString()) + setQuery({ + ...query, + page: newPage + }) + } + + function handleItemsPerPage( + event: ChangeEvent, + ) { + setQuery({ + ...query, + rows: parseInt(event.target.value), + // reset to first page when changing + page: 0 + }) + } + + function handleSearch(searchFor: string) { + setQuery({ + ...query, + search: searchFor, + // reset to first page when changing + page: 0 + }) + } + + if (citations?.length===0 && loading===false){ + return
+ } + + return ( + <> +
+
+ +
+ +
+
+ { + citations?.map(item=>{ + return + }) + } +
+ + ) +} diff --git a/frontend/components/software/edit/reference-papers/EditReferencePapersProvider.tsx b/frontend/components/software/edit/reference-papers/EditReferencePapersProvider.tsx new file mode 100644 index 000000000..6dfe6b2dd --- /dev/null +++ b/frontend/components/software/edit/reference-papers/EditReferencePapersProvider.tsx @@ -0,0 +1,221 @@ +// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) +// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) (dv4all) +// SPDX-FileCopyrightText: 2022 dv4all +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import {useReducer} from 'react' + +import {useSession} from '~/auth' +import logger from '~/utils/logger' +import {deleteMentionItem, updateMentionItem} from '~/utils/editMentions' +import {MentionItemProps} from '~/types/Mention' +import useSnackbar from '~/components/snackbar/useSnackbar' +import { + EditMentionAction, EditMentionActionType, + editMentionReducer, EditMentionState +} from '~/components/mention/editMentionReducer' +import EditMentionContext from '~/components/mention/editMentionContext' +import NoRefferencePapers from './NoRefferencePapers' +import { + addToReferencePaperForSoftware, + addNewReferencePaperToSoftware, + removeReferencePaperForSoftware +} from './apiReferencePapers' +import useSoftwareContext from '../useSoftwareContext' + +const initalState:EditMentionState = { + settings: { + editModalTitle: 'Reference papers', + confirmDeleteModalTitle: 'Delete reference paper', + noItemsComponent:()=> + }, + loading: true, + processing: false, + mentions: [], + editModal: { + open:false + }, + confirmModal: { + open:false + } +} + +export default function EditReferencePapersProvider(props: any) { + const {user,token} = useSession() + const {software:{id:software}} = useSoftwareContext() + const {showErrorMessage,showSuccessMessage,showInfoMessage} 
= useSnackbar() + const [state, dispatch] = useReducer(editMentionReducer,initalState) + + // console.group('EditReferencePapersProvider') + // console.log('software...', software) + // console.groupEnd() + + async function processOnSubmit(action: EditMentionAction) { + const item:MentionItemProps = action.payload + // new item created manually + if (item.id === null || item.id === '') { + item.id = null + item.source = 'RSD' + // new item to be added + const resp = await addNewReferencePaperToSoftware({ + item, + software, + token + }) + // debugger + if (resp.status === 200) { + dispatch({ + type: EditMentionActionType.ADD_ITEM, + // updated item is in message + payload: resp.message + }) + // show success + showSuccessMessage(`Added ${item.title}`) + } else { + showErrorMessage(resp.message as string) + } + } else if (user?.role === 'rsd_admin') { + // rsd_admin can update mention + const resp = await updateMentionItem({ + mention: item, + token, + }) + if (resp.status !== 200) { + showErrorMessage(`Failed to update ${item.title}. ${resp.message}`) + return + } + dispatch({ + type: EditMentionActionType.UPDATE_ITEM, + // item is returned in message + payload: resp.message + }) + } + } + + async function processOnAdd(action: EditMentionAction) { + const item: MentionItemProps = action.payload + // check if already in collection + if (item.doi) { + const found = state.mentions.find(mention=>mention.doi===item.doi) + if (found) { + showInfoMessage(`Reference paper with DOI ${item.doi} is already in the list.`) + return true + } + } + if (item.id) { + // existing RSD mention item to be added to software + const resp = await addToReferencePaperForSoftware({ + software, + mention: item.id, + token + }) + // debugger + if (resp.status !== 200) { + showErrorMessage(`Failed to add ${item.title}. 
${resp.message}`) + } else { + dispatch({ + type: EditMentionActionType.ADD_ITEM, + payload: item + }) + // show success + showSuccessMessage(`Added ${item.title}`) + } + } else { + // probably new item from crossref or datacite + const resp = await addNewReferencePaperToSoftware({item, software, token}) + // debugger + if (resp.status === 200) { + dispatch({ + type: EditMentionActionType.ADD_ITEM, + // updated item is in message + payload: resp.message + }) + // show success + showSuccessMessage(`Added ${item.title}`) + } else { + showErrorMessage(resp.message as string) + } + } + } + + async function processOnDelete(action: EditMentionAction) { + const item = action.payload + if (item.id) { + const resp = await removeReferencePaperForSoftware({ + software, + mention: item.id, + token + }) + if (resp.status == 200) { + dispatch({ + type: EditMentionActionType.DELETE_ITEM, + // item is returned in message + payload: item + }) + // try to remove mention item + // we do not handle response result + // because if mention is referenced in other + // software the delete action will fail (and that's ok) + await deleteMentionItem({ + id: item.id, + token + }) + } else { + showErrorMessage(`Failed to delete ${item.title}. ${resp.message}`) + } + } else { + showErrorMessage(`Failed to delete ${item.title}. Invalid item id ${item.id}`) + } + } + + /** + * Middleware function that intercepts actions/messages send by mention module components + * or local components which use useMentionReducer hook. This middleware function is used + * to call other functions that perform api calls. In most cases we pass original + * action/message further to other listeners/subscribers. 
+ * @param action + */ + function dispatchMiddleware(action: EditMentionAction) { + // console.log('impactMiddleware...action...', action) + switch (action.type) { + case EditMentionActionType.ON_SUBMIT: + // pass original action + dispatch(action) + // process item by api + // and dispatch next action (see function) + processOnSubmit(action) + break + case EditMentionActionType.ON_ADD: + // pass original action + dispatch(action) + // process item by api + processOnAdd(action) + break + case EditMentionActionType.ON_UPDATE: + // WE do not allow update of mentions with DOI + logger('ON_UPDATE action not supported','warn') + // pass original action + dispatch(action) + break + case EditMentionActionType.ON_DELETE: + // pass original action + dispatch(action) + // process item by api + processOnDelete(action) + break + default: + // just dispatch original action + dispatch(action) + } + } + + return ( + + ) +} diff --git a/frontend/components/software/edit/reference-papers/NoCitations.tsx b/frontend/components/software/edit/reference-papers/NoCitations.tsx new file mode 100644 index 000000000..12b4d45f7 --- /dev/null +++ b/frontend/components/software/edit/reference-papers/NoCitations.tsx @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) +// SPDX-FileCopyrightText: 2022 dv4all +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import Alert from '@mui/material/Alert' +import AlertTitle from '@mui/material/AlertTitle' + +export default function NoCitations() { + return ( + + No citations to show + Try adding the reference papers first. RSD scraper + will then periodically run and find all citations using OpenAlex api. 
+ + ) +} diff --git a/frontend/components/software/edit/reference-papers/NoRefferencePapers.tsx b/frontend/components/software/edit/reference-papers/NoRefferencePapers.tsx new file mode 100644 index 000000000..d190a3b13 --- /dev/null +++ b/frontend/components/software/edit/reference-papers/NoRefferencePapers.tsx @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) +// SPDX-FileCopyrightText: 2022 dv4all +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import Alert from '@mui/material/Alert' +import AlertTitle from '@mui/material/AlertTitle' + +export default function NoReferencePapers() { + return ( + + No reference papers to show + Add one using search option! + + ) +} diff --git a/frontend/components/software/edit/reference-papers/PageTabs.tsx b/frontend/components/software/edit/reference-papers/PageTabs.tsx new file mode 100644 index 000000000..4fbbbe56f --- /dev/null +++ b/frontend/components/software/edit/reference-papers/PageTabs.tsx @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import {useState} from 'react' +import Tabs from '@mui/material/Tabs' +import Tab from '@mui/material/Tab' + +import {useSession} from '~/auth' +import useSoftwareContext from '../useSoftwareContext' +import CitationsBySoftware from './CitationsBySoftware' +import ReferencePapersTab from './ReferencePapersTab' +import {useTabCountsContext} from './TabCountsProvider' +import EditReferencePapersProvider from './EditReferencePapersProvider' + + +function TabContent({tab}:{tab:Readonly<'reference'|'citation'>}){ + if (tab==='citation'){ + return + } + // reference papers is default value + return +} + +export default function PageTabs() { + const {token} = useSession() + const 
{software} = useSoftwareContext() + const [tab, setTab] = useState<'reference'|'citation'>('reference') + const {referencePaperCnt,citationCnt} = useTabCountsContext() + return ( + <> + setTab(value)} + > + + + + {/* Reference papers tab provider is shared on both tabs to link to reference paper DOI*/} + + + + + ) +} diff --git a/frontend/components/software/edit/reference-papers/ReferencePaper.tsx b/frontend/components/software/edit/reference-papers/ReferencePaper.tsx new file mode 100644 index 000000000..9bb843a0f --- /dev/null +++ b/frontend/components/software/edit/reference-papers/ReferencePaper.tsx @@ -0,0 +1,67 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import IconButton from '@mui/material/IconButton' +import DeleteIcon from '@mui/icons-material/Delete' + +import {MentionItemProps} from '~/types/Mention' +import MentionAuthors from '~/components/mention/MentionAuthors' +import MentionDoi from '~/components/mention/MentionDoi' +import {MentionTitle} from '~/components/mention/MentionItemBase' +import MentionPublisherItem from '~/components/mention/MentionPublisherItem' +import {getMentionType} from '~/components/mention/config' + +export type ReferencePaperProps = { + readonly item: MentionItemProps + onDelete: () => void +} + +export default function ReferencePaper({item,onDelete}:ReferencePaperProps){ + const mentionType = getMentionType(item?.mention_type,'singular') + return ( +
+ +
+
+
+ {mentionType} +
+ +
+ + + +
+ + + + +
+ ) +} diff --git a/frontend/components/software/edit/reference-papers/ReferencePapersList.tsx b/frontend/components/software/edit/reference-papers/ReferencePapersList.tsx new file mode 100644 index 000000000..579acede3 --- /dev/null +++ b/frontend/components/software/edit/reference-papers/ReferencePapersList.tsx @@ -0,0 +1,97 @@ +// SPDX-FileCopyrightText: 2022 - 2023 dv4all +// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all) (dv4all) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import {useEffect} from 'react' +import {useSession} from '~/auth' +import ContentLoader from '~/components/layout/ContentLoader' +import useEditMentionReducer from '~/components/mention/useEditMentionReducer' +import useSoftwareContext from '../useSoftwareContext' +import ReferencePaper from './ReferencePaper' +import {useCitationCnt, useReferencePaperCnt} from './TabCountsProvider' +import {getCitationCountForSoftware, getReferencePapersForSoftware} from './apiReferencePapers' +import NoReferencePapers from './NoRefferencePapers' + +export default function ReferencePapersList() { + const {token} = useSession() + const {software} = useSoftwareContext() + const {mentions, setMentions, loading, setLoading, onDelete} = useEditMentionReducer() + const {setReferencePaperCnt} = useReferencePaperCnt() + const {setCitationCnt} = useCitationCnt() + + // console.group('ReferencePapersList') + // console.log('mentions...', mentions) + // console.log('loading...', loading) + // console.groupEnd() + // debugger + + useEffect(()=>{ + let abort = false + if (software.id && token){ + getCitationCountForSoftware(software.id,token) + .then(count=> { + // update count only if not closed already + if (abort===false) setCitationCnt(count) + }) + } + return () => { abort = true } + 
},[software.id,token,setCitationCnt]) + + useEffect(() => { + let abort = false + async function getReferencePapersFromApi() { + setLoading(true) + const referencePapers = await getReferencePapersForSoftware({ + software: software.id, + token + }) + if (referencePapers && abort === false) { + // debugger + setMentions(referencePapers) + setLoading(false) + } + } + if (software?.id && token) { + getReferencePapersFromApi() + } + return () => { abort = true } + },[software.id,token,setLoading,setMentions]) + + if (loading) { + return ( +
+ +
+ ) + } + + // update tab count after rendering + // avoid error: cannot update context while rendering component + setTimeout(()=>{ + setReferencePaperCnt(mentions.length) + },10) + + if (mentions.length === 0){ + return + } + + return ( + <> + {mentions.map(item=>{ + return ( + { + onDelete(item) + }} + /> + ) + })} + + ) +} diff --git a/frontend/components/software/edit/reference-papers/ReferencePapersTab.tsx b/frontend/components/software/edit/reference-papers/ReferencePapersTab.tsx new file mode 100644 index 000000000..7290bbb3f --- /dev/null +++ b/frontend/components/software/edit/reference-papers/ReferencePapersTab.tsx @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import EditSection from '~/components/layout/EditSection' +import FindSoftwareMention from '../mentions/FindSoftwareMention' +import ReferencePapersList from './ReferencePapersList' + +export default function ReferencePapersTab() { + return ( + +
+ +
+
+ {/* use mention component */} + +
+
+ ) +} diff --git a/frontend/components/software/edit/reference-papers/TabCountsProvider.tsx b/frontend/components/software/edit/reference-papers/TabCountsProvider.tsx new file mode 100644 index 000000000..4450b5a53 --- /dev/null +++ b/frontend/components/software/edit/reference-papers/TabCountsProvider.tsx @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import {Dispatch, SetStateAction, createContext, useContext, useState} from 'react' + +type TabCounts={ + referencePaperCnt: number | null, + citationCnt: number | null, + setReferencePaperCnt: Dispatch>, + setCitationCnt: Dispatch>, +} + +const TabCountContext = createContext(null) + +export function TabCountsProvider(props:any){ + const [referencePaperCnt, setReferencePaperCnt] = useState(null) + const [citationCnt, setCitationCnt] = useState(null) + + return ( + + ) +} + +export function useTabCountsContext(){ + const context = useContext(TabCountContext) + if (context===null){ + throw new Error('useTabCountsContext must be used within TabCountProvider') + } + return context +} + +export function useReferencePaperCnt(){ + const {referencePaperCnt,setReferencePaperCnt} = useTabCountsContext() + return { + referencePaperCnt, + setReferencePaperCnt + } +} + +export function useCitationCnt(){ + const {citationCnt,setCitationCnt} = useTabCountsContext() + return { + citationCnt, + setCitationCnt + } +} diff --git a/frontend/components/software/edit/reference-papers/apiReferencePapers.ts b/frontend/components/software/edit/reference-papers/apiReferencePapers.ts new file mode 100644 index 000000000..9d78d78bf --- /dev/null +++ b/frontend/components/software/edit/reference-papers/apiReferencePapers.ts @@ -0,0 +1,203 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// 
SPDX-License-Identifier: Apache-2.0
+
+import logger from '~/utils/logger'
+import {createJsonHeaders, extractReturnMessage, getBaseUrl} from '~/utils/fetchHelpers'
+import {MentionItemProps, mentionColumns} from '~/types/Mention'
+import {addOrGetMentionItem} from '~/utils/editMentions'
+import {extractCountFromHeader} from '~/utils/extractCountFromHeader'
+import {CitationItemProps} from './CitationItem'
+
+export async function addToReferencePaperForSoftware({mention, software, token}:
+  { mention: string, software: string, token: string }) {
+  const url = '/api/v1/reference_paper_for_software'
+  try {
+    const resp = await fetch(url, {
+      method: 'POST',
+      headers: createJsonHeaders(token),
+      body: JSON.stringify({
+        software,
+        mention
+      })
+    })
+
+    return extractReturnMessage(resp, mention)
+
+  } catch (e: any) {
+    logger(`addToReferencePaperForSoftware: ${e?.message}`, 'error')
+    return {
+      status: 500,
+      message: e?.message
+    }
+  }
+}
+
+export async function addNewReferencePaperToSoftware({item, software, token}:
+  { item: MentionItemProps, software: string, token: string }) {
+  // add new item or get existing by DOI
+  let resp = await addOrGetMentionItem({
+    mention:item,
+    token
+  })
+  // any status other than 200 is handed back to the caller unchanged (see else branches)
+  if (resp.status === 200) {
+    // mention item returned in message
+    const mention: MentionItemProps = resp.message
+    if (mention.id) {
+      resp = await addToReferencePaperForSoftware({
+        software,
+        mention: mention.id,
+        token
+      })
+      if (resp.status === 200) {
+        // we return mention item in message
+        return {
+          status: 200,
+          message: mention
+        }
+      } else {
+        return resp
+      }
+    } else {
+      return {
+        status: 500,
+        message: 'Reference paper id is missing.'
+      }
+    }
+  } else {
+    return resp
+  }
+}
+
+export async function removeReferencePaperForSoftware({mention, software, token}:
+  { mention: string, software: string, token: string }) {
+  const url = `/api/v1/reference_paper_for_software?software=eq.${software}&mention=eq.${mention}`
+  try {
+    const resp = await fetch(url, {
+      method: 'DELETE',
+      headers: createJsonHeaders(token)
+    })
+
+    return extractReturnMessage(resp, mention)
+
+  } catch (e: any) {
+    logger(`removeReferencePaperForSoftware: ${e?.message}`, 'error')
+    return {
+      status: 500,
+      message: e?.message
+    }
+  }
+}
+
+export async function getReferencePapersForSoftware({software,token}:{software: string, token?: string}) {
+  try {
+    // reference papers are ordered by publication year (descending), then by mention type (ascending)
+    const query = `software?id=eq.${software}&select=id,slug,mention!reference_paper_for_software(${mentionColumns})&mention.order=publication_year.desc,mention_type.asc`
+    // construct url
+    const url = `${getBaseUrl()}/${query}`
+    // make request
+    const resp = await fetch(url, {
+      method: 'GET',
+      headers: {
+        ...createJsonHeaders(token),
+        // request single object item
+        'Accept': 'application/vnd.pgrst.object+json'
+      }
+    })
+    if (resp.status === 200) {
+      const json = await resp.json()
+      // extract mentions from software object
+      const mentions: MentionItemProps[] = json?.mention ?? []
+      return mentions
+    }
+    logger(`getReferencePapersForSoftware: [${resp.status}] [${url}]`, 'error')
+    // query not found
+    return []
+  } catch (e: any) {
+    logger(`getReferencePapersForSoftware: ${e?.message}`, 'error')
+    return []
+  }
+}
+
+export type CitationState={
+  count: number,
+  citations: CitationItemProps[]
+}
+
+
+type GetCitationsForSoftwareProps={
+  software: string,
+  token?: string
+  search?: string | null
+  page?:number | null,
+  rows?:number | null
+}
+
+export async function getCitationsForSoftware({
+  software,token,search,page,rows}:GetCitationsForSoftwareProps
+):Promise {
+  try {
+    // default to 12 rows per page (also for limit); url-encode the search term so characters like & or # cannot alter the query
+    const limit = rows ?? 12
+    const offset = (page ?? 0) * limit
+    const term = search ? encodeURIComponent(search) : null
+    let query = `rpc/citation_by_software?software=eq.${software}&order=publication_year.desc,mention_type,doi&offset=${offset}&limit=${limit}`
+    if (term) query += `&or=(title.ilike.*${term}*,authors.ilike.*${term}*,publisher.ilike.*${term}*,url.ilike.*${term}*)`
+    // construct url
+    const url = `${getBaseUrl()}/${query}`
+    // make request
+    const resp = await fetch(url, {
+      method: 'GET',
+      headers: {
+        ...createJsonHeaders(token),
+        'Prefer': 'count=exact'
+      }
+    })
+    if ([200,206].includes(resp.status)===true) {
+      const json:CitationItemProps[] = await resp.json()
+      return {
+        count: extractCountFromHeader(resp.headers) ?? 0,
+        citations: json,
+      }
+    }
+    logger(`getCitationsForSoftware: [${resp.status}] [${url}]`, 'error')
+    // query not found
+    return {
+      count:0,
+      citations:[]
+    }
+  } catch (e: any) {
+    logger(`getCitationsForSoftware: ${e?.message}`, 'error')
+    return {
+      count:0,
+      citations:[]
+    }
+  }
+}
+
+export async function getCitationCountForSoftware(software:string,token:string){
+  try {
+    // only the count extracted from the response headers is used; the returned rows (max 3) are ignored
+    const query = `rpc/citation_by_software?software=eq.${software}&order=publication_year.desc,mention_type&offset=0&limit=3`
+    // construct url
+    const url = `${getBaseUrl()}/${query}`
+    // make request
+    const resp = await fetch(url, {
+      method: 'GET',
+      headers: {
+        ...createJsonHeaders(token),
+        'Prefer': 'count=exact'
+      }
+    })
+    if ([200,206].includes(resp.status)===true) {
+      return extractCountFromHeader(resp.headers) ?? 0
+    }
+    logger(`getCitationCountForSoftware: [${resp.status}] [${url}]`, 'error')
+    return 0
+  } catch (e: any) {
+    logger(`getCitationCountForSoftware: ${e?.message}`, 'error')
+    return 0
+  }
+}
diff --git a/frontend/components/software/edit/reference-papers/config.ts b/frontend/components/software/edit/reference-papers/config.ts
new file mode 100644
index 000000000..1790b150e
--- /dev/null
+++ b/frontend/components/software/edit/reference-papers/config.ts
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: 2022 - 2023 Dusan Mijatovic (dv4all)
+// SPDX-FileCopyrightText: 2022 - 2023 dv4all
+// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center)
+// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all) (dv4all)
+// SPDX-FileCopyrightText: 2023 Netherlands eScience Center
+//
+// SPDX-License-Identifier: Apache-2.0
+
+import {findMention} from '~/components/mention/config'
+import config from '~/components/mention/ImportMentions/config'
+
+export const cfgReferencePapers = {
+  title: 'Reference papers',
+  findMention: {
+    ...findMention,
+    title: 'Add reference paper'
+  },
+  doiInput: config.doiInput
+}
diff
--git a/frontend/components/software/edit/reference-papers/index.tsx b/frontend/components/software/edit/reference-papers/index.tsx new file mode 100644 index 000000000..0c051eba1 --- /dev/null +++ b/frontend/components/software/edit/reference-papers/index.tsx @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import {TabCountsProvider} from './TabCountsProvider' +import PageTabs from './PageTabs' + +export default function ReferencePapers() { + return ( +
+ + + +
+ ) +} diff --git a/frontend/components/software/edit/reference-papers/useCitationsBySoftware.tsx b/frontend/components/software/edit/reference-papers/useCitationsBySoftware.tsx new file mode 100644 index 000000000..5e7ceb16c --- /dev/null +++ b/frontend/components/software/edit/reference-papers/useCitationsBySoftware.tsx @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +import {useEffect, useState} from 'react' +import {useSession} from '~/auth' +import useSoftwareContext from '../useSoftwareContext' +import {CitationState, getCitationsForSoftware} from './apiReferencePapers' + +type UseCitationsForSoftwareProps = { + search?:string | null + page?:number | null, + rows?:number | null +} + +export default function useCitationsForSoftware({search,page=0,rows=12}:UseCitationsForSoftwareProps){ + const {token} = useSession() + const {software} = useSoftwareContext() + const [loading, setLoading] = useState(true) + const [state, setState] = useState() + + useEffect(() => { + let abort = false + async function getCitationsFromApi() { + setLoading(true) + const state = await getCitationsForSoftware({ + software: software.id, + token, + search, + page, + rows + }) + if (abort === false) { + // debugger + setState(state) + setLoading(false) + } + } + if (software && token) { + getCitationsFromApi() + } + return () => { abort = true } + },[software,token,search,page,rows]) + + // console.group('useCitationsForSoftware') + // console.log('loading...', loading) + // console.log('citationCnt...', mentions.length) + // console.groupEnd() + + return { + loading, + ...state + } +} diff --git a/frontend/types/Mention.ts b/frontend/types/Mention.ts index 05dfaed48..99b2bfd7f 100644 --- a/frontend/types/Mention.ts +++ b/frontend/types/Mention.ts @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center // 
SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 dv4all +// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) // // SPDX-License-Identifier: Apache-2.0 @@ -34,7 +35,7 @@ export type MentionItemProps = { // is_featured?: boolean mention_type: MentionTypeKeys | null source: string - note: string | null + note?: string | null } export const mentionColumns ='id,doi,url,title,authors,publisher,publication_year,journal,page,image_url,mention_type,source,note' diff --git a/frontend/utils/editMentions.ts b/frontend/utils/editMentions.ts index 1fb242f49..f3a670ac2 100644 --- a/frontend/utils/editMentions.ts +++ b/frontend/utils/editMentions.ts @@ -17,7 +17,8 @@ import logger from './logger' export async function getMentionsForSoftware({software,token}:{software: string, token?: string}) { try { // the content is ordered by type ascending - const query = `software?id=eq.${software}&select=id,slug,mention(${mentionColumns})&mention.order=mention_type.asc` + // software is linked to mention through more than one table (mention_for_software, reference_paper_for_software), so the embedded resource must name the join table explicitly: mention!mention_for_software
+ const query = `software?id=eq.${software}&select=id,slug,mention!mention_for_software(${mentionColumns})&mention.order=mention_type.asc` // construct url const url = `${getBaseUrl()}/${query}` // make request diff --git a/scrapers/jobs.cron b/scrapers/jobs.cron index 7ee245e89..2b2f5e14a 100644 --- a/scrapers/jobs.cron +++ b/scrapers/jobs.cron @@ -9,4 +9,5 @@ 1-59/6 * * * * /usr/local/openjdk-18/bin/java -cp /usr/myjava/scrapers.jar nl.esciencecenter.rsd.scraper.doi.MainReleases > /proc/$(cat /var/run/crond.pid)/fd/1 2>&1 3-59/6 * * * * /usr/local/openjdk-18/bin/java -cp /usr/myjava/scrapers.jar nl.esciencecenter.rsd.scraper.doi.MainMentions > /proc/$(cat /var/run/crond.pid)/fd/1 2>&1 5-59/6 * * * * /usr/local/openjdk-18/bin/java -cp /usr/myjava/scrapers.jar nl.esciencecenter.rsd.scraper.git.MainContributors > /proc/$(cat /var/run/crond.pid)/fd/1 2>&1 +5-59/6 * * * * /usr/local/openjdk-18/bin/java -cp /usr/myjava/scrapers.jar nl.esciencecenter.rsd.scraper.doi.MainCitations > /proc/$(cat /var/run/crond.pid)/fd/1 2>&1 0 1 * * * /usr/bin/python3 -u /usr/myjava/oaipmh.py > /proc/$(cat /var/run/crond.pid)/fd/1 2>&1 diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CitationData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CitationData.java new file mode 100644 index 000000000..4b2ae61e7 --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CitationData.java @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import java.util.Collection; +import java.util.UUID; + +public class CitationData { + + public UUID id; + public String doi; + public Collection knownDois; +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CrossrefMention.java
b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CrossrefMention.java index 3a7f115c5..0abadd302 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CrossrefMention.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CrossrefMention.java @@ -23,10 +23,10 @@ public class CrossrefMention implements Mention { - private static final Map crossrefTypeMap; + static final Map crossrefTypeMap; static { -// https://api.crossref.org/types + // https://api.crossref.org/types crossrefTypeMap = new HashMap<>(); crossrefTypeMap.put("book-section", MentionType.bookSection); @@ -97,7 +97,7 @@ public MentionRecord mentionData() { try { result.publicationYear = Utils.integerOrNull(workJson.getAsJsonObject("published").getAsJsonArray("date-parts").get(0).getAsJsonArray().get(0)); } catch (RuntimeException e) { -// year not found, we leave it at null, nothing to do + // year not found, we leave it at null, nothing to do } if (workJson.getAsJsonArray("container-title").size() > 0) { JsonArray journalTitles = workJson.getAsJsonArray("container-title"); diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java index ef1d59cf4..60dde05ce 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java @@ -21,7 +21,6 @@ import java.util.Map; import java.util.Set; import java.util.TreeSet; -import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -65,7 +64,7 @@ public class DataciteMentionRepository implements MentionRepository { private static final Pattern URL_TREE_TAG_PATTERN = Pattern.compile("/tree/([^/]+)$"); static { -// https://schema.datacite.org/meta/kernel-4.4/ + // https://schema.datacite.org/meta/kernel-4.4/ 
dataciteTypeMap = new HashMap<>(); dataciteTypeMap.put("Audiovisual", MentionType.presentation); dataciteTypeMap.put("Book", MentionType.book); @@ -92,7 +91,7 @@ public class DataciteMentionRepository implements MentionRepository { dataciteTypeMap.put("Software", MentionType.computerProgram); dataciteTypeMap.put("Sound", MentionType.other); dataciteTypeMap.put("Standard", MentionType.other); -// dataciteTypeMap.put("Text", MentionType.other); + // dataciteTypeMap.put("Text", MentionType.other); dataciteTypeMap.put("Workflow", MentionType.other); dataciteTypeMap.put("Other", MentionType.other); @@ -105,7 +104,7 @@ public class DataciteMentionRepository implements MentionRepository { dataciteTextTypeMap.put("Report", MentionType.report); } - // "10.5281/zenodo.1408128","10.1186/s12859-018-2165-7" + // "10.5281/zenodo.1408128","10.1186/s12859-018-2165-7" static String joinCollection(Collection dois) { return dois.stream() .collect(Collectors.joining("\",\"", "\"", "\"")); @@ -118,8 +117,8 @@ static Collection jsonStringToUniqueMentions(String json) { Set usedDois = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); for (JsonElement work : worksJson) { try { -// Sometimes, DataCite gives back two of the same results for one DOI, e.g. for 10.4122/1.1000000817, -// so we need to only add it once, otherwise we cannot POST it to the backend + // Sometimes, DataCite gives back two of the same results for one DOI, e.g. 
for 10.4122/1.1000000817, + // so we need to only add it once, otherwise we cannot POST it to the backend MentionRecord parsedMention = parseWork(work.getAsJsonObject()); if (usedDois.contains(parsedMention.doi)) continue; @@ -212,7 +211,7 @@ public Collection mentionData(Collection dois) { } @Override - public Map save(Collection mentions) { + public void save(Collection mentions) { throw new UnsupportedOperationException(); }
diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java
new file mode 100644
index 000000000..8f0cf5dec
--- /dev/null
+++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java
@@ -0,0 +1,60 @@
+// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center)
+// SPDX-FileCopyrightText: 2023 Netherlands eScience Center
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package nl.esciencecenter.rsd.scraper.doi;
+
+import nl.esciencecenter.rsd.scraper.Config;
+import nl.esciencecenter.rsd.scraper.Utils;
+
+import java.time.ZonedDateTime;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.UUID;
+
+/*
+ * Scrapes citations from OpenAlex for the reference papers returned by leastRecentlyScrapedCitations(5).
+ * Citing works whose DOI is not in the known DOIs of a reference paper are saved as mentions,
+ * after which their ids are saved as citations of that reference paper.
+ */
+public class MainCitations {
+
+	public static void main(String[] args) {
+		System.out.println("Start scraping citations");
+		try {
+			String backendUrl = Config.backendBaseUrl();
+			PostgrestCitationRepository localCitationRepository = new PostgrestCitationRepository(backendUrl);
+
+			Collection referencePapersToScrape = localCitationRepository.leastRecentlyScrapedCitations(5);
+			OpenAlexCitations openAlexCitations = new OpenAlexCitations();
+			MentionRepository localMentionRepository = new PostgrestMentionRepository(backendUrl);
+			String email = Config.crossrefContactEmail().orElse(null);
+			ZonedDateTime now = ZonedDateTime.now();
+
+			for (CitationData citationData : referencePapersToScrape) {
+				// handle failures per reference paper, so that an error for one paper
+				// does not stop the remaining papers of this run from being scraped
+				try {
+					Collection citingMentions = openAlexCitations.citations(citationData.doi, email, citationData.id);
+					citingMentions.removeIf(mention -> citationData.knownDois.contains(mention.doi));
+					localMentionRepository.save(citingMentions);
+
+					Collection citingMentionIds = new ArrayList<>();
+					for (MentionRecord citingMention : citingMentions) {
+						citingMentionIds.add(citingMention.id);
+					}
+
+					localCitationRepository.saveCitations(backendUrl, citationData.id, citingMentionIds, now);
+				} catch (RuntimeException e) {
+					Utils.saveExceptionInDatabase("Citation scraper", null, null, e);
+				}
+			}
+		} catch (RuntimeException e) {
+			// failures during setup, e.g. reading the config or fetching the papers to scrape
+			Utils.saveExceptionInDatabase("Citation scraper", null, null, e);
+		}
+
+		System.out.println("Done scraping citations");
+	}
+}
diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainReleases.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainReleases.java index d9c50affc..2e3ead245 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainReleases.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainReleases.java @@ -9,6 +9,7 @@ import java.util.Collection; import java.util.Map; +import java.util.TreeMap; import java.util.UUID; /* @@ -34,7 +35,11 @@ public static void main(String[] args) { Collection allMentions = scrapedReleasesPerConceptDoi.values().stream() .flatMap(Collection::stream) .toList(); - Map doiToId = localMentionRepository.save(allMentions); + localMentionRepository.save(allMentions); + Map doiToId = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); + for (MentionRecord mention : allMentions) { + doiToId.put(mention.doi, mention.id); + } releaseRepository.saveReleaseContent(releasesToScrape, scrapedReleasesPerConceptDoi, doiToId); System.out.println("Done scraping releases"); diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRecord.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRecord.java index 28e39e14b..421e4e0f5 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRecord.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRecord.java @@ -23,7 +23,30 @@
public class MentionRecord { String page; URI imageUrl; MentionType mentionType; + String externalId; String source; Instant scrapedAt; String version; + + @Override + public String toString() { + return "MentionRecord{" + + "id=" + id + + ", doi='" + doi + '\'' + + ", url=" + url + + ", title='" + title + '\'' + + ", authors='" + authors + '\'' + + ", publisher='" + publisher + '\'' + + ", publicationYear=" + publicationYear + + ", doiRegistrationDate=" + doiRegistrationDate + + ", journal='" + journal + '\'' + + ", page='" + page + '\'' + + ", imageUrl=" + imageUrl + + ", mentionType=" + mentionType + + ", externalId='" + externalId + '\'' + + ", source='" + source + '\'' + + ", scrapedAt=" + scrapedAt + + ", version='" + version + '\'' + + '}'; + } } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRepository.java index 4c7af7529..5a1abee81 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRepository.java @@ -6,8 +6,6 @@ package nl.esciencecenter.rsd.scraper.doi; import java.util.Collection; -import java.util.Map; -import java.util.UUID; public interface MentionRepository { @@ -15,5 +13,5 @@ public interface MentionRepository { Collection mentionData(Collection dois); - Map save(Collection mentions); + void save(Collection mentions); } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java new file mode 100644 index 000000000..988ae367b --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java @@ -0,0 +1,194 @@ +// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// +// SPDX-License-Identifier: 
Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import nl.esciencecenter.rsd.scraper.Utils; + +import java.net.URI; +import java.net.http.HttpResponse; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Optional; +import java.util.UUID; + +public class OpenAlexCitations { + + static final String DOI_FILTER_URL_UNFORMATTED = "https://api.openalex.org/works?filter=doi:%s"; + + public Collection citations(String doi, String email, UUID id) { + + String doiUrlEncoded = Utils.urlEncode(doi); + String worksUri = DOI_FILTER_URL_UNFORMATTED.formatted(doiUrlEncoded); + + Optional optionalCitationsUri = citationsUri(worksUri, email); + if (optionalCitationsUri.isEmpty()) { + return Collections.emptyList(); + } + + return scrapeCitations(optionalCitationsUri.get(), email, id); + } + + static Optional citationsUri(String worksUri, String email) { + HttpResponse response; + if (email == null || email.isBlank()) { + response = Utils.getAsHttpResponse(worksUri); + } else { + response = Utils.getAsHttpResponse(worksUri, "User-Agent", "mailto:" + email); + } + + JsonObject tree = JsonParser.parseString(response.body()).getAsJsonObject(); + + int count = tree + .getAsJsonObject("meta") + .getAsJsonPrimitive("count") + .getAsInt(); + + if (count == 0 || count > 1) { + return Optional.empty(); + } + + String citationsUri = tree + .getAsJsonArray("results") + .get(0) + .getAsJsonObject() + .getAsJsonPrimitive("cited_by_api_url") + .getAsString(); + + return Optional.of(citationsUri); + } + + // we use cursor paging as that will always work + // https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging#cursor-paging + static Collection scrapeCitations(String citationsUri, String email, UUID id) { + final int perPage = 200; + String 
cursor = "*"; + + Collection citations = new ArrayList<>(); + while (cursor != null) { + HttpResponse response; + String citationsUriWithCursor = citationsUri + "&per-page=" + perPage + "&cursor=" + cursor; + if (email == null || email.isBlank()) { + response = Utils.getAsHttpResponse(citationsUriWithCursor); + } else { + response = Utils.getAsHttpResponse(citationsUriWithCursor, "User-Agent", "mailto:" + email); + } + JsonObject tree = JsonParser.parseString(response.body()).getAsJsonObject(); + + cursor = Utils.stringOrNull(tree + .getAsJsonObject("meta") + .get("next_cursor") + ); + + JsonArray citationsArray = tree + .getAsJsonArray("results"); + + Instant now = Instant.now(); + for (JsonElement citation : citationsArray) { + MentionRecord citationAsMention; + try { + citationAsMention = parseCitationAsMention(citation, now); + } catch (RuntimeException e) { + Utils.saveExceptionInDatabase("Citation scraper", "mention", id, e); + continue; + } + citations.add(citationAsMention); + } + } + + return citations; + } + + static MentionRecord parseCitationAsMention(JsonElement element, Instant scrapedAt) { + JsonObject citationObject = element.getAsJsonObject(); + + MentionRecord mention = new MentionRecord(); + + String doiUrl = Utils.stringOrNull(citationObject.get("doi")); + String doi = doiUrl; + if (doi != null) { + doi = doi.replace("https://doi.org/", ""); + } + mention.doi = doi; + + if (doiUrl != null) { + mention.url = URI.create(doiUrl); + } else { + JsonArray locations = citationObject.getAsJsonArray("locations"); + mention.url = extractUrlFromLocation(locations); + } + + mention.title = Utils.stringOrNull(citationObject.get("title")); + if (mention.title == null) { + String openAlexId = citationObject.getAsJsonPrimitive("id").getAsString(); + String message = "The title of the mention with DOI %s and OpenAlex ID %s is null".formatted(doi, openAlexId); + throw new RuntimeException(message); + } + + JsonArray authorsArray = 
citationObject.getAsJsonArray("authorships"); + Collection authors = new ArrayList<>(); + for (JsonElement jsonElement : authorsArray) { + authors.add( + jsonElement + .getAsJsonObject() + .getAsJsonPrimitive("raw_author_name") + .getAsString() + ); + } + mention.authors = String.join(", ", authors); + + mention.publisher = null; + + mention.publicationYear = Utils.integerOrNull(citationObject.get("publication_year")); + + mention.doiRegistrationDate = null; + + mention.journal = null; + + mention.page = null; + + mention.imageUrl = null; + + String crossrefType = Utils.stringOrNull(citationObject.get("type_crossref")); + mention.mentionType = CrossrefMention.crossrefTypeMap.getOrDefault(crossrefType, MentionType.other); + + mention.externalId = citationObject + .getAsJsonObject("ids") + .getAsJsonPrimitive("openalex") + .getAsString(); + + mention.source = "OpenAlex"; + + mention.scrapedAt = scrapedAt; + + mention.version = null; + + return mention; + } + + static URI extractUrlFromLocation(JsonArray locations) { + for (JsonElement location : locations) { + JsonObject locationObject = location.getAsJsonObject(); + String landingPageUrl = Utils.stringOrNull(locationObject.get("landing_page_url")); + if (landingPageUrl != null) { + landingPageUrl = landingPageUrl.replaceAll("\\\\", "%5C"); + return URI.create(landingPageUrl); + } + + String pdfUrl = Utils.stringOrNull(locationObject.get("pdf_url")); + if (pdfUrl != null) { + return URI.create(pdfUrl); + } + } + + return null; + } +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestCitationRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestCitationRepository.java new file mode 100644 index 000000000..7cd3abbba --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestCitationRepository.java @@ -0,0 +1,80 @@ +// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 Netherlands 
eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import nl.esciencecenter.rsd.scraper.Utils; + +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Objects; +import java.util.TreeSet; +import java.util.UUID; + +public class PostgrestCitationRepository { + + private final String backendUrl; + + public PostgrestCitationRepository(String backendUrl) { + this.backendUrl = Objects.requireNonNull(backendUrl); + } + + public Collection leastRecentlyScrapedCitations(int limit) { + String oneHourAgoFilter = Utils.atLeastOneHourAgoFilter("citations_scraped_at"); + String uri = backendUrl + "/rpc/reference_papers_to_scrape?order=citations_scraped_at.asc.nullsfirst&limit=" + limit + "&" + oneHourAgoFilter; + String data = Utils.getAsAdmin(uri); + return parseJson(data); + } + + public void saveCitations(String backendUrl, UUID idCitedMention, Collection citingMentions, ZonedDateTime scrapedAt) { + String jsonPatch = "{\"citations_scraped_at\": \"%s\"}".formatted(scrapedAt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)); + Utils.patchAsAdmin(backendUrl + "/mention?id=eq." 
+ idCitedMention.toString(), jsonPatch); + + JsonArray jsonArray = new JsonArray(); + + for (UUID citingMention : citingMentions) { + JsonObject jsonObject = new JsonObject(); + jsonObject.addProperty("mention", idCitedMention.toString()); + jsonObject.addProperty("citation", citingMention.toString()); + jsonArray.add(jsonObject); + } + + String uri = backendUrl + "/citation_for_mention"; + + Utils.postAsAdmin(uri, jsonArray.toString(), "Prefer", "resolution=merge-duplicates"); + } + + static Collection parseJson(String data) { + JsonArray array = JsonParser.parseString(data).getAsJsonArray(); + Collection result = new ArrayList<>(); + + for (JsonElement jsonElement : array) { + JsonObject jsonObject = jsonElement.getAsJsonObject(); + UUID id = UUID.fromString(jsonObject.getAsJsonPrimitive("id").getAsString()); + String doi = jsonObject.getAsJsonPrimitive("doi").getAsString(); + + Collection knownDois = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + JsonArray doisArray = jsonObject.getAsJsonArray("known_dois"); + for (JsonElement element : doisArray) { + knownDois.add(element.getAsString()); + } + + CitationData entry = new CitationData(); + entry.id = id; + entry.doi = doi; + entry.knownDois = knownDois; + + result.add(entry); + } + + return result; + } +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java index d34732dcf..c38ae1281 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java @@ -6,23 +6,20 @@ package nl.esciencecenter.rsd.scraper.doi; import com.google.gson.FieldNamingPolicy; +import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; import 
com.google.gson.JsonParser; import com.google.gson.JsonPrimitive; import com.google.gson.JsonSerializer; import com.google.gson.reflect.TypeToken; -import nl.esciencecenter.rsd.scraper.Config; import nl.esciencecenter.rsd.scraper.Utils; import java.net.URI; import java.time.Instant; import java.time.ZonedDateTime; import java.util.Collection; -import java.util.HashMap; -import java.util.Map; import java.util.Objects; import java.util.UUID; @@ -64,23 +61,28 @@ public Collection mentionData(Collection dois) { } @Override - public Map save(Collection mentions) { - String scrapedMentionsJson = new GsonBuilder() + public void save(Collection mentions) { + Gson gson = new GsonBuilder() .serializeNulls() .setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES) .registerTypeAdapter(Instant.class, (JsonSerializer) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString())) .registerTypeAdapter(ZonedDateTime.class, (JsonSerializer) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString())) - .create().toJson(mentions); - String response = Utils.postAsAdmin(Config.backendBaseUrl() + "/mention?on_conflict=doi&select=doi,id", scrapedMentionsJson, "Prefer", "resolution=merge-duplicates,return=representation"); + .create(); + for (MentionRecord mention : mentions) { + String scrapedMentionJson = gson.toJson(mention); + String onConflictFilter; + if (mention.doi != null) { + onConflictFilter = "doi"; + } else { + onConflictFilter = "external_id,source"; + } - JsonArray responseAsArray = JsonParser.parseString(response).getAsJsonArray(); - Map doiToId = new HashMap<>(); - for (JsonElement jsonElement : responseAsArray) { - String doi = jsonElement.getAsJsonObject().getAsJsonPrimitive("doi").getAsString(); - UUID id = UUID.fromString(jsonElement.getAsJsonObject().getAsJsonPrimitive("id").getAsString()); - doiToId.put(doi, id); - } + String uri = "%s/mention?on_conflict=%s&select=id".formatted(backendUrl, onConflictFilter); + String response = 
Utils.postAsAdmin(uri, scrapedMentionJson, "Prefer", "resolution=merge-duplicates,return=representation"); - return doiToId; + JsonArray responseAsArray = JsonParser.parseString(response).getAsJsonArray(); + UUID id = UUID.fromString(responseAsArray.get(0).getAsJsonObject().getAsJsonPrimitive("id").getAsString()); + mention.id = id; + } } } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestReleaseRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestReleaseRepository.java index 69118127f..f9b798185 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestReleaseRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestReleaseRepository.java @@ -78,7 +78,8 @@ Collection parseJson(String data) { Gson gson = new GsonBuilder() .setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES) .create(); - TypeToken> typeToken = new TypeToken>() {}; + TypeToken> typeToken = new TypeToken<>() { + }; return gson.fromJson(data, typeToken.getType()); } }