diff --git a/.gitignore b/.gitignore index c711a218..f3467e3c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,9 @@ fonts # Localisation files /i18n +# API data +elnData.json +elnDataPharm.json # Generated files .docusaurus .cache-loader diff --git a/docs/10_domains/10_analytical_chemistry.mdx b/docs/10_domains/10_analytical_chemistry.mdx index 533dfa22..3726256b 100644 --- a/docs/10_domains/10_analytical_chemistry.mdx +++ b/docs/10_domains/10_analytical_chemistry.mdx @@ -4,84 +4,51 @@ nfdi4chem-id: dac slug: "/analytical_chemistry" --- -import Methods from '@site/src/components/Methods.js'; -import {LbeChip} from '@site/src/components/lbe/LbeElements.js'; +import { LbeChip } from "@site/src/components/lbe/LbeElements.js"; +import ElnFinder from "@site/src/components/eln/ElnFinder.js"; - + + +## Introduction -:::info Summary: +Although analytical chemistry is one of the oldest branches of chemistry, it continues to evolve. New methods and technologies are constantly being developed. Providing the tools and techniques needed to identify and quantify the chemical constituents of a sample, analytical chemistry is a cornerstone of both academia and industry. It is essential for a wide range of applications, from environmental monitoring to drug discovery. -Analytical chemistry is one of the oldest scientific disciplines and an interdisciplinary science, combining methods of physical, inorganic and organic chemistry. Analytical chemistry aims to acquire, process, and evaluate signals to qualify and quantify the composition and to unravel the structure of matter. The analytical chemist applies classical (wet) chemistry and instrumental methods for separation, identification (qualification) and quantification. The discipline is related to many research fields in life, environmental, earth, and engineering sciences, such as metabolomics, medicine, and geochemistry.
-A typical workflow begins with the conceptualisation of the research question, and the planning of experiments, methods, and surveys to evaluate the hypotheses. Surveys are utilised in life, environmental, and earth sciences to perform experiments and/or to obtain samples required to support the research (e.g., laboratory and field campaigns, cohort studies). Experiments are conducted and samples are processed applying existing or newly established methods along with recording of accompanying metadata. Analytical chemistry applies already in the experimental or sampling stage as conditions need to be controlled or metadata has to be acquired (e.g., pH, temperature, colour). Once the product of the experiment or sample processing is obtained, it is analysed with suitable direct or combined methods for identification and quantification. Processing and interpretation of the acquired research data and [metadata](/docs/metadata) support the answering of the research question and decisions for further experiments, research, and measures. -::: +Despite the diversity of analytical methods, a common denominator is the large amount of data generated by instrumental methods. This data must be processed and interpreted to extract meaningful information. This makes analytical chemistry a challenging field for research data management. -## Type of experiments for chemical analysis +## Data Types -### Sampling +Unlike some other areas of chemistry, most research data in analytical chemistry are generated by instrumental methods. In addition, the size and complexity of the data can vary greatly depending on the method used. 
-- Collecting of materials for analysis -- Transport and storage without alteration of samples -- Small-scale experiments to develop / optimise approaches and equipment -- Upscaling of methods to obtain sufficient material for a comprehensive analysis +Techniques like mass spectrometry, chromatography, and spectroscopy generate complex data consisting of raw data files, metadata, and processed data. Raw data files are often proprietary and require specialised software to open and interpret. Metadata is crucial for understanding experimental conditions and parameters. Processed data can range from simple peak lists to complex multivariate models. -### Sample processing +Some open data formats are available for specific data types, such as mass spectrometry data in the [mzML format](https://www.psidev.info/mzML). The [JCAMP-DX format](https://iupac.org/what-we-do/digital-standards/jcamp-dx/) is used for optical spectroscopy data. This format is also suitable for NMR spectroscopy data, but with some major limitations. For chromatography or combined chromatography-mass spectrometry data the situation is more complex as many vendors have their own proprietary formats. 
-- Preparation of the sample for analysis: - - Direct methods: no further treatment (e.g., pH, RFA, MALDI, direct infusion) - - Single methods: - - Dissolving - - Extraction - - Pulverising - - Combined methods: - - Extraction and enrichment (e.g., solid-phase extraction, aqua regia digestion, volatilisation of solvents) - - Separation of interfering compounds (e.g., chromatography, precipitation) - - Chemical transforming in measurable form (e.g., complexing, derivatisation) -- Small-scale experiments for screening / optimization of separation conditions and upscaling +## ELNs and Other Tools -### Determination and evaluation -- Product characterisation with feasible methods (e.g., NMR spectroscopy, mass spectrometry, IR spectroscopy, UV/vis spectroscopy, elemental analysis) - - to identify analytes (targeted and non-targeted) - - to assess the constitution of mixtures - - to quantify analytes +General chemistry ELNs can typically be used for analytical chemistry data and may be well suited to your research topic. However, there are also specialised tools that are tailored to the needs of analytical chemists. These tools often include features for managing instrument data, processing raw data files and visualising results. They may also include tools for chemometric analysis. -## Planning of experiments + -- New and reused analytical methods and research ideas are derived from previous work of the own research group, scientific literature, datasets published in [repositories](/docs/repositories), and requirements of public calls for research, development and demonstration projects. -- Experimental design follows a logical order to achieve a specific goal, such as analytical selectivity and sensitivity, or in the case of a non-targeted analysis (e.g., in metabolomics), a coverage of a broad physical-chemical domain of analytes. -- Planning is concluded by adding the experimental details. 
All [metadata](/docs/metadata) is [documented](/docs/data_documentation) in an [ELN](/docs/eln) (e.g., [Chemotion ELN](https://www.chemotion.net/chemotionsaurus/index.html)) including references. +## Publishing Data -## Documentation of experiments +Data from analytical chemistry can be published on several platforms, depending on the research subject and data type. -- Documentation of research data and metadata is carried out digitally using an ELN. -- Experimental conditions (e.g., solvents, temperature, duration, pressure) are noted in the ELN and if available a laboratory information system. -- Observations and results of analytical methods with no digital output (i.e. no data files) are added manually to the ELN entry of the experiment, which may include temperatures, or the pH (with metadata where applicable). -- Obtained data from analytical instruments (e.g., NMR, MS, or IR data) are uploaded to the Chemotion ELN in open file formats and directly attached to the respective ELN experiment entry including instrumental setup metadata. -- In case instrumental metadata is not convertible to open format without information loss, conditions need to be documented in the ELN. -- Metadata related to the obtained data, such as mass, volumes, or solvent of measurement, have to be provided according to [metadata standards](/docs/format_standards). +If the analytical data have a more supporting role in a larger study it may be appropriate to publish the data in a general data repository. If the research focuses more on the analytical method itself, it may be more appropriate to publish the data in a specialised repository. -## Data producing methods +General data repositories include [Zenodo](https://zenodo.org/) or [RADAR4Chem](https://radar.products.fiz-karlsruhe.de/de/radarabout/radar4chem). For analytical data in context with synthetic chemistry data, [Chemotion Repository](https://chemotion-repository.de/) might also be a suitable option. 
-- Data can be collected during the experiment or after the experiment by analysing the obtained product. -- Manually determined data: Experimental observations, mass, volumes, pH, etc. -- Digital data are obtained with analytical instruments. An overview of file extensions, file sizes, and converters for several analytical methods is given in the table below. -- Raw data files in proprietary file formats should be saved alongside interoperable open file formats by using converters or the analytical device software. If no specific open format is currently available, export as .txt or .csv is recommended. Please be aware that metadata included in the header of .txt or .csv files may not follow a defined (open) format and metadata should be additionally also added into the ELN. +For method-specific data, several specialised repositories are available. A few examples include: - +- [MassBank EU](https://massbank.eu/) + A field-specific ecosystem of databases and tools for mass spectrometry reference spectra. +- [MetaboLights](https://www.ebi.ac.uk/metabolights/) + A repository for metabolomic studies. +- [nmrXiv](https://nmrxiv.org/) + A repository for NMR data. -:::note *This table will be continuously updated with new recommendations on interoperable open file formats. -::: +This list is not exhaustive, and there may be other repositories that are more suitable for your data. -## Data analysis +## Challenges -- Research data can be processed, analysed and compared (also to data of other experiments) within the [Chemotion ELN](https://www.chemotion.net/chemotionsaurus/index.html). -- Optionally, preprocessing of digital data with software of analytical device before data are transferred to the Chemotion ELN (cf. data producing methods). -- A detailed view, evaluation and interpretation of results is carried out with the Chemotion ELN features. 
- - -## Publishing research data - -- In addition to a research article in a scientific journal, the underlying research data are [published](/docs/data_publishing) in a [repository](/docs/repositories) and linked to the article to realise research data management according to the [FAIR data principles](/docs/fair) ([Best practice examples](/docs/best_practice)). -- Data publications in repositories include raw and processed data for reuse. -- The use of the [Chemotion ELN](https://www.chemotion.net/chemotionsaurus/index.html) enables a direct transfer of research data and the respective metadata to the [Chemotion Repository](https://www.chemotion-repository.net/welcome). Subsequently, these data are automatically shared with other repositories, e.g. [PubChem](https://pubchem.ncbi.nlm.nih.gov/). For the publication of research data in other discipline-specific repositories, such as the [MassBank](https://massbank.eu/MassBank/) for reference mass spectra, data have to be exported from the Chemotion ELN and submitted to the respective database. -- A [persistent identifier](/docs/pid) (e.g., DOI) is generated for a dataset by a repository (e.g., [DataCite](https://datacite.org/) for the Chemotion Repository), which is given in the journal article or corresponding supporting information to link the data publication with the manuscript. +The biggest challenge in managing analytical chemistry data is the diversity of the field. Different methods generate different types of data, and the data can vary greatly in size and complexity. As mentioned above, the large number of different vendors and proprietary data formats is a major barrier to data sharing and reuse. 
diff --git a/docs/10_domains/20_physical_chemistry.mdx b/docs/10_domains/20_physical_chemistry.mdx index fcc1600f..7b75c797 100644 --- a/docs/10_domains/20_physical_chemistry.mdx +++ b/docs/10_domains/20_physical_chemistry.mdx @@ -3,105 +3,50 @@ title: "Physical and Computational Chemistry" slug: "/physical_chemistry" --- -import Methods from '@site/src/components/Methods.js'; -import {LbeChip} from '@site/src/components/lbe/LbeElements.js'; +import { LbeChip } from "@site/src/components/lbe/LbeElements.js"; +import ElnFinder from "@site/src/components/eln/ElnFinder.js"; - + + +{/* prettier-ignore */} +{/* :::info Summary: +Physical chemistry is an interdisciplinary science at the frontier between chemistry and physics, whose topics go beyond the classical areas of the respective individual sciences. While preparative chemistry focuses on questions of the methodology of chemical synthesis of known and new substances, physical chemistry attempts to describe the properties of substances and their transformation by applying concepts of physics to objects of chemistry by means of theoretical and experimental methods. Along with organic and inorganic chemistry, physical chemistry therefore represents one of the three key disciplines of "classical" chemistry, since it provides the theoretical basis for technical chemistry and process engineering. Its knowledge is also an integral part of many other disciplines and is used, for example, for description and understanding in biology and medicine, meteorology as well as the earth sciences. Due to this great interdisciplinarity and the use of numerous physicochemical methods in almost all areas of chemistry, a complete description of physical chemistry as profile is hardly possible, which is why this article explicitly makes no claim to do so. 
+::: \*/} -:::info Summary: +## Introduction -Physical chemistry is an interdisciplinary science at the frontier between chemistry and physics, whose topics go beyond the classical areas of the respective individual sciences. While preparative chemistry focuses on questions of the methodology of chemical synthesis of known and new substances, physical chemistry attempts to describe the properties of substances and their transformation by applying concepts of physics to objects of chemistry by means of theoretical and experimental methods. Along with organic and inorganic chemistry, physical chemistry therefore represents one of the three key disciplines of "classical" chemistry, since it provides the theoretical basis for technical chemistry and process engineering. Its knowledge is also an integral part of many other disciplines and is used, for example, for description and understanding in biology and medicine, meteorology as well as the earth sciences. Due to this great interdisciplinarity and the use of numerous physicochemical methods in almost all areas of chemistry, a complete description of physical chemistry as profile is hardly possible, which is why this article explicitly makes no claim to do so. -::: - -# Methods Profiles - -## EPR spectroscopy - -### What is it? -- **E**lectron **P**aramagnetic **R**esonance spectroscopy belongs together with NMR (**N**uclear **M**agnetic **R**esonance) spectroscopy to the group of magnetic resonance methods -- measures the resonant microwave absorption of a paramagnetic sample in an external magnetic field (i.e measurement needs unpaired electrons) - -### For what? -- provides information about the electronic/atomic structure and the chemical environment (e.g. local environmental polarity) of the sample -- for the characterisation of molecular dynamics on the time scale of approx. 10 ps-1 μs (allows e.g. 
conclusions to be drawn about local nanoviscosity) -- for distance measurements in the range of about 1-8 nm - -### What kind of data is generated? -- almost exclusively [proprietary file formats](/docs/format_standards) (e.g. .spe or .DTA/.DSC) of the "Bruker Corporation" company -- transfer into [open file formats](/docs/format_standards) (e.g. .txt or .csv) either via Bruker software on the measuring device itself or via tools like [SpinToolbox](https://www.spintoolbox.com/en/) -- analysis of data using Bruker software or e.g. [EasySpin](https://www.easyspin.org/) as open-source toolbox for MATLAB - -### How to do it [FAIR](/docs/fair)? -- [documentation of all research data](/docs/data_documentation) and [metadata](/docs/metadata) is carried out digitally using an suitable [ELN](/docs/eln) (possibly in addition to a manual laboratory notebook in paper form) -- experimental conditions (e.g. sample concentration, solvent etc.) and measurement parameters (e.g. frequency, temperature) are noted in the [ELN](/docs/eln) -- observations, deviations from planned measurement protocol or other peculiarities during measurement with no digital output (i.e. no data files) are added manually to the [ELN](/docs/eln) entry of the experiment -- obtained unprocessed raw files from measurements are uploaded to [ELN](/docs/eln) in open file formats and attached directly to the respective [ELN](/docs/eln) experiment entry, including metadata with data on the instrument (e.g. 
manufacturer, type, etc.), measurement conditions & parameters -- [metadata](/docs/metadata) related to the obtained data, such as temperature or solvent of measurement, follow common [metadata standards](/docs/metadata) -- research data are processed, analysed and compared with open non-proprietary software tools -- simultaneously with [publication](/docs/data_publishing) as a research article in a scientific journal, the underlying research data is published in an open data [repository](/docs/repositories) and linked to the article (incl. semantically richly annotated raw and processed data in open data formats for reuse) -- an unique [persistent identifier](/docs/pid) (e.g. DOI) is generated for each dataset as well as for the journal publication - -## Quantum Mechanical (QM) calculations - -### What is it? -- **Q**uantum **M**echanical calculations are one of the major computational tools to elucidate molecular properties on a first-principles basis -- solving the Schrödinger equation provides the electronic energy of a molecule/molecular system, from which properties can be derived as higher-order derivatives. Descriptors can also be computed from orbital/density data which is equally available - -### For what? -- calculated molecular properties include e.g. molecular structures (usually local minima and transition states), energies, spectroscopic parameters/properties, dipole moments, polarizabilities and non-observables such as atomic charges and topological analysis -- properties can be calculated prior to conducting experimental measurements to guide synthesis (computational screening) or a posteriori to help interpret experimental results atomistically -- the application range depends on the level of theory used. 
Correlated wave function methods are commonly applied to systems with less than 100 atoms, density functional theory (DFT) up to 500 atoms, semiempirical methods can be routinely applied in the range of thousands - -### What kind of data is generated? -- data formats depend strongly on the program that is used for the QM calculations, e.g. Gaussian, ORCA, Molpro, TURBOMOLE or Jaguar, but generally formatted text files are used as input and log files. Compressed data formats are used to store wavefunction, density information and operators. Molecular structures are provided in human-readable format -- data analysis is carried out using custom scripts. A few programs provide their own scripts for common tasks (such as plotting of molecular orbitals) and dedicated GUIs - -### How to do it [FAIR](/docs/fair)? -- [documentation of all research data](/docs/data_documentation) and [metadata](/docs/metadata) is carried out digitally using a suitable repository (e.g. NOMAD, ioChem-BD or a general-purpose repository) to store the input files, main log and structures files (if not included in the log) -- reproducibility of calculations to within numerical accuracy can be ensured by storing the input files and adding the program and its version (ideally even the compiler version and any compiler flags) as metadata. Numerical thresholds are well defined but reproducibility of calculations across different programs and versions is not guaranteed. 
This warrants the safekeeping of version specific source files for the same time period as the stored data -- data analysis scripts should be uploaded to the repository in open file formats, attached directly to the corresponding data entry and accompanied with appropriate documentation -- if possible, analysis and evaluation of calculations should be conducted with open, non-proprietary software tools -- simultaneously with [publication](/docs/data_publishing) as a research article in a scientific journal, the data in the [repository](/docs/repositories) is linked to the article (incl. semantically richly annotated raw and processed data, if possible in open data formats for reuse) -- a unique [persistent identifier](/docs/pid) (e.g. DOI) is generated for the dataset as well as for the journal publication -- XML and CML (Chemical Markup Language) is used by a few software packages but this is not common practice - -### Challenges to make data FAIR -- no standardised transfer into [open file formats](/docs/format_standards). All repositories of quantum chemical calculations to date make use of in-house parsers to extract the calculation data from uploaded logs. This trend hinders the improvement of FAIR practices since new developers are not provided with a template for log files. Any new software can only be featured in repositories after a unique parser is developed -- lack of open meta-input and output file formats that are necessary to enable full interoperability of different programs and tools used for QM calculations. Particularly concerning is the lack of standards for: z-matrix and xyz file formats, trajectory files in molecular dynamics or structure optimisations, definition of isotopes, potential energy surfaces as well as equations used in the derivation of properties including thermodynamic quantities - -## Molecular Mechanical (MM) simulations - -### What is it? 
-- **M**olecular **M**echanical simulations approximate intra- and intermolecular interactions using simple Newtonian mechanics and neglect quantum effects -- the system is parametrised with a suitable force field and propagated in time by solving the system’s Newtonian equations of motion. Potentials or modifications of the force field parameters can be applied to extract thermodynamic/kinetic data - -### For what? -- systems can be as large as millions of atoms, allowing for the investigation of protein dynamics and protein-ligand interactions on a microsecond timescale. More complex systems such as protein-protein interactions or proteins embedded in a biomembrane can also be simulated -- simulations of pure liquids, mixtures or interfaces between liquids and solids or gases enable the investigation of such systems -- explaining and interpreting the behaviour of macroscopic systems by investigating them at a microscopic level - -### What kind of data is generated? -- data formats depend strongly on the program that is used for the MM calculations, e.g. AMBER, CHARMM, GROMACS, LAMMPS or NAMD but in general specifically formatted text files are used as input and log files, and a binary representation for checkpoint and trajectory files -- analysis of data using tools provided by the software’s manufacturer or custom scripts - -### How to do it [FAIR](/docs/fair)? 
-- [documentation of all research data](/docs/data_documentation) and [metadata](/docs/metadata) is carried out digitally using a suitable repository to store the data -- reproducibility of calculations can be ensured by storing the input file and adding the program and its version (ideally including the compiler and any compiler flags) as metadata -- if possible, analysis and evaluation of calculations should be conducted with open non-proprietary software tools -- simultaneously with [publication](/docs/data_publishing) as a research article in a scientific journal, the data in the [repository](/docs/repositories) is linked to the article (incl. semantically richly annotated raw and processed data, if possible in open data formats for reuse) -- a unique [persistent identifier](/docs/pid) (e.g. DOI) is generated for each dataset as well as for the journal publication - -### Challenges to make data FAIR -- no standardised transfer into [open file formats](/docs/format_standards) for different simulation packages -- development of open meta-input and output file formats is required to handle the multitude of different programs and tools used in MM calculations in accordance with the FAIR principles. Tools such as PLUMED can help users with this problem -- trajectory files are typically too large to store in commonly used repository environments, even when using compressed file formats. To make this data FAIR, standards for handling large amounts of data must be developed or solutions from other fields applied -- reproducibility of long time-scale molecular dynamics is unattainable (numerical noise will eventually affect the resulting trajectories, especially in a multicore environment). Depending on the numerical accuracy and the specific implementation, deviations can be observed as soon as in the picosecond range. However, thermodynamic averages or other probabilistic measurements should be achieved within a suitable margin of error. 
This margin would have to be estimated and provided by the authors of a publication - - -# Methods Data Format Overview - - - -:::note *This table will be continuously updated with new recommendations on interoperable open file formats. -::: +Physical chemistry encompasses a variety of sub-disciplines covering a vast range of methodologies, which, in turn, produce heterogeneous data. +From spectroscopic measurement data and imaging to simulation input files and in-house data analysis code, many physical chemists are experienced in handling digital data. They are often well-versed in developing software solutions to support their work. While some work with large data volumes on a regular basis, other methodologies result in small, text-based files. This discipline includes many data-literate members, whose expertise may be harnessed to implement tools and solutions to manage their research group's data in a unified and streamlined manner. + +## Data Types + +As mentioned, the data produced in physical chemistry and its diverse sub-disciplines are varied. One research group may work intensively with imaging data such as super-resolution microscopy, while another may work on method development, and yet others may analyze spectroscopic data or conduct numeric simulations—or even any combination of these. + +## ELNs and Other Tools + +For effective data management, software tools should be selected in a uniform manner within a project or research group with the aim to [organize](/docs/data_organisation) and streamline workflows. This involves establishing clear usage guidelines, including metadata templates drawn from minimum information standards for a given method, where available. These should be outlined in a [data management plan (DMP)](/docs/dmp) for each project. Many universities supply tools and templates for DMPs (see the [respective article](/docs/dmp) for more information).
+ +An [electronic lab notebook (ELN)](/docs/eln) helps in the day-to-day planning and structured documentation of experiments, while some also assist in data workflow management. For disciplines with diverse research, ELNs must be flexible and customizable. Certain universities may have a central option, while each research group may choose what best fits their needs and resources if they are able to host or procure their own solution. The [ELN-Finder](https://eln-finder.ulb.tu-darmstadt.de/search?f.K03=Pharmacy,equals&spc.page=1) lists many options and the article on [choosing an ELN](/docs/chose_eln) provides further assistance: + + + +In addition to ELNs, tools such as local repositories and research data management tools can assist in making data publication-ready. + +For those writing scripts and developing research software solutions, [Git](https://git-scm.com/) is a highly recommended versioning tool. Many universities also have their own instances of [GitLab](https://about.gitlab.com/) to assist in managing software projects. + +Specifically for research data, [DataLad](https://www.datalad.org/), which is built on top of Git, can greatly assist in tracking the metadata while processing and analyzing data. While it works for steps carried out with GUI applications, its true power comes in handy for those using script-based analysis and processing steps. + +As many physical chemists may establish their own workflows or develop their own tools to acquire, process, and analyze their data, it is highly recommended to adhere to community-specific standards for file formats and metadata, where available. A bare minimum is to establish documentation and format standards within a research group, ensuring an efficient knowledge transfer from one generation of researchers to the next. An ELN can greatly assist in providing templates for documentation, while a DMP should be used to record the format standards.
Employing automated workflows, e.g., from device to ELN and data storage systems, can greatly reduce manual steps in day-to-day work and can automatically ensure data and documentation are complete and formatted correctly. Some tools provide out-of-the-box solutions, such as device integration, while others provide options such as REST APIs to build custom methods. + +## Publishing Data + +[Publishing research data](/docs/data_publication), especially that underlying a published article, is an important aspect that allows others in the research community to replicate and build upon a researcher's work. [Research data repositories](/docs/repositories) serve as platforms for data publication and can greatly assist in [FAIR](/docs/fair) data publication. Such repositories range from subject-specific to general and institutional. Given the varied data in physical chemistry, general repositories such as [RADAR4Chem](https://radar.products.fiz-karlsruhe.de/de/radarabout/radar4chem) present an option for publishing data for which no (sub-)discipline-specific repository has been established. [ioChem-BD](https://www.iochem-bd.org/) serves as a computational chemistry repository and includes a conversion service for many common data types to ensure interoperability. The [Image Data Resource (IDR)](https://idr.openmicroscopy.org/) makes biological imaging data available to the community, while a self-hosted OMERO repository can assist those working with other types of imaging data. {/* */} For more information on choices, head [here](/docs/choose_repository). + +For work that includes developing in-house research software solutions: **software is data and an integral part of research and should be published as such**.
While only GitHub currently offers an automatic workflow for publishing software releases to [Zenodo](https://zenodo.org/), there are methods to assign the [software a DOI, therefore making it citable](https://open.win.ox.ac.uk/pages/open-science/community/Open-WIN-Community/docs/gitlab/repo-doi/). + +## Challenges + +Common challenges in physical chemistry and FAIR data often go hand-in-hand with the large variety of sub-disciplines, methodology, and thus diverse data types. Many working in physical chemistry labs may have established their own personal workflows. Working within a group to streamline and unify common steps and to establish reusable templates for metadata, be it in ELNs or in the local file system, can provide structured information not just for fellow researchers, but also for those working on FAIR data infrastructure, such as ELNs and research data repositories. + +Especially in imaging, large data volumes can strain local [storage](/docs/data_storage) resources. Central storage solutions can provide assistance and should be used in combination with best research data management practices to ensure the data's re-usability and avoid unorganized and inefficient use of large storage systems. diff --git a/docs/10_domains/30_polymer_chemistry.mdx b/docs/10_domains/30_polymer_chemistry.mdx deleted file mode 100644 index 23af2c00..00000000 --- a/docs/10_domains/30_polymer_chemistry.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: "Polymer Chemistry" -slug: "/polymer_chemistry" ---- - - -import Methods from '@site/src/components/Methods.js'; - -:::info Summary: - -Polymer chemistry is focused on the synthesis and characterization of monomer units, macromolecules and resultant -materials for applications in e.g. optics, electronics and energy, packaging and containers, coatings and adhesives, -catalysis or medicine.
Polymeric materials can be subdivided by their origin into synthetic, inorganic and -biopolymers, where the latter is a significant subject of recent research as they can be derived from renewable -resources and therefore are deemed to be part of the solution for a sustainable future. The toolbox for monomer synthesis and modification, their polymerization as well as post-functionalization are often translated form organic chemistry, yet for cutting-edge materials new experimental strategies have to be developed. Many characterization methods, especially for monomers or small polymers, can also be adapted from established organic chemistry. However, since macromolecules can feature highly complex structures, e.g. brushes, dendrimers, rings and star shapes, advanced methods such as (2D) SEC, LCCC, DOSY NMR, or electron microscopy need to be applied in order to obtain an exhaustive structure determination (for abbreviations see table below). For their subsequent application as high-tech material, their mechanical and physical properties play an important role as well, where cyclic voltammetry or rheological characterization are just a few examples for the vast array of techniques that are available. A standard approach for the development of new polymeric materials can be described as follows: - -- Preparation of organic/inorganic monomer units. -- Small-scale experiments for screening/optimization of reaction conditions. -- Upscaling of reactions to obtain sufficient product for subsequent polymerization steps. -- (Co)polymerization of monomers in different ratios/chain lengths as well as optimization of reaction conditions. -- Post-polymerization functionalization. -- Synthesis of macroscopic materials through 3D printing, moulding etc. 
- -In order to collect comprehensive analytical data to characterize the synthesized materials the following steps can be taken: - -- Data can be collected during the experiment through online methods, drawing samples on a specific schedule, or after the experiment by analyzing the obtained product. -- Manually determined data: Experimental observations such as colour change or precipitants, appearance, yield and consistency of the product, solubility etc. -- Digital data are obtained with analytical devices. An overview on file extensions, file sizes and converters for several analytical methods is given in the table below. -- When necessary, pre-processing of digital data with software of analytical device before data are transferred to the Chemotion-ELN. -- Raw data files in proprietary file formats should be saved alongside interoperable open file formats by using converters or the software of the analytical device. If no specific open format is currently available, exporting as .txt or .csv is recommended. Please be aware that metadata included in the header of .txt or .csv files may not follow a defined (open) format and metadata should be additionally also added into the ELN. -::: - - -## Documentation and publication of research data - -The Chemotion-ELN was initially developed for organic chemistry and is now adapted for other disciplines in chemistry as well. The adaption for polymer chemistry and the integration of analytical methods are now ongoing, therefore the individual methods in the table below are marked respectively. Generally, the documentation of research data is conducted as follows: - -- Documentation of research data is carried out digitally using an ELN. -- Experimental conditions (e.g. solvents, temperature, duration, pressure) are noted in the ELN. -- Observations and results of analytical methods with no digital output (i.e. 
no data files) are added manually to the ELN entry of the experiment, including colour change or precipitants, appearance, yield and consistency of the product, solubility etc. -- Obtained data from analytical instruments (e.g. NMR, SEC, MS or IR data) are uploaded to the Chemotion-ELN in open file formats and directly attached to the respective ELN experiment entry including instrumental setup metadata. - -In an effort to realize research data management in line with the FAIR principles, the data can be published in a repository including raw and processed data for reuse, directly from the Chemotion-ELN. The use of the Chemotion-ELN enables a direct transfer of research data and the respective metadata into the Chemotion Repository. Subsequently, these data are automatically shared with other repositories, e.g. PubChem. For the publication of research data in other discipline-specific repositories, data have to be exported from the Chemotion-ELN and uploaded into the respective database. - - - -:::note *This table will be continuously updated with new recommendations on interoperable open file formats. -::: diff --git a/docs/10_domains/40_synthetic_chemistry.mdx b/docs/10_domains/40_synthetic_chemistry.mdx index d9a2a64f..1aee3983 100644 --- a/docs/10_domains/40_synthetic_chemistry.mdx +++ b/docs/10_domains/40_synthetic_chemistry.mdx @@ -4,65 +4,62 @@ nfdi4chem-id: dsoic slug: "/synthetic_chemistry" --- -import Methods from '@site/src/components/Methods.js'; -import {LbeChip} from '@site/src/components/lbe/LbeElements.js'; +import { LbeChip } from "@site/src/components/lbe/LbeElements.js"; +import ElnFinder from "@site/src/components/eln/ElnFinder.js"; - + + -:::info Summary: +## Introduction -The main goal of a synthetic organic or inorganic chemist is to synthesise desired compounds. Established methods are applied, or new synthetic methods are developed. A typical workflow starts with the planning of the experiments and methods. 
These are then conducted and research data as well as accompanying metadata are collected along the way. Once the final product is obtained, it is analysed with suitable methods to identify properties of the synthesised material or to ascertain the efficacy of the selected synthetic method. Processing and interpretation of the obtained research data will lead to a proof of concept for a given reaction, optimised conditions for future experiments or upscaling. -::: +During the synthesis of a desired compound, all steps, such as planning, realisation and documentation of an experiment, or characterisation of the obtained product, provide research data. These data include synthetic procedures, experimental conditions, and manually recorded data as well as digital data collected with analytical devices. Processing and interpretation of the obtained research data will lead to a proof of concept for a given reaction, optimised conditions for future experiments or upscaling. -## Type of experiments +## Data Types -- Synthesis: - - Preparation of organic/inorganic substances. - - Small-scale experiments for screening/optimisation of reaction conditions. - - Upscaling of reactions to obtain sufficient product for comprehensive characterisation or as starting material for subsequent synthesis steps. +In synthetic chemistry different types of research data can be obtained. In general, this data is not limited to characterisation of synthesised products. A typical experiment starts with its design and planning, followed by carrying out the procedure in a laboratory setting. During realisation of an experiment, observations, experimental conditions, and yields are documented. Ideally, this manually collected research data is recorded digitally in an [Electronic Lab Notebook (ELN)](/docs/eln/). +The synthesis of a specific product is followed by analysing its properties. Regarding data collection, both manually determined and digital data can be obtained. 
Observations and results of analytical methods with no digital output (i.e., no data files) can be added manually to the ELN entry of the experiment, which include, for example, melting/boiling point, optical rotation, TLC Rf values, or refractive index. Digital data are obtained from analytical instruments, e.g., NMR, IR, MS. These data can be uploaded seamlessly from the analytical devices to an ELN and analysed therein. An overview on file extensions, file sizes and converters for several analytical methods is given in the table below. It is recommended to save raw data files in [proprietary file formats](/docs/format_standards/) alongside interoperable [open file formats](/docs/format_standards/) by using converters or the software of the analytical device. If no specific open format is currently available, export as .txt or .csv is recommended. -- Analysis: - - Product characterisation with feasible methods (e.g. NMR spectroscopy, mass spectrometry, IR spectroscopy, UV/vis spectroscopy, elemental analysis). - - to assess screening/optimisation conditions. - - to investigate desired product properties. - - to characterise materials’ properties comprehensively. +Overall, [metadata](/docs/metadata/) should always be included when collecting and [storing](/docs/data_storage/) data to allow understanding of the research data in the long term. -## Planning of experiments +## ELNs and Other Tools -- The implementation of newly inspired ideas is supported by previous work stored in the local instance of the electronic lab notebook (e.g. [Chemotion ELN](https://www.chemotion.net/chemotionsaurus/index.html)) of the own research group ([overview of ELNs for synthetic chemists](/docs/eln)), scientific literature or datasets published in [repositories](/docs/repositories) (discipline-specific repositories such as the [Chemotion repository](https://www.chemotion-repository.net/welcome)). 
-- Experimental design follows a logical order to achieve a specific goal, such as increasing yield or purity. -- Planning is concluded by adding the experimental details (e.g. quantities and scheduling procedures). All metadata is documented in an ELN (e.g. Chemotion ELN) including references. +For planning research data management and creating data management plans, tools such as [RDMO](https://rdmorganiser.github.io/) and [DMP-Online](https://dmponline.dcc.ac.uk/) are suitable. Many universities have their own instances of these solutions. -## Documentation of experiments +If you want your data to comply with the [FAIR principles](/docs/fair/) it can be very tedious and extremely time consuming if you try to apply the FAIR principles to your data retrospectively if your existing workflows involve a large degree of analogue documentation (e.g. paper lab notebooks). The reality is that you need tools to take care of certain aspects of the FAIR principles automatically so you don’t have to apply them manually each time. [Electronic Lab Notebooks (ELNs)](/docs/eln/) are very powerful tools that can help you with this. Depending on what ELN you use, the [metadata](/docs/metadata/) can be automatically assigned in both human and [machine-readable formats](/docs/format_standards/). Furthermore, some ELNs can automatically generate interoperable [open file formats](/docs/format_standards/#format-standards-in-chemistry) for your analytical data. Choosing the right ELN can be challenging and this process should be thought about and carried out carefully. You can find out more on [how to choose the right ELN here](/docs/choose_eln/): -- [Documentation of research data](/docs/data_documentation) and [metadata](/docs/metadata) is carried out digitally using an ELN. -- Experimental conditions (e.g. solvents, temperature, duration, pressure) are noted in the ELN. -- Observations and results of analytical methods with no digital output (i.e. 
no data files) are added manually to the ELN entry of the experiment, which may include appearance, yield, melting point, optical rotation or TLC Rf values (with metadata where applicable). -- Obtained data from analytical instruments (e.g. NMR, MS or IR data) are uploaded to the Chemotion ELN in open file formats and directly attached to the respective ELN experiment entry including instrumental setup metadata. -- Metadata related to the obtained data, such as temperature or solvent of measurement, have to be provided according to metadata standards, e.g. x. + -## Data producing methods +A tool to help you find the right ELN is the so-called [ELN finder](https://eln-finder.ulb.tu-darmstadt.de/home) which is a searchable online repository for many different ELNs. It is important to note that not one size fits all and that one ELN may be appropriate for one research group, another ELN may be more appropriate for a different research group. Within NFDI4Chem, Chemotion ELN is the reference instance (find out more here in our [knowledge base article overview of Chemotion](/docs/chemotion_eln/)). This means that our developments in automatically applying the FAIR data principles to research data are implemented in Chemotion first. -- Data can be collected during the experiment or after the experiment by analysing the obtained product. -- Manually determined data: Experimental observations, appearance, yield, melting/boiling point, optical rotation, TLC Rf values, refraction index, etc. -- Digital data are obtained with analytical devices. An overview on file extensions, file sizes and converters for several analytical methods is given in the table below. -- Raw data files in [proprietary file formats](/docs/format_standards) should be saved alongside interoperable [open file formats](/docs/format_standards) by using converters or the software of the analytical device. If no specific open format is currently available, export as .txt or .csv is recommended. 
Please be aware that metadata included in the header of .txt or .csv files may not follow a defined (open) format and metadata should be additionally also added into the ELN. +Chemotion is especially suitable for synthetic chemistry as it originally started out as an ELN for synthetic chemistry but has now been extended to a wider array of scientific disciplines through its [LabIMotion extension](https://chemotion.net/docs/labimotion). - +## Publishing Data -:::note *This table will be continuously updated with new recommendations on interoperable open file formats. -::: +[Publishing research data](/docs/data_publication/) is important in order to allow for the reuse of data by other researchers or for machine learning. Especially for machine learning, it is crucial that the data is published in a structured and standardised way. Where can you publish your data? Open access [data repositories](/docs/repositories/) are a good solution to provide your data for reuse by others. Choosing the right repository is crucial and as a general rule of thumb it is better to deposit your data in data-specific or discipline-specific repositories as these enforce more standardisation in how the data are published thus allowing for better machine-readability. -## Data analysis +In order to reach as many researchers as possible, choosing the right repository can be crucial (more on this in the [article on choosing the right repository](/docs/choose_repository/)). -- Research data are processed, analysed and compared (also to data of other experiments) within the Chemotion ELN. -- Optionally, preprocessing of digital data with software of analytical device before data are transferred to the Chemotion ELN (cf. data producing methods). -- A detailed view, evaluation and interpretation of results is carried out with the Chemotion ELN features. +Above you can see a modified version of our decision tree from our guide how to choose the right repository. 
Here is a table giving an overview of what data fits into what repository: -## Publishing research data +{/* prettier-ignore-start */} +| Data type | Data format | Suggested Repository | Criteria for selection | +| :---- | :---- | :---- | :---- | +| Nuclear Magnetic Resonance | Bruker XWIN-NMR format (zip), [JCAMP-DX](https://knowledgebase.nfdi4chem.de/knowledge_base/docs/JCAMP-DX/) | [**Chemotion**](https://www.chemotion-repository.net/welcome) | Passing basic checks, curation | +| Nuclear Magnetic Resonance | Bruker XWIN-NMR format, JEOL format, NMReData, nmrML, ISA JSON | [**nmrXiv**](https://docs.nmrxiv.org/) | Validations / Minimum information reporting standards | +| Molecules and their properties, identification, reactions and experimental investigations | mass spectrometry: JCAMP-DX, [mzML](https://knowledgebase.nfdi4chem.de/knowledge_base/docs/mzML/), mzXML (open, visualisable and processable), RAW for selected mass data types (processed and converted in JCAMP-DX), IR and Raman: JCAMP-DX, XRD: JCAMP-DX, UV/VIS: JCAMP-DX, Cyclic voltammetry: JCAMP-DX. \*Chemotion repo offers the option to convert data from different file formats into JCAMP-DX. 
| [**Chemotion**](https://www.chemotion-repository.net/welcome) | Passing basic checks, curation | +| Inorganic crystal structures | Crystallographic Information File (CIF) | [**ICSD**](https://icsd.fiz-karlsruhe.de/) | Crystal structure data available | +| Organic and metal-organic crystal structures | Crystallographic Information File (CIF) but other supporting file formats accepted | [**CSD**](https://www.ccdc.cam.ac.uk/structures/) | Cell parameters (single crystal), full coordinates (powder), in CIF format | +| Organic, inorganic and metal-organic crystal structure data | primarily Crystallographic Information File (CIF) but other supporting file formats accepted | [**joint CCDC/FIZ Access Structures ServiceD**](https://www.ccdc.cam.ac.uk/structures//) | At least one CIF file must be included in the submission and structure factor data for all structures should be provided (if possible) | +| Generic data from all disciplines of chemistry, all data that do not fit in the disciplinary repositories | format-independent | [**RADAR4Chem**](https://www.radar-service.eu/de) | Validation against metadata schema | +{/* prettier-ignore-end */} -- In addition to a research article in a scientific journal, the underlying research data are published in a repository and linked to the article to realise research data management according to the [FAIR data principles](/docs/fair) ([Best practice examples](/docs/best_practice)). -- Data publications in repositories include raw and processed data for reuse. -- The use of the Chemotion ELN enables a direct transfer of research data and the respective metadata into the Chemotion Repository. Subsequently, these data are automatically shared with other repositories, e.g. [PubChem](https://pubchem.ncbi.nlm.nih.gov/). 
For the publication of research data in other discipline-specific repositories, such as the [CCDC](https://www.ccdc.cam.ac.uk/) for crystallographic data, data have to be exported from the Chemotion ELN and uploaded into the respective database. -- A [persistent identifier](/docs/pid) (e.g. DOI) is generated for a dataset by a repository (via [DataCite](https://datacite.org/) for the Chemotion Repository), which is given in the journal article or corresponding supporting information to link the data publication with the manuscript. +Your own institution may also have additional guidelines & resources for publishing data, therefore it is always worth consulting the research data management experts of your local institution. + +## Challenges + +While for some data types & workflows it may be obvious how to comply with the FAIR principles, for others it is not, as no community standards have been set and/or no appropriate open data formats are available. This is especially true for more niche analytical methods. Remember though: FAIR is a spectrum, not an absolute. Therefore even if one of your workflows may not be as FAIR as the other, if it is as FAIR as is currently possible then it is still worth doing. + +Many old devices do not put out open-data formats and some devices have no digital output at all which makes good RDM more challenging though not impossible given the right tools (e.g. Chemotion’s [ChemConverter](https://chemotion.net/docs/services/chemconverter) which automatically generates open file formats from analytical devices which are not capable of outputting them). + +One of the biggest challenges to RDM in Chemistry at the moment is the lack of inter-ELN interoperability. This means that it is very challenging if not impossible to transfer data between different ELNs. This makes it especially challenging for interdisciplinary collaborations where collaborating groups use different ELNs. 
There are, however, efforts underway to establish inter-ELN interoperability such as the [ELN consortium](https://github.com/TheELNConsortium/) of which Chemotion is a member. diff --git a/docs/10_domains/50_pharmaceutical_chemistry.mdx b/docs/10_domains/50_pharmaceutical_chemistry.mdx index b10ab140..ddf85136 100644 --- a/docs/10_domains/50_pharmaceutical_chemistry.mdx +++ b/docs/10_domains/50_pharmaceutical_chemistry.mdx @@ -3,53 +3,39 @@ title: "Medicinal / Pharmaceutical Chemistry" slug: "/pharmaceutical_chemistry" --- -import Methods from '@site/src/components/Methods.js'; -import {LbeChip} from '@site/src/components/lbe/LbeElements.js'; +import Methods from "@site/src/components/Methods.js"; +import { LbeChip } from "@site/src/components/lbe/LbeElements.js"; +import ElnFinderPharm from "@site/src/components/eln/ElnFinderPharm.js"; - + + +## Introduction -:::info Summary: +Medicinal chemistry is a chemistry-based, strongly interdisciplinary discipline. It deals with the discovery, development, identification, and synthesis of biologically active compounds, the interpretation of their mechanism of action at the molecular level, and the metabolism of the active compounds. The interactions of active compounds with their biological target(s) are analysed in silico (chemoinformatics), in vitro, and in vivo to determine selectivity and potential in terms of their therapeutic efficacy and safety. Medicinal chemists prepare and/or select suitable compounds for biological evaluation that, if found to be active, could serve as lead compounds. Chemical modifications to optimise these leads ideally result in promising candidates for preclinical studies in drug development, potentially followed by clinical studies. -Medicinal chemistry is a chemistry-based, strongly interdisciplinary discipline. 
It deals with the discovery, development, identification, and synthesis of biologically active compounds, the interpretation of their mechanism of action at the molecular level, and the metabolism of the active compounds. -The interactions of active compounds with their biological target(s) are analysed in silico (chemoinformatics), in vitro, and in vivo to determine selectivity and potential in terms of their therapeutic efficacy and safety. -Medicinal chemists prepare and/or select suitable compounds for biological evaluation that, if found to be active, could serve as lead compounds. Chemical modifications to optimise these leads ideally result in promising candidates for preclinical studies in drug development, potentially followed by clinical studies. +## Data Types -::: +The most common types of data in pharmaceutical/medicinal chemistry can be divided into three categories: synthesis data, analytical data, and (bio)assay data. While the first two types of data (synthesis and analytical data) play a role in broad areas of chemistry, (bio)assay data, which is used to investigate the biological activity of a substance, is a particular feature of pharmaceutical/medicinal chemistry. -## Type of experiments +## ELNs and Other Tools -- Synthesis: - - Preparation of compounds including combinatorial approaches for biological evaluation in suitable test systems -- Analysis: - - Characterisation of intermediate and final products - - Analysis of (quantitative) structure-activity relationships, molecular modelling studies - - Biological evaluation, activity assays +To meet the requirements for FAIR data management, electronic lab notebooks (ELNs) are particularly useful as they can take over many documentation tasks for researchers. The [ELN-Finder](https://eln-finder.ulb.tu-darmstadt.de/search?f.K03=Pharmacy,equals&spc.page=1) offers a selection of pharmacy-specific ELNs. 
How to choose the right ELN for the research group can be read [here](https://knowledgebase.nfdi4chem.de/knowledge_base/docs/choose_eln/). -## Planning of experiments + -Test systems are established, either target-oriented or phenotypic assays, to identify hit compounds. Alternatively, (potential) hit compounds can be selected from available data (e.g. publications) or by virtual screening. -Starting from promising hit compounds, optimisation cycles are performed to improve the properties of the starting compounds. Suitable assay systems are required to obtain the necessary data to assess the compounds’ properties. These include potency efficacy, selectivity, potential bias, water solubility, lipophilicity, pharmacokinetic properties, e.g. bioavailability and metabolic stability, interaction with liver cytochrome P450 enzymes, hERG channel interaction, cell toxicity, potential genotoxicity, etc. The required properties depend e.g. on the indication and the application route of the drug. +For the planning of research data management and the creation of data management plans, tools such as RDMO and DMP-Online are suitable. Many universities have their own instances of one of these solutions. -## Documentation of experiments +## Publishing Data -[Documentation](/docs/data_documentation) of research data and [metadata](/docs/metadata) is carried out, preferably digitally, using a suitable [ELN](/docs/eln) and/or a manual laboratory notebook in paper form. +Publishing research data is important to allow other researchers to reuse it. To reach as many researchers as possible, choosing the right repository can be crucial (check the [Choose a Repository](https://knowledgebase.nfdi4chem.de/knowledge_base/docs/choose_repository/) article for more information). 
-Using an ELN: +For synthesis data and analytical data, specific repositories such as [Chemotion Repository](https://www.chemotion-repository.net/welcome) (synthesis data), [MassBankEU](https://massbank.eu/MassBank/) (mass spectra), or [nmrXiv](https://nmrxiv.org/) (for NMR data) are recommended. For the third category of data ((bio)assay data), no optimal repository is currently known, so the generic repository [RADAR4Chem](https://radar.products.fiz-karlsruhe.de/de/radarabout/radar4chem) is recommended. -- Experimental conditions and measurement parameters are noted in the ELN. -- Observations, deviations from planned measurement protocols, or other peculiarities during measurement with no digital output (i.e. no data files) are added manually to the ELN experiment entry. -- Obtained data from analytical instruments (e.g. NMR, MS, or IR data) are uploaded to the ELN in open file formats and directly attached to the respective experiment entry, including instrumental setup metadata. -- Metadata related to the obtained data follow common metadata standards. +## Challenges -## Data producing methods +Compared to many other areas of chemistry, pharmaceutical/medicinal chemistry often involves highly application-oriented research that can lead to patents and product developments. Therefore, the electronic lab notebooks used must meet higher standards in the area of audit trails. This must be considered from the outset when selecting the tools to be used. Additionally, this can lead to special requirements for publishing research data. -- Data can be collected during or after the experiment by analysing the obtained product. -- Manually determined data: Experimental observations, appearance, yield, melting/boiling point, optical rotation, TLC Rf values, refraction index, etc. -- Digital data are obtained with analytical devices. An overview on file extensions, file sizes and converters for several analytical methods is given in the table below. 
-- Raw data files in proprietary file formats should be saved alongside interoperable [open file formats](/docs/format_standards) by using converters or the analytical device software. If no specific open format is currently available, an export as .txt or .csv is recommended. Please be aware that metadata included in the header of .txt or .csv files may not follow a defined (open) format and metadata should be additionally also added into the ELN. +The diversity of data formats and analytical methods used also poses a challenge for good research data management (RDM). This is particularly true when laboratory/analytical instruments are used that can no longer be integrated into a network or can only be integrated via detours due to their software. - - -:::note *This table will be continuously updated with new recommendations on interoperable open file formats. -::: \ No newline at end of file +NFDI4Chem is happy to assist with these challenges, and you can contact us anytime through the Helpdesk. 
diff --git a/docusaurus.config.js b/docusaurus.config.js index d07b8829..7ef14085 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -11,14 +11,14 @@ const title = "NFDI4Chem Knowledge Base"; const description = "Supporting scientists to digitalise all steps of chemical research: to collect, store, process, analyse, publish, and reuse research data"; const url = "https://knowledgebase.nfdi4chem.de/"; -const baseUrl = "/knowledge_base/"; +// const baseUrl = "/knowledge_base/"; const navbar = require("./navbar.json"); const footerLinks = require("./footer.json"); // Use for stagging: // const baseUrl = '/staging/knowledge_base_matomo/'; -// const baseUrl = '/staging/knowledge_base/'; +const baseUrl = "/staging_eln/knowledge_base/"; const config = { title: title, diff --git a/package.json b/package.json index c1e04293..b07b125d 100644 --- a/package.json +++ b/package.json @@ -24,10 +24,12 @@ "clsx": "^2.1.1", "docusaurus-plugin-matomo": "^0.0.8", "i": "^0.3.7", + "moment": "^2.30.1", "prism-react-renderer": "^2.4.0", "react": "^18.3.1", "react-dom": "^18.3.1", "react-tagcloud": "^2.3.3", + "use-immer": "^0.10.0", "video-privacy": "^1.1.0" }, "devDependencies": { diff --git a/scripts/getElns.sh b/scripts/getElns.sh new file mode 100755 index 00000000..aeff5da0 --- /dev/null +++ b/scripts/getElns.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +targetDir="/var/www/html/staging_eln/knowledge_base/assets" +# targetDir="/var/www/html/knowledge_base/assets" +# targetDir="/git/n4c-kb_jl/static/assets" + +# get the data from the eln-finder + +curl -o eln-finder.json "https://eln-finder.ulb.tu-darmstadt.de/server/api/discover/search/objects?sort=dc.title,ASC&f.K03=Chemistry,equals" + +curl -o eln-finder-pharm.json "https://eln-finder.ulb.tu-darmstadt.de/server/api/discover/search/objects?sort=dc.title,ASC&f.K03=Pharmacy,equals" + +# add current date and time to the json + +jq ". + {date: \"$(date -u)\" }" eln-finder.json > $targetDir/elnData.json +jq ". 
+ {date: \"$(date -u)\" }" eln-finder-pharm.json > $targetDir/elnDataPharm.json diff --git a/scripts/slugginator.sh b/scripts/slugginator.sh old mode 100644 new mode 100755 diff --git a/src/components/eln/Eln.module.css b/src/components/eln/Eln.module.css new file mode 100644 index 00000000..24a2f225 --- /dev/null +++ b/src/components/eln/Eln.module.css @@ -0,0 +1,126 @@ +/********** ELN-Finder ************/ + +.eln { + display: flex; + flex-direction: row; + flex-wrap: wrap; + justify-content: space-between; +} + +/* Search / Filter section */ + +.eln__searchfilter { + display: flex; + flex-direction: row; + width: 100%; + padding: 0.75rem; + border: 1px dashed var(--ifm-color-primary); + margin-bottom: 0.5rem; +} + +.eln__searchfilter__text { + translate: 0 20%; + min-width: 240px; +} + +.eln__searchfilter__buttons { + flex-grow: 1; +} + +.eln__searchfilter__search { + display: flex; + flex-direction: column; + padding: 0.5rem 0; +} + +.eln__searchfilter__search > span { + text-align: center; +} + +.eln__searchfilter__search > em { + color: var(--ifm-color-primary); + text-align: center; +} + +.eln__searchfilter__search__button { + position: absolute; + right: 1.5rem; + top: 50%; + transform: translate(0, -50%); + padding: 0; + background: none; + border: none; + line-height: 1rem; +} + +.eln__card { + width: 49.5%; + padding: 0.75rem; + border: 1px dashed var(--ifm-color-primary); + margin-bottom: 0.5rem; + transition: all var(--n4c-transform-time) ease-in-out; +} + +.eln__card__header { + display: flex; + justify-content: space-between; + align-items: center; +} + +.eln__card__desc { + padding: 0.5rem; +} + +@media screen and (max-width: 1400px) { + .eln { + display: flex; + flex-direction: column; + } + + .eln__searchfilter { + display: flex; + flex-direction: column; + align-items: center; + width: 100%; + padding: 0.75rem; + border: 1px dashed var(--ifm-color-primary); + margin-bottom: 0.75rem; + } + + .eln__searchfilter__text, + .eln__searchfilter__buttons 
{ + width: 100%; + translate: 0; + } + + .eln__searchfilter__search { + display: flex; + flex-direction: row; + align-items: center; + } + + .eln__card { + width: 100%; + padding: 0.75rem; + border: 1px dashed var(--ifm-color-primary); + margin-bottom: 0.75rem; + transition: all var(--n4c-transform-time) ease-in-out; + } +} + +.eln__card__link { + display: flex; + padding: 0.5rem; +} + +.eln__licenseChip--opensource { + background-color: var(--ifm-color-primary); + color: white; + font-weight: bold; +} + +.eln__filterbutton--secondary { + background-color: var(--ifm-breadcrumb-item-background-active); + color: var(--ifm-color-primary); + font-weight: unset; +} diff --git a/src/components/eln/ElnCard.js b/src/components/eln/ElnCard.js new file mode 100644 index 00000000..7da873ad --- /dev/null +++ b/src/components/eln/ElnCard.js @@ -0,0 +1,45 @@ +import RepoButton from "@site/src/components/repos/RepoButton"; + +import FilterButton from "./elnFilter/FilterButton.js"; +import ShortenDesc from "../commons/ShortenDesc.js"; + +import styles from "./Eln.module.css"; + +function ElnCard({ eln, filter, setFilter }) { + return ( +
+
+

{eln.name}

+ +
+ +
+ +
+ {eln.subDisc && eln.subDisc.length > 0 && ( +
+ {eln.subDisc.map((subdisc, idx) => { + let isActive = filter.subDisc === subdisc; + return ( + + ); + })} +
+ )} +
+ ); +} + +export default ElnCard; diff --git a/src/components/eln/ElnFinder.js b/src/components/eln/ElnFinder.js new file mode 100644 index 00000000..ae8d8d17 --- /dev/null +++ b/src/components/eln/ElnFinder.js @@ -0,0 +1,168 @@ +import React, { useState, useEffect } from "react"; +import moment from "moment"; +import { useImmer } from "use-immer"; + +import ElnStatus from "./ElnStatus"; +import ElnFilter from "./elnFilter/ElnFilter"; +import ElnStack from "./ElnStack"; + +import styles from "./Eln.module.css"; + +// const elnData = require("@site/static/assets/eln_test.json"); + +function ElnFinder(props) { + // State for ELN data + + const [elnData, setElnData] = useState(null); + const [error, setError] = useState(null); + + // State for filtering + + const [filter, setFilter] = useImmer( + props.subDisc ? { subDisc: props.subDisc } : {} + ); + + // Fetch ELN data + + useEffect(() => { + fetch("../../assets/elnData.json") + .then((response) => response.json()) + .then((data) => { + setElnData(data); + console.log(data); + }) + .catch((error) => { + setError(error); + console.error(error); + }); + }, []); + + // Catch if fetch is still loading + + if (!elnData) { + return Loading...; + } + + // Define working variables + + let elnTable = []; + let allSubDisc = []; + let allLicenses = []; + + // Parse timestamp of ELN data + + const dateDownloaded = moment(elnData.date); + const relativeDate = moment(dateDownloaded).fromNow(); + + // Assemble essential ELN data + + try { + const chemElns = elnData["_embedded"].searchResult["_embedded"].objects; + + chemElns.map((eln) => { + let subDisc = []; + eln["_embedded"].indexableObject.metadata["dc.subject"].map( + (discipline) => + discipline.value.startsWith("Chemistry:") + ? 
subDisc.push(discipline.value.split(":")[1]) + : null + ); + + elnTable.push({ + name: eln["_embedded"].indexableObject.name, + url: eln["_embedded"].indexableObject.metadata[ + "dc.identifier.uri" + ][0].value, + license: + eln["_embedded"].indexableObject.metadata[ + "K.lizenzmodell" + ][0].value, + desc: eln["_embedded"].indexableObject.metadata[ + "dc.description.abstract" + ][0].value, + subDisc: subDisc, + }); + allSubDisc.push(subDisc); + allLicenses.push( + eln["_embedded"].indexableObject.metadata["K.lizenzmodell"][0] + .value + ); + }); + + allSubDisc = [...new Set(allSubDisc.flat())]; + allLicenses = [...new Set(allLicenses)]; + } catch (error) { + console.error(error); + return Failed to process ELN data.; + } + + // Filter ELN data based on filter state + + const filteredTable = elnTable.filter((eln) => { + if (Object.keys(filter).length === 0) { + return true; + } + + if (filter.subDisc && !eln.subDisc.includes(filter.subDisc)) { + return false; + } + + if (filter.license && eln.license !== filter.license) { + return false; + } + + if ( + filter.text && + !JSON.stringify(eln) + .toLowerCase() + .includes(filter.text.toLowerCase()) + ) { + return false; + } + + return true; + }); + + // Determine number of results and generate output + + const numberOfResults = filteredTable.length; + + let resultOutput = null; + + switch (numberOfResults) { + case elnTable.length: + resultOutput = null; + break; + case 0: + resultOutput = "No results found."; + break; + case 1: + resultOutput = "1 result found."; + break; + default: + resultOutput = numberOfResults + " results found."; + break; + } + + // Render ELN Finder component + + return ( + + +
+ + +
+
+ ); +} + +export default ElnFinder; diff --git a/src/components/eln/ElnFinderPharm.js b/src/components/eln/ElnFinderPharm.js new file mode 100644 index 00000000..9f828d11 --- /dev/null +++ b/src/components/eln/ElnFinderPharm.js @@ -0,0 +1,151 @@ +import React, { useState, useEffect } from "react"; +import moment from "moment"; +import { useImmer } from "use-immer"; + +import ElnStatus from "./ElnStatus"; +import ElnFilter from "./elnFilter/ElnFilter"; +import ElnStack from "./ElnStack"; + +import styles from "./Eln.module.css"; + +// const elnData = require("@site/static/assets/eln_test.json"); + +function ElnFinderPharm(props) { + // State for ELN data + + const [elnData, setElnData] = useState(null); + const [error, setError] = useState(null); + + // State for filtering + + const [filter, setFilter] = useImmer( + props.subDisc ? { subDisc: props.subDisc } : {} + ); + + // Fetch ELN data + + useEffect(() => { + fetch("../../assets/elnDataPharm.json") + .then((response) => response.json()) + .then((data) => { + setElnData(data); + console.log(data); + }) + .catch((error) => { + setError(error); + console.error(error); + }); + }, []); + + // Catch if fetch is still loading + + if (!elnData) { + return Loading...; + } + + // Define working variables + + let elnTable = []; + let allLicenses = []; + + // Parse timestamp of ELN data + + const dateDownloaded = moment(elnData.date); + const relativeDate = moment(dateDownloaded).fromNow(); + + // Assemble essential ELN data + + try { + const chemElns = elnData["_embedded"].searchResult["_embedded"].objects; + + chemElns.map((eln) => { + elnTable.push({ + name: eln["_embedded"].indexableObject.name, + url: eln["_embedded"].indexableObject.metadata[ + "dc.identifier.uri" + ][0].value, + license: + eln["_embedded"].indexableObject.metadata[ + "K.lizenzmodell" + ][0].value, + desc: eln["_embedded"].indexableObject.metadata[ + "dc.description.abstract" + ][0].value, + }); + allLicenses.push( + 
eln["_embedded"].indexableObject.metadata["K.lizenzmodell"][0] + .value + ); + }); + + allLicenses = [...new Set(allLicenses)]; + } catch (error) { + console.error(error); + return Failed to process ELN data.; + } + + // Filter ELN data based on filter state + + const filteredTable = elnTable.filter((eln) => { + if (Object.keys(filter).length === 0) { + return true; + } + + if (filter.license && eln.license !== filter.license) { + return false; + } + + if ( + filter.text && + !JSON.stringify(eln) + .toLowerCase() + .includes(filter.text.toLowerCase()) + ) { + return false; + } + + return true; + }); + + // Determine number of results and generate output + + const numberOfResults = filteredTable.length; + + let resultOutput = null; + + switch (numberOfResults) { + case elnTable.length: + resultOutput = null; + break; + case 0: + resultOutput = "No results found."; + break; + case 1: + resultOutput = "1 result found."; + break; + default: + resultOutput = numberOfResults + " results found."; + break; + } + + // Render ELN Finder component + + return ( + + +
+ + +
+
+ ); +} + +export default ElnFinderPharm; diff --git a/src/components/eln/ElnStack.js b/src/components/eln/ElnStack.js new file mode 100644 index 00000000..42b59029 --- /dev/null +++ b/src/components/eln/ElnStack.js @@ -0,0 +1,15 @@ +import React from "react"; + +import ElnCard from "./ElnCard"; + +function ElnStack({ filteredTable, filter, setFilter }) { + return ( + + {filteredTable.map((eln, idx) => ( + + ))} + + ); +} + +export default ElnStack; diff --git a/src/components/eln/ElnStatus.js b/src/components/eln/ElnStatus.js new file mode 100644 index 00000000..108f58e2 --- /dev/null +++ b/src/components/eln/ElnStatus.js @@ -0,0 +1,21 @@ +import Link from "@docusaurus/Link"; + +function ElnStatus({ relativeDate }) { + return ( +

+ + Data kindly provided by{" "} + + ELN Finder + {" "} + ( + {relativeDate !== "Invalid date" + ? "last updated " + relativeDate + : "last update unknown"} + ). + +

+ ); +} + +export default ElnStatus; diff --git a/src/components/eln/elnFilter/ElnFilter.js b/src/components/eln/elnFilter/ElnFilter.js new file mode 100644 index 00000000..20f47689 --- /dev/null +++ b/src/components/eln/elnFilter/ElnFilter.js @@ -0,0 +1,98 @@ +import React from "react"; + +import TextSearch from "./TextSearch"; +import FilterButton from "./FilterButton"; + +import styles from "../Eln.module.css"; + +// Assemble buttons for filtering section + +function ButtonFilters({ allSubDisc, allLicenses, filter, setFilter }) { + let subDiscButtons = []; + + if (allSubDisc) { + subDiscButtons = ["All", ...allSubDisc]; + } + let licenseButtons = ["All", ...allLicenses]; + + // Check if active prop should be handed to FilterButton + + function isActive(type, label) { + // check if filter value is equal to label + + if (filter[type] === label) { + return true; + } + + // check if object is empty and label is "All" + + if ( + (label === "All" && Object.keys(filter).length === 0) || + (label === "All" && !filter[type]) + ) { + return true; + } else { + return false; + } + } + + return ( + + {subDiscButtons.length > 0 && ( +
+
Filter by subdisciplines
+

+ {subDiscButtons.map((subDisc, idx) => ( + + ))} +

+
+ )} +
+
Filter by license
+

+ {licenseButtons.map((license, idx) => ( + + ))} +

+
+
+ ); +} + +function ElnFilter({ + allSubDisc, + allLicenses, + filter, + setFilter, + resultOutput, +}) { + return ( +
+
+ +
+
+ +
+
+ ); +} + +export default ElnFilter; diff --git a/src/components/eln/elnFilter/FilterButton.js b/src/components/eln/elnFilter/FilterButton.js new file mode 100644 index 00000000..ee278ba4 --- /dev/null +++ b/src/components/eln/elnFilter/FilterButton.js @@ -0,0 +1,32 @@ +import clsx from "clsx"; + +import styles from "../Eln.module.css"; + +function FilterButton(props) { + const handleClick = () => { + if (props.label === "All") { + props.setFilter((draft) => { + delete draft[props.type]; + }); + } else { + props.setFilter((draft) => { + draft[props.type] = props.label; + }); + } + }; + + // Conditional styling for button + + let buttonClass = clsx("lbe__filterbutton", { + [styles["eln__filterbutton--secondary"]]: props.secondary, + "lbe__filterbutton--active": props.active, + }); + + return ( + + ); +} + +export default FilterButton; diff --git a/src/components/eln/elnFilter/TextSearch.js b/src/components/eln/elnFilter/TextSearch.js new file mode 100644 index 00000000..1559ea0d --- /dev/null +++ b/src/components/eln/elnFilter/TextSearch.js @@ -0,0 +1,42 @@ +import React, { useState } from "react"; + +import clsx from "clsx"; + +import styles from "../Eln.module.css"; + +function TextSearch({ resultOutput, filter, setFilter }) { + const handleChange = (e) => + setFilter((draft) => { + draft.text = e.target.value; + }); + + return ( +
+ + + {filter.text && ( + + )} +   + + {resultOutput} +
+ ); +} + +export default TextSearch; diff --git a/src/css/custom.css b/src/css/custom.css index 33dc6757..85a16d15 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -585,7 +585,8 @@ table thead tr { margin: unset; } -.lbe__searchfilter__section h4 { +.lbe__searchfilter__section h4, +h5 { margin: 0.5rem 0; } @@ -651,7 +652,7 @@ table thead tr { color: var(--ifm-color-primary); } -.lbe__filterbutton.lbe__chip { +.lbe__chip { background: var(--ifm-breadcrumb-item-background-active); color: var(--ifm-color-primary); font-size: calc(0.8rem * var(--ifm-breadcrumb-size-multiplier));