From 85df2762aba35770ecf0c26edaef3672bf6f1e54 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Thu, 14 Apr 2016 13:18:18 -0400 Subject: [PATCH 01/13] fixed #29 removed beacon-of-beacons and beaconizer submodules --- .gitmodules | 9 --------- beacon-of-beacons | 1 - beaconizer | 1 - 3 files changed, 11 deletions(-) delete mode 100644 .gitmodules delete mode 160000 beacon-of-beacons delete mode 160000 beaconizer diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 3c496c87..00000000 --- a/.gitmodules +++ /dev/null @@ -1,9 +0,0 @@ -[submodule "beaconizer"] - path = beaconizer - url = https://github.com/mcupak/beaconizer.git -[submodule "beacon-of-beacons"] - path = beacon-of-beacons - url = https://github.com/mcupak/beacon-of-beacons.git -[submodule] - beaconizer = master - beacon-of-beacons = master diff --git a/beacon-of-beacons b/beacon-of-beacons deleted file mode 160000 index 46f5adf6..00000000 --- a/beacon-of-beacons +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 46f5adf623ac91f063fde91c094f56f3355719b4 diff --git a/beaconizer b/beaconizer deleted file mode 160000 index 2869cde7..00000000 --- a/beaconizer +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2869cde70107b99f0c55ec52498eef45443a63c6 From 09c24e441b95889dea7691a33a50ab01f5076a8b Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Thu, 14 Apr 2016 14:02:18 -0400 Subject: [PATCH 02/13] base of 0.3 schema --- schema/beacon.avdl | 304 ++++++++++++++++++++++++-------------- schema/beaconmethods.avdl | 27 ++++ schema/consentcode.avdl | 88 +++++++++++ 3 files changed, 308 insertions(+), 111 deletions(-) create mode 100644 schema/beaconmethods.avdl create mode 100644 schema/consentcode.avdl diff --git a/schema/beacon.avdl b/schema/beacon.avdl index c58de834..55e56f11 100644 --- a/schema/beacon.avdl +++ b/schema/beacon.avdl @@ -4,167 +4,249 @@ A Beacon is a web service for genetic data sharing that can be queried for information about specific alleles. 
*/ -protocol BEACON { +protocol Beacons { -/** -A request for information about a specific site -*/ -record QueryResource { - /** - The reference bases for this variant, starting from `position`, in the genome - described by the field `reference`. (see variants.avdl) - */ +import idl "consentcode.avdl"; + +/** Query for information about a specific allele. */ +record BeaconAlleleRequest { + /** + Reference name. + + Accepted values: 1-22, X, Y. + */ + string referenceName; + + /** + Position, allele locus (0-based). + + Accepted values: non-negative integers smaller than reference length. + */ + long start; + + /** + Reference bases for this variant (starting from `start`). + + Accepted values: see the REF field in VCF 4.2 specification + (https://samtools.github.io/hts-specs/VCFv4.2.pdf). + */ string referenceBases; - /** - The bases that appear instead of the reference bases. (see variants.avdl) - */ - string alternateBases; + /** + The bases that appear instead of the reference bases. - /** The chromosome of the request */ - string chromosome; + Accepted values: see the ALT field in VCF 4.2 specification + (https://samtools.github.io/hts-specs/VCFv4.2.pdf). + */ + string alternateBases; - /** 0-based allele locus */ - long position; + /** Assembly identifier (GRC notation, e.g. `GRCh37`). */ + string assemblyId; - /** The version of the reference */ - string reference; + /** + Identifiers of datasets, as defined in `BeaconDataset`. - /** The name of the targeted population */ - union{ null, string } dataset = null; + If this field is null/not specified, all datasets should be queried. + */ + union{ null, array } datasetIds = null; } -/** -ErrorResource -*/ -record ErrorResource { - /** Error name/code, e.g. "bad_request" or "unauthorized". */ +/** Dataset of a beacon. */ +record BeaconDataset { + /** Unique identifier of the dataset. */ + string id; + + /** Name of the dataset. */ string name; - /** Error message. */ + /** Description of the dataset. 
*/ union{ null, string } description = null; -} -/** -DataUseRequirementResource -*/ -record DataUseRequirementResource { - /** Data Use requirement */ - string name; + /** Assembly identifier (GRC notation, e.g. `GRCh37`). */ + string assemblyId; - /** Description of Data Use requirement. */ - union{ null, string } description = null; -} + /** + Data use conditions for this dataset based on consent codes. + */ + union{ null, org.ga4gh.consentcode.ConsentCodeDataUse } consentCodeDataUse; -/** -DataUseResource -*/ -record DataUseResource { - /** Data Use category.*/ - string category; + /** The time the dataset was created in the beacon in ms from the epoch. */ + long created = null; - /** Description of Data Use category. */ - union{ null, string } description = null; + /** + The time the dataset was last updated in the beacon in ms from the epoch. + */ + long updated = null; - /** Data Use requirements. */ - array requirements = []; -} + /** Version of the dataset. */ + union{ null, string } version = null; -/** -DataSetSizeResource -*/ -record DataSizeResource { - /** Total number of variant positions in the data set */ - int variants; + /** Total number of variants in the dataset. */ + union{ null, long } variantCount = null; + + /** Total number of calls in the dataset. */ + union{ null, long } callCount = null; - /** Total number of samples in the data set */ - int samples; + /** Total number of samples in the dataset. */ + union{ null, long } sampleCount = null; + + /** URL to an external system providing more dataset information. */ + union{ null, string } externalUrl = null; + + /** Additional structured metadata, key-value pairs. */ + union{ null, map } info = null; } -/** -DataSetResource -*/ -record DataSetResource { - /** Dataset name */ +/** Organization owning a beacon. */ +record BeaconOrganization { + /** Unique identifier of the organization. */ string id; - /** Reference genome */ - string reference; + /** Name of the organization. 
*/ + string name; - /** Dataset description */ + /** Description of the organization. */ union{ null, string } description = null; - /** Dimensions of the data set. Should be provided if the beacon reports allele frequencies. */ - union{ null, DataSizeResource } size = null; + /** Address of the organization. */ + union{ null, string } address = null; + + /** Web of the organization (URL). */ + union{ null, string } welcomeUrl = null; + + /** + URL with the contact for the beacon operator/maintainer, e.g. link to + a contact form or an email in RFC 2368 scheme. + */ + union{ null, string } contactUrl = null; - /** Data use limitations, specified as a set of DataUseResource. */ - array data_use = []; + /** URL to the logo of the organization (image). */ + union{ null, string } logoUrl = null; + + /** Additional structured metadata, key-value pairs. */ + union{ null, map } info = null; } -/** -BeaconInformationResource -*/ -record BeaconInformationResource { - /** (Unique) beacon ID. Recommended pattern: [organization]-[beacon] (no special characters). */ +/** Beacon. */ +record Beacon { + /** Unique identifier of the beacon. */ string id; - /** Name of the owning organization. */ - string organization; + /** Name of the beacon. */ + string name; + + /** Version of the API provided by the beacon. */ + string apiVersion; + + /** Organization owning the beacon. */ + BeaconOrganization organization; + + /** Description of the beacon. */ + union{ null, string } description = null; + + /** Version of the beacon. */ + union{ null, string } version = null; - /** Beacon description. */ - string description; + /** URL to the welcome page/UI for this beacon. */ + union{ null, string } welcomeUrl = null; - /** Datasets served by the beacon. */ - array datasets = []; + /** Alternative URL to the API, e.g. a restricted version of this beacon. */ + union{ null, string } alternativeUrl = null; - /** Beacon API version supported. 
*/ - string api; + /** The time this beacon was created in ms from the epoch. */ + union { null, long } created = null; - /** URL to the homepage for this beacon. */ - union{ null, string } homepage = null; + /** The time this beacon was last updated in ms from the epoch. */ + union { null, long } updated = null; - /** An email address for contact. */ - union{ null, string } email = null; + /** + Datasets served by the beacon. Any beacon should specify at least one + dataset. + */ + array datasets = []; - /** Auth type. Expected value is OAUTH2. Defaults to NONE. */ - union{ null, string } auth = null; + /** + Examples of interesting queries, e.g. a few queries demonstrating different + responses. + */ + union{ null, array } sampleAlleleRequests = null; - /** Examples of interesting queries, e.g. a few queries demonstrating different types of responses. */ - union{ null, array } queries = null; + /** Additional structured metadata, key-value pairs. */ + union{ null, map } info = null; } -/** -The response to the Beacon query -*/ -record ResponseResource { - /** Indicator of whether the beacon has observed the variant. */ +/** Beacon-specific error representing an unexpected problem. */ +record BeaconError { + /** Numeric error code. */ + int errorCode; + + /** Error message. */ + union{ null, string } message = null; +} + +/** Dataset's response to a query for information about a specific allele. */ +record BeaconDatasetAlleleResponse { + /** Identifier of the dataset, as defined in `BeaconDataset`. */ + string datasetId; + + /** + Indicator of whether the given allele was observed in the dataset. + + This should be non-null, unless there was an error, in which case + `error` has to be non-null. + */ union{ null, boolean } exists; + /** + Dataset-specific error. + + This should be non-null in exceptional situations only, in which case + `exists` has to be null. + */ + union{ null, BeaconError } `error` = null; + /** Frequency of this allele in the dataset.
Between 0 and 1, inclusive. */ union{ null, double } frequency = null; - /** Number of observations of this allele in the dataset. */ - union{ null, int } observed = null; + /** Number of variants matching the allele request in the dataset. */ + union{ null, long } variantCount = null; + + /** Number of calls matching the allele request in the dataset. */ + union{ null, long } callCount = null; - /** Additional message. OK if request succeeded. */ - union{ null, string } info = null; + /** Number of samples matching the allele request in the dataset. */ + union{ null, long } sampleCount = null; - /** Error details. Provided if a beacon encountered an error. */ - union{ null, ErrorResource } err = null; + /** Additional note or description of the response. */ + union{ null, string } note = null; + + /** + URL to an external system, such as a secured beacon or a system providing + more information about a given allele. + */ + union{ null, string } externalUrl = null; + + /** Additional structured metadata, key-value pairs. */ + union{ null, map } info = null; } -/** -The response from the Beacon -*/ -record BeaconResponseResource { - /** Beacon ID */ - string beacon; +/** Beacon's response to a query for information about a specific allele. */ +record BeaconAlleleResponse { + /** Identifier of the beacon, as defined in `Beacon`. */ + string beaconId; - /** Query */ - QueryResource query; + /** Allele request as interpreted by the beacon. */ + union{ null, BeaconAlleleRequest } alleleRequest; - /** Response */ - ResponseResource response; + /** Responses of the individual datasets to the allele request. */ + array datasetAlleleResponses = []; + + /** + Beacon-specific error. + + This should be non-null in exceptional situations only.
+ */ + union{ null, BeaconError } `error` = null; } -} \ No newline at end of file +} diff --git a/schema/beaconmethods.avdl b/schema/beaconmethods.avdl new file mode 100644 index 00000000..69dc50ee --- /dev/null +++ b/schema/beaconmethods.avdl @@ -0,0 +1,27 @@ +@namespace("org.ga4gh.beacon") + +protocol BeaconMethods { + +import idl "beacon.avdl"; + +/**************** / *******************/ +/** +Gets beacon information. + +`GET /` (root of the beacon API) returns a representation of `Beacon`. +*/ +Beacon getBeacon(); + +/**************** /alleles *******************/ +/** +Gets response to a beacon query for information about a specific allele. + +`GET /alleles` uses `BeaconAlleleRequest` for parameters and returns +a representation of `BeaconAlleleResponse`. Example: `GET /alleles? +referenceName=1&start=1000&referenceBases=A&alternateBases=C&assemblyId=GRCh37& +datasetIds=d1&datasetIds=d2` +*/ +BeaconAlleleResponse getBeaconAlleleResponse( + BeaconAlleleRequest beaconAlleleRequest); + +} \ No newline at end of file diff --git a/schema/consentcode.avdl b/schema/consentcode.avdl new file mode 100644 index 00000000..6e314d29 --- /dev/null +++ b/schema/consentcode.avdl @@ -0,0 +1,88 @@ +@namespace("org.ga4gh.consentcode") + +/** +Data use conditions based on consent codes as introduced in +http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1005772. +*/ +protocol ConsentCodeDataUseConditions { + +/** Data use condition. */ +record ConsentCodeDataUseCondition { + /** + Consent code abbreviation, e.g. `NRES` for no restrictions primary category. + */ + string code; + + /** Description of the condition. */ + union{ null, string } description = null; +} + +/** Data use of a resource based on consent codes. */ +record ConsentCodeDataUse { + /** + Primary data use category. + + Accepted values (- code: name - description): + - NRES: no restrictions - No restrictions on data use. 
+ - GRU(CC): general research use and clinical care - For + health/medical/biomedical purposes and other biological research, + including the study of population origins or ancestry. + - HMB(CC): health/medical/biomedical research and clinical care - Use of the + data is limited to health/medical/biomedical purposes, does not include + the study of population origins or ancestry. + - DS-[XX](CC): disease-specific research and clinical care - Use of the data + must be related to [disease]. + - POA: population origins/ancestry research - Use of the data is limited to + the study of population origins or ancestry. + */ + ConsentCodeDataUseCondition primaryCategory; + + /** + Secondary data use categories. + + Accepted values (- code: name - description): + - RS-[XX]: other research-specific restrictions - Use of the data is limited + to studies of [research type] (e.g., pediatric research). + - RUO: research use only - Use of data is limited to research purposes + (e.g., does not include its use in clinical care). + - NMDS: no “general methods” research - Use of the data includes methods + development research (e.g., development of software or algorithms) ONLY + within the bounds of other data use limitations. + - GSO: genetic studies only - Use of the data is limited to genetic studies + only (i.e., no research using only the phenotype data). + */ + array secondaryCategories = []; + + /** + Data use requirements. + + Accepted values (- code: name - description): + - NPU: not-for-profit use only - Use of the data is limited to + not-for-profit organizations. + - PUB: publication required - Requestor agrees to make results of studies + using the data available to the larger scientific community. + - COL-[XX]: collaboration required - Requestor must agree to collaboration + with the primary study investigator(s). + - RTN: return data to database/resource - Requestor must return + derived/enriched data to the database/resource. 
+ - IRB: ethics approval required - Requestor must provide documentation of + local IRB/REC approval. + - GS-[XX]: geographical restrictions - Use of the data is limited to within + [geographic region]. + - MOR-[XX]: publication moratorium/embargo - Requestor agrees not to publish + results of studies until [date]. + - TS-[XX]: time limits on use - Use of data is approved for [x months]. + - US: user-specific restrictions - Use of data is limited to use by approved + users. + - PS: project-specific restrictions - Use of data is limited to use within + an approved project. + - IS: institution-specific restrictions - Use of data is limited to use + within an approved institution. + */ + array requirements = []; + + /** Version of the data use specification. */ + string version; +} + +} From 13793beee1adeefa675e94cf6195000979674444 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Thu, 14 Apr 2016 15:43:43 -0400 Subject: [PATCH 03/13] fixed #6 renamed endpoint from alleles to query --- schema/beaconmethods.avdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/schema/beaconmethods.avdl b/schema/beaconmethods.avdl index 69dc50ee..554f0402 100644 --- a/schema/beaconmethods.avdl +++ b/schema/beaconmethods.avdl @@ -12,12 +12,12 @@ Gets beacon information. */ Beacon getBeacon(); -/**************** /alleles *******************/ +/**************** /query *******************/ /** -Gets response to a beacon query for information about a specific allele. +Gets response to a beacon query for allele information. -`GET /alleles` uses `BeaconAlleleRequest` for parameters and returns -a representation of `BeaconAlleleResponse`. Example: `GET /alleles? +`GET /query` uses `BeaconAlleleRequest` for parameters and returns +a representation of `BeaconAlleleResponse`. Example: `GET /query? 
referenceName=1&start=1000&referenceBases=A&alternateBases=C&assemblyId=GRCh37& datasetIds=d1&datasetIds=d2` */ From 334383da3a3737d3bcdabf161b5e23c03a8aad00 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Mon, 18 Apr 2016 00:44:59 -0400 Subject: [PATCH 04/13] minor clarifications in documentation (comments) --- schema/beacon.avdl | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/schema/beacon.avdl b/schema/beacon.avdl index 55e56f11..07793a0b 100644 --- a/schema/beacon.avdl +++ b/schema/beacon.avdl @@ -8,10 +8,14 @@ protocol Beacons { import idl "consentcode.avdl"; -/** Query for information about a specific allele. */ +/** +Query for information about a specific allele. + +Based on `org.ga4gh.models.Variant`. +*/ record BeaconAlleleRequest { /** - Reference name. + Reference name (chromosome). Accepted values: 1-22, X, Y. */ @@ -90,7 +94,7 @@ record BeaconDataset { /** Total number of samples in the dataset. */ union{ null, long } sampleCount = null; - /** URL to an external system providing more dataset information. */ + /** URL to an external system providing more dataset information (RFC 3986 format). */ union{ null, string } externalUrl = null; /** Additional structured metadata, key-value pairs. */ @@ -111,16 +115,18 @@ record BeaconOrganization { /** Address of the organization. */ union{ null, string } address = null; - /** Web of the organization (URL). */ + /** URL of the website of the organization (RFC 3986 format). */ union{ null, string } welcomeUrl = null; /** URL with the contact for the beacon operator/maintainer, e.g. link to - a contact form or an email in RFC 2368 scheme. + a contact form (RFC 3986 format) or an email (RFC 2368 format). */ union{ null, string } contactUrl = null; - /** URL to the logo of the organization (image). */ + /** + URL to the logo (PNG/JPG format) of the organization (RFC 3986 format). 
+ */ union{ null, string } logoUrl = null; /** Additional structured metadata, key-value pairs. */ @@ -147,10 +153,13 @@ record Beacon { /** Version of the beacon. */ union{ null, string } version = null; - /** URL to the welcome page/UI for this beacon. */ + /** URL to the welcome page for this beacon (RFC 3986 format). */ union{ null, string } welcomeUrl = null; - /** Alternative URL to the API, e.g. a restricted version of this beacon. */ + /** + Alternative URL to the API, e.g. a restricted version of this beacon + (RFC 3986 format). + */ union{ null, string } alternativeUrl = null; /** The time this beacon was created in ms from the epoch. */ @@ -222,7 +231,7 @@ record BeaconDatasetAlleleResponse { /** URL to an external system, such as a secured beacon or a system providing - more information about a given allele. + more information about a given allele (RFC 3986 format). */ union{ null, string } externalUrl = null; From 6c3a175febcadf955e0c855c7d6a4b1460956d15 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Tue, 19 Apr 2016 17:01:06 -0400 Subject: [PATCH 05/13] fixed #38 added description of how to contribute --- CONTRIBUTING.md | 89 +++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +-- 2 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..d3140113 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,89 @@ +# How to contribute + +Thank you for taking the time to contribute. We appreciate it! + +There are two ways to contribute - via issues, which are used for discussion, and pull requests, which are concrete proposals of change. + +## Issues + +The project's [Issues Page](https://github.com/ga4gh/beacon-team/issues) is a forum to discuss both major and minor issues related to developing the Beacon API. 
It also serves as the means for collaborating with the group and discussing contributions that will ultimately lead to changes to the API. See the [Issue Resolution](#issue-resolution) section below for specifics on how issues are resolved by the community. + +## Pull Requests + +The way to contribute development effort and code to the project is via GitHub pull requests. GitHub provides a nice [overview on how to create a pull request](https://help.github.com/articles/creating-a-pull-request). + +Some general rules to follow: + +- [Fork](https://help.github.com/articles/fork-a-repo) the main project into your personal GitHub space to work on. +- Create a branch for each update that you're working on. These branches are often called "feature" or "topic" branches. Any changes that you push to your feature branch will automatically be shown in the pull request. +- Keep your pull requests as small as possible. Large pull requests are hard to review. Try to break up your changes into self-contained and incremental pull requests. +- The first line of commit messages should be a short (<80 character) summary, followed by an empty line and then any details that you want to share about the commit. +- Please try to follow the [existing syntax style](#syntax-style-and-conventions). + +## Issue Resolution + +Once a pull request or issue has been submitted, anyone can comment or vote on it to express their opinion following the Apache voting system. Quick summary: + +- **+1** something you agree with +- **-1** if you have a strong objection to an issue, which will be taken very seriously. A -1 vote should provide an alternative solution. +- **+0** or **-0** for neutral comments or weak opinions. +- It's okay to have input without voting. +- Silence gives assent. + +A pull request with at least two **+1** votes, no **-1** votes, that has been open for at least 3 days is ready to be merged. We sometimes waive the 3 days for cosmetic-only changes -- use good judgment.
If an issue gets any **-1** votes, the comments on the issue need to reach consensus before the issue can be resolved one way or the other. There isn't any strict time limit on a contentious issue. + +The project will strive for full consensus on everything until it runs into a problem with this model. + +## Syntax Style and Conventions + +The current code conventions for the source files are as follows: + +- Use two-space indentation, and no tabs. +- Hard-wrap code to 80 characters per line. +- Use `UpperCamelCase` for object or record names. +- Use `lowerCamelCase` for attribute or method names. +- Use `CONSTANT_CASE` for global and constant values. +- Comments: + - Comments should be indented at the same level as the surrounding code. + - Comments should precede the code that they make a comment on. Documentation comments will not work otherwise. + - Documentation comments, which are intended to be processed by avrodoc and displayed in the user-facing API documentation, must use the `/** ... */` style, and must not have a leading `*` on each internal line: + + /** + This documentation comment will be + processed correctly by avrodoc. + */ + + /** + * This documentation comment will have a + * bullet point at the start of every line + * when processed by avrodoc. + */ + + - Block and multi-line non-documentation comments, intended for schema developers only, must use the `/* ... */` style. + + /* + This multi-line comment will not appear in the + avrodoc documentation and is intended for + schema developers. + */ + + - All multi-line comments should have the comment text at the same indent level as the comment delimiters. + - One-line non-documentation comments, intended for schema developers only, must use the `// ...` style. + +## Gitflow Workflow + +Our workflow is based on [Gitflow](https://www.atlassian.com/git/tutorials/comparing-workflows/gitflow-workflow), which defines a strict branching model designed around the project release.
This workflow uses two branches to record the history of the project. The master branch stores the official release history, and the develop branch serves as an integration branch for features. Aside from these two main branches, the workflow utilizes topic and release branches. + +### Topic Branches + +If you wish to collaborate on a new feature with other GA4GH members you can ask that a topic branch be created. Since GitHub does not allow pull requests against branches that do not yet exist, you will have to create an issue asking for the topic branch to be created. + +Once a topic branch exists, pull requests can be made against it in the usual way. It may also be brought up to date with new changes merged into develop by anyone with commit access, if the changes produce merely a fast-forward merge for each constituent branch. However, if changes from the develop branch create a new merge commit in one or more of the repositories, that commit needs to be reviewed in a pull request. + +Changes made in a topic branch can be merged into develop by creating and then [resolving in the normal way](#issue-resolution) a pull request against the develop branch. + +Topic branches that have been merged into develop and that are no longer being developed upon should be [deleted](https://github.com/blog/1335-tidying-up-after-pull-requests) (they will still appear in the git history). + +### Release Branches + +From time to time the group will make a release. This is achieved by creating a branch named "release-foo", where foo is the release name. Only bug fixes are allowed in release branches. To refer to a specific version of a release branch either the commit id can be used, or alternatively (better), a tag can be created (which should be replicated across repositories). diff --git a/README.md b/README.md index c4d02aef..9aa8e85e 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,9 @@ Please contact us for support in lighting a Beacon.
## Developers -### Development Model +### How to contribute -We adopt the Gitflow development model as outlined here: http://nvie.com/posts/a-successful-git-branching-model/ +See the [CONTRIBUTING.md](CONTRIBUTING.md) document. ### Developer FAQs From efaec47d2b8903589e7a974a0026fdd65393b0e8 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Wed, 20 Apr 2016 18:38:20 -0400 Subject: [PATCH 06/13] #30 added maven setup --- .gitignore | 75 ++++++ pom.xml | 252 ++++++++++++++++++ .../main/resources/avro}/beacon.avdl | 0 .../main/resources/avro}/beaconmethods.avdl | 0 .../main/resources/avro}/consentcode.avdl | 0 5 files changed, 327 insertions(+) create mode 100644 .gitignore create mode 100644 pom.xml rename {schema => src/main/resources/avro}/beacon.avdl (100%) rename {schema => src/main/resources/avro}/beaconmethods.avdl (100%) rename {schema => src/main/resources/avro}/consentcode.avdl (100%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..fded6d94 --- /dev/null +++ b/.gitignore @@ -0,0 +1,75 @@ +*.py[cod] +target +*~ +#* +doc/source/schemas/*.avpr +build + +#********** windows template********** + +# Windows image file caches +Thumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + + +#********** osx template********** + +.DS_Store + +# Thumbnails +._* + +# Files that might appear on external disk +.Spotlight-V100 +.Trashes + + +#********** linux template********** + +.* +!.gitignore +*~ + +# KDE +.directory + + +#********** emacs template********** + +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + + +#********** vim template********** + +.*.sw[a-z] +*.un~ +Session.vim + + +#********** maven template********** + +target/ + +#********** Travis CI ********** + +!.travis.yml + +#********** IntelliJ files ****** +*.iml + diff --git a/pom.xml b/pom.xml new file mode 100644 index 00000000..b19d14a0 --- /dev/null +++ 
b/pom.xml @@ -0,0 +1,252 @@ + + + 4.0.0 + org.ga4gh + beacon + jar + 0.3-SNAPSHOT + + Beacon + Beacon data models and APIs + https://github.com/ga4gh/beacon-team + 2014 + + Global Alliance for Genomics and Health + http://genomicsandhealth.org + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + + + + scm:git:https://github.com/ga4gh/beacon-team + scm:git:https://github.com/ga4gh/beacon-team + https://github.com/ga4gh/beacon-team + + + + GA4GH developers + https://github.com/orgs/ga4gh/people + Global Alliance Data Working Group + http://ga4gh.org/ + + + + + 1.8.0 + [1.6,) + [3.0.4,) + UTF-8 + UTF-8 + + + + + + + org.apache.avro + avro-maven-plugin + ${avro.version} + + + org.apache.maven.plugins + maven-clean-plugin + 2.6.1 + + + org.apache.maven.plugins + maven-compiler-plugin + 3.3 + + + org.apache.maven.plugins + maven-deploy-plugin + 2.8.2 + + + org.apache.maven.plugins + maven-enforcer-plugin + 1.4 + + + org.apache.maven.plugins + maven-gpg-plugin + 1.6 + + + org.apache.maven.plugins + maven-install-plugin + 2.5.2 + + + org.apache.maven.plugins + maven-jar-plugin + 2.6 + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9.1 + + true + true + true + + + + org.apache.maven.plugins + maven-release-plugin + 2.5.1 + + true + false + release + deploy + + + + org.apache.maven.plugins + maven-resources-plugin + 2.7 + + + org.apache.maven.plugins + maven-source-plugin + 2.4 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.18.1 + + + + + + org.apache.avro + avro-maven-plugin + + + schemas + generate-sources + + schema + protocol + idl-protocol + + + ${project.basedir}/src/main/resources/avro + String + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + enforce-java + + enforce + + + + + ${maven.enforcer.maven-version} + + + ${maven.enforcer.jdk-version} + + + + + + + + + + + + org.apache.avro + avro + ${avro.version} + + + org.apache.avro + avro-ipc + ${avro.version} + + + + + + 
org.apache.avro + avro + compile + + + org.apache.avro + avro-ipc + compile + + + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + release + + + + org.apache.maven.plugins + maven-gpg-plugin + + + sign-artifacts + verify + + sign + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar-no-fork + + + + + + + + + diff --git a/schema/beacon.avdl b/src/main/resources/avro/beacon.avdl similarity index 100% rename from schema/beacon.avdl rename to src/main/resources/avro/beacon.avdl diff --git a/schema/beaconmethods.avdl b/src/main/resources/avro/beaconmethods.avdl similarity index 100% rename from schema/beaconmethods.avdl rename to src/main/resources/avro/beaconmethods.avdl diff --git a/schema/consentcode.avdl b/src/main/resources/avro/consentcode.avdl similarity index 100% rename from schema/consentcode.avdl rename to src/main/resources/avro/consentcode.avdl From fbefb935f2459730a2331988e34658a736948e9c Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Wed, 20 Apr 2016 18:11:50 -0400 Subject: [PATCH 07/13] fixed #7 updated description of the query endpoint to support both GET and POST --- src/main/resources/avro/beaconmethods.avdl | 28 ++++++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/main/resources/avro/beaconmethods.avdl b/src/main/resources/avro/beaconmethods.avdl index 554f0402..af434aae 100644 --- a/src/main/resources/avro/beaconmethods.avdl +++ b/src/main/resources/avro/beaconmethods.avdl @@ -16,12 +16,30 @@ Beacon getBeacon(); /** Gets response to a beacon query for allele information. -`GET /query` uses `BeaconAlleleRequest` for parameters and returns -a representation of `BeaconAlleleResponse`. Example: `GET /query? 
-referenceName=1&start=1000&referenceBases=A&alternateBases=C&assemblyId=GRCh37& -datasetIds=d1&datasetIds=d2` +GET and POST HTTP verbs must be supported. `GET /query` accepts fields of +`BeaconAlleleRequest` as URL parameters and returns a JSON representation of +`BeaconAlleleResponse`. `POST /query` accepts a JSON representation of +`BeaconAlleleRequest` as the request body and returns a JSON representation +of `BeaconAlleleResponse`. + +Examples: +GET /query?referenceName=1&start=1000&referenceBases=A&alternateBases=C +&assemblyId=GRCh37&datasetIds=d1&datasetIds=d2 + +POST /query +{ + "referenceName": "1", + "start": 1000, + "referenceBases": "A", + "alternateBases": "C", + "assemblyId": "GRCh37", + "datasetIds": [ + "d1", + "d2" + ] +} */ BeaconAlleleResponse getBeaconAlleleResponse( BeaconAlleleRequest beaconAlleleRequest); -} \ No newline at end of file +} From 4c2737d2436d0d874ee13ef07896a1c48c80bab1 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Thu, 14 Apr 2016 15:29:59 -0400 Subject: [PATCH 08/13] fixed #19 changed timestamp format to ISO 8601 --- src/main/resources/avro/beacon.avdl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/main/resources/avro/beacon.avdl b/src/main/resources/avro/beacon.avdl index 07793a0b..a641fea7 100644 --- a/src/main/resources/avro/beacon.avdl +++ b/src/main/resources/avro/beacon.avdl @@ -74,13 +74,11 @@ record BeaconDataset { */ union{ null, org.ga4gh.consentcode.ConsentCodeDataUse } consentCodeDataUse; - /** The time the dataset was created in the beacon in ms from the epoch. */ - long created = null; + /** The time the dataset was created (ISO 8601 format). */ + string createDateTime; - /** - The time the dataset was last updated in the beacon in ms from the epoch. - */ - long updated = null; + /** The time the dataset was last updated (ISO 8601 format). */ + string updateDateTime; /** Version of the dataset. 
*/ union{ null, string } version = null; @@ -162,11 +160,11 @@ record Beacon { */ union{ null, string } alternativeUrl = null; - /** The time this beacon was created in ms from the epoch. */ - union { null, long } created = null; + /** The time the beacon was created (ISO 8601 format). */ + union{ null, string } createDateTime; - /** The time this beacon was last updated in ms from the epoch. */ - union { null, long } updated = null; + /** The time the beacon was last updated (ISO 8601 format). */ + union{ null, string } updateDateTime; /** Datasets served by the beacon. Any beacon should specify at least one From 823a3e0bf75454f22bfc4119dad2c452af8955b1 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Sun, 29 May 2016 18:19:08 -0400 Subject: [PATCH 09/13] fixed #15 removed data use conditions --- src/main/resources/avro/beacon.avdl | 7 -- src/main/resources/avro/consentcode.avdl | 88 ------------------------ 2 files changed, 95 deletions(-) delete mode 100644 src/main/resources/avro/consentcode.avdl diff --git a/src/main/resources/avro/beacon.avdl b/src/main/resources/avro/beacon.avdl index a641fea7..bff5be84 100644 --- a/src/main/resources/avro/beacon.avdl +++ b/src/main/resources/avro/beacon.avdl @@ -6,8 +6,6 @@ information about specific alleles. */ protocol Beacons { -import idl "consentcode.avdl"; - /** Query for information about a specific allele. @@ -69,11 +67,6 @@ record BeaconDataset { /** Assembly identifier (GRC notation, e.g. `GRCh37`). */ string assemblyId; - /** - Data use conditions for this dataset based on consent codes. - */ - union{ null, org.ga4gh.consentcode.ConsentCodeDataUse } consentCodeDataUse; - /** The time the dataset was created (ISO 8601 format). 
*/ string createDateTime; diff --git a/src/main/resources/avro/consentcode.avdl b/src/main/resources/avro/consentcode.avdl deleted file mode 100644 index 6e314d29..00000000 --- a/src/main/resources/avro/consentcode.avdl +++ /dev/null @@ -1,88 +0,0 @@ -@namespace("org.ga4gh.consentcode") - -/** -Data use conditions based on consent codes as introduced in -http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1005772. -*/ -protocol ConsentCodeDataUseConditions { - -/** Data use condition. */ -record ConsentCodeDataUseCondition { - /** - Consent code abbreviation, e.g. `NRES` for no restrictions primary category. - */ - string code; - - /** Description of the condition. */ - union{ null, string } description = null; -} - -/** Data use of a resource based on consent codes. */ -record ConsentCodeDataUse { - /** - Primary data use category. - - Accepted values (- code: name - description): - - NRES: no restrictions - No restrictions on data use. - - GRU(CC): general research use and clinical care - For - health/medical/biomedical purposes and other biological research, - including the study of population origins or ancestry. - - HMB(CC): health/medical/biomedical research and clinical care - Use of the - data is limited to health/medical/biomedical purposes, does not include - the study of population origins or ancestry. - - DS-[XX](CC): disease-specific research and clinical care - Use of the data - must be related to [disease]. - - POA: population origins/ancestry research - Use of the data is limited to - the study of population origins or ancestry. - */ - ConsentCodeDataUseCondition primaryCategory; - - /** - Secondary data use categories. - - Accepted values (- code: name - description): - - RS-[XX]: other research-specific restrictions - Use of the data is limited - to studies of [research type] (e.g., pediatric research). - - RUO: research use only - Use of data is limited to research purposes - (e.g., does not include its use in clinical care). 
- - NMDS: no “general methods” research - Use of the data includes methods - development research (e.g., development of software or algorithms) ONLY - within the bounds of other data use limitations. - - GSO: genetic studies only - Use of the data is limited to genetic studies - only (i.e., no research using only the phenotype data). - */ - array secondaryCategories = []; - - /** - Data use requirements. - - Accepted values (- code: name - description): - - NPU: not-for-profit use only - Use of the data is limited to - not-for-profit organizations. - - PUB: publication required - Requestor agrees to make results of studies - using the data available to the larger scientific community. - - COL-[XX]: collaboration required - Requestor must agree to collaboration - with the primary study investigator(s). - - RTN: return data to database/resource - Requestor must return - derived/enriched data to the database/resource. - - IRB: ethics approval required - Requestor must provide documentation of - local IRB/REC approval. - - GS-[XX]: geographical restrictions - Use of the data is limited to within - [geographic region]. - - MOR-[XX]: publication moratorium/embargo - Requestor agrees not to publish - results of studies until [date]. - - TS-[XX]: time limits on use - Use of data is approved for [x months]. - - US: user-specific restrictions - Use of data is limited to use by approved - users. - - PS: project-specific restrictions - Use of data is limited to use within - an approved project. - - IS: institution-specific restrictions - Use of data is limited to use - within an approved institution. - */ - array requirements = []; - - /** Version of the data use specification. 
*/ - string version; -} - -} From ac9e3c0a6dba6d7595823ee6bf1f3912f06126b1 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Sun, 29 May 2016 19:24:51 -0400 Subject: [PATCH 10/13] fixed #11 added global exists beacon response --- src/main/resources/avro/beacon.avdl | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/main/resources/avro/beacon.avdl b/src/main/resources/avro/beacon.avdl index a641fea7..6f30d25e 100644 --- a/src/main/resources/avro/beacon.avdl +++ b/src/main/resources/avro/beacon.avdl @@ -200,7 +200,7 @@ record BeaconDatasetAlleleResponse { Indicator of whether the given allele was observed in the dataset. This should be non-null, unless there was an error, in which case - `error` has to be null. + `error` has to be non-null. */ union{ null, boolean } exists; @@ -242,18 +242,30 @@ record BeaconAlleleResponse { /** Identifier of the beacon, as defined in `Beacon`. */ string beaconId; - /** Allele request as interpreted by the beacon. */ - union{ null, BeaconAlleleRequest } alleleRequest; + /** + Indicator of whether the given allele was observed in any of the datasets + queried. - /** Indicator of whether the beacon has observed the allele. */ - array datasetAlleleResponses = []; + This should be non-null, unless there was an error, in which case + `error` has to be non-null. + */ + union{ null, boolean } exists; /** Beacon-specific error. - This should be non-null in exceptional situations only. + This should be non-null in exceptional situations only, in which case + `exists` has to be null. */ union{ null, BeaconError } `error` = null; + + /** Allele request as interpreted by the beacon. */ + union{ null, BeaconAlleleRequest } alleleRequest; + + /** + Indicator of whether the given allele was observed in individual datasets. 
+ */ + array datasetAlleleResponses = []; } } From 2b798462d31239a2d5fcd2b5ce2bddadcffa869a Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Sun, 29 May 2016 23:06:22 -0400 Subject: [PATCH 11/13] fixed #10 made dataset-level responses optional --- src/main/resources/avro/beacon.avdl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/main/resources/avro/beacon.avdl b/src/main/resources/avro/beacon.avdl index 6f30d25e..83f5008b 100644 --- a/src/main/resources/avro/beacon.avdl +++ b/src/main/resources/avro/beacon.avdl @@ -53,6 +53,15 @@ record BeaconAlleleRequest { If this field is null/not specified, all datasets should be queried. */ union{ null, array } datasetIds = null; + + /** + Indicator of whether responses for individual datasets + (`datasetAlleleResponses`) should be included (not null) in the response + (`BeaconAlleleResponse`) to this request. + + If null (not specified), the default value of false is assumed. + */ + union{ null, boolean } includeDatasetResponses = null; } /** Dataset of a beacon. */ @@ -264,8 +273,11 @@ record BeaconAlleleResponse { /** Indicator of whether the given allele was observed in individual datasets. + + This should be non-null if `includeDatasetResponses` in the corresponding + `BeaconAlleleRequest` is true, and null otherwise. */ - array datasetAlleleResponses = []; + union{ null, array } datasetAlleleResponses = null; } } From 300988b4915294f8b905cc6023916400434794d0 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Tue, 31 May 2016 09:49:26 -0400 Subject: [PATCH 12/13] moved general beacon information to wiki --- README.md | 50 +++++++++++++------------------------------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 9aa8e85e..6ffc8f23 100644 --- a/README.md +++ b/README.md @@ -1,49 +1,25 @@ -# Beacon Project Repository +# Beacon Schemas -## What's a Beacon? +## What is a Beacon? 
-A “Beacon” is a web-accessible service that can be queried for information about a specific allele. A user of a Beacon can pose queries of the form “Have you observed this nucleotide (e.g. C) at this genomic location (e.g. position 32,936,732 on chromosome 13)?” to which the Beacon must respond with either “yes” or “no.” In this way, a Beacon allows allelic information of interest to be discovered by a remote querier with no reference to a specific sample or patient the allele was observed in. +A _Beacon_ is a web-accessible service that can be queried for information about a specific allele. A user of a Beacon can pose queries of the form _Have you observed this nucleotide (e.g. C) at this genomic location (e.g. position 32,936,732 on chromosome 13)?_ to which the Beacon must respond with either _yes_ or _no_. -## Related Links +## Beacon Project -* GA4GH Beacon Site: http://ga4gh.org/#/beacon -* Beacon Network: http://beacon-network.org +The Beacon project is a project to test the willingness of international sites to share genetic data in the simplest of all technical contexts. For more information, visit [GA4GH Data Working Group](http://ga4gh.org/#/beacon). -## How to light a Beacon +## Tools -If you are a data steward, please consider lighting a Beacon. There are three options of varying complexity for lighting beacons: -* From scratch (hardest): develop a Beacon compliant with the specification(s) in the schemas directory of this repository -* Reference impementation (easier): use a free to download and install implementation of Beacon, which you host on your network or on a cloud instance you own -* Managed solutions (easiest): use an implementation of Beacon that is implemented and maintained by a third-party +A list of related tools and projects developed by the community is maintained on the [Resources](https://github.com/ga4gh/beacon-team/wiki/Resources) wiki page. -Please contact us for support in lighting a Beacon. 
+## How to contribute -## Developers +Guidelines for contributing to this repository are listed in the [CONTRIBUTING.md](CONTRIBUTING.md) document. -### How to contribute +## License -See the [CONTRIBUTING.md](CONTRIBUTING.md) document. +See the [LICENSE](LICENSE) file. -### Developer FAQs +## More information -#### What are the valid responses for a beacon? - -A: Yes or No - -Yes means, "I have information about the queried variant" and No means, either (i) "I don't have information about the queried variant" or (ii) "I don't know if I have information about the queried variant". - -The decision to support either Yes or No, and not an additional Null option, was due to complexities in distinguishing between "I don't have information about the queried variant" and "I don't know if I have information about the queried variant". For example, if a beacon is served from a VCF file that has reference alleles omitted, it is impossible to determine whether the reference allele was observed but not recorded, or not observed at all. - -It was decided that an additional set of Evidence Codes could be returned if this information is available. - -### Is the position in a Beacon query 0-based or 1-based? - -A: 0-based - -The decision to use 0-based coordinates for the position in the Beacon query was based on alignment with the core GA4GH APIs. Of course, when designing clients (e.g. websites) which query beacons, developers should use descretion about what the end-user expects. The Beacon Network, for example, takes 1-based coordinates as input and queries connected Beacons in 0-based coordinates, as appropriate. - -### How are queries against complex mutations treated? - -A: For insertions and deletions exact match is required for a Yes response. More complex mutations (e.g. inversions, duplications) are not yet supported. - -Queries for insertions or deletions must be sepcified using ref and alt strings of bases. 
For insertions and deletions exact match is required for a Yes response. More complex mutations (e.g. inversions, duplications) are not yet supported. +More information for developers is available on [our wiki](https://github.com/ga4gh/beacon-team/wiki). From 88c2c0ccc6b4b43b987be39b7bad5eb32d7c1211 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Tue, 31 May 2016 12:39:04 -0400 Subject: [PATCH 13/13] Bumped version number to 0.3.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b19d14a0..079d0485 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ org.ga4gh beacon jar - 0.3-SNAPSHOT + 0.3.0 Beacon Beacon data models and APIs