This repository has been archived by the owner on Jan 31, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 22
Auxiliary Data Import
mkiwala-g edited this page Aug 11, 2014
·
20 revisions
$ URI='ftp://ftp.ncbi.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh37/special_requests/GRCh37-lite.fa.gz'
$ genome taxon create \
--domain=Eukaryota \
--name=human \
--ncbi-taxon-id=9606 \
--species-latin-name='Homo sapiens'
$ genome processing-profile create imported-reference-sequence --name=chromosome-fastas
$ wget $URI
$ gunzip GRCh37-lite.fa.gz
$ genome model define imported-reference-sequence \
--fasta-file=$PWD/GRCh37-lite.fa \
--processing-profile-id=2dc430f34746455b87b3dd179b3a193e \
--species-name=human \
--version=37-lite-test \
--prefix=GRC \
--assembly-name=GRCh37-lite \
--build-name=GRCh37-lite-build37 \
--sequence-uri=$URI
$ genome model define imported-reference-sequence \
--append-to=106942997 \
--fasta-file=/ERCC/ERCC92.fa \
--use-default-sequence-uri \
--species-name=human \
--version=37_ERCC
$ genome model imported-variation-list import-dbsnp-build \
--vcf-file-url ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606_b141_GRCh37p13/VCF/00-All.vcf.gz \
--version 141 \
--reference-sequence-build 106942997 \
--flat-file-pattern ds_flat_chX.flat.gz \
--contig-names-translation-file /reference/scaffold_names \
--from-names-column 2 \
--to-names-column 3