-
Notifications
You must be signed in to change notification settings - Fork 1
6. GBS Sexing
George Pacheco edited this page Jul 27, 2021
·
2 revisions
We took advantage of the improved pigeon genome assembly Cliv_2.0 to try to sex our pigeons based on differences in coverage between the Z chromosome and an autosome of similar size.
# Extract sequence CM007524.1 (saved as the "_Z" FASTA) from the genome FASTA.
# The result is written directly into ~/data/Pigeons/Reference/ because the
# following bioseq and blastn steps read it from that directory; a path
# relative to the current directory only worked when run from there.
samtools faidx GCA_001887795.1_colLiv2_genomic.fasta CM007524.1 \
  > ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_Z.fasta
# Run bpwrapper's bioseq with --restrict-coord for enzyme EcoT22I on the "_Z"
# FASTA and save whatever it prints as a .bed file next to it.
# NOTE(review): presumably this reports EcoT22I restriction-site coordinates;
# confirm against the bioseq documentation.
# SCRIPTS is quoted so an install path containing spaces or glob characters
# still resolves to a single command word.
"${SCRIPTS}/appz/p5-bpwrapper/bin/bioseq" --restrict-coord EcoT22I \
  ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_Z.fasta \
  > ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_Z.bed
# Hand a blastn search of the "_Z" FASTA against the
# DanishTumbler_Dovetail_ReRun.fasta database to xsbatch as job "Sexing".
# NOTE(review): xsbatch looks like a site-specific batch-submission wrapper;
# confirm how it interprets -c, --mem-per-cpu and --time.
# -c 15 matches blastn's -num_threads 15.
# Output format 7 is a tab-separated table with '#' comment lines; its columns
# are the 12 standard ones followed by subject length (slen) and subject
# strand (sstrand), i.e. columns 13 and 14.
# The job command stays one quoted string so it is passed through unchanged;
# the redirection and the ~ paths inside it are left for the job to resolve.
xsbatch -c 15 --mem-per-cpu 2000 -J Sexing --time 1-00 -- \
  "blastn -num_threads 15 -query ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_Z.fasta -db ~/data/Pigeons/Reference/DanishTumbler_Dovetail_ReRun.fasta -evalue 1e-5 -perc_identity 95 -outfmt '7 std slen sstrand' > ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_Z--DanishTumbler_Dovetail_ReRun.tsv"
# Select the subject scaffolds that are almost fully covered by strong BLAST
# hits from the "_Z" query, then keep the loci BED lines naming those scaffolds.
# Column numbers refer to the "7 std slen sstrand" BLAST table:
#   $2 sseqid, $4 alignment length, $5 mismatches, $9 sstart, $10 send,
#   $13 slen, $14 sstrand.
#
# Stage 1: drop lines containing '#', and keep hits on subjects longer than
# 10000 bp whose alignment is at least 1000 long with under 1% mismatches.
awk '!/#/ && $13>10000 && $4>=1000 && $5/$4<0.01' \
    ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_Z--DanishTumbler_Dovetail_ReRun.tsv |
  # Stage 2: order hits by subject id, then numerically by sstart and send.
  sort -k 2,2 -k 9,9g -k 10,10g |
  # Stage 3: emit subject, start, end, slen; minus-strand hits have sstart and
  # send swapped so that the later end-start+1 is positive.
  awk '{start=$9; end=$10} $14=="minus"{start=$10; end=$9} {print $2"\t"start"\t"end"\t"$13}' |
  # Stage 4: per subject, sum the hit spans and print subject, summed span and
  # summed span / slen. Hits are not merged, so overlapping hits count twice.
  awk '{x[$1]+=$3-$2+1; s[$1]=$4} END{for(i in x) print i"\t"x[i]"\t"x[i]/s[i]}' |
  # Stage 5: rank subjects by that ratio, highest first.
  sort -k 3,3gr |
  # Stage 6: print the ids of subjects with ratio above 0.9.
  # NOTE(review): NR>1 also discards the top-ranked subject whatever its
  # ratio; confirm this is intended.
  awk 'NR>1 && $3>0.9{print $1}' |
  # Stage 7: keep loci BED lines that contain any selected id as a whole word.
  # grep -F replaces the obsolescent fgrep, which newer GNU grep warns about.
  grep -F -w -f - \
    ~/data/Pigeons/Reference/PBGP_FinalRun.EcoT22I_Extended_Merged_RemovedBadLoci-PossibleParalogs-g650.bed \
    > ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_Z--DanishTumbler_Dovetail_ReRun.bed
# Extract sequence CM007530.1 (saved as the "_6" FASTA) from the genome FASTA.
# The result is written directly into ~/data/Pigeons/Reference/ because the
# following bioseq and blastn steps read it from that directory; a path
# relative to the current directory only worked when run from there.
samtools faidx GCA_001887795.1_colLiv2_genomic.fasta CM007530.1 \
  > ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_6.fasta
# Run bpwrapper's bioseq with --restrict-coord for enzyme EcoT22I on the "_6"
# FASTA and save whatever it prints as a .bed file next to it.
# NOTE(review): presumably this reports EcoT22I restriction-site coordinates;
# confirm against the bioseq documentation.
# SCRIPTS is quoted so an install path containing spaces or glob characters
# still resolves to a single command word.
"${SCRIPTS}/appz/p5-bpwrapper/bin/bioseq" --restrict-coord EcoT22I \
  ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_6.fasta \
  > ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_6.bed
# Hand a blastn search of the "_6" FASTA against the
# DanishTumbler_Dovetail_ReRun.fasta database to xsbatch as job "Chr-6".
# NOTE(review): xsbatch looks like a site-specific batch-submission wrapper;
# confirm how it interprets -c, --mem-per-cpu and --time.
# -c 8 matches blastn's -num_threads 8.
# Output format 7 is a tab-separated table with '#' comment lines; its columns
# are the 12 standard ones followed by subject length (slen) and subject
# strand (sstrand), i.e. columns 13 and 14.
# The job command stays one quoted string so it is passed through unchanged;
# the redirection and the ~ paths inside it are left for the job to resolve.
xsbatch -c 8 --mem-per-cpu 1024 -J Chr-6 --time 1-00 -- \
  "blastn -num_threads 8 -query ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_6.fasta -db ~/data/Pigeons/Reference/DanishTumbler_Dovetail_ReRun.fasta -evalue 1e-5 -perc_identity 95 -outfmt '7 std slen sstrand' > ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_6--DanishTumbler_Dovetail_ReRun.tsv"
# Select the subject scaffolds that are almost fully covered by strong BLAST
# hits from the "_6" query, then keep the loci BED lines naming those scaffolds.
# Column numbers refer to the "7 std slen sstrand" BLAST table:
#   $2 sseqid, $4 alignment length, $5 mismatches, $9 sstart, $10 send,
#   $13 slen, $14 sstrand.
#
# Stage 1: drop lines containing '#', and keep hits on subjects longer than
# 10000 bp whose alignment is at least 1000 long with under 1% mismatches.
awk '!/#/ && $13>10000 && $4>=1000 && $5/$4<0.01' \
    ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_6--DanishTumbler_Dovetail_ReRun.tsv |
  # Stage 2: order hits by subject id, then numerically by sstart and send.
  sort -k 2,2 -k 9,9g -k 10,10g |
  # Stage 3: emit subject, start, end, slen; minus-strand hits have sstart and
  # send swapped so that the later end-start+1 is positive.
  awk '{start=$9; end=$10} $14=="minus"{start=$10; end=$9} {print $2"\t"start"\t"end"\t"$13}' |
  # Stage 4: per subject, sum the hit spans and print subject, summed span and
  # summed span / slen. Hits are not merged, so overlapping hits count twice.
  awk '{x[$1]+=$3-$2+1; s[$1]=$4} END{for(i in x) print i"\t"x[i]"\t"x[i]/s[i]}' |
  # Stage 5: rank subjects by that ratio, highest first.
  sort -k 3,3gr |
  # Stage 6: print the ids of subjects with ratio above 0.9.
  # NOTE(review): NR>1 also discards the top-ranked subject whatever its
  # ratio; confirm this is intended.
  awk 'NR>1 && $3>0.9{print $1}' |
  # Stage 7: keep loci BED lines that contain any selected id as a whole word.
  # grep -F replaces the obsolescent fgrep, which newer GNU grep warns about.
  grep -F -w -f - \
    ~/data/Pigeons/Reference/PBGP_FinalRun.EcoT22I_Extended_Merged_RemovedBadLoci-PossibleParalogs-g650.bed \
    > ~/data/Pigeons/Reference/GCA_001887795.1_colLiv2_genomic_6--DanishTumbler_Dovetail_ReRun.bed
- 1. Data Access
- 2. Sequencing Quality Check
- 3. Demultiplexing
- 4. Creation of Mapping Targets
- 5. Filtering For Chimeric Reads
- 6. GBS Sexing
- 7. Read Processing & Mapping
- 8. Running Stats & Filtering of Bad Samples
- 9. Filtering of Possible Paralogs
- 10. Merging of Duplicate Cases
- 11. Investigation of Filtering of Possible Paralogs
- 12. Creation of Specific Datasets
- 13. Loci Information
- 14. Heterozygosity Calculation
- 15. Population Genetics Statistics
- 16. Phylogenetic Reconstruction
- 17. Multidimensional Scaling
- 18. Estimation of Individual Ancestries
- 19. Inference of Population Splits
- 20. Measuring of Linkage Disequilibrium
- 21. GWAS