Skip to content

Commit

Permalink
Fix track JSON load, make sure chunk ref path is first, and add scrip…
Browse files Browse the repository at this point in the history
…t to help make a chunk from a graph
  • Loading branch information
adamnovak committed Sep 15, 2023
1 parent 6267403 commit 250aa66
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 34 deletions.
8 changes: 4 additions & 4 deletions docker/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@
"internalDataPath": "./exampleData/internal",

"defaultHaplotypeColorPalette" : {
"mainPalette": "ygreys",
"auxPalette": "ygreys",
"mainPalette": "greys",
"auxPalette": "greys",
"colorReadsByMappingQuality": false
},

"defaultReadColorPalette" : {
"mainPalette": "reds",
"auxPalette": "blues",
"mainPalette": "blues",
"auxPalette": "reds",
"colorReadsByMappingQuality": false
},

Expand Down
8 changes: 4 additions & 4 deletions exampleData/chunk-ref-1-20/tracks.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"trackFile": "cactus.vg",
"trackType": "graph",
"trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}
"trackColorSettings": {"mainPalette": "plainColors", "auxPalette": "greys"}
},
{
"trackFile": "cactus0_10.sorted.gam",
"trackType": "read",
"trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}
"trackColorSettings": {"mainPalette": "blues", "auxPalette": "reds"}
},
{
"trackFile": "cactus10_20.sorted.gam",
"trackType": "read",
"trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}
}
"trackColorSettings": {"mainPalette": "blues", "auxPalette": "reds"}
}
78 changes: 58 additions & 20 deletions scripts/prepare_chunks.sh
Original file line number Diff line number Diff line change
@@ -1,53 +1,91 @@
#!/usr/bin/env bash
set -e

while getopts x:h:g:r:o: flag
function usage() {
echo >&2 "${0}: Extract graph and read chunks for a region, producing a referencing line for a BED file on standard output"
echo >&2
echo >&2 "Usage: ${0} -x mygraph.xg [-h mygraph.gbwt] -r chr1:1-100 [-d 'Description of region'] -o chunk-chr1-1-100 [-g mygam1.gam [-g mygam2.gam ...]] >> regions.bed"
exit 1
}

while getopts x:h:g:r:o:d: flag
do
case "${flag}" in
x) XG_FILE=${OPTARG};;
h) GBWT=${OPTARG};;
x) GRAPH_FILE=${OPTARG};;
h) HAPLOTYPE_FILE=${OPTARG};;
g) GAM_FILES+=("$OPTARG");;
r) REGION=${OPTARG};;
o) OUTDIR=${OPTARG};;
d) DESC="${OPTARG}";;
*)
usage
;;

esac
done

if ! command -v jq &> /dev/null
then
echo "This script requires jq, exiting..."
exit
echo >&2 "This script requires jq, exiting..."
exit 1
fi

if [[ -z "${REGION}" ]] ; then
echo >&2 "You must specify a region with -r"
echo >&2
usage
fi

if [[ -z "${GRAPH_FILE}" ]] ; then
echo >&2 "You must specify a graph with -x"
echo >&2
usage
fi

echo "XG File: " $XG_FILE
echo "Haplotype File: " $GBWT
echo "Region: " $REGION
echo "Output Directory: " $OUTDIR
if [[ -z "${OUTDIR}" ]] ; then
echo >&2 "You must specify an output directory with -o"
echo >&2
usage
fi

if [[ -z "${DESC}" ]] ; then
DESC="Region ${REGION}"
fi

echo >&2 "Graph File: " $GRAPH_FILE
echo >&2 "Haplotype File: " $HAPLOTYPE_FILE
echo >&2 "Region: " $REGION
echo >&2 "Output Directory: " $OUTDIR

rm -fr $OUTDIR
mkdir -p $OUTDIR

vg_chunk_params="-x $XG_FILE -g -c 20 -p $REGION -T -b $OUTDIR/chunk -E $OUTDIR/regions.tsv"
vg_chunk_params=(-x $GRAPH_FILE -g -c 20 -p $REGION -T -b $OUTDIR/chunk -E $OUTDIR/regions.tsv)

# construct track JSON for xg file
jq -n --arg trackFile "${XG_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "greys", "auxPalette": "ygreys"}' '$ARGS.named' >> $OUTDIR/tracks.json
# construct track JSON for graph file
jq -n --arg trackFile "${GRAPH_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "plainColors", "auxPalette": "greys"}' '$ARGS.named' >> $OUTDIR/tracks.json

# construct track JSON for gbwt file; if not any specific gbwt file, then default would be haplotype
if [[ ! -z "${GBWT}" ]] ; then
jq -n --arg trackFile "${GBWT}" --arg trackType "haplotype" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json
# construct track JSON for haplotype file, if provided
if [[ ! -z "${HAPLOTYPE_FILE}" ]] ; then
jq -n --arg trackFile "${HAPLOTYPE_FILE}" --arg trackType "haplotype" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json
fi

# construct track JSON for each gam file
echo "Gam Files:"
echo >&2 "Gam Files:"
for GAM_FILE in "${GAM_FILES[@]}"; do
echo " - $GAM_FILE"
echo >&2 " - $GAM_FILE"
jq -n --arg trackFile "${GAM_FILE}" --arg trackType "read" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json
vg_chunk_params=" $vg_chunk_params -a $GAM_FILE"
vg_chunk_params+=(-a $GAM_FILE)
done

# Call vg chunk
vg chunk $vg_chunk_params > $OUTDIR/chunk.vg
vg chunk "${vg_chunk_params[@]}" > $OUTDIR/chunk.vg

for file in `ls $OUTDIR/`
do
printf "$file\n" >> $OUTDIR/chunk_contents.txt
done
done

# Print BED line
cat $OUTDIR/regions.tsv | cut -f1-3 | tr -d "\n"
printf "\t${DESC}\t${OUTDIR}\n"
107 changes: 107 additions & 0 deletions scripts/prepare_local_chunk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/usr/bin/env bash
set -e

# Print a short description of this script and its command-line usage to
# standard error, then terminate the script with exit status 1.
function usage() {
    printf >&2 '%s\n' \
        "${0}: Prepare a tube map chunk and BED line on standard output from a pre-made subgraph. Only supports paths, not haplotypes." \
        "" \
        "Usage: ${0} -x subgraph.xg -r chr1:1-100 [-d 'Description of region'] -o chunk-chr1-1-100 [-g mygam1.gam [-g mygam2.gam ...]] >> regions.bed"
    exit 1
}

# Main flow of the script:
#   1. Parse options and validate them (including the region syntax) BEFORE
#      touching the filesystem, so a bad invocation never deletes OUTDIR.
#   2. Recreate OUTDIR and fill it with the files the tube map server expects
#      in a chunk: tracks.json, chunk.vg, one GAM per -g, regions.tsv, an
#      empty annotation file, and chunk_contents.txt.
#   3. Print a BED line (contig, start, end, description, chunk directory) on
#      standard output. All diagnostics go to standard error so that standard
#      output can be appended straight to a BED file.
#
# Options:
#   -x FILE    pre-made subgraph to use as the chunk's graph (required)
#   -r REGION  region in contig:start-end form (required)
#   -o DIR     output chunk directory; it is DELETED and recreated (required)
#   -d TEXT    description for the BED line (default: "Region REGION")
#   -g FILE    GAM file of reads to include; may be repeated

# -g may be given several times; collect every GAM in an array.
GAM_FILES=()
while getopts x:g:r:o:d: flag
do
    case "${flag}" in
        x) GRAPH_FILE="${OPTARG}";;
        g) GAM_FILES+=("${OPTARG}");;
        r) REGION="${OPTARG}";;
        o) OUTDIR="${OPTARG}";;
        d) DESC="${OPTARG}";;
        *)
            usage
            ;;
    esac
done

# Both external tools are needed below; check for them up front rather than
# failing half-way through with a partially written chunk directory.
for REQUIRED_TOOL in jq vg
do
    if ! command -v "${REQUIRED_TOOL}" &> /dev/null
    then
        echo >&2 "This script requires ${REQUIRED_TOOL}, exiting..."
        exit 1
    fi
done

if [[ -z "${REGION}" ]] ; then
    echo >&2 "You must specify a region with -r"
    echo >&2
    usage
fi

if [[ -z "${GRAPH_FILE}" ]] ; then
    echo >&2 "You must specify a graph with -x"
    echo >&2
    usage
fi

if [[ -z "${OUTDIR}" ]] ; then
    echo >&2 "You must specify an output directory with -o"
    echo >&2
    usage
fi

if [[ -z "${DESC}" ]] ; then
    DESC="Region ${REGION}"
fi

# Parse the region. The greedy contig group means the split happens at the
# LAST ':' so contig names that themselves contain ':' or '-' still work.
REGION_PATTERN='^(.+):([0-9]+)-([0-9]+)$'
if [[ "${REGION}" =~ ${REGION_PATTERN} ]] ; then
    REGION_CONTIG="${BASH_REMATCH[1]}"
    REGION_START="${BASH_REMATCH[2]}"
    REGION_END="${BASH_REMATCH[3]}"
else
    echo >&2 "Region '${REGION}' is not of the form contig:start-end"
    echo >&2
    usage
fi

echo >&2 "Graph File: " "${GRAPH_FILE}"
echo >&2 "Region: " "${REGION}"
echo >&2 "Output Directory: " "${OUTDIR}"

# Start from an empty chunk directory. Quoting (and --) keeps paths with
# spaces or a leading '-' from being split or parsed as options.
rm -fr -- "${OUTDIR}"
mkdir -p -- "${OUTDIR}"

# construct track JSON for graph file
jq -n --arg trackFile "${GRAPH_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "plainColors", "auxPalette": "greys"}' '$ARGS.named' >> "${OUTDIR}/tracks.json"

# Put the graph file in place
vg convert -p "${GRAPH_FILE}" > "${OUTDIR}/chunk.vg"
# Start the region BED inside the chunk. Values are passed as printf
# arguments, never as the format string, so '%' or '\' in them is literal.
printf '%s\t%s\t%s' "${REGION_CONTIG}" "${REGION_START}" "${REGION_END}" > "${OUTDIR}/regions.tsv"


echo >&2 "Gam Files:"
GAM_NUM=0
for GAM_FILE in "${GAM_FILES[@]}"; do
    echo >&2 " - ${GAM_FILE}"
    # construct track JSON for each gam file
    jq -n --arg trackFile "${GAM_FILE}" --arg trackType "read" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> "${OUTDIR}/tracks.json"
    # Work out a chunk-internal GAM name with the same leading numbering vg chunk uses
    if [[ "${GAM_NUM}" == "0" ]] ; then
        GAM_LEADER="chunk"
    else
        GAM_LEADER="chunk-${GAM_NUM}"
    fi
    GAM_CHUNK_NAME="${OUTDIR}/${GAM_LEADER}_0_${REGION_CONTIG}_${REGION_START}_${REGION_END}.gam"
    # Put the chunk in place
    cp "${GAM_FILE}" "${GAM_CHUNK_NAME}"
    # List it in the regions TSV like vg would
    printf '\t%s' "$(basename "${GAM_CHUNK_NAME}")" >> "${OUTDIR}/regions.tsv"
    GAM_NUM=$((GAM_NUM + 1))
done

# Make the empty but required annotation file. We have no haplotypes to put in it.
ANNOTATION_NAME="chunk_0_${REGION_CONTIG}_${REGION_START}_${REGION_END}.annotate.txt"
touch "${OUTDIR}/${ANNOTATION_NAME}"
printf '\t%s\n' "${ANNOTATION_NAME}" >> "${OUTDIR}/regions.tsv"

# Record the chunk's contents. The glob is expanded once, before the first
# append creates chunk_contents.txt, so the listing does not include itself.
for file in "${OUTDIR}"/*
do
    printf '%s\n' "${file##*/}" >> "${OUTDIR}/chunk_contents.txt"
done

# Print BED line: the first three columns of regions.tsv without their
# newline, followed by the description and the chunk directory.
cut -f1-3 "${OUTDIR}/regions.tsv" | tr -d "\n"
printf '\t%s\t%s\n' "${DESC}" "${OUTDIR}"

4 changes: 2 additions & 2 deletions src/components/TubeMapContainer.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class TubeMapContainer extends Component {

handleFetchError(error, message) {
if (!this.cancelSignal.aborted) {
console.log(message, error.name, error.message);
console.error(message, error);
this.setState({ error: error, isLoading: false });
} else {
console.log("fetch canceled by componentWillUnmount", error.message);
Expand Down Expand Up @@ -199,7 +199,7 @@ class TubeMapContainer extends Component {
} catch (error) {
this.handleFetchError(
error,
`POST to ${this.props.apiUrl}/getChunkedData failed:`
`Fetching and parsing POST to ${this.props.apiUrl}/getChunkedData failed:`
);
}
};
Expand Down
39 changes: 35 additions & 4 deletions src/server.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ async function getChunkedData(req, res, next) {
}
req.graph = JSON.parse(graphAsString);
req.region = [rangeRegion.start, rangeRegion.end];
// vg chunk automatically puts the path that the region refers to first
if (!sentResponse) {
sentResponse = true;
processAnnotationFile(req, res, next);
Expand Down Expand Up @@ -593,6 +594,24 @@ async function getChunkedData(req, res, next) {
}
req.graph = JSON.parse(graphAsString);
req.region = [rangeRegion.start, rangeRegion.end];

// The path that the region refers to might not appear first.
if (parsedRegion.contig !== "node") {
// Make sure that path 0 is the path we actually asked about
let refPaths = [];
let otherPaths = [];
for (let path of req.graph.path) {
if (path.name === parsedRegion.contig) {
// This is the path we asked about, so it goes first
refPaths.push(path);
} else {
// Then we put each other path
otherPaths.push(path);
}
}
req.graph.path = refPaths.concat(otherPaths);
}

if (!sentResponse) {
sentResponse = true;
processAnnotationFile(req, res, next);
Expand Down Expand Up @@ -669,6 +688,7 @@ function returnErrorMiddleware(err, req, res, next) {
result.error += req.error.toString("utf-8");
}
console.log("returning error: " + result.error);
console.error(err);
if (err.status) {
// Error comes with a status
res.status(err.status);
Expand Down Expand Up @@ -922,6 +942,10 @@ function processGamFiles(req, res, next) {
}
}

// Function to do the step of reading the "region" file, a BED inside the chunk
// that records the path and start offset that were used to define the chunk.
//
// Calls out to the next step, cleanUpAndSendResult
function processRegionFile(req, res, next) {
try {
console.time("processing region file");
Expand Down Expand Up @@ -1432,14 +1456,21 @@ async function getBedRegions(bed) {
if (fs.existsSync(track_json)) {
// Create string of tracks data
const string_data = fs.readFileSync(track_json);
const parser = new JSONParser();
parser.onValue = (value, key, parent, stack) => {
if (stack > 0) return; // ignore inner values
const parser = new JSONParser({separator: ''});
parser.onValue = ({value, key, parent, stack}) => {
if (stack.length > 0) {
// ignore inner values
return;
}
if (!Object.hasOwn(value, 'trackFile')) {
throw new BadRequestError('Non-track object in tracks.json: ' + JSON.stringify(value))
}
// put tracks in array
tracks_array.push(value);
};
parser.write(string_data);
tracks = tracks_array;
// Convert to object container like the client component prop types expect
tracks = {...tracks_array};
}
}

Expand Down
7 changes: 7 additions & 0 deletions src/util/tubemap.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const greys = [
"#000000",
];

// Greys but with a special color for the first thing.
const ygreys = [
"#9467bd",
"#d9d9d9",
Expand Down Expand Up @@ -4229,6 +4230,12 @@ export function vgExtractReads(

for (let i = 0; i < myReads.length; i += 1) {
const read = myReads[i];

if (!read.path) {
// Read does not have a path assigned, this is an unmapped read.
continue;
}

const sequence = [];
const sequenceNew = [];
let firstIndex = -1; // index within mapping of the first node id contained in nodeNames
Expand Down

0 comments on commit 250aa66

Please sign in to comment.