diff --git a/CHANGELOG.md b/CHANGELOG.md index 6062409..1086620 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# 0.2.0dev +- Added the `defaults` field to the `info` and `format` configuration. Defaults for resolvable fields can be set this way. + # 0.1.2 - Adjust ## Fixes - Fixed a bug where the output VCF has no header when the input VCF has no variants diff --git a/docs/configuration.md b/docs/configuration.md index 7d90005..69ee922 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -41,6 +41,9 @@ info: ### value The `value` field can be used to change the default value of the info field. The value can be resolved (see [Resolvable fields](#resolvable-fields)). +### defaults +The `defaults` field can be used to define defaults for resolvable `INFO` and `FORMAT` fields. These defaults will be used when the required field is missing from the variant. + ### type The `type` field can be used to set the type of the info field (This will be reflected in the header of the output VCF file). @@ -58,6 +61,8 @@ For example when all `SVLEN` info fields are positive, you maybe want to change info: SVLEN: value: $INFO/SVLEN + defaults: + $INFO/SVLEN: "-1" type: Integer description: "Structural variant length" number: 1 @@ -71,6 +76,9 @@ The `format` section can be used to change the format fields for each variant. 
T format: : value: + defaults: + : + : type: description: number: diff --git a/svync_api/config.go b/svync_api/config.go index fd9d0eb..734cc03 100644 --- a/svync_api/config.go +++ b/svync_api/config.go @@ -79,7 +79,10 @@ func (config *Config) defineMissing() { // Format fields if _, ok := config.Format["GT"]; !ok { config.Format["GT"] = ConfigInput{ - Value: "$FORMAT/GT", + Value: "$FORMAT/GT", + Defaults: map[string]string{ + "$FORMAT/GT": "./.", + }, Number: "1", Type: "String", Description: "Genotype", diff --git a/svync_api/execute.go b/svync_api/execute.go index fbbd396..5fb961a 100644 --- a/svync_api/execute.go +++ b/svync_api/execute.go @@ -20,10 +20,10 @@ func Execute(Cctx *cli.Context, config *Config) { file := Cctx.String("input") inputVcf, err := os.Open(file) - defer inputVcf.Close() if err != nil { logger.Fatal(err) } + defer inputVcf.Close() header := newHeader() breakEndVariants := &map[string]Variant{} headerIsMade := false diff --git a/svync_api/resolve.go b/svync_api/resolve.go index 357bc3f..ff9eaef 100644 --- a/svync_api/resolve.go +++ b/svync_api/resolve.go @@ -12,7 +12,7 @@ import ( ) // Resolve a value -func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *cli.Context) string { +func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *cli.Context, config *Config) string { logger := log.New(os.Stderr, "", 0) // Replace all the FORMAT fields @@ -21,16 +21,19 @@ func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *c if len(allFormats) > 0 && format == nil { logger.Fatalf("Cannot use a FORMAT field in a non-FORMAT context, please check your config file") } - for _, stringToReplace := range allFormats { - fieldSlice := strings.Split(stringToReplace, "/") + for _, rawField := range allFormats { + fieldSlice := strings.Split(rawField, "/") field := fieldSlice[1] formatValue, ok := format.Content[field] - // TODO implement some alternative way to handle missing fields if !ok { 
- if !Cctx.Bool("mute-warnings") { - logger.Printf("The field %s is not present in the FORMAT fields of the variant with ID %s, excluding it from this variant", field, variant.Id) + // Check if a default value is configured for the missing field + defaults := config.Format[field].Defaults + if defaultValue, ok := defaults[rawField]; ok { + formatValue = []string{defaultValue} + } else if !Cctx.Bool("mute-warnings") { + logger.Printf("The field %s is not present in the FORMAT fields of the variant with ID %s, excluding it from this variant. Supply a default to mute this warning", field, variant.Id) } } else if len(fieldSlice) > 2 { index, err := strconv.ParseInt(fieldSlice[2], 0, 64) @@ -39,23 +42,26 @@ func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *c } formatValue = []string{formatValue[index]} } - input = strings.ReplaceAll(input, stringToReplace, strings.Join(formatValue, ",")) + input = strings.ReplaceAll(input, rawField, strings.Join(formatValue, ",")) } // Replace all the INFO fields infoRegex := regexp.MustCompile(`\$INFO/[\w\d]+(/\d+)?`) allInfos := infoRegex.FindAllString(input, -1) - for _, stringToReplace := range allInfos { - fieldSlice := strings.Split(stringToReplace, "/") + for _, rawField := range allInfos { + fieldSlice := strings.Split(rawField, "/") field := fieldSlice[1] info, ok := variant.Info[field] - // TODO implement some alternative way to handle missing fields if !ok { + // Check if a default value is configured for the missing field + defaults := config.Info[field].Defaults infoType := variant.Header.Info[field].Type - if infoType != "Flag" && !Cctx.Bool("mute-warnings") { - logger.Printf("The field %s is not present in the INFO fields of the variant with ID %s, excluding it from this variant", field, variant.Id) + if defaultValue, ok := defaults[rawField]; ok { + info = []string{defaultValue} + } else if infoType != "Flag" && !Cctx.Bool("mute-warnings") { + logger.Printf("The field %s is not present in the INFO fields of the variant with ID %s, 
excluding it from this variant. Supply a default to mute this warning", field, variant.Id) } } else if len(fieldSlice) > 2 { index, err := strconv.ParseInt(fieldSlice[2], 0, 64) @@ -64,7 +70,7 @@ func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *c } info = []string{info[index]} } - input = strings.ReplaceAll(input, stringToReplace, strings.Join(info, ",")) + input = strings.ReplaceAll(input, rawField, strings.Join(info, ",")) } // Replace POS fields diff --git a/svync_api/standardize.go b/svync_api/standardize.go index bc5ae77..8c3daca 100644 --- a/svync_api/standardize.go +++ b/svync_api/standardize.go @@ -102,7 +102,7 @@ func (variant *Variant) standardize(config *Config, Cctx *cli.Context, count int } } - standardizedVariant.Id = fmt.Sprintf("%s_%v", ResolveValue(config.Id, variant, nil, Cctx), count) + standardizedVariant.Id = fmt.Sprintf("%s_%v", ResolveValue(config.Id, variant, nil, Cctx, config), count) // Add info fields for name, infoConfig := range config.Info { @@ -114,7 +114,7 @@ func (variant *Variant) standardize(config *Config, Cctx *cli.Context, count int if value == "" { continue } - standardizedVariant.Info[name] = []string{ResolveValue(value, variant, nil, Cctx)} + standardizedVariant.Info[name] = []string{ResolveValue(value, variant, nil, Cctx, config)} } // Add format fields @@ -127,7 +127,7 @@ func (variant *Variant) standardize(config *Config, Cctx *cli.Context, count int if val, ok := formatConfig.Alts[sVType]; ok { value = val } - newFormat.Content[name] = []string{ResolveValue(value, variant, &format, Cctx)} + newFormat.Content[name] = []string{ResolveValue(value, variant, &format, Cctx, config)} } standardizedVariant.Format[sample] = *newFormat } diff --git a/svync_api/structs.go b/svync_api/structs.go index 4e196f7..eac8648 100644 --- a/svync_api/structs.go +++ b/svync_api/structs.go @@ -62,6 +62,7 @@ type Config struct { type MapConfigInput map[string]ConfigInput type ConfigInput struct { Value string + 
Defaults map[string]string Description string Number string Type string diff --git a/svync_api/variant.go b/svync_api/variant.go index 7eaaba5..f8c8836 100644 --- a/svync_api/variant.go +++ b/svync_api/variant.go @@ -45,10 +45,10 @@ func toBreakPoint(mate1 *Variant, mate2 *Variant) *Variant { filter = mate1.Filter } - varQual, err := strconv.ParseFloat(mate1.Qual, 64) - mateQual, err := strconv.ParseFloat(mate2.Qual, 64) + varQual, err1 := strconv.ParseFloat(mate1.Qual, 64) + mateQual, err2 := strconv.ParseFloat(mate2.Qual, 64) qual := "." - if err == nil { + if err1 == nil && err2 == nil { qual = fmt.Sprintf("%f", (varQual+mateQual)/2) }