Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
# 0.2.0dev

## New features

- Added the `defaults` field to the `info` and `format` configuration. It sets fallback values for resolvable fields that are missing from a variant.

# 0.1.2 - Adjust

## Fixes

- Fixed a bug where the output VCF has no header when the input VCF has no variants


# 0.1.1 - Transform

## Fixes

- Fixed a bug where the samples were missing from the header

# 0.1.0 - Change
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@ The configuration file is the core of the standardization in Svync. More informa

## Installation
### Mamba/Conda
This is the preffered way of installing BedGoVcf.
This is the preferred way of installing Svync.

```bash
mamba install -c bioconda bedgovcf
mamba install -c bioconda svync
```

or with conda:

```bash
conda install -c bioconda bedgovcf
conda install -c bioconda svync
```

### Precompiled binaries
Expand All @@ -45,17 +45,17 @@ Precompiled binaries are available for Linux and macOS on the [releases page](ht
### Installation from source
Make sure you have Go installed on your machine (or [install](https://go.dev/doc/install) it if you don't currently have it).

Then run these commands to install bedgovcf:
Then run these commands to install svync:

```bash
go get .
go build .
sudo mv bedgovcf /usr/local/bin/
sudo mv svync /usr/local/bin/
```

Next run this command to check if it was correctly installed:

```bash
bedgovcf --help
svync --help
```

8 changes: 8 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ info:
### value
The `value` field can be used to change the default value of the info field. The value can be resolved (see [Resolvable fields](#resolvable-fields)).

### defaults
The `defaults` field can be used to define defaults for resolvable `INFO` and `FORMAT` fields. These defaults will be used when the required field is missing from the variant.

### type
The `type` field can be used to set the type of the info field (this will be reflected in the header of the output VCF file).

Expand All @@ -58,6 +61,8 @@ For example when all `SVLEN` info fields are positive, you maybe want to change
info:
SVLEN:
value: $INFO/SVLEN
defaults:
$INFO/SVLEN: "-1"
type: Integer
description: "Structural variant length"
number: 1
Expand All @@ -71,6 +76,9 @@ The `format` section can be used to change the format fields for each variant. T
format:
<format_field>:
value: <new_value>
defaults:
<resolvable-field>: <default>
<resolvable-field>: <default>
type: <new_type>
description: <new_description>
number: <new_number>
Expand Down
2 changes: 1 addition & 1 deletion svync.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ func main() {
Name: "svync",
Usage: "A tool to standardize VCF files from structural variant callers",
HideHelpCommand: true,
Version: "0.1.2",
Version: "0.2.0dev",
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "nodate",
Expand Down
5 changes: 4 additions & 1 deletion svync_api/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ func (config *Config) defineMissing() {
// Format fields
if _, ok := config.Format["GT"]; !ok {
config.Format["GT"] = ConfigInput{
Value: "$FORMAT/GT",
Value: "$FORMAT/GT",
Defaults: map[string]string{
"$FORMAT/GT": "./.",
},
Number: "1",
Type: "String",
Description: "Genotype",
Expand Down
2 changes: 1 addition & 1 deletion svync_api/execute.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ func Execute(Cctx *cli.Context, config *Config) {

file := Cctx.String("input")
inputVcf, err := os.Open(file)
defer inputVcf.Close()
if err != nil {
logger.Fatal(err)
}
defer inputVcf.Close()
header := newHeader()
breakEndVariants := &map[string]Variant{}
headerIsMade := false
Expand Down
32 changes: 19 additions & 13 deletions svync_api/resolve.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

// Resolve a value
func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *cli.Context) string {
func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *cli.Context, config *Config) string {
logger := log.New(os.Stderr, "", 0)

// Replace all the FORMAT fields
Expand All @@ -21,16 +21,19 @@ func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *c
if len(allFormats) > 0 && format == nil {
logger.Fatalf("Cannot use a FORMAT field in a non-FORMAT context, please check your config file")
}
for _, stringToReplace := range allFormats {
fieldSlice := strings.Split(stringToReplace, "/")
for _, rawField := range allFormats {
fieldSlice := strings.Split(rawField, "/")

field := fieldSlice[1]
formatValue, ok := format.Content[field]

// TODO implement some alternative way to handle missing fields
if !ok {
if !Cctx.Bool("mute-warnings") {
logger.Printf("The field %s is not present in the FORMAT fields of the variant with ID %s, excluding it from this variant", field, variant.Id)
// Check if the field is a default value
defaults := config.Format[field].Defaults
if defaultValue, ok := defaults[rawField]; ok {
formatValue = []string{defaultValue}
} else if !Cctx.Bool("mute-warnings") {
logger.Printf("The field %s is not present in the FORMAT fields of the variant with ID %s, excluding it from this variant. Supply a default to mute this warning", field, variant.Id)
}
} else if len(fieldSlice) > 2 {
index, err := strconv.ParseInt(fieldSlice[2], 0, 64)
Expand All @@ -39,23 +42,26 @@ func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *c
}
formatValue = []string{formatValue[index]}
}
input = strings.ReplaceAll(input, stringToReplace, strings.Join(formatValue, ","))
input = strings.ReplaceAll(input, rawField, strings.Join(formatValue, ","))
}

// Replace all the INFO fields
infoRegex := regexp.MustCompile(`\$INFO/[\w\d]+(/\d+)?`)
allInfos := infoRegex.FindAllString(input, -1)
for _, stringToReplace := range allInfos {
fieldSlice := strings.Split(stringToReplace, "/")
for _, rawField := range allInfos {
fieldSlice := strings.Split(rawField, "/")

field := fieldSlice[1]
info, ok := variant.Info[field]

// TODO implement some alternative way to handle missing fields
if !ok {
// Check if the field is a default value
defaults := config.Info[field].Defaults
infoType := variant.Header.Info[field].Type
if infoType != "Flag" && !Cctx.Bool("mute-warnings") {
logger.Printf("The field %s is not present in the INFO fields of the variant with ID %s, excluding it from this variant", field, variant.Id)
if defaultValue, ok := defaults[rawField]; ok {
info = []string{defaultValue}
} else if infoType != "Flag" && !Cctx.Bool("mute-warnings") {
logger.Printf("The field %s is not present in the FORMAT fields of the variant with ID %s, excluding it from this variant. Supply a default to mute this warning", field, variant.Id)
}
} else if len(fieldSlice) > 2 {
index, err := strconv.ParseInt(fieldSlice[2], 0, 64)
Expand All @@ -64,7 +70,7 @@ func ResolveValue(input string, variant *Variant, format *VariantFormat, Cctx *c
}
info = []string{info[index]}
}
input = strings.ReplaceAll(input, stringToReplace, strings.Join(info, ","))
input = strings.ReplaceAll(input, rawField, strings.Join(info, ","))
}

// Replace POS fields
Expand Down
6 changes: 3 additions & 3 deletions svync_api/standardize.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func (variant *Variant) standardize(config *Config, Cctx *cli.Context, count int
}
}

standardizedVariant.Id = fmt.Sprintf("%s_%v", ResolveValue(config.Id, variant, nil, Cctx), count)
standardizedVariant.Id = fmt.Sprintf("%s_%v", ResolveValue(config.Id, variant, nil, Cctx, config), count)

// Add info fields
for name, infoConfig := range config.Info {
Expand All @@ -114,7 +114,7 @@ func (variant *Variant) standardize(config *Config, Cctx *cli.Context, count int
if value == "" {
continue
}
standardizedVariant.Info[name] = []string{ResolveValue(value, variant, nil, Cctx)}
standardizedVariant.Info[name] = []string{ResolveValue(value, variant, nil, Cctx, config)}
}

// Add format fields
Expand All @@ -127,7 +127,7 @@ func (variant *Variant) standardize(config *Config, Cctx *cli.Context, count int
if val, ok := formatConfig.Alts[sVType]; ok {
value = val
}
newFormat.Content[name] = []string{ResolveValue(value, variant, &format, Cctx)}
newFormat.Content[name] = []string{ResolveValue(value, variant, &format, Cctx, config)}
}
standardizedVariant.Format[sample] = *newFormat
}
Expand Down
Loading