Skip to content

Commit

Permalink
CSV: check for mismatch field count vs explicitly specified column names (#147)
Browse files Browse the repository at this point in the history

* csv: check for mismatch field count vs explicitly specified column names

* CHANGELOG update
  • Loading branch information
neilotoole authored Mar 15, 2023
1 parent fa1677c commit 7148505
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 12 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.24.3] - 2023-03-14

### Added

- When a CSV source has explicit column names (via `--opts cols=A,B,C`), `sq` now verifies
that the CSV data record field count matches the number of explicit columns.


## [v0.24.2] - 2023-03-13

### Fixed
Expand Down Expand Up @@ -145,6 +153,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#89]: Bug with SQL generated for joins.


[v0.24.3]: https://github.com/neilotoole/sq/compare/v0.24.2...v0.24.3
[v0.24.2]: https://github.com/neilotoole/sq/compare/v0.24.1...v0.24.2
[v0.24.1]: https://github.com/neilotoole/sq/compare/v0.24.0...v0.24.1
[v0.24.0]: https://github.com/neilotoole/sq/compare/v0.23.0...v0.24.0
Expand Down
36 changes: 24 additions & 12 deletions drivers/csv/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,24 +364,37 @@ func getColNames(cr *csv.Reader, src *source.Source, readAheadRecs *[][]string)
return headerRec, nil
}

// The CSV file does not have a header record. We will generate
// col names [A,B,C...]. To do so, we need to know how many fields
// there are in the first record.
firstDataRecord, err := cr.Read()
// Read ahead the first record. We need this to determine the number
// of columns.
firstRec, err := cr.Read()
if err == io.EOF { //nolint:errorlint
return nil, errz.Errorf("data source %s is empty", src.Handle)
}
if err != nil {
return nil, errz.Wrapf(err, "read from data source %s", src.Handle)
}

// firstRecord contains actual data, so append it to initialRecs.
*readAheadRecs = append(*readAheadRecs, firstDataRecord)
// firstRec contains actual data, so append it to readAheadRecs.
*readAheadRecs = append(*readAheadRecs, firstRec)

// If we have explicit column names, we still need to verify the
// column name count against the data.
if len(explicitColNames) > 0 {
if len(explicitColNames) != len(firstRec) {
return nil, errz.Errorf("mismatch: source has %d explicit column names specified, but first data record has %d fields", //nolint:lll
len(explicitColNames), len(firstRec))
}

return explicitColNames, nil
}

// The CSV file does not have a header record. We will generate
// col names [A,B,C...]. To do so, we need to know how many fields
// there are in the first record.
// If no column names yet, we generate them based on the number
// of fields in firstDataRecord.
generatedColNames := make([]string, len(firstDataRecord))
for i := range firstDataRecord {
// of fields in firstRec.
generatedColNames := make([]string, len(firstRec))
for i := range firstRec {
generatedColNames[i] = stringz.GenerateAlphaColName(i, false)
}

Expand Down Expand Up @@ -429,13 +442,12 @@ func getDelimFromOptions(opts options.Options) (r rune, ok bool, err error) {
return 0, false, nil
}

const key = "delim"
_, ok = opts[key]
_, ok = opts[options.OptDelim]
if !ok {
return 0, false, nil
}

val := opts.Get(key)
val := opts.Get(options.OptDelim)
if val == "" {
return 0, false, nil
}
Expand Down

0 comments on commit 7148505

Please sign in to comment.