From 7148505a84f109d58da62ca837e13e1c9c4ef405 Mon Sep 17 00:00:00 2001 From: Neil O'Toole Date: Tue, 14 Mar 2023 23:04:49 -0600 Subject: [PATCH] CSV: check for mismatch field count vs explicitly specified column names (#147) * csv: check for mismatch field count vs explicitly specified column names * CHANGELOG update --- CHANGELOG.md | 9 +++++++++ drivers/csv/import.go | 36 ++++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 826e7a1ea..698fc3fc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v0.24.3] - 2023-03-14 + +### Added + +- When a CSV source has explicit column names (via `--opts cols=A,B,C`), `sq` now verifies + that the CSV data record field count matches the number of explicit columns. + + ## [v0.24.2] - 2023-03-13 ### Fixed @@ -145,6 +153,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#89]: Bug with SQL generated for joins. +[v0.24.3]: https://github.com/neilotoole/sq/compare/v0.24.2...v0.24.3 [v0.24.2]: https://github.com/neilotoole/sq/compare/v0.24.1...v0.24.2 [v0.24.1]: https://github.com/neilotoole/sq/compare/v0.24.0...v0.24.1 [v0.24.0]: https://github.com/neilotoole/sq/compare/v0.23.0...v0.24.0 diff --git a/drivers/csv/import.go b/drivers/csv/import.go index c8336d141..1abb0b02b 100644 --- a/drivers/csv/import.go +++ b/drivers/csv/import.go @@ -364,10 +364,9 @@ func getColNames(cr *csv.Reader, src *source.Source, readAheadRecs *[][]string) return headerRec, nil } - // The CSV file does not have a header record. We will generate - // col names [A,B,C...]. To do so, we need to know how many fields - // there are in the first record. - firstDataRecord, err := cr.Read() + // Read ahead the first record. We need this to determine the number + // of columns. + firstRec, err := cr.Read() if err == io.EOF { //nolint:errorlint return nil, errz.Errorf("data source %s is empty", src.Handle) } @@ -375,13 +374,27 @@ func getColNames(cr *csv.Reader, src *source.Source, readAheadRecs *[][]string) return nil, errz.Wrapf(err, "read from data source %s", src.Handle) } - // firstRecord contains actual data, so append it to initialRecs. - *readAheadRecs = append(*readAheadRecs, firstDataRecord) + // firstRec contains actual data, so append it to readAheadRecs. + *readAheadRecs = append(*readAheadRecs, firstRec) + + // If we have explicit column names, we still need to verify the + // column name count against the data. + if len(explicitColNames) > 0 { + if len(explicitColNames) != len(firstRec) { + return nil, errz.Errorf("mismatch: source has %d explicit column names specified, but first data record has %d fields", //nolint:lll + len(explicitColNames), len(firstRec)) + } + + return explicitColNames, nil + } + // The CSV file does not have a header record. We will generate + // col names [A,B,C...]. To do so, we need to know how many fields + // there are in the first record. // If no column names yet, we generate them based on the number - // of fields in firstDataRecord. - generatedColNames := make([]string, len(firstDataRecord)) - for i := range firstDataRecord { + // of fields in firstRec. + generatedColNames := make([]string, len(firstRec)) + for i := range firstRec { generatedColNames[i] = stringz.GenerateAlphaColName(i, false) } @@ -429,13 +442,12 @@ func getDelimFromOptions(opts options.Options) (r rune, ok bool, err error) { return 0, false, nil } - const key = "delim" - _, ok = opts[key] + _, ok = opts[options.OptDelim] if !ok { return 0, false, nil } - val := opts.Get(key) + val := opts.Get(options.OptDelim) if val == "" { return 0, false, nil }