Skip to content

Commit 4830ed6

Browse files
authored
Improving column escape names (#1107)
1 parent 417f9b9 commit 4830ed6

File tree

4 files changed

+27
-18
lines changed

4 files changed

+27
-18
lines changed

lib/typing/columns/columns.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"errors"
55
"fmt"
66
"slices"
7+
"strconv"
78
"strings"
89
"sync"
910

@@ -12,9 +13,16 @@ import (
1213
"github.com/artie-labs/transfer/lib/typing"
1314
)
1415

15-
// EscapeName - will lowercase columns and escape spaces.
1616
func EscapeName(name string) string {
17+
// Lowercasing and escaping spaces.
1718
_, name = stringutil.EscapeSpaces(strings.ToLower(name))
19+
20+
// Does the column name start with a number? If so, let's prefix `col_` to the column name.
21+
// We're doing this most databases do not allow column names to start with a number.
22+
if _, err := strconv.Atoi(string(name[0])); err == nil {
23+
name = "col_" + name
24+
}
25+
1826
return name
1927
}
2028

lib/typing/columns/columns_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ func TestEscapeName(t *testing.T) {
2929
name: "col with spaces",
3030
expectedName: "col__with__spaces",
3131
},
32+
{
33+
name: "1abc",
34+
expectedName: "col_1abc",
35+
},
3236
}
3337

3438
for _, testCase := range testCases {

models/event/event.go

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -199,17 +199,10 @@ func (e *Event) Save(cfg config.Config, inMemDB *models.DatabaseData, tc kafkali
199199
// Update col if necessary
200200
sanitizedData := make(map[string]any)
201201
for _col, val := range e.Data {
202-
// TODO: Refactor this to call columns.EscapeName(...)
203-
// columns need to all be normalized and lower cased.
204-
newColName := strings.ToLower(_col)
205-
// Columns here could contain spaces. Every destination treats spaces in a column differently.
206-
// So far, Snowflake accepts them when escaped properly, however BigQuery does not accept it.
207-
// Instead of making this more complicated for future destinations, we will escape the spaces by having double underscore `__`
208-
// So, if customers want to retrieve spaces again, they can replace `__`.
209-
var containsSpace bool
210-
containsSpace, newColName = stringutil.EscapeSpaces(newColName)
211-
if containsSpace {
212-
// Write the message back if the column has changed.
202+
newColName := columns.EscapeName(_col)
203+
if newColName != _col {
204+
// This means that the column name has changed.
205+
// We need to update the column name in the sanitizedData map.
213206
sanitizedData[newColName] = val
214207
}
215208

models/event/event_save_test.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package event
33
import (
44
"fmt"
55
"strconv"
6+
"strings"
67

78
"github.com/artie-labs/transfer/lib/typing/columns"
89

@@ -173,21 +174,22 @@ func (e *EventsTestSuite) TestEventSaveOptionalSchema() {
173174
func (e *EventsTestSuite) TestEvent_SaveColumnsNoData() {
174175
var cols columns.Columns
175176
for i := 0; i < 50; i++ {
176-
cols.AddColumn(columns.NewColumn(fmt.Sprint(i), typing.Invalid))
177+
cols.AddColumn(columns.NewColumn(fmt.Sprintf("col_%d", i), typing.Invalid))
177178
}
178179

179180
evt := Event{
180181
Table: "non_existent",
181182
Columns: &cols,
182183
Data: map[string]any{
183-
"1": "123",
184+
"col_1": "123",
184185
constants.DeleteColumnMarker: true,
185186
constants.OnlySetDeleteColumnMarker: true,
186187
},
187188
PrimaryKeyMap: map[string]any{
188-
"1": "123",
189+
"col_1": "123",
189190
},
190191
}
192+
191193
kafkaMsg := kafka.Message{}
192194
_, _, err := evt.Save(e.cfg, e.db, topicConfig, artie.NewMessage(&kafkaMsg, nil, kafkaMsg.Topic))
193195
assert.NoError(e.T(), err)
@@ -199,19 +201,21 @@ func (e *EventsTestSuite) TestEvent_SaveColumnsNoData() {
199201
continue
200202
}
201203

204+
columnNamePart := strings.Split(col.Name(), "_")[1]
205+
202206
if prevKey == "" {
203-
prevKey = col.Name()
207+
prevKey = columnNamePart
204208
continue
205209
}
206210

207-
currentKeyParsed, err := strconv.Atoi(col.Name())
211+
currentKeyParsed, err := strconv.Atoi(columnNamePart)
208212
assert.NoError(e.T(), err)
209213

210214
prevKeyParsed, err := strconv.Atoi(prevKey)
211215
assert.NoError(e.T(), err)
212216

213217
// Testing ordering.
214-
assert.True(e.T(), currentKeyParsed > prevKeyParsed, fmt.Sprintf("current key: %v, prevKey: %v", currentKeyParsed, prevKeyParsed))
218+
assert.True(e.T(), currentKeyParsed > prevKeyParsed, fmt.Sprintf("current key: %q, prevKey: %q", currentKeyParsed, prevKeyParsed))
215219
}
216220

217221
// Now let's add more keys.

0 commit comments

Comments
 (0)