@@ -3,18 +3,14 @@ package pglogicalstream
 import (
     "context"
     "crypto/tls"
+    "database/sql"
     "fmt"
     "os"
     "strings"
     "time"

-    "github.com/apache/arrow/go/v14/arrow"
-    "github.com/apache/arrow/go/v14/arrow/array"
-    "github.com/apache/arrow/go/v14/arrow/memory"
     "github.com/charmbracelet/log"
-    "github.com/cloudquery/plugin-sdk/v4/scalar"
     "github.com/jackc/pglogrepl"
-    "github.com/jackc/pgx/v5"
     "github.com/jackc/pgx/v5/pgconn"
     "github.com/jackc/pgx/v5/pgproto3"
     "github.com/usedatabrew/pglogicalstream/internal/helpers"
@@ -40,7 +36,6 @@ type Stream struct {
     lsnrestart        pglogrepl.LSN
     slotName          string
     schema            string
-    tableSchemas      []schemas.DataTableSchema
     tableNames        []string
     separateChanges   bool
     snapshotBatchSize int
@@ -86,19 +81,6 @@ func NewPgStream(config Config, logger *log.Logger) (*Stream, error) {
     var dataSchemas []schemas.DataTableSchema
     for _, table := range config.DbTablesSchema {
         tableNames = append(tableNames, strings.Split(table.Table, ".")[1])
-        var dts schemas.DataTableSchema
-        dts.TableName = table.Table
-        var arrowSchemaFields []arrow.Field
-        for _, col := range table.Columns {
-            arrowSchemaFields = append(arrowSchemaFields, arrow.Field{
-                Name:     col.Name,
-                Type:     helpers.MapPlainTypeToArrow(col.DatabrewType),
-                Nullable: col.Nullable,
-                Metadata: arrow.Metadata{},
-            })
-        }
-        dts.Schema = arrow.NewSchema(arrowSchemaFields, nil)
-        dataSchemas = append(dataSchemas, dts)
     }

     stream := &Stream{
@@ -108,7 +90,6 @@ func NewPgStream(config Config, logger *log.Logger) (*Stream, error) {
         snapshotMessages:           make(chan Wal2JsonChanges, 100),
         slotName:                   config.ReplicationSlotName,
         schema:                     config.DbSchema,
-        tableSchemas:               dataSchemas,
         snapshotMemorySafetyFactor: config.SnapshotMemorySafetyFactor,
         separateChanges:            config.SeparateChanges,
         snapshotBatchSize:          config.BatchSize,
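
With the Arrow table schemas gone, the constructor only keeps plain table names on the Stream. For orientation, a minimal caller sketch; the Config field names are the ones this hunk references, while their types, the values, and the omitted connection settings are assumptions, not taken from this diff:

    package main

    import (
        "github.com/charmbracelet/log"
        "github.com/usedatabrew/pglogicalstream"
    )

    func main() {
        // ReplicationSlotName, DbSchema, SeparateChanges, BatchSize and
        // DbTablesSchema appear in this diff; the values here are illustrative,
        // and connection settings (host, credentials, TLS) are omitted.
        cfg := pglogicalstream.Config{
            ReplicationSlotName: "databrew_slot",
            DbSchema:            "public",
            SeparateChanges:     true,
            BatchSize:           10000,
            // DbTablesSchema entries carry "schema.table" names in their Table field.
        }

        stream, err := pglogicalstream.NewPgStream(cfg, log.Default())
        if err != nil {
            log.Fatal("failed to create stream", "err", err)
        }
        _ = stream // consume snapshot/replication messages here (accessors not shown in this diff)
    }
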
@@ -154,7 +135,7 @@ func NewPgStream(config Config, logger *log.Logger) (*Stream, error) {
     if len(slotCheckResults) == 0 || len(slotCheckResults[0].Rows) == 0 {
         // here we create a new replication slot because there is no slot found
         var createSlotResult CreateReplicationSlotResult
-        createSlotResult, err = CreateReplicationSlot(context.Background(), stream.pgConn, stream.slotName, outputPlugin,
+        createSlotResult, err = CreateReplicationSlot(context.Background(), stream.pgConn, stream.slotName, "wal2json",
            CreateReplicationSlotOptions{Temporary: false,
                SnapshotAction: "export",
            })
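
Hardcoding "wal2json" (previously the outputPlugin variable) pins the slot to the wal2json output plugin, whose JSON change format the Wal2JsonChanges types downstream expect. The local CreateReplicationSlot wrapper is assumed to behave like the direct jackc/pglogrepl call sketched below; the slot name is illustrative:

    // Issues CREATE_REPLICATION_SLOT <name> LOGICAL wal2json EXPORT_SNAPSHOT;
    // the exported snapshot name lets the initial table scan run with
    // SET TRANSACTION SNAPSHOT and see exactly the slot's starting state.
    func createWal2JsonSlot(ctx context.Context, conn *pgconn.PgConn) (pglogrepl.CreateReplicationSlotResult, error) {
        return pglogrepl.CreateReplicationSlot(ctx, conn, "databrew_slot", "wal2json",
            pglogrepl.CreateReplicationSlotOptions{
                Temporary:      false,
                SnapshotAction: "EXPORT_SNAPSHOT",
            })
    }
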
@@ -309,71 +290,111 @@ func (s *Stream) processSnapshot() {
         snapshotter.CloseConn()
     }()

-    for _, table := range s.tableSchemas {
-        s.logger.Info("Processing database snapshot", "schema", s.schema, "table", table)
-
-        var offset = 0
+    for _, table := range s.tableNames {
+        log.Printf("Processing snapshot for a table %s.%s", s.schema, table)

-        pk, err := s.getPrimaryKeyColumn(table.TableName)
-        if err != nil {
-            s.logger.Fatalf("Failed to resolve pk %s", err.Error())
-        }
+        var (
+            avgRowSizeBytes sql.NullInt64
+            offset          = int(0)
+        )
+        // extract only the name of the table
+        rawTableName := strings.Split(table, ".")[1]
+        avgRowSizeBytes = snapshotter.FindAvgRowSize(rawTableName)
+        fmt.Println(avgRowSizeBytes, offset, "AVG SIZES")

-        s.logger.Info("Query snapshot", "batch-size", s.snapshotBatchSize)
-        builder := array.NewRecordBuilder(memory.DefaultAllocator, table.Schema)
+        batchSize := snapshotter.CalculateBatchSize(helpers.GetAvailableMemory(), uint64(avgRowSizeBytes.Int64))
+        fmt.Println("Query with batch size", batchSize, "Available memory: ", helpers.GetAvailableMemory(), "Avg row size: ", avgRowSizeBytes.Int64)

-        colNames := make([]string, 0, len(table.Schema.Fields()))
+        for {
+            var snapshotRows *sql.Rows
+            if snapshotRows, err = snapshotter.QuerySnapshotData(table, batchSize, offset); err != nil {
+                log.Fatalf("Can't query snapshot data %v", err)
+            }

-        for _, col := range table.Schema.Fields() {
-            colNames = append(colNames, pgx.Identifier{col.Name}.Sanitize())
-        }
+            columnTypes, err := snapshotRows.ColumnTypes()
+            var columnTypesString = make([]string, len(columnTypes))
+            columnNames, err := snapshotRows.Columns()
+            for i, _ := range columnNames {
+                columnTypesString[i] = columnTypes[i].DatabaseTypeName()
+            }

-        for {
-            var snapshotRows pgx.Rows
-            s.logger.Info("Query snapshot: ", "table", table.TableName, "columns", colNames, "batch-size", s.snapshotBatchSize, "offset", offset)
-            if snapshotRows, err = snapshotter.QuerySnapshotData(table.TableName, colNames, pk, s.snapshotBatchSize, offset); err != nil {
-                s.logger.Errorf("Failed to query snapshot data %s", err.Error())
-                s.cleanUpOnFailure()
-                os.Exit(1)
+            if err != nil {
+                panic(err)
             }

+            count := len(columnTypes)
             var rowsCount = 0
             for snapshotRows.Next() {
                 rowsCount += 1
+                scanArgs := make([]interface{}, count)
+                for i, v := range columnTypes {
+                    switch v.DatabaseTypeName() {
+                    case "VARCHAR", "TEXT", "UUID", "TIMESTAMP":
+                        scanArgs[i] = new(sql.NullString)
+                        break
+                    case "BOOL":
+                        scanArgs[i] = new(sql.NullBool)
+                        break
+                    case "INT4":
+                        scanArgs[i] = new(sql.NullInt64)
+                        break
+                    default:
+                        scanArgs[i] = new(sql.NullString)
+                    }
+                }
+
+                err := snapshotRows.Scan(scanArgs...)

-                values, err := snapshotRows.Values()
                 if err != nil {
                     panic(err)
                 }

-                for i, v := range values {
-                    s := scalar.NewScalar(table.Schema.Field(i).Type)
-                    if err := s.Set(v); err != nil {
-                        panic(err)
+                var columnValues = make([]interface{}, len(columnTypes))
+                for i, _ := range columnTypes {
+                    if z, ok := (scanArgs[i]).(*sql.NullBool); ok {
+                        columnValues[i] = z.Bool
+                        continue
+                    }
+                    if z, ok := (scanArgs[i]).(*sql.NullString); ok {
+                        columnValues[i] = z.String
+                        continue
+                    }
+                    if z, ok := (scanArgs[i]).(*sql.NullInt64); ok {
+                        columnValues[i] = z.Int64
+                        continue
+                    }
+                    if z, ok := (scanArgs[i]).(*sql.NullFloat64); ok {
+                        columnValues[i] = z.Float64
+                        continue
+                    }
+                    if z, ok := (scanArgs[i]).(*sql.NullInt32); ok {
+                        columnValues[i] = z.Int32
+                        continue
                     }

-                    scalar.AppendToBuilder(builder.Field(i), s)
+                    columnValues[i] = scanArgs[i]
                 }
-                var snapshotChanges = Wal2JsonChanges{
-                    Lsn: "",
-                    Changes: []Wal2JsonChange{
-                        {
-                            Kind:   "insert",
-                            Schema: s.schema,
-                            Table:  strings.Split(table.TableName, ".")[1],
-                            Row:    builder.NewRecord(),
-                        },
-                    },
+
+                var snapshotChanges []Wal2JsonChange
+                snapshotChanges = append(snapshotChanges, Wal2JsonChange{
+                    Kind:         "insert",
+                    Schema:       s.schema,
+                    Table:        table,
+                    ColumnNames:  columnNames,
+                    ColumnValues: columnValues,
+                })
+                var lsn *string
+                snapshotChangePacket := Wal2JsonChanges{
+                    Lsn:     lsn,
+                    Changes: snapshotChanges,
                 }

-                s.snapshotMessages <- snapshotChanges
+                s.snapshotMessages <- snapshotChangePacket
             }

-            snapshotRows.Close()
-
-            offset += s.snapshotBatchSize
+            offset += batchSize

-            if s.snapshotBatchSize != rowsCount {
+            if batchSize != rowsCount {
                 break
             }
         }
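
The rewritten loop sizes each page from available memory and the table's average row width, advances by offset, and stops once a page comes back short (batchSize != rowsCount), which avoids issuing one final empty query. Neither FindAvgRowSize nor CalculateBatchSize is shown in this diff, so the following is only a sketch of the arithmetic they are assumed to perform; the real snapshotter may also fold in SnapshotMemorySafetyFactor:

    // Assumed heuristic: rows per page = memory budget / average row width.
    func calculateBatchSize(availableMemoryBytes, avgRowSizeBytes uint64) int {
        if avgRowSizeBytes == 0 {
            return 1000 // illustrative fallback for tables without statistics
        }
        batch := availableMemoryBytes / avgRowSizeBytes
        if batch == 0 {
            batch = 1 // always make progress, even on very wide rows
        }
        return int(batch)
    }

The sql.NullInt64 return of FindAvgRowSize would fit a query in the spirit of SELECT avg(pg_column_size(t))::bigint FROM <table> t, whose aggregate is NULL on an empty table; that query is an assumption, not taken from this diff.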