@@ -12,13 +12,15 @@ import com.github.ajalt.clikt.parameters.options.flag
 import com.github.ajalt.clikt.parameters.options.option
 import com.github.ajalt.clikt.parameters.options.required
 import com.github.ajalt.clikt.parameters.options.versionOption
+import com.github.ajalt.clikt.parameters.types.int
 import com.github.ajalt.mordant.rendering.TextColors.cyan
 import com.github.ajalt.mordant.rendering.TextColors.green
 import com.github.ajalt.mordant.rendering.TextColors.red
 import com.github.ajalt.mordant.rendering.TextColors.yellow
 import com.github.ajalt.mordant.rendering.TextStyles.bold
 import com.github.ajalt.mordant.terminal.Terminal
 import org.apache.avro.Schema
+import org.apache.avro.generic.GenericRecord
 import kotlin.system.exitProcess
 
 class KafkaCsvLoaderCommand : CliktCommand(
@@ -48,6 +50,14 @@ class KafkaCsvLoaderCommand : CliktCommand(
         "-d",
         help = "Validate CSV and schema without sending to Kafka",
     ).flag(default = false)
+    private val batchSize by option(
+        "--batch-size",
+        help = "Number of records to batch before sending (default: 1 = no batching)",
+    ).int().default(1)
+    private val asyncSend by option(
+        "--async",
+        help = "Send batches asynchronously (faster but less safe)",
+    ).flag(default = false)
 
     init {
         versionOption(getVersion())
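With these options wired in, a batched run might be invoked like this (the binary name and the --topic/--key-field flags are assumptions for illustration; only --batch-size and --async come from this diff):

    kafka-csv-loader --topic orders --key-field id --batch-size 500 --async data.csv

Because --batch-size defaults to 1 and the send path below only batches when batchSize > 1, the existing row-by-row behavior is preserved unless the flag is set.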
@@ -167,41 +177,151 @@ class KafkaCsvLoaderCommand : CliktCommand(
 
         KafkaProducerClient(bootstrapServers, schemaRegistry).use { producer ->
             terminal.println(yellow("📤 Sending records to Kafka..."))
+            if (batchSize > 1) {
+                terminal.println(cyan("Batch size: $batchSize, Mode: ${if (asyncSend) "async" else "sync"}"))
+            }
             terminal.println()
 
             var successCount = 0
             val failures = mutableListOf<Pair<Int, String>>()
 
-            csvData.rows.forEachIndexed { index, row ->
-                val rowNumber = index + 1
-                val result = AvroRecordMapper.mapRow(schema, row)
-
-                when (result) {
-                    is RowMappingResult.Success -> {
-                        val key = keyField?.let { row[it] }
-                        try {
-                            producer.sendSync(topic, key, result.record)
-                            successCount++
-                            terminal.print(green("✓"))
-                            if (rowNumber % 50 == 0) {
-                                terminal.println(" $rowNumber")
-                            }
-                        } catch (e: Exception) {
-                            failures.add(rowNumber to "Kafka error: ${e.message}")
-                            terminal.print(red("✗"))
+            if (batchSize > 1) {
+                // Batched sending
+                successCount = sendBatched(schema, csvData, producer, failures)
+            } else {
+                // Row-by-row sending
+                successCount = sendRowByRow(schema, csvData, producer, failures)
+            }
+
+            terminal.println()
+            terminal.println()
+
+            printKafkaSummary(successCount, failures)
+        }
+    }
+
+    private fun sendRowByRow(
+        schema: Schema,
+        csvData: CsvData,
+        producer: KafkaProducerClient,
+        failures: MutableList<Pair<Int, String>>,
+    ): Int {
+        var successCount = 0
+
+        csvData.rows.forEachIndexed { index, row ->
+            val rowNumber = index + 1
+            val result = AvroRecordMapper.mapRow(schema, row)
+
+            when (result) {
+                is RowMappingResult.Success -> {
+                    val key = keyField?.let { row[it] }
+                    try {
+                        producer.sendSync(topic, key, result.record)
+                        successCount++
+                        terminal.print(green("✓"))
+                        if (rowNumber % 50 == 0) {
+                            terminal.println(" $rowNumber")
                         }
-                    }
-                    is RowMappingResult.Failure -> {
-                        failures.add(rowNumber to result.errors.joinToString("; "))
+                    } catch (e: Exception) {
+                        failures.add(rowNumber to "Kafka error: ${e.message}")
                         terminal.print(red("✗"))
                     }
                 }
+                is RowMappingResult.Failure -> {
+                    failures.add(rowNumber to result.errors.joinToString("; "))
+                    terminal.print(red("✗"))
+                }
             }
+        }
 
-            terminal.println()
-            terminal.println()
+        return successCount
+    }
 
-            printKafkaSummary(successCount, failures)
+    private fun sendBatched(
+        schema: Schema,
+        csvData: CsvData,
+        producer: KafkaProducerClient,
+        failures: MutableList<Pair<Int, String>>,
+    ): Int {
+        var successCount = 0
+        val batch = mutableListOf<Triple<Int, String?, GenericRecord>>()
+
+        csvData.rows.forEachIndexed { index, row ->
+            val rowNumber = index + 1
+            val result = AvroRecordMapper.mapRow(schema, row)
+
+            when (result) {
+                is RowMappingResult.Success -> {
+                    val key = keyField?.let { row[it] }
+                    batch.add(Triple(rowNumber, key, result.record))
+
+                    // Send the batch when it reaches the batch size or this is the last row
+                    if (batch.size >= batchSize || rowNumber == csvData.rows.size) {
+                        val batchSuccess = sendBatchToKafka(producer, batch, failures)
+                        successCount += batchSuccess
+                        batch.clear()
+
+                        if (rowNumber % 50 == 0) {
+                            terminal.println(green("✓ Processed $rowNumber rows..."))
+                        }
+                    }
+                }
+                is RowMappingResult.Failure -> {
+                    failures.add(rowNumber to result.errors.joinToString("; "))
+                    terminal.print(red("✗"))
+                }
+            }
+        }
+
+        // Flush any remainder (a final row that fails mapping would otherwise strand the batch)
+        if (batch.isNotEmpty()) {
+            successCount += sendBatchToKafka(producer, batch, failures)
+        }
+
+        return successCount
+    }
+
+    private fun sendBatchToKafka(
+        producer: KafkaProducerClient,
+        batch: List<Triple<Int, String?, GenericRecord>>,
+        failures: MutableList<Pair<Int, String>>,
+    ): Int {
+        if (batch.isEmpty()) return 0
+
+        return try {
+            val records = batch.map { (_, key, record) -> key to record }
+
+            if (asyncSend) {
+                // Async: send the whole batch, then flush
+                val futures = producer.sendBatch(topic, records)
+                producer.flush()
+                // Check each future once, recording failures and counting successes
+                var batchSuccess = 0
+                futures.forEachIndexed { idx, future ->
+                    try {
+                        future.get()
+                        batchSuccess++
+                        terminal.print(green("✓"))
+                    } catch (e: Exception) {
+                        val rowNumber = batch[idx].first
+                        failures.add(rowNumber to "Kafka error: ${e.message}")
+                        terminal.print(red("✗"))
+                    }
+                }
+                batchSuccess
+            } else {
+                // Sync: wait for all records to complete
+                producer.sendBatchSync(topic, records)
+                batch.forEach { _ -> terminal.print(green("✓")) }
+                batch.size
+            }
+        } catch (e: Exception) {
+            // Entire batch failed
+            batch.forEach { (rowNumber, _, _) ->
+                failures.add(rowNumber to "Kafka batch error: ${e.message}")
+                terminal.print(red("✗"))
+            }
+            0
         }
     }
 
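The hunks above call producer.sendBatch(...) (returning one future per record) and producer.sendBatchSync(...); neither method appears in this diff. A minimal sketch of what they might look like, assuming KafkaProducerClient wraps a plain KafkaProducer<String, GenericRecord> (the names and signatures below are inferred from the call sites, not taken from the real implementation):

import java.util.concurrent.Future
import org.apache.avro.generic.GenericRecord
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.producer.RecordMetadata

// Hypothetical stand-in for the real KafkaProducerClient.
class KafkaProducerClientSketch(
    private val producer: KafkaProducer<String, GenericRecord>,
) : AutoCloseable {
    // Async path: hand every record to the producer and return the futures;
    // the caller flushes and then inspects each future for per-record failures.
    fun sendBatch(
        topic: String,
        records: List<Pair<String?, GenericRecord>>,
    ): List<Future<RecordMetadata>> =
        records.map { (key, value) -> producer.send(ProducerRecord(topic, key, value)) }

    // Sync path: send everything, then block on each future so any broker
    // error surfaces here as an exception (treated as a whole-batch failure).
    fun sendBatchSync(topic: String, records: List<Pair<String?, GenericRecord>>) {
        sendBatch(topic, records).forEach { it.get() }
    }

    fun flush() = producer.flush()

    override fun close() = producer.close()
}

Under this reading, the "less safe" caveat on --async is about error timing and ordering: failures only surface once the futures are checked after flush(), and a non-idempotent producer with retries enabled and max.in.flight.requests.per.connection greater than 1 can reorder records on retry.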