27
27
from kafka .admin import KafkaAdminClient , NewTopic
28
28
from kafka .consumer .fetcher import ConsumerRecord
29
29
from kafka .errors import KafkaError , TopicAlreadyExistsError
30
- from kafka .future import Future
31
30
from kafka .structs import PartitionMetadata , TopicPartition
32
31
from karapace import constants
33
32
from karapace .backup .backends .v1 import SchemaBackupV1Reader
40
39
from pathlib import Path
41
40
from rich .console import Console
42
41
from tenacity import retry , retry_if_exception_type , RetryCallState , stop_after_delay , wait_fixed
43
- from typing import AbstractSet , Callable , Collection , Iterator , Literal , Mapping , NewType , NoReturn , TypeVar
42
+ from typing import AbstractSet , Callable , Collection , Iterator , Literal , Mapping , NewType , TypeVar
44
43
45
44
import contextlib
46
45
import datetime
@@ -263,16 +262,6 @@ def _consumer(config: Config, topic: str) -> Iterator[KafkaConsumer]:
263
262
yield consumer
264
263
265
264
266
- @contextlib .contextmanager
267
- def _enable_producer_callback_errors () -> Iterator [None ]:
268
- global_value = Future .error_on_callbacks
269
- Future .error_on_callbacks = True
270
- try :
271
- yield None
272
- finally :
273
- Future .error_on_callbacks = global_value
274
-
275
-
276
265
@contextlib .contextmanager
277
266
def _producer (config : Config , topic : str ) -> Iterator [KafkaProducer ]:
278
267
"""Creates an automatically closing Kafka producer client.
@@ -282,10 +271,9 @@ def _producer(config: Config, topic: str) -> Iterator[KafkaProducer]:
282
271
:raises PartitionCountError: if the topic does not have exactly one partition.
283
272
:raises Exception: if client creation fails, concrete exception types are unknown, see Kafka implementation.
284
273
"""
285
- with _enable_producer_callback_errors ():
286
- with kafka_producer_from_config (config ) as producer :
287
- __check_partition_count (topic , producer .partitions_for )
288
- yield producer
274
+ with kafka_producer_from_config (config ) as producer :
275
+ __check_partition_count (topic , producer .partitions_for )
276
+ yield producer
289
277
290
278
291
279
def _normalize_location (input_location : str ) -> Path | StdOut :
@@ -390,13 +378,10 @@ def _handle_restore_topic(
390
378
raise BackupTopicAlreadyExists (f"Topic to restore '{ instruction .topic_name } ' already exists" )
391
379
392
380
393
- def _raise_backup_error (exception : Exception ) -> NoReturn :
394
- raise BackupDataRestorationError ("Error while producing restored messages" ) from exception
395
-
396
-
397
381
def _handle_producer_send (
398
382
instruction : ProducerSend ,
399
383
producer : KafkaProducer ,
384
+ producer_error_callback : Callable [[Exception ], None ],
400
385
) -> None :
401
386
LOG .debug (
402
387
"Sending kafka msg key: %r, value: %r" ,
@@ -411,7 +396,7 @@ def _handle_producer_send(
411
396
partition = instruction .partition_index ,
412
397
headers = [(key .decode () if key is not None else None , value ) for key , value in instruction .headers ],
413
398
timestamp_ms = instruction .timestamp ,
414
- ).add_errback (_raise_backup_error )
399
+ ).add_errback (producer_error_callback )
415
400
except (KafkaError , AssertionError ) as ex :
416
401
raise BackupDataRestorationError ("Error while calling send on restoring messages" ) from ex
417
402
@@ -446,11 +431,23 @@ def restore_backup(
446
431
LOG .info ("Identified backup backend: %s" , backend .__class__ .__name__ )
447
432
LOG .info ("Starting backup restore for topic: %r" , topic_name )
448
433
434
+ # Stores the latest exception raised by the error callback set on producer.send()
435
+ _producer_exception = None
436
+
449
437
# We set up an ExitStack context, so that we can enter the producer context only
450
438
# after processing a RestoreTopic instruction.
451
439
with contextlib .ExitStack () as stack :
452
440
producer = None
453
441
442
+ def _producer_error_callback (exception : Exception ) -> None :
443
+ LOG .error ("Producer error" , exc_info = exception )
444
+ nonlocal _producer_exception
445
+ _producer_exception = exception
446
+
447
+ def _check_producer_exception () -> None :
448
+ if _producer_exception is not None :
449
+ raise BackupDataRestorationError ("Error while producing restored messages" ) from _producer_exception
450
+
454
451
for instruction in backend .read (backup_location , topic_name ):
455
452
if isinstance (instruction , RestoreTopicLegacy ):
456
453
_handle_restore_topic_legacy (instruction , config , skip_topic_creation )
@@ -461,10 +458,20 @@ def restore_backup(
461
458
elif isinstance (instruction , ProducerSend ):
462
459
if producer is None :
463
460
raise RuntimeError ("Backend has not yet sent RestoreTopic." )
464
- _handle_producer_send (instruction , producer )
461
+ _handle_producer_send (instruction , producer , _producer_error_callback )
462
+ # Immediately check if producer.send() generated an exception. This call is
463
+ # only an optimization, as producing is asynchronous and no sends might
464
+ # have been executed once we reach this line.
465
+ _check_producer_exception ()
465
466
else :
466
467
assert_never (instruction )
467
468
469
+ # Check if an exception was raised after the producer was flushed and closed
470
+ # by `kafka_producer_from_config` context manager. As opposed to the previous
471
+ # call, this one is essential for correct behavior, as when we reach this point the
472
+ # producer can no longer be sending messages (it has been flushed and closed).
473
+ _check_producer_exception ()
474
+
468
475
469
476
def create_backup (
470
477
config : Config ,
0 commit comments