"""
karapace - Karapace producer

Copyright (c) 2023 Aiven Ltd
See LICENSE for details
"""
from kafka import KafkaProducer
from kafka.errors import MessageSizeTooLargeError
from karapace.config import Config
from karapace.errors import SchemaTooLargeException
from karapace.key_format import KeyFormatter
from karapace.offset_watcher import OffsetWatcher
from karapace.utils import json_encode, KarapaceKafkaClient
from karapace.version import __version__
from typing import Any, cast, Dict, Optional, Tuple, Union

import logging
import time
LOG = logging.getLogger(__name__)
X_REGISTRY_VERSION_HEADER = ("X-Registry-Version", f"karapace-{__version__}".encode())

class KarapaceProducer:
    def __init__(self, *, config: Config, offset_watcher: OffsetWatcher, key_formatter: KeyFormatter):
        self._producer: Optional[KafkaProducer] = None
        self._config = config
        self._offset_watcher = offset_watcher
        self._key_formatter = key_formatter
        self._kafka_timeout = 10  # seconds; used for both flush() and future.get()
        self._schemas_topic = self._config["topic_name"]

        host: str = cast(str, self._config["host"])
        self.x_origin_host_header: Tuple[str, bytes] = ("X-Origin-Host", host.encode("utf8"))

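    # Producer creation retries indefinitely: the registry cannot serve writes
    # without a producer, so transient broker outages are absorbed by the loop below.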
    def initialize_karapace_producer(self) -> None:
        while True:
            try:
                self._producer = KafkaProducer(
                    bootstrap_servers=self._config["bootstrap_uri"],
                    security_protocol=self._config["security_protocol"],
                    ssl_cafile=self._config["ssl_cafile"],
                    ssl_certfile=self._config["ssl_certfile"],
                    ssl_keyfile=self._config["ssl_keyfile"],
                    sasl_mechanism=self._config["sasl_mechanism"],
                    sasl_plain_username=self._config["sasl_plain_username"],
                    sasl_plain_password=self._config["sasl_plain_password"],
                    api_version=(1, 0, 0),
                    metadata_max_age_ms=self._config["metadata_max_age_ms"],
                    max_block_ms=2000,  # missing topics will block unless we cache cluster metadata and pre-check
                    connections_max_idle_ms=self._config["connections_max_idle_ms"],  # close idle connections; may help during cluster upgrades
                    kafka_client=KarapaceKafkaClient,
                )
                return
            except Exception:  # pylint: disable=broad-except
                LOG.exception("Unable to create producer, retrying")
                time.sleep(1)

    def close(self) -> None:
        if self._producer is not None:
            self._producer.close()

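    # The send path is deliberately synchronous: flush() forces the record out and
    # future.get() blocks until the broker acknowledges it (or the timeout expires),
    # so a committed offset is known before we wait for the schema reader.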
    def _send_kafka_message(self, key: Union[bytes, str], value: Union[bytes, str]) -> None:
        assert self._producer is not None

        if isinstance(key, str):
            key = key.encode("utf8")
        if isinstance(value, str):
            value = value.encode("utf8")

        future = self._producer.send(
            self._schemas_topic,
            key=key,
            value=value,
            headers=[X_REGISTRY_VERSION_HEADER, self.x_origin_host_header],
        )
        self._producer.flush(timeout=self._kafka_timeout)
        try:
            msg = future.get(timeout=self._kafka_timeout)
        except MessageSizeTooLargeError as ex:
            raise SchemaTooLargeException from ex

        sent_offset = msg.offset

        LOG.info(
            "Waiting for schema reader to catch up. key: %r, value: %r, offset: %r",
            key,
            value,
            sent_offset,
        )

        if self._offset_watcher.wait_for_offset(sent_offset, timeout=60) is True:
            LOG.info(
                "Schema reader has found key. key: %r, value: %r, offset: %r",
                key,
                value,
                sent_offset,
            )
        else:
            raise RuntimeError(
                "Schema reader timed out while looking for key. key: {!r}, value: {!r}, offset: {}".format(
                    key, value, sent_offset
                )
            )

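    # Public entry point: the key is canonicalized via KeyFormatter and the value,
    # when present, is JSON-encoded without extra whitespace.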
    def send_message(self, *, key: Dict[str, Any], value: Optional[Dict[str, Any]]) -> None:
        key_bytes = self._key_formatter.format_key(key)
        value_bytes: Union[bytes, str] = b""
        if value is not None:
            value_bytes = json_encode(value, binary=True, compact=True)
        self._send_kafka_message(key=key_bytes, value=value_bytes)
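
For context, a minimal sketch of how this producer could be wired up and used. The
construction of Config, OffsetWatcher, and KeyFormatter below is an assumption for
illustration, as is the config loader name; the real call sites live elsewhere in Karapace:

    from karapace.config import read_config  # assumed loader; actual name may differ
    from karapace.key_format import KeyFormatter
    from karapace.offset_watcher import OffsetWatcher

    config = read_config("karapace.config.json")
    producer = KarapaceProducer(
        config=config,
        offset_watcher=OffsetWatcher(),
        key_formatter=KeyFormatter(),
    )
    producer.initialize_karapace_producer()
    try:
        # Hypothetical registry key for illustration; real keys are built by
        # the schema registry code. value=None sends an empty message body.
        producer.send_message(key={"keytype": "NOOP", "magic": 0}, value=None)
    finally:
        producer.close()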