-
Notifications
You must be signed in to change notification settings - Fork 488
fix(openfeature): block initialize() until RC config arrives #16650
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f29acd1
6000125
e7306ba
39f1601
1c3fc38
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| from collections import OrderedDict | ||
| from collections.abc import MutableMapping | ||
| from importlib.metadata import version | ||
| import threading | ||
| import typing | ||
|
|
||
| from openfeature.evaluation_context import EvaluationContext | ||
|
|
@@ -87,11 +88,20 @@ class DataDogProvider(AbstractProvider): | |
| Feature Flags and Experimentation (FFE) product. | ||
| """ | ||
|
|
||
| def __init__(self, *args: typing.Any, **kwargs: typing.Any): | ||
| def __init__(self, *args: typing.Any, initialization_timeout: typing.Optional[float] = None, **kwargs: typing.Any): | ||
| super().__init__(*args, **kwargs) | ||
| self._metadata = Metadata(name="Datadog") | ||
| self._status = ProviderStatus.NOT_READY | ||
| self._config_received = False | ||
|
|
||
| # Initialization timeout: constructor arg takes priority, then env var (default 30s) | ||
| if initialization_timeout is not None: | ||
| self._initialization_timeout = initialization_timeout | ||
| else: | ||
| self._initialization_timeout = ffe_config.initialization_timeout_ms / 1000.0 | ||
|
|
||
| # Event used to block initialize() until config arrives. | ||
| # Also serves as the "config received" flag via is_set(). | ||
| self._config_received = threading.Event() | ||
|
|
||
| # Cache for reported exposures to prevent duplicates | ||
| # Stores mapping of (flag_key, subject_id) -> (allocation_key, variant_key) | ||
|
|
@@ -108,9 +118,6 @@ def __init__(self, *args: typing.Any, **kwargs: typing.Any): | |
| "please set DD_EXPERIMENTAL_FLAGGING_PROVIDER_ENABLED=true to enable it", | ||
| ) | ||
|
|
||
| # Register this provider instance for status updates | ||
| _register_provider(self) | ||
|
|
||
| def get_metadata(self) -> Metadata: | ||
| """Returns provider metadata.""" | ||
| return self._metadata | ||
|
|
@@ -119,32 +126,52 @@ def initialize(self, evaluation_context: EvaluationContext) -> None: | |
| """ | ||
| Initialize the provider. | ||
|
|
||
| Called by the OpenFeature SDK when the provider is set. | ||
| Provider Creation → NOT_READY | ||
| ↓ | ||
| First Remote Config Payload | ||
| ↓ | ||
| READY (emits PROVIDER_READY event) | ||
| ↓ | ||
| Shutdown | ||
| ↓ | ||
| NOT_READY | ||
| Blocks until Remote Config delivers the first FFE configuration or | ||
| the initialization timeout expires. | ||
|
|
||
| The timeout is configurable via: | ||
| - Constructor: DataDogProvider(initialization_timeout=10.0) # seconds | ||
| - Env var: DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS=10000 | ||
|
|
||
| Provider lifecycle: | ||
| NOT_READY -> initialize() blocks -> config arrives -> READY | ||
| NOT_READY -> initialize() blocks -> timeout -> raises ProviderNotReadyError | ||
| """ | ||
| if not self._enabled: | ||
| return | ||
|
|
||
| # Register for RC config callbacks (in initialize, not __init__, so | ||
| # re-initialization after shutdown re-registers the provider) | ||
| _register_provider(self) | ||
|
|
||
| try: | ||
| # Start the exposure writer for reporting | ||
| start_exposure_writer() | ||
| except ServiceStatusError: | ||
| logger.debug("Exposure writer is already running", exc_info=True) | ||
|
|
||
| # If configuration was already received before initialization, emit ready now | ||
| # Fast path: config already available (RC delivered before set_provider) | ||
| config = _get_ffe_config() | ||
| if config is not None and not self._config_received: | ||
| self._config_received = True | ||
| if config is not None: | ||
| logger.debug("FFE configuration already available, provider is READY") | ||
| self._config_received.set() | ||
| self._status = ProviderStatus.READY | ||
| self._emit_ready_event() | ||
| return # SDK will dispatch PROVIDER_READY | ||
|
|
||
| # Block until config arrives or timeout expires | ||
| logger.debug( | ||
| "Waiting up to %.1fs for initial FFE configuration from Remote Config", self._initialization_timeout | ||
| ) | ||
| if not self._config_received.wait(timeout=self._initialization_timeout): | ||
| # Timeout expired without receiving config | ||
| from openfeature.exception import ProviderNotReadyError | ||
|
|
||
| raise ProviderNotReadyError( | ||
| f"Provider timed out after {self._initialization_timeout:.1f}s waiting for " | ||
| "initial configuration from Remote Config" | ||
| ) | ||
|
|
||
| # Config received during wait -- on_configuration_received() already set status | ||
|
|
||
| def shutdown(self) -> None: | ||
| """ | ||
|
|
@@ -167,7 +194,7 @@ def shutdown(self) -> None: | |
| # Unregister provider | ||
| _unregister_provider(self) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. aside(not caused by this PR): registering provider in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 to this. The provider shouldn't be managing its own registration, right? |
||
| self._status = ProviderStatus.NOT_READY | ||
| self._config_received = False | ||
| self._config_received.clear() | ||
|
|
||
| def resolve_boolean_details( | ||
| self, | ||
|
|
@@ -423,14 +450,18 @@ def on_configuration_received(self) -> None: | |
| """ | ||
| Called when a Remote Configuration payload is received and processed. | ||
|
|
||
| Emits PROVIDER_READY event on first configuration. | ||
| Updates status first, then signals the event to unblock initialize(). | ||
| Emits PROVIDER_READY for late arrivals (config received after initialize() timed out). | ||
| """ | ||
| if not self._config_received: | ||
| self._config_received = True | ||
| if not self._config_received.is_set(): | ||
| self._status = ProviderStatus.READY | ||
| logger.debug("First FFE configuration received, provider is now READY") | ||
| # Emit READY for late recovery: config arrived after init timed out | ||
| self._emit_ready_event() | ||
|
|
||
| # Signal the event last to unblock initialize() after status is updated | ||
| self._config_received.set() | ||
|
|
||
| def _emit_ready_event(self) -> None: | ||
| """ | ||
| Safely emit PROVIDER_READY event. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,10 +30,20 @@ class OpenFeatureConfig(DDConfig): | |
| default=1.0, | ||
| ) | ||
|
|
||
| # Provider initialization timeout in milliseconds. | ||
| # Controls how long initialize() blocks waiting for the first Remote Config payload. | ||
| # Default is 30000ms (30 seconds), matching Java, Go, and Node.js SDKs. | ||
| initialization_timeout_ms = DDConfig.var( | ||
| int, | ||
| "DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not needed per-se but all other env vars across the SDK fleet are still prefix with |
||
| default=30000, | ||
| ) | ||
|
|
||
| _openfeature_config_keys = [ | ||
| "experimental_flagging_provider_enabled", | ||
| "ffe_intake_enabled", | ||
| "ffe_intake_heartbeat_interval", | ||
| "initialization_timeout_ms", | ||
| ] | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| --- | ||
| fixes: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor: exiting before initialization is complete was a bug. But timeout ( |
||
| - | | ||
| openfeature: This fix resolves an issue where ``DataDogProvider.initialize()`` returned immediately | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should simplify this release note a ton and focus on customer facing wording, feels like too much internal implementation details.
obviously no clue if my summary is correct/accurate at all, but things like... does mentioning "remote config" matter much to understand the bug/impact? same for |
||
| without waiting for Remote Configuration data, causing the OpenFeature SDK to emit ``PROVIDER_READY`` | ||
| before flag configuration was available. Flag evaluations in this window silently returned default | ||
| values. The provider now blocks in ``initialize()`` until the first configuration arrives or a | ||
| configurable timeout expires (default 30s), matching the behavior of the Java, Go, and Node.js | ||
| providers. The timeout is configurable via the ``DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS`` | ||
| environment variable or the ``init_timeout`` constructor parameter. | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍