Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: differentiate producers to the same topic with aliases #68

Merged
merged 15 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
* 1.10.6
- Added the `alias` producer config option to allow multiple producers of the same topic to be independent of each other.

* 1.10.4 (merge 1.5.14)
- Split batch if `message_too_large` error code is received.
Prior to this fix, `wolff_producer` would retry the same batch indefinitely for any error code received from Kafka (`message_too_large` included).
Expand Down
4 changes: 4 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ services:
zookeeper:
image: wurstmeister/zookeeper
container_name: wolff-zk
ulimits:
nofile:
soft: 65536
hard: 65536
kafka_1:
depends_on:
- zookeeper
Expand Down
2 changes: 1 addition & 1 deletion src/wolff.app.src
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{application, wolff,
[{description, "Kafka's publisher"},
{vsn, "1.10.4"},
{vsn, "1.10.6"},
{registered, []},
{applications,
[kernel,
Expand Down
2 changes: 1 addition & 1 deletion src/wolff.appup.src
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
%% -*- mode: erlang; -*-
{"1.10.4",
{"1.10.6",
[
],
[
Expand Down
2 changes: 1 addition & 1 deletion src/wolff.erl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ ensure_supervised_producers(ClientId, Topic, ProducerCfg) ->
%% @hidden Deprecated.
-spec stop_and_delete_supervised_producers(client_id(), topic(), name()) -> ok.
zmstone marked this conversation as resolved.
Show resolved Hide resolved
stop_and_delete_supervised_producers(ClientId, Topic, _Name) ->
stop_and_delete_supervised_producers(ClientId, Topic).
stop_and_delete_supervised_producers(ClientId, {_Alias = undefined, Topic}).
zmstone marked this conversation as resolved.
Show resolved Hide resolved

%% @doc Ensure supervised producers are stopped then deleted.
stop_and_delete_supervised_producers(ClientId, Topic) ->
zmstone marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
134 changes: 94 additions & 40 deletions src/wolff_client.erl
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@

-type config() :: map().
-type topic() :: kpro:topic().
-type alias_and_topic() :: wolff_producers:alias_and_topic().
zmstone marked this conversation as resolved.
Show resolved Hide resolved
zmstone marked this conversation as resolved.
Show resolved Hide resolved
-type topic_or_alias() :: topic() | alias_and_topic().
-type partition() :: kpro:partition().
-type connection() :: kpro:connection().
-type host() :: wolff:host().
-type conn_id() :: {topic(), partition()} | host().
-type conn_id() :: {topic_or_alias(), partition()} | host().

-type state() ::
#{client_id := wolff:client_id(),
Expand All @@ -53,7 +55,9 @@
%% only applicable when connection strategy is per_broker
%% because in this case the connections are keyed by host()
%% but we need to find connection by {topic(), partition()}
leaders => #{{topic(), partition()} => connection()}
leaders => #{{topic(), partition()} => connection()},
%% Reference counting so we may drop connection metadata when no longer required.
known_topics := #{topic() => #{topic_or_alias() => true}}
zmstone marked this conversation as resolved.
Show resolved Hide resolved
}.

-define(DEFAULT_METADATA_TIMEOUT, 10000).
Expand Down Expand Up @@ -81,7 +85,8 @@ start_link(ClientId, Hosts, Config) ->
conns => #{},
metadata_conn => not_initialized,
metadata_ts => #{},
leaders => #{}
leaders => #{},
known_topics => #{}
},
case maps:get(reg_name, Config, false) of
false -> gen_server:start_link(?MODULE, St, []);
Expand All @@ -94,15 +99,17 @@ stop(Pid) ->
get_id(Pid) ->
gen_server:call(Pid, get_id, infinity).

-spec get_leader_connections(pid(), topic()) ->
-spec get_leader_connections(pid(), topic_or_alias()) ->
{ok, [{partition(), pid() | ?conn_down(_)}]} | {error, any()}.
get_leader_connections(Client, Topic) ->
safe_call(Client, {get_leader_connections, Topic, all_partitions}).
%% @doc Get (or establish) leader connections for every partition of the topic.
%% Equivalent to get_leader_connections/3 with `all_partitions'.
get_leader_connections(Client, TopicOrAlias0) ->
  %% Normalize a bare topic to the {Alias, Topic} pair form (alias defaults
  %% to `undefined') so the server keys its state uniformly.
  TopicOrAlias = ensure_has_alias(TopicOrAlias0),
  safe_call(Client, {get_leader_connections, TopicOrAlias, all_partitions}).

-spec get_leader_connections(pid(), topic(), all_partitions | pos_integer()) ->
-spec get_leader_connections(pid(), topic_or_alias(), all_partitions | pos_integer()) ->
{ok, [{partition(), pid() | ?conn_down(_)}]} | {error, any()}.
get_leader_connections(Client, Topic, MaxPartitions) ->
safe_call(Client, {get_leader_connections, Topic, MaxPartitions}).
%% @doc Get (or establish) leader connections for up to `MaxPartitions'
%% partitions of the topic (`all_partitions' for no limit).
get_leader_connections(Client, TopicOrAlias0, MaxPartitions) ->
  %% Normalize a bare topic to the {Alias, Topic} pair form (alias defaults
  %% to `undefined') so the server keys its state uniformly.
  TopicOrAlias = ensure_has_alias(TopicOrAlias0),
  safe_call(Client, {get_leader_connections, TopicOrAlias, MaxPartitions}).

%% @doc Check if client has a metadata connection alive.
%% Trigger a reconnect if the connection is down for whatever reason.
Expand Down Expand Up @@ -147,8 +154,8 @@ safe_call(Pid, Call) ->
recv_leader_connection(Client, Topic, Partition, Pid, MaxPartitions) ->
zmstone marked this conversation as resolved.
Show resolved Hide resolved
gen_server:cast(Client, {recv_leader_connection, Topic, Partition, Pid, MaxPartitions}).

delete_producers_metadata(Client, Topic) ->
gen_server:cast(Client, {delete_producers_metadata, Topic}).
%% @doc Asynchronously ask the client process to release the cached
%% metadata (and, when unreferenced, the connections) kept for this
%% producer's topic-or-alias key.
delete_producers_metadata(Client, TopicOrAlias) ->
  gen_server:cast(Client, {delete_producers_metadata, TopicOrAlias}).

init(#{client_id := ClientID} = St) ->
erlang:process_flag(trap_exit, true),
Expand All @@ -167,10 +174,10 @@ handle_call({check_if_topic_exists, Topic}, _From, #{conn_config := ConnConfig}
{error, Reason} ->
{reply, {error, Reason}, St0}
end;
handle_call({get_leader_connections, Topic, MaxPartitions}, _From, St0) ->
case ensure_leader_connections(St0, Topic, MaxPartitions) of
handle_call({get_leader_connections, TopicOrAlias, MaxPartitions}, _From, St0) ->
case ensure_leader_connections(St0, TopicOrAlias, MaxPartitions) of
{ok, St} ->
Result = do_get_leader_connections(St, Topic),
Result = do_get_leader_connections(St, TopicOrAlias),
{reply, {ok, Result}, St};
{error, Reason} ->
{reply, {error, Reason}, St0}
Expand Down Expand Up @@ -207,11 +214,29 @@ handle_cast({recv_leader_connection, Topic, Partition, Caller, MaxConnections},
_ = erlang:send(Caller, ?leader_connection({error, Reason})),
{noreply, St0}
end;

handle_cast({delete_producers_metadata, Topic}, #{metadata_ts := Topics, conns := Conns} = St) ->
Conns1 = maps:without( [K || K = {K1, _} <- maps:keys(Conns), K1 =:= Topic ], Conns),
{noreply, St#{metadata_ts => maps:remove(Topic, Topics), conns => Conns1}};

%% A producer set is going away: drop its alias-and-topic entry from the
%% `known_topics' reference counts, and only when it was the last producer
%% set for the topic also drop the cached partition connections and the
%% metadata freshness timestamp.
handle_cast({delete_producers_metadata, TopicOrAlias}, St0) ->
  #{metadata_ts := Topics0,
    conns := Conns0,
    known_topics := KnownTopics0} = St0,
  Topic = get_topic(TopicOrAlias),
  case KnownTopics0 of
    #{Topic := #{TopicOrAlias := true} = KnownProducers} when map_size(KnownProducers) =:= 1 ->
      %% Last entry: we may drop the connection metadata
      KnownTopics = maps:remove(Topic, KnownTopics0),
      %% NOTE(review): conn_id() is {topic_or_alias(), partition()}; this
      %% comparison matches only bare-topic keys — confirm alias-keyed
      %% connection entries (if any) are also cleaned up here.
      Conns = maps:without( [K || K = {K1, _} <- maps:keys(Conns0), K1 =:= Topic], Conns0),
      Topics = maps:remove(TopicOrAlias, Topics0),
      St = St0#{metadata_ts := Topics, conns := Conns, known_topics := KnownTopics},
      {noreply, St};
    #{Topic := #{TopicOrAlias := true} = KnownProducers0} ->
      %% Connection is still being used by other producers.
      %% NOTE(review): the metadata_ts entry for this TopicOrAlias is kept
      %% in this branch — presumably harmless staleness; verify.
      KnownProducers = maps:remove(TopicOrAlias, KnownProducers0),
      KnownTopics = KnownTopics0#{Topic := KnownProducers},
      St = St0#{known_topics := KnownTopics},
      {noreply, St};
    _ ->
      %% Already gone; nothing to do.
      {noreply, St0}
  end;
%% Ignore any other cast; keep state unchanged.
handle_cast(_Cast, St) ->
  {noreply, St}.

Expand Down Expand Up @@ -269,7 +294,8 @@ do_close_connection(Pid) ->
exit(Pid, kill)
end.

do_get_leader_connections(#{conns := Conns} = St, Topic) ->
do_get_leader_connections(#{conns := Conns} = St, TopicOrAlias) ->
Topic = get_topic(TopicOrAlias),
FindInMap = case get_connection_strategy(St) of
per_partition -> Conns;
per_broker -> maps:get(leaders, St)
Expand All @@ -288,49 +314,57 @@ do_get_leader_connections(#{conns := Conns} = St, Topic) ->
maps:fold(F, [], FindInMap).

%% return true if there is no need to refresh metadata because the last one is fresh enough
is_metadata_fresh(#{metadata_ts := Topics, config := Config}, Topic) ->
is_metadata_fresh(#{metadata_ts := Topics, config := Config}, TopicOrAlias) ->
MinInterval = maps:get(min_metadata_refresh_interval, Config, ?MIN_METADATA_REFRESH_INTERVAL),
case maps:get(Topic, Topics, false) of
case maps:get(TopicOrAlias, Topics, false) of
false -> false;
Ts -> timer:now_diff(erlang:timestamp(), Ts) < MinInterval * 1000
end.

-spec ensure_leader_connections(state(), topic(), all_partitions | pos_integer()) ->
-spec ensure_leader_connections(state(), topic_or_alias(), all_partitions | pos_integer()) ->
{ok, state()} | {error, any()}.
ensure_leader_connections(St, Topic, MaxPartitions) ->
ensure_leader_connections(St, TopicOrAlias, MaxPartitions) ->
Topic = get_topic(TopicOrAlias),
case is_metadata_fresh(St, Topic) of
true -> {ok, St};
false -> ensure_leader_connections2(St, Topic, MaxPartitions)
false -> ensure_leader_connections2(St, TopicOrAlias, MaxPartitions)
end.

ensure_leader_connections2(#{metadata_conn := Pid, conn_config := ConnConfig} = St, Topic, MaxPartitions) when is_pid(Pid) ->
-spec ensure_leader_connections2(state(), topic_or_alias(), wolff_producers:max_partitions()) ->
{ok, state()} | {error, term()}.
ensure_leader_connections2(#{metadata_conn := Pid, conn_config := ConnConfig} = St, TopicOrAlias, MaxPartitions) when is_pid(Pid) ->
Topic = get_topic(TopicOrAlias),
Timeout = maps:get(request_timeout, ConnConfig, ?DEFAULT_METADATA_TIMEOUT),
case do_get_metadata(Pid, Topic, Timeout) of
{ok, {Brokers, PartitionMetaList}} ->
ensure_leader_connections3(St, Topic, Pid, Brokers, PartitionMetaList, MaxPartitions);
ensure_leader_connections3(St, TopicOrAlias, Pid, Brokers, PartitionMetaList, MaxPartitions);
{error, _Reason} ->
%% ensure metadata connection is down, try to establish a new one in the next clause,
%% reason is discarded here, because the next clause will log error if the immediate retry fails
exit(Pid, kill),
ensure_leader_connections2(St#{metadata_conn => down}, Topic, MaxPartitions)
ensure_leader_connections2(St#{metadata_conn => down}, TopicOrAlias, MaxPartitions)
end;
ensure_leader_connections2(#{conn_config := ConnConfig,
seed_hosts := SeedHosts} = St, Topic, MaxPartitions) ->
seed_hosts := SeedHosts} = St, TopicOrAlias, MaxPartitions) ->
Topic = get_topic(TopicOrAlias),
case get_metadata(SeedHosts, ConnConfig, Topic, []) of
{ok, {ConnPid, {Brokers, PartitionMetaList}}} ->
ensure_leader_connections3(St, Topic, ConnPid, Brokers, PartitionMetaList, MaxPartitions);
ensure_leader_connections3(St, TopicOrAlias, ConnPid, Brokers, PartitionMetaList, MaxPartitions);
{error, Errors} ->
log_warn(failed_to_fetch_metadata, #{topic => Topic, errors => Errors}),
log_warn(failed_to_fetch_metadata, #{topic => get_topic(TopicOrAlias), errors => Errors}),
{error, failed_to_fetch_metadata}
end.

ensure_leader_connections3(#{metadata_ts := MetadataTs} = St0, Topic,
-spec ensure_leader_connections3(state(), topic_or_alias(), pid(), _Brokers,
_PartitionMetaList, wolff_producers:max_partitions()) ->
{ok, state()}.
ensure_leader_connections3(#{metadata_ts := MetadataTs} = St0, TopicOrAlias,
ConnPid, Brokers, PartitionMetaList0, MaxPartitions) ->
PartitionMetaList = limit_partitions_count(PartitionMetaList0, MaxPartitions),
St = lists:foldl(fun(PartitionMeta, StIn) ->
ensure_leader_connection(StIn, Brokers, Topic, PartitionMeta)
ensure_leader_connection(StIn, Brokers, TopicOrAlias, PartitionMeta)
end, St0, PartitionMetaList),
{ok, St#{metadata_ts := MetadataTs#{Topic => erlang:timestamp()},
{ok, St#{metadata_ts := MetadataTs#{TopicOrAlias => erlang:timestamp()},
metadata_conn => ConnPid
}}.

Expand All @@ -342,19 +376,23 @@ limit_partitions_count(PartitionMetaList, _) ->
%% This function ensures each Topic-Partition pair has a connection record
%% either a pid when the leader is healthy, or the error reason
%% if failed to discover the leader or failed to connect to the leader
ensure_leader_connection(St, Brokers, Topic, P_Meta) ->
-spec ensure_leader_connection(state(), _Brokers, topic_or_alias(), _PartitionMetaList) -> state().
ensure_leader_connection(St, Brokers, TopicOrAlias, P_Meta) ->
PartitionNum = kpro:find(partition_index, P_Meta),
ErrorCode = kpro:find(error_code, P_Meta),
case ErrorCode =:= ?no_error of
true ->
do_ensure_leader_connection(St, Brokers, Topic, PartitionNum, P_Meta);
do_ensure_leader_connection(St, Brokers, TopicOrAlias, PartitionNum, P_Meta);
false ->
maybe_disconnect_old_leader(St, Topic, PartitionNum, ErrorCode)
maybe_disconnect_old_leader(St, TopicOrAlias, PartitionNum, ErrorCode)
end.

-spec do_ensure_leader_connection(state(), _Brokers, topic_or_alias(), _Partition, _PartitionMetaList) ->
state().
do_ensure_leader_connection(#{conn_config := ConnConfig,
conns := Connections0
} = St0, Brokers, Topic, PartitionNum, P_Meta) ->
} = St0, Brokers, TopicOrAlias, PartitionNum, P_Meta) ->
Topic = get_topic(TopicOrAlias),
LeaderBrokerId = kpro:find(leader_id, P_Meta),
{_, Host} = lists:keyfind(LeaderBrokerId, 1, Brokers),
Strategy = get_connection_strategy(St0),
Expand Down Expand Up @@ -384,7 +422,8 @@ do_ensure_leader_connection(#{conn_config := ConnConfig,
end.

%% Handle error code in partition metadata.
maybe_disconnect_old_leader(#{conns := Connections} = St, Topic, PartitionNum, ErrorCode) ->
maybe_disconnect_old_leader(#{conns := Connections} = St, TopicOrAlias, PartitionNum, ErrorCode) ->
Topic = get_topic(TopicOrAlias),
Strategy = get_connection_strategy(St),
case Strategy of
per_partition ->
Expand Down Expand Up @@ -446,12 +485,16 @@ split_config(Config) ->
{ConnCfg, MyCfg} = lists:partition(Pred, maps:to_list(Config)),
{maps:from_list(ConnCfg), maps:from_list(MyCfg)}.

-spec get_metadata([_Host], _ConnConfig, topic()) ->
{ok, {pid(), term()}} | {error, term()}.
get_metadata(Hosts, _ConnectFun, _Topic) when Hosts =:= [] ->
{error, no_hosts};
get_metadata(Hosts, ConnectFun, Topic) ->
get_metadata(Hosts, ConnectFun, Topic, []).

get_metadata([], _ConnectFun, _Topic, Errors) ->
-spec get_metadata([_Host], _ConnConfig, topic(), [Error]) ->
{ok, {pid(), term()}} | {error, [Error] | term()}.
get_metadata([], _ConnConfig, _Topic, Errors) ->
{error, Errors};
get_metadata([Host | Rest], ConnConfig, Topic, Errors) ->
case do_connect(Host, ConnConfig) of
Expand All @@ -469,6 +512,8 @@ get_metadata([Host | Rest], ConnConfig, Topic, Errors) ->
get_metadata(Rest, ConnConfig, Topic, [Reason | Errors])
end.

-spec do_get_metadata(connection(), topic(), timeout()) ->
{ok, {_Brokers, _Partitions}} | {error, term()}.
do_get_metadata(Connection, Topic, Timeout) ->
case kpro:get_api_versions(Connection) of
{ok, Vsns} ->
Expand All @@ -478,6 +523,7 @@ do_get_metadata(Connection, Topic, Timeout) ->
{error, Reason}
end.

-spec do_get_metadata2(_Vsn, connection(), topic(), timeout()) -> {ok, {_, _}} | {error, term()}.
do_get_metadata2(Vsn, Connection, Topic, Timeout) ->
Req = kpro_req_lib:metadata(Vsn, [Topic], _IsAutoCreateAllowed = false),
case kpro:request_sync(Connection, Req, Timeout) of
Expand Down Expand Up @@ -561,3 +607,11 @@ bin(X) ->
{error, _} -> bin(io_lib:format("~0p", [X]));
Addr -> bin(Addr)
end.

%% @doc Return the bare topic name from a topic-or-alias reference.
%% Aliased producers address the topic as `{Alias, Topic}'; plain
%% producers pass the topic term directly.
-spec get_topic(topic_or_alias()) -> topic().
get_topic({_ProducerAlias, TopicName}) -> TopicName;
get_topic(TopicName) -> TopicName.

%% @doc Normalize a topic reference to the `{Alias, Topic}' pair form.
%% A bare topic is paired with the alias `undefined'.
-spec ensure_has_alias(topic_or_alias()) -> alias_and_topic().
ensure_has_alias({_, _} = AliasAndTopic) -> AliasAndTopic;
ensure_has_alias(BareTopic) -> {undefined, BareTopic}.
zmstone marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 5 additions & 1 deletion src/wolff_producer.erl
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,15 @@ do_init(#{client_id := ClientId,
partition := Partition,
config := Config0
} = St) ->
AliasPathSegment = case maps:find(alias, Config0) of
zmstone marked this conversation as resolved.
Show resolved Hide resolved
{ok, Alias} when is_binary(Alias) -> Alias;
_ -> Topic
end,
QCfg = case maps:get(replayq_dir, Config0, false) of
false ->
#{mem_only => true};
BaseDir ->
Dir = filename:join([BaseDir, Topic, integer_to_list(Partition)]),
Dir = filename:join([BaseDir, AliasPathSegment, integer_to_list(Partition)]),
zmstone marked this conversation as resolved.
Show resolved Hide resolved
SegBytes = maps:get(replayq_seg_bytes, Config0, ?DEFAULT_REPLAYQ_SEG_BYTES),
Offload = maps:get(replayq_offload_mode, Config0, false),
#{dir => Dir, seg_bytes => SegBytes, offload => Offload}
Expand Down
Loading
Loading