Skip to content

Commit

Permalink
dns: adding resolution details
Browse files Browse the repository at this point in the history
Signed-off-by: Alyssa Wilk <alyssar@chromium.org>
  • Loading branch information
alyssawilk committed Jun 3, 2024
1 parent 005f119 commit 17dd04c
Show file tree
Hide file tree
Showing 17 changed files with 96 additions and 47 deletions.
4 changes: 4 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ behavior_changes:
minor_behavior_changes:
# *Changes that may cause incompatibilities for some users, but should not for most*
- area: dfp
change: |
Changed dynamic forward proxy so local reply errors include DNS resolution details. This behavior can be temporarily
disabled by setting the runtime feature ``envoy.reloadable_features.dns_details`` to false.
- area: grpc
change: |
Changes in ``AsyncStreamImpl`` now propagate tracing context headers in bidirectional streams when using
Expand Down
3 changes: 2 additions & 1 deletion envoy/network/dns.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ class DnsResolver {
* @param status supplies the final status of the resolution.
* @param response supplies the list of resolved IP addresses and TTLs.
*/
using ResolveCb = std::function<void(ResolutionStatus status, std::list<DnsResponse>&& response)>;
using ResolveCb = std::function<void(ResolutionStatus status, std::string details,
std::list<DnsResponse>&& response)>;

/**
* Initiate an async DNS resolution.
Expand Down
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ RUNTIME_GUARD(envoy_reloadable_features_defer_processing_backedup_streams);
RUNTIME_GUARD(envoy_reloadable_features_dfp_mixed_scheme);
RUNTIME_GUARD(envoy_reloadable_features_disallow_quic_client_udp_mmsg);
RUNTIME_GUARD(envoy_reloadable_features_dns_cache_set_first_resolve_complete);
RUNTIME_GUARD(envoy_reloadable_features_dns_details);
RUNTIME_GUARD(envoy_reloadable_features_dns_nodata_noname_is_success);
RUNTIME_GUARD(envoy_reloadable_features_dns_reresolve_on_eai_again);
RUNTIME_GUARD(envoy_reloadable_features_edf_lb_host_scheduler_init_fix);
Expand Down
1 change: 1 addition & 0 deletions source/common/stream_info/stream_info_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ struct StreamInfoImpl : public StreamInfo {
void setResponseCode(uint32_t code) override { response_code_ = code; }

void setResponseCodeDetails(absl::string_view rc_details) override {
// Callers should sanitize with StringUtil::replaceAllEmptySpace if necessary.
ASSERT(!StringUtil::hasEmptySpace(rc_details));
response_code_details_.emplace(rc_details);
}
Expand Down
4 changes: 2 additions & 2 deletions source/extensions/clusters/strict_dns/strict_dns_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,10 @@ void StrictDnsClusterImpl::ResolveTarget::startResolve() {

active_query_ = parent_.dns_resolver_->resolve(
dns_address_, parent_.dns_lookup_family_,
[this](Network::DnsResolver::ResolutionStatus status,
[this](Network::DnsResolver::ResolutionStatus status, std::string details,
std::list<Network::DnsResponse>&& response) -> void {
active_query_ = nullptr;
ENVOY_LOG(trace, "async DNS resolution complete for {}", dns_address_);
ENVOY_LOG(trace, "async DNS resolution complete for {} details {}", dns_address_, details);

std::chrono::milliseconds final_refresh_rate = parent_.dns_refresh_rate_ms_;

Expand Down
5 changes: 5 additions & 0 deletions source/extensions/common/dynamic_forward_proxy/dns_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ class DnsHostInfo {
* TTL policy
*/
virtual void touch() PURE;

/**
* Returns details about the last resolution.
*/
virtual std::string details() PURE;
};

using DnsHostInfoSharedPtr = std::shared_ptr<DnsHostInfo>;
Expand Down
26 changes: 15 additions & 11 deletions source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ void DnsCacheImpl::startCacheLoad(const std::string& host, uint16_t default_port
// If the DNS request was simply to create a host endpoint in a Dynamic Forward Proxy cluster,
// fast fail the look-up as the address is not needed.
if (is_proxy_lookup) {
finishResolve(host, Network::DnsResolver::ResolutionStatus::Success, {}, {}, true);
finishResolve(host, Network::DnsResolver::ResolutionStatus::Success, "proxy_resolve", {}, {},
true);
} else {
startResolve(host, *primary_host);
}
Expand Down Expand Up @@ -262,7 +263,7 @@ void DnsCacheImpl::onResolveTimeout(const std::string& host) {
ENVOY_LOG_EVENT(debug, "dns_cache_resolve_timeout", "host='{}' resolution timeout", host);
stats_.dns_query_timeout_.inc();
primary_host.active_query_->cancel(Network::ActiveDnsQuery::CancelReason::Timeout);
finishResolve(host, Network::DnsResolver::ResolutionStatus::Failure, {});
finishResolve(host, Network::DnsResolver::ResolutionStatus::Failure, "resolve_timeout", {});
}

void DnsCacheImpl::onReResolveAlarm(const std::string& host) {
Expand Down Expand Up @@ -342,16 +343,16 @@ void DnsCacheImpl::startResolve(const std::string& host, PrimaryHostInfo& host_i
stats_.dns_query_attempt_.inc();

host_info.timeout_timer_->enableTimer(timeout_interval_, nullptr);
host_info.active_query_ =
resolver_->resolve(host_info.host_info_->resolvedHost(), dns_lookup_family_,
[this, host](Network::DnsResolver::ResolutionStatus status,
std::list<Network::DnsResponse>&& response) {
finishResolve(host, status, std::move(response));
});
host_info.active_query_ = resolver_->resolve(
host_info.host_info_->resolvedHost(), dns_lookup_family_,
[this, host](Network::DnsResolver::ResolutionStatus status, std::string details,
std::list<Network::DnsResponse>&& response) {
finishResolve(host, status, details, std::move(response));
});
}

void DnsCacheImpl::finishResolve(const std::string& host,
Network::DnsResolver::ResolutionStatus status,
Network::DnsResolver::ResolutionStatus status, std::string details,
std::list<Network::DnsResponse>&& response,
absl::optional<MonotonicTime> resolution_time,
bool is_proxy_lookup) {
Expand Down Expand Up @@ -436,6 +437,7 @@ void DnsCacheImpl::finishResolve(const std::string& host,
current_address ? current_address->asStringView() : "<empty>",
new_address ? new_address->asStringView() : "<empty>");
primary_host_info->host_info_->setAddresses(new_address, std::move(address_list));
primary_host_info->host_info_->setDetails(details);

runAddUpdateCallbacks(host, primary_host_info->host_info_);
if (Runtime::runtimeFeatureEnabled(
Expand All @@ -444,6 +446,8 @@ void DnsCacheImpl::finishResolve(const std::string& host,
}
address_changed = true;
stats_.host_address_changed_.inc();
} else if (current_address == nullptr) {
primary_host_info->host_info_->setDetails(details);
}

if (first_resolve) {
Expand Down Expand Up @@ -635,8 +639,8 @@ void DnsCacheImpl::loadCacheEntries(
key, accumulateToString<Network::DnsResponse>(responses, [](const auto& dns_response) {
return dns_response.addrInfo().address_->asString();
}));
finishResolve(key, Network::DnsResolver::ResolutionStatus::Success, std::move(responses),
resolution_time);
finishResolve(key, Network::DnsResolver::ResolutionStatus::Success, "from_cache",
std::move(responses), resolution_time);
stats_.cache_load_.inc();
return KeyValueStore::Iterate::Continue;
};
Expand Down
13 changes: 12 additions & 1 deletion source/extensions/common/dynamic_forward_proxy/dns_cache_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,16 @@ class DnsCacheImpl : public DnsCache, Logger::Loggable<Logger::Id::forward_proxy
address_list_ = std::move(list);
}

void setDetails(std::string details) {
absl::WriterMutexLock lock{&resolve_lock_};
details_ = details;
}

std::string details() override {
absl::ReaderMutexLock lock{&resolve_lock_};
return details_;
}

std::chrono::steady_clock::duration lastUsedTime() const { return last_used_time_.load(); }

bool firstResolveComplete() const override {
Expand All @@ -168,6 +178,7 @@ class DnsCacheImpl : public DnsCache, Logger::Loggable<Logger::Id::forward_proxy
Network::Address::InstanceConstSharedPtr address_ ABSL_GUARDED_BY(resolve_lock_);
std::vector<Network::Address::InstanceConstSharedPtr>
address_list_ ABSL_GUARDED_BY(resolve_lock_);
std::string details_ ABSL_GUARDED_BY(resolve_lock_){"not_resolved"};

// Using std::chrono::steady_clock::duration is required for compilation within an atomic vs.
// using MonotonicTime.
Expand Down Expand Up @@ -212,7 +223,7 @@ class DnsCacheImpl : public DnsCache, Logger::Loggable<Logger::Id::forward_proxy
ABSL_LOCKS_EXCLUDED(primary_hosts_lock_);

void finishResolve(const std::string& host, Network::DnsResolver::ResolutionStatus status,
std::list<Network::DnsResponse>&& response,
std::string details, std::list<Network::DnsResponse>&& response,
absl::optional<MonotonicTime> resolution_time = {},
bool is_proxy_lookup = false);
void runAddUpdateCallbacks(const std::string& host, const DnsHostInfoSharedPtr& host_info);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ Http::FilterHeadersStatus ProxyFilter::decodeHeaders(Http::RequestHeaderMap& hea
auto const& host = result.host_info_;
latchTime(decoder_callbacks_, DNS_END);
if (!host.has_value() || !host.value()->address()) {
onDnsResolutionFail();
onDnsResolutionFail(host.has_value() ? *host : nullptr);
return Http::FilterHeadersStatus::StopIteration;
}
addHostAddressToFilterState(host.value()->address());
Expand Down Expand Up @@ -387,17 +387,27 @@ void ProxyFilter::onClusterInitTimeout() {
absl::nullopt, RcDetails::get().SubClusterWarmingTimeout);
}

void ProxyFilter::onDnsResolutionFail() {
void ProxyFilter::onDnsResolutionFail(
const Common::DynamicForwardProxy::DnsHostInfoSharedPtr host) {
if (isProxying()) {
decoder_callbacks_->continueDecoding();
return;
}

decoder_callbacks_->streamInfo().setResponseFlag(
StreamInfo::CoreResponseFlag::DnsResolutionFailed);
decoder_callbacks_->sendLocalReply(Http::Code::ServiceUnavailable,
ResponseStrings::get().DnsResolutionFailure, nullptr,
absl::nullopt, RcDetails::get().DnsResolutionFailure);
std::string details = "";
if ((Runtime::runtimeFeatureEnabled("envoy.reloadable_features.dns_details"))) {
if (!host) {
details = "no_host";
} else {
details = StringUtil::replaceAllEmptySpace(host->details());
ASSERT(details != "not_resolved");
}
}
decoder_callbacks_->sendLocalReply(
Http::Code::ServiceUnavailable, ResponseStrings::get().DnsResolutionFailure, nullptr,
absl::nullopt, absl::StrCat(RcDetails::get().DnsResolutionFailure, "{", details, "}"));
}

void ProxyFilter::onLoadDnsCacheComplete(
Expand All @@ -409,7 +419,7 @@ void ProxyFilter::onLoadDnsCacheComplete(
circuit_breaker_.reset();

if (!host_info->address()) {
onDnsResolutionFail();
onDnsResolutionFail(host_info);
return;
}
addHostAddressToFilterState(host_info->address());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ class ProxyFilter

private:
void addHostAddressToFilterState(const Network::Address::InstanceConstSharedPtr& address);
void onDnsResolutionFail();
void onDnsResolutionFail(
const Extensions::Common::DynamicForwardProxy::DnsHostInfoSharedPtr host_info);
bool isProxying();

const ProxyFilterConfigSharedPtr config_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ AppleDnsResolverImpl::startResolution(const std::string& dns_name,
ENVOY_LOG_EVENT(debug, "apple_dns_immediate_resolution",
"DNS resolver resolved ({}) to ({}) without issuing call to Apple API",
dns_name, address->asString());
callback(DnsResolver::ResolutionStatus::Success,
callback(DnsResolver::ResolutionStatus::Success, "apple_dns_success",
{DnsResponse(address, std::chrono::seconds(60))});
return {nullptr, true};
}
Expand Down Expand Up @@ -188,6 +188,7 @@ void AppleDnsResolverImpl::PendingResolution::onEventCallback(uint32_t events) {
// events indicates that the sd_ref state is broken.
// Therefore, finish resolving with an error.
pending_response_.status_ = ResolutionStatus::Failure;
pending_response_.details = absl::StrCat(error);
finishResolve();
}
}
Expand Down Expand Up @@ -228,7 +229,7 @@ void AppleDnsResolverImpl::PendingResolution::finishResolve() {
ENVOY_LOG_EVENT(debug, "apple_dns_resolution_complete",
"dns resolution for {} completed with status {}", dns_name_,
static_cast<int>(pending_response_.status_));
callback_(pending_response_.status_, std::move(finalAddressList()));
callback_(pending_response_.status_, pending_response_.details_, std::move(finalAddressList()));

if (owned_) {
ENVOY_LOG(debug, "Resolution for {} completed (async)", dns_name_);
Expand Down Expand Up @@ -335,6 +336,7 @@ void AppleDnsResolverImpl::PendingResolution::onDNSServiceGetAddrInfoReply(
parent_.chargeGetAddrInfoErrorStats(error_code);

pending_response_.status_ = ResolutionStatus::Failure;
pending_response_.details = absl::StrCat(error_code);
pending_response_.v4_responses_.clear();
pending_response_.v6_responses_.clear();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ class AppleDnsResolverImpl : public DnsResolver, protected Logger::Loggable<Logg
// onDNSServiceGetAddrInfoReply callback.
struct PendingResponse {
ResolutionStatus status_ = ResolutionStatus::Success;
std::string details_ = "not_set";
// `v4_response_received_` and `v6_response_received_` denote whether a callback from the
// `DNSServiceGetAddrInfo` call has been received for the IPv4 address family and IPv6
// address family, respectively. If the query protocol is set to kDNSServiceProtocol_IPv4 or
Expand Down
7 changes: 6 additions & 1 deletion source/extensions/network/dns_resolver/cares/dns_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ void DnsResolverImpl::AddrInfoPendingResolution::onAresGetAddrInfoCallback(

if (status == ARES_SUCCESS) {
pending_response_.status_ = ResolutionStatus::Success;
pending_response_.details_ = "cares_success";

if (addrinfo != nullptr && addrinfo->nodes != nullptr) {
bool can_process_v4 =
Expand Down Expand Up @@ -258,7 +259,10 @@ void DnsResolverImpl::AddrInfoPendingResolution::onAresGetAddrInfoCallback(
// Treat `ARES_ENODATA` or `ARES_ENOTFOUND` here as success to populate back the
// "empty records" response.
pending_response_.status_ = ResolutionStatus::Success;
pending_response_.details_ = "cares_norecords";
ASSERT(addrinfo == nullptr);
} else {
pending_response_.details_ = "cares_failure";
}

if (timeouts > 0) {
Expand Down Expand Up @@ -305,7 +309,8 @@ void DnsResolverImpl::PendingResolution::finishResolve() {
// TODO(chaoqin-li1123): remove try catch pattern here once we figure how to handle unexpected
// exception in fuzz tests.
TRY_NEEDS_AUDIT {
callback_(pending_response_.status_, std::move(pending_response_.address_list_));
callback_(pending_response_.status_, pending_response_.details_,
std::move(pending_response_.address_list_));
}
END_TRY
MULTI_CATCH(
Expand Down
1 change: 1 addition & 0 deletions source/extensions/network/dns_resolver/cares/dns_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class DnsResolverImpl : public DnsResolver, protected Logger::Loggable<Logger::I
struct PendingResponse {
ResolutionStatus status_;
std::list<DnsResponse> address_list_;
std::string details_{"not_set"};
};

// Note: pending_response_ is constructed with ResolutionStatus::Failure by default and
Expand Down
18 changes: 10 additions & 8 deletions source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ void GetAddrInfoDnsResolver::resolveThreadRoutine() {

// For mock testing make sure the getaddrinfo() response is freed prior to the post.
std::pair<ResolutionStatus, std::list<DnsResponse>> response;
std::string details;
{
addrinfo hints;
memset(&hints, 0, sizeof(hints));
Expand Down Expand Up @@ -172,16 +173,17 @@ void GetAddrInfoDnsResolver::resolveThreadRoutine() {
next_query->dns_name_, gai_strerror(rc.return_value_), errorDetails(rc.errno_));
response = std::make_pair(ResolutionStatus::Failure, std::list<DnsResponse>());
}
details = gai_strerror(rc.return_value_);
}

dispatcher_.post(
[finished_query = std::move(next_query), response = std::move(response)]() mutable {
if (finished_query->cancelled_) {
ENVOY_LOG(debug, "dropping cancelled query [{}]", finished_query->dns_name_);
} else {
finished_query->callback_(response.first, std::move(response.second));
}
});
dispatcher_.post([finished_query = std::move(next_query), response = std::move(response),
details = std::move(details)]() mutable {
if (finished_query->cancelled_) {
ENVOY_LOG(debug, "dropping cancelled query [{}]", finished_query->dns_name_);
} else {
finished_query->callback_(response.first, details, std::move(response.second));
}
});
}

ENVOY_LOG(debug, "getaddrinfo resolver thread exiting");
Expand Down
1 change: 1 addition & 0 deletions test/extensions/common/dynamic_forward_proxy/mocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class MockDnsHostInfo : public DnsHostInfo {
MOCK_METHOD(bool, isIpAddress, (), (const));
MOCK_METHOD(void, touch, ());
MOCK_METHOD(bool, firstResolveComplete, (), (const));
MOCK_METHOD(std::string, details, ());

Network::Address::InstanceConstSharedPtr address_;
std::vector<Network::Address::InstanceConstSharedPtr> address_list_;
Expand Down
Loading

0 comments on commit 17dd04c

Please sign in to comment.