chore: asynchronous IO for connection fiber #6069
New workflow file (81 added lines):

@@ -0,0 +1,81 @@
name: RegTests IoLoopV2

# Manually triggered only
on:
  workflow_dispatch:
  push:

jobs:
  build:
    strategy:
      matrix:
        # Test on these containers
        container: ["ubuntu-dev:20"]
        proactor: [Uring]
        build-type: [Debug, Release]
        runner: [ubuntu-latest, [self-hosted, linux, ARM64]]

    runs-on: ${{ matrix.runner }}

    container:
      image: ghcr.io/romange/${{ matrix.container }}
      options: --security-opt seccomp=unconfined --sysctl "net.ipv6.conf.all.disable_ipv6=0"
      volumes:
        - /var/crash:/var/crash

    steps:
      - uses: actions/checkout@v5
        with:
          submodules: true

      - name: Print environment info
        run: |
          cat /proc/cpuinfo
          ulimit -a
          env

      - name: Configure & Build
        run: |
          # -no-pie disables address randomization so we can symbolize stack traces.
          cmake -B ${GITHUB_WORKSPACE}/build -DCMAKE_BUILD_TYPE=${{matrix.build-type}} -GNinja \
            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DPRINT_STACKTRACES_ON_SIGNAL=ON \
            -DCMAKE_CXX_FLAGS=-no-pie -DHELIO_STACK_CHECK:STRING=4096
          cd ${GITHUB_WORKSPACE}/build && ninja dragonfly
          pwd
          ls -l ..

      - name: Run regression tests action
        uses: ./.github/actions/regression-tests
        with:
          dfly-executable: dragonfly
          gspace-secret: ${{ secrets.GSPACES_BOT_DF_BUILD }}
          build-folder-name: build
          filter: ${{ matrix.build-type == 'Release' && 'not debug_only and not tls' || 'not opt_only and not tls' }}
          aws-access-key-id: ${{ secrets.AWS_S3_ACCESS_KEY }}
          aws-secret-access-key: ${{ secrets.AWS_S3_ACCESS_SECRET }}
          s3-bucket: ${{ secrets.S3_REGTEST_BUCKET }}
          df-arg: "expiremental_io_loop_v2"

      - name: Upload logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: logs
          path: /tmp/failed/*

      - name: Copy binary on a self-hosted runner
        if: failure()
        run: |
          # We must use sh syntax.
          if [ "$RUNNER_ENVIRONMENT" = "self-hosted" ]; then
            cd ${GITHUB_WORKSPACE}/build
            timestamp=$(date +%Y-%m-%d_%H:%M:%S)
            mv ./dragonfly /var/crash/dragonfly_${timestamp}
          fi

  lint-test-chart:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: ./.github/actions/lint-test-chart
Other files changed:
  +33 −0   .agent/README.md
  +460 −0  .agent/cpp_guidelines.md
  +140 −0  .github/copilot-instructions.md
  +19 −0   .github/workflows/ci.yml
  +2 −2    util/fibers/uring_proactor.cc
Changes to the connection source file:
@@ -28,6 +28,7 @@
 #include "facade/service_interface.h"
 #include "facade/socket_utils.h"
 #include "io/file.h"
+#include "util/fiber_socket_base.h"
 #include "util/fibers/fibers.h"
 #include "util/fibers/proactor_base.h"

@@ -112,6 +113,8 @@ ABSL_FLAG(uint32_t, pipeline_wait_batch_usec, 0,
           "If non-zero, waits for this time for more I/O "
           " events to come for the connection in case there is only one command in the pipeline. ");

+ABSL_FLAG(bool, expiremental_io_loop_v2, false, "new io loop");

 using namespace util;
 using namespace std;
 using absl::GetFlag;

@@ -695,6 +698,8 @@ void Connection::OnShutdown() {
   VLOG(1) << "Connection::OnShutdown";

   BreakOnce(POLLHUP);
+  io_ec_ = make_error_code(errc::connection_aborted);
+  io_event_.notify_one();
 }

 void Connection::OnPreMigrateThread() {

@@ -1096,7 +1101,12 @@ void Connection::ConnectionFlow() {
   // Main loop.
   if (parse_status != ERROR && !ec) {
     UpdateIoBufCapacity(io_buf_, stats_, [&]() { io_buf_.EnsureCapacity(64); });
-    auto res = IoLoop();
+    variant<error_code, Connection::ParserStatus> res;
+    if (GetFlag(FLAGS_expiremental_io_loop_v2) && !is_tls_) {
+      res = IoLoopV2();
+    } else {
+      res = IoLoop();
+    }

     if (holds_alternative<error_code>(res)) {
       ec = get<error_code>(res);

@@ -1154,6 +1164,10 @@ void Connection::ConnectionFlow() {
     }
   }

+  if (GetFlag(FLAGS_expiremental_io_loop_v2) && !is_tls_) {
+    socket_->ResetOnRecvHook();
+  }

   if (ec && !FiberSocketBase::IsConnClosed(ec)) {
     string conn_info = service_->GetContextInfo(cc_.get()).Format();
     LOG_EVERY_T(WARNING, 1) << "Socket error for connection " << conn_info << " " << GetName()

@@ -1225,6 +1239,7 @@ Connection::ParserStatus Connection::ParseRedis(unsigned max_busy_cycles) {
   auto dispatch_async = [this]() -> MessageHandle { return {FromArgs(tmp_parse_args_)}; };

   io::Bytes read_buffer = io_buf_.InputBuffer();
+  size_t total = 0;
   do {
     result = redis_parser_->Parse(read_buffer, &consumed, &tmp_parse_args_);
     request_consumed_bytes_ += consumed;

@@ -1258,6 +1273,7 @@ Connection::ParserStatus Connection::ParseRedis(unsigned max_busy_cycles) {
         << "Redis parser error: " << result << " during parse: " << ToSV(read_buffer);
   }
   read_buffer.remove_prefix(consumed);
+  total += consumed;

   // We must yield from time to time to allow other fibers to run.
   // Specifically, if a client sends a huge chunk of data resulting in a very long pipeline,

@@ -1268,7 +1284,7 @@ Connection::ParserStatus Connection::ParseRedis(unsigned max_busy_cycles) {
   }
   } while (RedisParser::OK == result && read_buffer.size() > 0 && !reply_builder_->GetError());

-  io_buf_.ConsumeInput(io_buf_.InputLen());
+  io_buf_.ConsumeInput(total);
Collaborator: What was the reason for this change?

Contributor (Author): Because above we have a preemption point, and io_buf_.InputLen() might change because we read more data in the meantime.
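To make the hazard concrete, here is a small self-contained toy (illustrative names only, not Dragonfly code) showing why consuming InputLen() after a preemption point can discard bytes that arrived during the yield, while consuming the tracked total cannot:

#include <cstddef>
#include <deque>
#include <iostream>

// Toy stand-in for io_buf_: only the two operations relevant to the bug.
struct ToyBuf {
  std::deque<char> data;
  size_t InputLen() const { return data.size(); }
  void ConsumeInput(size_t n) { data.erase(data.begin(), data.begin() + n); }
};

int main() {
  ToyBuf buf;
  buf.data.assign(100, 'x');  // 100 bytes pending when the parse loop starts
  size_t total = 100;         // the parser consumed exactly these 100 bytes

  // The parse loop yields; while this fiber sleeps, the recv hook appends more.
  buf.data.insert(buf.data.end(), 50, 'y');

  // Old code: buf.ConsumeInput(buf.InputLen()) would also drop the 50 new,
  // never-parsed bytes. The fixed code consumes only what was actually parsed:
  buf.ConsumeInput(total);
  std::cout << "unparsed bytes preserved: " << buf.InputLen() << "\n";  // prints 50
}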
   parser_error_ = result;
   if (result == RedisParser::OK)

@@ -1430,7 +1446,7 @@
   return recv_sz;
 }

-auto Connection::IoLoop() -> variant<error_code, ParserStatus> {
+variant<error_code, Connection::ParserStatus> Connection::IoLoop() {
   error_code ec;
   ParserStatus parse_status = OK;
@@ -2161,6 +2177,162 @@ bool Connection::WeakRef::operator==(const WeakRef& other) const {
   return client_id_ == other.client_id_;
 }

+void Connection::DoReadOnRecv(const util::FiberSocketBase::RecvNotification& n) {
+  if (std::holds_alternative<std::error_code>(n.read_result)) {
+    io_ec_ = std::get<std::error_code>(n.read_result);
+    return;
+  }
+
+  // TODO: non-epoll API via EnableRecvMultishot
+  // if (std::holds_alternative<io::MutableBytes>(n.read_result))
+  using RecvNot = util::FiberSocketBase::RecvNotification::RecvCompletion;
+  if (std::holds_alternative<RecvNot>(n.read_result)) {
+    if (!std::get<bool>(n.read_result)) {
+      io_ec_ = make_error_code(errc::connection_aborted);
+      return;
+    }
+
+    if (io_buf_.AppendLen() == 0) {
+      // We will regrow in IoLoop.
+      return;
+    }
+
+    io::MutableBytes buf = io_buf_.AppendBuffer();
+    io::Result<size_t> res = socket_->TryRecv(buf);
+
+    // Error path.
+    if (!res) {
+      auto ec = res.error();
+      // EAGAIN and EWOULDBLOCK
+      if (ec == errc::resource_unavailable_try_again || ec == errc::operation_would_block) {
+        return;
+      }
+
+      if (ec == errc::connection_aborted || ec == errc::connection_reset) {
+        // The peer can shut down the connection abruptly.
+        io_ec_ = ec;
+        return;
+      }
+
+      LOG_EVERY_T(ERROR, 10) << "Recv error: " << ec;
+      io_ec_ = ec;
+      return;
+    }
+
+    if (*res == 0) {
+      io_ec_ = make_error_code(errc::connection_aborted);
+      return;
+    }
+    // A recv call can return fewer bytes than requested even if the
+    // socket buffer actually contains enough data to satisfy the full request.
+    // TODO: maybe worth looping here and issuing recv calls until one fails
+    // with EAGAIN or EWOULDBLOCK. The problem there is that we need to handle
+    // resizing if AppendBuffer is zero.
+    io_buf_.CommitWrite(*res);
+    return;
+  }
+
+  DCHECK(false) << "Should not reach here";
+}
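The TODO above suggests draining the socket in a loop. A minimal sketch of that idea, built from the calls this diff already uses (TryRecv, AppendBuffer, CommitWrite); the in-place regrow step and the io/io.h include are assumptions, since the PR instead returns early and regrows in the IO loop:

#include <system_error>

#include "io/io.h"                    // assumed location of io::Result
#include "util/fiber_socket_base.h"

// Sketch only: drain a non-blocking socket until EAGAIN/EWOULDBLOCK.
// IoBuf is a template stand-in for the io_buf_ type used by Connection.
template <typename IoBuf>
std::error_code DrainSocket(util::FiberSocketBase* sock, IoBuf* io_buf) {
  while (true) {
    if (io_buf->AppendLen() == 0) {
      // Hypothetical: grow in place; the PR defers regrowth to IoLoopV2.
      io_buf->Reserve(io_buf->Capacity() * 2);
    }
    io::Result<size_t> res = sock->TryRecv(io_buf->AppendBuffer());
    if (!res) {
      std::error_code ec = res.error();
      if (ec == std::errc::resource_unavailable_try_again ||
          ec == std::errc::operation_would_block)
        return {};  // Socket fully drained; wait for the next readiness event.
      return ec;    // A real error: surface it to the caller.
    }
    if (*res == 0)  // Orderly shutdown by the peer.
      return std::make_error_code(std::errc::connection_aborted);
    io_buf->CommitWrite(*res);  // Make the received bytes visible to the parser.
  }
}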
+variant<error_code, Connection::ParserStatus> Connection::IoLoopV2() {
+  error_code ec;
+  ParserStatus parse_status = OK;
+
+  size_t max_iobuf_len = GetFlag(FLAGS_max_client_iobuf_len);
+
+  auto* peer = socket_.get();
+  recv_buf_.res_len = 0;
+
+  // TODO: EnableRecvMultishot
+
+  // Breaks with TLS. RegisterOnRecv is unimplemented there.
+  peer->RegisterOnRecv([this](const FiberSocketBase::RecvNotification& n) {
+    DoReadOnRecv(n);
+    io_event_.notify_one();
+  });
+
+  do {
+    HandleMigrateRequest();
+
+    // We *must* poll again for readiness. The event handler we registered above
+    // with RegisterOnRecv() will get called *once* for each socket readiness event.
+    // So, when we get notified below in io_event_.wait() we might read less data
+    // than is available because io_buf_ does not have enough capacity. If we loop
+    // and do not attempt to read from the socket again, we can deadlock. To avoid
+    // this, we poll once for readiness before preempting.
+    DoReadOnRecv(FiberSocketBase::RecvNotification{true});
+    fb2::NoOpLock noop;
+    io_event_.wait(
+        noop, [this]() { return io_buf_.InputLen() > 0 || io_ec_ || io_buf_.AppendLen() == 0; });

Collaborator: It is possible to improve this, for sure. "We must" is too strong a statement: you can introduce state that tracks whether the read is needed and when it is not.
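One possible shape of the reviewer's suggestion, as a toy sketch; the class, flag, and all names here are invented for illustration and are not part of the PR:

#include <cstddef>

// Toy sketch of tracking whether a re-poll is needed, instead of polling
// unconditionally on every loop iteration.
class ReadState {
 public:
  // Called from the recv hook: if the buffer filled up, the socket may still
  // hold unread bytes, so the next loop iteration should poll again.
  void OnRecv(size_t appended, size_t free_space_left) {
    repoll_needed_ = (appended > 0 && free_space_left == 0);
  }

  // Called at the top of the IO loop: poll only when it can make progress.
  bool ShouldPoll() const { return repoll_needed_; }
  void MarkPolled() { repoll_needed_ = false; }

 private:
  bool repoll_needed_ = false;
};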
+    if (io_ec_) {
+      LOG_IF(WARNING, cntx()->replica_conn) << "async io error: " << io_ec_;
+      return std::exchange(io_ec_, {});
+    }
+
+    phase_ = PROCESS;
+    bool is_iobuf_full = io_buf_.AppendLen() == 0;

Contributor (Author): No functional change from here onwards compared to IoLoop.
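For readers less familiar with the pattern: the recv hook publishes state and calls io_event_.notify_one(), while the connection fiber waits on a predicate; this is the classic condition-variable handshake. A simplified standalone model (a mental model only, using std::condition_variable and threads in place of helio's fiber event):

#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <system_error>

// Simplified model of the io_event_ handshake; the wait predicate mirrors
// the one in IoLoopV2.
struct IoEventModel {
  std::mutex mu;
  std::condition_variable cv;
  size_t input_len = 0;      // stands in for io_buf_.InputLen()
  size_t append_len = 4096;  // stands in for io_buf_.AppendLen()
  std::error_code io_ec;     // stands in for io_ec_

  // Producer side: what DoReadOnRecv() plus notify_one() accomplish.
  void OnBytesReceived(size_t n) {
    {
      std::lock_guard<std::mutex> lk(mu);
      input_len += n;
      append_len -= n;
    }
    cv.notify_one();
  }

  // Consumer side: what the connection fiber does before parsing.
  void WaitForWork() {
    std::unique_lock<std::mutex> lk(mu);
    cv.wait(lk, [this] { return input_len > 0 || io_ec || append_len == 0; });
  }
};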
+
+    if (io_buf_.InputLen() > 0) {
+      if (redis_parser_) {
+        parse_status = ParseRedis(max_busy_read_cycles_cached);
+      } else {
+        DCHECK(memcache_parser_);
+        parse_status = ParseMemcache();
+      }
+    } else {
+      parse_status = NEED_MORE;
+      DCHECK(io_buf_.AppendLen() == 0);
+    }
+
+    if (reply_builder_->GetError()) {
+      return reply_builder_->GetError();
+    }
+
+    if (parse_status == NEED_MORE) {
+      parse_status = OK;
+
+      size_t capacity = io_buf_.Capacity();
+      if (capacity < max_iobuf_len) {
+        size_t parser_hint = 0;
+        if (redis_parser_)
+          parser_hint = redis_parser_->parselen_hint();  // Could be done for MC as well.
+
+        // If we got a partial request and we managed to parse its
+        // length, make sure we have space to store it instead of
+        // increasing space incrementally.
+        // (Note: The buffer object only works in power-of-2 sizes,
+        // so there's no danger of accidental O(n^2) behavior.)
+        if (parser_hint > capacity) {
+          UpdateIoBufCapacity(io_buf_, stats_,
+                              [&]() { io_buf_.Reserve(std::min(max_iobuf_len, parser_hint)); });
+        }
+
+        // If we got a partial request because the iobuf was full, grow it up to
+        // a reasonable limit to save on Recv() calls.
+        if (is_iobuf_full && capacity < max_iobuf_len / 2) {
+          // The last io used most of the io_buf to the end.
+          UpdateIoBufCapacity(io_buf_, stats_, [&]() {
+            io_buf_.Reserve(capacity * 2);  // Valid growth range.
+          });
+        }
+
+        if (io_buf_.AppendLen() == 0U) {
+          // This can happen with memcached but not with RedisParser, because RedisParser
+          // fully consumes the passed buffer.
+          LOG_EVERY_T(WARNING, 10)
+              << "Maximum io_buf length reached, consider increasing the max_client_iobuf_len flag";
+        }
+      }
+    } else if (parse_status != OK) {
+      break;
+    }
+  } while (peer->IsOpen());
+
+  return parse_status;
+}

 void ResetStats() {
   auto& cstats = tl_facade_stats->conn_stats;
   cstats.pipelined_cmd_cnt = 0;
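Restated outside the loop, the NEED_MORE growth policy in IoLoopV2 above boils down to a small decision function. This sketch collapses the two sequential Reserve() calls into a single "next capacity" computation; the function name and signature are illustrative:

#include <algorithm>
#include <cstddef>

// Pure restatement of the iobuf growth policy in IoLoopV2's NEED_MORE branch.
// Returns the capacity to reserve, or 0 if no growth should happen.
size_t NextIoBufCapacity(size_t capacity, size_t parser_hint, bool is_full,
                         size_t max_len) {
  if (capacity >= max_len)
    return 0;                               // already at the configured cap
  if (parser_hint > capacity)
    return std::min(max_len, parser_hint);  // jump straight to the known request size
  if (is_full && capacity < max_len / 2)
    return capacity * 2;                    // last read filled the buffer; double it
  return 0;                                 // enough room; no growth needed
}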
Collaborator: You export df-arg only if it's 'epoll'; I do not understand what happens here.

Contributor (Author): I messed up a rebase there, I think. Regardless, it's fixed.