From 25db9fd1eabfd66639f6c6fa6dcdf26a6bbabded Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Fri, 17 Nov 2023 10:45:16 -0800 Subject: [PATCH] Add more documentation for the FFI API --- capi/include/hyper.h | 10 ++-- src/ffi/body.rs | 62 +++++++++++++++++++---- src/ffi/client.rs | 36 ++++++++++++-- src/ffi/http_types.rs | 19 +++++++ src/ffi/io.rs | 39 +++++++++++---- src/ffi/task.rs | 112 ++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 246 insertions(+), 32 deletions(-) diff --git a/capi/include/hyper.h b/capi/include/hyper.h index 33c1d09782..6a0363f9cb 100644 --- a/capi/include/hyper.h +++ b/capi/include/hyper.h @@ -168,7 +168,7 @@ typedef struct hyper_executor hyper_executor; typedef struct hyper_headers hyper_headers; /* - An IO object used to represent a socket or similar concept. + A read/write handle for a specific connection. */ typedef struct hyper_io hyper_io; @@ -224,12 +224,12 @@ struct hyper_body *hyper_body_new(void); void hyper_body_free(struct hyper_body *body); /* - Return a task that will poll the body for the next buffer of data. + Create a task that will poll a response body for the next buffer of data. */ struct hyper_task *hyper_body_data(struct hyper_body *body); /* - Return a task that will poll the body and execute the callback with each + Create a task that will poll the body and execute the callback with each */ struct hyper_task *hyper_body_foreach(struct hyper_body *body, hyper_body_foreach_callback func, @@ -241,7 +241,7 @@ struct hyper_task *hyper_body_foreach(struct hyper_body *body, void hyper_body_set_userdata(struct hyper_body *body, void *userdata); /* - Set the data callback for this body. + Set the outgoing data callback for this body. 
*/ void hyper_body_set_data_func(struct hyper_body *body, hyper_body_data_callback func); @@ -523,7 +523,7 @@ void hyper_task_set_userdata(struct hyper_task *task, void *userdata); void *hyper_task_userdata(struct hyper_task *task); /* - Copies a waker out of the task context. + Creates a waker associated with the task context. */ struct hyper_waker *hyper_context_waker(struct hyper_context *cx); diff --git a/src/ffi/body.rs b/src/ffi/body.rs index 91c5c7342f..8bf899b06d 100644 --- a/src/ffi/body.rs +++ b/src/ffi/body.rs @@ -11,9 +11,39 @@ use super::{UserDataPointer, HYPER_ITER_CONTINUE}; use crate::body::{Bytes, Frame, Incoming as IncomingBody}; /// A streaming HTTP body. +/// +/// This is used both for sending requests (with `hyper_request_set_body`) and +/// for receiving responses (with `hyper_response_body`). +/// +/// For outgoing request bodies, call `hyper_body_set_data_func` to provide the +/// data. +/// +/// For incoming response bodies, call `hyper_body_data` to get a task that will +/// yield a chunk of data each time it is polled. That task must then be +/// added to the executor with `hyper_executor_push`. +/// +/// Methods: +/// +/// - hyper_body_new: Create a new “empty” body. +/// - hyper_body_set_userdata: Set userdata on this body, which will be passed to callback functions. +/// - hyper_body_set_data_func: Set the data callback for this body. +/// - hyper_body_data: Return a task that will poll the body for the next buffer of data. +/// - hyper_body_foreach: Return a task that will poll the body and execute the callback with each body chunk that is received. +/// - hyper_body_free: Free a body. pub struct hyper_body(pub(super) IncomingBody); /// A buffer of bytes that is sent or received on a `hyper_body`. +/// +/// Obtain one of these in the callback of `hyper_body_foreach` or by receiving +/// a task of type `HYPER_TASK_BUF` from `hyper_executor_poll` (after calling +/// `hyper_body_data` and pushing the resulting task).
+/// +/// Methods: +/// +/// - hyper_buf_bytes: Get a pointer to the bytes in this buffer. +/// - hyper_buf_copy: Create a new hyper_buf * by copying the provided bytes. +/// - hyper_buf_free: Free this buffer. +/// - hyper_buf_len: Get the length of the bytes this buffer contains. pub struct hyper_buf(pub(crate) Bytes); pub(crate) struct UserBody { @@ -51,20 +81,31 @@ ffi_fn! { } ffi_fn! { - /// Return a task that will poll the body for the next buffer of data. + /// Create a task that will poll a response body for the next buffer of data. /// - /// The task value may have different types depending on the outcome: + /// The task may have different types depending on the outcome: /// /// - `HYPER_TASK_BUF`: Success, and more data was received. /// - `HYPER_TASK_ERROR`: An error retrieving the data. /// - `HYPER_TASK_EMPTY`: The body has finished streaming data. /// + /// When the application receives the task from `hyper_executor_poll`, + /// if the task type is `HYPER_TASK_BUF`, it should cast the task to + /// `hyper_buf *` and consume all the bytes in the buffer. Then + /// the application should call `hyper_body_data` again for the same + /// `hyper_body *`, to create a task for the next buffer of data. + /// Repeat until the polled task type is `HYPER_TASK_ERROR` or + /// `HYPER_TASK_EMPTY`. + /// /// To avoid a memory leak, the task must eventually be consumed by /// `hyper_task_free`, or taken ownership of by `hyper_executor_push` /// without subsequently being given back by `hyper_executor_poll`. /// - /// This does not consume the `hyper_body *`, so it may be used to again. - /// However, it MUST NOT be used or freed until the related task completes. + /// This does not consume the `hyper_body *`, so it may be used again. + /// However, the `hyper_body *` MUST NOT be used or freed until the + /// related task is returned from `hyper_executor_poll`. + /// + /// For a more convenient method, see also `hyper_body_foreach`. 
fn hyper_body_data(body: *mut hyper_body) -> *mut hyper_task { // This doesn't take ownership of the Body, so don't allow destructor let mut body = ManuallyDrop::new(non_null!(Box::from_raw(body) ?= ptr::null_mut())); @@ -88,18 +129,21 @@ ffi_fn! { } ffi_fn! { - /// Return a task that will poll the body and execute the callback with each + /// Create a task that will poll the body and execute the callback with each /// body chunk that is received. /// /// To avoid a memory leak, the task must eventually be consumed by /// `hyper_task_free`, or taken ownership of by `hyper_executor_push` /// without subsequently being given back by `hyper_executor_poll`. /// - /// The `hyper_buf` pointer is only a borrowed reference, it cannot live outside - /// the execution of the callback. You must make a copy to retain it. + /// The `hyper_buf` pointer is only a borrowed reference. It cannot live outside + /// the execution of the callback. You must make a copy of the bytes to retain them. /// /// The callback should return `HYPER_ITER_CONTINUE` to continue iterating - /// chunks as they are received, or `HYPER_ITER_BREAK` to cancel. + /// chunks as they are received, or `HYPER_ITER_BREAK` to cancel. Each + /// invocation of the callback must consume all the bytes it is provided. + /// There is no mechanism to signal to Hyper that only a subset of bytes were + /// consumed. /// /// This will consume the `hyper_body *`, you shouldn't use it anymore or free it. fn hyper_body_foreach(body: *mut hyper_body, func: hyper_body_foreach_callback, userdata: *mut c_void) -> *mut hyper_task { @@ -129,7 +173,7 @@ ffi_fn! { } ffi_fn! { - /// Set the data callback for this body. + /// Set the outgoing data callback for this body. /// /// The callback is called each time hyper needs to send more data for the /// body. It is passed the value from `hyper_body_set_userdata`. 
diff --git a/src/ffi/client.rs b/src/ffi/client.rs index 4de81d9b06..4acda891cf 100644 --- a/src/ffi/client.rs +++ b/src/ffi/client.rs @@ -23,9 +23,39 @@ pub struct hyper_clientconn_options { /// An HTTP client connection handle. /// -/// These are used to send a request on a single connection. It's possible to -/// send multiple requests on a single connection, such as when HTTP/1 -/// keep-alive or HTTP/2 is used. +/// These are used to send one or more requests on a single connection. +/// +/// It's possible to send multiple requests on a single connection, such +/// as when HTTP/1 keep-alive or HTTP/2 is used. +/// +/// To create a hyper_clientconn: +/// +/// 1. Create a hyper_clientconn_options with hyper_clientconn_options_new. +/// 2. Create a hyper_io with hyper_io_new. +/// 3. Call hyper_clientconn_handshake with the hyper_io and hyper_clientconn_options. +/// This creates a hyper_task. +/// 4. Add the hyper_task to an executor with hyper_executor_push. +/// 5. Poll that executor until it yields a task of type HYPER_TASK_CLIENTCONN. +/// 6. Extract the hyper_clientconn from the task with hyper_task_value. +/// This will require a cast from void * to hyper_clientconn *. +/// +/// This process results in a hyper_clientconn that permanently owns the +/// hyper_io. Because the hyper_io in turn owns a TCP or TLS connection, that means +/// the hyper_clientconn owns the connection for both the clientconn's lifetime +/// and the connection's lifetime. +/// +/// In other words, each connection (hyper_io) must have exactly one hyper_clientconn +/// associated with it. That's because hyper_clientconn_handshake sends the +/// [HTTP/2 Connection Preface] (for HTTP/2 connections). Since that preface can't +/// be sent twice, handshake can't be called twice. 
+/// +/// [HTTP/2 Connection Preface]: https://datatracker.ietf.org/doc/html/rfc9113#name-http-2-connection-preface +/// +/// Methods: +/// +/// - hyper_clientconn_handshake: Starts an HTTP client connection handshake using the provided IO transport and options. +/// - hyper_clientconn_send: Send a request on the client connection. +/// - hyper_clientconn_free: Free a hyper_clientconn *. pub struct hyper_clientconn { tx: Tx, } diff --git a/src/ffi/http_types.rs b/src/ffi/http_types.rs index a4d6b32a2c..33ce78448f 100644 --- a/src/ffi/http_types.rs +++ b/src/ffi/http_types.rs @@ -15,11 +15,30 @@ use crate::{HeaderMap, Method, Request, Response, Uri}; pub struct hyper_request(pub(super) Request); /// An HTTP response. +/// +/// This is obtained when `hyper_executor_poll` returns a `hyper_task` +/// of type `HYPER_TASK_RESPONSE`. To figure out which request this response +/// corresponds to, check the userdata of the task, which you should +/// previously have set to an application-specific identifier for the +/// request. +/// +/// Methods: +/// +/// - hyper_response_status: Get the HTTP-Status code of this response. +/// - hyper_response_version: Get the HTTP version used by this response. +/// - hyper_response_reason_phrase: Get a pointer to the reason-phrase of this response. +/// - hyper_response_reason_phrase_len: Get the length of the reason-phrase of this response. +/// - hyper_response_headers: Gets a reference to the HTTP headers of this response. +/// - hyper_response_body: Take ownership of the body of this response. +/// - hyper_response_free: Free an HTTP response. pub struct hyper_response(pub(super) Response); /// An HTTP header map. /// /// These can be part of a request or response. +/// +/// Obtain a pointer to read or modify these from `hyper_request_headers` +/// or `hyper_response_headers`. 
#[derive(Clone)] pub struct hyper_headers { pub(super) headers: HeaderMap, diff --git a/src/ffi/io.rs b/src/ffi/io.rs index c1ba87a02b..bfd51692eb 100644 --- a/src/ffi/io.rs +++ b/src/ffi/io.rs @@ -19,7 +19,20 @@ type hyper_io_read_callback = type hyper_io_write_callback = extern "C" fn(*mut c_void, *mut hyper_context<'_>, *const u8, size_t) -> size_t; -/// An IO object used to represent a socket or similar concept. +/// A read/write handle for a specific connection. +/// +/// This owns a specific TCP or TLS connection for the lifetime of +/// that connection. It contains a read and write callback, as well as a +/// void *userdata. Typically the userdata will point to a struct +/// containing a file descriptor and a TLS context. +/// +/// Methods: +/// +/// - hyper_io_new: Create a new IO type used to represent a transport. +/// - hyper_io_set_read: Set the read function for this IO transport. +/// - hyper_io_set_userdata: Set the user data pointer for this IO to some value. +/// - hyper_io_set_write: Set the write function for this IO transport. +/// - hyper_io_free: Free an IO handle. pub struct hyper_io { read: hyper_io_read_callback, write: hyper_io_write_callback, @@ -32,6 +45,11 @@ ffi_fn! { /// The read and write functions of this transport should be set with /// `hyper_io_set_read` and `hyper_io_set_write`. /// + /// It is expected that the underlying transport is non-blocking. When + /// a read or write callback can't make progress because there is no + /// data available yet, it should use the `hyper_waker` mechanism to + /// arrange to be called again when data is available. + /// /// To avoid a memory leak, the IO handle must eventually be consumed by /// `hyper_io_free` or `hyper_clientconn_handshake`. fn hyper_io_new() -> *mut hyper_io { @@ -72,10 +90,11 @@ ffi_fn! { /// unless you have already written them yourself. It is also undefined behavior /// to return that more bytes have been written than actually set on the `buf`. 
/// - /// If there is no data currently available, a waker should be claimed from - /// the `ctx` and registered with whatever polling mechanism is used to signal - /// when data is available later on. The return value should be - /// `HYPER_IO_PENDING`. + /// If there is no data currently available, the callback should create a + /// `hyper_waker` from its `hyper_context` argument and register the waker + /// with whatever polling mechanism is used to signal when data is available + /// later on. The return value should be `HYPER_IO_PENDING`. See the + /// documentation for `hyper_waker`. /// /// If there is an irrecoverable error reading data, then `HYPER_IO_ERROR` /// should be the return value. @@ -90,11 +109,11 @@ ffi_fn! { /// Data from the `buf` pointer should be written to the transport, up to /// `buf_len` bytes. The number of bytes written should be the return value. /// - /// If no data can currently be written, the `waker` should be cloned and - /// registered with whatever polling mechanism is used to signal when data - /// is available later on. The return value should be `HYPER_IO_PENDING`. - /// - /// Yeet. + /// If no data can currently be written, the callback should create a + /// `hyper_waker` from its `hyper_context` argument and register the waker + /// with whatever polling mechanism is used to signal when data can be written + /// later on. The return value should be `HYPER_IO_PENDING`. See the documentation + /// for `hyper_waker`. /// /// If there is an irrecoverable error reading data, then `HYPER_IO_ERROR` /// should be the return value. diff --git a/src/ffi/task.rs b/src/ffi/task.rs index 78e92ba90c..375949a06a 100644 --- a/src/ffi/task.rs +++ b/src/ffi/task.rs @@ -28,6 +28,28 @@ pub const HYPER_POLL_PENDING: c_int = 1; pub const HYPER_POLL_ERROR: c_int = 3; /// A task executor for `hyper_task`s. +/// +/// A task is a unit of work that may be blocked on IO, and can be polled to +/// make progress on that work.
+/// +/// An executor can hold many tasks, including from unrelated HTTP connections. +/// An executor is single-threaded. Typically you might have one executor per +/// thread. Or, for simplicity, you may choose one executor per connection. +/// +/// Progress on tasks happens only when hyper_executor_poll is called, and only +/// on tasks whose corresponding `hyper_waker` has been called to indicate they +/// are ready to make progress (for instance, because the OS has indicated there +/// is more data to read or more buffer space available to write). +/// +/// Deadlock potential: hyper_executor_poll must not be called from within a task's +/// callback. Doing so will result in a deadlock. +/// +/// Methods: +/// +/// - hyper_executor_new: Creates a new task executor. +/// - hyper_executor_push: Push a task onto the executor. +/// - hyper_executor_poll: Polls the executor, trying to make progress on any tasks that have notified that they are ready again. +/// - hyper_executor_free: Frees an executor and any incomplete tasks still part of it. pub struct hyper_executor { /// The executor of all task futures. /// @@ -55,6 +77,40 @@ pub(crate) struct WeakExec(Weak); struct ExecWaker(AtomicBool); /// An async task. +/// +/// A task represents a chunk of work that will eventually yield exactly one +/// `hyper_task_value`. Tasks are pushed onto an executor, and that executor is +/// responsible for calling the necessary private functions on the task to make +/// progress. In most cases those private functions will eventually cause read +/// or write callbacks on a `hyper_io` object to be called. +/// +/// Tasks are created by various functions: +/// +/// - hyper_clientconn_handshake: Starts an HTTP client connection handshake. +/// - hyper_clientconn_send: Send a request on the client connection. +/// - hyper_body_data: Return a task that will poll the body for data. +/// - hyper_body_foreach: Return a task that will poll the body and execute a callback.
+/// +/// Tasks then have a userdata associated with them using `hyper_task_set_userdata`. This +/// is important, for instance, to associate a request id with a given request. When multiple +/// tasks are running on the same executor, this allows distinguishing tasks for different +/// requests. +/// +/// Tasks are then pushed onto an executor, and eventually yielded from hyper_executor_poll: +/// +/// - hyper_executor_push: Push a task onto the executor. +/// - hyper_executor_poll: Polls the executor, trying to make progress on any tasks that have notified that they are ready again. +/// +/// Once a task is yielded from poll, retrieve its userdata, check its type, +/// and extract its value. This will require a cast from void* to the appropriate type. +/// +/// Methods on hyper_task: +/// +/// - hyper_task_type: Query the return type of this task. +/// - hyper_task_value: Takes the output value of this task. +/// - hyper_task_set_userdata: Set a user data pointer to be associated with this task. +/// - hyper_task_userdata: Retrieve the userdata that has been set via hyper_task_set_userdata. +/// - hyper_task_free: Free a task. pub struct hyper_task { future: BoxFuture, output: Option, @@ -66,9 +122,36 @@ struct TaskFuture { } /// An async context for a task that contains the related waker. +/// +/// This is provided to hyper_io's read and write callbacks. Currently +/// its only purpose is to provide access to the waker. See hyper_waker. +/// +/// Corresponding Rust type: <https://doc.rust-lang.org/std/task/struct.Context.html> pub struct hyper_context<'a>(Context<'a>); /// A waker that is saved and used to waken a pending task. +/// +/// This is provided to `hyper_io`'s read and write callbacks via `hyper_context` +/// and `hyper_context_waker`. +/// +/// When nonblocking I/O in one of those callbacks can't make progress (returns +/// `EAGAIN` or `EWOULDBLOCK`), the callback has to return to avoid blocking the +/// executor. But it also has to arrange to get called in the future when more +/// data is available.
That's the role of the async context and the waker. The +/// waker can be used to tell the executor "this task is ready to make progress." +/// +/// The read or write callback, upon finding it can't make progress, must get a +/// waker from the context (`hyper_context_waker`), arrange for that waker to be +/// called in the future, and then return `HYPER_IO_PENDING`. +/// +/// The arrangements for the waker to be called in the future are up to the +/// application, but usually it will involve one big `select(2)` loop that checks which +/// FDs are ready, and a correspondence between FDs and waker objects. For each +/// FD that is ready, the corresponding waker must be called. Then `hyper_executor_poll` +/// must be called. That will cause the executor to attempt to make progress on each +/// woken task. +/// +/// Corresponding Rust type: <https://doc.rust-lang.org/std/task/struct.Waker.html> pub struct hyper_waker { waker: std::task::Waker, } @@ -219,8 +302,14 @@ ffi_fn! { ffi_fn! { /// Push a task onto the executor. /// - /// The executor takes ownership of the task, which should not be accessed - /// again unless returned back to the user with `hyper_executor_poll`. + /// The executor takes ownership of the task, which must not be accessed + /// again. + /// + /// Ownership of the task will eventually be returned to the user from + /// `hyper_executor_poll`. + /// + /// To distinguish multiple tasks running on the same executor, use + /// hyper_task_set_userdata. fn hyper_executor_push(exec: *const hyper_executor, task: *mut hyper_task) -> hyper_code { let exec = non_null!(&*exec ?= hyper_code::HYPERE_INVALID_ARG); let task = non_null!(Box::from_raw(task) ?= hyper_code::HYPERE_INVALID_ARG); @@ -236,8 +325,7 @@ ffi_fn! { /// If ready, returns a task from the executor that has completed. /// /// To avoid a memory leak, the task must eventually be consumed by - /// `hyper_task_free`, or taken ownership of by `hyper_executor_push` - /// without subsequently being given back by `hyper_executor_poll`.
+ /// `hyper_task_free`. /// /// If there are no ready tasks, this returns `NULL`. fn hyper_executor_poll(exec: *const hyper_executor) -> *mut hyper_task { @@ -341,6 +429,9 @@ ffi_fn! { /// /// This value will be passed to task callbacks, and can be checked later /// with `hyper_task_userdata`. + /// + /// This is useful for telling apart tasks for different requests that are + /// running on the same executor. fn hyper_task_set_userdata(task: *mut hyper_task, userdata: *mut c_void) { if task.is_null() { return; @@ -414,7 +505,13 @@ impl hyper_context<'_> { } ffi_fn! { - /// Copies a waker out of the task context. + /// Creates a waker associated with the task context. + /// + /// The waker can be used to inform the task's executor that the task is + /// ready to make progress (using `hyper_waker_wake`). + /// + /// Typically this only needs to be called once, but it can be called + /// multiple times, returning a new waker each time. /// /// To avoid a memory leak, the waker must eventually be consumed by /// `hyper_waker_free` or `hyper_waker_wake`. @@ -439,6 +536,11 @@ ffi_fn! { ffi_fn! { /// Wake up the task associated with a waker. /// + /// This does not do work towards the associated task. Instead, it signals + /// to the task's executor that the task is ready to make progress. The + /// application is responsible for calling hyper_executor_poll, which + /// will in turn do work on all tasks that are ready to make progress. + /// /// NOTE: This consumes the waker. You should not use or free the waker afterwards. fn hyper_waker_wake(waker: *mut hyper_waker) { let waker = non_null!(Box::from_raw(waker) ?= ());