From 19e6e9e5a3faf996ccb4a3c247e9854997b86030 Mon Sep 17 00:00:00 2001 From: nodejs-github-bot <18269663+nodejs-github-bot@users.noreply.github.com> Date: Sun, 5 Oct 2025 00:37:15 +0000 Subject: [PATCH] deps: update nghttp3 to 1.12.0 --- .../nghttp3/lib/includes/nghttp3/nghttp3.h | 127 +- .../nghttp3/lib/includes/nghttp3/version.h | 4 +- deps/ngtcp2/nghttp3/lib/nghttp3_conn.c | 165 +- deps/ngtcp2/nghttp3/lib/nghttp3_conn.h | 17 +- deps/ngtcp2/nghttp3/lib/nghttp3_err.c | 4 + deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.c | 86 + deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.h | 60 + deps/ngtcp2/nghttp3/lib/nghttp3_settings.c | 7 + deps/ngtcp2/nghttp3/lib/nghttp3_settings.h | 7 + deps/ngtcp2/nghttp3/lib/nghttp3_stream.h | 4 - deps/ngtcp2/nghttp3/lib/sfparse/COPYING | 22 + deps/ngtcp2/nghttp3/lib/sfparse/sfparse.c | 1787 +++++++++++++++++ 12 files changed, 2232 insertions(+), 58 deletions(-) create mode 100644 deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.c create mode 100644 deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.h create mode 100644 deps/ngtcp2/nghttp3/lib/sfparse/COPYING create mode 100644 deps/ngtcp2/nghttp3/lib/sfparse/sfparse.c diff --git a/deps/ngtcp2/nghttp3/lib/includes/nghttp3/nghttp3.h b/deps/ngtcp2/nghttp3/lib/includes/nghttp3/nghttp3.h index 83820fc983bd84..9b18a22044e0e5 100644 --- a/deps/ngtcp2/nghttp3/lib/includes/nghttp3/nghttp3.h +++ b/deps/ngtcp2/nghttp3/lib/includes/nghttp3/nghttp3.h @@ -81,6 +81,59 @@ extern "C" { */ typedef ptrdiff_t nghttp3_ssize; +/** + * @typedef + * + * :type:`nghttp3_tstamp` is a timestamp with nanosecond resolution. + * ``UINT64_MAX`` is an invalid value, and it is often used to + * indicate that no value is set. This type is available since + * v1.12.0. + */ +typedef uint64_t nghttp3_tstamp; + +/** + * @typedef + * + * :type:`nghttp3_duration` is a period of time in nanosecond + * resolution. ``UINT64_MAX`` is an invalid value, and it is often + * used to indicate that no value is set. This type is available + * since v1.12.0. 
+ */ +typedef uint64_t nghttp3_duration; + +/** + * @macro + * + * :macro:`NGHTTP3_NANOSECONDS` is a count of tick which corresponds + * to 1 nanosecond. This macro is available since v1.12.0. + */ +#define NGHTTP3_NANOSECONDS ((nghttp3_duration)1ULL) + +/** + * @macro + * + * :macro:`NGHTTP3_MICROSECONDS` is a count of tick which corresponds + * to 1 microsecond. This macro is available since v1.12.0. + */ +#define NGHTTP3_MICROSECONDS ((nghttp3_duration)(1000ULL * NGHTTP3_NANOSECONDS)) + +/** + * @macro + * + * :macro:`NGHTTP3_MILLISECONDS` is a count of tick which corresponds + * to 1 millisecond. This macro is available since v1.12.0. + */ +#define NGHTTP3_MILLISECONDS \ + ((nghttp3_duration)(1000ULL * NGHTTP3_MICROSECONDS)) + +/** + * @macro + * + * :macro:`NGHTTP3_SECONDS` is a count of tick which corresponds to 1 + * second. This macro is available since v1.12.0. + */ +#define NGHTTP3_SECONDS ((nghttp3_duration)(1000ULL * NGHTTP3_MILLISECONDS)) + /** * @macro * @@ -264,6 +317,14 @@ typedef ptrdiff_t nghttp3_ssize; * allowed. */ #define NGHTTP3_ERR_H3_STREAM_CREATION_ERROR -609 +/** + * @macro + * + * :macro:`NGHTTP3_ERR_H3_EXCESSIVE_LOAD` indicates that a local + * endpoint detected that its remote endpoint is exhibiting a behavior + * that might be generating excessive load. + */ +#define NGHTTP3_ERR_H3_EXCESSIVE_LOAD -610 /** * @macro * @@ -1638,7 +1699,8 @@ typedef struct nghttp3_conn nghttp3_conn; #define NGHTTP3_SETTINGS_V1 1 #define NGHTTP3_SETTINGS_V2 2 -#define NGHTTP3_SETTINGS_VERSION NGHTTP3_SETTINGS_V2 +#define NGHTTP3_SETTINGS_V3 3 +#define NGHTTP3_SETTINGS_VERSION NGHTTP3_SETTINGS_V3 /** * @struct @@ -1700,6 +1762,24 @@ typedef struct nghttp3_settings { * server uses this field. This field is available since v1.11.0. */ const nghttp3_vec *origin_list; + /* The following fields have been added since NGHTTP3_SETTINGS_V3. */ + /** + * :member:`glitch_ratelim_burst` is the maximum number of tokens + * available to "glitch" rate limiter. 
"glitch" is a suspicious + * activity from a remote endpoint. If detected, certain amount of + * tokens are consumed. If no tokens are available to consume, the + * connection is closed. The rate of token generation is specified + * by :member:`glitch_ratelim_rate`. This feature is enabled only + * when `nghttp3_conn_read_stream2` is used. This field has been + * available since v1.12.0. + */ + uint64_t glitch_ratelim_burst; + /** + * :member:`glitch_ratelim_rate` is the number of tokens generated + * per second. See :member:`glitch_ratelim_burst` for "glitch" rate + * limiter. This field has been available since v1.12.0. + */ + uint64_t glitch_ratelim_rate; } nghttp3_settings; /** @@ -2113,6 +2193,10 @@ typedef struct nghttp3_callbacks { * ` = 0 * - :member:`enable_connect_protocol * ` = 0 + * - :member:`glitch_ratelim_burst + * ` = 1000 + * - :member:`glitch_ratelim_rate + * ` = 33 */ NGHTTP3_EXTERN void nghttp3_settings_default_versioned(int settings_version, @@ -2209,6 +2293,11 @@ NGHTTP3_EXTERN int nghttp3_conn_bind_qpack_streams(nghttp3_conn *conn, /** * @function * + * .. warning:: + * + * Deprecated since v1.12.0. Use `nghttp3_conn_read_stream2` + * instead. + * * `nghttp3_conn_read_stream` reads data |src| of length |srclen| on * stream identified by |stream_id|. It returns the number of bytes * consumed. The "consumed" means that application can increase flow @@ -2237,6 +2326,42 @@ NGHTTP3_EXTERN nghttp3_ssize nghttp3_conn_read_stream(nghttp3_conn *conn, const uint8_t *src, size_t srclen, int fin); +/** + * @function + * + * `nghttp3_conn_read_stream2` reads data |src| of length |srclen| on + * stream identified by |stream_id|. It returns the number of bytes + * consumed. The "consumed" means that application can increase flow + * control credit (both stream and connection) of underlying QUIC + * connection by that amount. 
It does not include the amount of data + * carried by DATA frame which contains application data (excluding + * any control or QPACK unidirectional streams). See + * :type:`nghttp3_recv_data` to handle those bytes. If |fin| is + * nonzero, this is the last data from remote endpoint in this stream. + * |ts| is the current timestamp, and must be non-decreasing. It + * should be obtained from the clock that is steadily increasing. + * + * This function returns the number of bytes consumed, or one of the + * following negative error codes: + * + * :macro:`NGHTTP3_ERR_NOMEM` + * Out of memory. + * :macro:`NGHTTP3_ERR_CALLBACK_FAILURE` + * User callback failed. + * + * It may return the other error codes. The negative error code means + * that |conn| encountered a connection error, and the connection must + * be closed. Calling nghttp3 API other than `nghttp3_conn_del` + * causes undefined behavior. + * + * This function is available since v1.12.0. + */ +NGHTTP3_EXTERN nghttp3_ssize nghttp3_conn_read_stream2(nghttp3_conn *conn, + int64_t stream_id, + const uint8_t *src, + size_t srclen, int fin, + nghttp3_tstamp ts); + /** * @function * diff --git a/deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h b/deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h index bd442b8be636ee..7661a6b2546dad 100644 --- a/deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h +++ b/deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h @@ -31,7 +31,7 @@ * * Version number of the nghttp3 library release. */ -#define NGHTTP3_VERSION "1.11.0" +#define NGHTTP3_VERSION "1.12.0" /** * @macro @@ -41,6 +41,6 @@ * number, 8 bits for minor and 8 bits for patch. Version 1.2.3 * becomes 0x010203. 
*/ -#define NGHTTP3_VERSION_NUM 0x010b00 +#define NGHTTP3_VERSION_NUM 0x010c00 #endif /* !defined(NGHTTP3_VERSION_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_conn.c b/deps/ngtcp2/nghttp3/lib/nghttp3_conn.c index 8c0e94dc768a29..debb7c981794dd 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_conn.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_conn.c @@ -231,6 +231,15 @@ static int conn_call_end_origin(nghttp3_conn *conn) { return 0; } +static int conn_glitch_ratelim_drain(nghttp3_conn *conn, uint64_t n, + nghttp3_tstamp ts) { + if (ts == UINT64_MAX) { + return 0; + } + + return nghttp3_ratelim_drain(&conn->glitch_rlim, n, ts); +} + static int ricnt_less(const nghttp3_pq_entry *lhsx, const nghttp3_pq_entry *rhsx) { nghttp3_stream *lhs = @@ -309,6 +318,9 @@ static int conn_new(nghttp3_conn **pconn, int server, int callbacks_version, nghttp3_idtr_init(&conn->remote.bidi.idtr, mem); + nghttp3_ratelim_init(&conn->glitch_rlim, settings->glitch_ratelim_burst, + settings->glitch_ratelim_rate, 0); + conn->callbacks = *callbacks; conn->local.settings = *settings; if (server) { @@ -427,6 +439,13 @@ static int conn_bidi_idtr_open(nghttp3_conn *conn, int64_t stream_id) { nghttp3_ssize nghttp3_conn_read_stream(nghttp3_conn *conn, int64_t stream_id, const uint8_t *src, size_t srclen, int fin) { + return nghttp3_conn_read_stream2(conn, stream_id, src, srclen, fin, + UINT64_MAX); +} + +nghttp3_ssize nghttp3_conn_read_stream2(nghttp3_conn *conn, int64_t stream_id, + const uint8_t *src, size_t srclen, + int fin, nghttp3_tstamp ts) { nghttp3_stream *stream; size_t bidi_nproc; int rv; @@ -517,13 +536,14 @@ nghttp3_ssize nghttp3_conn_read_stream(nghttp3_conn *conn, int64_t stream_id, } if (nghttp3_stream_uni(stream_id)) { - return nghttp3_conn_read_uni(conn, stream, src, srclen, fin); + return nghttp3_conn_read_uni(conn, stream, src, srclen, fin, ts); } if (fin) { stream->flags |= NGHTTP3_STREAM_FLAG_READ_EOF; } - return nghttp3_conn_read_bidi(conn, &bidi_nproc, stream, src, srclen, fin); + 
return nghttp3_conn_read_bidi(conn, &bidi_nproc, stream, src, srclen, fin, + ts); } static nghttp3_ssize conn_read_type(nghttp3_conn *conn, nghttp3_stream *stream, @@ -586,8 +606,8 @@ static nghttp3_ssize conn_read_type(nghttp3_conn *conn, nghttp3_stream *stream, static int conn_delete_stream(nghttp3_conn *conn, nghttp3_stream *stream); nghttp3_ssize nghttp3_conn_read_uni(nghttp3_conn *conn, nghttp3_stream *stream, - const uint8_t *src, size_t srclen, - int fin) { + const uint8_t *src, size_t srclen, int fin, + nghttp3_tstamp ts) { nghttp3_ssize nread = 0; nghttp3_ssize nconsumed = 0; int rv; @@ -603,6 +623,12 @@ nghttp3_ssize nghttp3_conn_read_uni(nghttp3_conn *conn, nghttp3_stream *stream, return NGHTTP3_ERR_H3_GENERAL_PROTOCOL_ERROR; } + /* Receiving too frequent 0 length unidirectional stream is + suspicious. */ + if (conn_glitch_ratelim_drain(conn, 1, ts) != 0) { + return NGHTTP3_ERR_H3_EXCESSIVE_LOAD; + } + rv = conn_delete_stream(conn, stream); assert(0 == rv); @@ -620,6 +646,21 @@ nghttp3_ssize nghttp3_conn_read_uni(nghttp3_conn *conn, nghttp3_stream *stream, src += nread; srclen -= (size_t)nread; + if (stream->type == NGHTTP3_STREAM_TYPE_UNKNOWN) { + /* Receiving too frequent unknown stream type is suspicious.*/ + if (conn_glitch_ratelim_drain(conn, 1, ts) != 0) { + return NGHTTP3_ERR_H3_EXCESSIVE_LOAD; + } + + if (!fin) { + rv = conn_call_stop_sending(conn, stream, + NGHTTP3_H3_STREAM_CREATION_ERROR); + if (rv != 0) { + return rv; + } + } + } + if (srclen == 0) { return nread; } @@ -630,13 +671,13 @@ nghttp3_ssize nghttp3_conn_read_uni(nghttp3_conn *conn, nghttp3_stream *stream, if (fin) { return NGHTTP3_ERR_H3_CLOSED_CRITICAL_STREAM; } - nconsumed = nghttp3_conn_read_control(conn, stream, src, srclen); + nconsumed = nghttp3_conn_read_control(conn, stream, src, srclen, ts); break; case NGHTTP3_STREAM_TYPE_QPACK_ENCODER: if (fin) { return NGHTTP3_ERR_H3_CLOSED_CRITICAL_STREAM; } - nconsumed = nghttp3_conn_read_qpack_encoder(conn, src, srclen); + nconsumed 
= nghttp3_conn_read_qpack_encoder(conn, src, srclen, ts); break; case NGHTTP3_STREAM_TYPE_QPACK_DECODER: if (fin) { @@ -646,14 +687,6 @@ nghttp3_ssize nghttp3_conn_read_uni(nghttp3_conn *conn, nghttp3_stream *stream, break; case NGHTTP3_STREAM_TYPE_UNKNOWN: nconsumed = (nghttp3_ssize)srclen; - if (fin) { - break; - } - - rv = conn_call_stop_sending(conn, stream, NGHTTP3_H3_STREAM_CREATION_ERROR); - if (rv != 0) { - return rv; - } break; default: nghttp3_unreachable(); @@ -677,7 +710,8 @@ static int frame_fin(nghttp3_stream_read_state *rstate, size_t len) { nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, nghttp3_stream *stream, - const uint8_t *src, size_t srclen) { + const uint8_t *src, size_t srclen, + nghttp3_tstamp ts) { const uint8_t *p = src, *end = src + srclen; int rv; nghttp3_stream_read_state *rstate = &stream->rstate; @@ -775,12 +809,23 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, if (rstate->left == 0) { return NGHTTP3_ERR_H3_FRAME_ERROR; } + + /* We do not expect too frequent priority updates. */ + if (conn_glitch_ratelim_drain(conn, 1, ts) != 0) { + return NGHTTP3_ERR_H3_EXCESSIVE_LOAD; + } + rstate->state = NGHTTP3_CTRL_STREAM_STATE_PRIORITY_UPDATE_PRI_ELEM_ID; break; case NGHTTP3_FRAME_PRIORITY_UPDATE_PUSH_ID: /* We do not support push */ return NGHTTP3_ERR_H3_ID_ERROR; case NGHTTP3_FRAME_ORIGIN: + /* We do not expect too frequent ORIGIN frames. */ + if (conn_glitch_ratelim_drain(conn, 1, ts) != 0) { + return NGHTTP3_ERR_H3_EXCESSIVE_LOAD; + } + if (conn->server || (!conn->callbacks.recv_origin && !conn->callbacks.end_origin)) { busy = 1; @@ -815,6 +860,11 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, case NGHTTP3_H2_FRAME_CONTINUATION: return NGHTTP3_ERR_H3_FRAME_UNEXPECTED; default: + /* We do not expect too frequent unknown frames. 
*/ + if (conn_glitch_ratelim_drain(conn, 1, ts) != 0) { + return NGHTTP3_ERR_H3_EXCESSIVE_LOAD; + } + /* TODO Handle reserved frame type */ busy = 1; rstate->state = NGHTTP3_CTRL_STREAM_STATE_IGN_FRAME; @@ -971,6 +1021,12 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, return NGHTTP3_ERR_H3_ID_ERROR; } + /* Receiving same GOAWAY ID is suspicious. */ + if (conn->rx.goaway_id == rvint->acc && + conn_glitch_ratelim_drain(conn, 1, ts) != 0) { + return NGHTTP3_ERR_H3_EXCESSIVE_LOAD; + } + conn->flags |= NGHTTP3_CONN_FLAG_GOAWAY_RECVED; conn->rx.goaway_id = rvint->acc; nghttp3_varint_read_state_reset(rvint); @@ -1005,6 +1061,12 @@ nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, return NGHTTP3_ERR_H3_FRAME_ERROR; } + /* Receiving same MAX_PUSH_ID is suspicious. */ + if (conn->local.uni.max_pushes == (uint64_t)rvint->acc + 1 && + conn_glitch_ratelim_drain(conn, 1, ts) != 0) { + return NGHTTP3_ERR_H3_EXCESSIVE_LOAD; + } + conn->local.uni.max_pushes = (uint64_t)rvint->acc + 1; nghttp3_varint_read_state_reset(rvint); @@ -1263,11 +1325,23 @@ static int conn_delete_stream(nghttp3_conn *conn, nghttp3_stream *stream) { return rv; } - if (bidi && conn->callbacks.stream_close) { - rv = conn->callbacks.stream_close(conn, stream->node.id, stream->error_code, - conn->user_data, stream->user_data); - if (rv != 0) { - return NGHTTP3_ERR_CALLBACK_FAILURE; + if (bidi) { + if (stream->qpack_blocked_pe.index != NGHTTP3_PQ_BAD_INDEX) { + nghttp3_conn_qpack_blocked_streams_remove(conn, stream); + + rv = nghttp3_qpack_decoder_cancel_stream(&conn->qdec, stream->node.id); + if (rv != 0) { + return rv; + } + } + + if (conn->callbacks.stream_close) { + rv = + conn->callbacks.stream_close(conn, stream->node.id, stream->error_code, + conn->user_data, stream->user_data); + if (rv != 0) { + return NGHTTP3_ERR_CALLBACK_FAILURE; + } } } @@ -1288,7 +1362,8 @@ static int conn_delete_stream(nghttp3_conn *conn, nghttp3_stream *stream) { } static int 
conn_process_blocked_stream_data(nghttp3_conn *conn, - nghttp3_stream *stream) { + nghttp3_stream *stream, + nghttp3_tstamp ts) { nghttp3_buf *buf; size_t nproc; nghttp3_ssize nconsumed; @@ -1307,7 +1382,7 @@ static int conn_process_blocked_stream_data(nghttp3_conn *conn, nconsumed = nghttp3_conn_read_bidi( conn, &nproc, stream, buf->pos, nghttp3_buf_len(buf), - len == 1 && (stream->flags & NGHTTP3_STREAM_FLAG_READ_EOF)); + len == 1 && (stream->flags & NGHTTP3_STREAM_FLAG_READ_EOF), ts); if (nconsumed < 0) { return (int)nconsumed; } @@ -1329,22 +1404,12 @@ static int conn_process_blocked_stream_data(nghttp3_conn *conn, } } - if (!(stream->flags & NGHTTP3_STREAM_FLAG_QPACK_DECODE_BLOCKED) && - (stream->flags & NGHTTP3_STREAM_FLAG_CLOSED)) { - assert(stream->qpack_blocked_pe.index == NGHTTP3_PQ_BAD_INDEX); - - rv = conn_delete_stream(conn, stream); - if (rv != 0) { - return rv; - } - } - return 0; } nghttp3_ssize nghttp3_conn_read_qpack_encoder(nghttp3_conn *conn, - const uint8_t *src, - size_t srclen) { + const uint8_t *src, size_t srclen, + nghttp3_tstamp ts) { nghttp3_ssize nconsumed = nghttp3_qpack_decoder_read_encoder(&conn->qdec, src, srclen); nghttp3_stream *stream; @@ -1366,7 +1431,7 @@ nghttp3_ssize nghttp3_conn_read_qpack_encoder(nghttp3_conn *conn, stream->qpack_blocked_pe.index = NGHTTP3_PQ_BAD_INDEX; stream->flags &= (uint16_t)~NGHTTP3_STREAM_FLAG_QPACK_DECODE_BLOCKED; - rv = conn_process_blocked_stream_data(conn, stream); + rv = conn_process_blocked_stream_data(conn, stream, ts); if (rv != 0) { return rv; } @@ -1407,7 +1472,8 @@ static int conn_update_stream_priority(nghttp3_conn *conn, nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, nghttp3_stream *stream, const uint8_t *src, - size_t srclen, int fin) { + size_t srclen, int fin, + nghttp3_tstamp ts) { const uint8_t *p = src, *end = src ? 
src + srclen : src; int rv; nghttp3_stream_read_state *rstate = &stream->rstate; @@ -1546,6 +1612,11 @@ nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, case NGHTTP3_H2_FRAME_CONTINUATION: return NGHTTP3_ERR_H3_FRAME_UNEXPECTED; default: + /* We do not expect too frequent unknown frames. */ + if (conn_glitch_ratelim_drain(conn, 1, ts) != 0) { + return NGHTTP3_ERR_H3_EXCESSIVE_LOAD; + } + /* TODO Handle reserved frame type */ busy = 1; rstate->state = NGHTTP3_REQ_STREAM_STATE_IGN_FRAME; @@ -2602,10 +2673,9 @@ int nghttp3_conn_is_stream_writable(nghttp3_conn *conn, int64_t stream_id) { return 0; } - return (stream->flags & - (NGHTTP3_STREAM_FLAG_FC_BLOCKED | - NGHTTP3_STREAM_FLAG_READ_DATA_BLOCKED | NGHTTP3_STREAM_FLAG_SHUT_WR | - NGHTTP3_STREAM_FLAG_CLOSED)) == 0; + return (stream->flags & (NGHTTP3_STREAM_FLAG_FC_BLOCKED | + NGHTTP3_STREAM_FLAG_READ_DATA_BLOCKED | + NGHTTP3_STREAM_FLAG_SHUT_WR)) == 0; } int nghttp3_conn_resume_stream(nghttp3_conn *conn, int64_t stream_id) { @@ -2642,12 +2712,7 @@ int nghttp3_conn_close_stream(nghttp3_conn *conn, int64_t stream_id, nghttp3_conn_unschedule_stream(conn, stream); - if (stream->qpack_blocked_pe.index == NGHTTP3_PQ_BAD_INDEX) { - return conn_delete_stream(conn, stream); - } - - stream->flags |= NGHTTP3_STREAM_FLAG_CLOSED; - return 0; + return conn_delete_stream(conn, stream); } int nghttp3_conn_shutdown_stream_read(nghttp3_conn *conn, int64_t stream_id) { @@ -2685,6 +2750,14 @@ void nghttp3_conn_qpack_blocked_streams_pop(nghttp3_conn *conn) { nghttp3_pq_pop(&conn->qpack_blocked_streams); } +void nghttp3_conn_qpack_blocked_streams_remove(nghttp3_conn *conn, + nghttp3_stream *stream) { + assert(!nghttp3_pq_empty(&conn->qpack_blocked_streams)); + assert(stream->qpack_blocked_pe.index != NGHTTP3_PQ_BAD_INDEX); + + nghttp3_pq_remove(&conn->qpack_blocked_streams, &stream->qpack_blocked_pe); +} + void nghttp3_conn_set_max_client_streams_bidi(nghttp3_conn *conn, uint64_t max_streams) { 
assert(conn->server); diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_conn.h b/deps/ngtcp2/nghttp3/lib/nghttp3_conn.h index 80c26f80322c60..9856334fc07675 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_conn.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_conn.h @@ -37,6 +37,7 @@ #include "nghttp3_tnode.h" #include "nghttp3_idtr.h" #include "nghttp3_gaptr.h" +#include "nghttp3_ratelim.h" /* NGHTTP3_QPACK_ENCODER_MAX_TABLE_CAPACITY is the maximum dynamic table size for QPACK encoder. */ @@ -84,6 +85,7 @@ struct nghttp3_conn { nghttp3_qpack_decoder qdec; nghttp3_qpack_encoder qenc; nghttp3_pq qpack_blocked_streams; + nghttp3_ratelim glitch_rlim; struct { nghttp3_pq spq; } sched[NGHTTP3_URGENCY_LEVELS]; @@ -180,18 +182,20 @@ int nghttp3_conn_create_stream(nghttp3_conn *conn, nghttp3_stream **pstream, nghttp3_ssize nghttp3_conn_read_bidi(nghttp3_conn *conn, size_t *pnproc, nghttp3_stream *stream, const uint8_t *src, - size_t srclen, int fin); + size_t srclen, int fin, nghttp3_tstamp ts); nghttp3_ssize nghttp3_conn_read_uni(nghttp3_conn *conn, nghttp3_stream *stream, - const uint8_t *src, size_t srclen, int fin); + const uint8_t *src, size_t srclen, int fin, + nghttp3_tstamp ts); nghttp3_ssize nghttp3_conn_read_control(nghttp3_conn *conn, nghttp3_stream *stream, - const uint8_t *src, size_t srclen); + const uint8_t *src, size_t srclen, + nghttp3_tstamp ts); nghttp3_ssize nghttp3_conn_read_qpack_encoder(nghttp3_conn *conn, - const uint8_t *src, - size_t srclen); + const uint8_t *src, size_t srclen, + nghttp3_tstamp ts); nghttp3_ssize nghttp3_conn_read_qpack_decoder(nghttp3_conn *conn, const uint8_t *src, @@ -216,6 +220,9 @@ int nghttp3_conn_qpack_blocked_streams_push(nghttp3_conn *conn, void nghttp3_conn_qpack_blocked_streams_pop(nghttp3_conn *conn); +void nghttp3_conn_qpack_blocked_streams_remove(nghttp3_conn *conn, + nghttp3_stream *stream); + int nghttp3_conn_schedule_stream(nghttp3_conn *conn, nghttp3_stream *stream); int nghttp3_conn_ensure_stream_scheduled(nghttp3_conn *conn, 
diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_err.c b/deps/ngtcp2/nghttp3/lib/nghttp3_err.c index 0d596bfab6d29d..eff6ea6a63a2f7 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_err.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_err.c @@ -74,6 +74,8 @@ const char *nghttp3_strerror(int liberr) { return "ERR_H3_SETTINGS_ERROR"; case NGHTTP3_ERR_H3_STREAM_CREATION_ERROR: return "ERR_H3_STREAM_CREATION_ERROR"; + case NGHTTP3_ERR_H3_EXCESSIVE_LOAD: + return "ERR_H3_EXCESSIVE_LOAD"; case NGHTTP3_ERR_NOMEM: return "ERR_NOMEM"; case NGHTTP3_ERR_CALLBACK_FAILURE: @@ -116,6 +118,8 @@ uint64_t nghttp3_err_infer_quic_app_error_code(int liberr) { return NGHTTP3_H3_SETTINGS_ERROR; case NGHTTP3_ERR_H3_STREAM_CREATION_ERROR: return NGHTTP3_H3_STREAM_CREATION_ERROR; + case NGHTTP3_ERR_H3_EXCESSIVE_LOAD: + return NGHTTP3_H3_EXCESSIVE_LOAD; case NGHTTP3_ERR_MALFORMED_HTTP_HEADER: case NGHTTP3_ERR_MALFORMED_HTTP_MESSAGING: return NGHTTP3_H3_MESSAGE_ERROR; diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.c b/deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.c new file mode 100644 index 00000000000000..d7a0267530cad0 --- /dev/null +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.c @@ -0,0 +1,86 @@ +/* + * nghttp3 + * + * Copyright (c) 2025 nghttp3 contributors + * Copyright (c) 2025 ngtcp2 contributors + * Copyright (c) 2023 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nghttp3_ratelim.h" + +#include <assert.h> + +#include "nghttp3_macro.h" + +void nghttp3_ratelim_init(nghttp3_ratelim *rlim, uint64_t burst, uint64_t rate, + nghttp3_tstamp ts) { + *rlim = (nghttp3_ratelim){ + .burst = burst, + .rate = rate, + .tokens = burst, + .ts = ts, + }; +} + +/* ratelim_update updates rlim->tokens with the current |ts|. */ +static void ratelim_update(nghttp3_ratelim *rlim, nghttp3_tstamp ts) { + uint64_t d, gain, gps; + + assert(ts >= rlim->ts); + + if (ts == rlim->ts) { + return; + } + + d = ts - rlim->ts; + rlim->ts = ts; + + if (rlim->rate > (UINT64_MAX - rlim->carry) / d) { + gain = UINT64_MAX; + } else { + gain = rlim->rate * d + rlim->carry; + } + + gps = gain / NGHTTP3_SECONDS; + + if (gps < rlim->burst && rlim->tokens < rlim->burst - gps) { + rlim->tokens += gps; + rlim->carry = gain % NGHTTP3_SECONDS; + + return; + } + + rlim->tokens = rlim->burst; + rlim->carry = 0; +} + +int nghttp3_ratelim_drain(nghttp3_ratelim *rlim, uint64_t n, + nghttp3_tstamp ts) { + ratelim_update(rlim, ts); + + if (rlim->tokens < n) { + return -1; + } + + rlim->tokens -= n; + + return 0; +} diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.h b/deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.h new file mode 100644 index 00000000000000..68aab93a61bfc0 --- /dev/null +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_ratelim.h @@ -0,0 +1,60 @@ +/* + * nghttp3 + * + * Copyright (c) 2025 nghttp3 contributors + * Copyright (c) 2025 ngtcp2 contributors + * Copyright (c) 2023 nghttp2 contributors 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGHTTP3_RATELIM_H +#define NGHTTP3_RATELIM_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* defined(HAVE_CONFIG_H) */ + +#include <nghttp3/nghttp3.h> + +typedef struct nghttp3_ratelim { + /* burst is the maximum number of tokens. */ + uint64_t burst; + /* rate is the rate of token generation measured by token / + second. */ + uint64_t rate; + /* tokens is the amount of tokens available to drain. */ + uint64_t tokens; + /* carry is the partial token gained in sub-second period. It is + added to the computation in the next update round. */ + uint64_t carry; + /* ts is the last timestamp that is known to this object. */ + nghttp3_tstamp ts; +} nghttp3_ratelim; + +/* nghttp3_ratelim_init initializes |rlim| with the given + parameters. */ +void nghttp3_ratelim_init(nghttp3_ratelim *rlim, uint64_t burst, uint64_t rate, + nghttp3_tstamp ts); + +/* nghttp3_ratelim_drain drains |n| from rlim->tokens. 
It returns 0 if + it succeeds, or -1. */ +int nghttp3_ratelim_drain(nghttp3_ratelim *rlim, uint64_t n, nghttp3_tstamp ts); + +#endif /* !defined(NGHTTP3_RATELIM_H) */ diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_settings.c b/deps/ngtcp2/nghttp3/lib/nghttp3_settings.c index 2b7cd2892b3cc9..ab2581a6e75115 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_settings.c +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_settings.c @@ -42,6 +42,10 @@ void nghttp3_settings_default_versioned(int settings_version, switch (settings_version) { case NGHTTP3_SETTINGS_VERSION: + settings->glitch_ratelim_burst = NGHTTP3_DEFAULT_GLITCH_RATELIM_BURST; + settings->glitch_ratelim_rate = NGHTTP3_DEFAULT_GLITCH_RATELIM_RATE; + /* fall through */ + case NGHTTP3_SETTINGS_V2: case NGHTTP3_SETTINGS_V1: settings->max_field_section_size = NGHTTP3_VARINT_MAX; settings->qpack_encoder_max_dtable_capacity = @@ -86,6 +90,9 @@ size_t nghttp3_settingslen_version(int settings_version) { switch (settings_version) { case NGHTTP3_SETTINGS_VERSION: return sizeof(settings); + case NGHTTP3_SETTINGS_V2: + return offsetof(nghttp3_settings, origin_list) + + sizeof(settings.origin_list); case NGHTTP3_SETTINGS_V1: return offsetof(nghttp3_settings, h3_datagram) + sizeof(settings.h3_datagram); diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_settings.h b/deps/ngtcp2/nghttp3/lib/nghttp3_settings.h index 0632aafc489ee5..7bc9039379f9d6 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_settings.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_settings.h @@ -31,6 +31,13 @@ #include +/* NGHTTP3_DEFAULT_GLITCH_RATELIM_BURST is the maximum number of + tokens in glitch rate limiter. It is also the initial value. */ +#define NGHTTP3_DEFAULT_GLITCH_RATELIM_BURST 1000 +/* NGHTTP3_DEFAULT_GLITCH_RATELIM_RATE is the rate of tokens generated + per second for glitch rate limiter. */ +#define NGHTTP3_DEFAULT_GLITCH_RATELIM_RATE 33 + /* * nghttp3_settings_convert_to_latest converts |src| of version * |settings_version| to the latest version NGHTTP3_SETTINGS_VERSION. 
diff --git a/deps/ngtcp2/nghttp3/lib/nghttp3_stream.h b/deps/ngtcp2/nghttp3/lib/nghttp3_stream.h index 759cf687a0c108..7cfb844dcf0a50 100644 --- a/deps/ngtcp2/nghttp3/lib/nghttp3_stream.h +++ b/deps/ngtcp2/nghttp3/lib/nghttp3_stream.h @@ -114,10 +114,6 @@ typedef struct nghttp3_stream_read_state { /* NGHTTP3_STREAM_FLAG_READ_EOF indicates that remote endpoint sent fin. */ #define NGHTTP3_STREAM_FLAG_READ_EOF 0x0020u -/* NGHTTP3_STREAM_FLAG_CLOSED indicates that QUIC stream was closed. - nghttp3_stream object can still alive because it might be blocked - by QPACK decoder. */ -#define NGHTTP3_STREAM_FLAG_CLOSED 0x0040u /* NGHTTP3_STREAM_FLAG_SHUT_WR indicates that any further write operation to a stream is prohibited. */ #define NGHTTP3_STREAM_FLAG_SHUT_WR 0x0100u diff --git a/deps/ngtcp2/nghttp3/lib/sfparse/COPYING b/deps/ngtcp2/nghttp3/lib/sfparse/COPYING new file mode 100644 index 00000000000000..8212d82d83ab74 --- /dev/null +++ b/deps/ngtcp2/nghttp3/lib/sfparse/COPYING @@ -0,0 +1,22 @@ +The MIT License + +Copyright (c) 2023 sfparse contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/ngtcp2/nghttp3/lib/sfparse/sfparse.c b/deps/ngtcp2/nghttp3/lib/sfparse/sfparse.c new file mode 100644 index 00000000000000..cee089d3944d18 --- /dev/null +++ b/deps/ngtcp2/nghttp3/lib/sfparse/sfparse.c @@ -0,0 +1,1787 @@ +/* + * sfparse + * + * Copyright (c) 2023 sfparse contributors + * Copyright (c) 2019 nghttp3 contributors + * Copyright (c) 2015 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "sfparse.h" + +#include +#include +#include + +#ifdef __AVX2__ +# include +#endif /* __AVX2__ */ + +#define SFPARSE_STATE_DICT 0x08u +#define SFPARSE_STATE_LIST 0x10u +#define SFPARSE_STATE_ITEM 0x18u + +#define SFPARSE_STATE_INNER_LIST 0x04u + +#define SFPARSE_STATE_BEFORE 0x00u +#define SFPARSE_STATE_BEFORE_PARAMS 0x01u +#define SFPARSE_STATE_PARAMS 0x02u +#define SFPARSE_STATE_AFTER 0x03u + +#define SFPARSE_STATE_OP_MASK 0x03u + +#define SFPARSE_SET_STATE_AFTER(NAME) \ + (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER) +#define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME) \ + (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS) +#define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME) \ + (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE) + +#define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT) +#define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT) +#define SFPARSE_STATE_DICT_INNER_LIST_BEFORE \ + SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT) + +#define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST) +#define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST) +#define SFPARSE_STATE_LIST_INNER_LIST_BEFORE \ + SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST) + +#define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM) +#define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM) +#define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE \ + SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM) + +#define SFPARSE_STATE_INITIAL 0x00u + +#define DIGIT_CASES \ + case '0': \ + case '1': \ + case '2': \ + case '3': \ + case '4': \ + case '5': \ + case '6': \ + case '7': \ + case '8': \ + case '9' + +#define LCALPHA_CASES \ + case 'a': \ + case 'b': \ + case 'c': \ + case 'd': \ + case 'e': \ + case 'f': \ + case 'g': \ + case 'h': \ + case 'i': \ + case 'j': \ + case 'k': \ + case 'l': \ + case 'm': \ + case 'n': \ + case 'o': \ + case 'p': \ + case 'q': \ + case 'r': \ + case 's': \ + case 't': \ + case 
'u': \ + case 'v': \ + case 'w': \ + case 'x': \ + case 'y': \ + case 'z' + +#define UCALPHA_CASES \ + case 'A': \ + case 'B': \ + case 'C': \ + case 'D': \ + case 'E': \ + case 'F': \ + case 'G': \ + case 'H': \ + case 'I': \ + case 'J': \ + case 'K': \ + case 'L': \ + case 'M': \ + case 'N': \ + case 'O': \ + case 'P': \ + case 'Q': \ + case 'R': \ + case 'S': \ + case 'T': \ + case 'U': \ + case 'V': \ + case 'W': \ + case 'X': \ + case 'Y': \ + case 'Z' + +#define ALPHA_CASES \ + UCALPHA_CASES: \ + LCALPHA_CASES + +#define TOKEN_CASES \ + case '!': \ + case '#': \ + case '$': \ + case '%': \ + case '&': \ + case '\'': \ + case '*': \ + case '+': \ + case '-': \ + case '.': \ + case '/': \ + DIGIT_CASES: \ + case ':': \ + UCALPHA_CASES: \ + case '^': \ + case '_': \ + case '`': \ + LCALPHA_CASES: \ + case '|': \ + case '~' + +#define LCHEXALPHA_CASES \ + case 'a': \ + case 'b': \ + case 'c': \ + case 'd': \ + case 'e': \ + case 'f' + +#define X00_1F_CASES \ + case 0x00: \ + case 0x01: \ + case 0x02: \ + case 0x03: \ + case 0x04: \ + case 0x05: \ + case 0x06: \ + case 0x07: \ + case 0x08: \ + case 0x09: \ + case 0x0a: \ + case 0x0b: \ + case 0x0c: \ + case 0x0d: \ + case 0x0e: \ + case 0x0f: \ + case 0x10: \ + case 0x11: \ + case 0x12: \ + case 0x13: \ + case 0x14: \ + case 0x15: \ + case 0x16: \ + case 0x17: \ + case 0x18: \ + case 0x19: \ + case 0x1a: \ + case 0x1b: \ + case 0x1c: \ + case 0x1d: \ + case 0x1e: \ + case 0x1f + +#define X20_21_CASES \ + case ' ': \ + case '!' 
+ +#define X23_5B_CASES \ + case '#': \ + case '$': \ + case '%': \ + case '&': \ + case '\'': \ + case '(': \ + case ')': \ + case '*': \ + case '+': \ + case ',': \ + case '-': \ + case '.': \ + case '/': \ + DIGIT_CASES: \ + case ':': \ + case ';': \ + case '<': \ + case '=': \ + case '>': \ + case '?': \ + case '@': \ + UCALPHA_CASES: \ + case '[' + +#define X5D_7E_CASES \ + case ']': \ + case '^': \ + case '_': \ + case '`': \ + LCALPHA_CASES: \ + case '{': \ + case '|': \ + case '}': \ + case '~' + +#define X7F_FF_CASES \ + case 0x7f: \ + case 0x80: \ + case 0x81: \ + case 0x82: \ + case 0x83: \ + case 0x84: \ + case 0x85: \ + case 0x86: \ + case 0x87: \ + case 0x88: \ + case 0x89: \ + case 0x8a: \ + case 0x8b: \ + case 0x8c: \ + case 0x8d: \ + case 0x8e: \ + case 0x8f: \ + case 0x90: \ + case 0x91: \ + case 0x92: \ + case 0x93: \ + case 0x94: \ + case 0x95: \ + case 0x96: \ + case 0x97: \ + case 0x98: \ + case 0x99: \ + case 0x9a: \ + case 0x9b: \ + case 0x9c: \ + case 0x9d: \ + case 0x9e: \ + case 0x9f: \ + case 0xa0: \ + case 0xa1: \ + case 0xa2: \ + case 0xa3: \ + case 0xa4: \ + case 0xa5: \ + case 0xa6: \ + case 0xa7: \ + case 0xa8: \ + case 0xa9: \ + case 0xaa: \ + case 0xab: \ + case 0xac: \ + case 0xad: \ + case 0xae: \ + case 0xaf: \ + case 0xb0: \ + case 0xb1: \ + case 0xb2: \ + case 0xb3: \ + case 0xb4: \ + case 0xb5: \ + case 0xb6: \ + case 0xb7: \ + case 0xb8: \ + case 0xb9: \ + case 0xba: \ + case 0xbb: \ + case 0xbc: \ + case 0xbd: \ + case 0xbe: \ + case 0xbf: \ + case 0xc0: \ + case 0xc1: \ + case 0xc2: \ + case 0xc3: \ + case 0xc4: \ + case 0xc5: \ + case 0xc6: \ + case 0xc7: \ + case 0xc8: \ + case 0xc9: \ + case 0xca: \ + case 0xcb: \ + case 0xcc: \ + case 0xcd: \ + case 0xce: \ + case 0xcf: \ + case 0xd0: \ + case 0xd1: \ + case 0xd2: \ + case 0xd3: \ + case 0xd4: \ + case 0xd5: \ + case 0xd6: \ + case 0xd7: \ + case 0xd8: \ + case 0xd9: \ + case 0xda: \ + case 0xdb: \ + case 0xdc: \ + case 0xdd: \ + case 0xde: \ + case 0xdf: \ + case 
0xe0: \ + case 0xe1: \ + case 0xe2: \ + case 0xe3: \ + case 0xe4: \ + case 0xe5: \ + case 0xe6: \ + case 0xe7: \ + case 0xe8: \ + case 0xe9: \ + case 0xea: \ + case 0xeb: \ + case 0xec: \ + case 0xed: \ + case 0xee: \ + case 0xef: \ + case 0xf0: \ + case 0xf1: \ + case 0xf2: \ + case 0xf3: \ + case 0xf4: \ + case 0xf5: \ + case 0xf6: \ + case 0xf7: \ + case 0xf8: \ + case 0xf9: \ + case 0xfa: \ + case 0xfb: \ + case 0xfc: \ + case 0xfd: \ + case 0xfe: \ + case 0xff + +static int is_ws(uint8_t c) { + switch (c) { + case ' ': + case '\t': + return 1; + default: + return 0; + } +} + +#ifdef __AVX2__ +# ifdef _MSC_VER +# include + +static int ctz(unsigned int v) { + unsigned long n; + + /* Assume that v is not 0. */ + _BitScanForward(&n, v); + + return (int)n; +} +# else /* !_MSC_VER */ +# define ctz __builtin_ctz +# endif /* !_MSC_VER */ +#endif /* __AVX2__ */ + +static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; } + +static void parser_discard_ows(sfparse_parser *sfp) { + for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos) + ; +} + +static void parser_discard_sp(sfparse_parser *sfp) { + for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos) + ; +} + +static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) { + sfp->state &= ~SFPARSE_STATE_OP_MASK; + sfp->state |= op; +} + +static void parser_unset_inner_list_state(sfparse_parser *sfp) { + sfp->state &= ~SFPARSE_STATE_INNER_LIST; +} + +#ifdef __AVX2__ +static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) { + const __m256i us = _mm256_set1_epi8('_'); + const __m256i ds = _mm256_set1_epi8('-'); + const __m256i dot = _mm256_set1_epi8('.'); + const __m256i ast = _mm256_set1_epi8('*'); + const __m256i r0l = _mm256_set1_epi8('0' - 1); + const __m256i r0r = _mm256_set1_epi8('9' + 1); + const __m256i r1l = _mm256_set1_epi8('a' - 1); + const __m256i r1r = _mm256_set1_epi8('z' + 1); + __m256i s, x; + uint32_t m; + + for (; first != last; first += 32) { + s = 
_mm256_loadu_si256((void *)first); + + x = _mm256_cmpeq_epi8(s, us); + x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ds), x); + x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dot), x); + x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ast), x); + x = _mm256_or_si256( + _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)), + x); + x = _mm256_or_si256( + _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), + x); + + m = ~(uint32_t)_mm256_movemask_epi8(x); + if (m) { + return first + ctz(m); + } + } + + return last; +} +#endif /* __AVX2__ */ + +static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) { + const uint8_t *base; +#ifdef __AVX2__ + const uint8_t *last; +#endif /* __AVX2__ */ + + switch (*sfp->pos) { + case '*': + LCALPHA_CASES: + break; + default: + return SFPARSE_ERR_PARSE; + } + + base = sfp->pos++; + +#ifdef __AVX2__ + if (sfp->end - sfp->pos >= 32) { + last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); + + sfp->pos = find_char_key(sfp->pos, last); + if (sfp->pos != last) { + goto fin; + } + } +#endif /* __AVX2__ */ + + for (; !parser_eof(sfp); ++sfp->pos) { + switch (*sfp->pos) { + case '_': + case '-': + case '.': + case '*': + DIGIT_CASES: + LCALPHA_CASES: + continue; + } + + break; + } + +#ifdef __AVX2__ +fin: +#endif /* __AVX2__ */ + if (dest) { + dest->base = (uint8_t *)base; + dest->len = (size_t)(sfp->pos - dest->base); + } + + return 0; +} + +static int parser_number(sfparse_parser *sfp, sfparse_value *dest) { + int sign = 1; + int64_t value = 0; + size_t len = 0; + size_t fpos = 0; + + if (*sfp->pos == '-') { + ++sfp->pos; + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + sign = -1; + } + + assert(!parser_eof(sfp)); + + for (; !parser_eof(sfp); ++sfp->pos) { + switch (*sfp->pos) { + DIGIT_CASES: + if (++len > 15) { + return SFPARSE_ERR_PARSE; + } + + value *= 10; + value += *sfp->pos - '0'; + + continue; + } + + break; + } + + if (len == 0) { + return SFPARSE_ERR_PARSE; + } + + if (parser_eof(sfp) || 
*sfp->pos != '.') { + if (dest) { + dest->type = SFPARSE_TYPE_INTEGER; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + dest->integer = value * sign; + } + + return 0; + } + + /* decimal */ + + if (len > 12) { + return SFPARSE_ERR_PARSE; + } + + fpos = len; + + ++sfp->pos; + + for (; !parser_eof(sfp); ++sfp->pos) { + switch (*sfp->pos) { + DIGIT_CASES: + if (++len > 15) { + return SFPARSE_ERR_PARSE; + } + + value *= 10; + value += *sfp->pos - '0'; + + continue; + } + + break; + } + + if (fpos == len || len - fpos > 3) { + return SFPARSE_ERR_PARSE; + } + + if (dest) { + dest->type = SFPARSE_TYPE_DECIMAL; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + dest->decimal.numer = value * sign; + + switch (len - fpos) { + case 1: + dest->decimal.denom = 10; + + break; + case 2: + dest->decimal.denom = 100; + + break; + case 3: + dest->decimal.denom = 1000; + + break; + } + } + + return 0; +} + +static int parser_date(sfparse_parser *sfp, sfparse_value *dest) { + int rv; + sfparse_value val; + + /* The first byte has already been validated by the caller. 
*/ + assert('@' == *sfp->pos); + + ++sfp->pos; + + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + rv = parser_number(sfp, &val); + if (rv != 0) { + return rv; + } + + if (val.type != SFPARSE_TYPE_INTEGER) { + return SFPARSE_ERR_PARSE; + } + + if (dest) { + *dest = val; + dest->type = SFPARSE_TYPE_DATE; + } + + return 0; +} + +#ifdef __AVX2__ +static const uint8_t *find_char_string(const uint8_t *first, + const uint8_t *last) { + const __m256i bs = _mm256_set1_epi8('\\'); + const __m256i dq = _mm256_set1_epi8('"'); + const __m256i del = _mm256_set1_epi8(0x7f); + const __m256i sp = _mm256_set1_epi8(' '); + __m256i s, x; + uint32_t m; + + for (; first != last; first += 32) { + s = _mm256_loadu_si256((void *)first); + + x = _mm256_cmpgt_epi8(sp, s); + x = _mm256_or_si256(_mm256_cmpeq_epi8(s, bs), x); + x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dq), x); + x = _mm256_or_si256(_mm256_cmpeq_epi8(s, del), x); + + m = (uint32_t)_mm256_movemask_epi8(x); + if (m) { + return first + ctz(m); + } + } + + return last; +} +#endif /* __AVX2__ */ + +static int parser_string(sfparse_parser *sfp, sfparse_value *dest) { + const uint8_t *base; +#ifdef __AVX2__ + const uint8_t *last; +#endif /* __AVX2__ */ + uint32_t flags = SFPARSE_VALUE_FLAG_NONE; + + /* The first byte has already been validated by the caller. 
*/ + assert('"' == *sfp->pos); + + base = ++sfp->pos; + +#ifdef __AVX2__ + for (; sfp->end - sfp->pos >= 32; ++sfp->pos) { + last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); + + sfp->pos = find_char_string(sfp->pos, last); + if (sfp->pos == last) { + break; + } + + switch (*sfp->pos) { + case '\\': + ++sfp->pos; + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + switch (*sfp->pos) { + case '"': + case '\\': + flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING; + + break; + default: + return SFPARSE_ERR_PARSE; + } + + break; + case '"': + goto fin; + default: + return SFPARSE_ERR_PARSE; + } + } +#endif /* __AVX2__ */ + + for (; !parser_eof(sfp); ++sfp->pos) { + switch (*sfp->pos) { + X20_21_CASES: + X23_5B_CASES: + X5D_7E_CASES: + break; + case '\\': + ++sfp->pos; + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + switch (*sfp->pos) { + case '"': + case '\\': + flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING; + + break; + default: + return SFPARSE_ERR_PARSE; + } + + break; + case '"': + goto fin; + default: + return SFPARSE_ERR_PARSE; + } + } + + return SFPARSE_ERR_PARSE; + +fin: + if (dest) { + dest->type = SFPARSE_TYPE_STRING; + dest->flags = flags; + dest->vec.len = (size_t)(sfp->pos - base); + dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; + } + + ++sfp->pos; + + return 0; +} + +#ifdef __AVX2__ +static const uint8_t *find_char_token(const uint8_t *first, + const uint8_t *last) { + /* r0: !..:, excluding "(), + r1: A..Z + r2: ^..~, excluding {} */ + const __m256i r0l = _mm256_set1_epi8('!' 
- 1); + const __m256i r0r = _mm256_set1_epi8(':' + 1); + const __m256i dq = _mm256_set1_epi8('"'); + const __m256i prl = _mm256_set1_epi8('('); + const __m256i prr = _mm256_set1_epi8(')'); + const __m256i comma = _mm256_set1_epi8(','); + const __m256i r1l = _mm256_set1_epi8('A' - 1); + const __m256i r1r = _mm256_set1_epi8('Z' + 1); + const __m256i r2l = _mm256_set1_epi8('^' - 1); + const __m256i r2r = _mm256_set1_epi8('~' + 1); + const __m256i cbl = _mm256_set1_epi8('{'); + const __m256i cbr = _mm256_set1_epi8('}'); + __m256i s, x; + uint32_t m; + + for (; first != last; first += 32) { + s = _mm256_loadu_si256((void *)first); + + x = _mm256_andnot_si256( + _mm256_cmpeq_epi8(s, comma), + _mm256_andnot_si256( + _mm256_cmpeq_epi8(s, prr), + _mm256_andnot_si256( + _mm256_cmpeq_epi8(s, prl), + _mm256_andnot_si256(_mm256_cmpeq_epi8(s, dq), + _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), + _mm256_cmpgt_epi8(r0r, s)))))); + x = _mm256_or_si256( + _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), + x); + x = _mm256_or_si256( + _mm256_andnot_si256( + _mm256_cmpeq_epi8(s, cbr), + _mm256_andnot_si256(_mm256_cmpeq_epi8(s, cbl), + _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), + _mm256_cmpgt_epi8(r2r, s)))), + x); + + m = ~(uint32_t)_mm256_movemask_epi8(x); + if (m) { + return first + ctz(m); + } + } + + return last; +} +#endif /* __AVX2__ */ + +static int parser_token(sfparse_parser *sfp, sfparse_value *dest) { + const uint8_t *base; +#ifdef __AVX2__ + const uint8_t *last; +#endif /* __AVX2__ */ + + /* The first byte has already been validated by the caller. 
*/ + base = sfp->pos++; + +#ifdef __AVX2__ + if (sfp->end - sfp->pos >= 32) { + last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); + + sfp->pos = find_char_token(sfp->pos, last); + if (sfp->pos != last) { + goto fin; + } + } +#endif /* __AVX2__ */ + + for (; !parser_eof(sfp); ++sfp->pos) { + switch (*sfp->pos) { + TOKEN_CASES: + continue; + } + + break; + } + +#ifdef __AVX2__ +fin: +#endif /* __AVX2__ */ + if (dest) { + dest->type = SFPARSE_TYPE_TOKEN; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + dest->vec.base = (uint8_t *)base; + dest->vec.len = (size_t)(sfp->pos - base); + } + + return 0; +} + +#ifdef __AVX2__ +static const uint8_t *find_char_byteseq(const uint8_t *first, + const uint8_t *last) { + const __m256i pls = _mm256_set1_epi8('+'); + const __m256i fs = _mm256_set1_epi8('/'); + const __m256i r0l = _mm256_set1_epi8('0' - 1); + const __m256i r0r = _mm256_set1_epi8('9' + 1); + const __m256i r1l = _mm256_set1_epi8('A' - 1); + const __m256i r1r = _mm256_set1_epi8('Z' + 1); + const __m256i r2l = _mm256_set1_epi8('a' - 1); + const __m256i r2r = _mm256_set1_epi8('z' + 1); + __m256i s, x; + uint32_t m; + + for (; first != last; first += 32) { + s = _mm256_loadu_si256((void *)first); + + x = _mm256_cmpeq_epi8(s, pls); + x = _mm256_or_si256(_mm256_cmpeq_epi8(s, fs), x); + x = _mm256_or_si256( + _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)), + x); + x = _mm256_or_si256( + _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), + x); + x = _mm256_or_si256( + _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), _mm256_cmpgt_epi8(r2r, s)), + x); + + m = ~(uint32_t)_mm256_movemask_epi8(x); + if (m) { + return first + ctz(m); + } + } + + return last; +} +#endif /* __AVX2__ */ + +static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) { + const uint8_t *base; +#ifdef __AVX2__ + const uint8_t *last; +#endif /* __AVX2__ */ + + /* The first byte has already been validated by the caller. 
*/ + assert(':' == *sfp->pos); + + base = ++sfp->pos; + +#ifdef __AVX2__ + if (sfp->end - sfp->pos >= 32) { + last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); + sfp->pos = find_char_byteseq(sfp->pos, last); + } +#endif /* __AVX2__ */ + + for (; !parser_eof(sfp); ++sfp->pos) { + switch (*sfp->pos) { + case '+': + case '/': + DIGIT_CASES: + ALPHA_CASES: + continue; + case '=': + switch ((sfp->pos - base) & 0x3) { + case 0: + case 1: + return SFPARSE_ERR_PARSE; + case 2: + ++sfp->pos; + + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + if (*sfp->pos == '=') { + ++sfp->pos; + } + + break; + case 3: + ++sfp->pos; + + break; + } + + if (parser_eof(sfp) || *sfp->pos != ':') { + return SFPARSE_ERR_PARSE; + } + + goto fin; + case ':': + if (((sfp->pos - base) & 0x3) == 1) { + return SFPARSE_ERR_PARSE; + } + + goto fin; + default: + return SFPARSE_ERR_PARSE; + } + } + + return SFPARSE_ERR_PARSE; + +fin: + if (dest) { + dest->type = SFPARSE_TYPE_BYTESEQ; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + dest->vec.len = (size_t)(sfp->pos - base); + dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; + } + + ++sfp->pos; + + return 0; +} + +static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) { + int b; + + /* The first byte has already been validated by the caller. */ + assert('?' 
== *sfp->pos); + + ++sfp->pos; + + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + switch (*sfp->pos) { + case '0': + b = 0; + + break; + case '1': + b = 1; + + break; + default: + return SFPARSE_ERR_PARSE; + } + + ++sfp->pos; + + if (dest) { + dest->type = SFPARSE_TYPE_BOOLEAN; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + dest->boolean = b; + } + + return 0; +} + +static int pctdecode(uint8_t *pc, const uint8_t **ppos) { + uint8_t c, b = **ppos; + + switch (b) { + DIGIT_CASES: + c = (uint8_t)((b - '0') << 4); + + break; + LCHEXALPHA_CASES: + c = (uint8_t)((b - 'a' + 10) << 4); + + break; + default: + return -1; + } + + b = *++*ppos; + + switch (b) { + DIGIT_CASES: + c |= (uint8_t)(b - '0'); + + break; + LCHEXALPHA_CASES: + c |= (uint8_t)(b - 'a' + 10); + + break; + default: + return -1; + } + + *pc = c; + ++*ppos; + + return 0; +} + +/* Start of utf8 dfa */ +/* Copyright (c) 2008-2010 Bjoern Hoehrmann + * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. + * + * Copyright (c) 2008-2009 Bjoern Hoehrmann + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define UTF8_ACCEPT 0 +#define UTF8_REJECT 12 + +/* clang-format off */ +static const uint8_t utf8d[] = { + /* + * The first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. + */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + /* + * The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. 
+ */ + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; +/* clang-format on */ + +static void utf8_decode(uint32_t *state, uint8_t byte) { + *state = utf8d[256 + *state + utf8d[byte]]; +} + +/* End of utf8 dfa */ + +static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) { + const uint8_t *base; + uint8_t c; + uint32_t utf8state = UTF8_ACCEPT; + + assert('%' == *sfp->pos); + + ++sfp->pos; + + if (parser_eof(sfp) || *sfp->pos != '"') { + return SFPARSE_ERR_PARSE; + } + + base = ++sfp->pos; + + for (; !parser_eof(sfp);) { + switch (*sfp->pos) { + X00_1F_CASES: + X7F_FF_CASES: + return SFPARSE_ERR_PARSE; + case '%': + ++sfp->pos; + + if (sfp->pos + 2 > sfp->end) { + return SFPARSE_ERR_PARSE; + } + + if (pctdecode(&c, &sfp->pos) != 0) { + return SFPARSE_ERR_PARSE; + } + + utf8_decode(&utf8state, c); + if (utf8state == UTF8_REJECT) { + return SFPARSE_ERR_PARSE; + } + + break; + case '"': + if (utf8state != UTF8_ACCEPT) { + return SFPARSE_ERR_PARSE; + } + + if (dest) { + dest->type = SFPARSE_TYPE_DISPSTRING; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + dest->vec.len = (size_t)(sfp->pos - base); + dest->vec.base = dest->vec.len == 0 ? 
NULL : (uint8_t *)base; + } + + ++sfp->pos; + + return 0; + default: + if (utf8state != UTF8_ACCEPT) { + return SFPARSE_ERR_PARSE; + } + + ++sfp->pos; + } + } + + return SFPARSE_ERR_PARSE; +} + +static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) { + switch (*sfp->pos) { + case '"': + return parser_string(sfp, dest); + case '-': + DIGIT_CASES: + return parser_number(sfp, dest); + case '@': + return parser_date(sfp, dest); + case ':': + return parser_byteseq(sfp, dest); + case '?': + return parser_boolean(sfp, dest); + case '*': + ALPHA_CASES: + return parser_token(sfp, dest); + case '%': + return parser_dispstring(sfp, dest); + default: + return SFPARSE_ERR_PARSE; + } +} + +static int parser_skip_inner_list(sfparse_parser *sfp); + +int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key, + sfparse_value *dest_value) { + int rv; + + switch (sfp->state & SFPARSE_STATE_OP_MASK) { + case SFPARSE_STATE_BEFORE: + rv = parser_skip_inner_list(sfp); + if (rv != 0) { + return rv; + } + + /* fall through */ + case SFPARSE_STATE_BEFORE_PARAMS: + parser_set_op_state(sfp, SFPARSE_STATE_PARAMS); + + break; + case SFPARSE_STATE_PARAMS: + break; + default: + assert(0); + abort(); + } + + if (parser_eof(sfp) || *sfp->pos != ';') { + parser_set_op_state(sfp, SFPARSE_STATE_AFTER); + + return SFPARSE_ERR_EOF; + } + + ++sfp->pos; + + parser_discard_sp(sfp); + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + rv = parser_key(sfp, dest_key); + if (rv != 0) { + return rv; + } + + if (parser_eof(sfp) || *sfp->pos != '=') { + if (dest_value) { + dest_value->type = SFPARSE_TYPE_BOOLEAN; + dest_value->flags = SFPARSE_VALUE_FLAG_NONE; + dest_value->boolean = 1; + } + + return 0; + } + + ++sfp->pos; + + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + return parser_bare_item(sfp, dest_value); +} + +static int parser_skip_params(sfparse_parser *sfp) { + int rv; + + for (;;) { + rv = sfparse_parser_param(sfp, NULL, NULL); + switch (rv) { + case 0: + 
break; + case SFPARSE_ERR_EOF: + return 0; + case SFPARSE_ERR_PARSE: + return rv; + default: + assert(0); + abort(); + } + } +} + +int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) { + int rv; + + switch (sfp->state & SFPARSE_STATE_OP_MASK) { + case SFPARSE_STATE_BEFORE: + parser_discard_sp(sfp); + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + break; + case SFPARSE_STATE_BEFORE_PARAMS: + rv = parser_skip_params(sfp); + if (rv != 0) { + return rv; + } + + /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set + another state without reading the state. */ + /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */ + + /* fall through */ + case SFPARSE_STATE_AFTER: + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + switch (*sfp->pos) { + case ' ': + parser_discard_sp(sfp); + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + break; + case ')': + break; + default: + return SFPARSE_ERR_PARSE; + } + + break; + default: + assert(0); + abort(); + } + + if (*sfp->pos == ')') { + ++sfp->pos; + + parser_unset_inner_list_state(sfp); + parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS); + + return SFPARSE_ERR_EOF; + } + + rv = parser_bare_item(sfp, dest); + if (rv != 0) { + return rv; + } + + parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS); + + return 0; +} + +static int parser_skip_inner_list(sfparse_parser *sfp) { + int rv; + + for (;;) { + rv = sfparse_parser_inner_list(sfp, NULL); + switch (rv) { + case 0: + break; + case SFPARSE_ERR_EOF: + return 0; + case SFPARSE_ERR_PARSE: + return rv; + default: + assert(0); + abort(); + } + } +} + +static int parser_next_key_or_item(sfparse_parser *sfp) { + parser_discard_ows(sfp); + + if (parser_eof(sfp)) { + return SFPARSE_ERR_EOF; + } + + if (*sfp->pos != ',') { + return SFPARSE_ERR_PARSE; + } + + ++sfp->pos; + + parser_discard_ows(sfp); + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + return 0; +} + +static int parser_dict_value(sfparse_parser *sfp, 
sfparse_value *dest) { + int rv; + + if (parser_eof(sfp) || *(sfp->pos) != '=') { + /* Boolean true */ + if (dest) { + dest->type = SFPARSE_TYPE_BOOLEAN; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + dest->boolean = 1; + } + + sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS; + + return 0; + } + + ++sfp->pos; + + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + if (*sfp->pos == '(') { + if (dest) { + dest->type = SFPARSE_TYPE_INNER_LIST; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + } + + ++sfp->pos; + + sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE; + + return 0; + } + + rv = parser_bare_item(sfp, dest); + if (rv != 0) { + return rv; + } + + sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS; + + return 0; +} + +int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key, + sfparse_value *dest_value) { + int rv; + + switch (sfp->state) { + case SFPARSE_STATE_DICT_INNER_LIST_BEFORE: + rv = parser_skip_inner_list(sfp); + if (rv != 0) { + return rv; + } + + /* fall through */ + case SFPARSE_STATE_DICT_BEFORE_PARAMS: + rv = parser_skip_params(sfp); + if (rv != 0) { + return rv; + } + + /* fall through */ + case SFPARSE_STATE_DICT_AFTER: + rv = parser_next_key_or_item(sfp); + if (rv != 0) { + return rv; + } + + break; + case SFPARSE_STATE_INITIAL: + parser_discard_sp(sfp); + + if (parser_eof(sfp)) { + return SFPARSE_ERR_EOF; + } + + break; + default: + assert(0); + abort(); + } + + rv = parser_key(sfp, dest_key); + if (rv != 0) { + return rv; + } + + return parser_dict_value(sfp, dest_value); +} + +int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) { + int rv; + + switch (sfp->state) { + case SFPARSE_STATE_LIST_INNER_LIST_BEFORE: + rv = parser_skip_inner_list(sfp); + if (rv != 0) { + return rv; + } + + /* fall through */ + case SFPARSE_STATE_LIST_BEFORE_PARAMS: + rv = parser_skip_params(sfp); + if (rv != 0) { + return rv; + } + + /* fall through */ + case SFPARSE_STATE_LIST_AFTER: + rv = parser_next_key_or_item(sfp); + if (rv != 0) { + return rv; 
+ } + + break; + case SFPARSE_STATE_INITIAL: + parser_discard_sp(sfp); + + if (parser_eof(sfp)) { + return SFPARSE_ERR_EOF; + } + + break; + default: + assert(0); + abort(); + } + + if (*sfp->pos == '(') { + if (dest) { + dest->type = SFPARSE_TYPE_INNER_LIST; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + } + + ++sfp->pos; + + sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE; + + return 0; + } + + rv = parser_bare_item(sfp, dest); + if (rv != 0) { + return rv; + } + + sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS; + + return 0; +} + +int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) { + int rv; + + switch (sfp->state) { + case SFPARSE_STATE_INITIAL: + parser_discard_sp(sfp); + + if (parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + break; + case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE: + rv = parser_skip_inner_list(sfp); + if (rv != 0) { + return rv; + } + + /* fall through */ + case SFPARSE_STATE_ITEM_BEFORE_PARAMS: + rv = parser_skip_params(sfp); + if (rv != 0) { + return rv; + } + + /* fall through */ + case SFPARSE_STATE_ITEM_AFTER: + parser_discard_sp(sfp); + + if (!parser_eof(sfp)) { + return SFPARSE_ERR_PARSE; + } + + return SFPARSE_ERR_EOF; + default: + assert(0); + abort(); + } + + if (*sfp->pos == '(') { + if (dest) { + dest->type = SFPARSE_TYPE_INNER_LIST; + dest->flags = SFPARSE_VALUE_FLAG_NONE; + } + + ++sfp->pos; + + sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE; + + return 0; + } + + rv = parser_bare_item(sfp, dest); + if (rv != 0) { + return rv; + } + + sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS; + + return 0; +} + +void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data, + size_t datalen) { + if (datalen == 0) { + sfp->pos = sfp->end = NULL; + } else { + sfp->pos = data; + sfp->end = data + datalen; + } + + sfp->state = SFPARSE_STATE_INITIAL; +} + +void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) { + const uint8_t *p, *q; + uint8_t *o; + size_t len, slen; + + if (src->len == 0) { + dest->len = 0; + + 
return; + } + + o = dest->base; + p = src->base; + len = src->len; + + for (;;) { + q = memchr(p, '\\', len); + if (q == NULL) { + memcpy(o, p, len); + o += len; + + dest->len = (size_t)(o - dest->base); + + return; + } + + slen = (size_t)(q - p); + memcpy(o, p, slen); + o += slen; + + p = q + 1; + *o++ = *p++; + len -= slen + 2; + } +} + +void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) { + static const int index_tbl[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, + -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1}; + uint8_t *o; + const uint8_t *p, *end; + uint32_t n; + size_t i, left; + int idx; + + if (src->len == 0) { + dest->len = 0; + + return; + } + + o = dest->base; + p = src->base; + left = src->len & 0x3; + if (left == 0 && src->base[src->len - 1] == '=') { + left = 4; + } + end = src->base + src->len - left; + + for (; p != end;) { + n = 0; + + for (i = 1; i <= 4; ++i, ++p) { + idx = index_tbl[*p]; + + assert(idx != -1); + + n += (uint32_t)(idx << (24 - i * 6)); + } + + *o++ = (uint8_t)(n >> 16); + *o++ 
= (n >> 8) & 0xffu; + *o++ = n & 0xffu; + } + + switch (left) { + case 0: + goto fin; + case 1: + assert(0); + abort(); + case 3: + if (src->base[src->len - 1] == '=') { + left = 2; + } + + break; + case 4: + assert('=' == src->base[src->len - 1]); + + if (src->base[src->len - 2] == '=') { + left = 2; + } else { + left = 3; + } + + break; + } + + switch (left) { + case 2: + *o = (uint8_t)(index_tbl[*p++] << 2); + *o++ |= (uint8_t)(index_tbl[*p++] >> 4); + + break; + case 3: + n = (uint32_t)(index_tbl[*p++] << 10); + n += (uint32_t)(index_tbl[*p++] << 4); + n += (uint32_t)(index_tbl[*p++] >> 2); + *o++ = (n >> 8) & 0xffu; + *o++ = n & 0xffu; + + break; + } + +fin: + dest->len = (size_t)(o - dest->base); +} + +void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) { + const uint8_t *p, *q; + uint8_t *o; + size_t len, slen; + + if (src->len == 0) { + dest->len = 0; + + return; + } + + o = dest->base; + p = src->base; + len = src->len; + + for (;;) { + q = memchr(p, '%', len); + if (q == NULL) { + memcpy(o, p, len); + o += len; + + dest->len = (size_t)(o - dest->base); + + return; + } + + slen = (size_t)(q - p); + memcpy(o, p, slen); + o += slen; + + p = q + 1; + + pctdecode(o++, &p); + + len -= slen + 3; + } +}