From b924acb12aaec655c7301f2d2e9d9e5ce7cafc5e Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sat, 1 Feb 2025 00:12:45 +0000 Subject: [PATCH 1/3] inet: Add rmem accounting in append_inet_rx_pbuf This should cover UDP and the ICMPv4/v6 sockets. Signed-off-by: Pedro Falcato --- kernel/kernel/net/ipv4/ipv4.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/kernel/net/ipv4/ipv4.cpp b/kernel/kernel/net/ipv4/ipv4.cpp index f4861eb3..b362ce44 100644 --- a/kernel/kernel/net/ipv4/ipv4.cpp +++ b/kernel/kernel/net/ipv4/ipv4.cpp @@ -783,8 +783,19 @@ void inet_socket::unbind() proto_fam->unbind(this); } +static void inet_rx_dtor(struct packetbuf *pbf) +{ + sock_discharge_rmem_pbf(pbf->sock, pbf); +} + void inet_socket::append_inet_rx_pbuf(packetbuf *buf) { + /* Make sure we charge rmem and add a dtor */ + if (!sock_charge_rmem_pbf(this, buf)) + return; + buf->sock = this; + buf->dtor = inet_rx_dtor; + buf->ref(); list_add_tail(&buf->list_node, &rx_packet_list); From 806dc32e5979faa7a9d211122ad1728cb029edc4 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sat, 1 Feb 2025 01:42:56 +0000 Subject: [PATCH 2/3] socket: Don't call write_space if we're freeing rmem Signed-off-by: Pedro Falcato --- kernel/include/onyx/net/socket.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/include/onyx/net/socket.h b/kernel/include/onyx/net/socket.h index ba8aca17..768d8a84 100644 --- a/kernel/include/onyx/net/socket.h +++ b/kernel/include/onyx/net/socket.h @@ -417,8 +417,6 @@ static inline void sock_discharge_rmem_bytes(struct socket *sock, unsigned int b WARN_ON(queued < new_space); queued = cmpxchg_relaxed(&sock->sk_rmem, expected, new_space); } while (queued != expected); - - sock->sock_ops->write_space(sock); } static inline void sock_discharge_rmem_pbf(struct socket *sock, struct packetbuf *pbf) From accb89801dc6432af91bbb4e69e81355a40c964a Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sat, 1 Feb 2025 01:44:15 +0000 Subject: [PATCH 3/3] udp: Improve rmem accounting on loopback comms Improve rmem accounting by improving wmem accouting. Use pbf_alloc_sk. We only allocate a single fragment and thus simplify all of the old code around here. This is the part that really, really matters. ICMP (and AF_UNIX and netkernel) can be converted at its own pace. Signed-off-by: Pedro Falcato --- kernel/kernel/net/udp.cpp | 60 +++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/kernel/kernel/net/udp.cpp b/kernel/kernel/net/udp.cpp index 6c924056..68277bd8 100644 --- a/kernel/kernel/net/udp.cpp +++ b/kernel/kernel/net/udp.cpp @@ -107,20 +107,6 @@ uint16_t udp_calculate_checksum(struct udphdr *header, const inet_route::addr &s } } -expected, int> udp_create_pbuf(size_t payload_size, size_t headers_len) -{ - auto b = make_refc(); - if (!b) - return unexpected{-ENOMEM}; - - if (!b->allocate_space(payload_size + headers_len + sizeof(udphdr) + PACKET_MAX_HEAD_LENGTH)) - return unexpected{-ENOMEM}; - - b->reserve_headers(headers_len + sizeof(udphdr) + PACKET_MAX_HEAD_LENGTH); - - return b; -} - int udp_socket::bind(sockaddr *addr, socklen_t len) { auto fam = get_proto_fam(); @@ -184,17 +170,18 @@ void udp_prepare_headers(packetbuf *buf, in_port_t sport, in_port_t dport, size_ udp_header->checksum = 0; } -int udp_put_data(packetbuf *buf, const msghdr *msg, size_t length) +static int udp_put_data(struct packetbuf *buf, struct iovec_iter *iter) { - unsigned char *ptr = (unsigned char *) buf->put((unsigned int) length); + unsigned char *ptr = (unsigned char *) buf->put((unsigned int) iter->bytes); - for (int i = 0; i < msg->msg_iovlen; i++) + while (!iter->empty()) { - const auto &vec = msg->msg_iov[i]; + const auto vec = iter->curiovec(); if (copy_from_user(ptr, vec.iov_base, vec.iov_len) < 0) return -EFAULT; ptr += vec.iov_len; + iter->advance(vec.iov_len); } return 0; @@ -252,6 +239,7 @@ ssize_t udp_socket::udp_sendmsg(const msghdr *msg, int flags, const inet_sock_ad if (payload_size > UINT16_MAX) return -EMSGSIZE; + iovec_iter iter{{msg->msg_iov, (size_t) msg->msg_iovlen}, (size_t) payload_size, IOVEC_USER}; inet_route route; constexpr auto our_domain = inet_domain_type_v; @@ -314,23 +302,33 @@ ssize_t udp_socket::udp_sendmsg(const msghdr *msg, int flags, const inet_sock_ad */ if (!will_append) [[likely]] { - auto pbf_st = udp_create_pbuf(payload_size, inet_header_size(our_domain)); - - if (pbf_st.has_error()) - return pbf_st.error(); - - auto buf = pbf_st.value(); - - udp_prepare_headers(buf.get(), src_addr.port, dst.port, payload_size); - - if (udp_put_data(buf.get(), msg, payload_size) < 0) + struct packetbuf *pbf; + + /* XXX: UDP is currently simplified a lot because pbf_alloc_sk allocates a single fragment, + * thus we can do away with only using the data area instead of carefully and painfully + * handling fragments. + */ + pbf = + pbf_alloc_sk(GFP_KERNEL, this, payload_size + sizeof(udphdr) + PACKET_MAX_HEAD_LENGTH); + if (!pbf) + return -ENOBUFS; + pbf_reserve_headers(pbf, sizeof(udphdr) + PACKET_MAX_HEAD_LENGTH); + + udp_prepare_headers(pbf, src_addr.port, dst.port, payload_size); + if (udp_put_data(pbf, &iter) < 0) + { + pbf_free(pbf); return -EFAULT; + } - udp_do_csum(buf.get(), route); - - if (int st = udp_do_send(buf.get(), route); st < 0) + udp_do_csum(pbf, route); + if (int st = udp_do_send(pbf, route); st < 0) + { + pbf_free(pbf); return st; + } + pbf_put_ref(pbf); return payload_size; }