vquic: handling of io improvements

- better tracing of what system call is used and how often
- ngtcp2: combine vquic_send into larger chunks
- ngtcp2: define own PMTU values and enable MTU probing
- ngtcp2: trace interesting remote transport parameters

Closes #18812
This commit is contained in:
Stefan Eissing 2025-10-02 14:20:05 +02:00 committed by Daniel Stenberg
parent eefd03c572
commit 9aa8e9a783
No known key found for this signature in database
GPG key ID: 5CC908FDB71E12C2
2 changed files with 147 additions and 97 deletions

View file

@ -410,13 +410,23 @@ static void qlog_callback(void *user_data, uint32_t flags,
ctx->qlogfd = -1;
}
}
}
static void quic_settings(struct cf_ngtcp2_ctx *ctx,
struct Curl_easy *data,
struct pkt_io_ctx *pktx)
{
#ifdef NGTCP2_SETTINGS_V2x
static uint16_t mtu_probes[] = {
1472, /* what h2o offers */
1452, /* what Caddy offers */
1454 - 48, /* The well known MTU used by a domestic optic fiber
service in Japan. */
1390 - 48, /* Typical Tunneled MTU */
1280 - 48, /* IPv6 minimum MTU */
1492 - 48, /* PPPoE */
};
#endif
ngtcp2_settings *s = &ctx->settings;
ngtcp2_transport_params *t = &ctx->transport_params;
@ -433,6 +443,12 @@ static void quic_settings(struct cf_ngtcp2_ctx *ctx,
data->set.connecttimeout * NGTCP2_MILLISECONDS : QUIC_HANDSHAKE_TIMEOUT;
s->max_window = 100 * ctx->max_stream_window;
s->max_stream_window = 10 * ctx->max_stream_window;
s->no_pmtud = FALSE;
#ifdef NGTCP2_SETTINGS_V2x
s->pmtud_probes = mtu_probes;
s->pmtud_probeslen = CURL_ARRAYSIZE(mtu_probes);
s->max_tx_udp_payload_size = 64 * 1024; /* mtu_probes[0]; */
#endif
t->initial_max_data = 10 * ctx->max_stream_window;
t->initial_max_stream_data_bidi_local = ctx->max_stream_window;
@ -468,8 +484,18 @@ static int cf_ngtcp2_handshake_completed(ngtcp2_conn *tconn, void *user_data)
ctx->tls_vrfy_result = Curl_vquic_tls_verify_peer(&ctx->tls, cf,
data, &ctx->peer);
CURL_TRC_CF(data, cf, "handshake complete after %dms",
(int)curlx_timediff(ctx->handshake_at, ctx->started_at));
if(Curl_trc_is_verbose(data)) {
const ngtcp2_transport_params *rp;
rp = ngtcp2_conn_get_remote_transport_params(ctx->qconn);
CURL_TRC_CF(data, cf, "handshake complete after %dms, remote transport["
"max_udp_payload=%" FMT_PRIu64
", initial_max_data=%" FMT_PRIu64
"]",
(int)curlx_timediff(ctx->handshake_at, ctx->started_at),
(curl_uint64_t)rp->max_udp_payload_size,
(curl_uint64_t)rp->initial_max_data);
}
/* In case of earlydata, where we simulate being connected, update
* the handshake time when we really did connect */
if(ctx->use_earlydata)
@ -1678,6 +1704,9 @@ static CURLcode recv_pkt(const unsigned char *pkt, size_t pktlen,
ngtcp2_path path;
int rv;
if(ecn)
CURL_TRC_CF(pktx->data, pktx->cf, "vquic_recv(len=%zu, ecn=%x)",
pktlen, ecn);
ngtcp2_addr_init(&path.local, (struct sockaddr *)&ctx->q.local_addr,
(socklen_t)ctx->q.local_addrlen);
ngtcp2_addr_init(&path.remote, (struct sockaddr *)remote_addr,
@ -1696,7 +1725,6 @@ static CURLcode recv_pkt(const unsigned char *pkt, size_t pktlen,
return CURLE_PEER_FAILED_VERIFICATION;
return CURLE_RECV_ERROR;
}
return CURLE_OK;
}
@ -1712,6 +1740,10 @@ static CURLcode cf_progress_ingress(struct Curl_cfilter *cf,
pktx_init(&local_pktx, cf, data);
pktx = &local_pktx;
}
else {
pktx_update_time(pktx, cf);
ngtcp2_path_storage_zero(&pktx->ps);
}
result = Curl_vquic_tls_before_recv(&ctx->tls, cf, data);
if(result)
@ -1831,9 +1863,10 @@ static CURLcode cf_progress_egress(struct Curl_cfilter *cf,
{
struct cf_ngtcp2_ctx *ctx = cf->ctx;
size_t nread;
size_t max_payload_size, path_max_payload_size, max_pktcnt;
size_t max_payload_size, path_max_payload_size;
size_t pktcnt = 0;
size_t gsolen = 0; /* this disables gso until we have a clue */
size_t send_quantum;
CURLcode curlcode;
struct pkt_io_ctx local_pktx;
@ -1869,71 +1902,69 @@ static CURLcode cf_progress_egress(struct Curl_cfilter *cf,
max_payload_size = ngtcp2_conn_get_max_tx_udp_payload_size(ctx->qconn);
path_max_payload_size =
ngtcp2_conn_get_path_max_tx_udp_payload_size(ctx->qconn);
/* maximum number of packets buffered before we flush to the socket */
max_pktcnt = CURLMIN(MAX_PKT_BURST,
ctx->q.sendbuf.chunk_size / max_payload_size);
send_quantum = ngtcp2_conn_get_send_quantum(ctx->qconn);
CURL_TRC_CF(data, cf, "egress, collect and send packets, quantum=%zu",
send_quantum);
for(;;) {
/* add the next packet to send, if any, to our buffer */
curlcode = Curl_bufq_sipn(&ctx->q.sendbuf, max_payload_size,
read_pkt_to_send, pktx, &nread);
if(curlcode) {
if(curlcode != CURLE_AGAIN)
return curlcode;
/* Nothing more to add, flush and leave */
curlcode = vquic_send(cf, data, &ctx->q, gsolen);
if(curlcode) {
if(curlcode == CURLE_AGAIN) {
Curl_expire(data, 1, EXPIRE_QUIC);
return CURLE_OK;
}
return curlcode;
if(curlcode == CURLE_AGAIN)
break;
else if(curlcode)
return curlcode;
else {
size_t buflen = Curl_bufq_len(&ctx->q.sendbuf);
if((buflen >= send_quantum) ||
((buflen + gsolen) >= ctx->q.sendbuf.chunk_size))
break;
DEBUGASSERT(nread > 0);
++pktcnt;
if(pktcnt == 1) {
/* first packet in buffer. This is either of a known, "good"
* payload size or it is a PMTUD. We will see. */
gsolen = nread;
}
goto out;
}
DEBUGASSERT(nread > 0);
if(pktcnt == 0) {
/* first packet in buffer. This is either of a known, "good"
* payload size or it is a PMTUD. We will see. */
gsolen = nread;
}
else if(nread > gsolen ||
(gsolen > path_max_payload_size && nread != gsolen)) {
/* The just added packet is a PMTUD *or* the one(s) before the
* just added were PMTUD and the last one is smaller.
* Flush the buffer before the last add. */
curlcode = vquic_send_tail_split(cf, data, &ctx->q,
gsolen, nread, nread);
if(curlcode) {
if(curlcode == CURLE_AGAIN) {
Curl_expire(data, 1, EXPIRE_QUIC);
return CURLE_OK;
else if(nread > gsolen ||
(gsolen > path_max_payload_size && nread != gsolen)) {
/* The just added packet is a PMTUD *or* the one(s) before the
* just added were PMTUD and the last one is smaller.
* Flush the buffer before the last add. */
curlcode = vquic_send_tail_split(cf, data, &ctx->q,
gsolen, nread, nread);
if(curlcode) {
if(curlcode == CURLE_AGAIN) {
Curl_expire(data, 1, EXPIRE_QUIC);
return CURLE_OK;
}
return curlcode;
}
return curlcode;
pktcnt = 0;
}
pktcnt = 0;
continue;
}
if(++pktcnt >= max_pktcnt || nread < gsolen) {
/* Reached MAX_PKT_BURST *or*
* the capacity of our buffer *or*
* last add was shorter than the previous ones, flush */
curlcode = vquic_send(cf, data, &ctx->q, gsolen);
if(curlcode) {
if(curlcode == CURLE_AGAIN) {
Curl_expire(data, 1, EXPIRE_QUIC);
return CURLE_OK;
}
return curlcode;
else if(nread < gsolen) {
/* Reached MAX_PKT_BURST *or*
* the capacity of our buffer *or*
* last add was shorter than the previous ones, flush */
break;
}
/* pktbuf has been completely sent */
pktcnt = 0;
}
}
out:
if(!Curl_bufq_is_empty(&ctx->q.sendbuf)) {
/* time to send */
CURL_TRC_CF(data, cf, "egress, send collected %zu packets in %zu bytes",
pktcnt, Curl_bufq_len(&ctx->q.sendbuf));
curlcode = vquic_send(cf, data, &ctx->q, gsolen);
if(curlcode) {
if(curlcode == CURLE_AGAIN) {
Curl_expire(data, 1, EXPIRE_QUIC);
return CURLE_OK;
}
return curlcode;
}
pktx_update_time(pktx, cf);
ngtcp2_conn_update_pkt_tx_time(ctx->qconn, pktx->ts);
}
return CURLE_OK;
}

View file

@ -55,7 +55,7 @@
#if !defined(CURL_DISABLE_HTTP) && defined(USE_HTTP3)
#define NW_CHUNK_SIZE (64 * 1024)
#define NW_SEND_CHUNKS 2
#define NW_SEND_CHUNKS 1
int Curl_vquic_init(void)
@ -125,6 +125,7 @@ static CURLcode do_sendmsg(struct Curl_cfilter *cf,
const uint8_t *pkt, size_t pktlen, size_t gsolen,
size_t *psent)
{
CURLcode result = CURLE_OK;
#ifdef HAVE_SENDMSG
struct iovec msg_iov;
struct msghdr msg = {0};
@ -181,12 +182,14 @@ static CURLcode do_sendmsg(struct Curl_cfilter *cf,
FALLTHROUGH();
default:
failf(data, "sendmsg() returned %zd (errno %d)", sent, SOCKERRNO);
return CURLE_SEND_ERROR;
result = CURLE_SEND_ERROR;
goto out;
}
}
else if(pktlen != (size_t)sent) {
failf(data, "sendmsg() sent only %zd/%zu bytes", sent, pktlen);
return CURLE_SEND_ERROR;
result = CURLE_SEND_ERROR;
goto out;
}
#else
ssize_t sent;
@ -201,12 +204,14 @@ static CURLcode do_sendmsg(struct Curl_cfilter *cf,
if(sent == -1) {
if(SOCKERRNO == EAGAIN || SOCKERRNO == SOCKEWOULDBLOCK) {
return CURLE_AGAIN;
result = CURLE_AGAIN;
goto out;
}
else {
failf(data, "send() returned %zd (errno %d)", sent, SOCKERRNO);
if(SOCKERRNO != SOCKEMSGSIZE) {
return CURLE_SEND_ERROR;
result = CURLE_SEND_ERROR;
goto out;
}
/* UDP datagram is too large; caused by PMTUD. Just let it be
lost. */
@ -216,9 +221,16 @@ static CURLcode do_sendmsg(struct Curl_cfilter *cf,
(void)cf;
*psent = pktlen;
return CURLE_OK;
out:
return result;
}
#ifdef HAVE_SENDMSG
#define VQUIC_SEND_METHOD "sendmsg"
#else
#define VQUIC_SEND_METHOD "send"
#endif
static CURLcode send_packet_no_gso(struct Curl_cfilter *cf,
struct Curl_easy *data,
struct cf_quic_ctx *qctx,
@ -226,19 +238,23 @@ static CURLcode send_packet_no_gso(struct Curl_cfilter *cf,
size_t gsolen, size_t *psent)
{
const uint8_t *p, *end = pkt + pktlen;
size_t sent;
size_t sent, len, calls = 0;
CURLcode result = CURLE_OK;
*psent = 0;
for(p = pkt; p < end; p += gsolen) {
size_t len = CURLMIN(gsolen, (size_t)(end - p));
CURLcode curlcode = do_sendmsg(cf, data, qctx, p, len, len, &sent);
if(curlcode != CURLE_OK) {
return curlcode;
}
len = CURLMIN(gsolen, (size_t)(end - p));
result = do_sendmsg(cf, data, qctx, p, len, len, &sent);
if(result)
goto out;
*psent += sent;
++calls;
}
out:
CURL_TRC_CF(data, cf, "vquic_%s(len=%zu, gso=%zu, calls=%zu)"
" -> %d, sent=%zu",
VQUIC_SEND_METHOD, pktlen, gsolen, calls, result, *psent);
return CURLE_OK;
}
@ -266,6 +282,9 @@ static CURLcode vquic_send_packets(struct Curl_cfilter *cf,
}
else {
result = do_sendmsg(cf, data, qctx, pkt, pktlen, gsolen, psent);
CURL_TRC_CF(data, cf, "vquic_%s(len=%zu, gso=%zu, calls=1)"
" -> %d, sent=%zu",
VQUIC_SEND_METHOD, pktlen, gsolen, result, *psent);
}
if(!result)
qctx->last_io = qctx->last_op;
@ -289,8 +308,6 @@ CURLcode vquic_flush(struct Curl_cfilter *cf, struct Curl_easy *data,
}
result = vquic_send_packets(cf, data, qctx, buf, blen, gsolen, &sent);
CURL_TRC_CF(data, cf, "vquic_send(len=%zu, gso=%zu) -> %d, sent=%zu",
blen, gsolen, result, sent);
if(result) {
if(result == CURLE_AGAIN) {
Curl_bufq_skip(&qctx->sendbuf, sent);
@ -369,7 +386,7 @@ static CURLcode recvmmsg_packets(struct Curl_cfilter *cf,
struct mmsghdr mmsg[MMSG_NUM];
uint8_t msg_ctrl[MMSG_NUM * CMSG_SPACE(sizeof(int))];
struct sockaddr_storage remote_addr[MMSG_NUM];
size_t total_nread = 0, pkts = 0;
size_t total_nread = 0, pkts = 0, calls = 0;
int mcount, i, n;
char errstr[STRERROR_LEN];
CURLcode result = CURLE_OK;
@ -424,7 +441,7 @@ static CURLcode recvmmsg_packets(struct Curl_cfilter *cf,
goto out;
}
CURL_TRC_CF(data, cf, "recvmmsg() -> %d packets", mcount);
++calls;
for(i = 0; i < mcount; ++i) {
total_nread += mmsg[i].msg_len;
@ -454,8 +471,8 @@ static CURLcode recvmmsg_packets(struct Curl_cfilter *cf,
out:
if(total_nread || result)
CURL_TRC_CF(data, cf, "recvd %zu packets with %zu bytes -> %d",
pkts, total_nread, result);
CURL_TRC_CF(data, cf, "vquic_recvmmsg(len=%zu, packets=%zu, calls=%zu)"
" -> %d", total_nread, pkts, calls, result);
Curl_multi_xfer_sockbuf_release(data, sockbuf);
return result;
}
@ -471,8 +488,9 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf,
struct msghdr msg;
uint8_t buf[64*1024];
struct sockaddr_storage remote_addr;
size_t total_nread, pkts;
ssize_t nread;
size_t total_nread, pkts, calls;
ssize_t rc;
size_t nread;
char errstr[STRERROR_LEN];
CURLcode result = CURLE_OK;
uint8_t msg_ctrl[CMSG_SPACE(sizeof(int))];
@ -481,7 +499,7 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf,
size_t offset, to;
DEBUGASSERT(max_pkts > 0);
for(pkts = 0, total_nread = 0; pkts < max_pkts;) {
for(pkts = 0, total_nread = 0, calls = 0; pkts < max_pkts;) {
/* fully initialise this on each call to `recvmsg()`. There seem to
* operating systems out there that mess with `msg_iov.iov_len`. */
memset(&msg, 0, sizeof(msg));
@ -494,10 +512,10 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf,
msg.msg_namelen = sizeof(remote_addr);
msg.msg_controllen = sizeof(msg_ctrl);
while((nread = recvmsg(qctx->sockfd, &msg, 0)) == -1 &&
while((rc = recvmsg(qctx->sockfd, &msg, 0)) == -1 &&
(SOCKERRNO == SOCKEINTR || SOCKERRNO == SOCKEMSGSIZE))
;
if(nread == -1) {
if(rc == -1) {
if(SOCKERRNO == EAGAIN || SOCKERRNO == SOCKEWOULDBLOCK) {
goto out;
}
@ -511,28 +529,28 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf,
}
Curl_strerror(SOCKERRNO, errstr, sizeof(errstr));
failf(data, "QUIC: recvmsg() unexpectedly returned %zd (errno=%d; %s)",
nread, SOCKERRNO, errstr);
rc, SOCKERRNO, errstr);
result = CURLE_RECV_ERROR;
goto out;
}
total_nread += (size_t)nread;
nread = (size_t)rc;
total_nread += nread;
++calls;
gso_size = vquic_msghdr_get_udp_gro(&msg);
if(gso_size == 0) {
gso_size = (size_t)nread;
gso_size = nread;
}
for(offset = 0; offset < (size_t)nread; offset = to) {
for(offset = 0; offset < nread; offset = to) {
++pkts;
to = offset + gso_size;
if(to > (size_t)nread) {
pktlen = (size_t)nread - offset;
}
else {
if(to > nread)
pktlen = nread - offset;
else
pktlen = gso_size;
}
result =
recv_cb(buf + offset, pktlen, msg.msg_name, msg.msg_namelen, 0, userp);
@ -543,8 +561,8 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf,
out:
if(total_nread || result)
CURL_TRC_CF(data, cf, "recvd %zu packets with %zu bytes -> %d",
pkts, total_nread, result);
CURL_TRC_CF(data, cf, "vquic_recvmsg(len=%zu, packets=%zu, calls=%zu)"
" -> %d", total_nread, pkts, calls, result);
return result;
}
@ -559,7 +577,7 @@ static CURLcode recvfrom_packets(struct Curl_cfilter *cf,
int bufsize = (int)sizeof(buf);
struct sockaddr_storage remote_addr;
socklen_t remote_addrlen = sizeof(remote_addr);
size_t total_nread, pkts;
size_t total_nread, pkts, calls = 0;
ssize_t nread;
char errstr[STRERROR_LEN];
CURLcode result = CURLE_OK;
@ -592,6 +610,7 @@ static CURLcode recvfrom_packets(struct Curl_cfilter *cf,
}
++pkts;
++calls;
total_nread += (size_t)nread;
result = recv_cb(buf, (size_t)nread, &remote_addr, remote_addrlen,
0, userp);
@ -601,8 +620,8 @@ static CURLcode recvfrom_packets(struct Curl_cfilter *cf,
out:
if(total_nread || result)
CURL_TRC_CF(data, cf, "recvd %zu packets with %zu bytes -> %d",
pkts, total_nread, result);
CURL_TRC_CF(data, cf, "vquic_recvfrom(len=%zu, packets=%zu, calls=%zu)"
" -> %d", total_nread, pkts, calls, result);
return result;
}
#endif /* !HAVE_SENDMMSG && !HAVE_SENDMSG */