tests: testrunner reliability improvements

- perform torture tests with '-j2' for shorter runtime
- when waiting overly long on test results, log the tests waited for and
  eventually log the test log directories, for easier analysis of what is
  wrong in CI jobs
- sockfilt.c: treat the Windows errno 109 (ERROR_BROKEN_PIPE) as a socket
  closed by the client and do not exit
- when verifying the https server, do not additionally check the http server
  behind it
- when tearing down the stunnel of a non-responsive https server, tear down
  the http server with it

Closes #14960
2026-07-14 06:57:17 +03:00 · 2024-09-18 16:32:07 +02:00 · 2024-09-18 16:32:07 +02:00 · 8ad3597d2d
commit 8ad3597d2d
parent 5a263710f6
4 changed files with 46 additions and 6 deletions
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@ -125,7 +125,7 @@ TEST = srcdir=$(srcdir) $(PERL) $(PERLFLAGS) $(srcdir)/runtests.pl
 TEST_Q = -a -s
 TEST_AM = -a -am
 TEST_F = -a -p -r
-TEST_T = -a -t
+TEST_T = -a -t -j2
 TEST_E = -a -e

 # ~<keyword> means that it will run all tests matching the keyword, but will
--- a/tests/runtests.pl
+++ b/tests/runtests.pl
@ -2886,6 +2886,7 @@ createrunners($numrunners);
 #   - if a runner has a response for us, process the response

 # run through each candidate test and execute it
+my $runner_wait_cnt = 0;
 while () {
    # check the abort flag
    if($globalabort) {
@ -2943,7 +2944,7 @@ while () {
    # If we could be running more tests, don't wait so we can schedule a new
    # one immediately. If all runners are busy, wait a fraction of a second
    # for one to finish so we can still loop around to check the abort flag.
-    my $runnerwait = scalar(@runnersidle) && scalar(@runtests) ? 0 : 0.5;
+    my $runnerwait = scalar(@runnersidle) && scalar(@runtests) ? 0 : 1.0;
    my ($ridready, $riderror) = runnerar_ready($runnerwait);
    if($ridready && ! defined $runnersrunning{$ridready}) {
        # On Linux, a closed pipe still shows up as ready instead of error.
@ -2964,6 +2965,7 @@ while () {
            $runnersrunning{$ridready} = $testnum;
        } else {
            # Test is complete
+            $runner_wait_cnt = 0;
            runnerready($ridready);

            if($error < 0) {
@ -2999,6 +3001,26 @@ while () {
            }
        }
    }
+    # No runner answered within the wait: after 5 such rounds name the tests
+    # still pending, after 10 also show their logs via displaylogs().
+    if(!$ridready && $runnerwait && !$torture && scalar(%runnersrunning)) {
+        $runner_wait_cnt++;
+        if($runner_wait_cnt >= 5) {
+            # count keys: scalar(%hash) is "used/buckets" before perl 5.26
+            my @pending = keys %runnersrunning;
+            my $msg = "waiting for " . scalar(@pending) . " results: " .
+                join(", ", map { $runnersrunning{$_} . "[$_]" } @pending);
+            logmsg "$msg\n";
+        }
+        if($runner_wait_cnt >= 10) {
+            $runner_wait_cnt = 0;
+            foreach my $rid (keys %runnersrunning) {
+                my $testnum = $runnersrunning{$rid};
+                logmsg "current state of test $testnum in [$rid]:\n";
+                displaylogs($rid, $testnum);
+            }
+        }
+    }
    if($riderror) {
        logmsg "ERROR: runner $riderror is dead! aborting test run\n";
        delete $runnersrunning{$riderror} if(defined $runnersrunning{$riderror});
--- a/tests/server/sockfilt.c
+++ b/tests/server/sockfilt.c
@ -218,6 +218,10 @@ static ssize_t write_wincon(int fd, const void *buf, size_t count)
 #define write(a,b,c) write_wincon(a,b,c)
 #endif

+/* On Windows, we sometimes get this for a broken pipe, seemingly
+ * when the client just closed stdin? */
+#define CURL_WIN32_EPIPE      109
+
 /*
 * fullread is a wrapper around the read() function. This will repeat the call
 * to read() until it actually has read the complete number of bytes indicated
@ -243,6 +247,11 @@ static ssize_t fullread(int filedes, void *buffer, size_t nbytes)
      error = errno;
      if((error == EINTR) || (error == EAGAIN))
        continue;
+      if(error == CURL_WIN32_EPIPE) {
+        logmsg("got Windows ERROR_BROKEN_PIPE on fd=%d, treating as close",
+               filedes);
+        return 0;
+      }
      logmsg("reading from file descriptor: %d,", filedes);
      logmsg("unrecoverable read() failure: (%d) %s",
             error, strerror(error));
--- a/tests/servers.pm
+++ b/tests/servers.pm
@ -2587,15 +2587,24 @@ sub startservers {
                if(stopserver('https')) {
                    return ("failed stopping HTTPS server with different cert", 3);
                }
+                # also stop http server, we do not know which state it is in
+                if($run{'http'} && stopserver('http')) {
+                    return ("failed stopping HTTP server", 3);
+                }
            }
            if($run{'https'} &&
               !responsive_http_server("https", $verbose, 0,
                                       protoport('https'))) {
-               if(stopserver('https')) {
-                   return ("failed stopping unresponsive HTTPS server", 3);
-               }
+                if(stopserver('https')) {
+                    return ("failed stopping unresponsive HTTPS server", 3);
+                }
+                # also stop http server, we do not know which state it is in
+                if($run{'http'} && stopserver('http')) {
+                    return ("failed stopping unresponsive HTTP server", 3);
+                }
            }
-            if($run{'http'} &&
+            # check a running http server if we have not already checked https
+            if($run{'http'} && !$run{'https'} &&
               !responsive_http_server("http", $verbose, 0,
                                       protoport('http'))) {
                if(stopserver('http')) {