diff --git a/.github/scripts/spellcheck.yaml b/.github/scripts/spellcheck.yaml
index 04dfbfaf1b..05ddf0d937 100644
--- a/.github/scripts/spellcheck.yaml
+++ b/.github/scripts/spellcheck.yaml
@@ -29,4 +29,4 @@ matrix:
       - 'strong'
       - 'em'
   sources:
-  - '**/*.md|!docs/BINDINGS.md|!docs/DISTROS.md|!docs/CIPHERS-TLS12.md'
+  - '**/*.md|!docs/BINDINGS.md|!docs/DISTROS.md|!docs/CIPHERS-TLS12.md|!docs/wcurl.md'
diff --git a/.github/workflows/checksrc.yml b/.github/workflows/checksrc.yml
index f172e55341..e170d3f660 100644
--- a/.github/workflows/checksrc.yml
+++ b/.github/workflows/checksrc.yml
@@ -69,6 +69,7 @@ jobs:
           codespell \
             --skip scripts/mk-ca-bundle.pl \
             --skip src/tool_hugehelp.c \
+            --skip scripts/wcurl \
             -I .github/scripts/codespell-ignore.txt \
             CMake include m4 scripts src lib
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f93a082f11..de72bb7f34 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2503,6 +2503,13 @@ if(NOT CURL_DISABLE_INSTALL)
     COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/CMake/cmake_uninstall.cmake")
 endif()
 
+install(FILES "${PROJECT_SOURCE_DIR}/scripts/wcurl"
+  DESTINATION ${CMAKE_INSTALL_BINDIR}
+  PERMISSIONS
+    OWNER_READ OWNER_WRITE OWNER_EXECUTE
+    GROUP_READ GROUP_EXECUTE
+    WORLD_READ WORLD_EXECUTE)
+
 # The `-DEV` part is important
 string(REGEX REPLACE "([0-9]+\.[0-9]+)\.([0-9]+.*)" "\\2" CPACK_PACKAGE_VERSION_PATCH "${_curl_version}")
 set(CPACK_GENERATOR "TGZ")
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index fd5f1522a2..f5293c5a29 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -30,7 +30,7 @@ if(ENABLE_CURL_MANUAL AND BUILD_CURL_EXE)
 endif()
 
 if(BUILD_MISC_DOCS)
-  foreach(_man_misc IN ITEMS "curl-config" "mk-ca-bundle")
+  foreach(_man_misc IN ITEMS "curl-config" "mk-ca-bundle" "wcurl")
     set(_man_target "${CMAKE_CURRENT_BINARY_DIR}/${_man_misc}.1")
     add_custom_command(OUTPUT "${_man_target}"
       WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
diff --git a/docs/Makefile.am b/docs/Makefile.am
index 0c92dd8a33..80d446217f 100644
--- a/docs/Makefile.am
+++ b/docs/Makefile.am
@@ -28,16 +28,16 @@ if BUILD_DOCS
 # if we disable man page building, ignore these
 MK_CA_DOCS = mk-ca-bundle.1
 CURLCONF_DOCS = curl-config.1
-man_MANS = curl-config.1
+man_MANS = curl-config.1 wcurl.1
 endif
 
-CURLPAGES = curl-config.md mk-ca-bundle.md
+CURLPAGES = curl-config.md mk-ca-bundle.md wcurl.md
 
 SUBDIRS = . cmdline-opts libcurl
 DIST_SUBDIRS = $(SUBDIRS) examples
 
 if BUILD_DOCS
-CLEANFILES = mk-ca-bundle.1 curl-config.1
+CLEANFILES = mk-ca-bundle.1 curl-config.1 wcurl.1
 endif
 
 INTERNALDOCS = \
@@ -134,5 +134,7 @@ curl-config.1: curl-config.md
 
 mk-ca-bundle.1: mk-ca-bundle.md
 
+wcurl.1: wcurl.md
+
 distclean:
 	rm -f $(CLEANFILES)
diff --git a/docs/wcurl.md b/docs/wcurl.md
new file mode 100644
index 0000000000..4111af5226
--- /dev/null
+++ b/docs/wcurl.md
@@ -0,0 +1,145 @@
+---
+c: Copyright (C) Samuel Henrique, Sergio Durigan Junior and many contributors, see the AUTHORS file.
+SPDX-License-Identifier: curl
+Title: wcurl
+Section: 1
+Source: wcurl
+See-also:
+ - curl (1)
+ - trurl (1)
+Added-in: n/a
+---
+
+# NAME
+
+**wcurl** - a simple wrapper around curl to easily download files.
+
+# SYNOPSIS
+
+**wcurl \<URL\>...**
+
+**wcurl [--curl-options \<CURL_OPTIONS\>]... [--dry-run] [--no-decode-filename] [-o|-O|--output \<PATH\>] [--] \<URL\>...**
+
+**wcurl [--curl-options=\<CURL_OPTIONS\>]... [--dry-run] [--no-decode-filename] [--output=\<PATH\>] [--] \<URL\>...**
+
+**wcurl -V|--version**
+
+**wcurl -h|--help**
+
+# DESCRIPTION
+
+**wcurl** is a simple curl wrapper which lets you use curl to download files
+without having to remember any parameters.
+
+Simply call **wcurl** with a list of URLs you want to download and **wcurl**
+picks sane defaults.
+
+If you need anything more complex, you can provide any of curl's supported
+parameters via the **--curl-options** option. Just beware that you likely
+should be using curl directly if your use case is not covered.
+
+By default, **wcurl** does:
+
+## * Percent-encode whitespaces in URLs;
+
+## * Download multiple URLs in parallel
+  if the installed curl's version is \>= 7.66.0 (--parallel);
+
+## * Follow redirects;
+
+## * Automatically choose a filename as output;
+
+## * Avoid overwriting files
+  if the installed curl's version is \>= 7.83.0 (--no-clobber);
+
+## * Perform retries;
+
+## * Set the downloaded file timestamp
+  to the value provided by the server, if available;
+
+## * Default to https
+  if the URL does not contain any scheme;
+
+## * Disable curl's URL globbing parser
+  so {} and [] characters in URLs are not treated specially;
+
+## * Percent-decode the resulting filename;
+
+## * Use 'index.html' as the default filename
+  if there is none in the URL.
+
+# OPTIONS
+
+## --curl-options, --curl-options=\<CURL_OPTIONS\>...
+
+Specify extra options to be passed when invoking curl. May be specified more
+than once.
+
+## -o, -O, --output, --output=\<PATH\>
+
+Use the provided output path instead of getting it from the URL. If multiple
+URLs are provided, the resulting files share the same name with a number
+appended to the end (curl \>= 7.83.0). If this option is provided multiple
+times, only the last value is considered.
+
+## --no-decode-filename
+
+Don't percent-decode the output filename, even if the percent-encoding in the
+URL was done by **wcurl**, e.g. because the URL contained whitespace.
+
+## --dry-run
+
+Do not actually execute curl, just print what would be invoked.
+
+## -V, \--version
+
+Print version information.
+
+## -h, \--help
+
+Print help message.
+
+# CURL_OPTIONS
+
+Any option supported by curl can be set here. This is not used by **wcurl**; it
+is instead forwarded to the curl invocation.
+
+# URL
+
+URL to be downloaded. Anything that is not a parameter is considered
+a URL. Whitespaces are percent-encoded and the URL is passed to curl, which
+then performs the parsing. May be specified more than once.
+
+# EXAMPLES
+
+Download a single file:
+
+**wcurl example.com/filename.txt**
+
+Download two files in parallel:
+
+**wcurl example.com/filename1.txt example.com/filename2.txt**
+
+Download a file passing the **--progress-bar** and **--http2** flags to curl:
+
+**wcurl --curl-options="--progress-bar --http2" example.com/filename.txt**
+
+Resume from an interrupted download (if more options are used, this needs to
+be the last one in the list):
+
+**wcurl --curl-options="--continue-at -" example.com/filename.txt**
+
+# AUTHORS
+
+Samuel Henrique,
+Sergio Durigan Junior,
+and many contributors, see the AUTHORS file.
+
+# REPORTING BUGS
+
+If you experience any problems with **wcurl** that you do not experience with
+curl, submit an issue on GitHub: https://github.com/curl/wcurl
+
+# COPYRIGHT
+
+**wcurl** is licensed under the curl license
diff --git a/scripts/Makefile.am b/scripts/Makefile.am
index dfee81e055..705a77a09e 100644
--- a/scripts/Makefile.am
+++ b/scripts/Makefile.am
@@ -25,7 +25,9 @@ EXTRA_DIST = coverage.sh completion.pl firefox-db2pem.sh checksrc.pl \
  mk-ca-bundle.pl mk-unity.pl schemetable.c cd2nroff nroff2cd cdall cd2cd managen \
  dmaketgz maketgz release-tools.sh verify-release cmakelint.sh mdlinkcheck \
- CMakeLists.txt randdisable
+ CMakeLists.txt randdisable wcurl
+
+dist_bin_SCRIPTS = wcurl
 
 ZSH_FUNCTIONS_DIR = @ZSH_FUNCTIONS_DIR@
 FISH_FUNCTIONS_DIR = @FISH_FUNCTIONS_DIR@
diff --git a/scripts/wcurl b/scripts/wcurl
new file mode 100755
index 0000000000..35fcb8a5c3
--- /dev/null
+++ b/scripts/wcurl
@@ -0,0 +1,324 @@
+#!/bin/sh
+
+# wcurl - a simple wrapper around curl to easily download files.
+#
+# Requires curl >= 7.46.0 (2015)
+#
+# Copyright (C) Samuel Henrique, Sergio Durigan
+# Junior and many contributors, see the AUTHORS
+# file.
+#
+# Permission to use, copy, modify, and distribute this software for any purpose
+# with or without fee is hereby granted, provided that the above copyright
+# notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
+# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+# OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of a copyright holder shall not be
+# used in advertising or otherwise to promote the sale, use or other dealings in
+# this Software without prior written authorization of the copyright holder.
+#
+# SPDX-License-Identifier: curl
+
+# Stop on errors and on usage of unset variables.
+set -eu
+
+VERSION="2025.04.20"
+
+PROGRAM_NAME="$(basename "$0")"
+readonly PROGRAM_NAME
+
+# Display the version.
+print_version()
+{
+    cat << _EOF_
+${VERSION}
+_EOF_
+}
+
+# Display the program usage.
+usage()
+{
+    cat << _EOF_
+${PROGRAM_NAME} -- a simple wrapper around curl to easily download files.
+
+Usage: ${PROGRAM_NAME} <URL>...
+       ${PROGRAM_NAME} [--curl-options <CURL_OPTIONS>]... [--no-decode-filename] [-o|-O|--output <PATH>] [--dry-run] [--] <URL>...
+       ${PROGRAM_NAME} [--curl-options=<CURL_OPTIONS>]... [--no-decode-filename] [--output=<PATH>] [--dry-run] [--] <URL>...
+       ${PROGRAM_NAME} -h|--help
+       ${PROGRAM_NAME} -V|--version
+
+Options:
+
+  --curl-options <CURL_OPTIONS>: Specify extra options to be passed when invoking curl. May be
+                                 specified more than once.
+
+  -o, -O, --output <PATH>: Use the provided output path instead of getting it from the URL. If
+                           multiple URLs are provided, the resulting files share the same name with a
+                           number appended to the end (curl >= 7.83.0). If this option is provided
+                           multiple times, only the last value is considered.
+
+  --no-decode-filename: Don't percent-decode the output filename, even if the percent-encoding in
+                        the URL was done by wcurl, e.g. because the URL contained whitespace.
+
+  --dry-run: Don't actually execute curl, just print what would be invoked.
+
+  -V, --version: Print version information.
+
+  -h, --help: Print this usage message.
+
+  <CURL_OPTIONS>: Any option supported by curl can be set here. This is not used by wcurl; it is
+                  instead forwarded to the curl invocation.
+
+  <URL>: URL to be downloaded. Anything that is not a parameter is considered
+         a URL. Whitespaces are percent-encoded and the URL is passed to curl, which
+         then performs the parsing. May be specified more than once.
+_EOF_
+}
+
+# Display an error message and bail out.
+error()
+{
+    printf "%s\n" "$*" > /dev/stderr
+    exit 1
+}
+
+# Extra curl options provided by the user.
+# This is set per-URL for every URL provided.
+# Some options are global, but we are erring on the side of needlessly setting
+# them multiple times instead of causing issues with parameters that need to
+# be set per-URL.
+CURL_OPTIONS=""
+
+# The URLs to be downloaded.
+URLS=""
+
+# Variable that is set to the percent-decoded filename parsed from the URL, unless
+# --output or --no-decode-filename are used.
+OUTPUT_PATH=""
+HAS_USER_SET_OUTPUT="false"
+
+# The parameters that are passed per-URL to curl.
+readonly PER_URL_PARAMETERS="\
+    --fail \
+    --globoff \
+    --location \
+    --proto-default https \
+    --remote-time \
+    --retry 10 \
+    --retry-max-time 10 "
+
+# Whether to invoke curl or not.
+DRY_RUN="false"
+
+# Sanitize parameters.
+sanitize()
+{
+    if [ -z "${URLS}" ]; then
+        error "You must provide at least one URL to download."
+    fi
+
+    readonly CURL_OPTIONS URLS DRY_RUN HAS_USER_SET_OUTPUT
+}
+
+# Indicate via exit code whether the string given in the first parameter
+# consists solely of characters from the string given in the second parameter.
+# In other words, it returns 0 if the first parameter only contains characters
+# from the second parameter, e.g.: Are $1 characters a subset of $2 characters?
+is_subset_of()
+{
+    case "${1}" in
+        *[!${2}]*|'') return 1;;
+    esac
+}
+
+# Print the given string percent-decoded.
+percent_decode()
+{
+    # Encodings of control characters (00-1F) are passed through without decoding.
+    # Iterate over the input character by character, decoding it.
+    printf "%s\n" "${1}" | fold -w1 | while IFS= read -r decode_out; do
+        # If the character is a "%", read the next character as decode_hex1.
+        if [ "${decode_out}" = % ] && IFS= read -r decode_hex1; then
+            decode_out="${decode_out}${decode_hex1}"
+            # If there's one more character, read it as decode_hex2.
+            if IFS= read -r decode_hex2; then
+                decode_out="${decode_out}${decode_hex2}"
+                # Skip decoding if this is a control character (00-1F).
+                # Skip decoding if DECODE_FILENAME is not "true".
+                if is_subset_of "${decode_hex1}" "23456789abcdefABCDEF" && \
+                   is_subset_of "${decode_hex2}" "0123456789abcdefABCDEF" && \
+                   [ "${DECODE_FILENAME}" = "true" ]; then
+                    # Use printf to decode it into octal and then decode it to the final format.
+                    decode_out="$(printf "%b" "\\$(printf %o "0x${decode_hex1}${decode_hex2}")")"
+                fi
+            fi
+        fi
+        printf %s "${decode_out}"
+    done
+}
+
+# Print the percent-decoded filename portion of the given URL.
+get_url_filename()
+{
+    # Remove protocol and query string if present.
+    hostname_and_path="$(printf %s "${1}" | sed -e 's,^[^/]*//,,' -e 's,?.*$,,')"
+    # If what remains contains a slash, there's a path; return it percent-decoded.
+    case "${hostname_and_path}" in
+        # sed to remove everything preceding the last '/', e.g.: "example/something" becomes "something"
+        */*) percent_decode "$(printf %s "${hostname_and_path}" | sed -e 's,^.*/,,')";;
+    esac
+    # No slash means there was just a hostname and no path; return an empty string.
+}
+
+# Execute curl with the list of URLs provided by the user.
+exec_curl()
+{
+    CMD="curl "
+
+    # Store version to check if it supports --no-clobber and --parallel.
+    curl_version=$($CMD --version | cut -f2 -d' ' | head -n1)
+    curl_version_major=$(echo "$curl_version" | cut -f1 -d.)
+    curl_version_minor=$(echo "$curl_version" | cut -f2 -d.)
+
+    CURL_HAS_NO_CLOBBER=""
+    CURL_HAS_PARALLEL=""
+    # --no-clobber is only supported since 7.83.0.
+    # --parallel is only supported since 7.66.0.
+    if [ "${curl_version_major}" -ge 8 ]; then
+        CURL_HAS_NO_CLOBBER="--no-clobber"
+        CURL_HAS_PARALLEL="--parallel"
+    elif [ "${curl_version_major}" -eq 7 ]; then
+        if [ "${curl_version_minor}" -ge 83 ]; then
+            CURL_HAS_NO_CLOBBER="--no-clobber"
+        fi
+        if [ "${curl_version_minor}" -ge 66 ]; then
+            CURL_HAS_PARALLEL="--parallel"
+        fi
+    fi
+
+    # Detecting whether we need --parallel. It's easier to rely on
+    # the shell's argument parsing.
+    # shellcheck disable=SC2086
+    set -- $URLS
+
+    if [ "$#" -gt 1 ]; then
+        CURL_PARALLEL="$CURL_HAS_PARALLEL"
+    else
+        CURL_PARALLEL=""
+    fi
+
+    # Start assembling the command.
+    #
+    # We use 'set --' here (again) because (a) we don't have arrays on
+    # POSIX shell, and (b) we need better control over the way we
+    # split arguments.
+    #
+    # shellcheck disable=SC2086
+    set -- ${CMD} ${CURL_PARALLEL}
+
+    NEXT_PARAMETER=""
+    for url in ${URLS}; do
+        # If the user did not provide an output path, define one.
+        if [ "${HAS_USER_SET_OUTPUT}" = "false" ]; then
+            OUTPUT_PATH="$(get_url_filename "${url}")"
+            # If we could not get a path from the URL, use the default: index.html.
+            [ -z "${OUTPUT_PATH}" ] && OUTPUT_PATH=index.html
+        fi
+        # shellcheck disable=SC2086
+        set -- "$@" ${NEXT_PARAMETER} ${PER_URL_PARAMETERS} ${CURL_HAS_NO_CLOBBER} ${CURL_OPTIONS} --output "${OUTPUT_PATH}" "${url}"
+        NEXT_PARAMETER="--next"
+    done
+
+    if [ "${DRY_RUN}" = "false" ]; then
+        exec "$@"
+    else
+        printf "%s\n" "$@"
+    fi
+}
+
+# Default to decoding the output filename
+DECODE_FILENAME="true"
+
+# Use "${1-}" in order to avoid errors because of 'set -u'.
+while [ -n "${1-}" ]; do
+    case "${1}" in
+        --curl-options=*)
+            opt=$(printf "%s\n" "${1}" | sed 's/^--curl-options=//')
+            CURL_OPTIONS="${CURL_OPTIONS} ${opt}"
+            ;;
+
+        --curl-options)
+            shift
+            CURL_OPTIONS="${CURL_OPTIONS} ${1}"
+            ;;
+
+        --dry-run)
+            DRY_RUN="true"
+            ;;
+
+        --output=*)
+            opt=$(printf "%s\n" "${1}" | sed 's/^--output=//')
+            HAS_USER_SET_OUTPUT="true"
+            OUTPUT_PATH="${opt}"
+            ;;
+
+        -o|-O|--output)
+            shift
+            HAS_USER_SET_OUTPUT="true"
+            OUTPUT_PATH="${1}"
+            ;;
+
+        -o*|-O*)
+            opt=$(printf "%s\n" "${1}" | sed 's/^-[oO]//')
+            HAS_USER_SET_OUTPUT="true"
+            OUTPUT_PATH="${opt}"
+            ;;
+
+        --no-decode-filename)
+            DECODE_FILENAME="false"
+            ;;
+
+        -h|--help)
+            usage
+            exit 0
+            ;;
+
+        -V|--version)
+            print_version
+            exit 0
+            ;;
+
+        --)
+            # This is the start of the list of URLs.
+            shift
+            for url in "$@"; do
+                # Encode whitespaces into %20, since wget supports those URLs.
+                newurl=$(printf "%s\n" "${url}" | sed 's/ /%20/g')
+                URLS="${URLS} ${newurl}"
+            done
+            break
+            ;;
+
+        -*)
+            error "Unknown option: '$1'."
+            ;;
+
+        *)
+            # This must be a URL.
+            # Encode whitespaces into %20, since wget supports those URLs.
+            newurl=$(printf "%s\n" "${1}" | sed 's/ /%20/g')
+            URLS="${URLS} ${newurl}"
+            ;;
+    esac
+    shift
+done
+
+sanitize
+exec_curl
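
Reviewer note (not part of the patch): a quick way to sanity-check the per-URL defaults and the version gating in exec_curl above is wcurl's --dry-run option, which prints each argument of the assembled curl invocation, one per line via printf "%s\n" "$@", instead of executing it. The transcript below is only an illustrative sketch: the URL is made up, and the exact arguments depend on the installed curl (--no-clobber is only added for curl >= 7.83.0, --parallel only for >= 7.66.0 and more than one URL; a new-enough curl and a single URL are assumed here).

    # Illustrative sketch, assuming curl >= 7.83.0 and a hypothetical URL.
    # The space in the URL is percent-encoded before curl sees it, while the
    # derived output filename is percent-decoded back by get_url_filename.
    $ wcurl --dry-run 'https://example.com/some file.txt'
    curl
    --fail
    --globoff
    --location
    --proto-default
    https
    --remote-time
    --retry
    10
    --retry-max-time
    10
    --no-clobber
    --output
    some file.txt
    https://example.com/some%20file.txt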