curl/tests/getpart.pm
Viktor Szakats f477f3efc3
tests/data: support using native newlines on disk, drop .gitattributes
Data files no longer depend on mixed newline styles. Before this
patch the harness still assumed that data files use LF newlines,
ensured by `.gitattributes` and by distributing sources with LF newlines.

To allow using platform native newlines (CRLF on Windows typically),
update the test harness to support data files with any newline style
on disk. And delete `.gitattributes`.

Fix to:
- load original data files (from tests/data) so that their newline-style
  doesn't matter on the checked out source repo, meaning it works
  when it's CRLF on Windows, just like any other file.
  (if a BOM slips in, it's caught by `spacecheck.pl` as binary content.)
- do the same in `util.py` used by `smbserver.py` (for test 1451).
- also fix `util.py` to use us-ascii encoding for data files, replacing utf-8.

Also:
- runtests: rework the stray CR checker to allow full CRLF data files,
  and keep warning for mixed newlines.

Follow-up to 904e7ecb66 #19347

Closes #19398
2025-11-10 14:21:34 +01:00

425 lines
10 KiB
Perl

#***************************************************************************
# _ _ ____ _
# Project ___| | | | _ \| |
# / __| | | | |_) | |
# | (__| |_| | _ <| |___
# \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
package getpart;
use strict;
use warnings;
BEGIN {
use base qw(Exporter);
our @EXPORT = qw(
compareparts
fulltest
checktest
getpart
getpartattr
loadarray
loadtest
partexists
striparray
writearray
);
}
use Memoize;
# State shared by the functions in this package. @xml and $xmlfile are filled
# in by loadtest(); the two flags below are disabled by default.
my @xml; # test data file contents
my $xmlfile; # test data file name
my $warning=0; # when true, report unopenable files and empty parts on stderr
my $trace=0; # when true, report on stderr when getpart() returns data
# Normalize the part function arguments for proper caching. This includes the
# file name in the arguments since that is an implied parameter that affects the
# return value. Any error messages will only be displayed the first time, but
# those are disabled by default anyway, so they should never be seen outside
# development.
sub normalize_part {
    # Build the cache key: the call arguments followed by the name of the
    # currently loaded test file, all separated by tabs.
    my @key_fields = (@_, $xmlfile);
    return join("\t", @key_fields);
}
#
# Decode a string of hexadecimal digits into the bytes they encode.
#
# Every character that is not a hex digit (spaces, newlines, ...) is dropped
# first; each remaining pair of digits then becomes a single byte. A trailing
# unpaired digit is left in the result unchanged.
#
# The string to decode is taken from the first argument. When called without
# any argument the function falls back to $_, which is what it used to read
# unconditionally.
#
sub decode_hex {
    my $s = @_ ? $_[0] : $_;
    # remove everything not hex
    $s =~ s/[^A-Fa-f0-9]//g;
    # turn each pair of hex digits into the byte it represents
    $s =~ s/([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/eg;
    return $s;
}
#
# Return a hash with the attributes found on the <testcase> tag line(s) of
# the currently loaded test file. Values may be written quoted or unquoted;
# surrounding double quotes are removed from the stored value.
#
sub testcaseattr {
    my %attrs;
    foreach my $row (@xml) {
        next if($row !~ /^ *\<testcase ([^>]*)/);
        my $rest = $1;
        # consume one name=value pair per iteration until none is left
        while($rest =~ s/ *([^=]*)= *(\"([^\"]*)\"|([^\> ]*))//) {
            my $name = $1;
            my $value = $2;
            $value =~ s/^\"(.*)\"$/$1/;
            $attrs{$name} = $value;
        }
    }
    return %attrs;
}
#
# Return a hash with the attributes of a tag in the loaded test file.
#
# getpartattr($section, $part) returns the attributes of the first
# "<$part ...>" tag found inside "<$section>". Only tags that carry at least
# one space after the part name are considered. The search stops at
# "</$section>".
#
# getpartattr($section) (with $part undefined) returns the attributes of the
# "<$section ...>" tag itself.
#
# Attribute values may be written quoted or unquoted; surrounding double
# quotes are removed. An empty hash is returned when nothing is found.
#
sub getpartattr {
    my ($section, $part)=@_;
    my %hash;
    my $inside=0;
    my $attr;

    for my $row (@xml) {
        if(!$inside && ($row =~ /^ *\<$section/)) {
            $inside = 1;
            if(!defined($part)) {
                # no part requested: use the attributes of the section tag
                $attr = ($row =~ /^ *\<$section ([^>]*)/) ? $1 : "";
                last;
            }
        }
        next if(!$inside);

        if($row =~ /^ *\<$part ([^>]*)/) {
            $attr = $1;
            last;
        }
        # detect end of section when part wasn't found
        if($row =~ /^ *\<\/$section\>/) {
            last;
        }
    }

    if(defined($attr)) {
        # consume one name=value pair per iteration until none is left
        while($attr =~ s/ *([^=]*)= *(\"([^\"]*)\"|([^\> ]*))//) {
            my ($var, $cont)=($1, $2);
            $cont =~ s/^\"(.*)\"$/$1/;
            $hash{$var}=$cont;
        }
    }
    return %hash;
}
memoize('getpartattr', NORMALIZER => 'normalize_part'); # cache each result
#
# Return the lines found between "<$part>" and "</$part>" inside the
# "<$section>" of the currently loaded test file.
#
# Nested occurrences of the part tag are kept as data. When the opening part
# tag carries a "hex=" attribute, every returned line is hex-decoded, but only
# when the closing section tag is seen. If the file ends before the section
# is closed, whatever was collected so far is returned as-is.
#
sub getpart {
    my ($section, $part)=@_;

    my @this;
    my $depth = 0;   # 0: outside section, 1: in section, 2+: in part
    my $hex = 0;
    my $lineno = 0;

    foreach my $text (@xml) {
        $lineno++;

        if(!$depth) {
            # still looking for the opening section tag
            if($text =~ /^ *\<$section/) {
                $depth++;
            }
            next;
        }

        if($text =~ /^ *\<$part[ \>]/) {
            if($depth > 1) {
                # a nested part tag is plain data
                push @this, $text;
            }
            elsif($text =~ /$part [^>]*hex=/) {
                # attempt to detect a hex-encoded part
                $hex = 1;
            }
            $depth++;
            next;
        }

        if(($depth >= 2) && ($text =~ /^ *\<\/$part[ \>]/)) {
            if($depth > 2) {
                # closing tag of a nested part is plain data
                push @this, $text;
            }
            $depth--;
            next;
        }

        if($text =~ /^ *\<\/$section/) {
            if($depth > 1) {
                print STDERR "$xmlfile:$lineno:1: error: missing </$part> tag before </$section>\n";
                @this = ("format error in $xmlfile");
            }
            if($trace && @this) {
                print STDERR "*** getpart.pm: $section/$part returned data!\n";
            }
            if($warning && !@this) {
                print STDERR "*** getpart.pm: $section/$part returned empty!\n";
            }
            if($hex) {
                # decode the whole array before returning it; $_ is aliased
                # to each element while decode_hex() runs
                $_ = decode_hex($_) for @this;
            }
            return @this;
        }

        if($depth >= 2) {
            push @this, $text;
        }
    }

    # No closing section tag was seen: end of file implies end of section.
    if($trace && @this) {
        print STDERR "*** getpart.pm: $section/$part returned data!\n";
    }
    if($warning && !@this) {
        print STDERR "*** getpart.pm: $section/$part returned empty!\n";
    }
    return @this;
}
memoize('getpart', NORMALIZER => 'normalize_part'); # cache each result
#
# Return 1 if a "<$part>" tag is found inside "<$section>" of the currently
# loaded test file, otherwise 0.
#
sub partexists {
    my ($section, $part)=@_;
    my $in_section = 0;

    foreach my $row (@xml) {
        if(!$in_section) {
            # nothing else is checked until the section has been opened
            if($row =~ /^ *\<$section/) {
                $in_section = 1;
            }
            next;
        }
        if($row =~ /^ *\<$part[ \>]/) {
            return 1; # exists
        }
        if($row =~ /^ *\<\/$section/) {
            return 0; # does not exist
        }
    }
    return 0; # does not exist
}
# The code currently never calls this more than once per part per file, so
# caching a result that will never be used again just slows things down.
# memoize('partexists', NORMALIZER => 'normalize_part'); # cache each result
#
# Load a test data file into memory so the other functions can inspect it.
#
# $file     - name of the file to load
# $original - when true, the file is read through the :crlf layer so that
#             CRLF line endings on disk read as LF; otherwise it is read in
#             binary mode, byte for byte
#
# Returns 0 on success (including when $file is already the loaded file) and
# 1 when the file cannot be opened. A failed load leaves no file loaded.
#
sub loadtest {
    my ($file, $original)=@_;

    if(defined $xmlfile && $file eq $xmlfile) {
        # This test is already loaded
        return 0;
    }

    undef @xml;
    $xmlfile = "";

    my $xmlh;
    if(!open($xmlh, "<", "$file")) {
        # failure
        if($warning) {
            print STDERR "file $file wouldn't open!\n";
        }
        return 1;
    }

    if($original) {
        binmode($xmlh, ':crlf');
    }
    else {
        binmode($xmlh); # read raw bytes on every platform
    }
    # read in list context so the caller's $_ is left untouched
    @xml = <$xmlh>;
    close($xmlh);

    $xmlfile = $file;
    return 0;
}
# Return the entire currently loaded document as a list of lines, exactly as
# they are held in @xml.
sub fulltest {
return @xml;
}
#
# Classify the newline style used in a chunk of raw file content.
#
# Returns:
#   "bin"  - the content holds neither CR nor LF
#   "cr"   - only CR characters are present
#   "lf"   - only LF characters are present
#   "crlf" - every CR is immediately followed by an LF and every LF is
#            immediately preceded by a CR
#   ""     - anything else, i.e. mixed newline styles
#
sub eol_detect {
    my ($content) = @_;
    $content = '' if(!defined($content));

    # tr/// with an empty replacement list only counts, it does not modify
    my $cr = ($content =~ tr/\r//);
    my $lf = ($content =~ tr/\n//);

    if($cr == 0 && $lf == 0) {
        return "bin";
    }
    if($lf == 0) {
        return "cr";
    }
    if($cr == 0) {
        return "lf";
    }

    # Equal counts alone are not enough: a lone CR plus a lone LF elsewhere
    # would also balance. Require that all of them form CRLF pairs.
    my $pairs = () = $content =~ /\r\n/g;
    if($cr == $pairs && $lf == $pairs) {
        return "crlf";
    }
    return "";
}
#
# Check the newline style of a test data file as it is stored on disk.
#
# The file is read raw and handed to eol_detect(). Returns 1, after printing
# a diagnostic to stderr, when the file mixes newline styles. Returns 0
# otherwise, including when the file cannot be opened.
#
sub checktest {
    my ($file) = @_;

    open(my $xmlh, '<', $file) or return 0;
    binmode $xmlh; # we want the raw data to check original newlines
    my $content = do { local $/; <$xmlh> };
    close($xmlh);

    if(eol_detect($content) eq '') {
        # name the file that was checked, not the one loaded by loadtest()
        print STDERR "*** getpart.pm: $file has mixed newlines. Replace significant carriage return with %CR macro, or convert to consistent newlines.\n";
        return 1;
    }
    return 0;
}
#
# Write the currently loaded test, line by line and in binary mode, to the
# given file. Returns 0 on success and 1 when the file cannot be opened.
#
sub savetest {
    my ($file)=@_;

    my $outh;
    if(!open($outh, ">", "$file")) {
        # failure
        print STDERR "file $file wouldn't open!\n" if($warning);
        return 1;
    }

    binmode $outh; # write the bytes untranslated on every platform
    foreach my $row (@xml) {
        print $outh $row;
    }
    close($outh);
    return 0;
}
#
# Return a copy of the referenced array without the lines that match the
# given pattern. The input array is left untouched.
#
sub striparray {
    my ($pattern, $arrayref) = @_;
    my @kept = grep { $_ !~ /$pattern/ } @$arrayref;
    return @kept;
}
#
# Compare two chunks of data, passed as array *REFERENCES*.
#
# Returns 0 when they are equal and 1 when they differ. The second chunk may
# contain "%alternatives[A,B]" markers; the chunks are then considered equal
# if the first one matches with either A or B substituted. Dies if the first
# chunk contains such a marker.
#
sub compareparts {
    my ($firstref, $secondref)=@_;

    # Compare the joined content rather than element by element, since the
    # same data may be split into chunks differently in the two arrays.
    my $got = join("", @$firstref);
    my $want = join("", @$secondref);

    die "bad use of compareparts\n" if($got =~ /%alternatives\[/);

    if($want =~ /%alternatives\[([^,]*),([^\]]*)\]/) {
        # Expand only the first marker here; any further markers are
        # handled by the recursive calls.
        my @choices = ($1, $2);
        my $before = substr($want, 0, $-[0]);
        my $after = substr($want, $+[0]);

        foreach my $choice (@choices) {
            my $candidate = $before . $choice . $after;
            if(!compareparts([$got], [$candidate])) {
                return 0;
            }
        }
        # neither matched
        return 1;
    }

    return ($got ne $want) ? 1 : 0;
}
#
# Write the elements of the referenced array, unmodified and in binary mode,
# to the named file. Dies if the file cannot be opened or closed.
#
sub writearray {
    my ($filename, $arrayref)=@_;

    my $out;
    if(!open($out, ">", "$filename")) {
        die "Failure writing file";
    }
    binmode($out, ":raw"); # no newline translation on any platform
    foreach my $chunk (@$arrayref) {
        print $out $chunk;
    }
    close($out) or die "Failure writing file";
}
#
# Load the named file and return its lines as a list. An empty list is
# returned when the file cannot be opened. The caller's $_ is not modified.
#
sub loadarray {
    my ($filename)=@_;
    my @array;
    if(open(my $temp, "<", "$filename")) {
        # read in list context; a while(<$temp>) loop would overwrite the
        # global $_ of the caller
        @array = <$temp>;
        close($temp);
    }
    return @array;
}
1;