aboutsummaryrefslogtreecommitdiff
path: root/contrib/perl5/t/op
diff options
context:
space:
mode:
authorMark Murray <markm@FreeBSD.org>2002-03-16 20:14:31 +0000
committerMark Murray <markm@FreeBSD.org>2002-03-16 20:14:31 +0000
commitcf90a21ebd2262e09730a5a55c5ccb62f1622fad (patch)
tree116739180f0198b4d3857979a8e84444c2fe0949 /contrib/perl5/t/op
parentfc75d0664419eb8c8f264d8f298df2cd155c8966 (diff)
Vendor import Perl 5.6.1vendor/perl5
Notes
Notes: svn path=/vendor/perl5/dist/; revision=92444
Diffstat (limited to 'contrib/perl5/t/op')
-rwxr-xr-xcontrib/perl5/t/op/anonsub.t93
-rwxr-xr-xcontrib/perl5/t/op/concat.t100
-rwxr-xr-xcontrib/perl5/t/op/length.t85
-rwxr-xr-xcontrib/perl5/t/op/my_stash.t31
-rwxr-xr-xcontrib/perl5/t/op/regmesg.t179
-rwxr-xr-xcontrib/perl5/t/op/reverse.t33
-rwxr-xr-xcontrib/perl5/t/op/utf8decode.t183
7 files changed, 704 insertions, 0 deletions
diff --git a/contrib/perl5/t/op/anonsub.t b/contrib/perl5/t/op/anonsub.t
new file mode 100755
index 000000000000..17889d9d2f9d
--- /dev/null
+++ b/contrib/perl5/t/op/anonsub.t
@@ -0,0 +1,93 @@
+#!./perl
+
+chdir 't' if -d 't';
+@INC = '../lib';
+$Is_VMS = $^O eq 'VMS';
+$Is_MSWin32 = $^O eq 'MSWin32';
+$ENV{PERL5LIB} = "../lib" unless $Is_VMS;
+
+$|=1;
+
+undef $/;
+@prgs = split "\n########\n", <DATA>;
+print "1..", scalar @prgs, "\n";
+
+$tmpfile = "asubtmp000";
+1 while -f ++$tmpfile;
+END { if ($tmpfile) { 1 while unlink $tmpfile; } }
+
+for (@prgs){
+ my $switch = "";
+ if (s/^\s*(-\w+)//){
+ $switch = $1;
+ }
+ my($prog,$expected) = split(/\nEXPECT\n/, $_);
+ open TEST, ">$tmpfile";
+ print TEST "$prog\n";
+ close TEST;
+ my $results = $Is_VMS ?
+ `MCR $^X "-I[-.lib]" $switch $tmpfile 2>&1` :
+ $Is_MSWin32 ?
+ `.\\perl -I../lib $switch $tmpfile 2>&1` :
+ `./perl $switch $tmpfile 2>&1`;
+ my $status = $?;
+ $results =~ s/\n+$//;
+ # allow expected output to be written as if $prog is on STDIN
+ $results =~ s/runltmp\d+/-/g;
+ $results =~ s/\n%[A-Z]+-[SIWEF]-.*$// if $Is_VMS; # clip off DCL status msg
+ $expected =~ s/\n+$//;
+ if ($results ne $expected) {
+ print STDERR "PROG: $switch\n$prog\n";
+ print STDERR "EXPECTED:\n$expected\n";
+ print STDERR "GOT:\n$results\n";
+ print "not ";
+ }
+ print "ok ", ++$i, "\n";
+}
+
+__END__
+sub X {
+ my $n = "ok 1\n";
+ sub { print $n };
+}
+my $x = X();
+undef &X;
+$x->();
+EXPECT
+ok 1
+########
+sub X {
+ my $n = "ok 1\n";
+ sub {
+ my $dummy = $n; # eval can't close on $n without internal reference
+ eval 'print $n';
+ die $@ if $@;
+ };
+}
+my $x = X();
+undef &X;
+$x->();
+EXPECT
+ok 1
+########
+sub X {
+ my $n = "ok 1\n";
+ eval 'sub { print $n }';
+}
+my $x = X();
+die $@ if $@;
+undef &X;
+$x->();
+EXPECT
+ok 1
+########
+sub X;
+sub X {
+ my $n = "ok 1\n";
+ eval 'sub Y { my $p = shift; $p->() }';
+ die $@ if $@;
+ Y(sub { print $n });
+}
+X();
+EXPECT
+ok 1
diff --git a/contrib/perl5/t/op/concat.t b/contrib/perl5/t/op/concat.t
new file mode 100755
index 000000000000..76074e0f28f1
--- /dev/null
+++ b/contrib/perl5/t/op/concat.t
@@ -0,0 +1,100 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+}
+
+print "1..11\n";
+
+($a, $b, $c) = qw(foo bar);
+
+print "not " unless "$a" eq "foo";
+print "ok 1\n";
+
+print "not " unless "$a$b" eq "foobar";
+print "ok 2\n";
+
+print "not " unless "$c$a$c" eq "foo";
+print "ok 3\n";
+
+# Okay, so that wasn't very challenging. Let's go Unicode.
+
+my $test = 4;
+
+{
+ # bug id 20000819.004
+
+ $_ = $dx = "\x{10f2}";
+ s/($dx)/$dx$1/;
+ {
+ use bytes;
+ print "not " unless $_ eq "$dx$dx";
+ print "ok $test\n";
+ $test++;
+ }
+
+ $_ = $dx = "\x{10f2}";
+ s/($dx)/$1$dx/;
+ {
+ use bytes;
+ print "not " unless $_ eq "$dx$dx";
+ print "ok $test\n";
+ $test++;
+ }
+
+ $dx = "\x{10f2}";
+ $_ = "\x{10f2}\x{10f2}";
+ s/($dx)($dx)/$1$2/;
+ {
+ use bytes;
+ print "not " unless $_ eq "$dx$dx";
+ print "ok $test\n";
+ $test++;
+ }
+}
+
+{
+ # bug id 20000901.092
+ # test that undef left and right of utf8 results in a valid string
+
+ my $a;
+ $a .= "\x{1ff}";
+ print "not " unless $a eq "\x{1ff}";
+ print "ok $test\n";
+ $test++;
+}
+
+{
+ # ID 20001020.006
+
+ "x" =~ /(.)/; # unset $2
+
+ # Without the fix this 5.7.0 would croak:
+ # Modification of a read-only value attempted at ...
+ "$2\x{1234}";
+
+ print "ok $test\n";
+ $test++;
+
+ # For symmetry with the above.
+ "\x{1234}$2";
+
+ print "ok $test\n";
+ $test++;
+
+ *pi = \undef;
+ # This bug existed earlier than the $2 bug, but is fixed with the same
+ # patch. Without the fix this 5.7.0 would also croak:
+ # Modification of a read-only value attempted at ...
+ "$pi\x{1234}";
+
+ print "ok $test\n";
+ $test++;
+
+ # For symmetry with the above.
+ "\x{1234}$pi";
+
+ print "ok $test\n";
+ $test++;
+}
diff --git a/contrib/perl5/t/op/length.t b/contrib/perl5/t/op/length.t
new file mode 100755
index 000000000000..ceb005ecc4a8
--- /dev/null
+++ b/contrib/perl5/t/op/length.t
@@ -0,0 +1,85 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+}
+
+print "1..13\n";
+
+print "not " unless length("") == 0;
+print "ok 1\n";
+
+print "not " unless length("abc") == 3;
+print "ok 2\n";
+
+$_ = "foobar";
+print "not " unless length() == 6;
+print "ok 3\n";
+
+# Okay, so that wasn't very challenging. Let's go Unicode.
+
+{
+ my $a = "\x{41}";
+
+ print "not " unless length($a) == 1;
+ print "ok 4\n";
+ $test++;
+
+ use bytes;
+ print "not " unless $a eq "\x41" && length($a) == 1;
+ print "ok 5\n";
+ $test++;
+}
+
+{
+ my $a = "\x{80}";
+
+ print "not " unless length($a) == 1;
+ print "ok 6\n";
+ $test++;
+
+ use bytes;
+ print "not " unless $a eq "\xc2\x80" && length($a) == 2;
+ print "ok 7\n";
+ $test++;
+}
+
+{
+ my $a = "\x{100}";
+
+ print "not " unless length($a) == 1;
+ print "ok 8\n";
+ $test++;
+
+ use bytes;
+ print "not " unless $a eq "\xc4\x80" && length($a) == 2;
+ print "ok 9\n";
+ $test++;
+}
+
+{
+ my $a = "\x{100}\x{80}";
+
+ print "not " unless length($a) == 2;
+ print "ok 10\n";
+ $test++;
+
+ use bytes;
+ print "not " unless $a eq "\xc4\x80\xc2\x80" && length($a) == 4;
+ print "ok 11\n";
+ $test++;
+}
+
+{
+ my $a = "\x{80}\x{100}";
+
+ print "not " unless length($a) == 2;
+ print "ok 12\n";
+ $test++;
+
+ use bytes;
+ print "not " unless $a eq "\xc2\x80\xc4\x80" && length($a) == 4;
+ print "ok 13\n";
+ $test++;
+}
diff --git a/contrib/perl5/t/op/my_stash.t b/contrib/perl5/t/op/my_stash.t
new file mode 100755
index 000000000000..4a1d5022e023
--- /dev/null
+++ b/contrib/perl5/t/op/my_stash.t
@@ -0,0 +1,31 @@
+#!./perl
+
+package Foo;
+
+BEGIN {
+ @INC = '../lib';
+}
+
+use Test;
+
+plan tests => 7;
+
+use constant MyClass => 'Foo::Bar::Biz::Baz';
+
+{
+ package Foo::Bar::Biz::Baz;
+}
+
+for (qw(Foo Foo:: MyClass __PACKAGE__)) {
+ eval "sub { my $_ \$obj = shift; }";
+ ok ! $@;
+# print $@ if $@;
+}
+
+use constant NoClass => 'Nope::Foo::Bar::Biz::Baz';
+
+for (qw(Nope Nope:: NoClass)) {
+ eval "sub { my $_ \$obj = shift; }";
+ ok $@;
+# print $@ if $@;
+}
diff --git a/contrib/perl5/t/op/regmesg.t b/contrib/perl5/t/op/regmesg.t
new file mode 100755
index 000000000000..01fa675bd5dc
--- /dev/null
+++ b/contrib/perl5/t/op/regmesg.t
@@ -0,0 +1,179 @@
+#!./perl -w
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+}
+
+my $debug = 1;
+
+##
+## If the markers used are changed (search for "MARKER1" in regcomp.c),
+## update only these two variables, and leave the {#} in the @death/@warning
+## arrays below. The {#} is a meta-marker -- it marks where the marker should
+## go.
+
+my $marker1 = "HERE";
+my $marker2 = " << HERE ";
+
+##
+## Key-value pairs of code/error of code that should have fatal errors.
+##
+
+eval 'use Config'; # assume defaults if fail
+our %Config;
+my $inf_m1 = ($Config{reg_infty} || 32767) - 1;
+my $inf_p1 = $inf_m1 + 2;
+my @death =
+(
+ '/[[=foo=]]/' => 'POSIX syntax [= =] is reserved for future extensions before {#} mark in regex m/[[=foo=]{#}]/',
+
+ '/(?<= .*)/' => 'Variable length lookbehind not implemented before {#} mark in regex m/(?<= .*){#}/',
+
+ '/(?<= x{1000})/' => 'Lookbehind longer than 255 not implemented before {#} mark in regex m/(?<= x{1000}){#}/',
+
+ '/(?@)/' => 'Sequence (?@...) not implemented before {#} mark in regex m/(?@{#})/',
+
+ '/(?{ 1/' => 'Sequence (?{...}) not terminated or not {}-balanced before {#} mark in regex m/(?{{#} 1/',
+
+ '/(?(1x))/' => 'Switch condition not recognized before {#} mark in regex m/(?(1x{#}))/',
+
+ '/(?(1)x|y|z)/' => 'Switch (?(condition)... contains too many branches before {#} mark in regex m/(?(1)x|y|{#}z)/',
+
+ '/(?(x)y|x)/' => 'Unknown switch condition (?(x) before {#} mark in regex m/(?({#}x)y|x)/',
+
+ '/(?/' => 'Sequence (? incomplete before {#} mark in regex m/(?{#}/',
+
+ '/(?;x/' => 'Sequence (?;...) not recognized before {#} mark in regex m/(?;{#}x/',
+ '/(?<;x/' => 'Sequence (?<;...) not recognized before {#} mark in regex m/(?<;{#}x/',
+
+ '/((x)/' => 'Unmatched ( before {#} mark in regex m/({#}(x)/',
+
+ "/x{$inf_p1}/" => "Quantifier in {,} bigger than $inf_m1 before {#} mark in regex m/x{{#}$inf_p1}/",
+
+ '/x{3,1}/' => 'Can\'t do {n,m} with n > m before {#} mark in regex m/x{3,1}{#}/',
+
+ '/x**/' => 'Nested quantifiers before {#} mark in regex m/x**{#}/',
+
+ '/x[/' => 'Unmatched [ before {#} mark in regex m/x[{#}/',
+
+ '/*/', => 'Quantifier follows nothing before {#} mark in regex m/*{#}/',
+
+ '/\p{x/' => 'Missing right brace on \p{} before {#} mark in regex m/\p{{#}x/',
+
+ 'use utf8; /[\p{x]/' => 'Missing right brace on \p{} before {#} mark in regex m/[\p{{#}x]/',
+
+ '/(x)\2/' => 'Reference to nonexistent group before {#} mark in regex m/(x)\2{#}/',
+
+ 'my $m = "\\\"; $m =~ $m', => 'Trailing \ in regex m/\/',
+
+ '/\x{1/' => 'Missing right brace on \x{} before {#} mark in regex m/\x{{#}1/',
+
+ 'use utf8; /[\x{X]/' => 'Missing right brace on \x{} before {#} mark in regex m/[\x{{#}X]/',
+
+ '/[[:barf:]]/' => 'POSIX class [:barf:] unknown before {#} mark in regex m/[[:barf:]{#}]/',
+
+ '/[[=barf=]]/' => 'POSIX syntax [= =] is reserved for future extensions before {#} mark in regex m/[[=barf=]{#}]/',
+
+ '/[[.barf.]]/' => 'POSIX syntax [. .] is reserved for future extensions before {#} mark in regex m/[[.barf.]{#}]/',
+
+ '/[z-a]/' => 'Invalid [] range "z-a" before {#} mark in regex m/[z-a{#}]/',
+);
+
+##
+## Key-value pairs of code/error of code that should have non-fatal warnings.
+##
+@warning = (
+ "m/(?p{ 'a' })/" => "(?p{}) is deprecated - use (??{}) before {#} mark in regex m/(?p{#}{ 'a' })/",
+
+ 'm/\b*/' => '\b* matches null string many times before {#} mark in regex m/\b*{#}/',
+
+ 'm/[:blank:]/' => 'POSIX syntax [: :] belongs inside character classes before {#} mark in regex m/[:blank:]{#}/',
+
+ "m'[\\y]'" => 'Unrecognized escape \y in character class passed through before {#} mark in regex m/[\y{#}]/',
+
+ 'm/[a-\d]/' => 'False [] range "a-\d" before {#} mark in regex m/[a-\d{#}]/',
+ 'm/[\w-x]/' => 'False [] range "\w-" before {#} mark in regex m/[\w-{#}x]/',
+ "m'\\y'" => 'Unrecognized escape \y passed through before {#} mark in regex m/\y{#}/',
+);
+
+my $total = (@death + @warning)/2;
+
+# utf8 is a noop on EBCDIC platforms, it is not fatal
+my $Is_EBCDIC = (ord('A') == 193);
+if ($Is_EBCDIC) {
+ my @utf8_death = grep(/utf8/, @death);
+ $total = $total - scalar(@utf8_death);
+}
+
+print "1..$total\n";
+
+my $count = 0;
+
+while (@death)
+{
+ my $regex = shift @death;
+ my $result = shift @death;
+ # skip the utf8 test on EBCDIC since they do not die
+ next if ($Is_EBCDIC && $regex =~ /utf8/);
+ $count++;
+
+ $_ = "x";
+ eval $regex;
+ if (not $@) {
+ print "# oops, $regex didn't die\nnot ok $count\n";
+ next;
+ }
+ chomp $@;
+ $result =~ s/{\#}/$marker1/;
+ $result =~ s/{\#}/$marker2/;
+ if ($@ !~ /^\Q$result/) {
+ print "# For $regex, expected:\n# $result\n# Got:\n# $@\n#\nnot ";
+ }
+ print "ok $count\n";
+}
+
+
+our $warning;
+$SIG{__WARN__} = sub { $warning = shift };
+
+while (@warning)
+{
+ $count++;
+ my $regex = shift @warning;
+ my $result = shift @warning;
+
+ undef $warning;
+ $_ = "x";
+ eval $regex;
+
+ if ($@)
+ {
+ print "# oops, $regex died with:\n#\t$@#\nnot ok $count\n";
+ next;
+ }
+
+ if (not $warning)
+ {
+ print "# oops, $regex didn't generate a warning\nnot ok $count\n";
+ next;
+ }
+ $result =~ s/{\#}/$marker1/;
+ $result =~ s/{\#}/$marker2/;
+ if ($warning !~ /^\Q$result/)
+ {
+ print <<"EOM";
+# For $regex, expected:
+# $result
+# Got:
+# $warning
+#
+not ok $count
+EOM
+ next;
+ }
+ print "ok $count\n";
+}
+
+
+
diff --git a/contrib/perl5/t/op/reverse.t b/contrib/perl5/t/op/reverse.t
new file mode 100755
index 000000000000..bb7b9b77feae
--- /dev/null
+++ b/contrib/perl5/t/op/reverse.t
@@ -0,0 +1,33 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+}
+
+print "1..4\n";
+
+print "not " unless reverse("abc") eq "cba";
+print "ok 1\n";
+
+$_ = "foobar";
+print "not " unless reverse() eq "raboof";
+print "ok 2\n";
+
+{
+ my @a = ("foo", "bar");
+ my @b = reverse @a;
+
+ print "not " unless $b[0] eq $a[1] && $b[1] eq $a[0];
+ print "ok 3\n";
+}
+
+{
+ # Unicode.
+
+ my $a = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
+ my $b = scalar reverse($a);
+ my $c = scalar reverse($b);
+ print "not " unless $a eq $c;
+ print "ok 4\n";
+}
diff --git a/contrib/perl5/t/op/utf8decode.t b/contrib/perl5/t/op/utf8decode.t
new file mode 100755
index 000000000000..4d05a6b8d379
--- /dev/null
+++ b/contrib/perl5/t/op/utf8decode.t
@@ -0,0 +1,183 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+}
+
+no utf8;
+
+print "1..78\n";
+
+my $test = 1;
+
+# This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
+# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
+# version dated 2000-09-02.
+
+# We use the \x notation instead of raw binary bytes for \x00-\x1f\x7f-\xff
+# because e.g. many patch programs have issues with binary data.
+
+my @MK = split(/\n/, <<__EOMK__);
+1 Correct UTF-8
+1.1.1 y "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5" - 11 ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5 5
+2 Boundary conditions
+2.1 First possible sequence of certain length
+2.1.1 y "\x00" 0 1 00 1
+2.1.2 y "\xc2\x80" 80 2 c2:80 1
+2.1.3 y "\xe0\xa0\x80" 800 3 e0:a0:80 1
+2.1.4 y "\xf0\x90\x80\x80" 10000 4 f0:90:80:80 1
+2.1.5 y "\xf8\x88\x80\x80\x80" 200000 5 f8:88:80:80:80 1
+2.1.6 y "\xfc\x84\x80\x80\x80\x80" 4000000 6 fc:84:80:80:80:80 1
+2.2 Last possible sequence of certain length
+2.2.1 y "\x7f" 7f 1 7f 1
+2.2.2 y "\xdf\xbf" 7ff 2 df:bf 1
+# The ffff is illegal unless UTF8_ALLOW_FFFF
+2.2.3 n "\xef\xbf\xbf" ffff 3 ef:bf:bf 1 character 0xffff
+2.2.4 y "\xf7\xbf\xbf\xbf" 1fffff 4 f7:bf:bf:bf 1
+2.2.5 y "\xfb\xbf\xbf\xbf\xbf" 3ffffff 5 fb:bf:bf:bf:bf 1
+2.2.6 y "\xfd\xbf\xbf\xbf\xbf\xbf" 7fffffff 6 fd:bf:bf:bf:bf:bf 1
+2.3 Other boundary conditions
+2.3.1 y "\xed\x9f\xbf" d7ff 3 ed:9f:bf 1
+2.3.2 y "\xee\x80\x80" e000 3 ee:80:80 1
+2.3.3 y "\xef\xbf\xbd" fffd 3 ef:bf:bd 1
+2.3.4 y "\xf4\x8f\xbf\xbf" 10ffff 4 f4:8f:bf:bf 1
+2.3.5 y "\xf4\x90\x80\x80" 110000 4 f4:90:80:80 1
+3 Malformed sequences
+3.1 Unexpected continuation bytes
+3.1.1 n "\x80" - 1 80 - unexpected continuation byte 0x80
+3.1.2 n "\xbf" - 1 bf - unexpected continuation byte 0xbf
+3.1.3 n "\x80\xbf" - 2 80:bf - unexpected continuation byte 0x80
+3.1.4 n "\x80\xbf\x80" - 3 80:bf:80 - unexpected continuation byte 0x80
+3.1.5 n "\x80\xbf\x80\xbf" - 4 80:bf:80:bf - unexpected continuation byte 0x80
+3.1.6 n "\x80\xbf\x80\xbf\x80" - 5 80:bf:80:bf:80 - unexpected continuation byte 0x80
+3.1.7 n "\x80\xbf\x80\xbf\x80\xbf" - 6 80:bf:80:bf:80:bf - unexpected continuation byte 0x80
+3.1.8 n "\x80\xbf\x80\xbf\x80\xbf\x80" - 7 80:bf:80:bf:80:bf:80 - unexpected continuation byte 0x80
+3.1.9 n "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - 64 80:81:82:83:84:85:86:87:88:89:8a:8b:8c:8d:8e:8f:90:91:92:93:94:95:96:97:98:99:9a:9b:9c:9d:9e:9f:a0:a1:a2:a3:a4:a5:a6:a7:a8:a9:aa:ab:ac:ad:ae:af:b0:b1:b2:b3:b4:b5:b6:b7:b8:b9:ba:bb:bc:bd:be:bf - unexpected continuation byte 0x80
+3.2 Lonely start characters
+3.2.1 n "\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf " - 64 c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20 - unexpected non-continuation byte 0x20 after start byte 0xc0
+3.2.2 n "\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef " - 32 e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20 - unexpected non-continuation byte 0x20 after start byte 0xe0
+3.2.3 n "\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 " - 16 f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 - unexpected non-continuation byte 0x20 after start byte 0xf0
+3.2.4 n "\xf8 \xf9 \xfa \xfb " - 8 f8:20:f9:20:fa:20:fb:20 - unexpected non-continuation byte 0x20 after start byte 0xf8
+3.2.5 n "\xfc \xfd " - 4 fc:20:fd:20 - unexpected non-continuation byte 0x20 after start byte 0xfc
+3.3 Sequences with last continuation byte missing
+3.3.1 n "\xc0" - 1 c0 - 1 byte, need 2
+3.3.2 n "\xe0\x80" - 2 e0:80 - 2 bytes, need 3
+3.3.3 n "\xf0\x80\x80" - 3 f0:80:80 - 3 bytes, need 4
+3.3.4 n "\xf8\x80\x80\x80" - 4 f8:80:80:80 - 4 bytes, need 5
+3.3.5 n "\xfc\x80\x80\x80\x80" - 5 fc:80:80:80:80 - 5 bytes, need 6
+3.3.6 n "\xdf" - 1 df - 1 byte, need 2
+3.3.7 n "\xef\xbf" - 2 ef:bf - 2 bytes, need 3
+3.3.8 n "\xf7\xbf\xbf" - 3 f7:bf:bf - 3 bytes, need 4
+3.3.9 n "\xfb\xbf\xbf\xbf" - 4 fb:bf:bf:bf - 4 bytes, need 5
+3.3.10 n "\xfd\xbf\xbf\xbf\xbf" - 5 fd:bf:bf:bf:bf - 5 bytes, need 6
+3.4 Concatenation of incomplete sequences
+3.4.1 n "\xc0\xe0\x80\xf0\x80\x80\xf8\x80\x80\x80\xfc\x80\x80\x80\x80\xdf\xef\xbf\xf7\xbf\xbf\xfb\xbf\xbf\xbf\xfd\xbf\xbf\xbf\xbf" - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected non-continuation byte 0xe0 after start byte 0xc0
+3.5 Impossible bytes
+3.5.1 n "\xfe" - 1 fe - byte 0xfe
+3.5.2 n "\xff" - 1 ff - byte 0xff
+3.5.3 n "\xfe\xfe\xff\xff" - 4 fe:fe:ff:ff - byte 0xfe
+4 Overlong sequences
+4.1 Examples of an overlong ASCII character
+4.1.1 n "\xc0\xaf" - 2 c0:af - 2 bytes, need 1
+4.1.2 n "\xe0\x80\xaf" - 3 e0:80:af - 3 bytes, need 1
+4.1.3 n "\xf0\x80\x80\xaf" - 4 f0:80:80:af - 4 bytes, need 1
+4.1.4 n "\xf8\x80\x80\x80\xaf" - 5 f8:80:80:80:af - 5 bytes, need 1
+4.1.5 n "\xfc\x80\x80\x80\x80\xaf" - 6 fc:80:80:80:80:af - 6 bytes, need 1
+4.2 Maximum overlong sequences
+4.2.1 n "\xc1\xbf" - 2 c1:bf - 2 bytes, need 1
+4.2.2 n "\xe0\x9f\xbf" - 3 e0:9f:bf - 3 bytes, need 2
+4.2.3 n "\xf0\x8f\xbf\xbf" - 4 f0:8f:bf:bf - 4 bytes, need 3
+4.2.4 n "\xf8\x87\xbf\xbf\xbf" - 5 f8:87:bf:bf:bf - 5 bytes, need 4
+4.2.5 n "\xfc\x83\xbf\xbf\xbf\xbf" - 6 fc:83:bf:bf:bf:bf - 6 bytes, need 5
+4.3 Overlong representation of the NUL character
+4.3.1 n "\xc0\x80" - 2 c0:80 - 2 bytes, need 1
+4.3.2 n "\xe0\x80\x80" - 3 e0:80:80 - 3 bytes, need 1
+4.3.3 n "\xf0\x80\x80\x80" - 4 f0:80:80:80 - 4 bytes, need 1
+4.3.4 n "\xf8\x80\x80\x80\x80" - 5 f8:80:80:80:80 - 5 bytes, need 1
+4.3.5 n "\xfc\x80\x80\x80\x80\x80" - 6 fc:80:80:80:80:80 - 6 bytes, need 1
+5 Illegal code positions
+5.1 Single UTF-16 surrogates
+5.1.1 n "\xed\xa0\x80" - 3 ed:a0:80 - UTF-16 surrogate 0xd800
+5.1.2 n "\xed\xad\xbf" - 3 ed:ad:bf - UTF-16 surrogate 0xdb7f
+5.1.3 n "\xed\xae\x80" - 3 ed:ae:80 - UTF-16 surrogate 0xdb80
+5.1.4 n "\xed\xaf\xbf" - 3 ed:af:bf - UTF-16 surrogate 0xdbff
+5.1.5 n "\xed\xb0\x80" - 3 ed:b0:80 - UTF-16 surrogate 0xdc00
+5.1.6 n "\xed\xbe\x80" - 3 ed:be:80 - UTF-16 surrogate 0xdf80
+5.1.7 n "\xed\xbf\xbf" - 3 ed:bf:bf - UTF-16 surrogate 0xdfff
+5.2 Paired UTF-16 surrogates
+5.2.1 n "\xed\xa0\x80\xed\xb0\x80" - 6 ed:a0:80:ed:b0:80 - UTF-16 surrogate 0xd800
+5.2.2 n "\xed\xa0\x80\xed\xbf\xbf" - 6 ed:a0:80:ed:bf:bf - UTF-16 surrogate 0xd800
+5.2.3 n "\xed\xad\xbf\xed\xb0\x80" - 6 ed:ad:bf:ed:b0:80 - UTF-16 surrogate 0xdb7f
+5.2.4 n "\xed\xad\xbf\xed\xbf\xbf" - 6 ed:ad:bf:ed:bf:bf - UTF-16 surrogate 0xdb7f
+5.2.5 n "\xed\xae\x80\xed\xb0\x80" - 6 ed:ae:80:ed:b0:80 - UTF-16 surrogate 0xdb80
+5.2.6 n "\xed\xae\x80\xed\xbf\xbf" - 6 ed:ae:80:ed:bf:bf - UTF-16 surrogate 0xdb80
+5.2.7 n "\xed\xaf\xbf\xed\xb0\x80" - 6 ed:af:bf:ed:b0:80 - UTF-16 surrogate 0xdbff
+5.2.8 n "\xed\xaf\xbf\xed\xbf\xbf" - 6 ed:af:bf:ed:bf:bf - UTF-16 surrogate 0xdbff
+5.3 Other illegal code positions
+5.3.1 n "\xef\xbf\xbe" - 3 ef:bf:be - byte order mark 0xfffe
+# The ffff is illegal unless UTF8_ALLOW_FFFF
+5.3.2 n "\xef\xbf\xbf" - 3 ef:bf:bf - character 0xffff
+__EOMK__
+
+# 104..181
+{
+ my $WARNCNT;
+ my $id;
+
+ local $SIG{__WARN__} =
+ sub {
+ print "# $id: @_";
+ $WARNCNT++;
+ $WARNMSG = "@_";
+ };
+
+ sub moan {
+ print "$id: @_";
+ }
+
+ sub test_unpack_U {
+ $WARNCNT = 0;
+ $WARNMSG = "";
+ unpack('U*', $_[0]);
+ }
+
+ for (@MK) {
+ if (/^(?:\d+(?:\.\d+)?)\s/ || /^#/) {
+ # print "# $_\n";
+ } elsif (/^(\d+\.\d+\.\d+[bu]?)\s+([yn])\s+"(.+)"\s+([0-9a-f]{1,8}|-)\s+(\d+)\s+([0-9a-f]{2}(?::[0-9a-f]{2})*)(?:\s+((?:\d+|-)(?:\s+(.+))?))?$/) {
+ $id = $1;
+ my ($okay, $bytes, $Unicode, $byteslen, $hex, $charslen, $error) =
+ ($2, $3, $4, $5, $6, $7, $8);
+ my @hex = split(/:/, $hex);
+ unless (@hex == $byteslen) {
+ my $nhex = @hex;
+ moan "amount of hex ($nhex) not equal to byteslen ($byteslen)\n";
+ }
+ {
+ use bytes;
+ my $bytesbyteslen = length($bytes);
+ unless ($bytesbyteslen == $byteslen) {
+ moan "bytes length() ($bytesbyteslen) not equal to $byteslen\n";
+ }
+ }
+ if ($okay eq 'y') {
+ test_unpack_U($bytes);
+ if ($WARNCNT) {
+ moan "unpack('U*') false negative\n";
+ print "not ";
+ }
+ } elsif ($okay eq 'n') {
+ test_unpack_U($bytes);
+ if ($WARNCNT == 0 || ($error ne '' && $WARNMSG !~ /$error/)) {
+ moan "unpack('U*') false positive\n";
+ print "not ";
+ }
+ }
+ print "ok $test\n";
+ $test++;
+ } else {
+ moan "unknown format\n";
+ }
+ }
+}