use strict;
use warnings;
use utf8;
use Future::AsyncAwait;
use Encode qw(decode_utf8 encode_utf8);

# =============================================================================
# PAGI UTF-8 Round-Trip Test Demo
# =============================================================================
#
# Tests UTF-8 handling across all PAGI input vectors:
#
# 1. PATH TEST: /echo/{utf8_string}
#    - $scope->{path}: decoded Unicode characters (per PAGI spec)
#    - $scope->{raw_path}: percent-encoded bytes from wire
#
# 2. QUERY STRING TEST: ?text={utf8_string}
#    - $scope->{query_string}: percent-encoded bytes (app decodes)
#
# 3. POST BODY TEST: form submit with textarea
#    - Request body: percent-encoded bytes (application/x-www-form-urlencoded)
#    - App must: percent-decode → decode_utf8 → characters
#
# 4. RESPONSE TEST: UTF-8 literals in source
#    - App must: encode_utf8 → bytes for wire
#    - Content-Length must be byte count, not character count
#
# =============================================================================

# PAGI application entry point.
#
# Parameters:
#   $scope   - hashref describing the request; this handler reads {type},
#              {path} and {query_string}.
#   $receive - coderef returning a Future that yields the next request
#              message; {body} chunks are concatenated until {more} is false.
#   $send    - coderef accepting a response message hashref.
#
# Dies when the scope type is anything other than 'http'.
my $app = async sub {
    my ($scope, $receive, $send) = @_;

    die "Unsupported scope type: $scope->{type}" if $scope->{type} ne 'http';

    # Collect request body
    my $body = '';
    while (1) {
        my $message = await $receive->();
        $body .= $message->{body} // '';
        last unless $message->{more};
    }

    my $echo   = '';
    my $source = '';

    # 1. PATH: Check for /echo/{value}
    if (($scope->{path} // '') =~ m{^/echo/(.+)$}) {
        $echo   = $1;    # Already decoded per PAGI spec
        $source = 'path';
    }
    # 2. QUERY STRING: ?text={value}
    elsif (defined(my $from_query = _form_param($scope->{query_string}, 'text'))) {
        $echo   = $from_query;
        $source = 'query string';
    }
    # 3. POST BODY: text={value}
    elsif (defined(my $from_body = _form_param($body, 'text'))) {
        $echo   = $from_body;
        $source = 'POST body';
    }

    # Build echo section (empty string when there is nothing to echo)
    my $echo_section = _echo_section($echo, $source);

    # Build path test links; the href carries the percent-encoded UTF-8 bytes,
    # the link text shows the characters themselves.
    my @path_samples = ('λ', '🔥', '中文', '♥', 'café');
    my $path_links = join ' | ', map {
        my $encoded = _uri_encode($_);
        qq{<a href="/echo/$encoded">$_</a>}
    } @path_samples;

    # NOTE(review): the markup below was reconstructed around the visible page
    # text (the tags were missing from the reviewed copy) — confirm against the
    # intended template.
    my $html = <<"HTML";
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>PAGI UTF-8</title>
</head>
<body>
<h1>PAGI UTF-8 Round-Trip Test</h1>
<p>Samples: λ (Greek) | 🔥 (Emoji) | 中文 (CJK) | ♥ (Symbol) | café (Accented)</p>
$echo_section
<h2>1. Path Test</h2>
<p>Click: $path_links</p>
<p>Tests <code>\$scope->{path}</code> (decoded) vs <code>\$scope->{raw_path}</code> (bytes)</p>
<h2>2. Query String Test</h2>
<form method="GET" action="/">
<input type="text" name="text">
<input type="submit">
</form>
<p>Tests <code>\$scope->{query_string}</code> (percent-encoded bytes)</p>
<h2>3. POST Body Test</h2>
<form method="POST" action="/">
<textarea name="text"></textarea>
<input type="submit">
</form>
<p>Tests request body (application/x-www-form-urlencoded)</p>
</body>
</html>
HTML

    # Encode once so the body and its Content-Length agree on the byte count.
    my $bytes = encode_utf8($html);

    await $send->({
        type    => 'http.response.start',
        status  => 200,
        headers => [
            ['content-type',   'text/html; charset=utf-8'],
            ['content-length', length($bytes)],
        ],
    });

    await $send->({
        type => 'http.response.body',
        body => $bytes,
        more => 0,
    });
};

# Extract and decode the first "$name=value" pair from a form-urlencoded
# string (query string or POST body).
#
# The parameter name must start the string or follow '&', so 'text' does not
# match inside 'context=...'; the value stops at the next '&' so later fields
# are not swallowed.
#
# Returns the decoded character string ('' for an empty value), or undef when
# the parameter is absent.
sub _form_param {
    my ($raw, $name) = @_;
    $raw //= '';
    return undef unless $raw =~ /(?:\A|&)\Q$name\E=([^&]*)/;
    return _uri_decode($1);
}

# Escape the five HTML-significant characters so untrusted text can be placed
# in element content or a quoted attribute value.
sub _html_escape {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text //= '';
    $text =~ s/([&<>"'])/$entity_for{$1}/g;
    return $text;
}

# Render the "Echo" fragment for a decoded value: the value itself
# (HTML-escaped), its character count, its UTF-8 byte count and its
# codepoints. Returns '' when $echo is empty.
sub _echo_section {
    my ($echo, $source) = @_;
    return '' unless length($echo // '');

    my $chars      = length($echo);
    my $bytes      = length(encode_utf8($echo));
    my $codepoints = join ' ', map { sprintf 'U+%04X', ord($_) } split //, $echo;

    # The echoed value is request-controlled, so it must never reach the page raw.
    my $safe_echo = _html_escape($echo);

    return <<"ECHO";
<div>
<h2>Echo (from $source):</h2>
<p>$safe_echo</p>
<p>Characters: $chars | UTF-8 bytes: $bytes</p>
<p>Codepoints: $codepoints</p>
</div>
ECHO
}

# Decode an application/x-www-form-urlencoded component into characters:
# '+' becomes a space, %XX becomes the corresponding byte, and the resulting
# byte string is decoded as UTF-8.
sub _uri_decode {
    my ($str) = @_;
    $str =~ s/\+/ /g;
    $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
    return decode_utf8($str);
}

# Percent-encode a character string: encode to UTF-8 bytes, then escape every
# byte outside the unreserved set (A-Z a-z 0-9 - . _ ~) as %XX.
sub _uri_encode {
    my ($str) = @_;
    my $bytes = encode_utf8($str);
    $bytes =~ s/([^A-Za-z0-9\-._~])/sprintf("%%%02X", ord($1))/eg;
    return $bytes;
}

$app;