2005-08-06 15:08 This is a patch for Interchange version 5.2.0. When you apply it, Interchange will: - generate HTML pages in Unicode UTF-8 encoding; - expect the template files to be in UTF-8 encoding; - store your data (like product names and descriptions) in UTF-8 in databases; - expect data from CSV database files, GDBM, DB_File and in-memory databases to be in UTF-8 encoding. The patch is not complete. It does not fix: - encoding for data coming from SQL (DBI, including MySQL) and LDAP databases -- I didn't try that, expect no problem; - it does not fix encoding for data coming from forms with method='POST' and ENCTYPE="multipart/form-data" -- I tried to do that, but failed. Also: - it is not well-tested; - it will probably break the existing internationalization schemes. But it may be useful as an example, a starting point, that shows how to procede if you want to finish it. Ivan Kurmanov diff -wcbr lib.orig/Vend/Config.pm lib/Vend/Config.pm *** lib.orig/Vend/Config.pm 2005-07-06 00:09:31.000000000 +0300 --- lib/Vend/Config.pm 2005-08-06 15:02:19.298462760 +0300 *************** *** 359,364 **** --- 359,365 ---- ['Catalog', 'catalog', ''], ['SubCatalog', 'catalog', ''], ['AutoVariable', 'autovar', 'UrlJoiner'], + ['HTMLCharset', undef, 'UTF-8' ], ]; return $directives; diff -wcbr lib.orig/Vend/Data.pm lib/Vend/Data.pm *** lib.orig/Vend/Data.pm 2005-07-06 00:09:31.000000000 +0300 --- lib/Vend/Data.pm 2005-08-06 12:24:20.232499824 +0300 *************** *** 24,29 **** --- 24,31 ---- # MA 02111-1307 USA. package Vend::Data; + use open ':utf8', ':std'; + require Exporter; @ISA = qw(Exporter); @EXPORT = qw( *************** *** 237,243 **** return '' unless defined $db->test_column($field_name); $key = $db->foreign($key, $foreign) if $foreign; return '' unless $db->test_record($key); ! return $db->field($key, $field_name); } sub database_row { --- 239,247 ---- return '' unless defined $db->test_column($field_name); $key = $db->foreign($key, $foreign) if $foreign; return '' unless $db->test_record($key); ! ! my $v = $db->field($key, $field_name); ! return $v; } sub database_row { diff -wcbr lib.orig/Vend/File.pm lib/Vend/File.pm *** lib.orig/Vend/File.pm 2005-07-06 00:09:32.000000000 +0300 --- lib/Vend/File.pm 2005-07-07 09:09:04.000000000 +0300 *************** *** 46,51 **** --- 46,53 ---- writefile ); + use open ":utf8"; + use strict; use Config; use Fcntl; diff -wcbr lib.orig/Vend/Interpolate.pm lib/Vend/Interpolate.pm *** lib.orig/Vend/Interpolate.pm 2005-07-06 00:09:32.000000000 +0300 --- lib/Vend/Interpolate.pm 2005-08-06 12:25:27.528269312 +0300 *************** *** 30,35 **** --- 30,37 ---- $VERSION = substr(q$Revision: 2.207 $, 10); + use open ":utf8"; + @EXPORT = qw ( interpolate_html *************** *** 149,157 **** --- 151,162 ---- &encode_entities &HTML &interpolate_html + &decode_utf8 /; } + use Encode qw(decode_utf8); + use vars @Share_vars, @Share_routines, qw/$ready_safe $safe_safe/; use vars qw/%Filter %Ship_handler $Safe_data/; *************** *** 218,225 **** my %T; my %QR; ! my $All = '[\000-\377]*'; ! my $Some = '[\000-\377]*?'; my $Codere = '[-\w#/.]+'; my $Coderex = '[-\w:#=/.%]+'; my $Mandx = '\s+([-\w:#=/.%]+)'; --- 223,230 ---- my %T; my %QR; ! my $All = '[\S\s]*'; ! my $Some = '[\S\s]*?'; my $Codere = '[-\w#/.]+'; my $Coderex = '[-\w:#=/.%]+'; my $Mandx = '\s+([-\w:#=/.%]+)'; *************** *** 234,241 **** my $T = '\]'; my $D = '[-_]'; ! my $XAll = qr{[\000-\377]*}; ! my $XSome = qr{[\000-\377]*?}; my $XCodere = qr{[-\w#/.]+}; my $XCoderex = qr{[-\w:#=/.%]+}; my $XMandx = qr{\s+([-\w:#=/.%]+)}; --- 239,246 ---- my $T = '\]'; my $D = '[-_]'; ! my $XAll = qr{[\S\s]*}; ! my $XSome = qr{[\S\s]*?}; my $XCodere = qr{[-\w#/.]+}; my $XCoderex = qr{[-\w:#=/.%]+}; my $XMandx = qr{\s+([-\w:#=/.%]+)}; diff -wcbr lib.orig/Vend/Search.pm lib/Vend/Search.pm *** lib.orig/Vend/Search.pm 2005-07-06 00:09:31.000000000 +0300 --- lib/Vend/Search.pm 2005-07-07 09:20:59.000000000 +0300 *************** *** 21,26 **** --- 21,27 ---- # MA 02111-1307 USA. package Vend::Search; + use open ":utf8"; $VERSION = substr(q$Revision: 2.22 $, 10); diff -wcbr lib.orig/Vend/Server.pm lib/Vend/Server.pm *** lib.orig/Vend/Server.pm 2005-07-06 00:09:31.000000000 +0300 --- lib/Vend/Server.pm 2005-08-06 12:28:20.524969816 +0300 *************** *** 25,30 **** --- 25,32 ---- package Vend::Server; + use open ':utf8', ':std'; + use vars qw($VERSION); $VERSION = substr(q$Revision: 2.50 $, 10); *************** *** 284,289 **** --- 286,292 ---- # Doesn't do unhexify sub store_cgi_kv { my ($key, $value) = @_; + $value = Encode::decode_utf8( $value ); $key = $::IV->{$key} if defined $::IV->{$key}; if(defined $CGI::values{$key} and ! defined $::SV{$key}) { $CGI::values{$key} = "$CGI::values{$key}\0$value"; *************** *** 345,350 **** --- 348,354 ---- #::logDebug("mapping --> $key"); $value =~ tr/+/ /; $value =~ s/%([0-9a-fA-F][0-9a-fA-F])/chr(hex $1)/ge; + $value = Encode::decode_utf8( $value ); # Handle multiple keys if(defined $CGI::values{$key} and ! defined $::SV{$key}) { $CGI::values{$key} = "$CGI::values{$key}\0$value"; *************** *** 484,489 **** --- 488,496 ---- # $body is now a reference my ($s, $body) = @_; #show_times("begin response send") if $Global::ShowTimes; + my $charset = $Global::HTMLCharset; + if ( $charset ) { $charset = "; charset=$charset"; } + my $status; return if $Vend::Sent; if($Vend::StatusLine) { *************** *** 494,500 **** if($CGI::redirect_status and ! $Vend::StatusLine) { $status = "200 OK"; ! $Vend::StatusLine = "Status: 200 OK\nContent-Type: text/html"; } $$body =~ s/^\s+// --- 501,507 ---- if($CGI::redirect_status and ! $Vend::StatusLine) { $status = "200 OK"; ! $Vend::StatusLine = "Status: 200 OK\nContent-Type: text/html$charset"; } $$body =~ s/^\s+// *************** *** 502,508 **** if(! $s and $Vend::StatusLine) { $Vend::StatusLine .= ($Vend::StatusLine =~ /^Content-Type:/im) ! ? '' : "\r\nContent-Type: text/html\r\n"; # TRACK $Vend::StatusLine .= "X-Track: " . $Vend::Track->header() . "\r\n" if $Vend::Track; --- 509,516 ---- if(! $s and $Vend::StatusLine) { $Vend::StatusLine .= ($Vend::StatusLine =~ /^Content-Type:/im) ! ? '' ! : "\r\nContent-Type: text/html$charset\r\n"; # TRACK $Vend::StatusLine .= "X-Track: " . $Vend::Track->header() . "\r\n" if $Vend::Track; *************** *** 611,617 **** print $fh canon_status($Vend::StatusLine); } elsif(! $Vend::ResponseMade) { ! print $fh canon_status("Content-Type: text/html"); # TRACK print $fh canon_status("X-Track: " . $Vend::Track->header()) if $Vend::Track; --- 619,625 ---- print $fh canon_status($Vend::StatusLine); } elsif(! $Vend::ResponseMade) { ! print $fh canon_status("Content-Type: text/html$charset"); # TRACK print $fh canon_status("X-Track: " . $Vend::Track->header()) if $Vend::Track; *************** *** 821,826 **** --- 829,835 ---- for (;;) { $block = _find( \$in, "\n" ); + if (($n) = ($block =~ m/^env (\d+)$/)) { foreach $i (0 .. $n - 1) { $e = _string(\$in); diff -wcbr lib.orig/Vend/Table/Common.pm lib/Vend/Table/Common.pm *** lib.orig/Vend/Table/Common.pm 2005-07-06 00:09:32.000000000 +0300 --- lib/Vend/Table/Common.pm 2005-08-06 12:37:45.966009712 +0300 *************** *** 303,309 **** $s = $s->import_db() if ! defined $s->[$TIE_HASH]; return undef unless $s->record_exists($key); my %row; ! @row{ @{$s->[$COLUMN_NAMES]} } = $s->row($key); return \%row; } --- 303,311 ---- $s = $s->import_db() if ! defined $s->[$TIE_HASH]; return undef unless $s->record_exists($key); my %row; ! ! my @v = $s->row($key); ! @row{ @{$s->[$COLUMN_NAMES]} } = @v; return \%row; } *************** *** 311,316 **** --- 313,324 ---- my ($s, $key) = @_; $s->lock_table() if $s->[$CONFIG]{IC_LOCKING}; my $line = $s->[$TIE_HASH]{"k$key"}; + + # this may work and may not: + my $d = Encode::decode_utf8( $line ); + # if it did not work, we restore the previous value: + if ( $d ) { $line = $d; } + $s->unlock_table() if $s->[$CONFIG]{IC_LOCKING}; die $s->log_error( "There is no row with index '%s' in database %s", *************** *** 525,531 **** sub field { my ($s, $key, $column) = @_; $s = $s->import_db() if ! defined $s->[$TIE_HASH]; ! return ($s->row($key))[$s->column_index($column)]; } sub set_field { --- 533,539 ---- sub field { my ($s, $key, $column) = @_; $s = $s->import_db() if ! defined $s->[$TIE_HASH]; ! return (($s->row($key))[$s->column_index($column)] ); } sub set_field { *************** *** 686,700 **** return (); } $key =~ s/^k// or next; if($restrict) { - my (@row) = $s->row($key); #::logDebug("each_nokey: rfield='$row[$rfield]' eq '$rsession' ??") if defined $rfield; #::logDebug("each_nokey: hfield='$row[$hfield]'") if defined $hfield; next if defined $hfield and $row[$hfield]; next if defined $rfield and $row[$rfield] ne $rsession; return \@row; } ! return [ $s->row($key) ]; } } --- 694,709 ---- return (); } $key =~ s/^k// or next; + + my @row = $s->row($key); if($restrict) { #::logDebug("each_nokey: rfield='$row[$rfield]' eq '$rsession' ??") if defined $rfield; #::logDebug("each_nokey: hfield='$row[$hfield]'") if defined $hfield; next if defined $hfield and $row[$hfield]; next if defined $rfield and $row[$rfield] ne $rsession; return \@row; } ! return [ @row ]; } } diff -wcbr lib.orig/Vend/Table/InMemory.pm lib/Vend/Table/InMemory.pm *** lib.orig/Vend/Table/InMemory.pm 2005-07-06 00:09:32.000000000 +0300 --- lib/Vend/Table/InMemory.pm 2005-07-08 10:55:56.000000000 +0300 *************** *** 111,116 **** --- 111,119 ---- $key, $s->[$FILENAME], ) unless defined $a; + foreach ( @$a ) { + $_ = Encode::decode_utf8( $_ ); + } return @$a; } *************** *** 119,124 **** --- 122,130 ---- my $a = $s->[$TIE_HASH]{$key} or return undef; #::logDebug("here is row $key: " . ::uneval($a)); + foreach ( @$a ) { + $_ = Encode::decode_utf8( $_ ); + } my %row; @row{ @{$s->[$COLUMN_NAMES]} } = @$a; return \%row; diff -wcbr lib.orig/Vend/Table/Shadow.pm lib/Vend/Table/Shadow.pm *** lib.orig/Vend/Table/Shadow.pm 2005-07-06 00:09:32.000000000 +0300 --- lib/Vend/Table/Shadow.pm 2005-07-08 11:48:54.000000000 +0300 *************** *** 358,363 **** --- 358,364 ---- for $row (@$result) { for $pos (@map_matches) { ($name, $map_entry) = @{$map_entries[$pos]}; + ### XXX decode_utf8 $row->[$pos] = $s->_map_column($row->[$keypos], $name, 1, $row->[$pos], $map_entry); } } diff -wcbr lib.orig/Vend/TextSearch.pm lib/Vend/TextSearch.pm *** lib.orig/Vend/TextSearch.pm 2005-07-06 00:09:31.000000000 +0300 --- lib/Vend/TextSearch.pm 2005-07-07 09:20:48.000000000 +0300 *************** *** 23,28 **** --- 23,31 ---- # MA 02111-1307 USA. package Vend::TextSearch; + + use open ":utf8"; + require Vend::Search; require Exporter;