# This is a patch for perl5.005_04 to update it to jperl5.005_04. # It was generated by makepatch 1.91 (2.0BETA) on Thu Apr 1 23:08:44 2004. # # To apply this patch, chdir to source directory perl5.005_04 and enter # # /bin/sh # patch -p1 -N < touch Changes.jp touch README.j touch README.jp touch kanji.c touch kanji.h touch lib/I18N/Japanese.pm touch pod/jperl.pod mkdir t/kanji touch t/kanji/format.t touch t/kanji/op.t touch t/kanji/re_tests touch t/kanji/regexp.t touch t/kanji/sjis.t exit # End of preamble. Index: patchlevel.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/patchlevel.h Thu Feb 5 06:04:12 2004 --- jperl5.005_04/patchlevel.h Wed Mar 31 01:33:34 2004 *************** *** 40,45 **** --- 40,46 ---- */ static char *local_patches[] = { NULL + ,"jperl5.005_04-20040401.pat - April Fool again" ,NULL }; Index: Changes.jp ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/Changes.jp Mon Jul 18 08:46:18 1994 --- jperl5.005_04/Changes.jp Thu Apr 1 23:06:56 2004 *************** *** 0 **** --- 1,412 ---- + Thu Apr 1 23:06:01 2004 WATANABE Hirofumi + + * jperl5.005_04-20040401.pat + + * 5.005_04が出た記念に。 + + Sat Apr 1 16:13:49 2000 WATANABE Hirofumi + + * jperl5.005_03-20000401.pat + + * regcomp.c, regexec.c: せっかく考えないですむようにしたのに, + gcc の最適化で memcpy が builtin になってしまって, sparc + とかだと core を吐いていた. ぐはぁ. + + * pp_ctl.c: use, require で HINT_KANJI_* のビットはいじらない + ようにしてみた. + CGI.pm の先頭に no I18N::Japanese; を入れなくても + no I18N::Japanese; + use CGI; + use I18N::Japanese; + でいける(はず). + + Sat Feb 12 17:37:28 2000 WATANABE Hirofumi + + * jperl5.005_03-20000212.pat + + * えっと, 20000212 にしました. + + * regcomp.c: 64 ビットアーキテクチャ対応. bit vector へのポインタ + は memcpy を使うことで sizeof (char *) 境界とか考えないですむよ + うにした. + + * regexec.c, regcomp.h: ditto. + + * lib/I18N/Japanese.pm: typo. + + * perl.h, regexec.c: ctype の is????? は不必要だと思うので削除. + + Sun Aug 22 15:55:46 1999 WATANABE Hirofumi + + * jperl5.005_03-990822.pat + + * regcomp.c: regclassfree() で CURLY のときの再帰にするときの + 引数がちょっとまずい(あまり影響はないとおもうけど). + + * intrpvar.h: PERL_OBJECT のバイナリ互換のために + Iminus_b と Ilangtype を最後に移動. + + * patchlevel.h: jperl 情報を追加. + + * proto.h: jperl で追加された関数をすべて VIRTUAL に. + + Sun Jul 11 16:17:12 1999 WATANABE Hirofumi + + * index/rindex の define はやめた. + ActivePerl は index という変数を使いたがる. + + * perl.c: rindex -> jrindex + + Sat Jul 3 20:23:59 1999 WATANABE Hirofumi + + * jperl5.005_03-990703.pat + + * proto.h の regclassfree() の function prototype を忘れてた. + + * toke.c: scan_formline() の要らないコードが ActivePerl build 518 + に悪影響. + + Sat Jun 19 23:10:37 1999 WATANABE Hirofumi + + * jperl5.005_03_990619.pat + + * /[a-z]+/ や /[a-z]*/ のようなときに regclassfree() で + free されないため Out of memory になることがあった. + パッチありがとう丸岡 実利さん. + + Sat Jun 12 17:11:27 1999 WATANABE Hirofumi + + * jperl5.005_03_990612.pat + + * split(/\|/) のように 1 バイトのときが日本語化未対応だった. + SJIS の場合特にまずい. + + Mon May 3 18:56:44 1999 WATANABE Hirofumi + + * jperl5.005_03_990503.pat + + * chop が常に日本語モードだった. + + * jperl Makefile.PL 対策. + ありがとう前田薫さん. + + Sat Apr 3 16:15:56 1999 WATANABE Hirofumi + + * jperl5.005_03-990403.pat + + Sun Feb 28 02:14:51 1999 WATANABE Hirofumi + + * " 012" =~ /0.2/ が偽になる不具合を修正. + + Fri Dec 25 03:18:45 1998 WATANABE Hirofumi + + * jperl5.005_02-981225.pat + + * regcomp.c ちょっと高速化. + + Tue Dec 22 20:39:02 1998 WATANABE Hirofumi + + * use, require されるファイル内で日本語モードが off になってた. + このバグのおかげで jcode.pl が無修正で動いてた. + + * reg*.c も iskanji2 を使うようにした. + これで jcode.pl は本当に無修正で動くはず. + + Tue Dec 8 12:01:49 1998 WATANABE Hirofumi + + * fold.pl に負けて悔しいらしいので format を修正. + パッチありがとう前田薫さん. + + * mod_perl(Win32) で argv[0] が NULL になり jrindex で SEGV. + + * djgpp/djgpp.c: set_lang_type() を呼ばないと SEGV. + + * makepatch 2.00a を使用. + + Tue Sep 29 12:23:42 1998 WATANABE Hirofumi + + * jperl5.005_02-980929.pat + + * pp.c, util.c: kpart の引数を cast. + + Sun Sep 27 00:04:49 1998 WATANABE Hirofumi + + * jperl5.005_02-980927.pat + + * util.c: fbm_instr に処理が増えてたので日本語対応. + ("討つ" =~ /い/ が 1 になってしまう) + study も同様. + + * regexec.c: kpart() を KANJI_MODE なしで使ってた. + pp_sys.c: iskanji() を PL_hints を見るようにした. + + * pp_ctl.c: pp_formline で ... の日本語対応. + ありがとう前田薫さん. + + * perl.c: jperl -Llatin のときは perl とという名前で + 起動した時と同じ動作になるようにした. + + * 差分は makepatch を使うようにした. + + Thu Sep 17 10:55:07 1998 WATANABE Hirofumi + + * jperl5.005_02-980917.pat + + * linuxthread, Solaris pthread, mingw32 thread での確認. + + Wed Sep 16 11:46:07 1998 WATANABE Hirofumi + + * jperl5.005_02-980916.pat + + * win32/win32.c: index -> l_index + + * global.sym: langtype は要らない. + + * embed.h, embedvar.h は embed.pl から作るようにした. + + Tue Sep 15 21:46:11 1998 WATANABE Hirofumi + + * jperl5.005_02-980915.pat + + * t/kanji/* を更新. use I18N::Japanese を使うようにした. + regexp.t, re_tests は op から新たに作った. + + Mon Sep 14 00:01:16 1998 WATANABE Hirofumi + + * jperl5.005_02-980914.pat + + * mingw32, PERL_OBJECT 対応(のつもり). + + Sun Sep 13 02:56:49 1998 WATANABE Hirofumi + + * jperl5.005_02-980913.pat + + * regcomp.c: ANYOF_LITERAL の処理で SIZE_ONLY で場合分けする + のを忘れていた. + + Sat Sep 5 17:59:34 1998 WATANABE Hirofumi + + * jperl5.005_02-980905.pat + + * perl5.005_02 対応. + + * unified diff 形式だと Solaris user から質問攻めにあうので + context diff 形式にした. + + Sun Aug 30 18:22:18 1998 WATANABE Hirofumi + + * jperl5.004_67-980830.pat + + * perl5.004_67 対応. + + Mon Mar 23 01:27:33 1998 WATANABE Hirofumi + + * jperl5.004m5t1-980323.pat: + + * perl5.004_04-m1 対応. + + * win32/Makefile: -stack:0x200000(for VC++ 5.0) + ありがとう木村浩一さん. + + Tue Mar 3 12:35:43 1998 WATANABE Hirofumi + + * jperl5.005_04-980303.pat: + + * /.い/ が "あいうえお" が match しない bug を fix. + (あるいは変なところに match してしまう) + + Wed Oct 29 10:02:40 1997 WATANABE Hirofumi + + * jperl5.005_04-971029.pat: (未公開) + + * perl95.exe が作れない不具合を解消. + ありがとう木村浩一さん. + + Thu Oct 16 10:30:10 1997 WATANABE Hirofumi + + * jperl5.004_04-971016.pat: + + * perl5.004_04 対応. + + Tue Oct 14 09:51:56 1997 WATANABE Hirofumi + + * jperl5.004_03-971014.pat: + + * Win95 で日本語機能が活きないバグを修正. + + * #! の不具合が再発. やっぱり kanji.c の setlangtype() で + icbm() を使うことにした. + + Mon Sep 8 09:35:03 1997 WATANABE Hirofumi + + * jperl5.004_03-970908.pat: + + * perl5.004_03 対応. + + * perl.c の冗長な処理を削除. + + Fri Aug 15 09:23:45 1997 WATANABE Hirofumi + + * jperl5.004m3t2-970815.pat: + + * perl5.004m3t2 対応. + + Fri Aug 8 10:08:21 1997 WATANABE Hirofumi + + * jperl5.004_02.pat: + + * perl5.004_02 対応. + + Thu Aug 7 13:00:23 1997 WATANABE Hirofumi + + * jperl5.004_01_03.pat: + + * perl5.004_01_03 対応. + + * $_ = 'abc0123"; tr/0-9/a/c; が "aaa0123" にならないバグを修正. + bug report ありがとう新井康司さん. + + Tue Aug 5 18:28:32 1997 WATANABE Hirofumi + + * jperl5.004_01_02.pat: + + * perl5.004_01_02 対応. + + Thu Jul 10 19:38:40 1997 WATANABE Hirofumi + + * jperl5.004_01-970710.pat: + + * kanji.c で strcasecmp() を使うのをやめて jstricmp() を使うよう + にした. + + * no I18N::Japanese; で $^H &= ~0xf000 になるようにした. + qw(string) も独立させた. これで jcode.pl の頭に + no I18N::Japanese; を入れれば jperl で使えるようになる. + + * win32/Makefile に kanji.[co] を追加. + + Tue Jun 17 13:22:23 1997 WATANABE Hirofumi + + * jperl5.004_01-970617.pat: + + * perl5.004_01 対応. + + Mon May 26 11:01:49 1997 WATANABE Hirofumi + + * jperl5.004-970526.pat: + + * jcode.pl 対応. 文字クラス中の 16/8 進リテラルは 1 byte 単位で + アクセスすることした. いままでは 1 byte でも日本語 1 文字でもな + いので, 全く機能してなかった. + + * tr も同じ. + + Sun May 18 17:49:11 1997 WATANABE Hirofumi + + * jperl5.004-970518.pat: + + * perl5.004 Release 対応. + + Fri May 16 11:26:35 1997 WATANABE Hirofumi + + * jperl5.004RC2-970516.pat: + + * perl5.004 Release Candidate 2 対応. + + Mon May 12 09:55:02 1997 WATANABE Hirofumi + + * jperl5.003_99a-970512.pat: + + * perl5.003_99a 対応. + + Wed May 7 12:40:05 1997 WATANABE Hirofumi + + * jperl5.003_99-970507.pat: + + * bit vector を 256+4096 に縮小. + + * bit vector へのポインタの alignment を 4 バイト境界にした. + + Tue May 6 12:15:45 1997 WATANABE Hirofumi + + * regclass(), reginclass() を bit vector(8192 bytes)にして高速化. + + Sun May 4 17:50:14 1997 WATANABE Hirofumi + + * jperl5.003_99-970504.pat: + + * \Q, quotemeta() が日本語対応してなかった. + + Sat May 3 22:29:35 1997 WATANABE Hirofumi + + * perl.5003_99 対応. + + Thu May 1 19:29:21 1997 WATANABE Hirofumi + + * jperl5.003_98-970501.pat: + + * perl5.003_98 対応. + + Sun Apr 27 22:18:08 1997 WATANABE Hirofumi + + * jperl5.003_97i-970427.pat: + + * regdump() の結果がめちゃくちゃになるので ANYOF_ENDMARK は 99 に + した. ついでに ANYOF で範囲を表示するようにした. + + * KANJI MODE じゃないとき, ANY, SANY, ANYOF は SIMPLE になるように + した. + + Fri Apr 25 12:12:09 1997 WATANABE Hirofumi + + * jperl5.003_97h-970425.pat: + + * patch 2.2 の -p の仕様が変更になったので, man + patch で推奨されてる形式にした. + + Wed Apr 23 12:25:16 1997 WATANABE Hirofumi + + * jperl5.003_97g-970423.pat: + + * regexp.h: ROPT_KANJI と ROPT_IMPLICIT が重なってた. + + * regcomp.h: ANY, SANY, ANYOF を simple[] へ移動. [] + が速くなった(はず). + + Wed Apr 21 12:23:56 1997 WATANABE Hirofumi + + * jperl5.003_97g-970421.pat: + + * perl5.003_97g 対応. + + -------------- + + jperl5.003_97e-970417.pat: + + * perl5.003_97e 対応. + + jperl5.003_97d-970415.pat: + + * perl5.003_97d 対応. + + jperl5.003_95-970331.pat: + + * perl5.003_93, 94, 95 対応. + + * /[\W]/, /[\S]/ がまったく逆の意味になっていたので修正. + 誰も使ってないのかな? わたしも使ったことがない. :-) + make test で発覚. + regcomp.c は大幅に書き換えた. + + * #! のラインに -Lsjis とかあると駄目だったので strncasecmp + を使おうと思ったけど util.c の ibcmp() を使った. + + * -w option をつけると tr/// が "Use of uninitialized value" + という warning になっていたので修正. t/lib/db-*.t になぜか + -w がついてて鬱陶しくなった. + + * toke.c の無意味なインデントの変更はやめた. + + jperl5.003_05.patch7_1: + + * jperl4.036 とマージした. Index: Configure Prereq: 3.0.1.9 ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/Configure Wed Jan 14 05:15:38 2004 --- jperl5.005_04/Configure Mon Mar 29 23:56:51 2004 *************** *** 723,728 **** --- 723,729 ---- usrinc='' defvoidused='' voidflags='' + d_euc='' CONFIG='' define='define' *************** *** 2930,2935 **** --- 2931,2949 ---- installprivlib="$privlibexp" fi + if $test -r ../kanji.h; then + echo "You have Japanized version."; + dflt=y + rp="Do you want to use EUC as the kanji code(y=EUC, n=SJIS)?" + . ./myread + case "$ans" in + [yY]*) + d_euc='define';; + *) + d_euc='undef';; + esac + fi + : set the prefixup variable, to restore leading tilda escape prefixup='case "$prefixexp" in "$prefix") ;; *************** *** 12293,12298 **** --- 12307,12313 ---- xlibpth='$xlibpth' zcat='$zcat' zip='$zip' + d_euc='$d_euc' EOT : Add in command line options if available Index: MANIFEST ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/MANIFEST Sat Feb 7 00:33:15 2004 --- jperl5.005_04/MANIFEST Mon Mar 29 23:56:51 2004 *************** *** 1,5 **** --- 1,6 ---- Artistic The "Artistic License" Changes Differences from previous version + Changes.jp Differences from previous version(jperl) Changes5.000 Differences between 4.x and 5.000 Changes5.001 Differences between 5.000 and 5.001 Changes5.002 Differences between 5.001 and 5.002 *************** *** 36,41 **** --- 37,44 ---- README.dos Notes about dos/djgpp port README.hpux Notes about HP-UX port README.hurd Notes about GNU/Hurd port + README.j Notes about jperl5.003 + README.jp Notes about latest jperl README.mint Notes about Atari MiNT port README.mpeix Notes about MPE/iX port README.os2 Notes about OS/2 port *************** *** 473,478 **** --- 476,483 ---- installperl Perl script to do "make install" dirty work intrpvar.h Variables held in each interpreter instance iperlsys.h Perl's interface to the system + kanji.c Kanji handling routines. + kanji.h Header file for the above. keywords.h The keyword numbers keywords.pl Program to write keywords.h lib/AnyDBM_File.pm Perl module to emulate dbmopen *************** *** 538,543 **** --- 543,549 ---- lib/Getopt/Long.pm Fetch command options (GetOptions) lib/Getopt/Std.pm Fetch command options (getopt, getopts) lib/I18N/Collate.pm Routines to do strxfrm-based collation + lib/I18N/Japanese.pm Pragma to control Japanese. lib/IPC/Open2.pm Open a two-ended pipe lib/IPC/Open3.pm Open a three-ended pipe! lib/Math/BigFloat.pm An arbitrary precision floating-point arithmetic package *************** *** 738,743 **** --- 744,750 ---- pod/Win32.pod Documentation for Win32 extras pod/buildtoc generate perltoc.pod pod/checkpods.PL Tool to check for common errors in pods + pod/jperl.pod Jperl man page pod/perl.pod Top level perl man page pod/perl5004delta.pod Changes from 5.003 to 5.004 pod/perlapio.pod IO API info *************** *** 857,862 **** --- 864,874 ---- t/io/print.t See if print commands work t/io/read.t See if read works t/io/tell.t See if file seeking works + t/kanji/format.t See if jperl works + t/kanji/op.t See if jperl works + t/kanji/re_tests Regular expressions for regexp.t + t/kanji/regexp.t See if jperl works + t/kanji/sjis.t See if jperl works t/lib/abbrev.t See if Text::Abbrev works t/lib/anydbm.t See if AnyDBM_File works t/lib/autoloader.t See if AutoLoader works Index: Makefile.SH ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/Makefile.SH Wed Jan 14 05:15:41 2004 --- jperl5.005_04/Makefile.SH Mon Mar 29 23:56:51 2004 *************** *** 226,232 **** obj1 = $(mallocobj) gv$(OBJ_EXT) toke$(OBJ_EXT) perly$(OBJ_EXT) op$(OBJ_EXT) regcomp$(OBJ_EXT) dump$(OBJ_EXT) util$(OBJ_EXT) mg$(OBJ_EXT) byterun$(OBJ_EXT) obj2 = hv$(OBJ_EXT) av$(OBJ_EXT) run$(OBJ_EXT) pp_hot$(OBJ_EXT) sv$(OBJ_EXT) pp$(OBJ_EXT) scope$(OBJ_EXT) pp_ctl$(OBJ_EXT) pp_sys$(OBJ_EXT) ! obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) taint$(OBJ_EXT) deb$(OBJ_EXT) universal$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) obj = $(obj1) $(obj2) $(obj3) $(ARCHOBJS) --- 226,232 ---- obj1 = $(mallocobj) gv$(OBJ_EXT) toke$(OBJ_EXT) perly$(OBJ_EXT) op$(OBJ_EXT) regcomp$(OBJ_EXT) dump$(OBJ_EXT) util$(OBJ_EXT) mg$(OBJ_EXT) byterun$(OBJ_EXT) obj2 = hv$(OBJ_EXT) av$(OBJ_EXT) run$(OBJ_EXT) pp_hot$(OBJ_EXT) sv$(OBJ_EXT) pp$(OBJ_EXT) scope$(OBJ_EXT) pp_ctl$(OBJ_EXT) pp_sys$(OBJ_EXT) ! obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) taint$(OBJ_EXT) deb$(OBJ_EXT) universal$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) kanji$(OBJ_EXT) obj = $(obj1) $(obj2) $(obj3) $(ARCHOBJS) Index: README.j ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/README.j Mon Jul 18 08:46:18 1994 --- jperl5.005_04/README.j Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,52 ---- + perl5.003に対する日本語パッチです。 + jperl4.019をperl5に移植した形になっています。 + + * 正規表現 + * chop + * split + * tr + * フォーマット + * open ( 漢字2文字目の|を認識) + + が日本語化されています。逆に日本語化されていないのは, + + * reverse + * index, rindex + + スイッチとして-bが追加されています。-bを付けると,日本語機能が + offになります。 + + 新しいモジュールとして, + I18N::Japaneseが追加されています。 + + use I18N::Japanese qw(re tr format); + + あるいは + + no I18N::Japanese qw(re tr); + + のように使います。 + + 起動ファイル名がjperlの場合には, + 自動的に use I18N::Japanese qw(re tr format string); + が起動時に行われます。 + + バグ : use I18N::Japanese qw(format)指定はブロックスコープを持ちません。 + 要するに,プログラムテキスト中,最後に行ったformat指定がプログラム全体を + 通して有効になります。 + + 日本語コードは一応EUCとMS漢字をサポート。 + MS漢字はちょっとあやしい。 + + Wed Jan 31 12:11:49 1996 + 斎藤靖 + yasushi@cs.washington.edu + http://cs.washington.edu/homes/yasushi + + + Tue Aug 20 18:15:00 1996 Yasushi Saito + + * ^.が日本語にマッチしないバグ修正。渡辺博文@ソニーさん。 + 5.003対応。 + + Index: README.jp ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/README.jp Mon Jul 18 08:46:18 1994 --- jperl5.005_04/README.jp Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,77 ---- + 1. はじめに + + perl5.005_03 に対する日本語パッチです. + jperl5.003.patch7 と jperl 4.036 をミックスした形で + 移植してます. + + 斎藤さんの変更は README.j をお読みください. + + t/kanji/*.t は chmod +x する必要はありません(5.005 から). + sjis で make するときは t/kanji/* をあらかじめ sjis に変換して + ください. + + 2. warning message について + + |perl: warning: Setting locale failed. + |perl: warning: Please check that your locale settings: + | LC_ALL = (unset), + | LANG = "ja_JP.ujis" + | are supported and installed on your system. + |perl: warning: Falling back to the standard locale ("C"). + + という warning message は環境変数 PERL_BADLANG を設定する + ことで抑制することができます. + csh 系: setenv PERL_BADLANG 0 + sh 系: PERL_BADLANG=0; export PERL_BADLANG + もしくは ./Configure -U d_setlocale してから make しましょう. + + jperl 自体は -U d_setlocale での環境でしか make test してません. + + 3. jcode.pl を動かす + + 歌代さん作 jcode.pl を動かすには + jcode.pl の先頭に no I18N::Japanese; を入れます. + 後は script の中で適宜 use, no を使い分けてください. + 基本的に jcode 内の subroutine を call する前に + no I18N::Japanese; して jperl 特有の機能を使う前に + use I18N::Japanese; します. + + jperl5.005_02-981225.pat からは基本的に無修正で動きますが, + 怪しいときは上記の方法をとってください. + + jcode.pl は + ftp://ftp.iij.ad.jp/pub/IIJ/dist/utashiro/perl/ + にあります. + + 4. look behind + + jperl では正規表現で look behind は使えません. すみません. + 使えるようにするパッチは歓迎します. + + 5. jperl4.036 からの機能. + + 文字種のオプション + + -Lsjis 日本語 Shift-JIS + -Leuc 日本語 EUC + -Ltca 中国語(台湾) TCA + -Lkseuc 韓国語EUC(KS C5601-1987) + -Llatin 1バイト文字のみ + + の 5 通りに切り替わります.jperl という名前で起動された場合 + はコンパイル時に指定した文字種になります(でも Configure では + euc か sjis しか選べない. もうしわけない). + + jperl5.003.patch7 では full path で表わしたときにディレクト + リに jperl という文字が含まれていたら日本語機能が ON になっ + ていたので basename に jperl が含まれているときだけ ON にな + るようにしました(そうしないと jperl という名前のディレクトリ + で make するとデバッグできん :-) これは主に DOS での話). つ + いでに sjisperl, eucperl なんて名前で起動されればその文字種 + になるようにしました. + + Sun Aug 22 16:08:09 JST 1999 + 渡辺博文(わたなべひろふみ) + eban@os.rim.or.jp + watanabe@ase.ptg.sony.co.jp + $CPAN/authors/id/WATANABE Index: av.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/av.c Wed Jan 14 05:15:46 2004 --- jperl5.005_04/av.c Mon Mar 29 23:56:51 2004 *************** *** 597,606 **** STATIC I32 avhv_index_sv(SV* sv) { ! I32 index = SvIV(sv); ! if (index < 1) croak("Bad index while coercing array into hash"); ! return index; } HV* --- 597,606 ---- STATIC I32 avhv_index_sv(SV* sv) { ! I32 l_index = SvIV(sv); ! if (l_index < 1) croak("Bad index while coercing array into hash"); ! return l_index; } HV* Index: config_h.SH Prereq: 3.0.1.5 ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/config_h.SH Wed Jan 14 05:15:47 2004 --- jperl5.005_04/config_h.SH Mon Mar 29 23:56:51 2004 *************** *** 70,75 **** --- 70,82 ---- #define CPPSTDIN "$cppstdin" #define CPPMINUS "$cppminus" + /* EUC + * It defined, the perl handles Japanese EUC characters. + * Otherwise, it handles Shift JIS(aka MS Kanji) characters. + */ + #$d_euc EUC /**/ + + /* HAS_ALARM: * This symbol, if defined, indicates that the alarm routine is * available. Index: djgpp/djgpp.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/djgpp/djgpp.c Wed Jan 14 05:15:49 2004 --- jperl5.005_04/djgpp/djgpp.c Mon Mar 29 23:56:51 2004 *************** *** 422,427 **** --- 422,428 ---- { char *p; + set_lang_type(0); perlprefix=strdup (**argvp); strlwr (perlprefix); if ((p=strrchr (perlprefix,'/'))!=NULL) Index: doio.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/doio.c Wed Jan 14 05:15:49 2004 --- jperl5.005_04/doio.c Mon Mar 29 23:56:51 2004 *************** *** 272,278 **** else fp = PerlIO_open(name,mode); } ! else if (len > 1 && name[len-1] == '|') { name[--len] = '\0'; while (len && isSPACE(name[len-1])) name[--len] = '\0'; --- 272,279 ---- else fp = PerlIO_open(name,mode); } ! else if (len > 1 && name[len-1] == '|' ! && kpart(name, name+len-1) == KPART_OTHER) { name[--len] = '\0'; while (len && isSPACE(name[len-1])) name[--len] = '\0'; Index: doop.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/doop.c Wed Jan 14 05:15:49 2004 --- jperl5.005_04/doop.c Mon Mar 29 23:56:51 2004 *************** *** 18,40 **** #include #endif I32 do_trans(SV *sv, OP *arg) { dTHR; ! register short *tbl; ! register U8 *s; ! register U8 *send; ! register U8 *d; ! register I32 ch; register I32 matches = 0; register I32 squash = PL_op->op_private & OPpTRANS_SQUASH; register U8 *p; STRLEN len; if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_COUNTONLY)) croak(no_modify); ! tbl = (short*)cPVOP->op_pv; s = (U8*)SvPV(sv, len); if (!len) return 0; --- 18,74 ---- #include #endif + #ifndef PERL_OBJECT + static I32 sv_catkanji _((SV *sv, U32 tch)); + #endif + + /* Cat 1 or 2-byte char into SV */ + STATIC I32 + sv_catkanji(SV *sv, U32 tch) + { + char xx; + /* horendously inefficent. I'll fix it later. hold on + -yasushi */ + if (tch > 255) { + xx = tch >> 8; + sv_catpvn(sv, &xx, 1); + xx = tch; + sv_catpvn(sv, &xx, 1); + return 2; + } else { + xx = tch; + sv_catpvn(sv, &xx, 1); + return 1; + } + } + I32 do_trans(SV *sv, OP *arg) { dTHR; ! register U16 *tbl; ! register U8 *s, *next_s; register I32 matches = 0; + register I32 ch; + register U8 *send; + STRLEN dlen = 0; + U32 last_rch; + SV *dest_sv = sv_newmortal(); + register I32 squash = PL_op->op_private & OPpTRANS_SQUASH; + I32 del = PL_op->op_private & OPpTRANS_DELETE; + I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; + I32 kanji = PL_op->op_private & OPpTRANS_KANJI; + I32 countonly = PL_op->op_private & OPpTRANS_COUNTONLY; register U8 *p; + STRLEN len; + sv_setpvn(dest_sv, "", 0); + if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_COUNTONLY)) croak(no_modify); ! tbl = (U16 *)cPVOP->op_pv; s = (U8*)SvPV(sv, len); if (!len) return 0; *************** *** 45,92 **** if (!tbl || !s) croak("panic: do_trans"); DEBUG_t( deb("2.TBL\n")); ! if (!PL_op->op_private) { ! while (s < send) { ! if ((ch = tbl[*s]) >= 0) { matches++; - *s = ch; } - s++; - } - SvSETMAGIC(sv); - } - else if (PL_op->op_private & OPpTRANS_COUNTONLY) { - while (s < send) { - if (tbl[*s] >= 0) - matches++; - s++; } } ! else { ! d = s; ! p = send; ! while (s < send) { ! if ((ch = tbl[*s]) >= 0) { ! *d = ch; ! matches++; ! if (squash) { ! if (p == d - 1 && *p == *d) ! matches--; ! else ! p = d++; ! } ! else ! d++; ! } ! else if (ch == -1) /* -1 is unmapped character */ ! *d++ = *s; /* -2 is delete character */ ! s++; ! } ! matches += send - d; /* account for disappeared chars */ ! *d = '\0'; ! SvCUR_set(sv, d - (U8*)SvPVX(sv)); ! SvSETMAGIC(sv); ! } return matches; } --- 79,136 ---- if (!tbl || !s) croak("panic: do_trans"); DEBUG_t( deb("2.TBL\n")); ! ! while (s < send) { ! U32 tch, rch; ! U8 *nest_s; ! U16 *tp; ! int matched; ! ! if (kanji && iskanji(*s) && s < send-1) { ! tch = twochar_to_int(*s, *(s+1)); ! next_s = s+2; ! } else { ! tch = *(U8*)s; ! next_s = s+1; ! } ! ! /* look for ch in tbl */ ! if (!complement) { ! for (tp = tbl; *tp != (U16)(-1); tp += 2) { ! if (*tp == tch) break; ! } ! matched = (*tp != (U16)(-1)); ! rch = tp[1]; ! } else { ! for (tp = tbl; *tp != (U16)(-1); tp += 2) { ! if (*tp == tch) break; ! } ! matched = (*tp == (U16)(-1)); ! if (matched && tbl != tp && tp[-1] != (U16)-1) ! tch = tp[-1]; ! rch = (U16)(del ? -2 : -1); ! } ! ! if (!matched) { ! dlen += sv_catkanji(dest_sv, tch); ! } else { ! if (rch == (U16)-1) ! rch = tch; ! if (rch == (U16)-2) { ! /* delete this character */; ! rch = tch; ! } else if (!squash || last_rch != rch) { ! dlen += sv_catkanji(dest_sv, rch); matches++; } } + last_rch = rch; + s = next_s; } ! matches += (s-(U8*)SvPVX(sv)) - dlen; /* account for disappeared chars */ ! sv_setpvn(sv, SvPVX(dest_sv), dlen); ! if (!countonly) ! SvSETMAGIC(sv); return matches; } *************** *** 241,248 **** if (len && !SvPOK(sv)) s = SvPV_force(sv, len); if (s && len) { ! s += --len; ! sv_setpvn(astr, s, 1); *s = '\0'; SvCUR_set(sv, len); SvNIOK_off(sv); --- 285,299 ---- if (len && !SvPOK(sv)) s = SvPV_force(sv, len); if (s && len) { ! if (PL_hints & HINT_KANJI_STRING && kpart(s, s+len-1) == KPART_KANJI_2) { ! len -= 2; ! s += len; ! sv_setpvn(astr, s, 2); ! } else { ! len--; ! s += len; ! sv_setpvn(astr, s, 1); ! } *s = '\0'; SvCUR_set(sv, len); SvNIOK_off(sv); Index: embed.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/embed.h Wed Jan 14 05:15:54 2004 --- jperl5.005_04/embed.h Mon Mar 29 23:56:51 2004 *************** *** 262,267 **** --- 262,269 ---- #define jmaybe Perl_jmaybe #define keyword Perl_keyword #define know_next Perl_know_next + #define kpart Perl_kpart + #define kvaries Perl_kvaries #define le_amg Perl_le_amg #define leave_scope Perl_leave_scope #define lex_end Perl_lex_end *************** *** 907,912 **** --- 909,915 ---- #define scope Perl_scope #define screaminstr Perl_screaminstr #define seq_amg Perl_seq_amg + #define set_lang_type Perl_set_lang_type #define setdefout Perl_setdefout #define setenv_getix Perl_setenv_getix #define sge_amg Perl_sge_amg Index: embedvar.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/embedvar.h Wed Jan 14 05:15:55 2004 --- jperl5.005_04/embedvar.h Mon Mar 29 23:56:51 2004 *************** *** 194,199 **** --- 194,200 ---- #define PL_incgv (PL_curinterp->Iincgv) #define PL_initav (PL_curinterp->Iinitav) #define PL_inplace (PL_curinterp->Iinplace) + #define PL_langtype (PL_curinterp->Ilangtype) #define PL_last_proto (PL_curinterp->Ilast_proto) #define PL_lastfd (PL_curinterp->Ilastfd) #define PL_lastsize (PL_curinterp->Ilastsize) *************** *** 211,216 **** --- 212,218 ---- #define PL_mess_sv (PL_curinterp->Imess_sv) #define PL_minus_F (PL_curinterp->Iminus_F) #define PL_minus_a (PL_curinterp->Iminus_a) + #define PL_minus_b (PL_curinterp->Iminus_b) #define PL_minus_c (PL_curinterp->Iminus_c) #define PL_minus_l (PL_curinterp->Iminus_l) #define PL_minus_n (PL_curinterp->Iminus_n) *************** *** 329,334 **** --- 331,337 ---- #define PL_Iincgv PL_incgv #define PL_Iinitav PL_initav #define PL_Iinplace PL_inplace + #define PL_Ilangtype PL_langtype #define PL_Ilast_proto PL_last_proto #define PL_Ilastfd PL_lastfd #define PL_Ilastsize PL_lastsize *************** *** 346,351 **** --- 349,355 ---- #define PL_Imess_sv PL_mess_sv #define PL_Iminus_F PL_minus_F #define PL_Iminus_a PL_minus_a + #define PL_Iminus_b PL_minus_b #define PL_Iminus_c PL_minus_c #define PL_Iminus_l PL_minus_l #define PL_Iminus_n PL_minus_n Index: form.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/form.h Wed Jan 14 05:16:09 2004 --- jperl5.005_04/form.h Mon Mar 29 23:56:51 2004 *************** *** 23,26 **** --- 23,27 ---- #define FF_NEWLINE 13 #define FF_BLANK 14 #define FF_MORE 15 + #define FF_OPTION 16 /* c : hints value */ Index: global.sym ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/global.sym Wed Jan 14 05:16:09 2004 --- jperl5.005_04/global.sym Mon Mar 29 23:56:51 2004 *************** *** 38,43 **** --- 38,44 ---- inc_amg init_thread_intern io_close + kvaries know_next le_amg log_amg *************** *** 364,369 **** --- 365,371 ---- intuit_more invert jmaybe + kpart keyword leave_scope lex_end *************** *** 959,964 **** --- 961,967 ---- scan_word scope screaminstr + set_lang_type setdefout setenv_getix share_hek Index: intrpvar.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/intrpvar.h Wed Jan 14 05:16:13 2004 --- jperl5.005_04/intrpvar.h Mon Mar 29 23:56:51 2004 *************** *** 216,219 **** --- 216,222 ---- PERLVARI(piDir, IPerlDir*, NULL) PERLVARI(piSock, IPerlSock*, NULL) PERLVARI(piProc, IPerlProc*, NULL) + #else + PERLVAR(Iminus_b, bool) + PERLVAR(Ilangtype, unsigned char *) #endif Index: kanji.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/kanji.c Mon Jul 18 08:46:18 1994 --- jperl5.005_04/kanji.c Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,381 ---- + /*kanji.c: kanji handling routines + *$Id$ + */ + + #include "EXTERN.h" + #include "perl.h" + + unsigned char langtype_euc_tab[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, /* 8x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9x */ + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Ax */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Bx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Cx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Dx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Ex */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, /* Fx */ + }; + unsigned char langtype_sjis_tab[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 4x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 5x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 6x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, /* 7x */ + 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 8x */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 9x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Ax */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Bx */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Cx */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Dx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Ex */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, /* Fx */ + }; + unsigned char langtype_ks_euc_tab[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9x */ + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Ax */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Bx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Cx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Dx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Ex */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, /* Fx */ + }; + unsigned char langtype_tca_tab[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 4x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, /* 5x */ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 6x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, /* 7x */ + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 8x */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 9x */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Ax */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Bx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Cx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Dx */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* Ex */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, /* Fx */ + }; + unsigned char langtype_latin_tab[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Ax */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Cx */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Dx */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Ex */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ + }; + struct _langtypetable langtypetable[]= + { + {langtype_sjis_tab, "SJIS"}, + {langtype_euc_tab, "EUC"}, + {langtype_ks_euc_tab, "KSEUC"}, + {langtype_tca_tab, "TCA"}, + {langtype_latin_tab, "LATIN"}, + {0, 0}, + }; + #ifdef EUC + char *langtypesymbol = "EUC"; + unsigned char *init_langtype = langtype_euc_tab; + #else + #ifdef KSEUC + char *langtypesymbol = "KSEUC"; + unsigned char *init_langtype = langtype_kseuc_tab; + #else + #ifdef TCA + char *langtypesymbol = "TCA"; + unsigned char *init_langtype = langtype_tca_tab; + #else + #ifdef LATIN + char *langtypesymbol = "LATIN"; + unsigned char *init_langtype = langtype_latin_tab; + #else + char *langtypesymbol = "SJIS"; + unsigned char *init_langtype = langtype_sjis_tab; + #endif + #endif + #endif + #endif + + #define IsLower(c) ('a' <= (unsigned char)(c) && (unsigned char)(c) <= 'z') + #define IsUpper(c) ('A' <= (unsigned char)(c) && (unsigned char)(c) <= 'Z') + #define ToLower(c) ((c) - 'A' + 'a') + #define ToUpper(c) ((c) - 'a' + 'A') + + int + set_lang_type(char *symbol) + { + struct _langtypetable *l; + + if (!symbol || !*symbol) { + PL_langtype = init_langtype; + return 0; + } + for (l = langtypetable; l->symbol; l++) { + if (ibcmp(l->symbol, symbol, strlen(l->symbol)) == 0) { + PL_langtype = l->table; + langtypesymbol = l->symbol; + return 0; + } + } + return -1; + } + + /* + Combine two chars C1, C2 into single 16-bit integer + */ + I32 twochar_to_int(int c1, int c2) + { + return (((unsigned char)c1)<<8 | ((unsigned char)c2)); + } + I32 int_to_1stbyte(I32 ch) + { + return (ch >> 8); + } + I32 int_to_2ndbyte(I32 ch) + { + return ch & 0xFF; + } + + /* kpart(pLim, pChr); + * char * pLim; Buffer Top or Limit for scanning + * char * pChr; Pointer to the char + * return whith 1 : *pChr is First byte of MS-Kanji + * return whith 2 : *pChr is Second byte of MS-Kanji + * return whith 0 : otherwize + */ + int + kpart(char *pLim, char *pChr) + { + register char *p = pChr - 1; + register int ct = 0; + + while (p >= pLim && iskanji(*p)) { + p--; + ct++; + } + return (ct & 1) ? 2 : iskanji(*pChr); + } + + /* jstrlen: return the number of charctors in string. + */ + int + jstrlen(char *s) + { + int len; + + for (len = 0; *s; s++, len++) { + if (iskanji(*s) && s[1]) + s++; + } + return len; + } + /* jstlen: return the number of charctors in string. + */ + int + jstlen(char *s, int len) + { + int l; + char *send = s + len; + + for (l = 0; s < send; s++, l++) { + if (iskanji(*s) && s[1]) + s++; + } + return l; + } + + /* jnthchar: return with the pointer to n'th charactor in string. + * return (char *)0, when jstrlen(s) < n + */ + char * + jnthchar(char *s, int n) + { + if (n) { + while (--n) { + if (iskanji(*s)) + s++; + s++; + } + } + return s; + } + + /* jindex: return with the pointer to 'ch' in "string" + * return with (char *)0, when not found + */ + char * + jindex(char *s, int c) + { + while (*s) { + if (UCH(*s) == UCH(c)) + return s; + + if (iskanji(*s) && s[1]) { + s++; + } + s++; + } + return (char *) 0; + } + + /* jrindex: return with the pointer to Right end 'ch' in "string" + * return with (char *)0, when not found + */ + char * + jrindex(char *s, int c) + { + char *olds = (char *) 0; + + while (*s) { + if (UCH(*s) == UCH(c)) + olds = s; + + if (iskanji(*s) && s[1]) { + s++; + } + s++; + } + return olds; + } + + char * + jstrlower(char *s) + { + char *ws = s; + + while (*ws) { + if (iskanji(*ws) && ws[1]) + ws++; + else + *ws = IsUpper(*ws) ? ToLower(*ws) : *ws; + ws++; + } + return s; + } + + int + jstricmp(char *s1, char *s2) + { + int c1, c2; + + for (;;) { + c1 = UCH(*s1++); + c2 = UCH(*s2++); + if (!iskanji(c1)) { + c1 = IsUpper(c1) ? ToLower(c1) : c1; + c2 = IsUpper(c2) ? ToLower(c2) : c2; + } + if (c1 != c2) + return c1 - c2; + if (c1 == 0) + return 0; + if (iskanji(c1)) { + c1 = UCH(*s1++); + c2 = UCH(*s2++); + if (c1 != c2) + return c1 - c2; + if (c1 == 0) + return 0; + } + } + return 0; + } + + int + jstrnicmp(char *s1, char *s2, int n) + { + int c1, c2; + + for (;;) { + if (n--) + return 0; + c1 = UCH(*s1++); + c2 = UCH(*s2++); + if (!iskanji(c1)) { + c1 = IsUpper(c1) ? ToLower(c1) : c1; + c2 = IsUpper(c2) ? ToLower(c2) : c2; + } + if (c1 != c2) + break; + if (c1 == 0) + return 0; + if (iskanji(c1)) { + if (n--) + return 0; + c1 = UCH(*s1++); + c2 = UCH(*s2++); + if (c1 != c2) + break; + if (c1 == 0) + return 0; + } + } + return c1 - c2; + } + + unsigned int + jnextcode(unsigned int cc) + { + unsigned int c1, c2; + + if (PL_langtype == langtype_latin_tab) { + return cc + 1; /* ?:-P */ + } + c1 = ((cc >> 8) & 255); + c2 = (cc & 255); + c2++; + if (iskanji2(c2)) { + return (c1 << 8)|c2; + } + while (!iskanji2(c2)) { + c2++; + } + if (c2 <= 0x100) { + return (c1 << 8)|c2; + } + c2 &= 0xFF; + c1++; + if (iskanji(c1)) { + return (c1 << 8)|c2; + } + while (!iskanji(c1)) { + c1++; + } + return ((c1 & 255) << 8)|c2; + } Index: kanji.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/kanji.h Mon Jul 18 08:46:18 1994 --- jperl5.005_04/kanji.h Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,46 ---- + #ifndef _KANJI_H_INCLUDE + #ifdef iskanji + #undef iskanji + #endif + + #define UCH(c) ((unsigned char)(c)) + #define BND(l, h, c) (((l) <= (c))&&((c) <= (h))) + + #define iskanji(c) (!PL_minus_b && ((PL_langtype)[(int)((unsigned char)(c))] & 1)) + #define iskanji2(c) (!PL_minus_b && ((PL_langtype)[(int)((unsigned char)(c))] & 2)) + + #define MBSKIP(s) (((PL_langtype)[*(U8*)s]&1) + 1) + + struct _langtypetable { + unsigned char *table; + char *symbol; + }; + extern char *langtypesymbol; + /*extern unsigned char *PL_langtype;*/ + extern unsigned char *init_langtype; + extern struct _langtypetable langtypetable[]; + + /* return values of kpart */ + #define KPART_KANJI_1 1 /* kanji 1st byte */ + #define KPART_KANJI_2 2 /* kanji 2nd byte */ + #define KPART_OTHER 0 /* other (ASCII) */ + + #ifndef PERL_OBJECT + I32 twochar_to_int _((int c1, int c2)); + I32 int_to_1stbyte _((I32 c1)); + I32 int_to_2ndbyte _((I32 c1)); + int set_lang_type _((char *symbol)); + int kpart _((char *pLim,char *pChr)); + unsigned int jnextcode _((unsigned int cc)); + int jstrlen _((char *s)); + int jstlen _((char *s, int len)); + char *jnthchar _((char *s,int n)); + char *jindex _((char *s,int c)); + char *jrindex _((char *s,int c)); + char *jstrlower _((char *s)); + int jstricmp _((char *s1, char *s2)); + int jstrnicmp _((char *s1, char *s2, int n)); + #endif + + #define _KANJI_H_INCLUDE 1 + #endif /* _KANJI_H_INCLUDE */ Index: lib/ExtUtils/MM_Unix.pm Prereq: 1.126 ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/lib/ExtUtils/MM_Unix.pm Wed Jan 14 05:16:21 2004 --- jperl5.005_04/lib/ExtUtils/MM_Unix.pm Mon Mar 29 23:56:51 2004 *************** *** 84,93 **** if ( $^O eq 'qnx' && $path =~ s|^(//\d+)/|/| ) { $node = $1; } ! $path =~ s|(?<=[^/])/+|/|g ; # xx////xx -> xx/xx $path =~ s|(/\.)+/|/|g ; # xx/././xx -> xx/xx $path =~ s|^(\./)+|| unless $path eq "./"; # ./xx -> xx ! $path =~ s|(?<=[^/])/$|| ; # xx/ -> xx "$node$path"; } --- 84,93 ---- if ( $^O eq 'qnx' && $path =~ s|^(//\d+)/|/| ) { $node = $1; } ! $path =~ s|([^/])/+|$1/|g ; # xx////xx -> xx/xx $path =~ s|(/\.)+/|/|g ; # xx/././xx -> xx/xx $path =~ s|^(\./)+|| unless $path eq "./"; # ./xx -> xx ! $path =~ s|([^/])/$|$1| ; # xx/ -> xx "$node$path"; } Index: lib/I18N/Japanese.pm ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/lib/I18N/Japanese.pm Mon Jul 18 08:46:18 1994 --- jperl5.005_04/lib/I18N/Japanese.pm Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,71 ---- + package I18N::Japanese; + + =head1 NAME + + Japanese - Perl pragma to control whether some built-in operations understand + composite characters or not. + + =head1 SYNOPSIS + + use I18N::Japanese qw(re tr format); + no I18N::Japanese qw(re tr format); + + =head1 DESCRIPTION + + This module is actually a pragma module, though it begins with capital J. + Import/unimport takes list of strings. + + =over 5 + + =re + + Enables Japanized regexp, including split. + + =tr + + Enables Japanized tr//. + + =format + + Enables Japanized format. + + =back + + As usual, omitting the arguments means specifying all the bits. + The setting is valid only within the current block. + + =cut + + # ウラワザ : 0x8000000 は,文字列定数で2バイト文字を認識するかどうか + sub import { + shift; + my $bits = 0; + @_ = qw(re tr format string) if (@_ == 0); + for (@_) { + $bits |= 0x1000000 if $_ eq 're'; + $bits |= 0x2000000 if $_ eq 'tr'; + $bits |= 0x4000000 if $_ eq 'format'; + $bits |= 0x8000000 if $_ eq 'string'; + } + # $bits |= 0x8000000 if ($bits != 0); + + $^H |= $bits; + #print "imp : hints = $bits\n"; + } + + sub unimport { + shift; + my $bits = 0; + @_ = qw(re tr format string) if (@_ == 0); + for (@_) { + $bits |= 0x1000000 if $_ eq 're'; + $bits |= 0x2000000 if $_ eq 'tr'; + $bits |= 0x4000000 if $_ eq 'format'; + $bits |= 0x8000000 if $_ eq 'string'; + } + # $bits = 0x8000000 if ($^H != 0x8000000 && $bits == 0); + $^H &= ~$bits; + #print "unimp : hints = $bits\n"; + } + + 1; Index: objXSUB.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/objXSUB.h Wed Jan 14 05:16:33 2004 --- jperl5.005_04/objXSUB.h Mon Mar 29 23:56:51 2004 *************** *** 333,338 **** --- 333,340 ---- #define PL_initav pPerl->PL_initav #undef PL_inplace #define PL_inplace pPerl->PL_inplace + #undef PL_langtype + #define PL_langtype pPerl->PL_langtype #undef PL_last_proto #define PL_last_proto pPerl->PL_last_proto #undef PL_lastfd *************** *** 367,372 **** --- 369,376 ---- #define PL_minus_F pPerl->PL_minus_F #undef PL_minus_a #define PL_minus_a pPerl->PL_minus_a + #undef PL_minus_b + #define PL_minus_b pPerl->PL_minus_b #undef PL_minus_c #define PL_minus_c pPerl->PL_minus_c #undef PL_minus_l *************** *** 1977,1983 **** --- 1981,1989 ---- #define kill PerlProc_kill #define killpg PerlProc_killpg #define pause PerlProc_pause + #undef popen #define popen PerlProc_popen + #undef pclose #define pclose PerlProc_pclose #define pipe PerlProc_pipe #define setuid PerlProc_setuid Index: objpp.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/objpp.h Wed Jan 14 05:16:34 2004 --- jperl5.005_04/objpp.h Mon Mar 29 23:56:51 2004 *************** *** 511,516 **** --- 511,520 ---- #define init_postdump_symbols CPerlObj::init_postdump_symbols #undef init_stacks #define init_stacks CPerlObj::Perl_init_stacks + #undef int_to_1stbyte + #define int_to_1stbyte CPerlObj::int_to_1stbyte + #undef int_to_2ndbyte + #define int_to_2ndbyte CPerlObj::int_to_2ndbyte #undef intro_my #define intro_my CPerlObj::Perl_intro_my #undef nuke_stacks *************** *** 529,536 **** --- 533,560 ---- #define is_an_int CPerlObj::is_an_int #undef isa_lookup #define isa_lookup CPerlObj::isa_lookup + #undef jindex + #define jindex CPerlObj::jindex + #undef jnextcode + #define jnextcode CPerlObj::jnextcode + #undef jnthchar + #define jnthchar CPerlObj::jnthchar + #undef jrindex + #define jrindex CPerlObj::jrindex + #undef jstricmp + #define jstricmp CPerlObj::jstricmp + #undef jstrnicmp + #define jstrnicmp CPerlObj::jstrnicmp + #undef jstlen + #define jstlen CPerlObj::jstlen + #undef jstrlen + #define jstrlen CPerlObj::jstrlen + #undef jstrlower + #define jstrlower CPerlObj::jstrlower #undef jmaybe #define jmaybe CPerlObj::Perl_jmaybe + #undef kpart + #define kpart CPerlObj::Perl_kpart #undef keyword #define keyword CPerlObj::Perl_keyword #undef leave_scope *************** *** 1019,1024 **** --- 1043,1050 ---- #define regcppop CPerlObj::regcppop #undef regclass #define regclass CPerlObj::regclass + #undef regclassfree + #define regclassfree CPerlObj::regclassfree #undef regexec_flags #define regexec_flags CPerlObj::Perl_regexec_flags #undef reginclass *************** *** 1203,1208 **** --- 1229,1236 ---- #define setdefout CPerlObj::Perl_setdefout #undef setenv_getix #define setenv_getix CPerlObj::Perl_setenv_getix + #undef set_lang_type + #define set_lang_type CPerlObj::Perl_set_lang_type #undef sharepvn #define sharepvn CPerlObj::Perl_sharepvn #undef set_csh *************** *** 1255,1260 **** --- 1283,1290 ---- #define sv_backoff CPerlObj::Perl_sv_backoff #undef sv_bless #define sv_bless CPerlObj::Perl_sv_bless + #undef sv_catkanji + #define sv_catkanji CPerlObj::sv_catkanji #undef sv_catpv #define sv_catpv CPerlObj::Perl_sv_catpv #undef sv_catpv_mg *************** *** 1421,1426 **** --- 1451,1458 ---- #define too_few_arguments CPerlObj::Perl_too_few_arguments #undef too_many_arguments #define too_many_arguments CPerlObj::Perl_too_many_arguments + #undef twochar_to_int + #define twochar_to_int CPerlObj::twochar_to_int #undef unlnk #define unlnk CPerlObj::unlnk #undef unsharepvn Index: op.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/op.c Wed Jan 14 05:16:35 2004 --- jperl5.005_04/op.c Mon Mar 29 23:56:51 2004 *************** *** 2110,2165 **** register U8 *r = (U8*)SvPV(rstr, rlen); register I32 i; register I32 j; I32 Delete; I32 complement; I32 squash; ! register short *tbl; - tbl = (short*)cPVOPo->op_pv; complement = o->op_private & OPpTRANS_COMPLEMENT; Delete = o->op_private & OPpTRANS_DELETE; squash = o->op_private & OPpTRANS_SQUASH; ! if (complement) { ! Zero(tbl, 256, short); ! for (i = 0; i < tlen; i++) ! tbl[t[i]] = -1; ! for (i = 0, j = 0; i < 256; i++) { ! if (!tbl[i]) { ! if (j >= rlen) { ! if (Delete) ! tbl[i] = -2; ! else if (rlen) ! tbl[i] = r[j-1]; ! else ! tbl[i] = i; ! } ! else ! tbl[i] = r[j++]; ! } ! } ! } ! else { ! if (!rlen && !Delete) { ! r = t; rlen = tlen; ! if (!squash) ! o->op_private |= OPpTRANS_COUNTONLY; ! } ! for (i = 0; i < 256; i++) ! tbl[i] = -1; ! for (i = 0, j = 0; i < tlen; i++,j++) { ! if (j >= rlen) { ! if (Delete) { ! if (tbl[t[i]] == -1) ! tbl[t[i]] = -2; ! continue; ! } ! --j; ! } ! if (tbl[t[i]] == -1) ! tbl[t[i]] = r[j]; ! } ! } op_free(expr); op_free(repl); --- 2110,2182 ---- register U8 *r = (U8*)SvPV(rstr, rlen); register I32 i; register I32 j; + register I32 k; + I32 lasttch = -3; + I32 lastrch = -1; + I32 tbl_size = 256; + I32 Delete; I32 complement; I32 squash; ! I32 kanji; ! /* the even index holds the t-char(in 2byte), and the odd index ! holds the r-char(in 2 byte) if t-char is to be removed, then ! r-char is -2. */ ! register U16 *tbl; ! ! New(803, tbl, tbl_size, U16); complement = o->op_private & OPpTRANS_COMPLEMENT; Delete = o->op_private & OPpTRANS_DELETE; + kanji = o->op_private & OPpTRANS_KANJI; squash = o->op_private & OPpTRANS_SQUASH; ! if (!rlen && !complement && !Delete && !squash) ! o->op_private |= OPpTRANS_COUNTONLY; ! ! for (i = 0, j = 0, k = 0; i < tlen || j < rlen; ) { ! I32 tch, rch; ! if (i >= tlen) { ! tch = lasttch; ! } else { ! if (kanji && iskanji(t[i]) && i < tlen-1) { ! tch = twochar_to_int(t[i], t[i+1]); ! i+=2; ! } else { ! tch = (unsigned char)t[i]; ! i++; ! } ! lasttch = tch; ! } ! if (j >= rlen) { ! if (Delete) rch = -2; ! else rch = lastrch; ! } else { ! if (kanji && iskanji(r[j]) && j < rlen-1) { ! rch = twochar_to_int(r[j], r[j+1]); ! j += 2; ! } else { ! rch = (unsigned char)r[j]; ! j++; ! } ! lastrch = rch; ! } ! if (k >= tbl_size) { ! tbl_size += 256; ! Renew(tbl, tbl_size, U16); ! } ! tbl[k++] = tch; ! tbl[k++] = rch; ! } ! if (k >= tbl_size) { ! tbl_size += 4; ! Renew(tbl, tbl_size, U16); ! } ! /* mark the end */ ! tbl[k++] = (U16)-1; ! tbl[k++] = (U16)-1; ! cPVOPo->op_pv = (char*)tbl; ! op_free(expr); op_free(repl); Index: op.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/op.h Wed Jan 14 05:16:35 2004 --- jperl5.005_04/op.h Mon Mar 29 23:56:51 2004 *************** *** 107,112 **** --- 107,113 ---- #define OPpTRANS_SQUASH 16 #define OPpTRANS_DELETE 32 #define OPpTRANS_COMPLEMENT 64 + #define OPpTRANS_KANJI 128 /* Private for OP_REPEAT */ #define OPpREPEAT_DOLIST 64 /* List replication. */ Index: perl.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/perl.c Wed Feb 18 20:02:55 2004 --- jperl5.005_04/perl.c Mon Mar 29 23:56:51 2004 *************** *** 623,628 **** --- 623,630 ---- ++PL_exitlistlen; } + int PL_default_hints; + int #ifdef PERL_OBJECT CPerlObj::perl_parse(void (*xsinit) (CPerlObj*), int argc, char **argv, char **env) *************** *** 642,647 **** --- 644,651 ---- int ret; int fdscript = -1; + set_lang_type(0); + #ifdef SETUID_SCRIPTS_ARE_SECURE_NOW #ifdef IAMSUID #undef IAMSUID *************** *** 678,683 **** --- 682,721 ---- return 0; } + if (PL_origargv[0]) { + s = jrindex(PL_origargv[0], '/'); + #ifdef _WIN32 + if (!s) + s = jrindex(PL_origargv[0], '\\'); + if (!s) + s = jrindex(PL_origargv[0], ':'); + #endif + if (!s) + s = PL_origargv[0]; + else + s++; + if (!ibcmp(s, "jperl", 5)) + s = ""; + else if (!ibcmp(s, "kseucperl", 9)) + s = "KSEUC"; + else if (!ibcmp(s, "tcaperl", 7)) + s = "TCA"; + else if (!ibcmp(s, "eucperl", 7)) + s = "EUC"; + else if (!ibcmp(s, "sjisperl", 8)) + s = "SJIS"; + else if (!ibcmp(s, "latinperl", 9)) + s = 0; + else + s = 0; + if (s) { + PL_minus_b = FALSE; + PL_default_hints = PL_hints = HINT_KANJI_REGEXP | HINT_KANJI_TR + | HINT_KANJI_FORMAT | HINT_KANJI_STRING; + set_lang_type(s); + } + } + if (PL_main_root) { PL_curpad = AvARRAY(PL_comppad); op_free(PL_main_root); *************** *** 735,740 **** --- 773,779 ---- case '0': case 'F': case 'a': + case 'b': case 'c': case 'd': case 'D': *************** *** 750,755 **** --- 789,795 ---- case 'U': case 'v': case 'w': + case 'L': if (s = moreswitches(s)) goto reswitch; break; *************** *** 1562,1567 **** --- 1602,1615 ---- PL_minus_a = TRUE; s++; return s; + case 'b': + /* No kanji support */ + PL_minus_b = TRUE; + PL_default_hints = PL_hints &= ~(HINT_KANJI_REGEXP | HINT_KANJI_TR + | HINT_KANJI_FORMAT | HINT_KANJI_STRING); + set_lang_type("LATIN"); + s++; + return s; case 'c': PL_minus_c = TRUE; s++; *************** *** 1729,1734 **** --- 1777,1787 ---- #endif printf("\n\nCopyright 1987-1999, Larry Wall\n"); + printf("\nJapanization patch 4 by Yasushi Saito, 1996\n"); + printf("\nModified by Hirofumi Watanabe, 1996-2000\n"); + printf("jperl5.005_03-20000401\n"); + printf("%s version\n", langtypesymbol); + #ifdef MSDOS printf("\nMS-DOS port Copyright (c) 1989, 1990, Diomidis Spinellis\n"); #endif *************** *** 1772,1777 **** --- 1825,1844 ---- PL_dowarn = TRUE; s++; return s; + case 'L': + s++; + if (!set_lang_type(s)) { + PL_default_hints = PL_hints = + HINT_KANJI_REGEXP | + HINT_KANJI_TR | + HINT_KANJI_FORMAT | + HINT_KANJI_STRING; + } else + croak("Unknown language type '%s'", s); + PL_minus_b = FALSE; + while (*s) + s++; + return s; case '*': case ' ': if (s[1] == '-') /* Additional switches on #! line. */ Index: perl.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/perl.h Wed Jan 14 05:16:39 2004 --- jperl5.005_04/perl.h Mon Mar 29 23:56:51 2004 *************** *** 1997,2002 **** --- 1997,2007 ---- #define HINT_RE_TAINT 0x00100000 #define HINT_RE_EVAL 0x00200000 + #define HINT_KANJI_REGEXP 0x01000000 + #define HINT_KANJI_TR 0x02000000 + #define HINT_KANJI_FORMAT 0x04000000 + #define HINT_KANJI_STRING 0x08000000 + /* Various states of an input record separator SV (rs, nrs) */ #define RsSNARF(sv) (! SvOK(sv)) #define RsSIMPLE(sv) (SvOK(sv) && SvCUR(sv)) *************** *** 2203,2214 **** PERLVAR(psig_ptr[PSIG_SIZE], SV*); PERLVAR(psig_name[PSIG_SIZE], SV*); /* * The following is a buffer where new variables must * be defined to maintain binary compatibility with PERL_OBJECT * for 5.005 */ ! PERLVAR(object_compatibility[30], char) }; #include "objpp.h" --- 2208,2225 ---- PERLVAR(psig_ptr[PSIG_SIZE], SV*); PERLVAR(psig_name[PSIG_SIZE], SV*); + PERLVAR(Iminus_b, bool) + PERLVAR(Ilangtype, unsigned char *) + /* * The following is a buffer where new variables must * be defined to maintain binary compatibility with PERL_OBJECT * for 5.005 */ ! /* ! * 30 - (sizeof minus_b + sizeof langtype) == 22 ! */ ! PERLVAR(object_compatibility[22], char) }; #include "objpp.h" *************** *** 2355,2360 **** --- 2366,2373 ---- EXT MGVTBL vtbl_amagicelem; #endif /* OVERLOAD */ + #include "kanji.h" + #endif /* !DOINIT */ #ifdef OVERLOAD Index: pod/jperl.pod ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/pod/jperl.pod Mon Jul 18 08:46:18 1994 --- jperl5.005_04/pod/jperl.pod Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,56 ---- + =head1 NAME + + jperl - Japanized Perl. + + =head1 DESCRIPTION + + Jperl is a variation of L that recognizes 16-bit Japanese characters. + It supports Japanese EUC and Shift JIS(aka MS Kanji code). + + Currently, it understands Japanese in the following circumstances. + + =over 5 + + =item * Regular Expression + + '.' matches either 1-byte ASCII character or 2-byte Japanese character. + Character class notation('[X-Y]', '[^X-Y]') recognizes a + Japanese letter as a character. + + =item * tr + + Similarly, tr recognizes a Japanese letter as a letter. + + =item * split + + Similar. + + =item * formats + + It doesn't cut 2-byte letter at the middle when linebreaking. + + =item * chop + + It understands 2-byte letters. + + =back + + =head1 ADDITIONAL COMMAND LINE ARGUMENT + + =over 5 + + =item B<-b> + + turns off Japanese mode. This is supported only in Japanized perl. + Without this option, most 8-bit characters are interpreted to be + composite character, and some programs that + use 8th bit for some control information(L is an example) won't + work. With B<-b>, jperl will be compatible with the original perl. + + =back + + =head1 AUTHOR + + Yasushi Saito + + yasushi@cs.washington.edu Index: pod/perlrun.pod ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/pod/perlrun.pod Wed Jan 14 05:17:00 2004 --- jperl5.005_04/pod/perlrun.pod Mon Mar 29 23:56:51 2004 *************** *** 243,248 **** --- 243,256 ---- An alternate delimiter may be specified using B<-F>. + =item B<-b> + + turns off Japanese mode. This is supported only in Japanized perl. + Without this option, most 8-bit characters are interpreted to be + composite character(i.e., Japanese Kanji), and some programs that + use 8th bit for some control information(L is an example) won't + work. With -b, jperl will be compatible with the original perl. + =item B<-c> causes Perl to check the syntax of the script and then exit without Index: pod/pod2man.PL Prereq: 1.5 ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/pod/pod2man.PL Wed Jan 14 05:17:08 2004 --- jperl5.005_04/pod/pod2man.PL Mon Mar 29 23:56:51 2004 *************** *** 27,33 **** # You can use $Config{...} to use Configure variables. print OUT <<"!GROK!THIS!"; ! $Config{startperl} eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' if \$running_under_some_shell; --- 27,33 ---- # You can use $Config{...} to use Configure variables. print OUT <<"!GROK!THIS!"; ! $Config{startperl} -b eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' if \$running_under_some_shell; Index: pp.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/pp.c Wed Jan 14 05:17:10 2004 --- jperl5.005_04/pp.c Mon Mar 29 23:56:51 2004 *************** *** 645,650 **** --- 645,652 ---- sfirst -= 256; while (--pos >= 0) { + if (PL_hints & HINT_KANJI_STRING && kpart((char *)s, (char *)s+pos) == KPART_KANJI_2) + continue; ch = s[pos]; if (sfirst[ch] >= 0) snext[pos] = sfirst[ch] - pos; *************** *** 2234,2244 **** TAINT; SvTAINTED_on(sv); for (; s < send; s++) ! *s = toUPPER_LC(*s); } else { for (; s < send; s++) ! *s = toUPPER(*s); } } RETURN; --- 2236,2252 ---- TAINT; SvTAINTED_on(sv); for (; s < send; s++) ! if (PL_hints & HINT_KANJI_STRING && iskanji(*s) && s+1 < send) ! s++; ! else ! *s = toUPPER_LC(*s); } else { for (; s < send; s++) ! if (PL_hints & HINT_KANJI_STRING && iskanji(*s) && s+1 < send) ! s++; ! else ! *s = toUPPER(*s); } } RETURN; *************** *** 2266,2276 **** TAINT; SvTAINTED_on(sv); for (; s < send; s++) ! *s = toLOWER_LC(*s); } else { for (; s < send; s++) ! *s = toLOWER(*s); } } RETURN; --- 2274,2290 ---- TAINT; SvTAINTED_on(sv); for (; s < send; s++) ! if (PL_hints & HINT_KANJI_STRING && iskanji(*s) && s+1 < send) ! s++; ! else ! *s = toLOWER_LC(*s); } else { for (; s < send; s++) ! if (PL_hints & HINT_KANJI_STRING && iskanji(*s) && s+1 < send) ! s++; ! else ! *s = toLOWER(*s); } } RETURN; *************** *** 2289,2295 **** SvGROW(TARG, (len * 2) + 1); d = SvPVX(TARG); while (len--) { ! if (!isALNUM(*s)) *d++ = '\\'; *d++ = *s++; } --- 2303,2312 ---- SvGROW(TARG, (len * 2) + 1); d = SvPVX(TARG); while (len--) { ! if (PL_hints & HINT_KANJI_STRING && iskanji(*s) && len) { ! *d++ = *s++; ! len--; ! } else if (!isALNUM(*s)) *d++ = '\\'; *d++ = *s++; } *************** *** 4441,4447 **** i = *SvPVX(rx->check_substr); while (--limit) { /*SUPPRESS 530*/ ! for (m = s; m < strend && *m != i; m++) ; if (m >= strend) break; dstr = NEWSV(30, m-s); --- 4458,4472 ---- i = *SvPVX(rx->check_substr); while (--limit) { /*SUPPRESS 530*/ ! if (PL_hints & HINT_KANJI_REGEXP) { ! for (m = s; m < strend; m++) ! if (iskanji(m[0]) && m+1 < strend && iskanji2(m[1])) ! m++; ! else if (*m == i) ! break; ! } ! else ! for (m = s; m < strend && *m != i; m++) ; if (m >= strend) break; dstr = NEWSV(30, m-s); Index: pp_ctl.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/pp_ctl.c Wed Jan 14 05:17:11 2004 --- jperl5.005_04/pp_ctl.c Mon Mar 29 23:56:51 2004 *************** *** 281,286 **** --- 281,288 ---- I32 itemsize; I32 fieldsize; I32 lines = 0; + I32 kanji = (PL_hints & HINT_KANJI_FORMAT); + bool chopatkanji = kanji != 0; bool chopspace = (strchr(PL_chopset, ' ') != Nullch); char *chophere; char *linemark; *************** *** 366,371 **** --- 368,378 ---- itemsize = len; if (itemsize > fieldsize) itemsize = fieldsize; + if (kanji + && kpart(item, item+itemsize) == KPART_KANJI_2) { + /* don't split kanji at the middle. */ + itemsize--; + } send = chophere = s + itemsize; while (s < send) { if (*s & ~31) *************** *** 393,400 **** } else { itemsize = fieldsize; send = chophere = s + itemsize; ! while (s < send || (s == send && isSPACE(*s))) { if (isSPACE(*s)) { if (chopspace) chophere = s; --- 400,414 ---- } else { itemsize = fieldsize; + if (kanji + && kpart(item, item+itemsize) == KPART_KANJI_2) { + /* don't split kanji at the middle. */ + itemsize--; + } send = chophere = s + itemsize; ! while (s < send || ! (s == send && isSPACE(*s)) || ! (chopatkanji && s == send && iskanji(*s))) { if (isSPACE(*s)) { if (chopspace) chophere = s; *************** *** 406,412 **** --- 420,429 ---- gotsome = TRUE; if (strchr(PL_chopset, *s)) chophere = s + 1; + else if (chopatkanji && iskanji(*s)) + chophere = s+2 <= send ? s+2 : s; } + if (kanji && iskanji(*s)) s++; s++; } itemsize = chophere - item; *************** *** 536,541 **** --- 553,560 ---- s++; } if (s < send) { + int chopkanji = 0; + char *st = t - itemsize; arg = fieldsize - itemsize; if (arg) { fieldsize -= arg; *************** *** 543,548 **** --- 562,573 ---- *t++ = ' '; } s = t - 3; + if (kanji + && kpart(st, s) == KPART_KANJI_2) { + /* don't split kanji at the middle. */ + s--; + chopkanji++; + } if (strnEQ(s," ",3)) { while (s > SvPVX(PL_formtarget) && isSPACE(s[-1])) s--; *************** *** 550,555 **** --- 575,581 ---- *s++ = '.'; *s++ = '.'; *s++ = '.'; + if (chopkanji) *s++ = ' '; } break; *************** *** 2371,2377 **** #else SAVEPPTR(PL_op); #endif - PL_hints = 0; PL_op = &dummy; PL_op->op_type = 0; /* Avoid uninit warning. */ --- 2397,2402 ---- *************** *** 2584,2589 **** --- 2609,2615 ---- I32 gimme = G_SCALAR; PerlIO *tryrsfp = 0; STRLEN n_a; + extern int PL_default_hints; sv = POPs; if (SvNIOKp(sv) && !SvPOKp(sv)) { *************** *** 2697,2703 **** name = savepv(name); SAVEFREEPV(name); SAVEHINTS(); ! PL_hints = 0; /* switch to eval mode */ --- 2723,2730 ---- name = savepv(name); SAVEFREEPV(name); SAVEHINTS(); ! #define HINT_KANJI (HINT_KANJI_REGEXP|HINT_KANJI_TR|HINT_KANJI_FORMAT|HINT_KANJI_STRING) ! PL_hints = PL_default_hints & ~HINT_KANJI | PL_hints & HINT_KANJI; /* switch to eval mode */ *************** *** 2966,2971 **** --- 2993,2999 ---- U16 *linepc; register I32 arg; bool ischop; + I32 kanji = (PL_hints & HINT_KANJI_FORMAT); if (len == 0) croak("Null picture in formline"); *************** *** 2981,2986 **** --- 3009,3020 ---- } while (s <= send) { + if (kanji && iskanji(*s)) { + s += 2; + skipspaces = 0; + continue; + } + switch (*s++) { default: skipspaces = 0; Index: pp_sys.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/pp_sys.c Wed Jan 14 05:17:12 2004 --- jperl5.005_04/pp_sys.c Mon Mar 29 23:56:51 2004 *************** *** 2649,2654 **** --- 2649,2656 ---- else if (!(isPRINT(*s) || isSPACE(*s))) odd++; #else + else if ((PL_hints & HINT_KANJI_STRING) && iskanji(*s) && s[1]) + ; else if (*s & 128) odd++; else if (*s < 32 && Index: proto.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/proto.h Wed Jan 14 05:17:13 2004 --- jperl5.005_04/proto.h Mon Mar 29 23:56:51 2004 *************** *** 642,647 **** --- 642,652 ---- #endif #ifdef PERL_OBJECT + VIRTUAL I32 sv_catkanji _((SV* sv, U32 tch)); + regnode * regclassfree _((regnode *node, regnode *last)); + #endif + + #ifdef PERL_OBJECT protected: void hsplit _((HV *hv)); void hfreeentries _((HV *hv)); *************** *** 900,905 **** --- 905,925 ---- VIRTUAL void sv_setsv_mg _((SV *dstr, SV *sstr)); VIRTUAL void sv_usepvn_mg _((SV *sv, char *ptr, STRLEN len)); + VIRTUAL I32 twochar_to_int _((int c1, int c2)); + VIRTUAL I32 int_to_1stbyte _((I32 c1)); + VIRTUAL I32 int_to_2ndbyte _((I32 c1)); + VIRTUAL int set_lang_type _((char *symbol)); + VIRTUAL int kpart _((char *pLim,char *pChr)); + VIRTUAL unsigned int jnextcode _((unsigned int cc)); + VIRTUAL int jstrlen _((char *s)); + VIRTUAL int jstlen _((char *s, int len)); + VIRTUAL char * jnthchar _((char *s,int n)); + VIRTUAL char * jindex _((char *s,int c)); + VIRTUAL char * jrindex _((char *s,int c)); + VIRTUAL char * jstrlower _((char *s)); + VIRTUAL int jstricmp _((char *s1, char *s2)); + VIRTUAL int jstrnicmp _((char *s1, char *s2, int n)); + VIRTUAL MGVTBL* get_vtbl _((int vtbl_id)); VIRTUAL OP* dofile _((OP* term)); VIRTUAL void save_generic_svref _((SV** sptr)); Index: regcomp.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/regcomp.c Wed Jan 14 05:17:14 2004 --- jperl5.005_04/regcomp.c Mon Mar 29 23:56:51 2004 *************** *** 101,106 **** --- 101,108 ---- #define STATIC static #endif + #define ISKANJI(c) ((PL_hints & HINT_KANJI_REGEXP) && iskanji(c)) + #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?') #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \ ((*s) == '{' && regcurly(s))) *************** *** 133,138 **** --- 135,141 ---- static regnode *regbranch _((I32 *, I32)); static void regc _((U8, char *)); static regnode *regclass _((void)); + static regnode * regclassfree _((regnode *node, regnode *last)); STATIC I32 regcurly _((char *)); static regnode *reg_node _((U8)); static regnode *regpiece _((I32 *)); *************** *** 400,406 **** min += *OPERAND(scan); if (data && (flags & SCF_DO_SUBSTR)) data->pos_min += *OPERAND(scan); ! } else if (strchr(varies,OP(scan))) { I32 mincount, maxcount, minnext, deltanext, pos_before, fl; regnode *oscan = scan; --- 403,410 ---- min += *OPERAND(scan); if (data && (flags & SCF_DO_SUBSTR)) data->pos_min += *OPERAND(scan); ! } else if (strchr(varies,OP(scan)) || ! ((PL_hints & HINT_KANJI_REGEXP) && strchr(kvaries,OP(scan)))) { I32 mincount, maxcount, minnext, deltanext, pos_before, fl; regnode *oscan = scan; *************** *** 498,503 **** --- 502,508 ---- } nogo: + #if 0 /* Try optimization CURLYX => CURLYM. */ if ( OP(oscan) == CURLYX && data && !(data->flags & SF_HAS_PAR) *************** *** 549,554 **** --- 554,560 ---- } else oscan->flags = 0; } + #endif if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) pars++; if (flags & SCF_DO_SUBSTR) { *************** *** 818,823 **** --- 824,831 ---- /* Dig out information for optimizations. */ r->reganch = pm->op_pmflags & PMf_COMPILETIME; + if (PL_hints & HINT_KANJI_REGEXP) + r->reganch |= ROPT_KANJI; pm->op_pmflags = PL_regflags; r->regstclass = NULL; r->naughty = PL_regnaughty >= 10; /* Probably an expensive pattern. */ *************** *** 961,969 **** if (r->reganch & ROPT_ANCH_SINGLE) r->reganch |= ROPT_NOSCAN; } else { ! r->check_substr = r->float_substr; ! r->check_offset_min = data.offset_float_min; ! r->check_offset_max = data.offset_float_max; } } else { /* Several toplevels. Best we can is to set minlen. */ --- 969,982 ---- if (r->reganch & ROPT_ANCH_SINGLE) r->reganch |= ROPT_NOSCAN; } else { ! if (PL_hints & HINT_KANJI_REGEXP) { ! r->check_substr = 0; ! r->check_offset_min = r->check_offset_max = 0; ! } else { ! r->check_substr = r->float_substr; ! r->check_offset_min = data.offset_float_min; ! r->check_offset_max = data.offset_float_max; ! } } } else { /* Several toplevels. Best we can is to set minlen. */ *************** *** 1553,1564 **** else ret = reg_node(ANY); PL_regnaughty++; ! *flagp |= HASWIDTH|SIMPLE; break; case '[': PL_regcomp_parse++; ret = regclass(); ! *flagp |= HASWIDTH|SIMPLE; break; case '(': nextchar(); --- 1566,1581 ---- else ret = reg_node(ANY); PL_regnaughty++; ! *flagp |= HASWIDTH; ! if (!(PL_hints & HINT_KANJI_REGEXP)) ! *flagp |= SIMPLE; break; case '[': PL_regcomp_parse++; ret = regclass(); ! *flagp |= HASWIDTH; ! if (!(PL_hints & HINT_KANJI_REGEXP)) ! *flagp |= SIMPLE; break; case '(': nextchar(); *************** *** 1722,1728 **** s = (char *) OPERAND(ret); regc(0, s++); /* save spot for len */ for (len = 0, p = PL_regcomp_parse - 1; ! len < 127 && p < PL_regxend; len++) { oldp = p; --- 1739,1745 ---- s = (char *) OPERAND(ret); regc(0, s++); /* save spot for len */ for (len = 0, p = PL_regcomp_parse - 1; ! len < 126 && p < PL_regxend; len++) { oldp = p; *************** *** 1814,1820 **** } if (PL_regflags & PMf_EXTENDED) p = regwhite(p, PL_regxend); ! if (ISMULT2(p)) { /* Back off on ?+*. */ if (len) p = oldp; else { --- 1831,1852 ---- } if (PL_regflags & PMf_EXTENDED) p = regwhite(p, PL_regxend); ! if (ISKANJI(ender) && *p && iskanji2(*p)) { ! if (ISMULT2((p+1))) { ! if (len) { ! p = oldp; ! } else { ! len += 2; ! regc(ender, s++); ! ender = *p++; ! regc(ender, s++); ! } ! break; ! } ! len++; ! regc(ender, s++); ! ender = *p++; ! } else if (ISMULT2(p)) { /* Back off on ?+*. */ if (len) p = oldp; else { *************** *** 1866,1871 **** --- 1898,1945 ---- return p; } + static void + block_on(char *b, int x, int y, int ex) + { + int lo; + int hi; + int r_lo = x & 7; + int r_hi = y & 7; + + if (!b) + return; + + if (0x8000 <= x) + x -= 0x8000 - 0x100; + if (0x8000 <= y) + y -= 0x8000 - 0x100; + + lo = x >> 3; + hi = y >> 3; + + if (ex) { + if (lo == hi) { + b[lo] ^= (1 << (r_hi+1)) - (1 << r_lo); + } else { + int i; + for (i = lo + 1; i < hi; i++) + b[i] ^= 0xff; + b[lo] ^= (0xff << r_lo); + b[hi] ^= ~(0xff << (r_hi+1)); + } + } else { + if (lo == hi) { + b[lo] |= (1 << (r_hi+1)) - (1 << r_lo); + } else { + int i; + for (i = lo + 1; i < hi; i++) + b[i] = 0xff; + b[lo] |= (0xff << r_lo); + b[hi] |= ~(0xff << (r_hi+1)); + } + } + } + STATIC regnode * regclass(void) { *************** *** 1877,1887 **** register regnode *ret; register I32 def; I32 numlen; s = opnd = (char *) OPERAND(PL_regcode); ret = reg_node(ANYOF); ! for (Class = 0; Class < 33; Class++) regc(0, s++); if (*PL_regcomp_parse == '^') { /* Complement of range. */ PL_regnaughty++; PL_regcomp_parse++; --- 1951,1970 ---- register regnode *ret; register I32 def; I32 numlen; + int ismb = 0; + char *bitvec = 0; + char **bitvec_in_regnode = 0; s = opnd = (char *) OPERAND(PL_regcode); ret = reg_node(ANYOF); ! regc(0, s++); ! ! if (!SIZE_ONLY) ! bitvec_in_regnode = (char **)s; ! ! for (Class = 0; Class < sizeof (char *); Class++) regc(0, s++); + if (*PL_regcomp_parse == '^') { /* Complement of range. */ PL_regnaughty++; PL_regcomp_parse++; *************** *** 1889,1894 **** --- 1972,1980 ---- *opnd |= ANYOF_INVERT; } if (!SIZE_ONLY) { + int len = sizeof (char *); /* avoid GCC optimization */ + Newz(1003, bitvec, (0x8000 + 0x100) / 8, char); + Copy(&bitvec, bitvec_in_regnode, len, char); PL_regcode += ANY_SKIP; if (PL_regflags & PMf_FOLD) *opnd |= ANYOF_FOLD; *************** *** 1926,1931 **** --- 2012,2023 ---- } } } + if (ISKANJI(Class) && PL_regcomp_parse < PL_regxend) { + if (!SIZE_ONLY && *opnd & ANYOF_LITERAL) + FAIL("kanji and hex/octal literal"); + Class = (Class << 8) + UCHARAT(PL_regcomp_parse++); + ismb = 1; + } if (Class == '\\') { Class = UCHARAT(PL_regcomp_parse++); switch (Class) { *************** *** 1936,1942 **** else { for (Class = 0; Class < 256; Class++) if (isALNUM(Class)) ! ANYOF_SET(opnd, Class); } } lastclass = 1234; --- 2028,2034 ---- else { for (Class = 0; Class < 256; Class++) if (isALNUM(Class)) ! ANYOF_SET(bitvec, Class); } } lastclass = 1234; *************** *** 1948,1954 **** else { for (Class = 0; Class < 256; Class++) if (!isALNUM(Class)) ! ANYOF_SET(opnd, Class); } } lastclass = 1234; --- 2040,2047 ---- else { for (Class = 0; Class < 256; Class++) if (!isALNUM(Class)) ! ANYOF_SET(bitvec, Class); ! block_on(bitvec, 0x8000, 0xffff, 0); } } lastclass = 1234; *************** *** 1960,1966 **** else { for (Class = 0; Class < 256; Class++) if (isSPACE(Class)) ! ANYOF_SET(opnd, Class); } } lastclass = 1234; --- 2053,2059 ---- else { for (Class = 0; Class < 256; Class++) if (isSPACE(Class)) ! ANYOF_SET(bitvec, Class); } } lastclass = 1234; *************** *** 1972,1995 **** else { for (Class = 0; Class < 256; Class++) if (!isSPACE(Class)) ! ANYOF_SET(opnd, Class); } } lastclass = 1234; continue; case 'd': if (!SIZE_ONLY) { ! for (Class = '0'; Class <= '9'; Class++) ! ANYOF_SET(opnd, Class); } lastclass = 1234; continue; case 'D': if (!SIZE_ONLY) { ! for (Class = 0; Class < '0'; Class++) ! ANYOF_SET(opnd, Class); ! for (Class = '9' + 1; Class < 256; Class++) ! ANYOF_SET(opnd, Class); } lastclass = 1234; continue; --- 2065,2087 ---- else { for (Class = 0; Class < 256; Class++) if (!isSPACE(Class)) ! ANYOF_SET(bitvec, Class); ! block_on(bitvec, 0x8000, 0xffff, 0); } } lastclass = 1234; continue; case 'd': if (!SIZE_ONLY) { ! block_on(bitvec, '0', '9', 0); } lastclass = 1234; continue; case 'D': if (!SIZE_ONLY) { ! block_on(bitvec, 0, '0' - 1, 0); ! block_on(bitvec, '9' + 1, 255, 0); ! block_on(bitvec, 0x8000, 0xffff, 0); } lastclass = 1234; continue; *************** *** 2017,2022 **** --- 2109,2119 ---- case 'x': Class = scan_hex(PL_regcomp_parse, 2, &numlen); PL_regcomp_parse += numlen; + if (!SIZE_ONLY) { + if (ismb) + FAIL("kanji and hex/octal literal"); + *opnd |= ANYOF_LITERAL; + } break; case 'c': Class = UCHARAT(PL_regcomp_parse++); *************** *** 2026,2031 **** --- 2123,2133 ---- case '5': case '6': case '7': case '8': case '9': Class = scan_oct(--PL_regcomp_parse, 3, &numlen); PL_regcomp_parse += numlen; + if (!SIZE_ONLY) { + if (ismb) + FAIL("kanji and hex/octal literal"); + *opnd |= ANYOF_LITERAL; + } break; } } *************** *** 2060,2067 **** } else #endif ! for ( ; lastclass <= Class; lastclass++) ! ANYOF_SET(opnd, lastclass); } lastclass = Class; } --- 2162,2168 ---- } else #endif ! block_on(bitvec, lastclass, Class, 0); } lastclass = Class; } *************** *** 2071,2087 **** /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) { for (Class = 0; Class < 256; ++Class) { ! if (ANYOF_TEST(opnd, Class)) { I32 cf = fold[Class]; ! ANYOF_SET(opnd, cf); } } *opnd &= ~ANYOF_FOLD; } /* optimize inverted simple patterns (e.g. [^a-z]) */ if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) { ! for (Class = 0; Class < 32; ++Class) ! opnd[1 + Class] ^= 0xFF; *opnd = 0; } return ret; --- 2172,2188 ---- /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) { for (Class = 0; Class < 256; ++Class) { ! if (ANYOF_TEST(bitvec, Class)) { I32 cf = fold[Class]; ! ANYOF_SET(bitvec, cf); } } *opnd &= ~ANYOF_FOLD; } /* optimize inverted simple patterns (e.g. [^a-z]) */ if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) { ! block_on(bitvec, 0, 0xff, 1); ! block_on(bitvec, 0x8000, 0xffff, 1); *opnd = 0; } return ret; *************** *** 2605,2610 **** --- 2706,2760 ---- #endif /* DEBUGGING */ } + STATIC regnode * + regclassfree(regnode *node, regnode *last) + { + register char op = EXACT; /* Arbitrary non-END op. */ + register regnode *next, *onode; + + while (op != END && (!last || node < last)) { + /* While that wasn't END last time... */ + + NODE_ALIGN(node); + op = OP(node); + next = regnext(node); + + if (regkind[(U8)op] == BRANCHJ) { + register regnode *nnode = (OP(next) == LONGJMP + ? regnext(next) + : next); + if (last && nnode > last) + nnode = last; + node = regclassfree(NEXTOPER(NEXTOPER(node)), nnode); + } else if (regkind[(U8)op] == BRANCH) { + node = regclassfree(NEXTOPER(node), next); + } else if ( op == CURLY) { /* `next' might be very big: optimizer */ + node = regclassfree(NEXTOPER(node) + EXTRA_STEP_2ARGS, + NEXTOPER(node) + EXTRA_STEP_2ARGS + 1); + } else if (regkind[(U8)op] == CURLY && op != CURLYX) { + node = regclassfree(NEXTOPER(node) + EXTRA_STEP_2ARGS, next); + } else if ( op == PLUS || op == STAR) { + node = regclassfree(NEXTOPER(node), NEXTOPER(node) + 1); + } else if (op == ANYOF) { + char *bitvec; + int len = sizeof (char *); /* avoid GCC optimization */ + Copy(OPERAND(node) + 1, &bitvec, len, char); + Safefree(bitvec); + node = NEXTOPER(node); + node += ANY_SKIP; + } else if (regkind[(U8)op] == EXACT) { + /* Literal string, where present. */ + node += ((*OPERAND(node)) + 2 + sizeof(regnode) - 1) / sizeof(regnode); + node = NEXTOPER(node); + } else { + node = NEXTOPER(node); + node += regarglen[(U8)op]; + } + } + + return node; + } + void pregfree(struct regexp *r) { *************** *** 2643,2648 **** --- 2793,2799 ---- } Safefree(r->startp); Safefree(r->endp); + (void)regclassfree(r->program + 1, NULL); Safefree(r); } Index: regcomp.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/regcomp.h Wed Jan 14 05:17:14 2004 --- jperl5.005_04/regcomp.h Mon Mar 29 23:56:51 2004 *************** *** 155,160 **** --- 155,161 ---- #define SIZE_ONLY (PL_regcode == &PL_regdummy) /* Flags for first parameter byte of ANYOF */ + #define ANYOF_LITERAL 0x80 #define ANYOF_INVERT 0x40 #define ANYOF_FOLD 0x20 #define ANYOF_LOCALE 0x10 *************** *** 165,177 **** #define ANYOF_NSPACEL 0x01 /* Utility macros for bitmap of ANYOF */ ! #define ANYOF_BYTE(p,c) (p)[1 + (((c) >> 3) & 31)] #define ANYOF_BIT(c) (1 << ((c) & 7)) #define ANYOF_SET(p,c) (ANYOF_BYTE(p,c) |= ANYOF_BIT(c)) #define ANYOF_CLEAR(p,c) (ANYOF_BYTE(p,c) &= ~ANYOF_BIT(c)) #define ANYOF_TEST(p,c) (ANYOF_BYTE(p,c) & ANYOF_BIT(c)) ! #define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1) /* * Utility definitions. --- 166,178 ---- #define ANYOF_NSPACEL 0x01 /* Utility macros for bitmap of ANYOF */ ! #define ANYOF_BYTE(p,c) (p)[(c) >> 3] #define ANYOF_BIT(c) (1 << ((c) & 7)) #define ANYOF_SET(p,c) (ANYOF_BYTE(p,c) |= ANYOF_BIT(c)) #define ANYOF_CLEAR(p,c) (ANYOF_BYTE(p,c) &= ~ANYOF_BIT(c)) #define ANYOF_TEST(p,c) (ANYOF_BYTE(p,c) & ANYOF_BIT(c)) ! #define ANY_SKIP (sizeof (char *)/sizeof(regnode) + 1) /* * Utility definitions. *************** *** 208,213 **** --- 209,222 ---- }; #endif + #ifndef DOINIT + EXTCONST char kvaries[]; + #else + EXTCONST char kvaries[] = { + ANY, SANY, ANYOF, 0 + }; + #endif + /* The following always have a length of 1. char* since we do strchr on it. */ #ifndef DOINIT EXTCONST char simple[]; Index: regexec.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/regexec.c Wed Jan 14 05:17:15 2004 --- jperl5.005_04/regexec.c Mon Mar 29 23:56:51 2004 *************** *** 86,91 **** --- 86,95 ---- #define STATIC static #endif + #define KANJI_MODE (_prog->reganch & ROPT_KANJI) + static regexp *_prog; /* save the latest regexp* here. + XXX this won't work in multi-threaded environment */ + #ifndef PERL_OBJECT typedef I32 CHECKPOINT; *************** *** 102,108 **** static CHECKPOINT regcppush _((I32 parenfloor)); static char * regcppop _((void)); #endif ! #define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c)) STATIC CHECKPOINT regcppush(I32 parenfloor) --- 106,113 ---- static CHECKPOINT regcppush _((I32 parenfloor)); static char * regcppop _((void)); #endif ! /*#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))*/ ! #define REGINCLASS(p,c) reginclass(p,c) STATIC CHECKPOINT regcppush(I32 parenfloor) *************** *** 245,250 **** --- 250,257 ---- croak("NULL regexp parameter"); return 0; } + + _prog = prog; /* save the thing */ minlen = prog->minlen; if (strend - startpos < minlen) goto phooey; *************** *** 266,271 **** --- 273,279 ---- PL_reg_flags = 0; PL_reg_eval_set = 0; + /* If there is a "must appear" string, look for it. */ s = startpos; if (!(flags & REXEC_CHECKED) *************** *** 304,309 **** --- 312,320 ---- prog->float_substr = Nullsv; /* clear */ s = startpos; } else s = startpos; + if (KANJI_MODE && s && kpart(startpos, s) == KPART_KANJI_2) { + s = startpos; + } } /* Mark beginning of line for ^ and lookbehind. */ *************** *** 359,367 **** while (s < strend) { if (*s == ch) { if (regtry(prog, s)) goto got_it; s++; ! while (s < strend && *s == ch) s++; } s++; } --- 370,381 ---- while (s < strend) { if (*s == ch) { if (regtry(prog, s)) goto got_it; + if (KANJI_MODE && iskanji(*s) && s+1= PL_regeol) sayNO; + if (KANJI_MODE && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) + ++locinput; nextchr = UCHARAT(++locinput); break; case ANY: if (!nextchr && locinput >= PL_regeol || nextchr == '\n') sayNO; + if (KANJI_MODE && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) + ++locinput; nextchr = UCHARAT(++locinput); break; case EXACT: *************** *** 910,915 **** --- 948,958 ---- s = (char *) OPERAND(scan); if (nextchr < 0) nextchr = UCHARAT(locinput); + if (KANJI_MODE && !(*s & ANYOF_LITERAL) + && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) { + locinput++; + nextchr = (nextchr << 8) + UCHARAT(locinput); + } if (!REGINCLASS(s, nextchr)) sayNO; if (!nextchr && locinput >= PL_regeol) *************** *** 925,930 **** --- 968,975 ---- if (!(OP(scan) == ALNUM ? isALNUM(nextchr) : isALNUM_LC(nextchr))) sayNO; + if (KANJI_MODE && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) + ++locinput; nextchr = UCHARAT(++locinput); break; case NALNUML: *************** *** 936,941 **** --- 981,988 ---- if (OP(scan) == NALNUM ? isALNUM(nextchr) : isALNUM_LC(nextchr)) sayNO; + if (KANJI_MODE && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) + ++locinput; nextchr = UCHARAT(++locinput); break; case BOUNDL: *************** *** 966,971 **** --- 1013,1020 ---- if (!(OP(scan) == SPACE ? isSPACE(nextchr) : isSPACE_LC(nextchr))) sayNO; + if (KANJI_MODE && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) + ++locinput; nextchr = UCHARAT(++locinput); break; case NSPACEL: *************** *** 977,987 **** --- 1026,1040 ---- if (OP(scan) == SPACE ? isSPACE(nextchr) : isSPACE_LC(nextchr)) sayNO; + if (KANJI_MODE && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) + ++locinput; nextchr = UCHARAT(++locinput); break; case DIGIT: if (!isDIGIT(nextchr)) sayNO; + if (KANJI_MODE && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) + ++locinput; nextchr = UCHARAT(++locinput); break; case NDIGIT: *************** *** 989,994 **** --- 1042,1049 ---- sayNO; if (isDIGIT(nextchr)) sayNO; + if (KANJI_MODE && iskanji(nextchr) && locinput[1] && iskanji2(locinput[1])) + ++locinput; nextchr = UCHARAT(++locinput); break; case REFFL: *************** *** 1357,1377 **** REGCP_SET; /* This may be improved if l == 0. */ while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */ /* If it could work, try it. */ ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) ! { ! if (paren) { ! if (n) { ! PL_regstartp[paren] = PL_reginput - l; ! PL_regendp[paren] = PL_reginput; ! } else ! PL_regendp[paren] = NULL; } - if (regmatch(next)) - sayYES; - REGCP_UNWIND; } /* Couldn't or didn't -- move forward. */ PL_reginput = locinput; --- 1412,1435 ---- REGCP_SET; /* This may be improved if l == 0. */ while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */ + if (!KANJI_MODE + || kpart(locinput, PL_reginput) != KPART_KANJI_2) { /* If it could work, try it. */ ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) ! { ! if (paren) { ! if (n) { ! PL_regstartp[paren] = PL_reginput - l; ! PL_regendp[paren] = PL_reginput; ! } else ! PL_regendp[paren] = NULL; ! } ! if (regmatch(next)) ! sayYES; ! REGCP_UNWIND; } } /* Couldn't or didn't -- move forward. */ PL_reginput = locinput; *************** *** 1411,1420 **** REGCP_SET; while (n >= ln) { /* If it could work, try it. */ ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) ! { DEBUG_r( PerlIO_printf(Perl_debug_log, "%*s trying tail with n=%ld...\n", --- 1469,1479 ---- REGCP_SET; while (n >= ln) { /* If it could work, try it. */ ! if (!KANJI_MODE ! || kpart(locinput, PL_reginput) != KPART_KANJI_2) { ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) { DEBUG_r( PerlIO_printf(Perl_debug_log, "%*s trying tail with n=%ld...\n", *************** *** 1431,1436 **** --- 1490,1496 ---- sayYES; REGCP_UNWIND; } + } /* Couldn't or didn't -- back up. */ n--; locinput -= l; *************** *** 1491,1511 **** sayNO; REGCP_SET; while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */ ! /* If it could work, try it. */ ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) ! { ! if (paren) { ! if (n) { ! PL_regstartp[paren] = PL_reginput - 1; ! PL_regendp[paren] = PL_reginput; ! } else ! PL_regendp[paren] = NULL; } - if (regmatch(next)) - sayYES; - REGCP_UNWIND; } /* Couldn't or didn't -- move forward. */ PL_reginput = locinput + ln; --- 1551,1575 ---- sayNO; REGCP_SET; while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */ ! if (!KANJI_MODE ! || kpart(locinput, PL_reginput) != KPART_KANJI_2) { ! /* If it could work, try it. */ ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) { ! if (paren) { ! if (n) { ! PL_regstartp[paren] = PL_reginput - 1; ! if (KANJI_MODE && kpart(locinput, PL_reginput-1) == KPART_KANJI_2) ! PL_regstartp[paren]--; ! PL_regendp[paren] = PL_reginput; ! } else ! PL_regendp[paren] = NULL; ! } ! if (regmatch(next)) ! sayYES; ! REGCP_UNWIND; } } /* Couldn't or didn't -- move forward. */ PL_reginput = locinput + ln; *************** *** 1526,1538 **** if (paren) { while (n >= ln) { /* If it could work, try it. */ ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) ! { if (paren && n) { if (n) { PL_regstartp[paren] = PL_reginput - 1; PL_regendp[paren] = PL_reginput; } else PL_regendp[paren] = NULL; --- 1590,1605 ---- if (paren) { while (n >= ln) { /* If it could work, try it. */ ! if (!KANJI_MODE ! || kpart(locinput, PL_reginput) != KPART_KANJI_2) { ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) { if (paren && n) { if (n) { PL_regstartp[paren] = PL_reginput - 1; + if (KANJI_MODE && kpart(locinput, PL_reginput-1) == KPART_KANJI_2) + PL_regstartp[paren]--; PL_regendp[paren] = PL_reginput; } else PL_regendp[paren] = NULL; *************** *** 1541,1561 **** sayYES; REGCP_UNWIND; } /* Couldn't or didn't -- back up. */ n--; PL_reginput = locinput + n; } } else { while (n >= ln) { /* If it could work, try it. */ ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) ! { if (regmatch(next)) sayYES; REGCP_UNWIND; } /* Couldn't or didn't -- back up. */ n--; PL_reginput = locinput + n; --- 1608,1631 ---- sayYES; REGCP_UNWIND; } + } /* Couldn't or didn't -- back up. */ n--; PL_reginput = locinput + n; } } else { while (n >= ln) { + if (!KANJI_MODE + || kpart(locinput, PL_reginput) != KPART_KANJI_2) { /* If it could work, try it. */ ! if (c1 == -1000 || ! UCHARAT(PL_reginput) == c1 || ! UCHARAT(PL_reginput) == c2) { if (regmatch(next)) sayYES; REGCP_UNWIND; } + } /* Couldn't or didn't -- back up. */ n--; PL_reginput = locinput + n; *************** *** 1657,1662 **** --- 1727,1733 ---- register char *opnd; register I32 c; register char *loceol = PL_regeol; + char *oldscan; scan = PL_reginput; if (max != REG_INFTY && max < loceol - scan) *************** *** 1664,1671 **** opnd = (char *) OPERAND(p); switch (OP(p)) { case ANY: ! while (scan < loceol && *scan != '\n') scan++; break; case SANY: scan = loceol; --- 1735,1744 ---- opnd = (char *) OPERAND(p); switch (OP(p)) { case ANY: ! while (scan < loceol && *scan != '\n') { ! if (KANJI_MODE && iskanji(*scan) && iskanji2(scan[1])) scan++; scan++; + } break; case SANY: scan = loceol; *************** *** 1689,1696 **** scan++; break; case ANYOF: ! while (scan < loceol && REGINCLASS(opnd, *scan)) scan++; break; case ALNUM: while (scan < loceol && isALNUM(*scan)) --- 1762,1785 ---- scan++; break; case ANYOF: ! c = UCHARAT(scan); ! oldscan = scan; ! if (KANJI_MODE && !(*opnd & ANYOF_LITERAL) ! && iskanji(c) && scan < loceol && iskanji2(scan[1])) { ! scan++; ! c = (c << 8) + UCHARAT(scan); ! } ! while (scan < loceol && REGINCLASS(opnd, c)) { scan++; + c = UCHARAT(scan); + oldscan = scan; + if (KANJI_MODE && iskanji(c) && scan < loceol && iskanji2(scan[1])) { + scan++; + c = (c << 8) + UCHARAT(scan); + } + } + scan = oldscan; + break; break; case ALNUM: while (scan < loceol && isALNUM(*scan)) *************** *** 1702,1714 **** scan++; break; case NALNUM: ! while (scan < loceol && !isALNUM(*scan)) scan++; break; case NALNUML: PL_reg_flags |= RF_tainted; ! while (scan < loceol && !isALNUM_LC(*scan)) scan++; break; case SPACE: while (scan < loceol && isSPACE(*scan)) --- 1791,1807 ---- scan++; break; case NALNUM: ! while (scan < loceol && !isALNUM(*scan)) { ! if (KANJI_MODE && iskanji(*scan) && iskanji2(scan[1])) scan++; scan++; + } break; case NALNUML: PL_reg_flags |= RF_tainted; ! while (scan < loceol && !isALNUM_LC(*scan)) { ! if (KANJI_MODE && iskanji(*scan) && iskanji2(scan[1])) scan++; scan++; + } break; case SPACE: while (scan < loceol && isSPACE(*scan)) *************** *** 1720,1736 **** scan++; break; case NSPACE: ! while (scan < loceol && !isSPACE(*scan)) scan++; break; case NSPACEL: PL_reg_flags |= RF_tainted; ! while (scan < loceol && !isSPACE_LC(*scan)) scan++; break; case DIGIT: ! while (scan < loceol && isDIGIT(*scan)) scan++; break; case NDIGIT: while (scan < loceol && !isDIGIT(*scan)) --- 1813,1835 ---- scan++; break; case NSPACE: ! while (scan < loceol && !isSPACE(*scan)) { ! if (KANJI_MODE && iskanji(*scan) && iskanji2(scan[1])) scan++; scan++; + } break; case NSPACEL: PL_reg_flags |= RF_tainted; ! while (scan < loceol && !isSPACE_LC(*scan)) { ! if (KANJI_MODE && iskanji(*scan) && iskanji2(scan[1])) scan++; scan++; + } break; case DIGIT: ! while (scan < loceol && isDIGIT(*scan)) { ! if (KANJI_MODE && iskanji(*scan) && iskanji2(scan[1])) scan++; scan++; + } break; case NDIGIT: while (scan < loceol && !isDIGIT(*scan)) *************** *** 1801,1810 **** dTHR; char flags = *p; bool match = FALSE; ! c &= 0xFF; ! if (ANYOF_TEST(p, c)) match = TRUE; else if (flags & ANYOF_FOLD) { I32 cf; if (flags & ANYOF_LOCALE) { --- 1900,1918 ---- dTHR; char flags = *p; bool match = FALSE; + char *bitvec; + int len = sizeof (char *); /* avoid GCC optimization */ ! p++; ! Copy(p, &bitvec, len, char); ! ! c &= 0xFFFF; ! if (0x8000 <= c) ! c -= 0x8000 - 0x100; ! if (ANYOF_TEST(bitvec, c)) match = TRUE; + else if (0x100 <= c) + goto end; else if (flags & ANYOF_FOLD) { I32 cf; if (flags & ANYOF_LOCALE) { *************** *** 1813,1819 **** } else cf = fold[c]; ! if (ANYOF_TEST(p, cf)) match = TRUE; } --- 1921,1927 ---- } else cf = fold[c]; ! if (ANYOF_TEST(bitvec, cf)) match = TRUE; } *************** *** 1829,1836 **** } } return (flags & ANYOF_INVERT) ? !match : match; } - - - --- 1937,1942 ---- } } + end:; return (flags & ANYOF_INVERT) ? !match : match; } Index: regexp.h ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/regexp.h Wed Jan 14 05:17:15 2004 --- jperl5.005_04/regexp.h Mon Mar 29 23:56:51 2004 *************** *** 87,92 **** --- 87,93 ---- #define ROPT_LOOKBEHIND_SEEN 0x100 #define ROPT_EVAL_SEEN 0x200 #define ROPT_TAINTED_SEEN 0x400 + #define ROPT_KANJI 0x1000 /* 0xf800 of reganch is used by PMf_COMPILETIME */ #define RX_MATCH_TAINTED(prog) ((prog)->reganch & ROPT_TAINTED_SEEN) Index: t/TEST ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/t/TEST Wed Jan 14 05:17:18 2004 --- jperl5.005_04/t/TEST Mon Mar 29 23:56:51 2004 *************** *** 24,30 **** if ($#ARGV == -1) { @ARGV = split(/[ \n]/, ! `echo base/*.t comp/*.t cmd/*.t io/*.t; echo op/*.t pragma/*.t lib/*.t`); } %infinite = ( 'comp/require.t', 1, 'op/bop.t', 1, 'lib/hostname.t', 1 ); --- 24,30 ---- if ($#ARGV == -1) { @ARGV = split(/[ \n]/, ! `echo base/*.t comp/*.t cmd/*.t io/*.t; echo op/*.t pragma/*.t lib/*.t kanji/*.t`); } %infinite = ( 'comp/require.t', 1, 'op/bop.t', 1, 'lib/hostname.t', 1 ); Index: t/harness ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/t/harness Wed Jan 14 05:17:20 2004 --- jperl5.005_04/t/harness Mon Mar 29 23:56:51 2004 *************** *** 16,22 **** $Test::Harness::verbose = shift if @ARGV && $ARGV[0] eq '-v'; @tests = @ARGV; ! @tests = unless @tests; Test::Harness::runtests @tests; --- 16,22 ---- $Test::Harness::verbose = shift if @ARGV && $ARGV[0] eq '-v'; @tests = @ARGV; ! @tests = unless @tests; Test::Harness::runtests @tests; Index: t/kanji/format.t ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/t/kanji/format.t Mon Jul 18 08:46:18 1994 --- jperl5.005_04/t/kanji/format.t Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,79 ---- + #!./perl + BEGIN { + chdir 't' if -d 't'; + @INC = '../lib' if -d '../lib'; + } + + use I18N::Japanese; + + print "1..4\n"; + + $CAT = (($^O eq 'MSWin32') ? '.\perl -e "print <>"' : 'cat'); + + format XXX = + ^>>>> align + $var + ^>>>> here + $var + . + + open XXX, ">Kanji_xxx.tmp"; + + $var = "あいうえお"; + write XXX; + close XXX; + + if (`$CAT Kanji_xxx.tmp` eq " あい align\n うえ here\n") + {print "ok 1\n"; unlink "Kanji_xxx.tmp";} + else {print "not ok 1\n";} + + + format YYY = + ^>>>>>>>> align + $var + ^>>>>>... here + $var + . + open YYY, ">Kanji_xxx.tmp"; + $var = "あいうえおかきくけこ"; + write YYY; + close YYY; + + if (`$CAT Kanji_xxx.tmp` eq " あいうえ align\n おか... here\n") + {print "ok 2\n"; unlink "Kanji_xxx.tmp";} + else {print "not ok 2\n";} + + + format ZZZ = + ^>>>>>>> align + $var + ^>>>>... here + $var + . + open ZZZ, ">Kanji_xxx.tmp"; + $var = "あいうえおかきくけこ"; + write ZZZ; + close ZZZ; + + if (`$CAT Kanji_xxx.tmp` eq "あいうえ align\nおか... here\n") + {print "ok 3\n"; unlink "Kanji_xxx.tmp";} + else {print "not ok 3\n";} + + + format XXXX = + ^<<<<<<<< align~~ + $var + . + open XXXX, ">Kanji_xxx.tmp"; + $var = "あいうabcde おかきくけこ"; + write XXXX; + close XXXX; + + if (`$CAT Kanji_xxx.tmp` eq < jperl5.005_04 *** jperl5.005_04/t/kanji/op.t Mon Jul 18 08:46:18 1994 --- jperl5.005_04/t/kanji/op.t Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,52 ---- + #!./perl + # + # chop, chomp + # + BEGIN { + chdir 't' if -d 't'; + @INC = '../lib' if -d '../lib'; + } + + use I18N::Japanese; + + print "1..11\n"; + $y = "あいう"; + $x = chomp($y); + if ($x ne 0 || $y ne "あいう") {print "not ok 1\n";} else {print "ok 1\n";}; + + $y = "あいう\n"; + $x = chomp($y); + if ($x ne 1 || $y ne "あいう") {print "not ok 2\n";} else {print "ok 2\n";}; + + $y = "あいう"; + $x = chop($y); + if ($x ne "う" || $y ne "あい") {print "not ok 3\n";} else {print "ok 3\n";}; + + $y = "あいうt"; + $x = chop($y); + if ($x ne "t" || $y ne "あいう") {print "not ok 4\n";} else {print "ok 4\n";}; + + # + # tr + # + $y = "あいうえお"; $y =~ tr/あ-う//cd; + if ($y ne "あいう") {print "not ok 5\n";} else {print "ok 5\n";}; + $y = "あいうえお"; $y =~ tr/あ-う/か-く/; + if ($y ne "かきくえお") {print "not ok 6\n";} else {print "ok 6\n";}; + + $y = 'abcabcabc'; $y =~ tr/a-c/d-f/; + if ($y ne 'defdefdef') {print "not ok 7\n";} else {print "ok 7\n";}; + $y = 'abc'; $y =~ tr/abc/def/; + if ($y ne 'def') {print "not ok 8\n";} else {print "ok 8\n";}; + $y = 'abcabcabc'; $y =~ tr/abc/def/; + if ($y ne 'defdefdef') {print "not ok 9\n";} else {print "ok 9\n";}; + + # + # split + # + @X = split(/(.)/, "abcde"); + if ($#X != 9) {print "not ok 10\n";} else {print "ok 10\n";}; + + @X = split(/(.)/, "あいうtえお"); + $x = $#X; + if ($x != 11) {print "not ok 11\n";} else {print "ok 11\n";}; Index: t/kanji/re_tests ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/t/kanji/re_tests Mon Jul 18 08:46:18 1994 --- jperl5.005_04/t/kanji/re_tests Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,467 ---- + あいう あいう y $& あいう + あいう xいう n - - + あいう あxう n - - + あいう あいx n - - + あいう xあいうy y $& あいう + あいう あいあいう y $& あいう + あい*う あいう y $& あいう + あい*いう あいう y $& あいう + あい*いう あいいう y $& あいいう + あい*いう あいいいいう y $& あいいいいう + .{1} あいいいいう y $& あ + .{3,4} あいいいいう y $& あいいい + あい{0,}いう あいいいいう y $& あいいいいう + あい+いう あいいう y $& あいいう + あい+いう あいう n - - + あい+いう あいq n - - + あい{1,}いう あいq n - - + あい+いう あいいいいう y $& あいいいいう + あい{1,}いう あいいいいう y $& あいいいいう + あい{1,3}いう あいいいいう y $& あいいいいう + あい{3,4}いう あいいいいう y $& あいいいいう + あい{4,5}いう あいいいいう n - - + あい?いう あいいう y $& あいいう + あい?いう あいう y $& あいう + あい{0,1}いう あいう y $& あいう + あい?いう あいいいいう n - - + あい?う あいう y $& あいう + あい{0,1}う あいう y $& あいう + ^あいう$ あいう y $& あいう + ^あいう$ あいうう n - - + ^あいう あいうう y $& あいう + ^あいう$ ああいう n - - + あいう$ ああいう y $& あいう + あいう$ ああいうえ n - - + ^ あいう y $& + $ あいう y $& + あ.う あいう y $& あいう + あ.う あxう y $& あxう + あ.*う あxyzう y $& あxyzう + あ.*う あxyzえ n - - + あ[いう]え あいう n - - + あ[いう]え あいえ y $& あいえ + あ[い-え]e あいえ n - - + あ[い-え]e あうe y $& あうe + あ[い-え] ああう y $& あう + あ[-い] あ- y $& あ- + あ[い-] あ- y $& あ- + あ[い-あ] - c - /あ[い-あ]/: invalid [] range in regexp + あ[]い - c - /あ[]い/: unmatched [] in regexp + あ[ - c - /あ[/: unmatched [] in regexp + あ] あ] y $& あ] + あ[]]い あ]い y $& あ]い + あ[^いう]え あeえ y $& あeえ + あ[^いう]え あいえ n - - + あ[^-い]う あえう y $& あえう + あ[^-い]う あ-う n - - + あ[^]い]う あ]う n - - + あ[^]い]う あえう y $& あえう + あ\sい あ い y - - + あ\sい あ-い n - - + あ\Sい あ い n - - + あ\Sい あ-い y - - + \d 1 y - - + \d - n - - + \D 1 n - - + \D - y - - + あ[\s]い あ い y - - + あ[\s]い あ-い n - - + あ[\S]い あ い n - - + あ[\S]い あ-い y - - + [\d] 1 y - - + [\d] - n - - + [\D] 1 n - - + [\D] - y - - + あい|うえ あいう y $& あい + あい|うえ あいうえ y $& あい + ()ef えef y $&-$1 ef- + *あ - c - /*あ/: ?+*{} follows nothing in regexp + (*)い - c - /(*)い/: ?+*{} follows nothing in regexp + $い い n - - + あ\ - c - Search pattern not terminated + あ\(い あ(い y $&-$1 あ(い- + あ\(*い あい y $& あい + あ\(*い あ((い y $& あ((い + あ\\い あ\い y $& あ\い + あいう) - c - /あいう)/: unmatched () in regexp + (あいう - c - /(あいう/: unmatched () in regexp + ((あ)) あいう y $&-$1-$2 あ-あ-あ + (あ)い(う) あいう y $&-$1-$2 あいう-あ-う + あ+い+う ああいいあいう y $& あいう + あ{1,}い{1,}う ああいいあいう y $& あいう + あ** - c - /あ**/: nested *?+ in regexp + あ.+?う あいうあいう y $& あいう + (あ+|い)* あい y $&-$1 あい-い + (あ+|い){0,} あい y $&-$1 あい-い + (あ+|い)+ あい y $&-$1 あい-い + (あ+|い){1,} あい y $&-$1 あい-い + (あ+|い)? あい y $&-$1 あ-あ + (あ+|い){0,1} あい y $&-$1 あ-あ + )( - c - /)(/: unmatched () in regexp + [^あい]* うえe y $& うえe + あいう n - - + あ* y $& + ([あいう])*え あいいいうえ y $&-$1 あいいいうえ-う + ([あいう])*いうえ あいうえ y $&-$1 あいうえ-あ + あ|い|う|え|e e y $& e + (あ|い|う|え|e)f ef y $&-$1 ef-e + あいうえ*efg あいうえefg y $& あいうえefg + あい* xあいyあいいいz y $& あい + あい* xあyあいいいz y $& あ + (あい|うえ)e あいうえe y $&-$1 うえe-うえ + [あいhgefえう]ij hij y $& hij + ^(あい|うえ)e あいうえe n x$1y xy + (あいう|)ef あいうえef y $&-$1 ef- + (あ|い)う*え あいうえ y $&-$1 いうえ-い + (あい|あい*)いう あいう y $&-$1 あいう-あ + あ([いう]*)う* あいう y $&-$1 あいう-いう + あ([いう]*)(う*え) あいうえ y $&-$1-$2 あいうえ-いう-え + あ([いう]+)(う*え) あいうえ y $&-$1-$2 あいうえ-いう-え + あ([いう]*)(う+え) あいうえ y $&-$1-$2 あいうえ-い-うえ + あ[いうえ]*えうえうえe あえうえうえe y $& あえうえうえe + あ[いうえ]+えうえうえe あえうえうえe n - - + (あい|あ)い*う あいう y $&-$1 あいう-あい + ((あ)(い)う)(え) あいうえ y $1-$2-$3-$4 あいう-あ-い-え + [a-zA-Z_][a-zA-Z0-9_]* alpha y $& alpha + ^あ(いう+|い[eh])g|.h$ あいh y $&-$1 いh- + (いう+え$|ef*g.|h?i(j|k)) effgz y $&-$1-$2 effgz-effgz- + (いう+え$|ef*g.|h?i(j|k)) ij y $&-$1-$2 ij-ij-j + (いう+え$|ef*g.|h?i(j|k)) effg n - - + (いう+え$|ef*g.|h?i(j|k)) いうええ n - - + (いう+え$|ef*g.|h?i(j|k)) reffgz y $&-$1-$2 effgz-effgz- + ((((((((((あ)))))))))) あ y $10 あ + ((((((((((あ))))))))))\10 ああ y $& ああ + ((((((((((あ))))))))))${bang} ああ n - - + ((((((((((あ))))))))))${bang} あ! y $& あ! + (((((((((あ))))))))) あ y $& あ + multiple words of text uh-uh n - - + multiple words multiple words, yeah y $& multiple words + (.*)う(.*) あいうえe y $&-$1-$2 あいうえe-あい-えe + \((.*), (.*)\) (あ, い) y ($2, $1) (い, あ) + [k] あい n - - + あいうえ あいうえ y $&-\$&-\\$& あいうえ-$&-\あいうえ + あ(いう)え あいうえ y $1-\$1-\\$1 いう-$1-\いう + あ[-]?う あう y $& あう + (あいう)\1 あいうあいう y $1 あいう + ([あ-う]*)\1 あいうあいう y $1 あいう + \1 - c - /\1/: reference to nonexistent group + \2 - c - /\2/: reference to nonexistent group + (あ)|\1 あ y - - + (あ)|\1 x n - - + (あ)|\2 - c - /(あ)|\2/: reference to nonexistent group + (([あ-う])い*?\2)* あいあいいいういう y $&-$1-$2 あいあいい-いい-い + (([あ-う])い*?\2){3} あいあいいいういう y $&-$1-$2 あいあいいいういう-ういう-う + ((\3|い)\2(あ)x)+ ああxあいxいあxいいx n - - + ((\3|い)\2(あ)x)+ あああxあいあxいああxいいあx y $&-$1-$2-$3 いいあx-いいあx-い-あ + ((\3|い)\2(あ)){2,} いいああいあいいあいあああああいいああああいいあ y $&-$1-$2-$3 いいああああいいあ-いいあ-い-あ + 'あいう'i あいう y $& あいう + 'あいう'i Xいう n - - + 'あいう'i あXう n - - + 'あいう'i あいX n - - + 'あいう'i XあいうY y $& あいう + 'あいう'i あいあいう y $& あいう + 'あい*う'i あいう y $& あいう + 'あい*いう'i あいう y $& あいう + 'あい*いう'i あいいう y $& あいいう + 'あい*?いう'i あいいいいう y $& あいいいいう + 'あい{0,}?いう'i あいいいいう y $& あいいいいう + 'あい+?いう'i あいいう y $& あいいう + 'あい+いう'i あいう n - - + 'あい+いう'i あいQ n - - + 'あい{1,}いう'i あいQ n - - + 'あい+いう'i あいいいいう y $& あいいいいう + 'あい{1,}?いう'i あいいいいう y $& あいいいいう + 'あい{1,3}?いう'i あいいいいう y $& あいいいいう + 'あい{3,4}?いう'i あいいいいう y $& あいいいいう + 'あい{4,5}?いう'i あいいいいう n - - + 'あい??いう'i あいいう y $& あいいう + 'あい??いう'i あいう y $& あいう + 'あい{0,1}?いう'i あいう y $& あいう + 'あい??いう'i あいいいいう n - - + 'あい??う'i あいう y $& あいう + 'あい{0,1}?う'i あいう y $& あいう + '^あいう$'i あいう y $& あいう + '^あいう$'i あいうう n - - + '^あいう'i あいうう y $& あいう + '^あいう$'i ああいう n - - + 'あいう$'i ああいう y $& あいう + '^'i あいう y $& + '$'i あいう y $& + 'あ.う'i あいう y $& あいう + 'あ.う'i あXう y $& あXう + 'あ.*?う'i あXYZう y $& あXYZう + 'あ.*う'i あXYZえ n - - + 'あ[いう]え'i あいう n - - + 'あ[いう]え'i あいえ y $& あいえ + 'あ[い-え]e'i あいえ n - - + 'あ[い-え]e'i あうE y $& あうE + 'あ[い-え]'i ああう y $& あう + 'あ[-い]'i あ- y $& あ- + 'あ[い-]'i あ- y $& あ- + 'あ[い-あ]'i - c - /あ[い-あ]/: invalid [] range in regexp + 'あ[]い'i - c - /あ[]い/: unmatched [] in regexp + 'あ['i - c - /あ[/: unmatched [] in regexp + 'あ]'i あ] y $& あ] + 'あ[]]い'i あ]い y $& あ]い + 'あ[^いう]え'i あEえ y $& あEえ + 'あ[^いう]え'i あいえ n - - + 'あ[^-い]う'i あえう y $& あえう + 'あ[^-い]う'i あ-う n - - + 'あ[^]い]う'i あ]う n - - + 'あ[^]い]う'i あえう y $& あえう + 'あい|うえ'i あいう y $& あい + 'あい|うえ'i あいうえ y $& あい + '()ef'i えEF y $&-$1 EF- + '*あ'i - c - /*あ/: ?+*{} follows nothing in regexp + '(*)い'i - c - /(*)い/: ?+*{} follows nothing in regexp + '$い'i い n - - + 'あ\'i - c - Search pattern not terminated + 'あ\(い'i あ(い y $&-$1 あ(い- + 'あ\(*い'i あい y $& あい + 'あ\(*い'i あ((い y $& あ((い + 'あ\\い'i あ\い y $& あ\い + 'あいう)'i - c - /あいう)/: unmatched () in regexp + '(あいう'i - c - /(あいう/: unmatched () in regexp + '((あ))'i あいう y $&-$1-$2 あ-あ-あ + '(あ)い(う)'i あいう y $&-$1-$2 あいう-あ-う + 'あ+い+う'i ああいいあいう y $& あいう + 'あ{1,}い{1,}う'i ああいいあいう y $& あいう + 'あ**'i - c - /あ**/: nested *?+ in regexp + 'あ.+?う'i あいうあいう y $& あいう + 'あ.*?う'i あいうあいう y $& あいう + 'あ.{0,5}?う'i あいうあいう y $& あいう + '(あ+|い)*'i あい y $&-$1 あい-い + '(あ+|い){0,}'i あい y $&-$1 あい-い + '(あ+|い)+'i あい y $&-$1 あい-い + '(あ+|い){1,}'i あい y $&-$1 あい-い + '(あ+|い)?'i あい y $&-$1 あ-あ + '(あ+|い){0,1}'i あい y $&-$1 あ-あ + '(あ+|い){0,1}?'i あい y $&-$1 - + ')('i - c - /)(/: unmatched () in regexp + '[^あい]*'i うえE y $& うえE + 'あいう'i n - - + 'あ*'i y $& + '([あいう])*え'i あいいいうえ y $&-$1 あいいいうえ-う + '([あいう])*いうえ'i あいうえ y $&-$1 あいうえ-あ + 'あ|い|う|え|e'i E y $& E + '(あ|い|う|え|e)f'i EF y $&-$1 EF-E + 'あいうえ*efg'i あいうえEFG y $& あいうえEFG + 'あい*'i XあいYあいいいZ y $& あい + 'あい*'i XあYあいいいZ y $& あ + '(あい|うえ)e'i あいうえE y $&-$1 うえE-うえ + '[あいhgefえう]ij'i HIJ y $& HIJ + '^(あい|うえ)e'i あいうえE n x$1y XY + '(あいう|)ef'i あいうえEF y $&-$1 EF- + '(あ|い)う*え'i あいうえ y $&-$1 いうえ-い + '(あい|あい*)いう'i あいう y $&-$1 あいう-あ + 'あ([いう]*)う*'i あいう y $&-$1 あいう-いう + 'あ([いう]*)(う*え)'i あいうえ y $&-$1-$2 あいうえ-いう-え + 'あ([いう]+)(う*え)'i あいうえ y $&-$1-$2 あいうえ-いう-え + 'あ([いう]*)(う+え)'i あいうえ y $&-$1-$2 あいうえ-い-うえ + 'あ[いうえ]*えうえうえe'i あえうえうえE y $& あえうえうえE + 'あ[いうえ]+えうえうえe'i あえうえうえE n - - + '(あい|あ)い*う'i あいう y $&-$1 あいう-あい + '((あ)(い)う)(え)'i あいうえ y $1-$2-$3-$4 あいう-あ-い-え + '[a-zA-Z_][a-zA-Z0-9_]*'i ALPHA y $& ALPHA + '^あ(いう+|い[eh])g|.h$'i あいH y $&-$1 いH- + '(いう+え$|ef*g.|h?i(j|k))'i EFFGZ y $&-$1-$2 EFFGZ-EFFGZ- + '(いう+え$|ef*g.|h?i(j|k))'i IJ y $&-$1-$2 IJ-IJ-J + '(いう+え$|ef*g.|h?i(j|k))'i EFFG n - - + '(いう+え$|ef*g.|h?i(j|k))'i いうええ n - - + '(いう+え$|ef*g.|h?i(j|k))'i REFFGZ y $&-$1-$2 EFFGZ-EFFGZ- + '((((((((((あ))))))))))'i あ y $10 あ + '((((((((((あ))))))))))\10'i ああ y $& ああ + '((((((((((あ))))))))))${bang}'i ああ n - - + '((((((((((あ))))))))))${bang}'i あ! y $& あ! + '(((((((((あ)))))))))'i あ y $& あ + '(?:(?:(?:(?:(?:(?:(?:(?:(?:(あ))))))))))'i あ y $1 あ + '(?:(?:(?:(?:(?:(?:(?:(?:(?:(あ|い|う))))))))))'i う y $1 う + 'multiple words of text'i UH-UH n - - + 'multiple words'i MULTIPLE WORDS, YEAH y $& MULTIPLE WORDS + '(.*)う(.*)'i あいうえE y $&-$1-$2 あいうえE-あい-えE + '\((.*), (.*)\)'i (あ, い) y ($2, $1) (い, あ) + '[k]'i あい n - - + 'あいうえ'i あいうえ y $&-\$&-\\$& あいうえ-$&-\あいうえ + 'あ(いう)え'i あいうえ y $1-\$1-\\$1 いう-$1-\いう + 'あ[-]?う'i あう y $& あう + '(あいう)\1'i あいうあいう y $1 あいう + '([あ-う]*)\1'i あいうあいう y $1 あいう + あ(?!い). あいあえ y $& あえ + あ(?=え). あいあえ y $& あえ + あ(?=う|え). あいあえ y $& あえ + あ(?:い|う|え)(.) あうe y $1 e + あ(?:い|う|え)*(.) あうe y $1 e + あ(?:い|う|え)+?(.) あうe y $1 e + あ(?:い|う|え)+?(.) あうえいうえいe y $1 え + あ(?:い|う|え)+(.) あうえいうえいe y $1 e + あ(?:い|う|え){2}(.) あうえいうえいe y $1 い + あ(?:い|う|え){4,5}(.) あうえいうえいe y $1 い + あ(?:い|う|え){4,5}?(.) あうえいうえいe y $1 え + ((ふー)|(ばー))* ふーばー y $1-$2-$3 ばー-ふー-ばー + :(?: - c - /(?/: Sequence (? incomplete + あ(?:い|う|え){6,7}(.) あうえいうえいe y $1 e + あ(?:い|う|え){6,7}?(.) あうえいうえいe y $1 e + あ(?:い|う|え){5,6}(.) あうえいうえいe y $1 e + あ(?:い|う|え){5,6}?(.) あうえいうえいe y $1 い + あ(?:い|う|え){5,7}(.) あうえいうえいe y $1 e + あ(?:い|う|え){5,7}?(.) あうえいうえいe y $1 い + あ(?:い|(う|e){1,2}?|え)+?(.) あうe y $1$2 うe + ^(.+)?い あい y $1 あ + ^([^a-z])|(\^)$ . y $1 . + ^[<>]& <&OUT y $& <& + ^(あ\1?){4}$ ああああああああああ y $1 ああああ + ^(あ\1?){4}$ あああああああああ n - - + ^(あ\1?){4}$ あああああああああああ n - - + ^(あ(?(1)\1)){4}$ ああああああああああ y $1 ああああ + ^(あ(?(1)\1)){4}$ あああああああああ n - - + ^(あ(?(1)\1)){4}$ あああああああああああ n - - + (?:(ふ)(お)(お)|(ぶ)(あ)(る))* ふおおぶある y $1:$2:$3:$4:$5:$6 ふ:お:お:ぶ:あ:る + (?<=あ)い あい y $& い + (?<=あ)い うい n - - + (?<=あ)い い n - - + (?あ+)あい あああい n - - + (?>あ+)い あああい y - - + ([[:]+) あ:[い]: y $1 :[ + ([[=]+) あ=[い]= y $1 =[ + ([[.]+) あ.[い]. y $1 .[ + [あ[:xyz: - c - /[あ[:xyz:/: unmatched [] in regexp + [あ[:xyz:] - c - /[あ[:xyz:]/: unmatched [] in regexp + ([あ[:xyz:]い]+) pいあq y $1 いあ + ((?>あ+)い) あああい y $1 あああい + (?>(あ+))い あああい y $1 あああ + ((?>[^()]+)|\([^()]*\))+ ((あいう(あえe)ufh()()x y $& あいう(あえe)ufh()()x + (?<=x+)y - c - /(?<=x+)y/: variable length lookbehind not implemented + あ{37,17} - c - /あ{37,17}/: Can't do {n,m} with n > m + あ\Z あ\nい\n n - - + い\Z あ\nい\n y - - + い\z あ\nい\n n - - + い\Z あ\nい y - - + い\z あ\nい y - - + い 討つ n - - + あ.う あいう y $& あいう Index: t/kanji/regexp.t ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/t/kanji/regexp.t Mon Jul 18 08:46:18 1994 --- jperl5.005_04/t/kanji/regexp.t Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,100 ---- + #!./perl + + # XXX known to leak scalars + $ENV{PERL_DESTRUCT_LEVEL} = 0 unless $ENV{PERL_DESTRUCT_LEVEL} > 3; + + # The tests are in a separate file 't/op/re_tests'. + # Each line in that file is a separate test. + # There are five columns, separated by tabs. + # + # Column 1 contains the pattern, optionally enclosed in C<''>. + # Modifiers can be put after the closing C<'>. + # + # Column 2 contains the string to be matched. + # + # Column 3 contains the expected result: + # y expect a match + # n expect no match + # c expect an error + # + # Columns 4 and 5 are used only if column 3 contains C or C. + # + # Column 4 contains a string, usually C<$&>. + # + # Column 5 contains the expected result of double-quote + # interpolating that string after the match, or start of error message. + # + # \n in the tests are interpolated, as are variables of the form ${\w+}. + # + # If you want to add a regular expression test that can't be expressed + # in this format, don't add it here: put it in op/pat.t instead. + + BEGIN { + chdir 't' if -d 't'; + @INC = '../lib' if -d '../lib'; + } + use I18N::Japanese; + + $iters = shift || 1; # Poor man performance suite, 10000 is OK. + + open(TESTS,'kanji/re_tests') || open(TESTS,'t/kanji/re_tests') || + die "Can't open re_tests"; + + while () { } + $numtests = $.; + seek(TESTS,0,0); + $. = 0; + + $bang = sprintf "\\%03o", ord "!"; # \41 would not be portable. + + $| = 1; + print "1..$numtests\n# $iters iterations\n"; + TEST: + while () { + if (/^#/) { print "ok $.\n"; next TEST } + chomp; + s/\\n/\n/g; + ($pat, $subject, $result, $repl, $expect) = split(/\t/,$_); + $input = join(':',$pat,$subject,$result,$repl,$expect); + infty_subst(\$pat); + infty_subst(\$expect); + $pat = "'$pat'" unless $pat =~ /^[:']/; + $pat =~ s/\\n/\n/g; + $pat =~ s/(\$\{\w+\})/$1/eeg; + $subject =~ s/\\n/\n/g; + $expect =~ s/\\n/\n/g; + $expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/; + for $study ("", "study \$subject") { + $c = $iters; + eval "$study; \$match = (\$subject =~ m$pat) while \$c--; \$got = \"$repl\";"; + chomp( $err = $@ ); + if ($result eq 'c') { + if ($err !~ m!^\Q$expect!) { print "not ok $. (compile) $input: => `$err'\n"; next TEST } + last; # no need to study a syntax error + } + elsif ($@) { + print "$result\n"; + print "not ok $. $input => error `$err'\n"; next TEST; + } + elsif ($result eq 'n') { + if ($match) { print "not ok $. ($study) $input => false positive\n"; next TEST } + } + else { + if (!$match || $got ne $expect) { + print "not ok $. ($study) $input => `$got', match=$match\n"; + next TEST; + } + } + } + print "ok $.\n"; + } + + close(TESTS); + + sub infty_subst # Special-case substitution + { # of $reg_infty and friends + my $tp = shift; + $$tp =~ s/,\$reg_infty_m}/,$reg_infty_m}/o; + $$tp =~ s/,\$reg_infty_p}/,$reg_infty_p}/o; + $$tp =~ s/,\$reg_infty}/,$reg_infty}/o; + } Index: t/kanji/sjis.t ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/t/kanji/sjis.t Mon Jul 18 08:46:18 1994 --- jperl5.005_04/t/kanji/sjis.t Mon Mar 29 23:56:51 2004 *************** *** 0 **** --- 1,3 ---- + #!./perl + print "1..1\n"; + print "ok 1\n"; Index: toke.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/toke.c Wed Jan 14 05:17:35 2004 --- jperl5.005_04/toke.c Mon Mar 29 23:56:51 2004 *************** *** 625,630 **** --- 625,632 ---- if (s + 1 < send && (s[1] == '\\')) s++; /* all that, just for this */ } + if ((PL_hints & HINT_KANJI_STRING) && iskanji(*s) && s < send) + *d++ = *s++; *d++ = *s++; } *d = '\0'; *************** *** 876,906 **** I32 max; /* last character in range */ i = d - SvPVX(sv); /* remember current offset */ ! SvGROW(sv, SvLEN(sv) + 256); /* expand the sv -- there'll never be more'n 256 chars in a range for it to grow by */ ! d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */ ! d -= 2; /* eat the first char and the - */ ! min = (U8)*d; /* first char in range */ ! max = (U8)d[1]; /* last char in range */ #ifndef ASCIIish ! if ((isLOWER(min) && isLOWER(max)) || ! (isUPPER(min) && isUPPER(max))) { ! if (isLOWER(min)) { ! for (i = min; i <= max; i++) ! if (isLOWER(i)) ! *d++ = i; ! } else { ! for (i = min; i <= max; i++) ! if (isUPPER(i)) ! *d++ = i; } ! } ! else #endif ! for (i = min; i <= max; i++) ! *d++ = i; ! /* mark the range as done, and continue */ dorange = FALSE; continue; --- 878,922 ---- I32 max; /* last character in range */ i = d - SvPVX(sv); /* remember current offset */ ! if ((PL_hints & HINT_KANJI_TR) ! && kpart(SvPVX(sv), d-1) == KPART_KANJI_2 ! && kpart(SvPVX(sv), d-3) == KPART_KANJI_2) { ! /* We have a kanji-range here */ ! max = twochar_to_int(d[-2], d[-1]); ! min = twochar_to_int(d[-4], d[-3]); ! SvGROW(sv, SvLEN(sv) + (max-min)*2); ! d = SvPVX(sv) + i; ! d -= 4; ! for (i = min; i <= max; i = jnextcode(i)) { ! *d++ = int_to_1stbyte(i); ! *d++ = int_to_2ndbyte(i); ! } ! } else { ! SvGROW(sv, SvLEN(sv) + 256); /* expand the sv -- there'll never be more'n 256 chars in a range for it to grow by */ ! d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */ ! d -= 2; /* eat the first char and the - */ ! min = (U8)*d; /* first char in range */ ! max = (U8)d[1]; /* last char in range */ #ifndef ASCIIish ! if ((isLOWER(min) && isLOWER(max)) || ! (isUPPER(min) && isUPPER(max))) { ! if (isLOWER(min)) { ! for (i = min; i <= max; i++) ! if (isLOWER(i)) ! *d++ = i; ! } else { ! for (i = min; i <= max; i++) ! if (isUPPER(i)) ! *d++ = i; ! } } ! else #endif ! for (i = min; i <= max; i++) ! *d++ = i; ! } /* mark the range as done, and continue */ dorange = FALSE; continue; *************** *** 1011,1022 **** --- 1027,1042 ---- case '4': case '5': case '6': case '7': *d++ = scan_oct(s, 3, &len); s += len; + if ((PL_hints & HINT_KANJI_TR) && PL_lex_inwhat == OP_TRANS) + yylval.opval->op_private &= ~OPpTRANS_KANJI; continue; /* \x24 indicates a hex constant */ case 'x': *d++ = scan_hex(++s, 2, &len); s += len; + if ((PL_hints & HINT_KANJI_TR) && PL_lex_inwhat == OP_TRANS) + yylval.opval->op_private &= ~OPpTRANS_KANJI; continue; /* \c is a control character */ *************** *** 1061,1066 **** --- 1081,1089 ---- continue; } /* end if (backslash) */ + if ((PL_hints & HINT_KANJI_STRING) && iskanji(*s) && s < send) + *d++ = *s++; + *d++ = *s++; } /* while loop to process each character */ *************** *** 5180,5187 **** croak("Transliteration replacement not terminated"); } ! New(803,tbl,256,short); ! o = newPVOP(OP_TRANS, 0, (char*)tbl); complement = Delete = squash = 0; while (*s == 'c' || *s == 'd' || *s == 's') { --- 5203,5210 ---- croak("Transliteration replacement not terminated"); } ! o = newPVOP(OP_TRANS, 0, 0); ! /* tbl is allocated at pmtrans since it's size is is no longer fixed */ complement = Delete = squash = 0; while (*s == 'c' || *s == 'd' || *s == 's') { *************** *** 5194,5199 **** --- 5217,5224 ---- s++; } o->op_private = Delete|squash|complement; + if (PL_hints & HINT_KANJI_TR) + o->op_private |= OPpTRANS_KANJI; PL_lex_op = o; yylval.ival = OP_TRANS; *************** *** 5608,5613 **** --- 5633,5641 ---- have found the terminator */ else if (*s == term) break; + else if ((PL_hints & HINT_KANJI_STRING) && iskanji(s[0]) && iskanji2(s[1]) && s < PL_bufend-1) { + *to++ = *s++; + } *to = *s; } } *************** *** 5634,5639 **** --- 5662,5669 ---- break; else if (*s == PL_multi_open) brackets++; + else if ((PL_hints & HINT_KANJI_STRING) && iskanji(s[0]) && iskanji2(s[1]) && s < PL_bufend-1) + *to++ = *s++; *to = *s; } } *************** *** 5986,5991 **** --- 6016,6026 ---- eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); if (*s != '#') { for (t = s; t < eol; t++) { + if ((PL_hints & HINT_KANJI_STRING) && iskanji(*t)) { + t++; + continue; + } + if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { needargs = FALSE; goto enough; /* ~~ must be first line in formline */ Index: util.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/util.c Wed Jan 14 05:17:36 2004 --- jperl5.005_04/util.c Mon Mar 29 23:56:51 2004 *************** *** 393,398 **** --- 393,399 ---- { register char *s, *x; register I32 first; + char *tops = big; if (!little) return big; *************** *** 400,405 **** --- 401,410 ---- if (!first) return big; while (*big) { + if ((PL_hints & HINT_KANJI_STRING) && kpart(tops, big) == KPART_KANJI_2) { + big++; + continue; + } if (*big++ != first) continue; for (x=big,s=little; *s; /**/ ) { *************** *** 424,429 **** --- 429,435 ---- register char *s, *x; register I32 first = *little; register char *littleend = lend; + char *tops = big; if (!first && little >= littleend) return big; *************** *** 431,436 **** --- 437,446 ---- return Nullch; bigend -= littleend - little++; while (big <= bigend) { + if ((PL_hints & HINT_KANJI_STRING) && kpart(tops, big) == KPART_KANJI_2) { + big++; + continue; + } if (*big++ != first) continue; for (x=big,s=little; s < littleend; /**/ ) { *************** *** 460,465 **** --- 470,479 ---- bigbeg = big; big = bigend - (littleend - little++); while (big >= bigbeg) { + if ((PL_hints & HINT_KANJI_STRING) && kpart(bigbeg, big) == KPART_KANJI_2) { + big--; + continue; + } if (*big-- != first) continue; for (x=big+2,s=little; s < littleend; /**/ ) { *************** *** 927,932 **** --- 941,948 ---- s = (unsigned char*)(SvPVX(sv)); /* deeper magic */ for (i = 0; i < len; i++) { + if ((PL_hints & HINT_KANJI_STRING) && kpart((char *)s, (char *)s+i) == KPART_KANJI_2) + continue; if (freq[s[i]] < frequency) { rarest = i; frequency = freq[s[i]]; *************** *** 947,952 **** --- 963,969 ---- register unsigned char *table; register unsigned char *olds; register unsigned char *oldlittle; + unsigned char *tops = big; if (SvTYPE(littlestr) != SVt_PVBM || !SvVALID(littlestr)) { STRLEN len; *************** *** 979,987 **** s = bigend - littlelen; if (s > big && bigend[-1] == '\n' ! && s[-1] == *little && memEQ((char*)s - 1,(char*)little,littlelen)) return (char*)s - 1; /* how sweet it is */ ! else if (*s == *little && memEQ((char*)s,(char*)little,littlelen)) return (char*)s; /* how sweet it is */ return Nullch; } --- 996,1010 ---- s = bigend - littlelen; if (s > big && bigend[-1] == '\n' ! && s[-1] == *little ! && (!(PL_hints & HINT_KANJI_STRING) ! || kpart((char *)tops,(char *)s - 1) != KPART_KANJI_2) ! && memEQ((char*)s - 1,(char*)little,littlelen)) return (char*)s - 1; /* how sweet it is */ ! else if (*s == *little ! && (!(PL_hints & HINT_KANJI_STRING) ! || kpart((char *)tops,(char *)s)!=KPART_KANJI_2) ! && memEQ((char*)s,(char*)little,littlelen)) return (char*)s; /* how sweet it is */ return Nullch; } *************** *** 995,1000 **** --- 1018,1025 ---- bigend -= littlelen; while (s <= bigend) { if (s[0] == c1 + && (!(PL_hints & HINT_KANJI_STRING) + || kpart((char *)tops,(char *)s)!=KPART_KANJI_2) && (littlelen == 1 || s[1] == c2) && (!SvTAIL(littlestr) || s == bigend *************** *** 1039,1044 **** --- 1064,1077 ---- goto top2; return Nullch; } + if ((PL_hints & HINT_KANJI_STRING) + && kpart((char *)tops,(char *)s) == 2) { + s = olds + 1; + little = oldlittle; + if (s < bigend) + goto top2; + return Nullch; + } if (SvTAIL(littlestr) /* automatically multiline */ && olds + 1 != bigend && olds[1] != '\n') *************** *** 1130,1135 **** --- 1163,1170 ---- #endif /* POINTERRIGOR */ } + #define ISKANJI(c) ((PL_hints & HINT_KANJI_REGEXP) && iskanji(c)) + I32 ibcmp(char *s1, char *s2, register I32 len) { *************** *** 1138,1143 **** --- 1173,1184 ---- while (len--) { if (*a != *b && *a != fold[*b]) return 1; + if (ISKANJI(*a) && a[1] && b[1]) { + len--; + a++,b++; + if (*a != *b) + return 1; + } a++,b++; } return 0; *************** *** 1151,1156 **** --- 1192,1203 ---- while (len--) { if (*a != *b && *a != fold_locale[*b]) return 1; + if (ISKANJI(*a) && a[1] && b[1]) { + len--; + a++,b++; + if (*a != *b) + return 1; + } a++,b++; } return 0; Index: win32/Makefile ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/win32/Makefile Mon Feb 23 22:45:19 2004 --- jperl5.005_04/win32/Makefile Tue Mar 30 00:04:57 2004 *************** *** 279,285 **** $(PCHFLAGS) $(OPTIMIZE) LINK_FLAGS = -nologo -nodefaultlib $(LINK_DBG) \ -libpath:"$(INST_COREDIR)" \ ! -machine:$(PROCESSOR_ARCHITECTURE) OBJOUT_FLAG = -Fo EXEOUT_FLAG = -Fe --- 279,287 ---- $(PCHFLAGS) $(OPTIMIZE) LINK_FLAGS = -nologo -nodefaultlib $(LINK_DBG) \ -libpath:"$(INST_COREDIR)" \ ! -machine:$(PROCESSOR_ARCHITECTURE) \ ! -stack:0x200000 ! OBJOUT_FLAG = -Fo EXEOUT_FLAG = -Fe *************** *** 398,404 **** ..\taint.c \ ..\toke.c \ ..\universal.c \ ! ..\util.c !IF "$(PERL_MALLOC)" == "define" EXTRACORE_SRC = $(EXTRACORE_SRC) ..\malloc.c --- 400,407 ---- ..\taint.c \ ..\toke.c \ ..\universal.c \ ! ..\util.c \ ! ..\kanji.c !IF "$(PERL_MALLOC)" == "define" EXTRACORE_SRC = $(EXTRACORE_SRC) ..\malloc.c Index: win32/makefile.mk ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/win32/makefile.mk Mon Feb 23 22:45:19 2004 --- jperl5.005_04/win32/makefile.mk Mon Mar 29 23:56:51 2004 *************** *** 499,504 **** --- 499,505 ---- ..\globals.c \ ..\gv.c \ ..\hv.c \ + ..\kanji.c \ ..\mg.c \ ..\op.c \ ..\perl.c \ Index: win32/win32.c ####### jperl5.005_04/ => jperl5.005_04 *** jperl5.005_04/win32/win32.c Wed Jan 14 05:17:52 2004 --- jperl5.005_04/win32/win32.c Tue Mar 30 01:32:44 2004 *************** *** 656,663 **** WIN32_FIND_DATA FindData; HANDLE fh; ! len = strlen(filename); ! if (len > MAX_PATH) return NULL; /* check to see if filename is a directory */ --- 656,663 ---- WIN32_FIND_DATA FindData; HANDLE fh; ! len = strlen(filename) - 1; ! if (len > MAX_PATH || len <= 0) return NULL; /* check to see if filename is a directory */ *************** *** 677,683 **** scanname[len++] = '.'; scanname[len++] = '/'; } ! else if (scanname[len-1] != '/' && scanname[len-1] != '\\') { scanname[len++] = '/'; } scanname[len++] = '*'; --- 677,684 ---- scanname[len++] = '.'; scanname[len++] = '/'; } ! else if ((scanname[len-1] != '/' && scanname[len-1] != '\\') || ! iskanji(scanname[len-2])) { scanname[len++] = '/'; } scanname[len++] = '*'; End of Patch.