テキスト整形
このセクションでは、Web上からとったテキストを整形する方法を説明します。
以下の処理を目標にします。
・ゴミ取り...日本語システムで文字化けするような文字を書き換える（注：作者やサーバによってどのような文字が出るかはまちまちです）
・文ごとに改行する（注：100％完璧にはできません）
データの読み込み
ここでは、targetフォルダ内にあるwowcat.txtをサンプルとして使用します。まず、このテキストをリストに読み込みます。
--
#!/usr/local/bin/perl
# Read the sample text file into @array, one element per input line.
use File::Spec;                 # joins path parts with the separator of the running OS

$directory = "target";          # name of the folder that holds the input file
$out_dir   = "data";            # output files are written to this directory (used later)
$file      = "wowcat\.txt";     # input file name; written as "\." here, but the "\" is optional
$in = File::Spec->catfile($directory, $file);   # relative path to the input file

# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle stays private to this snippet.
open(my $in_fh, '<', $in) or die "$in\n$!\n";
while (my $line = <$in_fh>) {   # one iteration per line in the file
    push(@array, $line);        # store the line (newline included) in @array
}
close($in_fh);                  # done reading
--
途中で改行された文をつなぐ
次に、テストデータは空白行で始まる箇所とそうでない箇所があり、文も大部分が途中で切れてしまっています。これを段落の切れ目だけが改行され、あとはつながっている文として整形することを考えます。
Web上では段落を表すタグは<P>と書かれます。これは見た目には段落の切れ目に一行の空白行を入れます。この空白行を段落の切れ目として判断することにし、あとの改行は削除して前後をスペースでつなげます。
--
# join_wrapped_lines(@lines)
#   Takes the lines of a text and returns a new list in which every run of
#   consecutive non-blank lines has been joined into one element, the pieces
#   separated by a single space. Blank lines are kept as "\n" elements and
#   act as paragraph boundaries.
sub join_wrapped_lines {
    my @lines = @_;             # work on a copy; the caller's list is untouched
    my @joined;                 # result, built front to back (no splice needed)
    foreach my $line (@lines) {
        $line =~ s/^ +//;                       # drop leading spaces on every line, the first included
        $line = "\n" if $line =~ /^\s*$/;       # a whitespace-only line is also a paragraph break
        # Start a new element for a blank line, for the very first line,
        # and for the first line after a blank line.
        if ($line eq "\n" || !@joined || $joined[-1] eq "\n") {
            push(@joined, $line);
            next;
        }
        # Otherwise this line continues the previous one: turn the previous
        # line ending (with any trailing blanks or CR) into one space and append.
        $joined[-1] =~ s/[ \t\r]*\n$/ /;
        $joined[-1] .= $line;
    }
    return @joined;
}

@array = join_wrapped_lines(@array);

# The loop below only prints the result so it can be checked by eye.
foreach $yoso (@array) {
    print $yoso;
}
--
１文ずつに分割する
次に、文ごとに改行する方法について考えます。
文は「.」「?」「!」の後に空白文字（スペースか改行）や引用符、カッコがある場合に区切りであると考えることができます。ただ、次のような場合は厄介です。
Mr. Downey is very optimistic and upbeat.
U.S. officials say they lack even a recent photograph.
上記で「Mr.」や「U.S.」または「U.」と「S.」で改行するのはまずいので、まず考えられるこれらの文字列をデータとして記録します。ここではスカラー変数$dontに格納します。
--
# Abbreviations after which a period does not end a sentence, written as
# regular-expression alternatives. The pieces are joined with "|" so the
# resulting string can be dropped into a pattern as one alternation.
$dont = join("|",
    "\[A-Z\]",                                          # a single capital letter
    "M(r|s|rs)", "Dr",                                  # titles
    "Calif", "V[Aa]", "[MS][Tt]",
    "Jan", "Feb", "Mar", "Apr", "Aug", "Sep(t|)", "Oct", "Nov", "Dec",
    "Assoc", "Co", "Gov", "Se(n|c)", "Ont",
    "i\\\.e", "e\\\.g",                                 # "\\" gives "\", "\." gives "."
    "v(s|)", "Pa", "Fla", "Re(p|v)", "Gen", "Univ", "Jr",
    "[fF]t", "[Ss]gt", "[Pp]res", "[Pp]rof",
);
--
上記のデータは、以下の語を表します。
A. B. C. ... Z.
Mr. Ms. Mrs.
Dr.
Calif.
VA. Va.
MT. Mt. ST. St.
Jan. Feb. Mar. Apr. Aug. Sep. Sept. Oct. Nov. Dec.
Assoc.
Co.
Gov.
Sen. Sec.
Ont.
i.e.
e.g.
v. vs.
Pa.
Fla.
Rep. Rev.
Gen.
Univ.
Jr.
ft. Ft.
Sgt. sgt.
Pres. pres.
Prof. prof.
それでは、実際にセンテンスごとに改行する作業を考えます。
--
# Abbreviations whose trailing period must not be treated as a sentence end
# (regular-expression alternatives, interpolated into the pattern below).
$dont="\[A-Z\]|M(r|s|rs)|Dr|Calif|V[Aa]|[MS][Tt]|Jan|Feb|Mar|Apr|Aug|Sep(t|)|Oct|Nov|Dec|Assoc|Co|Gov|Se(n|c)|Ont|i\\\.e|e\\\.g|v(s|)|Pa|Fla|Re(p|v)|Gen|Univ|Jr|[fF]t|[Ss]gt|[Pp]res|[Pp]rof";

# split_sentences($text)
#   Returns $text with the space after each sentence end replaced by a
#   newline. A sentence end is ".", "!" or "?", optionally followed by a
#   closing double quote or a closing parenthesis, and then a space.
#   Reads the global $dont to undo breaks made after known abbreviations.
sub split_sentences {
    my ($text) = @_;
    $text =~ s/\? /\?\n/g;              # "?" + space  -> line break
    $text =~ s/! /!\n/g;                # "!" + space  -> line break
    $text =~ s/([.!?])" /$1"\n/g;       # ." !" ?" + space -> line break
    $text =~ s/([.!?])\) /$1)\n/g;      # .) !) ?) + space -> line break
    $text =~ s/\. /\.\n/g;              # "." + space  -> line break
    # Undo the break after an abbreviation from $dont. The (?!\z) keeps the
    # text's own final newline: only breaks inserted above are followed by
    # more text, so a paragraph ending in "Pa." still ends with a newline.
    $text =~ s/\b($dont)\.\n(?!\z)/$1. /g;
    return $text;
}

foreach $yoso (@array) {
    $yoso = split_sentences($yoso);     # $yoso aliases the element, so @array is updated in place
    print $yoso;
}
--
$dont変数中、i\\\.eなどの記述がありますが、これは「\\」と書いて文字である「\」を、「\.」と書いて文字である「.」を表すためです。同様に「\[」は文字である「[」を、「\]」は文字である「]」を表すためです。引用符「""」で囲った文字列中には、「\n」のようなメタキャラクタが記述できますが、これを「//」で囲った正規表現で用いる場合は、上記のような記述の仕方になります。perlでは他に「''」という、シングルクォートで囲った文字列の表記法がありますが、この場合はメタキャラクタは働きません。
ごみ取り
次に「ごみ取り」の処理を考えます。この処理はデータやユーザーが必要としているテキストの種類によって左右されますので、各自で必要なデータを追加・修正して下さい。
例：
# Example cleanup substitutions, applied in this order to one text element
# held in $yoso. Adjust the list to the data being processed.
# NOTE(review): the non-ASCII bytes inside several patterns below look
# mis-encoded in this copy of the file; confirm them against the original
# script before relying on these lines.
$yoso =~ s/<[^<>]*>//g; # remove tags
$yoso =~ s/\s+\n$/\n/; # remove whitespace in front of the final line break
$yoso =~ s/ {2,}/ /g; # collapse a run of two or more spaces into one space
$yoso =~ s/^ +//; # remove leading spaces
$yoso =~ s/ŽÑ/-/g; # replace a character that shows up garbled
$yoso =~ s/-/-/g; # replace a character that shows up garbled (NOTE(review): as written this replaces "-" with "-"; the original left-hand character was probably lost)
$yoso =~ s/Žý/(c)/g; # replace a character that shows up garbled
$yoso =~ s/¡Ç/'/g; # replace a two-byte character
$yoso =~ s/¡È|¡É/"/g; # replace two-byte characters
$yoso =~ s/¡¦/*/g; # replace a two-byte character
$yoso =~ s/([^ ])--([^ ])/$1 -- $2/g; # insert spaces around "--" in word--word
$yoso =~ s/ --([^ ])/ -- $1/g; # insert a space after "--" in " --word"
$yoso =~ s/ -([^ -])/ - $1/g; # insert a space after "-" in " -word"
$yoso =~ s/ \? / -- /g; # replace the pattern " ? " with " -- "
このような作業は、センテンス分割処理と同時にできますので、そのループ内にこれらの文を埋め込みます。
ファイルへの書き出し
ファイルを書き込みでオープンします。ファイルへ書き込む場合もprintを使います。
# Write every element of @array to the output file.
# The output path is the input path with the input directory name replaced
# by $out_dir; \Q...\E makes the directory name match literally even if it
# contains regex metacharacters such as ".".
$in =~ s/\Q$directory\E/$out_dir/;
# Three-argument open with a lexical handle, opened for writing.
open(my $out_fh, '>', $in) or die "Can't create $in\n$!\n";
foreach $yoso (@array) {
    print {$out_fh} $yoso;      # naming a file handle after print sends the output to the file
}
# Buffered write errors only surface at close, so check it.
close($out_fh) or die "Can't write $in\n$!\n";
先ほどの改行処理のループ内に「ゴミ取り」と「書き出し」を追加すると以下のようになります。
--
# Final pass: clean up each element of @array, split it into one sentence
# per line, and write the result to the output file.

# Abbreviations whose trailing period must not be treated as a sentence end
# (regular-expression alternatives, interpolated into the pattern below).
$dont="\[A-Z\]|M(r|s|rs)|Dr|Calif|V[Aa]|[MS][Tt]|Jan|Feb|Mar|Apr|Aug|Sep(t|)|Oct|Nov|Dec|Assoc|Co|Gov|Se(n|c)|Ont|i\\\.e|e\\\.g|v(s|)|Pa|Fla|Re(p|v)|Gen|Univ|Jr|[fF]t|[Ss]gt|[Pp]res|[Pp]rof";

# Output path = input path with the input directory name replaced by
# $out_dir; \Q...\E makes the directory name match literally.
$in =~ s/\Q$directory\E/$out_dir/;
open(my $out_fh, '>', $in) or die "Can't create $in\n$!\n";

foreach $yoso (@array) {
    # --- cleanup ---
    # NOTE(review): the non-ASCII bytes inside several patterns below look
    # mis-encoded in this copy of the file; confirm against the original.
    $yoso =~ s/<[^<>]*>//g;                 # remove tags
    $yoso =~ s/\s+\n$/\n/;                  # remove whitespace in front of the final line break
    $yoso =~ s/ {2,}/ /g;                   # collapse runs of spaces
    $yoso =~ s/^ +//;                       # remove leading spaces
    $yoso =~ s/ŽÑ/-/g;                      # replace a character that shows up garbled
    $yoso =~ s/-/-/g;                       # as written: "-" replaced by "-" (original character probably lost)
    $yoso =~ s/Žý/(c)/g;                    # replace a character that shows up garbled
    $yoso =~ s/¡Ç/'/g;                      # replace a two-byte character
    $yoso =~ s/¡È|¡É/"/g;                   # replace two-byte characters
    $yoso =~ s/¡¦/*/g;                      # replace a two-byte character
    $yoso =~ s/([^ ])--([^ ])/$1 -- $2/g;   # word--word  -> word -- word
    $yoso =~ s/ --([^ ])/ -- $1/g;          # " --word"   -> " -- word"
    $yoso =~ s/ -([^ -])/ - $1/g;           # " -word"    -> " - word"
    $yoso =~ s/ \? / -- /g;                 # " ? "       -> " -- "

    # --- sentence splitting ---
    $yoso =~ s/\? /\?\n/g;                  # "?" + space  -> line break
    $yoso =~ s/! /!\n/g;                    # "!" + space  -> line break
    $yoso =~ s/([.!?])" /$1"\n/g;           # ." !" ?" + space -> line break
    $yoso =~ s/([.!?])\) /$1)\n/g;          # .) !) ?) + space -> line break
    $yoso =~ s/\. /\.\n/g;                  # "." + space  -> line break
    # Undo the break after an abbreviation from $dont. The (?!\z) keeps the
    # element's own final newline, so a paragraph that ends in "Pa." or
    # "Jr." is still terminated by a line break.
    $yoso =~ s/\b($dont)\.\n(?!\z)/$1. /g;

    print {$out_fh} $yoso;
}

# Buffered write errors only surface at close, so check it.
close($out_fh) or die "Can't write $in\n$!\n";
exit (0);
--
スクリプト全体…edit.pl