#!/usr/local/bin/perl
#
# Reflow a hard-wrapped text file and split it into one sentence per line.
#
# Steps:
#   1. Read every line of the input file.
#   2. Join consecutive non-blank lines with a single space; lines that are
#      exactly "\n" are kept as separate elements and act as boundaries.
#   3. Clean each resulting element (strip <...> tags, squeeze spaces,
#      normalise some punctuation and dashes).
#   4. Break each element after sentence-ending punctuation, then undo the
#      break after the abbreviations listed in $dont.
#   5. Write the result to the same file name under the output directory.
#
# Dies with the path and $! when the input cannot be opened or the output
# cannot be created or closed.

use strict;
use warnings;

my $directory = "target";
my $out_dir   = "data";
my $file      = "wowcat.txt";

# NOTE(review): a leading ':' plus ':' separators looks like classic Mac OS
# relative-path syntax -- confirm the platform this is meant to run on.
my $separator = ":";

my $in  = $separator . $directory . $separator . $file;
my $out = $separator . $out_dir . $separator . $file;

# --- Read the whole input file ------------------------------------------
open(my $in_fh, '<', $in) or die "$in\n$!\n";
my @lines;
while (my $line = <$in_fh>) {
    push @lines, $line;
}
close $in_fh;

# --- Join wrapped lines with a space ------------------------------------
# Build a new list instead of deleting elements from the array while
# walking it; the result is the same, without repeated splicing.
my @array;
for my $line (@lines) {
    # The very first line and blank lines are kept untouched.
    if (!@array || $line eq "\n") {
        push @array, $line;
        next;
    }

    # Remove unneeded spaces at the start of the line.
    $line =~ s/^ +//;

    # When the previous element is a blank line, only the leading-space
    # cleanup applies: this line starts a new element.
    if ($array[-1] eq "\n") {
        push @array, $line;
        next;
    }

    # Otherwise replace the previous element's trailing newline with a
    # space and append the current line to it.
    $array[-1] =~ s/ *\n$/ /;
    $array[-1] .= $line;
}

# Abbreviations after which a period does not end a sentence. The string
# is interpolated into a regex below, so it holds regex alternatives
# ("[A-Z]" matches any single capital letter, e.g. an initial).
my $dont = "\[A-Z\]|M(r|s|rs)|Dr|Calif|V[Aa]|[MS][Tt]|Jan|Feb|Mar|Apr|Aug|Sep(t|)|Oct|Nov|Dec|Assoc|Co|Gov|Se(n|c)|Ont|i\\\.e|e\\\.g|v(s|)|Pa|Fla|Re(p|v)|Gen|Univ|Jr|[fF]t|[Ss]gt|[Pp]res|[Pp]rof";

# --- Clean up, split into sentences, write ------------------------------
open(my $out_fh, '>', $out) or die "Can't create $out\n$!\n";
for my $yoso (@array) {
    # Garbage-removal part.
    $yoso =~ s/<[^<>]*>//g;      # drop <...> tags
    $yoso =~ s/\s+\n$/\n/;       # trim whitespace before the final newline
    $yoso =~ s/ {2,}/ /g;        # squeeze runs of spaces
    $yoso =~ s/^ +//;            # strip leading spaces

    # NOTE(review): the non-ASCII patterns below appear to be mis-encoded
    # punctuation (dash, copyright sign, curly quotes, bullet). They are
    # reproduced exactly as found; confirm them against the encoding of the
    # real input before relying on them.
    $yoso =~ s/ŽÑ/-/g;
    # NOTE(review): as written this replaces "-" with "-" and changes
    # nothing; presumably the pattern once held a non-ASCII dash -- confirm.
    $yoso =~ s/-/-/g;
    $yoso =~ s/Žý/(c)/g;
    $yoso =~ s/¡Ç/'/g;
    $yoso =~ s/¡È|¡É/"/g;
    $yoso =~ s/¡¦/*/g;

    # Put spaces around dashes.
    $yoso =~ s/([^ ])--([^ ])/$1 -- $2/g;
    $yoso =~ s/ --([^ ])/ -- $1/g;
    $yoso =~ s/ -([^ -])/ - $1/g;
    # A "?" standing alone between spaces is turned into a dash; this must
    # run before the sentence split on "? " below.
    $yoso =~ s/ \? / -- /g;

    # Sentence-splitting part: break the line after ending punctuation.
    $yoso =~ s/\? /\?\n/g;
    $yoso =~ s/! /!\n/g;
    $yoso =~ s/(\.|!|\?)\" /$1\"\n/g;
    $yoso =~ s/\.\) /\.\)\n/g;
    $yoso =~ s/\. /\.\n/g;
    # Undo the break when the period belongs to a known abbreviation.
    $yoso =~ s/\b($dont)\.\n/$1\. /g;

    print {$out_fh} $yoso;
}
# Buffered write errors only show up at close time, so check it.
close($out_fh) or die "Can't close $out\n$!\n";

exit (0);