#!/usr/local/bin/perl

# Locations of the input and output files.  The path is assembled with
# ":" as the separator, so $in becomes ":target:wowcat.txt".
# NOTE(review): ":" looks like classic Mac OS (MacPerl) path syntax --
# confirm before running this on another platform.
$directory = 'target';
$out_dir   = 'data';
$file      = 'wowcat.txt';
$separator = ':';
$in        = join('', $separator, $directory, $separator, $file);

# Read the whole input file into @array, one element per line with the
# line terminator kept.  The three-argument open with an explicit '<'
# mode guarantees the path can never be interpreted as an open mode or a
# pipe, and the lexical handle is released as soon as we are done with it.
open(my $in_fh, '<', $in) or die "$in\n$!\n";
@array = <$in_fh>;
close $in_fh;

# join_wrapped_lines(@lines)
#
# Re-flows hard-wrapped text.  Consecutive non-blank lines are merged into
# one element, joined by a single space; a blank line (exactly "\n") is
# kept as its own element and ends the current run.
#
# Returns the merged list; the arguments themselves are not modified.
#
# The list is rebuilt in one pass instead of splicing elements out of the
# array while indexing into it, which avoids both the index fix-ups and
# the quadratic cost of repeated splice calls.
sub join_wrapped_lines {
    my @lines = @_;
    return () unless @lines;

    # The first line always opens the first output element.  Its leading
    # spaces are left untouched here, as only later lines are trimmed.
    my @merged = (shift @lines);

    foreach my $line (@lines) {
        # A blank line is kept verbatim as a separator.
        if ($line eq "\n") {
            push @merged, $line;
            next;
        }

        # Drop the indentation of every other line.
        $line =~ s/^ +//;

        # The first line after a blank line starts a new element.
        if ($merged[-1] eq "\n") {
            push @merged, $line;
            next;
        }

        # Continuation line: turn the previous element's line end (and any
        # spaces before it) into one space, then append the current line.
        $merged[-1] =~ s/ *\n$/ /;
        $merged[-1] .= $line;
    }

    return @merged;
}

@array = join_wrapped_lines(@array);

# Regex alternatives for tokens whose trailing period must not be treated
# as the end of a sentence.  The pieces are joined with "|" and the result
# is interpolated into a pattern of the form \b($dont)\.\n further down.
$dont = join('|',
    '[A-Z]',
    'M(r|s|rs)', 'Dr',
    'Calif', 'V[Aa]', '[MS][Tt]',
    'Jan', 'Feb', 'Mar', 'Apr', 'Aug', 'Sep(t|)', 'Oct', 'Nov', 'Dec',
    'Assoc', 'Co', 'Gov', 'Se(n|c)', 'Ont',
    'i\.e', 'e\.g', 'v(s|)',
    'Pa', 'Fla', 'Re(p|v)', 'Gen', 'Univ', 'Jr',
    '[fF]t', '[Ss]gt', '[Pp]res', '[Pp]rof',
);

# Turn the input path into the output path by replacing the first
# occurrence of the input directory name with the output directory name.
# \Q...\E makes the directory name match literally even if it ever
# contains regex metacharacters.
$in =~ s/\Q$directory\E/$out_dir/;

# Open the output file for writing (truncating it).  The three-argument
# form keeps the mode separate from the path, so nothing in $in can be
# mistaken for a mode or a pipe.  The FILE handle is used by the code
# that follows.
open(FILE, '>', $in) || die "Can't create $in\n$!\n";

# tidy_and_split($text, $abbrev)
#
# Cleans up one paragraph of text and breaks it into one sentence per line.
#
#   $text   - the paragraph, normally ending in a single "\n"
#   $abbrev - regex alternation of abbreviations; a period directly after
#             one of them is not treated as a sentence end
#
# Returns the processed text.
sub tidy_and_split {
    my ($text, $abbrev) = @_;

    # --- Cleanup ---------------------------------------------------------
    $text =~ s/<[^<>]*>//g;     # remove <...> tags
    $text =~ s/\s+\n$/\n/;      # remove whitespace before the final newline
    $text =~ s/ {2,}/ /g;       # collapse runs of spaces
    $text =~ s/^ +//;           # remove leading spaces

    # The non-ASCII patterns below are raw bytes in the file's original
    # encoding and are matched byte-wise (no "use utf8").
    # NOTE(review): judging by their replacements they stand for a dash,
    # a copyright sign, curly quotes and a bullet -- confirm the file's
    # encoding before editing them.
    $text =~ s/ŽÑ/-/g;
    # NOTE(review): as it appears here this substitution is a no-op; the
    # pattern presumably held a different dash character -- confirm.
    $text =~ s/-/-/g;
    $text =~ s/Žý/(c)/g;
    $text =~ s/¡Ç/'/g;
    $text =~ s/¡È|¡É/"/g;
    $text =~ s/¡¦/*/g;

    # Put spaces around dashes.
    $text =~ s/([^ ])--([^ ])/$1 -- $2/g;
    $text =~ s/ --([^ ])/ -- $1/g;
    $text =~ s/ -([^ -])/ - $1/g;
    # A question mark standing alone between spaces becomes a dash.
    # NOTE(review): presumably a dash character that an earlier
    # conversion replaced with "?" -- confirm.
    $text =~ s/ \? / -- /g;

    # --- Sentence splitting ----------------------------------------------
    # A sentence end followed by one space becomes a line break.
    $text =~ s/\? /\?\n/g;
    $text =~ s/! /!\n/g;
    $text =~ s/(\.|!|\?)\" /$1\"\n/g;
    $text =~ s/\.\) /\.\)\n/g;
    $text =~ s/\. /\.\n/g;

    # Undo the breaks that were inserted after an abbreviation.  The
    # (?!\z) lookahead leaves the paragraph's own final newline alone, so
    # a paragraph that ends in an abbreviation (for example "D.C.") keeps
    # its line end instead of running into the following line.  With no
    # abbreviation list there is nothing to undo.
    if (defined $abbrev && length $abbrev) {
        $text =~ s/\b($abbrev)\.\n(?!\z)/$1\. /g;
    }

    return $text;
}

# Process every paragraph in place and write it to the output file.
foreach $yoso (@array) {
    $yoso = tidy_and_split($yoso, $dont);
    print FILE $yoso;
}
# Close the output file and check the result: buffered write errors (for
# example a full disk) are only reported when the handle is closed, so an
# unchecked close could silently leave a truncated output file.
close(FILE) || die "Can't close $in\n$!\n";
exit(0);
