Tryag File Manager
Home
-
Turbo Force
Current Path :
/
home
/
cluster1
/
data
/
bu01
/
1121861
/
html
/
xfst_dir_copy
/
Upload File :
New :
File
Dir
//home/cluster1/data/bu01/1121861/html/xfst_dir_copy/find_verbs.php4
<?php
/**
 * Annotates a text with links to the verb parser and to the dictionary.
 *
 * Steps performed by this script:
 *   1. Read the input text: the lines of the file named by $story when it is
 *      non-empty, otherwise the single string $lines.
 *   2. Clean every space-separated token and feed it to xfst ("apply up").
 *   3. Look the cleaned tokens up in the `nahuatl` table (columns lxa_d, lxo_d).
 *   4. Feed every line xfst printed back to xfst ("apply down"); the lines
 *      printed by that second run are the linkable verb forms.
 *   5. Echo the text with matching tokens wrapped in <A> links and, when
 *      $story is set, also write it to linked_story.html.
 *
 * $story, $lines and $language are not assigned anywhere in this file; they
 * must be provided by whatever runs or includes it.
 * NOTE(review): presumably request variables injected by register_globals - confirm.
 *
 * NOTE(review): security items that are flagged here but deliberately not changed:
 *   - database credentials are hard-coded below;
 *   - $story is passed to file() as-is, so the caller chooses which file is read;
 *   - the input text is echoed without HTML escaping (the token clean-up strips
 *     "<...>" sequences, so the text may legitimately carry markup - confirm
 *     before adding escaping).
 */

include_once "utf8/utf8.class.php";

if (!function_exists('fv_shell_quote')) {
    /**
     * Quotes one argument for the shell by wrapping it in single quotes and
     * escaping embedded single quotes. For an argument without single quotes
     * the result is exactly '<argument>'.
     *
     * NOTE(review): written by hand instead of using escapeshellarg() because
     * that function can drop bytes that are invalid in the current locale,
     * which would damage UTF-8 words - confirm against the server locale.
     *
     * @param string $arg raw argument
     * @return string the argument, safe to append to a command line
     */
    function fv_shell_quote($arg)
    {
        return "'" . str_replace("'", "'\\''", $arg) . "'";
    }
}

if (!function_exists('fv_collect_dict_words')) {
    /**
     * Appends the trimmed, non-empty lxa_d / lxo_d values of every row in
     * $result to $dict_words, skipping values that are already present.
     *
     * @param resource $result     result of a successful mysql_query()
     * @param array    $dict_words forms collected so far
     * @return array the extended list of forms
     */
    function fv_collect_dict_words($result, $dict_words)
    {
        while ($row = mysql_fetch_assoc($result)) {
            foreach (array("lxa_d", "lxo_d") as $column) {
                $form = trim($row[$column]);
                if ($form != "" && !in_array($form, $dict_words, true)) {
                    $dict_words[] = $form;
                }
            }
        }
        return $dict_words;
    }
}

$utfConverter = new utf8(); //defaults to CP1250.

$xfst_path = "/spd25/htdocs/hyperlex2/nahuatl/xfst_dir/xfst -utf8 -q -e 'loadd SurfaceForms.fsm' -e 'regex GlSR;'";

$HOST = "wave.ldc.upenn.edu";
$USER = "hyperlex";
$PASSWORD = "jota";
$DATABASE = "hyperlex";

// Externally supplied inputs; default them so a missing one behaves like "".
$story = isset($story) ? $story : "";
$lines = isset($lines) ? $lines : "";
// Kept from the original; $TABLE is not read anywhere in this script.
$TABLE = isset($language) ? $language : null;

@$db = mysql_pconnect($HOST, $USER, $PASSWORD);
if (!$db) {
    echo "Error: Could not connect to database. Please try again later.";
    exit;
}
mysql_select_db($DATABASE);

if ($story != "") {
    $in = file($story);
    if ($in === false) {
        // file() has already raised its own warning; continue with no input
        // instead of iterating over false further down.
        $in = array();
    }
} else {
    $in = array($lines);
}

$verbs = array();
$words = array();
$dict_words = array();
$bracketed_words = array();
$commands = $xfst_path;

// ---------------------------------------------------------------------------
// Pass 1: clean every token, queue it for xfst and for the dictionary lookup.
// ---------------------------------------------------------------------------
foreach ($in as $line) {
    $line = trim($line);
    $line = preg_replace("/[?,;'!]/", "", $line);
    $line = $utfConverter->strToUtf8($line);

    foreach (explode(" ", $line) as $word) {
        $word = trim($word);

        // NOTE(review): the original test was !strpos($word, "..."), which
        // skips a token only when the ellipsis is NOT at offset 0. That exact
        // behaviour is kept (spelled out explicitly) because the intended rule
        // is not visible here - confirm whether leading ellipses should be
        // skipped as well.
        $ellipsis = strpos($word, "...");
        if ($ellipsis !== false && $ellipsis > 0) {
            continue;
        }

        $word = str_replace(".", "", $word);
        if ($word == "" || preg_match('/=|[0-9]|Roman/', $word)) {
            continue;
        }

        // The /s flag keeps "." matching newlines, as the POSIX pattern did.
        $word = preg_replace('/<.*>/s', "", $word);
        $word = preg_replace('/[<>|{}]/', "", $word);
        $word = str_replace(array("\"", "*", "\\", "/", "'", "@"), "", $word);

        // Compare against false so a "]" at offset 0 is stripped too.
        if (strpos($word, "]") !== false) {
            $with_brackets = $word;
            $word = str_replace(array("[", "]"), "", $word);
            $bracketed_words[$with_brackets] = $word;
        }

        $word = trim($word);
        if ($word === "") {
            continue;
        }

        $commands .= " -e " . fv_shell_quote("apply up " . strtolower($word));

        // The original guarded this with ereg("^[a-z]*", $word), a pattern
        // that matches every string, so every word was kept. Same here.
        $words[] = $utfConverter->utf8ToStr($word);
    }
}

// ---------------------------------------------------------------------------
// Dictionary lookup, 100 words per query.
// array_chunk() replaces the hand-rolled counter, which silently dropped the
// word that triggered each flush and produced malformed SQL when no words
// were pending at the end.
// ---------------------------------------------------------------------------
$pre = "SELECT lxa_d, lxo_d FROM nahuatl WHERE (";
foreach (array_chunk(array_unique($words), 100) as $batch) {
    $conditions = array();
    foreach ($batch as $word) {
        // Escaped for the string literal only; LIKE wildcards (% and _) in a
        // word keep the meaning they had in the original query.
        $safe = mysql_real_escape_string($word);
        $conditions[] = "(lxa_d LIKE '$safe' OR lxo_d LIKE '$safe')";
    }
    $query = $pre . implode(" OR ", $conditions) . ")";

    $query_result = mysql_query($query);
    if (!$query_result) {
        // As in the original, a failed query triggers a reconnect; the batch
        // is then retried once instead of being lost.
        mysql_close();
        @$db = mysql_pconnect($HOST, $USER, $PASSWORD);
        if (!$db) {
            break;
        }
        mysql_select_db($DATABASE);
        $query_result = mysql_query($query);
    }
    if ($query_result) {
        $dict_words = fv_collect_dict_words($query_result, $dict_words);
    }
}

// ---------------------------------------------------------------------------
// xfst round trip: "apply up" on every word, then "apply down" on every line
// the first run printed.
// ---------------------------------------------------------------------------
$commands .= " -stop";
$parsable = array();
exec($commands, $parsable);

$commands = $xfst_path;
foreach ($parsable as $p) {
    // NOTE(review): blank output lines are skipped so that no argument-less
    // "apply down" is issued - confirm xfst prints nothing useful for those.
    if (trim($p) === "") {
        continue;
    }
    // $p is program output, so it is quoted before going back into a shell
    // command line.
    $commands .= " -e " . fv_shell_quote("apply down $p");
}
$commands .= " -stop";

$generated = array();
exec($commands, $generated);

// The original loop computed utf8ToStr($v) and threw the result away (and read
// an undefined $bracketed array). The converted value is stored here.
// NOTE(review): this assumes the conversion was meant to take effect, since
// the tokens compared below come straight from the unconverted input - confirm.
$verbs = array();
foreach (array_unique($generated) as $v) {
    if ($v === "") {
        continue; // blank output lines must never count as a verb
    }
    $verbs[] = $utfConverter->utf8ToStr($v);
}
sort($verbs);

// ---------------------------------------------------------------------------
// Pass 2: emit the text with links.
// ---------------------------------------------------------------------------
$out = false;
if ($story != "") {
    $out = fopen("linked_story.html", "w");
}

$verb_url = "http://hyperlex2.ldc.upenn.edu/nahuatl/xfst_dir/verb_forms.php4?condition=story&dialect=Oa&word=";
$dict_url = "http://hyperlex2.ldc.upenn.edu/nahuatl/search_results.php4?user_lang=english&condition=word_search&entry_template=englishwhole.html&word=";

foreach ($in as $line) {
    $new_line = "";

    foreach (explode(" ", trim($line)) as $word) {
        // Same ellipsis rule as in pass 1 (see the note there).
        $ellipsis = strpos($word, "...");
        if ($ellipsis === false || $ellipsis === 0) {
            $trimmed_word = strtolower(trim($word));
            $trimmed_word = preg_replace("/[.,!?']/", "", $trimmed_word);

            if ($trimmed_word !== "") {
                $param = urlencode($trimmed_word);

                if (in_array($trimmed_word, $verbs, true)) {
                    $upper = ucwords($trimmed_word);
                    // strtr() substitutes literally and in a single pass, so
                    // the capitalised form can never be replaced inside the
                    // anchor that was just inserted for the lower-case form.
                    // The original ran two regex replacements in sequence,
                    // which could rewrite its own output.
                    $word = strtr($word, array(
                        $trimmed_word => "<A href='" . $verb_url . $param . "' target='parser'>$trimmed_word</A>",
                        $upper => "<A href='" . $verb_url . $param . "' target='parser'>$upper</A>"
                    ));
                } else if (in_array($trimmed_word, $dict_words, true)) {
                    $word = strtr($word, array(
                        $trimmed_word => "<A href='" . $dict_url . $param . "'>$trimmed_word</A>"
                    ));
                }
            }
        }
        $new_line .= "$word ";
    }

    echo $new_line;
    if ($out) {
        fwrite($out, $new_line . "\n");
    }
}

if ($out) {
    fclose($out);
}
?>