Tryag File Manager
Home
-
Turbo Force
Current Path :
/
home
/
cluster1
/
data
/
bu01
/
1121861
/
jlex
/
rdloci
/
php5
/
Upload File :
New :
File
Dir
/home/cluster1/data/bu01/1121861/jlex/rdloci/php5/repeat_analyzer.php5
<?php
/**
 * Colour-codes and summarises the repeat units ("words") found in
 * variable-region sequence files.
 *
 * Input file format, as parsed by load_seqs():
 *   - a line starting with "%" is a locus map:  %<strain>:<locus> <locus> ...
 *   - a line starting with ">" starts a new sequence and carries its name
 *   - every other line is appended to the current sequence; repeats are
 *     separated by spaces and "*" marks a gap
 *
 * All display_* methods echo HTML directly.
 */
class repeat_analyzer
{
    /** Maximum number of differing characters for two equal-length repeats to count as "similar". */
    public $MAX_MISMATCHES;

    /** Value interpolated into "font-size:" styles and into the locus image height. */
    public $FONT_SIZE;

    /** Base palette, handed out in order to the most frequent repeats. */
    public $colors = array(
        "pink", "green", "blue", "red", "cyan", "gray", "purple", "gold", "brown", "#FF8C00", "#8B0000",
    );

    /** Base colour => lighter shade used for repeats that are similar to a base repeat. */
    public $light_colors = array(
        "red"     => "salmon",
        "cyan"    => "#E0FFFF",
        "green"   => "#ADFF2F",
        "blue"    => "#4682B4",
        "gray"    => "silver",
        "pink"    => "#FFE4E1",
        "purple"  => "#EE82EE",
        "gold"    => "yellow",
        "brown"   => "tan",
        "#FF8C00" => "orange",
        "#8B0000" => "#B22222",
    );

    /**
     * @param mixed $max_mismatches stored in MAX_MISMATCHES
     * @param mixed $font_size      stored in FONT_SIZE
     */
    public function __construct($max_mismatches, $font_size)
    {
        $this->MAX_MISMATCHES = $max_mismatches;
        $this->FONT_SIZE = $font_size;
    }

    /**
     * Escapes a value for use in HTML text or inside a quoted attribute.
     */
    private function esc($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Splits a string into an array of single-byte strings.
     *
     * Unlike the built-in str_split() on PHP < 8.2, an empty string yields an
     * empty array (not array("")), which is what the original loop produced.
     *
     * @param string $string
     * @return array
     */
    public function str_split($string)
    {
        $string = (string) $string;
        if ($string === "") {
            return array();
        }
        return str_split($string);
    }

    /**
     * Counts the positions at which two strings differ.
     *
     * @param string $string_1
     * @param string $string_2
     * @return int number of differing positions when the lengths are equal;
     *             otherwise strlen($string_1)
     */
    public function count_mismatches($string_1, $string_2)
    {
        $string_1 = (string) $string_1;
        $string_2 = (string) $string_2;
        $length = strlen($string_1);

        if ($length !== strlen($string_2)) {
            return $length;
        }

        $mismatches = 0;
        for ($i = 0; $i < $length; $i++) {
            if ($string_1[$i] !== $string_2[$i]) {
                $mismatches++;
            }
        }
        return $mismatches;
    }

    /**
     * Parses a sequence file and writes a cleaned copy to "variable_regions.txt"
     * in the current working directory (spaces and "*" stripped from each sequence).
     *
     * @param string $file path of the file to read
     * @return array sequence name => space-separated repeats, plus the reserved
     *               key "maps" holding (strain => list of loci) and the reserved
     *               entry "num_loci" (locus count of the last map line read)
     */
    public function load_seqs($file)
    {
        $lines = file($file);
        if ($lines === false) {
            // file() has already raised a warning; continue as if the file were empty.
            $lines = array();
        }

        $seqs = array();
        $maps = array();
        $name = "";
        $s = "";

        foreach ($lines as $line) {
            $line = trim($line);

            if (strncmp($line, "%", 1) === 0) {
                // Locus map line. A separate variable is used for the strain so that
                // a map line no longer clobbers the name of the sequence being read.
                $line = (string) substr($line, 1);
                $colon = strpos($line, ":");
                if ($colon === false) {
                    continue; // malformed map line: no "<strain>:" prefix
                }
                $strain = substr($line, 0, $colon);
                $loci = explode(" ", (string) substr($line, $colon + 1));
                foreach ($loci as $locus) {
                    $maps[$strain][] = $locus;
                }
                $maps["num_loci"] = count($loci);
            } elseif (strncmp($line, ">", 1) === 0) {
                // Header line: flush the sequence collected so far, then start a new one.
                if ($name !== "") {
                    $seqs[$name] = $s;
                }
                $name = trim((string) substr($line, 1));
                $s = "";
            } else {
                $s .= " " . $line;
            }
        }
        if ($name !== "") {
            $seqs[$name] = $s;
        }

        $out = fopen("variable_regions.txt", "w");
        if ($out !== false) {
            foreach ($seqs as $seq_name => $seq) {
                $seq = str_replace(array(" ", "*"), "", $seq);
                fwrite($out, ">" . $seq_name . "\n" . $seq . "\n\n");
            }
            fclose($out);
        }

        $seqs["maps"] = $maps;
        return $seqs;
    }

    /**
     * Builds per-position letter counts for every known repeat.
     *
     * Each repeat in $seqs is assigned to the first key of $repeat_colors that has
     * the same length and at most MAX_MISMATCHES differences. Repeats that match
     * no key are skipped (they used to be pooled under key 0 because the "no match"
     * marker false was used as an array index).
     *
     * @param array $seqs          sequence name => space-separated repeats
     * @param array $repeat_colors repeat => colour; only the keys are used
     * @return array repeat => list (by position) of (letter => count); repeats with
     *               no observations are removed
     */
    public function find_patterns($seqs, $repeat_colors)
    {
        $by_repeat = array();
        foreach ($repeat_colors as $repeat => $color) {
            $by_repeat[$repeat] = array();
        }

        foreach ($seqs as $name => $seq) {
            $seq = str_replace("*", "", (string) $seq);
            foreach (explode(" ", $seq) as $repeat) {
                $repeat = trim($repeat);
                if ($repeat === "") {
                    continue;
                }

                $index = null;
                foreach ($by_repeat as $r => $set) {
                    $candidate = (string) $r;
                    if (strlen($candidate) === strlen($repeat)
                        && $this->count_mismatches($repeat, $candidate) <= $this->MAX_MISMATCHES
                    ) {
                        $index = $r;
                        break;
                    }
                }
                if ($index === null) {
                    continue;
                }

                foreach ($this->str_split($repeat) as $position => $letter) {
                    if (isset($by_repeat[$index][$position][$letter])) {
                        $by_repeat[$index][$position][$letter]++;
                    } else {
                        $by_repeat[$index][$position][$letter] = 1;
                    }
                }
            }
        }

        foreach ($by_repeat as $repeat => $set) {
            if (count($set) === 0) {
                unset($by_repeat[$repeat]);
            }
        }
        return $by_repeat;
    }

    /**
     * Echoes one table row per repeat group: a colour swatch followed by one cell
     * per position listing each letter and its percentage, most frequent first.
     *
     * @param array $by_repeat     result of find_patterns()
     * @param array $repeat_colors repeat => colour
     */
    public function display_patterns($by_repeat, $repeat_colors)
    {
        $rows = array();

        foreach ($by_repeat as $repeat => $positions) {
            if (count($positions) === 0) {
                continue; // nothing to show; previously this re-emitted the preceding row
            }

            $color = isset($repeat_colors[$repeat]) ? $repeat_colors[$repeat] : "";
            $s = "<tr><td bgcolor='" . $this->esc($color) . "' width='100'></td>";

            foreach ($positions as $position) {
                arsort($position);
                $sum = array_sum($position);

                $s .= "<td><table>";
                foreach ($position as $letter => $count) {
                    $percent = round((100 * $count / $sum), 2);
                    $s .= "<tr><td>" . $this->esc($letter) . "</td></tr>";
                    $s .= "<tr><td>$percent%</td></tr>";
                }
                $s .= "</table></td>";
            }
            $s .= "</tr>";
            $rows[] = $s;
        }

        echo "<table border='1'>";
        foreach ($rows as $row) {
            echo $row;
        }
        echo "</table>";
    }

    /**
     * Counts the number of types of repeats in a set of sequences and returns
     * a sorted associated array, (repeat=>count), of highest count first.
     * Empty tokens and tokens containing "*" are ignored.
     *
     * @param array $seqs sequence name => space-separated repeats
     * @return array repeat => count
     */
    public function count_repeats($seqs)
    {
        $repeats = array();

        foreach ($seqs as $name => $seq) {
            foreach (explode(" ", (string) $seq) as $repeat) {
                $repeat = trim($repeat);
                if ($repeat === "" || strpos($repeat, "*") !== false) {
                    continue;
                }
                if (array_key_exists($repeat, $repeats)) {
                    $repeats[$repeat]++;
                } else {
                    $repeats[$repeat] = 1;
                }
            }
        }

        arsort($repeats);
        return $repeats;
    }

    /**
     * Assigns a colour to every repeat found in the standards file.
     *
     * Repeats are visited from most to least frequent. A repeat that is similar
     * (same length, at most MAX_MISMATCHES differences) to an already coloured base
     * repeat gets the light shade of that base colour. Otherwise, if it occurs more
     * than once, it becomes a new base repeat with the next palette colour. Anything
     * else, including base repeats beyond the end of the palette, gets "" so that
     * it still appears as a key.
     *
     * @param string $standards_file path passed to load_seqs()
     * @return array repeat => colour ("" when no colour could be assigned)
     */
    public function assign_colors($standards_file)
    {
        $colors = $this->colors;
        $light_colors = $this->light_colors;

        $standards = $this->load_seqs($standards_file);
        // "maps" is load_seqs() metadata (an array), not a sequence string.
        unset($standards["maps"]);
        $repeats = $this->count_repeats($standards);

        $reps = array();
        $sim_reps = array();
        $next_color = 0;

        foreach ($repeats as $repeat => $count) {
            $repeat = trim((string) $repeat);
            if ($repeat === "" || strpos($repeat, "*") !== false) {
                continue;
            }

            $cur_color = "";
            foreach ($reps as $rep => $color) {
                $rep = (string) $rep;
                // Same length guard as find_patterns(): count_mismatches() returns a
                // plain length for unequal strings, which must not count as a match.
                if (strlen($rep) === strlen($repeat)
                    && $this->count_mismatches($rep, $repeat) <= $this->MAX_MISMATCHES
                ) {
                    $cur_color = (string) $color;
                    break;
                }
            }

            if ($cur_color === "" && $count > 1) {
                $reps[$repeat] = isset($colors[$next_color]) ? $colors[$next_color] : "";
                $next_color++;
            } else {
                $sim_reps[$repeat] = isset($light_colors[$cur_color]) ? $light_colors[$cur_color] : "";
            }
        }

        // The key sets are disjoint; "+" keeps numeric-looking keys intact,
        // whereas array_merge() would renumber them.
        return $reps + $sim_reps;
    }

    /**
     * Echoes two tables: per-repeat counts with their share of all words, and
     * totals per palette colour (base colour plus its light shade).
     *
     * @param array $repeat_counts repeat => count
     * @param array $repeat_colors repeat => colour
     */
    public function display_repeat_counts($repeat_counts, $repeat_colors)
    {
        $total_words = array_sum($repeat_counts);

        echo "<table border='1'><tr><td><b>word</b></td><td><b>count</b></td><td><b>% of total</b></td></tr>";
        $count_by_color = array();
        foreach ($repeat_counts as $repeat => $count) {
            $color = isset($repeat_colors[$repeat]) ? (string) $repeat_colors[$repeat] : "";
            $percent = ($total_words != 0) ? round((100 * $count / $total_words), 2) : 0;

            echo "<tr><td bgcolor='" . $this->esc($color) . "' NOWRAP='1'>" . $this->esc($repeat)
                . "</td><td>$count</td><td>$percent%</td></tr>";

            if (!isset($count_by_color[$color])) {
                $count_by_color[$color] = 0;
            }
            $count_by_color[$color] += $count;
        }
        echo "</table>";

        echo "<table border='1'>";
        foreach ($this->colors as $color) {
            $light_color = isset($this->light_colors[$color]) ? $this->light_colors[$color] : "";
            $total = (isset($count_by_color[$color]) ? $count_by_color[$color] : 0)
                + (isset($count_by_color[$light_color]) ? $count_by_color[$light_color] : 0);

            if ($total != 0) {
                // Computed only for non-empty groups, so $total_words cannot be zero here.
                $percent = round((100 * $total / $total_words), 2);
                echo "<tr><td bgcolor='" . $this->esc($color) . "' width='50'></td><td>+</td><td bgcolor='"
                    . $this->esc($light_color) . "' width='50'></td><td>$total</td><td>$percent%</td></tr>";
            }
        }
        echo "</table>";
    }

    /**
     * Echoes a table with one row per strain and one cell per locus; every allele
     * (loci are split on "-") is shown as the image ../images/<allele>.jpg.
     * Echoes nothing when $maps is empty.
     *
     * @param array $maps the "maps" entry returned by load_seqs()
     */
    public function display_locus_maps($maps)
    {
        if (empty($maps)) {
            return;
        }

        $font_size = $this->esc($this->FONT_SIZE);
        $num_loci = isset($maps["num_loci"]) ? $maps["num_loci"] : 0;
        unset($maps["num_loci"]);

        echo "<table border='1'>";
        echo "<tr><td><font style=\"font-size: $font_size;\"><b>Strain</b></font></td>";
        for ($i = 0; $i < $num_loci; $i++) {
            echo "<td><font style=\"font-size: $font_size;\"><b>Locus" . ($i + 1) . "</b></font></td>";
        }
        echo "</tr>";

        foreach ($maps as $name => $loci) {
            echo "<tr><td><font style=\"font-size: $font_size;\"><b>" . $this->esc($name) . "</b></font></td>";
            foreach ($loci as $locus) {
                echo "<td>";
                foreach (explode("-", $locus) as $allele) {
                    echo "<img src='../images/" . $this->esc($allele) . ".jpg' height='$font_size' width='50'>";
                }
                echo "</td>";
            }
            echo "</tr>";
        }

        echo "</table>";
        echo "<br><br><br>";
    }

    /**
     * Echoes one table row per sequence: a link labelled with the sequence name,
     * then one coloured cell per repeat. Tokens containing "*" become white cells;
     * repeats without a colour become "DDDDDD" cells containing "-".
     *
     * The link label is the name with spaces turned into "_" and its last character
     * dropped; the strain passed in the link is the part of the name before the
     * first "_" (empty when there is none).
     *
     * @param array $seqs          sequence name => space-separated repeats
     * @param array $repeat_colors repeat => colour
     */
    public function display_repeats($seqs, $repeat_colors)
    {
        $font_size = $this->esc($this->FONT_SIZE);

        echo "<table>";
        foreach ($seqs as $name => $seq) {
            $name = (string) $name;
            $underscore = strpos($name, "_");
            $strain = ($underscore === false) ? "" : substr($name, 0, $underscore);
            $label = (string) substr(str_replace(" ", "_", $name), 0, -1);

            // Ampersands are written as &amp; so that "&reg_exps_0" is not decoded
            // by the browser as the (R) entity.
            $href = "../mysql_to_xml.php4?num_conditions=1&amp;database=strains"
                . "&amp;phrases_and_fields_0=name&amp;reg_exps_0=equals&amp;string_0="
                . $this->esc(urlencode($strain))
                . "&amp;stylesheet=strains/to_html.xml";

            echo "<tr><td><font style=\"font-size: $font_size;\" NOWRAP='1'><a href='$href'>"
                . $this->esc($label) . "</a></font></td>";

            foreach (explode(" ", (string) $seq) as $cur_repeat) {
                if (trim($cur_repeat) === "") {
                    continue;
                }
                if (strpos($cur_repeat, "*") !== false) {
                    echo "<td bgcolor='white'></td>";
                    continue;
                }

                $color = isset($repeat_colors[$cur_repeat]) ? (string) $repeat_colors[$cur_repeat] : "";
                if (trim($color) === "") {
                    $color = "DDDDDD";
                }

                echo "<td bgcolor=\"" . $this->esc($color) . "\"><font style=\"font-size: $font_size;\">";
                if ($color === "DDDDDD") {
                    echo "-";
                } else {
                    // NOTE(review): 15 plain spaces as cell padding; this may have been
                    // "&nbsp;" before the file was copied -- confirm against the original.
                    echo str_repeat(" ", 15);
                }
                echo "</font></td>";
            }

            echo "</tr>";
            echo "<tr></tr>";
        }
        echo "</table>";
    }

    /**
     * Runs the whole analysis and echoes the resulting page fragment: locus maps,
     * coloured repeats, repeat counts and per-position patterns.
     *
     * @param string $seqs_file      sequences to analyse
     * @param string $standards_file sequences used to assign colours
     * @param mixed  $font_size      unused; the FONT_SIZE given to the constructor
     *                               is what the display methods read
     */
    public function analyze($seqs_file, $standards_file, $font_size)
    {
        $repeat_colors = $this->assign_colors($standards_file);

        $seqs = $this->load_seqs($seqs_file);
        $maps = isset($seqs["maps"]) ? $seqs["maps"] : array();
        unset($seqs["maps"]);

        $repeat_counts = $this->count_repeats($seqs);
        $repeat_patterns = $this->find_patterns($seqs, $repeat_colors);

        echo "<table>";
        echo "<tr><td>";
        $this->display_locus_maps($maps);
        echo"</td></tr>";
        echo "<tr><td colspan='2'>";
        $this->display_repeats($seqs, $repeat_colors);
        echo "</td></tr>";
        echo "<tr><td valign='top'>";
        $this->display_repeat_counts($repeat_counts, $repeat_colors);
        echo "</td><td>";
        $this->display_patterns($repeat_patterns, $repeat_colors);
        echo "</td></tr></table>";
    }
}

/*
 * Disabled example driver, kept from the original file.
 * NOTE(review): "$stardards" below is a typo for "$standards", and the $_POST
 * file names are placed into paths without validation -- check both before
 * re-enabling.
 *
 * error_reporting(0);
 * $file = $_POST["file"];
 * $standards = $_POST["standards"];
 * echo $stardards."<BR>";
 * $size = $_POST["size"];
 * $max_mismatches = $_POST["max_mismatches"];
 * $ca = new repeat_analyzer($max_mismatches, $size);
 * $ca->analyze("../vregions/$file","../vregions/$standards",$size);
 */