[Avida-cvs] [Avida2-svn] r286 - trunk/source/main

huangw10@myxo.css.msu.edu huangw10 at myxo.css.msu.edu
Mon Aug 15 12:23:29 PDT 2005


Author: huangw10
Date: 2005-08-15 15:23:29 -0400 (Mon, 15 Aug 2005)
New Revision: 286

Modified:
   trunk/source/main/analyze.cc
Log:
Update analyze function of community_complexity

Modified: trunk/source/main/analyze.cc
===================================================================
--- trunk/source/main/analyze.cc	2005-08-14 21:38:11 UTC (rev 285)
+++ trunk/source/main/analyze.cc	2005-08-15 19:23:29 UTC (rev 286)
@@ -2289,7 +2289,6 @@
   cpx_fp << "# 5: Total Complexity" << endl;
   cpx_fp << endl;
 
-
   /////////////////////////////////////////////////////////////////////////////////
   // Loop through all of the genotypes in all batches and build id vs. genotype map
 
@@ -2321,6 +2320,7 @@
   // Backup test CPU data
   bool backupUsage = cTestCPU::UseResources();
   tArray<double> backupResources(cTestCPU::GetResources());
+  cTestCPU::UseResources() = true;
   FillResources(update);
 
   ///////////////////////////////////////////////////////////////////////
@@ -2362,19 +2362,29 @@
   // Get Most Recent Common Ancestor
 
   map<gen_pair, cAnalyzeGenotype *> mrca;
+  map<gen_pair, int> raw_dist;
   for (int i = 0; i< size_community; ++ i) {
     for (int j = i+1; j < size_community; ++ j) {
 				    
       cAnalyzeGenotype * lineage1_genotype = community[i];
       cAnalyzeGenotype * lineage2_genotype = community[j];
+      int total_dist = 0;
 
       while (lineage1_genotype->GetID() != lineage2_genotype->GetID()) {
 	if (lineage1_genotype->GetID() > lineage2_genotype->GetID()) {
 	  int parent_id = lineage1_genotype->GetParentID();
-	  lineage1_genotype = genotype_database.find(parent_id)->second;
+	  cAnalyzeGenotype * parent = genotype_database.find(parent_id)->second;
+
+	  total_dist += cGenomeUtil::FindHammingDistance(lineage1_genotype->GetGenome(),
+							 parent->GetGenome());
+	  lineage1_genotype = parent;
 	} else {
 	  int parent_id = lineage2_genotype->GetParentID();
-	  lineage2_genotype = genotype_database.find(parent_id)->second;
+	  cAnalyzeGenotype * parent = genotype_database.find(parent_id)->second;
+	  total_dist += cGenomeUtil::FindHammingDistance(lineage2_genotype->GetGenome(),
+							 parent->GetGenome());
+      
+	  lineage2_genotype = parent;
 	}
       }
 
@@ -2382,6 +2392,8 @@
       int id2 = community[j]->GetID();
       mrca.insert(make_pair(gen_pair(id1, id2), lineage1_genotype));
       mrca.insert(make_pair(gen_pair(id2, id1), lineage1_genotype));
+      raw_dist.insert(make_pair(gen_pair(id1, id2), total_dist));
+      raw_dist.insert(make_pair(gen_pair(id2, id1), total_dist));
     }
   }
 
@@ -2420,6 +2432,7 @@
   }
 
   */
+
   vector<cAnalyzeGenotype *> sorted_community = community;
   /////////////////////////////////////////////
   // Loop through genotypes in sorted community
@@ -2442,7 +2455,7 @@
 
     cout << endl << genotype->GetID() << endl;
 
-    if (given_genotypes.size() >= 2) {
+    /*if (given_genotypes.size() >= 2) {
 
       ///////////////////////////////////////////////////////////////////
       // Look for two given genotypes that has minimun depth dist with it
@@ -2481,9 +2494,14 @@
 	given_inst = given_genome2[line].GetOp();
 	prob[line][given_inst] += pow(1 - 1.0/length_genome, min_depth_dist);
       }
+
+      cpx_fp << genotype->GetID() << " " << min_depth_dist << " " << second_min_depth 
+	     << " " << raw_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second
+	     << " " << raw_dist.find(gen_pair(genotype->GetID(), second_min_gen->GetID()))->second
+	     << " ";
       
 	
-    } else if (given_genotypes.size() == 1) {
+    } else  if (given_genotypes.size() == 1) {
       //////////////////////////////////////////////////////
       // Calculate the probability of each inst at each line
       cAnalyzeGenotype * tmrca = mrca.find(gen_pair(genotype->GetID(), 
@@ -2496,8 +2514,56 @@
 	prob[line][given_inst] += pow(1 - 1.0/length_genome, dist);
       }
       
-    } 
+      cpx_fp << genotype->GetID() << " " << dist << " " 
+	     << raw_dist.find(gen_pair(genotype->GetID(), given_genotypes[0]->GetID()))->second << " ";
+    } else {
+      cpx_fp << genotype->GetID() << " ";
+      }*/
+
+    if (given_genotypes.size() >= 1) {
+      //////////////////////////////////////////////////
+      // Look for a genotype that is closest to this one
       
+      cAnalyzeGenotype * min_depth_gen = given_genotypes[0];
+      cAnalyzeGenotype * tmrca = mrca.find(gen_pair(genotype->GetID(), 
+						    given_genotypes[0]->GetID()))->second;
+      int min_depth_dist = genotype->GetDepth() + given_genotypes[0]->GetDepth() - 2 * tmrca->GetDepth();
+
+      for (int i = 1; i < given_genotypes.size() ; ++ i) {
+	cAnalyzeGenotype * given_genotype = given_genotypes[i];
+	cAnalyzeGenotype * tmrca = mrca.find(gen_pair(genotype->GetID(),
+						      given_genotype->GetID()))->second;
+	int dist = genotype->GetDepth() + given_genotype->GetDepth() - 2 * tmrca->GetDepth();
+	
+	if (dist < min_depth_dist) {
+	  min_depth_dist = dist;
+	  min_depth_gen = given_genotype;
+	}
+      }
+
+      const cGenome & given_genome = min_depth_gen->GetGenome();
+      const cGenome & base_genome = genotype->GetGenome();
+      cGenome mod_genome(base_genome);
+      for (int line = 0; line < length_genome; ++ line) {
+	int given_inst = given_genome[line].GetOp();
+	mod_genome = base_genome;
+	mod_genome[line].SetOp(given_inst);
+	cAnalyzeGenotype test_genotype(mod_genome, inst_set);
+	test_genotype.Recalculate();
+
+	// Only when given inst make the genotype alive
+	if (test_genotype.GetFitness() > 0) {
+	  prob[line][given_inst] += pow(1 - 1.0/length_genome, min_depth_dist);
+	}
+      }
+      
+      cpx_fp << genotype->GetID() << " " << min_depth_dist << " " 
+	     << raw_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second << " "
+	     << hamming_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second << "   ";
+    } else {
+      cpx_fp << genotype->GetID() << " ";
+    }
+
     /////////////////////////////////////////
     // Normalize the probability at each line
 
@@ -2513,9 +2579,31 @@
 	cur_total_prob = 1.0;
       }
       double left_prob = 1 - cur_total_prob;
+
+      // Check the number of insts that could make genotype alive ...
+      int num_alive = 0;
+      const cGenome & base_genome = genotype->GetGenome();
+      cGenome mod_genome(base_genome);
       for (int inst = 0; inst < num_insts; ++ inst) {
-	prob[line][inst] += left_prob / num_insts;
+	mod_genome[line].SetOp(inst);
+	cAnalyzeGenotype test_genotype(mod_genome, inst_set);
+	test_genotype.Recalculate();
+
+	// Only when given inst make the genotype alive
+	if (test_genotype.GetFitness() > 0) {
+	  num_alive ++;
+	} else {
+	  prob[line][inst] = -1;
+	}
       }
+	  
+      for (int inst = 0; inst < num_insts; ++ inst) {
+	if (prob[line][inst] == -1) {
+	  prob[line][inst] = 0;
+	} else {
+	  prob[line][inst] += left_prob / num_alive;
+	}	
+      }
 
     }
 
@@ -2539,7 +2627,7 @@
    
     genotype->Recalculate();
     double base_fitness = genotype->GetFitness();
-    
+    cout << base_fitness << endl;
     const cGenome & base_genome = genotype->GetGenome();
     cGenome mod_genome(base_genome);
       
@@ -2561,7 +2649,8 @@
 
     /////////////////////////////////////////////////////
     // Redistribute the probability of insts at each line
-
+    vector< vector<double> > prob_given_env(length_genome, one_line_prob);
+    
     for (int line = 0; line < length_genome; ++ line) {
       double total_prob = 0.0;
       for (int inst = 0; inst < num_insts; ++ inst) {
@@ -2570,13 +2659,16 @@
 	}
       }
 
-     
+      if (total_prob == 0) {
+	cout << "total prob is zero." ;
+	exit(1);
+      }
 
       for (int inst = 0; inst < num_insts; ++ inst) {
 	if (neutral_mut[line][inst] == true) {
-	  prob[line][inst] = prob[line][inst] / total_prob;
+	  prob_given_env[line][inst] = prob[line][inst] / total_prob;
 	} else {
-	  prob[line][inst] = 0.0;
+	  prob_given_env[line][inst] = 0.0;
 	}
       }
       
@@ -2584,12 +2676,13 @@
 
     ////////////////////////////////////////////////
     // Calculate the entropy given environment
-    
+
     vector<double> entropy_given_env(length_genome, 0.0);
     for (int line = 0; line < length_genome; ++ line) {
       for (int inst = 0; inst < num_insts; ++ inst) {
-	if (prob[line][inst] > 0) {
-	  entropy_given_env[line] -= prob[line][inst] * log(prob[line][inst]) / log(num_insts*1.0);
+	if (prob_given_env[line][inst] > 0) {
+	  entropy_given_env[line] -= prob_given_env[line][inst] * log(prob_given_env[line][inst]) / 
+	                             log(num_insts*1.0);
 	}
       }
     }
@@ -2606,24 +2699,40 @@
 
       if (entropy[line] >= entropy_given_env[line]) {
 	information += entropy[line] - entropy_given_env[line];	
-      } else {
-	information += 0;    // Env did not give more information about this site.
+      } else {    // Negative information is deemed as new information ...
+	
+	// Count the number of insts that can make genotype alive
+	int num_inst_alive = 0;
+	for (int inst = 0; inst < num_insts; ++ inst) {
+	  if (prob[line][inst] > 0) {
+	    num_inst_alive ++;
+	  }
+	}
+	
+	double entropy_before = - log(1.0/num_inst_alive) / log(num_insts*1.0);
+	information += entropy_before - entropy_given_env[line];
+	if (information < 0) {
+	  cout << "Negative information at site " << line << endl;
+	  exit(1);
+	}
       }
 
     }
     complexity += information;
 
-    cpx_fp << genotype->GetID() << " " << entropy_before << " " << entropy_after << " "
-	   << information << " " << complexity << endl;
+    cpx_fp << entropy_before << " " << entropy_after << " "
+	   << information << " " << complexity << "   ";
 
+    genotype->PrintTasks(cpx_fp, 0, -1);
+    cpx_fp << endl; 
+
     /////////////////////////////////////////////////////////////
     // This genotype becomes the given condition of next genotype
 
     given_genotypes.push_back(genotype);
 
   }
-	 
-	 
+
   // Set the test CPU back to the state it was 
   cTestCPU::UseResources() = backupUsage;
   cTestCPU::SetupResourceArray(backupResources);
@@ -2795,7 +2904,7 @@
       cout << output_it.Get()->GetName() << " ";
     }
     cout << endl;
-    }
+  }
   
   
   ///////////////////////////////////////////////////////
@@ -3021,7 +3130,7 @@
     delete [] col_fail_count;
     
     }
-    }
+}
 
 void cAnalyze::CommandAverageModularity(cString cur_string)
 {




More information about the Avida-cvs mailing list