[Avida-cvs] [Avida2-svn] r242 - trunk/source/main

huangw10@myxo.css.msu.edu huangw10 at myxo.css.msu.edu
Fri Jul 15 12:10:25 PDT 2005


Author: huangw10
Date: 2005-07-15 15:10:24 -0400 (Fri, 15 Jul 2005)
New Revision: 242

Modified:
   trunk/source/main/analyze.cc
   trunk/source/main/analyze.hh
Log:
Analyze new information in the child about environment along the lineage.

Modified: trunk/source/main/analyze.cc
===================================================================
--- trunk/source/main/analyze.cc	2005-07-15 04:01:02 UTC (rev 241)
+++ trunk/source/main/analyze.cc	2005-07-15 19:10:24 UTC (rev 242)
@@ -1970,6 +1970,160 @@
   }
 }
 
+double cAnalyze::AnalyzeEntropy(cAnalyzeGenotype * genotype, double mu) 
+{
+  double entropy = 0.0;
+
+  // Calculate the stats for the genotype we're working with ...
+  genotype->Recalculate();
+  const int num_insts = inst_set.GetSize();
+  const int num_lines = genotype->GetLength();
+  const cGenome & base_genome = genotype->GetGenome();
+  cGenome mod_genome(base_genome);
+  double base_fitness = genotype->GetFitness();
+  
+  // Loop through all the lines of code, testing all mutations...
+  tArray<double> test_fitness(num_insts);
+  tArray<double> prob(num_insts);
+  for (int line_no = 0; line_no < num_lines; line_no ++) {
+    int cur_inst = base_genome[line_no].GetOp();
+    
+    // Test fitness of each mutant.
+    for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
+      mod_genome[line_no].SetOp(mod_inst);
+      cAnalyzeGenotype test_genotype(mod_genome, inst_set);
+      test_genotype.Recalculate();
+      // Ajust fitness ...
+      if (test_genotype.GetFitness() <= base_fitness) {
+	test_fitness[mod_inst] = test_genotype.GetFitness();
+      } else {
+	test_fitness[mod_inst] = base_fitness;
+      }
+    }
+    
+    // Calculate probabilities at mut-sel balance
+    double w_bar = 1;
+    
+    // Normalize fitness values, assert if they are all zero
+    double maxFitness = 0.0;
+    for(int i=0; i<num_insts; i++) {
+      if(test_fitness[i] > maxFitness) {
+	maxFitness = test_fitness[i];
+      }
+    }
+    
+    if(maxFitness > 0) {
+      for(int i=0; i<num_insts; i++) {
+	test_fitness[i] /= maxFitness;
+      }
+    } else {
+      cout << "All zero fitness, ERROR." << endl;
+      return -1.0;
+    }
+    
+    while(1) {
+      double sum = 0.0;
+      for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
+	prob[mod_inst] = (mu * w_bar) / 
+	  ((double)num_insts * 
+	   (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
+	sum = sum + prob[mod_inst];
+      }
+      if ((sum-1.0)*(sum-1.0) <= 0.0001) 
+	break;
+      else
+	w_bar = w_bar - 0.000001;
+    }
+    
+    // Calculate entropy ...
+    double this_entropy = 0.0;
+    for (int i = 0; i < num_insts; i ++) {
+      this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
+    }
+    entropy += this_entropy;
+    
+    // Reset the mod_genome back to the original sequence.
+    mod_genome[line_no].SetOp(cur_inst);
+  }
+  return entropy;
+}
+
+double cAnalyze::AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype,
+					   cAnalyzeGenotype * parent, double mut_rate)
+{
+  double entropy = 0.0;
+
+  // Calculate the stats for the genotype we're working with ...
+  genotype->Recalculate();
+  const int num_insts = inst_set.GetSize();
+  const int num_lines = genotype->GetLength();
+  const cGenome & base_genome = genotype->GetGenome();
+  cGenome mod_genome(base_genome);
+  double base_fitness = genotype->GetFitness();
+
+  // Loop through all the lines of code, testing all mutations ...
+  tArray<double> test_fitness(num_insts);
+  tArray<double> prob(num_insts);
+  for (int line_no = 0; line_no < num_lines; line_no ++) {
+    int cur_inst = base_genome[line_no].GetOp();
+
+    // Test fitness of each mutant.
+    for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
+      mod_genome[line_no].SetOp(mod_inst);
+      cAnalyzeGenotype test_genotype(mod_genome, inst_set);
+      test_genotype.Recalculate();
+      test_fitness[mod_inst] = test_genotype.GetFitness();
+    }
+    
+    // Calculate probabilities at mut-sel balance
+    double w_bar = 1;
+    
+    // Normalize fitness values, assert if they are all zero
+    double maxFitness = 0.0;
+    for(int i=0; i<num_insts; i++) {
+      if(test_fitness[i] > maxFitness) {
+	maxFitness = test_fitness[i];
+      }
+    }
+    
+    if(maxFitness > 0) {
+      for(int i=0; i<num_insts; i++) {
+	test_fitness[i] /= maxFitness;
+      }
+    } else {
+      cout << "All zero fitness, ERROR." << endl;
+      return -1.0;
+    }
+    
+    while(1) {
+      double sum = 0.0;
+      for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
+	prob[mod_inst] = (mut_rate * w_bar) / 
+	  ((double)num_insts * 
+	   (w_bar + test_fitness[mod_inst] * mut_rate - test_fitness[mod_inst]));
+	sum = sum + prob[mod_inst];
+      }
+      if ((sum-1.0)*(sum-1.0) <= 0.0001) 
+	break;
+      else
+	w_bar = w_bar - 0.000001;
+    }
+    
+    // Calculate entropy ...
+    double this_entropy = 0.0;
+    this_entropy -= (1-mut_rate)*log(1-mut_rate)/log(num_insts);
+    for (int i = 0; i < num_insts; i ++) {
+      prob[i] = prob[i] * mut_rate;
+      this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
+    }
+    entropy += this_entropy;
+    
+    // Reset the mod_genome back to the base_genome.
+    mod_genome[line_no].SetOp(cur_inst);
+  }
+  return entropy;
+}
+
 void cAnalyze::CommandFitnessMatrix(cString cur_string)
 {
   if (verbose == true) cout << "Calculating fitness matrix for batch " << cur_batch << endl;
@@ -3414,7 +3568,135 @@
   batch[cur_batch].SetAligned(true);
 }
 
+// Now this command do not consider changing environment 
+// and only work for lineage and fixed-length runs.
+void cAnalyze::AnalyzeNewInfo(cString cur_string)
+{
+  cout << "Analyze new information in child about environment ..." << endl;
 
+  // Load in the variables
+  int words = cur_string.CountNumWords();
+  if (words < 1) {
+    cout << "This command requires mutation rate, skipping." << endl;
+    return;
+  }
+
+  // Get the mutation rate ...
+  double mu = cur_string.PopWord().AsDouble();
+
+  // Create the directory using the string given as the second argument
+  cString dir = cur_string.PopWord();
+  cString defaultDir = "newinfo/";
+  cString directory = PopDirectory(dir, defaultDir);
+
+  ///////////////////////////////////////////////////////
+  // Loop through all of the genotypes in this batch...
+
+  cString newinfo_fn;
+  ofstream newinfo_fp;
+  if (batch[cur_batch].IsLineage() == true) {
+    newinfo_fn.Set("%s%s.newinfo.dat", directory(), "lineage");
+    newinfo_fp.open(newinfo_fn);
+  } else {
+    cout << "This command requires the lineage in the batch, skipping.\n";
+    return;
+  }
+  newinfo_fp << "# Legend:" << endl;
+  newinfo_fp << "# 1:Child Genotype ID" << endl;
+  newinfo_fp << "# 2:Parent Genotype ID" << endl;
+  newinfo_fp << "# 3:New Information about Environment given Parent" << endl; 
+
+  tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+  cAnalyzeGenotype * parent_genotype = batch_it.Next();
+  if (parent_genotype == NULL) {
+    newinfo_fp.close();
+    return;
+  }
+  cAnalyzeGenotype * child_genotype = NULL;
+  
+  while ((child_genotype = batch_it.Next()) != NULL) {
+    assert( parent_genotype->GetLength() == child_genotype->GetLength() );
+
+    if (verbose == true) {
+      cout << endl << "Analyze new information for " << child_genotype->GetName() << endl;
+    }
+
+    // 1.
+    // Analyze the information between parent and child ...
+    // H(C) = all insts are equally probably = 1
+    // H(C|P) = -(1-u)log(1-u)-26*(u/26)*log(u/26)
+    // I(C:P) = H(C)-H(C|P)
+    const int num_insts = inst_set.GetSize();
+    const int num_lines = child_genotype->GetLength();
+    double H_C = 1;
+    double H_C_P = -(1-mu)*log(1-mu)/log(num_insts)
+      -(num_insts)*(mu/(num_insts))*log(mu/(num_insts))/log(num_insts);
+    double I_CP = (H_C - H_C_P) * num_lines;
+    if (verbose) {
+      cout << "I(C:P) =  H(C) - H(C|P) = (" << H_C << " - " <<  H_C_P << ") * 100 = " 
+	   << I_CP << endl;
+    }
+    
+    // 2.
+    // Analyze the information between parent and child given the environment.
+    // H(C|E)
+    // H(C|PE) = -(1-u)log(1-u)
+    //           -u distributed among 26 insts according to mut/sel balance 
+    // I(C:P|E) = H(C|E)-H(C|PE)
+    double H_C_E = AnalyzeEntropy(child_genotype, mu);
+    if (H_C_E < 0) {
+      cout << "Cannot calculate the entropy of genotype " << child_genotype->GetID();
+      cout << " given environment. Error." << endl;
+      return;
+    }
+    double H_C_PE = AnalyzeEntropyGivenParent(child_genotype, parent_genotype, mu);
+    if (H_C_PE < 0) {
+      cout << "Cannot calculate the entropy of genotype " << child_genotype->GetID();
+      cout << " given parent and environment. Error." << endl;
+    }
+
+    double I_CP_E = H_C_E - H_C_PE;
+    if (verbose) {
+      cout << "I(C:P|E) = H(C|E) - H(C|PE) = " << H_C_E << " - " << H_C_PE << 
+	" = " << I_CP_E << endl;
+    }
+
+    // 3.
+    // Information among parent, child and environment.
+    // I(C:P:E) = I(C:P)-I(C:P|E)
+    double I_CPE = I_CP -I_CP_E;
+    if (verbose) {
+      cout << "I(C:P:E) = I(C:P) - I(C:P|E) = " << I_CP << " - " << I_CP_E << 
+	" = " << I_CPE << endl;
+    }
+
+    // 4.
+    // New information child about environment given parent.
+    // I(C:E|P) = I(C:E)-I(C:P:E)
+    double I_CE = (double)child_genotype->GetLength() - H_C_E;
+    double I_CE_P = I_CE - I_CPE;
+    if (verbose) {
+      cout << "I(C:E|P) = I(C:E) - I(C:P:E) = " << I_CE << " - " << I_CPE << 
+	" = " << I_CE_P << endl;
+    }
+
+    // Write information to file ...
+    newinfo_fp << child_genotype->GetID() << " ";
+    newinfo_fp << parent_genotype->GetID() << " ";
+    newinfo_fp << I_CE_P << endl;
+
+    if (verbose) {
+      cout << child_genotype->GetID() << " ";
+      cout << parent_genotype->GetID() << " ";
+      cout << I_CE_P << endl;
+    }
+    parent_genotype = child_genotype;
+  }
+
+  newinfo_fp.close();
+  return;
+}
+
 void cAnalyze::WriteClone(cString cur_string)
 {
   // Load in the variables...
@@ -5410,6 +5692,7 @@
 
   // Lineage analysis commands...
   AddLibraryDef("ALIGN", &cAnalyze::CommandAlign);
+  AddLibraryDef("ANALYZE_NEWINFO", &cAnalyze::AnalyzeNewInfo);
 
   // Build input files for avida...
   AddLibraryDef("WRITE_CLONE", &cAnalyze::WriteClone);

Modified: trunk/source/main/analyze.hh
===================================================================
--- trunk/source/main/analyze.hh	2005-07-15 04:01:02 UTC (rev 241)
+++ trunk/source/main/analyze.hh	2005-07-15 19:10:24 UTC (rev 242)
@@ -167,6 +167,7 @@
 
   // Lineage Analysis Commands...
   void CommandAlign(cString cur_string);
+  void AnalyzeNewInfo(cString cur_string);   
 
   // Build Input Files for Avida
   void WriteClone(cString cur_string);
@@ -182,6 +183,9 @@
   void AnalyzeComplexity(cString cur_string);
   void AnalyzePopComplexity(cString cur_string);
   void AnalyzeEpistasis(cString cur_string);
+  double AnalyzeEntropy(cAnalyzeGenotype * genotype, double mut_rate);
+  double AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype, 
+				   cAnalyzeGenotype * parent, double mut_rate);
 
   // Environment Manipulation
   void EnvironmentSetup(cString cur_string);




More information about the Avida-cvs mailing list