[Avida-SVN] r2798 - in development/source: analyze cpu

jbarrick at myxo.css.msu.edu jbarrick at myxo.css.msu.edu
Sat Sep 27 10:39:47 PDT 2008


Author: jbarrick
Date: 2008-09-27 13:39:46 -0400 (Sat, 27 Sep 2008)
New Revision: 2798

Modified:
   development/source/analyze/cAnalyze.cc
   development/source/analyze/cAnalyze.h
   development/source/analyze/cAnalyzeGenotype.h
   development/source/cpu/cInstSet.h
Log:
Instructions that fail (due to prob_fail) are flagged as executed so that high failure organisms can still meet division conditions (cHardwareCPU only).

Added CommandAnalyzeRedundancyByInstFailure

Tests each genotype in the current batch at a range of instruction failure probabilities and outputs, for each probability, the fraction of replicates in which its fitness drops below its failure-free baseline fitness.

Skeleton for cAnalyze::FindLastCommonAncestor

Modified: development/source/analyze/cAnalyze.cc
===================================================================
--- development/source/analyze/cAnalyze.cc	2008-09-27 17:38:59 UTC (rev 2797)
+++ development/source/analyze/cAnalyze.cc	2008-09-27 17:39:46 UTC (rev 2798)
@@ -1265,6 +1265,7 @@
   batch[cur_batch].SetAligned(false);
 }
 
+
 void cAnalyze::FindSexLineage(cString cur_string)
 {
   
@@ -1493,6 +1494,146 @@
   batch[cur_batch].SetAligned(false);
 }
 
+// Skeleton for the FIND_LAST_COMMON_ANCESTOR analyze command; currently a no-op. @JEB 9-25-2008
+void cAnalyze::FindLastCommonAncestor(cString cur_string)
+{  
+  // NOTE: the draft implementation below is entirely commented out, so cur_string is ignored and the batch is left untouched.
+/*
+  // Assumes that the current batch contains a population and all of its common ancestors
+  // Finds the last common ancestor among all current organisms that are still alive,
+  // i.e. have an update_died of -1.
+
+  cout << "Finding last common ancestor of batch " << cur_batch << endl;
+  
+  // Make a list of alive organisms
+  tListPlus<cAnalyzeGenotype> alive_list;
+  {
+    tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+    cAnalyzeGenotype * test_genotype = NULL;
+    while ((test_genotype = batch_it.Next()) != NULL) {
+      if (test_genotype->GetUpdateDead() == -1) {
+        alive_list.Push(test_genotype);
+      }
+    }
+  }
+  
+  if (m_world->GetVerbosity() >= VERBOSE_ON) {
+    cout << "  Number of genotypes that are alive: " << alive_list.GetSize() << endl;
+    cout << "  Number of ancestor genotypes: " << batch[cur_batch].List().GetSize() << endl;
+  }
+    
+  // Extract the lineage of the first alive organism.
+  // The LCA must be among these genotypes. The approach is to step back one ancestor
+  // at a time, collect all of its descendants, and then check to see if there are
+  // any alive organisms that have not been collected yet.
+  
+  // find the lineage of the first genotype...
+  cAnalyzeGenotype * first_alive_genotype = alive_list.Pop();
+  tListPlus<cAnalyzeGenotype> master_lineage;
+  {
+    master_lineage.Push(first_alive_genotype);
+    int next_id = first_alive_genotype->GetParentID();
+    bool found = true;
+    while (found == true) {
+      found = false;
+      
+      tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+      cAnalyzeGenotype * found_gen = NULL;
+      while ((found_gen = batch_it.Next()) != NULL) {
+        if (found_gen->GetID() == next_id) {
+          master_lineage.Push(found_gen);
+          next_id = found_gen->GetParentID();
+          found = true;
+          break;
+        }
+      }
+    }
+  }
+ 
+  if (m_world->GetVerbosity() >= VERBOSE_ON) {
+    cout << "  Size of master lineage: " << master_lineage.GetSize() << endl;
+  }
+  // NOTE(review): the draft from here on is unfinished -- collect_genotype, alive_genotype and next_collect_genotype_list are never declared.
+  tListIterator<cAnalyzeGenotype> master_lineage_batch_it(master_lineage);
+  
+  while ((collect_genotype = master_lineage_batch_it.Next()) != NULL) {
+    
+    // collect all children of the current lineage genotype
+    tListPlus<cAnalyzeGenotype> collect_genotype_list;
+    collect_genotype_list.PushRear(collect_genotype);
+    tListIterator<cAnalyzeGenotype> collect_batch_it(collect_genotype_list);
+    
+    next_collect_genotype_list;
+    
+    int current_id = alive_genotype->GetID();
+    int parent_id = alive_genotype->GetParentID();
+    bool found_parent = true;
+    bool found_in_master_lineage = false;
+    while (found_parent == true) {
+        
+      // Check to see if this id is among those in the first lineage.       
+      tListIterator<cAnalyzeGenotype> master_lineage_batch_it(master_lineage);
+      cAnalyzeGenotype * master_lineage_genotype;
+      while ((master_lineage_genotype = master_lineage_batch_it.Next()) != NULL) {
+        if (master_lineage_genotype->GetID() == current_id) break;
+      }
+
+      found_in_master_lineage = master_lineage_genotype != NULL;
+      if (found_in_master_lineage) {
+        
+        // Remove anything in the master lineage that is past this point.
+        // as it is younger than the new most recent common ancestor
+        while ((master_lineage_genotype = master_lineage_batch_it.Next()) != NULL) {
+          master_lineage_batch_it.Remove();
+        }
+        
+        // We can also stop looking at ancestors of the current alive_genotype
+        if (found_in_master_lineage) break;
+      }
+      
+      // Find the ancestor of the current organism in the alive_genotype lineage
+      found_parent = false;      
+      tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+      cAnalyzeGenotype * test_genotype = NULL;
+      while ((test_genotype = batch_it.Next()) != NULL) {
+        if (test_genotype->GetID() == parent_id) {
+          parent_id = test_genotype->GetParentID();
+          current_id = test_genotype->GetID();
+          found_parent = true;
+          break;
+        }
+      }
+    }
+    
+    // Warn if we did not find a common ancestor at all.
+    if (!found_in_master_lineage) { 
+      cout << "  Warning! Did not find common ancestor between two organisms. " << endl;
+    }
+    
+    if (m_world->GetVerbosity() >= VERBOSE_ON) {
+      cout << "  Size of master lineage: " << master_lineage.GetSize() << endl;
+    }
+  }
+  
+  // The first one left in this lineage is the one we want to save.
+  cAnalyzeGenotype * last_common_ancestor = master_lineage.Pop();
+  
+  // Delete everything else. NOTE(review): Pop() presumably removes an end of the list, not delete_genotype itself -- verify before enabling.
+  tListIterator<cAnalyzeGenotype> delete_batch_it(batch[cur_batch].List());
+  cAnalyzeGenotype * delete_genotype = NULL;
+  while ((delete_genotype = delete_batch_it.Next()) != NULL) {
+    if (delete_genotype->GetID() != last_common_ancestor->GetID()) {
+      delete batch[cur_batch].List().Pop();
+    }
+  }
+  
+  // And fill it back in with the good stuff.
+  batch[cur_batch].List().PushRear(last_common_ancestor);
+
+  */
+}
+
+
 void cAnalyze::SampleOrganisms(cString cur_string)
 {
   double fraction = cur_string.PopWord().AsDouble();
@@ -5388,6 +5529,87 @@
 }
 
 
+// ANALYZE_REDUNDANCY_BY_INST_FAILURE [filename] [replicates]: for each genotype in the current batch, report the fraction of
+// replicate recalculations whose fitness falls below the failure-free baseline, over a range of instruction failure probabilities.
+// @JEB 9-24-2008
+void cAnalyze::CommandAnalyzeRedundancyByInstFailure(cString cur_string)
+{
+  cString filename("analyze_redundancy_by_inst_failure.dat");
+  if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
+  int replicates = 1000;
+  if (cur_string.GetSize() != 0) replicates = cur_string.PopWord().AsInt();
+  // NOTE(review): replicates is not validated; a value of 0 makes chance/replicates below a division by zero -- confirm intended handling.
+  // Output is one line per genotype with positive baseline fitness: name, id, fitness, then one column per failure probability.
+  cDataFile & df = m_world->GetDataFile(filename);
+  df.WriteComment( "Redundancy calculated by changing the probability of instruction failure" );
+  cString s;
+  s.Set("%i replicates at each chance of instruction failure", replicates);
+  df.WriteComment(s);
+  df.WriteTimeStamp();
+
+  // Loop through all of the genotypes in this batch...
+
+  tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+  cAnalyzeGenotype * genotype = NULL;
+  while ((genotype = batch_it.Next()) != NULL) {
+
+    if (m_world->GetVerbosity() >= VERBOSE_ON) {
+      cout << "  Determining redundancy by instruction failure for " << genotype->GetName() << endl;
+    }
+    // Work on a local copy of the genotype's instruction set.
+     cInstSet modify_inst_set = genotype->GetInstructionSet();
+      
+    // Zero every instruction's failure probability so the baseline is measured without failures.
+    for (int j=0; j<modify_inst_set.GetSize(); j++)
+    {
+      cString inst_name = modify_inst_set.GetName(j);
+      cInstruction inst = modify_inst_set.GetInst(inst_name);
+      modify_inst_set.SetProbFail(inst, 0);
+    }
+    genotype->SetInstructionSet(modify_inst_set);
+  
+    // Recalculate the baseline fitness
+    // May need to calculate multiple times to check for stochastic behavior....
+    genotype->Recalculate(m_ctx);
+    double baseline_fitness = genotype->GetFitness();
+  
+    if (baseline_fitness>0)
+    {
+      // Write identifying information for this genotype
+      df.Write(genotype->GetName(), "genotype name");
+      df.Write(genotype->GetID(), "genotype id");
+      df.Write(baseline_fitness, "fitness");
+      
+      // Sweep the failure probability fc = 10^log10_fc, with log10_fc starting at -4 and rising in steps of 0.1.
+      // NOTE(review): log10_fc is accumulated in floating point, so whether the last step (fc = 1) is reached depends on rounding -- confirm.
+      for (double log10_fc=-4.0; log10_fc<=0.0; log10_fc+=0.1)
+      {
+        double fc = exp(log10_fc*log(10));  // i.e. 10 raised to log10_fc
+        
+        // Apply fc as the failure probability of every instruction.
+        for (int j=0; j<modify_inst_set.GetSize(); j++)
+        {
+          cString inst_name = modify_inst_set.GetName(j);
+          cInstruction inst = modify_inst_set.GetInst(inst_name);
+          modify_inst_set.SetProbFail(inst, fc);
+        }
+        genotype->SetInstructionSet(modify_inst_set);  // NOTE(review): the genotype's original instruction set is never restored afterwards
+        
+        // Recalculate the requested number of times, counting how often fitness drops below the baseline
+        double chance = 0;
+        for (int i=0; i<replicates; i++)
+        {
+          genotype->Recalculate(m_ctx);
+          if (genotype->GetFitness() < baseline_fitness) chance++;
+        }      
+        s.Set("Inst prob fail %.3g", fc);
+        df.Write(chance/replicates, s);
+      }
+      df.Endl();
+    }
+  }
+}
+
 void cAnalyze::CommandMapMutations(cString cur_string)
 {
   cout << "Constructing genome mutations maps..." << endl;
@@ -9254,6 +9476,7 @@
   AddLibraryDef("FIND_LINEAGE", &cAnalyze::FindLineage);
   AddLibraryDef("FIND_SEX_LINEAGE", &cAnalyze::FindSexLineage);
   AddLibraryDef("FIND_CLADE", &cAnalyze::FindClade);
+  AddLibraryDef("FIND_LAST_COMMON_ANCESTOR", &cAnalyze::FindLastCommonAncestor);  
   AddLibraryDef("SAMPLE_ORGANISMS", &cAnalyze::SampleOrganisms);
   AddLibraryDef("SAMPLE_GENOTYPES", &cAnalyze::SampleGenotypes);
   AddLibraryDef("KEEP_TOP", &cAnalyze::KeepTopGenotypes);
@@ -9286,6 +9509,7 @@
   AddLibraryDef("MAP", &cAnalyze::CommandMapTasks);  // Deprecated...
   AddLibraryDef("MAP_TASKS", &cAnalyze::CommandMapTasks);
   AddLibraryDef("AVERAGE_MODULARITY", &cAnalyze::CommandAverageModularity);
+  AddLibraryDef("ANALYZE_REDUNDANCY_BY_INST_FAILURE", &cAnalyze::CommandAnalyzeRedundancyByInstFailure);
   AddLibraryDef("MAP_MUTATIONS", &cAnalyze::CommandMapMutations);
   AddLibraryDef("ANALYZE_COMPLEXITY", &cAnalyze::AnalyzeComplexity);
   AddLibraryDef("ANALYZE_FITNESS_TWO_SITES", &cAnalyze::AnalyzeFitnessLandscapeTwoSites);

Modified: development/source/analyze/cAnalyze.h
===================================================================
--- development/source/analyze/cAnalyze.h	2008-09-27 17:38:59 UTC (rev 2797)
+++ development/source/analyze/cAnalyze.h	2008-09-27 17:39:46 UTC (rev 2798)
@@ -234,6 +234,7 @@
   void FindLineage(cString cur_string);
   void FindSexLineage(cString cur_string);
   void FindClade(cString cur_string);
+  void FindLastCommonAncestor(cString cur_string);
   void SampleOrganisms(cString cur_string);
   void SampleGenotypes(cString cur_string);
   void KeepTopGenotypes(cString cur_string);
@@ -268,6 +269,7 @@
   void CommandMapTasks(cString cur_string);
   void CommandAverageModularity(cString cur_string);
   void CommandAnalyzeModularity(cString cur_string);
+  void CommandAnalyzeRedundancyByInstFailure(cString cur_string);
   void CommandMapMutations(cString cur_string);
   void CommandMapDepth(cString cur_string);
   void CommandPairwiseEntropy(cString cur_string);

Modified: development/source/analyze/cAnalyzeGenotype.h
===================================================================
--- development/source/analyze/cAnalyzeGenotype.h	2008-09-27 17:38:59 UTC (rev 2797)
+++ development/source/analyze/cAnalyzeGenotype.h	2008-09-27 17:39:46 UTC (rev 2798)
@@ -37,6 +37,9 @@
 #ifndef cGenome_h
 #include "cGenome.h"
 #endif
+#ifndef cInstSet_h
+#include "cInstSet.h"
+#endif
 #ifndef cLandscape_h
 #include "cLandscape.h"
 #endif
@@ -241,6 +244,7 @@
   // Set...
   void SetSequence(cString _sequence);
   void SetName(const cString & _name) { name = _name; }
+  void SetInstructionSet(cInstSet& _inst_set) { m_inst_set = _inst_set; }  // copies _inst_set into m_inst_set; NOTE(review): argument is only read, could be const cInstSet&
   void SetAlignedSequence(const cString & _seq) { aligned_sequence = _seq; }
   void SetTag(const cString & _tag) { tag = _tag; }
 
@@ -285,6 +289,7 @@
   // Accessors...
   const cGenome & GetGenome() const { return genome; }
   const cString & GetName() const { return name; }
+  const cInstSet & GetInstructionSet() const { return m_inst_set; }  // read-only reference to the stored m_inst_set
   const cString & GetAlignedSequence() const { return aligned_sequence; }
   cString GetExecutedFlags() const { return executed_flags; }
   cString GetAlignmentExecutedFlags() const;

Modified: development/source/cpu/cInstSet.h
===================================================================
--- development/source/cpu/cInstSet.h	2008-09-27 17:38:59 UTC (rev 2797)
+++ development/source/cpu/cInstSet.h	2008-09-27 17:39:46 UTC (rev 2798)
@@ -138,6 +138,9 @@
 
   // Insertion of new instructions...
   cInstruction ActivateNullInst();
+  
+  // Modification of instructions during run: overwrite the prob_fail stored in m_lib_name_map for inst's opcode.
+  void SetProbFail(const cInstruction& inst, double _prob_fail) { m_lib_name_map[inst.GetOp()].prob_fail = _prob_fail; }  // NOTE(review): neither the opcode index nor the value of _prob_fail is checked here
 
   // accessors for instruction library
   cInstLib* GetInstLib() { return m_inst_lib; }




More information about the Avida-cvs mailing list