[Avida-cvs] [avida-svn] r438 - trunk/source/main

ofria@myxo.css.msu.edu ofria at myxo.css.msu.edu
Mon Jan 16 17:15:40 PST 2006


Author: ofria
Date: 2006-01-16 20:15:39 -0500 (Mon, 16 Jan 2006)
New Revision: 438

Modified:
   trunk/source/main/cAnalyze.cc
   trunk/source/main/cAnalyzeGenotype.cc
   trunk/source/main/cAnalyzeGenotype.h
Log:
Some old changes to analyze mode for info theory commands.  Cleaning up account and found them.


Modified: trunk/source/main/cAnalyze.cc
===================================================================
--- trunk/source/main/cAnalyze.cc	2006-01-15 21:54:54 UTC (rev 437)
+++ trunk/source/main/cAnalyze.cc	2006-01-17 01:15:39 UTC (rev 438)
@@ -3661,9 +3661,7 @@
     // Create a copy of the genome.
     cCPUMemory mod_genome = genotype->GetGenome();
     
-    if (copy_mut_prob == 0.0 &&
-	ins_mut_prob == 0.0 &&
-	del_mut_prob == 0.0) {
+    if (copy_mut_prob == 0.0 && ins_mut_prob == 0.0 && del_mut_prob == 0.0) {
       cerr << "ERROR: All mutation rates are zero!  No complexity delta analysis possible." << endl;
       return;
     }
@@ -3671,6 +3669,8 @@
     // Perform the per-site mutations -- we are going to keep looping until
     // we trigger at least one mutation.
     int num_mutations = 0;
+    int ins_line = -1;
+    int del_line = -1;
     while (num_mutations == 0) {
       if (copy_mut_prob > 0.0) {
 	for (int i = 0; i < mod_genome.GetSize(); i++) {
@@ -3683,38 +3683,197 @@
 
       // Perform an Insertion if it has one.
       if (g_random.P(ins_mut_prob)) {
-	int ins_line = g_random.GetInt(mod_genome.GetSize() + 1);
+	ins_line = g_random.GetInt(mod_genome.GetSize() + 1);
 	mod_genome.Insert(ins_line, inst_set.GetRandomInst());
 	num_mutations++;
       }
       
       // Perform a Deletion if it has one.
       if (g_random.P(del_mut_prob)) {
-	int del_line = g_random.GetInt(mod_genome.GetSize());
+	del_line = g_random.GetInt(mod_genome.GetSize());
 	mod_genome.Remove(del_line);
 	num_mutations++;
       }
     }
     
-    // Calculate the complexities....
+    // Collect basic state before and after the mutations...
     genotype->Recalculate();
-    double start_complexity = genotype->GetKO_Complexity(); // genotype->GetComplexity();
+    double start_complexity = genotype->GetKO_Complexity();
     double start_fitness = genotype->GetFitness();
     int start_length = genotype->GetLength();
     int start_gest = genotype->GetGestTime();
+    const tArray<int> & start_task_counts = genotype->GetTaskCounts();
+    const tArray< tArray<int> > & start_KO_task_counts =
+      genotype->GetKO_TaskCounts();
 
+//     cout << "Start tasks: ";
+//     for (int i = 0; i < start_task_counts.GetSize(); i++) {
+//       cout << "  " << start_task_counts[i];
+//     }
+//     cout << endl;
+
+//     cout << "Knockouts:" << endl;
+//     for (int l = 0; l < start_length; l++) {
+//       for (int i = 0; i < start_task_counts.GetSize(); i++) {
+// 	cout << "  " << start_KO_task_counts[l][i];
+//       }
+//       cout << endl;
+//     }
+//     cout << endl;
+
     cAnalyzeGenotype new_genotype(mod_genome, inst_set);
     new_genotype.Recalculate();
-    double end_complexity = new_genotype.GetKO_Complexity();  // GetComplexity();
-    double complexity_change = end_complexity - start_complexity;
+    double end_complexity = new_genotype.GetKO_Complexity();
     double end_fitness = new_genotype.GetFitness();
     int end_length = new_genotype.GetLength();
     int end_gest = new_genotype.GetGestTime();
+    const tArray<int> & end_task_counts = new_genotype.GetTaskCounts();
+    const tArray< tArray<int> > & end_KO_task_counts =
+      new_genotype.GetKO_TaskCounts();
 
+//     cout << "End tasks: ";
+//     for (int i = 0; i < end_task_counts.GetSize(); i++) {
+//       cout << "  " << end_task_counts[i];
+//     }
+//     cout << endl;
+
+    // Calculate the complexities....
+    double complexity_change = end_complexity - start_complexity;
+
+    // Loop through each line and classify how its task info changed.
+    int total_info_new = 0;    // Site didn't encode info, but now does.
+    int total_info_shift = 0;  // Shift in which tasks this site codes for.
+    int total_info_pshift = 0; // Partial, but not total shift of tasks.
+    int total_info_share = 0;  // Site codes for more tasks than before.
+    int total_info_lost = 0;   // Site lost all tasks it encoded for.
+    int total_info_plost = 0;  // Site reduced tasks it encodes for.
+    int total_info_kept = 0;   // Site still codes for same tasks as before
+    int total_info_lack = 0;   // Site never codes for any tasks.
+ 
+    const int num_tasks = start_task_counts.GetSize();
+    tArray<int> mut_effects(num_tasks);
+    for (int i = 0; i < num_tasks; i++) {
+      mut_effects[i] = end_task_counts[i] - start_task_counts[i];
+    }
+
+    int end_line = 0;
+    for (int start_line = 0; start_line < start_length; start_line++) {
+      if (start_line == del_line) {
+	// This line was deleted in the end.  Skip it, but don't increment
+	// the end_line
+	continue;
+      }
+      if (start_line == ins_line) {
+	// This position had an insertion.  Deal with it and then skip it.
+	end_line++;
+
+	// No "continue" here.  With the updated end_line we can move on.
+      }
+
+      // If we made it this far, the start_line and end_line should be aligned.
+      int info_maintained_count = 0;
+      int info_gained_count = 0;
+      int info_lost_count = 0;
+
+      for (int cur_task = 0; cur_task < num_tasks; cur_task++) {
+	// At the organism level, the mutation may have caused four options
+	// for this task  (A) Was never present, (B) Was present and still is,
+	// (C) Was not present, but is now, or (D) Was present, but was lost.
+
+	// Case A:
+	if (start_task_counts[cur_task]==0 && end_task_counts[cur_task]==0) {
+	  // This task was never done.  Keep looping.
+	  continue;
+	}
+
+	// Case B:
+	if (start_task_counts[cur_task] == end_task_counts[cur_task]) {
+	  // The task hasn't changed.  Has its encoding?
+	  bool KO_start = true;
+	  bool KO_end = true;
+	  if (start_KO_task_counts[start_line][cur_task]  ==
+	      start_task_counts[cur_task]) {
+	    // start_count is unchanged by knocking out this line.
+	    KO_start = false;
+	  }
+	  if (end_KO_task_counts[end_line][cur_task]  ==
+	      end_task_counts[cur_task]) {
+	    // end_count is unchanged by knocking out this line.
+	    KO_end = false;
+	  }
+
+	  if (KO_start == true && KO_end == true) info_maintained_count++;
+	  if (KO_start == true && KO_end == false) info_lost_count++;
+	  if (KO_start == false && KO_end == true) info_gained_count++;
+	  continue;
+	}
+
+	// Case C:
+	if (start_task_counts[cur_task] < end_task_counts[cur_task]) {
+	  // Task was GAINED...  Is this site important?
+	  if (end_KO_task_counts[end_line][cur_task]  <
+	      end_task_counts[cur_task]) {
+	    info_gained_count++;
+	  }
+	  continue;
+	}
+
+	// Case D:
+	if (start_task_counts[cur_task] > end_task_counts[cur_task]) {
+	  // The task was LOST...  Was this site important?
+	  if (start_KO_task_counts[start_line][cur_task]  <
+	      start_task_counts[cur_task]) {
+	    info_lost_count++;
+	  }
+	  continue;
+	}
+      }
+
+      // We now have counts and know how often this site was responsible for
+      // a task gain, a task loss, or a task being maintained.
+
+      bool has_keep = info_maintained_count > 0;
+      bool has_loss = info_lost_count > 0;
+      bool has_gain = info_gained_count > 0;      
+
+      if      ( !has_loss  &&  !has_gain  &&  !has_keep ) total_info_lack++;
+      else if ( !has_loss  &&  !has_gain  &&   has_keep ) total_info_kept++;
+      else if ( !has_loss  &&   has_gain  &&  !has_keep ) total_info_new++;
+      else if ( !has_loss  &&   has_gain  &&   has_keep ) total_info_share++;
+      else if (  has_loss  &&  !has_gain  &&  !has_keep ) total_info_lost++;
+      else if (  has_loss  &&  !has_gain  &&   has_keep ) total_info_plost++;
+      else if (  has_loss  &&   has_gain  &&  !has_keep ) total_info_shift++;
+      else if (  has_loss  &&   has_gain  &&   has_keep ) total_info_pshift++;
+
+      end_line++;
+    }
+
+
+    // Output the results.
     df.Write(num_mutations, "Number of mutational differences between original organism and mutant.");
     df.Write(complexity_change, "Complexity difference between original organism and mutant.");
-    df.Write(start_complexity, "Complexity of initial organism.");
-    df.Write(end_complexity, "Complexity of mutant.");
+    df.Write(start_complexity, "Total complexity of initial organism.");
+    df.Write(end_complexity, "Total complexity of mutant.");
+
+    // Broken down complexity info
+    df.Write(total_info_lack, "Num sites with no info at all.");
+    df.Write(total_info_kept, "Num sites with info, but no change.");
+    df.Write(total_info_new, "Num sites with new info (prev. none).");
+    df.Write(total_info_share, "Num sites with newly shared info.");
+    df.Write(total_info_lost, "Num sites with lost info.");
+    df.Write(total_info_plost, "Num sites with parital lost info.");
+    df.Write(total_info_shift, "Num sites with shift in info.");
+    df.Write(total_info_pshift, "Num sites with partial shift in info.");
+
+    // Start and End task counts...
+    for (int i = 0; i < start_task_counts.GetSize(); i++) {
+      df.Write(start_task_counts[i], cStringUtil::Stringf("Start task %d", i));
+    }
+
+    for (int i = 0; i < end_task_counts.GetSize(); i++) {
+      df.Write(end_task_counts[i], cStringUtil::Stringf("End task %d", i));
+    }
+
     df.Write(start_fitness, "Fitness of initial organism.");
     df.Write(end_fitness, "Fitness of mutant.");
     df.Write(start_length, "Length of initial organism.");

Modified: trunk/source/main/cAnalyzeGenotype.cc
===================================================================
--- trunk/source/main/cAnalyzeGenotype.cc	2006-01-15 21:54:54 UTC (rev 437)
+++ trunk/source/main/cAnalyzeGenotype.cc	2006-01-17 01:15:39 UTC (rev 438)
@@ -153,7 +153,7 @@
   return cConfig::GetTestCPUTimeMod() * genome.GetSize();
 }
 
-void cAnalyzeGenotype::CalcKnockouts(bool check_pairs) const
+void cAnalyzeGenotype::CalcKnockouts(bool check_pairs, bool check_chart) const
 {
   if (knockout_stats == NULL) {
     // We've never called this before -- setup the stats.
@@ -163,6 +163,10 @@
     // We don't have the pair stats we need -- keep going.
     knockout_stats->Reset();
   }
+  else if (check_chart == true && knockout_stats->has_chart_info == false) {
+    // We don't have the phenotype chart we need -- keep going.
+    knockout_stats->Reset();
+  }
   else {
     // We already have all the info we need -- just quit.
     return;
@@ -174,6 +178,7 @@
   cAnalyzeGenotype base_genotype(genome, inst_set);
   base_genotype.Recalculate();      
   double base_fitness = base_genotype.GetFitness();
+  const tArray<int> base_task_counts( base_genotype.GetTaskCounts() );
 
   // If the base fitness is 0, the organism is dead and has no complexity.
   if (base_fitness == 0.0) {
@@ -197,6 +202,13 @@
     ko_inst_set.Add2(lib_null_inst.GetOp());
   }
   const cInstruction null_inst = ko_inst_set.GetInst("NULL");
+
+  // If we are keeping track of the specific effects on tasks from the
+  // knockouts, setup the matrix.
+  if (check_chart == true) {
+    knockout_stats->task_counts.Resize(length);
+    knockout_stats->has_chart_info = true;
+  }
     
   // Loop through all the lines of code, testing the removal of each.
   // -2=lethal, -1=detrimental, 0=neutral, 1=beneficial
@@ -207,7 +219,11 @@
     mod_genome[line_num] = null_inst;
     cAnalyzeGenotype ko_genotype(mod_genome, ko_inst_set);
     ko_genotype.Recalculate();
-      
+    if (check_chart == true) {
+      const tArray<int> ko_task_counts( ko_genotype.GetTaskCounts() );
+      knockout_stats->task_counts[line_num] = ko_task_counts;
+    }
+    
     double ko_fitness = ko_genotype.GetFitness();
     if (ko_fitness == 0.0) {
       knockout_stats->dead_count++;
@@ -434,8 +450,14 @@
   return knockout_stats->pair_dead_count + knockout_stats->pair_neg_count;
 }
 
+const tArray< tArray<int> > & cAnalyzeGenotype::GetKO_TaskCounts() const
+{
+  CalcKnockouts(false, true);  // Make sure knockouts are calculated
+  return knockout_stats->task_counts;
+}
 
 
+
 double cAnalyzeGenotype::GetFracDead() const
 {
   CalcLandscape();  // Make sure the landscape is calculated...

Modified: trunk/source/main/cAnalyzeGenotype.h
===================================================================
--- trunk/source/main/cAnalyzeGenotype.h	2006-01-15 21:54:54 UTC (rev 437)
+++ trunk/source/main/cAnalyzeGenotype.h	2006-01-17 01:15:39 UTC (rev 438)
@@ -88,12 +88,15 @@
     int neut_count;
     int pos_count;
 
-    bool has_pair_info;
+    bool has_pair_info;  // Try all pairs of knocks to get epistasis effects?
     int pair_dead_count;
     int pair_neg_count;
     int pair_neut_count;
     int pair_pos_count;
 
+    bool has_chart_info; // Keep a chart of which sites affect which tasks?
+    tArray< tArray<int> > task_counts;
+
     void Reset() {
       dead_count = 0;
       neg_count = 0;
@@ -105,6 +108,9 @@
       pair_neg_count = 0;
       pair_neut_count = 0;
       pair_pos_count = 0;
+
+      has_chart_info = false;
+      task_counts.Resize(0);
     }
 
     cAnalyzeKnockouts() { Reset(); }
@@ -139,7 +145,7 @@
   }
 
   int CalcMaxGestation() const;
-  void CalcKnockouts(bool check_pairs=false) const;
+  void CalcKnockouts(bool check_pairs=false, bool check_chart=false) const;
   void CalcLandscape() const;
 public:
   cAnalyzeGenotype(cString symbol_string, cInstSet & in_inst_set);
@@ -236,6 +242,7 @@
   int GetKOPair_NeutCount() const;
   int GetKOPair_PosCount() const;
   int GetKOPair_Complexity() const;
+  const tArray< tArray<int> > & GetKO_TaskCounts() const;
 
   // Landscape accessors
   double GetFracDead() const;
@@ -264,6 +271,9 @@
     if (special_args.HasString("binary")) return (task_counts[task_id] > 0);
     return task_counts[task_id];
   }
+  const tArray<int> & GetTaskCounts() const {
+    return task_counts;
+  }
 
   // Comparisons...  Compares a genotype to the "previous" one, which is
   // passed in, in one specified phenotype.




More information about the Avida-cvs mailing list