[Avida-SVN] r2545 - development/source/analyze

kaben at myxo.css.msu.edu kaben at myxo.css.msu.edu
Mon Apr 21 19:36:03 PDT 2008


Author: kaben
Date: 2008-04-21 22:36:03 -0400 (Mon, 21 Apr 2008)
New Revision: 2545

Modified:
   development/source/analyze/cAnalyze.cc
   development/source/analyze/cAnalyze.h
   development/source/analyze/cAnalyzeTreeStats_Gamma.cc
   development/source/analyze/cAnalyzeTreeStats_Gamma.h
Log:
Rewrite of analyze command 'PRINT_GAMMA'.

Modified: development/source/analyze/cAnalyze.cc
===================================================================
--- development/source/analyze/cAnalyze.cc	2008-04-21 20:43:50 UTC (rev 2544)
+++ development/source/analyze/cAnalyze.cc	2008-04-22 02:36:03 UTC (rev 2545)
@@ -3282,6 +3282,30 @@
 
 
 // Calculate Pybus-Harvey gamma statistic for trees in population.
+void cAnalyze::Original_CommandPrintGamma(cString cur_string)
+{
+  if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing Pybus-Harvey gamma statistic for batch "
+    << cur_batch << endl;
+  else cout << "Printing Pybus-Harvey gamma statistic..." << endl;
+  
+  // Load in the variables...
+  cString filename("gamma.dat");
+  if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
+  
+  ofstream& fp = m_world->GetDataFileOFStream(filename);
+  
+  fp << "# Legend:" << endl;
+  fp << "# 1: Pybus-Harvey gamma statistic" << endl;
+  fp << endl;
+  
+  cAnalyzeTreeStats_Orig_Gamma agts(m_world);
+  agts.AnalyzeBatch(batch[cur_batch].List());
+  
+  fp << agts.Gamma();
+  fp << endl;
+}
+
+// Calculate Pybus-Harvey gamma statistic for trees in population.
 void cAnalyze::CommandPrintGamma(cString cur_string)
 {
   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing Pybus-Harvey gamma statistic for batch "
@@ -3289,8 +3313,16 @@
   else cout << "Printing Pybus-Harvey gamma statistic..." << endl;
   
   // Load in the variables...
+  int end_time = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;         // #1
+  if (end_time < 0) {
+    cout << "Error: end_time (argument 1) must be specified as nonzero." << endl;
+    return;
+  }
+  
   cString filename("gamma.dat");
   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
+
+  int furcation_time_convention = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 2;         // #1
   
   ofstream& fp = m_world->GetDataFileOFStream(filename);
   
@@ -3299,7 +3331,7 @@
   fp << endl;
   
   cAnalyzeTreeStats_Gamma agts(m_world);
-  agts.AnalyzeBatch(batch[cur_batch].List());
+  agts.AnalyzeBatch(batch[cur_batch].List(), end_time, furcation_time_convention);
   
   fp << agts.Gamma();
   fp << endl;
@@ -9241,6 +9273,7 @@
   AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
   AddLibraryDef("PRINT_TREE_STATS", &cAnalyze::CommandPrintTreeStats);
   AddLibraryDef("PRINT_CUMULATIVE_STEMMINESS", &cAnalyze::CommandPrintCumulativeStemminess);
+  AddLibraryDef("ORIGINAL_PRINT_GAMMA", &cAnalyze::Original_CommandPrintGamma);
   AddLibraryDef("PRINT_GAMMA", &cAnalyze::CommandPrintGamma);
   AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::AnalyzeCommunityComplexity);
   AddLibraryDef("PRINT_RESOURCE_FITNESS_MAP", &cAnalyze::CommandPrintResourceFitnessMap);

Modified: development/source/analyze/cAnalyze.h
===================================================================
--- development/source/analyze/cAnalyze.h	2008-04-21 20:43:50 UTC (rev 2544)
+++ development/source/analyze/cAnalyze.h	2008-04-22 02:36:03 UTC (rev 2545)
@@ -258,6 +258,7 @@
   void CommandPrintDiversity(cString cur_string);
   void CommandPrintTreeStats(cString cur_string);
   void CommandPrintCumulativeStemminess(cString cur_string);
+  void Original_CommandPrintGamma(cString cur_string);
   void CommandPrintGamma(cString cur_string);
   void PhyloCommunityComplexity(cString cur_string);
   void AnalyzeCommunityComplexity(cString cur_string);

Modified: development/source/analyze/cAnalyzeTreeStats_Gamma.cc
===================================================================
--- development/source/analyze/cAnalyzeTreeStats_Gamma.cc	2008-04-21 20:43:50 UTC (rev 2544)
+++ development/source/analyze/cAnalyzeTreeStats_Gamma.cc	2008-04-22 02:36:03 UTC (rev 2545)
@@ -1,5 +1,5 @@
 /*
- *  cAnalyzeTreeStats_Gamma.cc
+ *  cAnalyzeTreeStats_Orig_Gamma.cc
  *  Avida at vallista
  *
  *  Created by Kaben Nanlohy on 2007.12.03.
@@ -34,14 +34,14 @@
 using namespace std;
 
 
-cAnalyzeTreeStats_Gamma::cAnalyzeTreeStats_Gamma(cWorld* world)
+cAnalyzeTreeStats_Orig_Gamma::cAnalyzeTreeStats_Orig_Gamma(cWorld* world)
 : m_world(world)
 , m_gen_array(0)
 , m_g(0)
 , m_gamma(0.)
 {}
 
-void cAnalyzeTreeStats_Gamma::LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list){
+void cAnalyzeTreeStats_Orig_Gamma::LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list){
   cAnalyzeGenotype * genotype = NULL;
   tListIterator<cAnalyzeGenotype> batch_it(genotype_list);
   
@@ -53,10 +53,10 @@
     array_pos++;
   }
 }
-void cAnalyzeTreeStats_Gamma::QSortGenotypes(void){
+void cAnalyzeTreeStats_Orig_Gamma::QSortGenotypes(void){
   QSortAGPhyloDepth(m_gen_array);
 }
-void cAnalyzeTreeStats_Gamma::CalculateInternodeDistances(void){
+void cAnalyzeTreeStats_Orig_Gamma::CalculateInternodeDistances(void){
   m_g.Resize(1 + m_gen_array.GetSize());
   m_g[0] = 0;
   m_g[1] = m_gen_array[0]->GetDepth() - 0;
@@ -64,7 +64,7 @@
     m_g[i+1] = m_gen_array[i]->GetDepth() - m_gen_array[i-1]->GetDepth();
   }  
 }
-void cAnalyzeTreeStats_Gamma::FixupInternodeDistances(void){
+void cAnalyzeTreeStats_Orig_Gamma::FixupInternodeDistances(void){
   bool in_redundant_subsequence = false;
   int saved_g = -1;
   for(int i = 1; i < m_gen_array.GetSize(); i++) {
@@ -87,7 +87,7 @@
     m_g[m_gen_array.GetSize()] = saved_g;
   }
 }
-void cAnalyzeTreeStats_Gamma::CalculateGamma(void){
+void cAnalyzeTreeStats_Orig_Gamma::CalculateGamma(void){
   // n: number of leaves, constant for a given tree.
   int n = m_gen_array.GetSize();
   
@@ -112,11 +112,11 @@
   
   m_gamma = ( ( (1./(n-2.)) * so ) - (T/2.) ) / ( T*sqrt( 1. / (12.*(n-2.)) ) );
 }
-double cAnalyzeTreeStats_Gamma::Gamma(void){
+double cAnalyzeTreeStats_Orig_Gamma::Gamma(void){
   return m_gamma;
 }
 
-void cAnalyzeTreeStats_Gamma::AnalyzeBatch(tList<cAnalyzeGenotype> &genotype_list){
+void cAnalyzeTreeStats_Orig_Gamma::AnalyzeBatch(tList<cAnalyzeGenotype> &genotype_list){
   LoadGenotypes(genotype_list);
   QSortGenotypes();
   CalculateInternodeDistances();
@@ -149,3 +149,229 @@
 void QSortAGUpdateBorn(tArray<cAnalyzeGenotype *> &gen_array){
   gen_array.QSort(CompareAGUpdateBorn);
 }  
+
+
+
+/* Rewrite */
+cAnalyzeLineageFurcation::cAnalyzeLineageFurcation(
+  cAnalyzeGenotype *parent,
+  cAnalyzeGenotype *first_child,
+  cAnalyzeGenotype *second_child
+):m_parent(parent), m_first_child(first_child), m_second_child(second_child)
+{
+}
+bool cAnalyzeLineageFurcation::operator==(const cAnalyzeLineageFurcation &in) const {
+  if (m_parent != in.m_parent) { return false; }
+  if (m_first_child != in.m_first_child) { return false; }
+  if (m_second_child != in.m_second_child) { return false; }
+  return true;
+}
+
+int FurcationTimePolicy_ParentBirth(cAnalyzeLineageFurcation &furcation){
+  if(furcation.m_parent){ return furcation.m_parent->GetUpdateBorn();
+  } else { return -1; }
+}
+int FurcationTimePolicy_FirstChildBirth(cAnalyzeLineageFurcation &furcation){
+  if(furcation.m_first_child){ return furcation.m_first_child->GetUpdateBorn();
+  } else { return -1; }
+}
+int FurcationTimePolicy_SecondChildBirth(cAnalyzeLineageFurcation &furcation){
+  if(furcation.m_second_child){ return furcation.m_second_child->GetUpdateBorn();
+  } else { return -1; }
+}
+int CompareInt(const void * _a, const void * _b){
+  int a(*((int*)_a));
+  int b(*((int*)_b));
+  if(a > b){ return 1; }
+  if(a < b){ return -1; }
+  return 0;
+}
+
+
+cAnalyzeTreeStats_Gamma::cAnalyzeTreeStats_Gamma(cWorld* world){
+}
+
+void cAnalyzeTreeStats_Gamma::LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list){
+  cAnalyzeGenotype * genotype = NULL;
+  tListIterator<cAnalyzeGenotype> batch_it(genotype_list);
+  
+  m_gen_array.Resize(genotype_list.GetSize());
+  int array_pos = 0;
+  batch_it.Reset();
+  while ((genotype = batch_it.Next()) != NULL) {
+    m_gen_array[array_pos] = genotype;
+    array_pos++;
+  }
+}
+
+void cAnalyzeTreeStats_Gamma::MapIDToGenotypePos(
+  tArray<cAnalyzeGenotype *> &lineage,
+  tHashTable<int, int> &out_mapping
+){
+  out_mapping.ClearAll();
+  for(int i = 0; i < lineage.GetSize(); i++){
+    out_mapping.SetValue(lineage[i]->GetID(), i);
+  }
+}
+
+void cAnalyzeTreeStats_Gamma::Unlink(tArray<cAnalyzeGenotype *> &lineage){
+  for(int i = 0; i < lineage.GetSize(); i++){
+    lineage[i]->Unlink();
+  }
+}
+
+void cAnalyzeTreeStats_Gamma::EstablishLinks(
+  tArray<cAnalyzeGenotype *> &lineage,
+  tHashTable<int, int> &out_mapping
+){
+  this->Unlink(lineage);
+  this->MapIDToGenotypePos(lineage, out_mapping);
+
+  int parent_id(-1);
+  int parent_index(-1);
+
+  for(int i = 0; i < lineage.GetSize(); i++){
+    parent_id = lineage[i]->GetParentID();
+    if(parent_id >= 0){
+      out_mapping.Find(parent_id, parent_index);
+      lineage[parent_index]->LinkChild(*lineage[i]);
+    }
+  }
+}
+
+void cAnalyzeTreeStats_Gamma::FindFurcations(
+  tArray<cAnalyzeGenotype *> &lineage,
+  tArray<cAnalyzeLineageFurcation> &out_furcations
+){
+  cAnalyzeGenotype *parent(0);
+  cAnalyzeLineageFurcation furcation(0,0,0);
+  int child_list_size(0);
+
+  out_furcations.Resize(0);
+  for(int i = 0; i < lineage.GetSize(); i++){
+    parent = lineage[i];
+
+    child_list_size = parent->GetChildList().GetSize();
+    if(child_list_size > 1){
+      for(int j = 1; j < child_list_size; j++){
+        furcation = cAnalyzeLineageFurcation(
+          parent,
+          parent->GetChildList().GetPos(j-1),
+          parent->GetChildList().GetPos(j)
+        );
+        out_furcations.Push(furcation);
+      }
+    }
+  }
+}
+
+void cAnalyzeTreeStats_Gamma::FindFurcationTimes(
+  tArray<cAnalyzeGenotype *> &lineage,
+  int (*furcation_time_policy)(cAnalyzeLineageFurcation &furcation),
+  tArray<int> &out_furcation_times
+){
+  tArray<cAnalyzeLineageFurcation> furcations;
+  FindFurcations(lineage, furcations);
+
+  int size = furcations.GetSize();
+  out_furcation_times.Resize(size, 0);
+  for(int i = 0; i < size; i++){
+    out_furcation_times[i] = furcation_time_policy(furcations[i]);
+  }
+  out_furcation_times.QSort(CompareInt);
+}
+
+void cAnalyzeTreeStats_Gamma::FindInternodeDistances(
+  tArray<int> &furcation_times,
+  int end_time,
+  tArray<int> &out_internode_distances
+){
+  int size = furcation_times.GetSize();
+  out_internode_distances.Resize(size, 0);
+
+  for(int i = 0; i < size-1; i++){
+    out_internode_distances[i] = furcation_times[i+1] - furcation_times[i];
+  }
+  out_internode_distances[size-1] = end_time - furcation_times[size-1];
+}
+
+double cAnalyzeTreeStats_Gamma::CalculateGamma(tArray<int> &inode_dists){
+  // n: number of leaves, constant for a given tree.
+  int n = inode_dists.GetSize() + 1;
+  
+  if(n <= 2){
+    if(m_world->GetVerbosity() >= VERBOSE_ON) {
+      cerr << "Error: not enough genotypes in batch to calculate gamma - " << endl;
+    }
+    return nan("0");
+  }
+  
+  /*
+   For convenience, make a copy of m_g with the indices offset.
+   This permits use of the same indices as in the formula for gamma.
+   */
+  tArray<int> g = tArray<int>(2, 0) + inode_dists;
+  
+  int T = 0;
+  for(int j = 2; j <= n; j++) { T += j*g[j]; }
+
+  // si: interior summation values, cached
+  tArray<int> si(n, 0);
+  for(int k = 2; k <= n-1; k++) { si[k] = k*g[k] + si[k-1]; }    
+  
+  // so: exterior summation
+  int so = 0;
+  for(int i = 2; i <= n-1; i++) { so += si[i]; }
+  
+  m_gamma = ( ( (1./(n-2.)) * so ) - (T/2.) ) / ( T*sqrt( 1. / (12.*(n-2.)) ) );
+  
+  return m_gamma;
+}
+
+double cAnalyzeTreeStats_Gamma::Gamma(void){
+  return m_gamma;
+}
+
+void cAnalyzeTreeStats_Gamma::AnalyzeBatch(
+  tList<cAnalyzeGenotype> &genotype_list,
+  int end_time,
+  int furcation_time_convention
+){
+  tHashTable<int, int> mapping;
+  tArray<int> furcation_times;
+  tArray<int> internode_distances;
+
+  int (*furcation_time_policy)(cAnalyzeLineageFurcation &furcation);
+  furcation_time_policy = 0;
+  if (furcation_time_convention == 1){
+    furcation_time_policy = FurcationTimePolicy_ParentBirth;
+  } else if (furcation_time_convention == 2){
+    furcation_time_policy = FurcationTimePolicy_FirstChildBirth;
+  } else if (furcation_time_convention == 3){
+    furcation_time_policy = FurcationTimePolicy_SecondChildBirth;
+  } else {
+    /* Bad furcation time convention specified. */
+    if(m_world->GetVerbosity() >= VERBOSE_ON) {
+      cerr << "Error: Bad furcation time convention specified." << endl;
+      cerr << " - choices are" << endl;
+      cerr << " - 1: Use parent's birth time" << endl;
+      cerr << " - 2: Use elder child's birth time" << endl;
+      cerr << " - 3: Use younger child's birth time" << endl;
+    }
+  }
+
+  LoadGenotypes(genotype_list);
+  EstablishLinks(m_gen_array, mapping);
+  FindFurcationTimes(m_gen_array, furcation_time_policy, furcation_times);
+
+  if (end_time < furcation_times[furcation_times.GetSize() - 1]){
+    /* Bad furcation time convention specified. */
+    if(m_world->GetVerbosity() >= VERBOSE_ON) {
+      cerr << "Error: Lineage end time comes before last furcation." << endl;
+    }
+  }
+
+  FindInternodeDistances(furcation_times, end_time, internode_distances);
+  CalculateGamma(internode_distances);
+}
+

Modified: development/source/analyze/cAnalyzeTreeStats_Gamma.h
===================================================================
--- development/source/analyze/cAnalyzeTreeStats_Gamma.h	2008-04-21 20:43:50 UTC (rev 2544)
+++ development/source/analyze/cAnalyzeTreeStats_Gamma.h	2008-04-22 02:36:03 UTC (rev 2545)
@@ -21,12 +21,15 @@
  *
  */
 
-#ifndef cAnalyzeTreeStats_Gamma_h
-#define cAnalyzeTreeStats_Gamma_h
+#ifndef cAnalyzeTreeStats_Orig_Gamma_h
+#define cAnalyzeTreeStats_Orig_Gamma_h
 
 #ifndef tArray_h
 #include "tArray.h"
 #endif
+#ifndef tHashTable
+#include "tHashTable.h"
+#endif
 #ifndef tList_h
 #include "tList.h"
 #endif
@@ -34,14 +37,14 @@
 class cAnalyzeGenotype;
 class cWorld;
 
-class cAnalyzeTreeStats_Gamma {
+class cAnalyzeTreeStats_Orig_Gamma {
 public:
   cWorld* m_world;
   tArray<cAnalyzeGenotype *> m_gen_array;
   tArray<int> m_g;
   double m_gamma;
 public:
-  cAnalyzeTreeStats_Gamma(cWorld* world);
+  cAnalyzeTreeStats_Orig_Gamma(cWorld* world);
   
   void LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list);
   void QSortGenotypes(void);
@@ -62,4 +65,70 @@
 void QSortAGPhyloDepth(tArray<cAnalyzeGenotype *> &gen_array);
 void QSortAGUpdateBorn(tArray<cAnalyzeGenotype *> &gen_array);
 
+
+
+/* Rewrite */
+class cAnalyzeLineageFurcation {
+public:
+  cAnalyzeGenotype *m_parent;
+  cAnalyzeGenotype *m_first_child;
+  cAnalyzeGenotype *m_second_child;
+public:
+  cAnalyzeLineageFurcation(
+    cAnalyzeGenotype *parent = 0,
+    cAnalyzeGenotype *first_child = 0,
+    cAnalyzeGenotype *second_child = 0
+  );
+  /* This equality operator compares pointers -- this is bad form, but propagated from a similar equality operator in cAnalyzeGenotype. */
+  bool operator==(const cAnalyzeLineageFurcation &in) const ;
+};
+
+class cAnalyzeTreeStats_Gamma {
+public:
+  cWorld* m_world;
+  tArray<cAnalyzeGenotype *> m_gen_array;
+  tArray<int> m_g;
+  double m_gamma;
+public:
+  cAnalyzeTreeStats_Gamma(cWorld* world);
+  
+  void LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list);
+  void MapIDToGenotypePos(
+    tArray<cAnalyzeGenotype *> &lineage,
+    tHashTable<int, int> &out_mapping
+  );
+  void Unlink(tArray<cAnalyzeGenotype *> &lineage);
+  void EstablishLinks(
+    tArray<cAnalyzeGenotype *> &lineage,
+    tHashTable<int, int> &out_mapping
+  );
+  void FindFurcations(
+    tArray<cAnalyzeGenotype *> &lineage,
+    tArray<cAnalyzeLineageFurcation> &out_furcations
+  );
+  void FindFurcationTimes(
+    tArray<cAnalyzeGenotype *> &lineage,
+    int (*furcation_time_policy)(cAnalyzeLineageFurcation &furcation),
+    tArray<int> &out_furcation_times
+  );
+  void FindInternodeDistances(
+    tArray<int> &furcation_times,
+    int end_time,
+    tArray<int> &out_internode_distances
+  );
+  double CalculateGamma(tArray<int> &inode_dists);
+  double Gamma(void);
+  
+  // Commands.
+  void AnalyzeBatch(
+    tList<cAnalyzeGenotype> &genotype_list,
+    int end_time,
+    int furcation_time_convention
+  );    
+};
+  
+int FurcationTimePolicy_ParentBirth(cAnalyzeLineageFurcation &furcation);
+int FurcationTimePolicy_FirstChildBirth(cAnalyzeLineageFurcation &furcation);
+int FurcationTimePolicy_SecondChildBirth(cAnalyzeLineageFurcation &furcation);
+
 #endif




More information about the Avida-cvs mailing list