[Avida-SVN] r2545 - development/source/analyze
kaben at myxo.css.msu.edu
kaben at myxo.css.msu.edu
Mon Apr 21 19:36:03 PDT 2008
Author: kaben
Date: 2008-04-21 22:36:03 -0400 (Mon, 21 Apr 2008)
New Revision: 2545
Modified:
development/source/analyze/cAnalyze.cc
development/source/analyze/cAnalyze.h
development/source/analyze/cAnalyzeTreeStats_Gamma.cc
development/source/analyze/cAnalyzeTreeStats_Gamma.h
Log:
Rewrite of analyze command 'PRINT_GAMMA'.
Modified: development/source/analyze/cAnalyze.cc
===================================================================
--- development/source/analyze/cAnalyze.cc 2008-04-21 20:43:50 UTC (rev 2544)
+++ development/source/analyze/cAnalyze.cc 2008-04-22 02:36:03 UTC (rev 2545)
@@ -3282,6 +3282,30 @@
// Calculate Pybus-Harvey gamma statistic for trees in population.
+void cAnalyze::Original_CommandPrintGamma(cString cur_string)
+{
+ if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing Pybus-Harvey gamma statistic for batch "
+ << cur_batch << endl;
+ else cout << "Printing Pybus-Harvey gamma statistic..." << endl;
+
+ // Load in the variables...
+ cString filename("gamma.dat");
+ if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
+
+ ofstream& fp = m_world->GetDataFileOFStream(filename);
+
+ fp << "# Legend:" << endl;
+ fp << "# 1: Pybus-Harvey gamma statistic" << endl;
+ fp << endl;
+
+ cAnalyzeTreeStats_Orig_Gamma agts(m_world);
+ agts.AnalyzeBatch(batch[cur_batch].List());
+
+ fp << agts.Gamma();
+ fp << endl;
+}
+
+// Calculate Pybus-Harvey gamma statistic for trees in population.
void cAnalyze::CommandPrintGamma(cString cur_string)
{
if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing Pybus-Harvey gamma statistic for batch "
@@ -3289,8 +3313,16 @@
else cout << "Printing Pybus-Harvey gamma statistic..." << endl;
// Load in the variables...
+ int end_time = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1; // #1
+ if (end_time < 0) {
+ cout << "Error: end_time (argument 1) must be specified as nonzero." << endl;
+ return;
+ }
+
cString filename("gamma.dat");
if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
+
+ int furcation_time_convention = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 2; // #1
ofstream& fp = m_world->GetDataFileOFStream(filename);
@@ -3299,7 +3331,7 @@
fp << endl;
cAnalyzeTreeStats_Gamma agts(m_world);
- agts.AnalyzeBatch(batch[cur_batch].List());
+ agts.AnalyzeBatch(batch[cur_batch].List(), end_time, furcation_time_convention);
fp << agts.Gamma();
fp << endl;
@@ -9241,6 +9273,7 @@
AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
AddLibraryDef("PRINT_TREE_STATS", &cAnalyze::CommandPrintTreeStats);
AddLibraryDef("PRINT_CUMULATIVE_STEMMINESS", &cAnalyze::CommandPrintCumulativeStemminess);
+ AddLibraryDef("ORIGINAL_PRINT_GAMMA", &cAnalyze::Original_CommandPrintGamma);
AddLibraryDef("PRINT_GAMMA", &cAnalyze::CommandPrintGamma);
AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::AnalyzeCommunityComplexity);
AddLibraryDef("PRINT_RESOURCE_FITNESS_MAP", &cAnalyze::CommandPrintResourceFitnessMap);
Modified: development/source/analyze/cAnalyze.h
===================================================================
--- development/source/analyze/cAnalyze.h 2008-04-21 20:43:50 UTC (rev 2544)
+++ development/source/analyze/cAnalyze.h 2008-04-22 02:36:03 UTC (rev 2545)
@@ -258,6 +258,7 @@
void CommandPrintDiversity(cString cur_string);
void CommandPrintTreeStats(cString cur_string);
void CommandPrintCumulativeStemminess(cString cur_string);
+ void Original_CommandPrintGamma(cString cur_string);
void CommandPrintGamma(cString cur_string);
void PhyloCommunityComplexity(cString cur_string);
void AnalyzeCommunityComplexity(cString cur_string);
Modified: development/source/analyze/cAnalyzeTreeStats_Gamma.cc
===================================================================
--- development/source/analyze/cAnalyzeTreeStats_Gamma.cc 2008-04-21 20:43:50 UTC (rev 2544)
+++ development/source/analyze/cAnalyzeTreeStats_Gamma.cc 2008-04-22 02:36:03 UTC (rev 2545)
@@ -1,5 +1,5 @@
/*
- * cAnalyzeTreeStats_Gamma.cc
+ * cAnalyzeTreeStats_Orig_Gamma.cc
* Avida at vallista
*
* Created by Kaben Nanlohy on 2007.12.03.
@@ -34,14 +34,14 @@
using namespace std;
-cAnalyzeTreeStats_Gamma::cAnalyzeTreeStats_Gamma(cWorld* world)
+cAnalyzeTreeStats_Orig_Gamma::cAnalyzeTreeStats_Orig_Gamma(cWorld* world)
: m_world(world)
, m_gen_array(0)
, m_g(0)
, m_gamma(0.)
{}
-void cAnalyzeTreeStats_Gamma::LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list){
+void cAnalyzeTreeStats_Orig_Gamma::LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list){
cAnalyzeGenotype * genotype = NULL;
tListIterator<cAnalyzeGenotype> batch_it(genotype_list);
@@ -53,10 +53,10 @@
array_pos++;
}
}
-void cAnalyzeTreeStats_Gamma::QSortGenotypes(void){
+void cAnalyzeTreeStats_Orig_Gamma::QSortGenotypes(void){
QSortAGPhyloDepth(m_gen_array);
}
-void cAnalyzeTreeStats_Gamma::CalculateInternodeDistances(void){
+void cAnalyzeTreeStats_Orig_Gamma::CalculateInternodeDistances(void){
m_g.Resize(1 + m_gen_array.GetSize());
m_g[0] = 0;
m_g[1] = m_gen_array[0]->GetDepth() - 0;
@@ -64,7 +64,7 @@
m_g[i+1] = m_gen_array[i]->GetDepth() - m_gen_array[i-1]->GetDepth();
}
}
-void cAnalyzeTreeStats_Gamma::FixupInternodeDistances(void){
+void cAnalyzeTreeStats_Orig_Gamma::FixupInternodeDistances(void){
bool in_redundant_subsequence = false;
int saved_g = -1;
for(int i = 1; i < m_gen_array.GetSize(); i++) {
@@ -87,7 +87,7 @@
m_g[m_gen_array.GetSize()] = saved_g;
}
}
-void cAnalyzeTreeStats_Gamma::CalculateGamma(void){
+void cAnalyzeTreeStats_Orig_Gamma::CalculateGamma(void){
// n: number of leaves, constant for a given tree.
int n = m_gen_array.GetSize();
@@ -112,11 +112,11 @@
m_gamma = ( ( (1./(n-2.)) * so ) - (T/2.) ) / ( T*sqrt( 1. / (12.*(n-2.)) ) );
}
-double cAnalyzeTreeStats_Gamma::Gamma(void){
+double cAnalyzeTreeStats_Orig_Gamma::Gamma(void){
return m_gamma;
}
-void cAnalyzeTreeStats_Gamma::AnalyzeBatch(tList<cAnalyzeGenotype> &genotype_list){
+void cAnalyzeTreeStats_Orig_Gamma::AnalyzeBatch(tList<cAnalyzeGenotype> &genotype_list){
LoadGenotypes(genotype_list);
QSortGenotypes();
CalculateInternodeDistances();
@@ -149,3 +149,229 @@
void QSortAGUpdateBorn(tArray<cAnalyzeGenotype *> &gen_array){
gen_array.QSort(CompareAGUpdateBorn);
}
+
+
+
+/* Rewrite */
+cAnalyzeLineageFurcation::cAnalyzeLineageFurcation(
+ cAnalyzeGenotype *parent,
+ cAnalyzeGenotype *first_child,
+ cAnalyzeGenotype *second_child
+):m_parent(parent), m_first_child(first_child), m_second_child(second_child)
+{
+}
+bool cAnalyzeLineageFurcation::operator==(const cAnalyzeLineageFurcation &in) const {
+ if (m_parent != in.m_parent) { return false; }
+ if (m_first_child != in.m_first_child) { return false; }
+ if (m_second_child != in.m_second_child) { return false; }
+ return true;
+}
+
+int FurcationTimePolicy_ParentBirth(cAnalyzeLineageFurcation &furcation){
+ if(furcation.m_parent){ return furcation.m_parent->GetUpdateBorn();
+ } else { return -1; }
+}
+int FurcationTimePolicy_FirstChildBirth(cAnalyzeLineageFurcation &furcation){
+ if(furcation.m_first_child){ return furcation.m_first_child->GetUpdateBorn();
+ } else { return -1; }
+}
+int FurcationTimePolicy_SecondChildBirth(cAnalyzeLineageFurcation &furcation){
+ if(furcation.m_second_child){ return furcation.m_second_child->GetUpdateBorn();
+ } else { return -1; }
+}
+int CompareInt(const void * _a, const void * _b){
+ int a(*((int*)_a));
+ int b(*((int*)_b));
+ if(a > b){ return 1; }
+ if(a < b){ return -1; }
+ return 0;
+}
+
+
+cAnalyzeTreeStats_Gamma::cAnalyzeTreeStats_Gamma(cWorld* world){
+}
+
+void cAnalyzeTreeStats_Gamma::LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list){
+ cAnalyzeGenotype * genotype = NULL;
+ tListIterator<cAnalyzeGenotype> batch_it(genotype_list);
+
+ m_gen_array.Resize(genotype_list.GetSize());
+ int array_pos = 0;
+ batch_it.Reset();
+ while ((genotype = batch_it.Next()) != NULL) {
+ m_gen_array[array_pos] = genotype;
+ array_pos++;
+ }
+}
+
+void cAnalyzeTreeStats_Gamma::MapIDToGenotypePos(
+ tArray<cAnalyzeGenotype *> &lineage,
+ tHashTable<int, int> &out_mapping
+){
+ out_mapping.ClearAll();
+ for(int i = 0; i < lineage.GetSize(); i++){
+ out_mapping.SetValue(lineage[i]->GetID(), i);
+ }
+}
+
+void cAnalyzeTreeStats_Gamma::Unlink(tArray<cAnalyzeGenotype *> &lineage){
+ for(int i = 0; i < lineage.GetSize(); i++){
+ lineage[i]->Unlink();
+ }
+}
+
+void cAnalyzeTreeStats_Gamma::EstablishLinks(
+ tArray<cAnalyzeGenotype *> &lineage,
+ tHashTable<int, int> &out_mapping
+){
+ this->Unlink(lineage);
+ this->MapIDToGenotypePos(lineage, out_mapping);
+
+ int parent_id(-1);
+ int parent_index(-1);
+
+ for(int i = 0; i < lineage.GetSize(); i++){
+ parent_id = lineage[i]->GetParentID();
+ if(parent_id >= 0){
+ out_mapping.Find(parent_id, parent_index);
+ lineage[parent_index]->LinkChild(*lineage[i]);
+ }
+ }
+}
+
+void cAnalyzeTreeStats_Gamma::FindFurcations(
+ tArray<cAnalyzeGenotype *> &lineage,
+ tArray<cAnalyzeLineageFurcation> &out_furcations
+){
+ cAnalyzeGenotype *parent(0);
+ cAnalyzeLineageFurcation furcation(0,0,0);
+ int child_list_size(0);
+
+ out_furcations.Resize(0);
+ for(int i = 0; i < lineage.GetSize(); i++){
+ parent = lineage[i];
+
+ child_list_size = parent->GetChildList().GetSize();
+ if(child_list_size > 1){
+ for(int j = 1; j < child_list_size; j++){
+ furcation = cAnalyzeLineageFurcation(
+ parent,
+ parent->GetChildList().GetPos(j-1),
+ parent->GetChildList().GetPos(j)
+ );
+ out_furcations.Push(furcation);
+ }
+ }
+ }
+}
+
+void cAnalyzeTreeStats_Gamma::FindFurcationTimes(
+ tArray<cAnalyzeGenotype *> &lineage,
+ int (*furcation_time_policy)(cAnalyzeLineageFurcation &furcation),
+ tArray<int> &out_furcation_times
+){
+ tArray<cAnalyzeLineageFurcation> furcations;
+ FindFurcations(lineage, furcations);
+
+ int size = furcations.GetSize();
+ out_furcation_times.Resize(size, 0);
+ for(int i = 0; i < size; i++){
+ out_furcation_times[i] = furcation_time_policy(furcations[i]);
+ }
+ out_furcation_times.QSort(CompareInt);
+}
+
+void cAnalyzeTreeStats_Gamma::FindInternodeDistances(
+ tArray<int> &furcation_times,
+ int end_time,
+ tArray<int> &out_internode_distances
+){
+ int size = furcation_times.GetSize();
+ out_internode_distances.Resize(size, 0);
+
+ for(int i = 0; i < size-1; i++){
+ out_internode_distances[i] = furcation_times[i+1] - furcation_times[i];
+ }
+ out_internode_distances[size-1] = end_time - furcation_times[size-1];
+}
+
+double cAnalyzeTreeStats_Gamma::CalculateGamma(tArray<int> &inode_dists){
+ // n: number of leaves, constant for a given tree.
+ int n = inode_dists.GetSize() + 1;
+
+ if(n <= 2){
+ if(m_world->GetVerbosity() >= VERBOSE_ON) {
+ cerr << "Error: not enough genotypes in batch to calculate gamma - " << endl;
+ }
+ return nan("0");
+ }
+
+ /*
+ For convenience, make a copy of m_g with the indices offset.
+ This permits use of the same indices as in the formula for gamma.
+ */
+ tArray<int> g = tArray<int>(2, 0) + inode_dists;
+
+ int T = 0;
+ for(int j = 2; j <= n; j++) { T += j*g[j]; }
+
+ // si: interior summation values, cached
+ tArray<int> si(n, 0);
+ for(int k = 2; k <= n-1; k++) { si[k] = k*g[k] + si[k-1]; }
+
+ // so: exterior summation
+ int so = 0;
+ for(int i = 2; i <= n-1; i++) { so += si[i]; }
+
+ m_gamma = ( ( (1./(n-2.)) * so ) - (T/2.) ) / ( T*sqrt( 1. / (12.*(n-2.)) ) );
+
+ return m_gamma;
+}
+
+double cAnalyzeTreeStats_Gamma::Gamma(void){
+ return m_gamma;
+}
+
+void cAnalyzeTreeStats_Gamma::AnalyzeBatch(
+ tList<cAnalyzeGenotype> &genotype_list,
+ int end_time,
+ int furcation_time_convention
+){
+ tHashTable<int, int> mapping;
+ tArray<int> furcation_times;
+ tArray<int> internode_distances;
+
+ int (*furcation_time_policy)(cAnalyzeLineageFurcation &furcation);
+ furcation_time_policy = 0;
+ if (furcation_time_convention == 1){
+ furcation_time_policy = FurcationTimePolicy_ParentBirth;
+ } else if (furcation_time_convention == 2){
+ furcation_time_policy = FurcationTimePolicy_FirstChildBirth;
+ } else if (furcation_time_convention == 3){
+ furcation_time_policy = FurcationTimePolicy_SecondChildBirth;
+ } else {
+ /* Bad furcation time convention specified. */
+ if(m_world->GetVerbosity() >= VERBOSE_ON) {
+ cerr << "Error: Bad furcation time convention specified." << endl;
+ cerr << " - choices are" << endl;
+ cerr << " - 1: Use parent's birth time" << endl;
+ cerr << " - 2: Use elder child's birth time" << endl;
+ cerr << " - 3: Use younger child's birth time" << endl;
+ }
+ }
+
+ LoadGenotypes(genotype_list);
+ EstablishLinks(m_gen_array, mapping);
+ FindFurcationTimes(m_gen_array, furcation_time_policy, furcation_times);
+
+ if (end_time < furcation_times[furcation_times.GetSize() - 1]){
+ /* Bad furcation time convention specified. */
+ if(m_world->GetVerbosity() >= VERBOSE_ON) {
+ cerr << "Error: Lineage end time comes before last furcation." << endl;
+ }
+ }
+
+ FindInternodeDistances(furcation_times, end_time, internode_distances);
+ CalculateGamma(internode_distances);
+}
+
Modified: development/source/analyze/cAnalyzeTreeStats_Gamma.h
===================================================================
--- development/source/analyze/cAnalyzeTreeStats_Gamma.h 2008-04-21 20:43:50 UTC (rev 2544)
+++ development/source/analyze/cAnalyzeTreeStats_Gamma.h 2008-04-22 02:36:03 UTC (rev 2545)
@@ -21,12 +21,15 @@
*
*/
-#ifndef cAnalyzeTreeStats_Gamma_h
-#define cAnalyzeTreeStats_Gamma_h
+#ifndef cAnalyzeTreeStats_Orig_Gamma_h
+#define cAnalyzeTreeStats_Orig_Gamma_h
#ifndef tArray_h
#include "tArray.h"
#endif
+#ifndef tHashTable
+#include "tHashTable.h"
+#endif
#ifndef tList_h
#include "tList.h"
#endif
@@ -34,14 +37,14 @@
class cAnalyzeGenotype;
class cWorld;
-class cAnalyzeTreeStats_Gamma {
+class cAnalyzeTreeStats_Orig_Gamma {
public:
cWorld* m_world;
tArray<cAnalyzeGenotype *> m_gen_array;
tArray<int> m_g;
double m_gamma;
public:
- cAnalyzeTreeStats_Gamma(cWorld* world);
+ cAnalyzeTreeStats_Orig_Gamma(cWorld* world);
void LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list);
void QSortGenotypes(void);
@@ -62,4 +65,70 @@
void QSortAGPhyloDepth(tArray<cAnalyzeGenotype *> &gen_array);
void QSortAGUpdateBorn(tArray<cAnalyzeGenotype *> &gen_array);
+
+
+/* Rewrite */
+class cAnalyzeLineageFurcation {
+public:
+ cAnalyzeGenotype *m_parent;
+ cAnalyzeGenotype *m_first_child;
+ cAnalyzeGenotype *m_second_child;
+public:
+ cAnalyzeLineageFurcation(
+ cAnalyzeGenotype *parent = 0,
+ cAnalyzeGenotype *first_child = 0,
+ cAnalyzeGenotype *second_child = 0
+ );
+ /* This equality operator compares pointers -- this is bad form, but propagated from a similar equality operator in cAnalyzeGenotype. */
+ bool operator==(const cAnalyzeLineageFurcation &in) const ;
+};
+
+class cAnalyzeTreeStats_Gamma {
+public:
+ cWorld* m_world;
+ tArray<cAnalyzeGenotype *> m_gen_array;
+ tArray<int> m_g;
+ double m_gamma;
+public:
+ cAnalyzeTreeStats_Gamma(cWorld* world);
+
+ void LoadGenotypes(tList<cAnalyzeGenotype> &genotype_list);
+ void MapIDToGenotypePos(
+ tArray<cAnalyzeGenotype *> &lineage,
+ tHashTable<int, int> &out_mapping
+ );
+ void Unlink(tArray<cAnalyzeGenotype *> &lineage);
+ void EstablishLinks(
+ tArray<cAnalyzeGenotype *> &lineage,
+ tHashTable<int, int> &out_mapping
+ );
+ void FindFurcations(
+ tArray<cAnalyzeGenotype *> &lineage,
+ tArray<cAnalyzeLineageFurcation> &out_furcations
+ );
+ void FindFurcationTimes(
+ tArray<cAnalyzeGenotype *> &lineage,
+ int (*furcation_time_policy)(cAnalyzeLineageFurcation &furcation),
+ tArray<int> &out_furcation_times
+ );
+ void FindInternodeDistances(
+ tArray<int> &furcation_times,
+ int end_time,
+ tArray<int> &out_internode_distances
+ );
+ double CalculateGamma(tArray<int> &inode_dists);
+ double Gamma(void);
+
+ // Commands.
+ void AnalyzeBatch(
+ tList<cAnalyzeGenotype> &genotype_list,
+ int end_time,
+ int furcation_time_convention
+ );
+};
+
+int FurcationTimePolicy_ParentBirth(cAnalyzeLineageFurcation &furcation);
+int FurcationTimePolicy_FirstChildBirth(cAnalyzeLineageFurcation &furcation);
+int FurcationTimePolicy_SecondChildBirth(cAnalyzeLineageFurcation &furcation);
+
#endif
More information about the Avida-cvs
mailing list