[Avida-SVN] r3379 - branches/goings/source/analyze

goingssh at myxo.css.msu.edu goingssh at myxo.css.msu.edu
Mon Aug 24 08:18:55 PDT 2009


Author: goingssh
Date: 2009-08-24 11:18:55 -0400 (Mon, 24 Aug 2009)
New Revision: 3379

Modified:
   branches/goings/source/analyze/cAnalyze.cc
   branches/goings/source/analyze/cAnalyze.h
Log:
Added Charles' command to calculate all pair-wise sets of edit distances to my branch

Modified: branches/goings/source/analyze/cAnalyze.cc
===================================================================
--- branches/goings/source/analyze/cAnalyze.cc	2009-08-24 14:06:23 UTC (rev 3378)
+++ branches/goings/source/analyze/cAnalyze.cc	2009-08-24 15:18:55 UTC (rev 3379)
@@ -3285,7 +3285,75 @@
   return;
 }
 
+// Calculate edit-distance stats over all pairs of organisms in the current batch.
+// cur_string holds: the distance threshold (int), then the output file name (optional).
+// Each genotype is weighted by GetNumCPUs(); same-genotype pairs add a distance of 0.
+void cAnalyze::CommandPrintDistances(cString cur_string)
+{
+  cout << "Calculating Edit Distance between all pairs of genotypes." << endl;
+  
+  // Get the maximum distance we care about
+  int dist_threshold = cur_string.PopWord().AsInt();
+  
+  // Get the name of the file that stores the result (falls back to a default).
+  cString filename = cur_string.PopWord();
+  if (filename.IsEmpty()) filename = "edit_distance.dat";
+  ofstream & fout = m_world->GetDataFileOFStream(filename);
+  
+  fout << "# All pairs edit distance" << endl;
+  fout << "# 1: Num organism pairs" << endl;
+  fout << "# 2: Mean distance" << endl;
+  fout << "# 3: Max distance" << endl;
+  // The comparison in the loop is >=, so the column is labelled "at or above".
+  fout << "# 4: Frac distances at or above threshold (" << dist_threshold << ")" << endl;
+  fout << endl;
+  
+  // Pair-weighted accumulators are 64-bit: the number of pairs grows with the
+  // square of the organism count and would overflow a 32-bit int.
+  long long dist_total = 0;
+  int dist_max = 0;
+  long long pair_count = 0;
+  long long threshold_pair_count = 0;
+  long long watermark = 0;  // Pair count at which the next progress line is printed.
 
+  cAnalyzeGenotype * genotype1 = NULL;
+  cAnalyzeGenotype * genotype2 = NULL;
+  tListIterator<cAnalyzeGenotype> batch_it1(batch[cur_batch].List());
+
+  while ((genotype1 = batch_it1.Next()) != NULL) {
+    const long long gen1_count = genotype1->GetNumCPUs();
+
+    // Pair this genotype's organisms with each other for a distance of 0.
+    pair_count += gen1_count * (gen1_count - 1) / 2;
+
+    // Loop through the other genotypes this one can be paired with.
+    tListIterator<cAnalyzeGenotype> batch_it2(batch_it1);
+    while ((genotype2 = batch_it2.Next()) != NULL) {
+      const long long cur_pairs = gen1_count * genotype2->GetNumCPUs();
+      const int cur_dist = cGenomeUtil::FindEditDistance(genotype1->GetGenome(), genotype2->GetGenome());
+      dist_total += cur_pairs * cur_dist;
+      if (cur_dist > dist_max) dist_max = cur_dist;
+      pair_count += cur_pairs;
+      if (cur_dist >= dist_threshold) threshold_pair_count += cur_pairs;
+
+      // Progress report on stdout: print the watermark, then advance it by 100000.
+      if (pair_count > watermark) {
+        cout << watermark << endl;
+        watermark += 100000;
+      }
+    }
+  }
+  
+  // With no pairs at all, divide by 1 so the ratios print as 0 rather than NaN.
+  const double denom = (pair_count > 0) ? (double) pair_count : 1.0;
+  fout << pair_count << " "
+       << ((double) dist_total) / denom << " "
+       << dist_max << " "
+       << ((double) threshold_pair_count) / denom << " "
+       << endl;
+}
+
+
 // Calculate various stats for trees in population.
 void cAnalyze::CommandPrintTreeStats(cString cur_string)
 {
@@ -9237,6 +9305,7 @@
   // Population analysis commands...
   AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
   AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
+  AddLibraryDef("PRINT_DISTANCES", &cAnalyze::CommandPrintDistances);
   AddLibraryDef("PRINT_TREE_STATS", &cAnalyze::CommandPrintTreeStats);
   AddLibraryDef("PRINT_CUMULATIVE_STEMMINESS", &cAnalyze::CommandPrintCumulativeStemminess);
   AddLibraryDef("PRINT_GAMMA", &cAnalyze::CommandPrintGamma);

Modified: branches/goings/source/analyze/cAnalyze.h
===================================================================
--- branches/goings/source/analyze/cAnalyze.h	2009-08-24 14:06:23 UTC (rev 3378)
+++ branches/goings/source/analyze/cAnalyze.h	2009-08-24 15:18:55 UTC (rev 3379)
@@ -250,6 +250,7 @@
   // Population Analysis Commands...
   void CommandPrintPhenotypes(cString cur_string);
   void CommandPrintDiversity(cString cur_string);
+  void CommandPrintDistances(cString cur_string);
   void CommandPrintTreeStats(cString cur_string);
   void CommandPrintCumulativeStemminess(cString cur_string);
   void CommandPrintGamma(cString cur_string);




More information about the Avida-cvs mailing list