[Avida-SVN] r3378 - in development/source: analyze main

charles at myxo.css.msu.edu charles at myxo.css.msu.edu
Mon Aug 24 07:06:24 PDT 2009


Author: charles
Date: 2009-08-24 10:06:23 -0400 (Mon, 24 Aug 2009)
New Revision: 3378

Modified:
   development/source/analyze/cAnalyze.cc
   development/source/analyze/cAnalyze.h
   development/source/main/cGenomeUtil.cc
   development/source/main/cPopulation.cc
   development/source/main/cTaskLib.h
Log:
Created an analyze command PRINT_DISTANCES that compares all pairs of organisms in the loaded
population and writes summary statistics (number of pairs, mean and maximum edit distance) to a
data file.  The first argument is a threshold distance; the output also reports the fraction of
comparisons that resulted in an edit distance greater than or equal to this threshold.  The second
argument is an optional output filename, which defaults to data/edit_distance.dat.


Modified: development/source/analyze/cAnalyze.cc
===================================================================
--- development/source/analyze/cAnalyze.cc	2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/analyze/cAnalyze.cc	2009-08-24 14:06:23 UTC (rev 3378)
@@ -3289,6 +3289,75 @@
 }
 
 
+// Calculate edit distance stats for all pairs of organisms in the current batch.
+// Arguments (popped from cur_string): a distance threshold, then an optional output
+// filename (defaults to "edit_distance.dat").  Writes a single data line holding the
+// number of organism pairs, the mean and max edit distance, and the fraction of pairs
+// whose distance is at or above the threshold.
+void cAnalyze::CommandPrintDistances(cString cur_string)
+{
+  cout << "Calculating Edit Distance between all pairs of genotypes." << endl;
+
+  // Get the threshold distance; pairs at or beyond it are tallied separately.
+  const int dist_threshold = cur_string.PopWord().AsInt();
+
+  // Get the file name that saves the result
+  cString filename = cur_string.PopWord();
+  if (filename.IsEmpty()) filename = "edit_distance.dat";
+
+  ofstream & fout = m_world->GetDataFileOFStream(filename);
+
+  fout << "# All pairs edit distance" << endl;
+  fout << "# 1: Num organism pairs" << endl;
+  fout << "# 2: Mean distance" << endl;
+  fout << "# 3: Max distance" << endl;
+  fout << "# 4: Frac distances at or above threshold (" << dist_threshold << ")" << endl;
+  fout << endl;
+
+  // The distance sum is kept as a double: (pairs * distance) added up over a whole
+  // population can exceed the range of an int.
+  double dist_total = 0.0;
+  int dist_max = 0;
+  int pair_count = 0;
+  int threshold_pair_count = 0;
+  int watermark = 0;  // Pair count at which the next progress line is printed.
+
+  cAnalyzeGenotype * genotype1 = NULL;
+  cAnalyzeGenotype * genotype2 = NULL;
+  tListIterator<cAnalyzeGenotype> batch_it1(batch[cur_batch].List());
+
+  while ((genotype1 = batch_it1.Next()) != NULL) {
+    const int gen1_count = genotype1->GetNumCPUs();
+
+    // Organisms sharing this genotype pair with each other at a distance of 0, so
+    // they add to the pair counts but contribute nothing to the distance total.
+    const int self_pairs = gen1_count * (gen1_count - 1) / 2;
+    pair_count += self_pairs;
+    if (dist_threshold <= 0) threshold_pair_count += self_pairs;
+
+    // Loop through the other genotypes this one can be paired with; the second
+    // iterator is started from a copy of the first.
+    tListIterator<cAnalyzeGenotype> batch_it2(batch_it1);
+    while ((genotype2 = batch_it2.Next()) != NULL) {
+      const int gen2_count = genotype2->GetNumCPUs();
+      const int cur_pairs = gen1_count * gen2_count;
+      const int cur_dist = cGenomeUtil::FindEditDistance(genotype1->GetGenome(), genotype2->GetGenome());
+      dist_total += ((double) cur_pairs) * cur_dist;
+      if (cur_dist > dist_max) dist_max = cur_dist;
+      pair_count += cur_pairs;
+      if (cur_dist >= dist_threshold) threshold_pair_count += cur_pairs;
+
+      // Periodic progress output to the console.
+      if (pair_count > watermark) {
+        cout << watermark << endl;
+        watermark += 100000;
+      }
+    }
+  }
+
+  // With no pairs at all (empty batch or a lone organism) report 0 instead of 0/0.
+  const double mean_dist = (pair_count > 0) ? dist_total / (double) pair_count : 0.0;
+  const double threshold_frac =
+    (pair_count > 0) ? ((double) threshold_pair_count) / (double) pair_count : 0.0;
+
+  fout << pair_count << " "
+       << mean_dist << " "
+       << dist_max << " "
+       << threshold_frac << " "
+       << endl;
+}
+
+
 // Calculate various stats for trees in population.
 void cAnalyze::CommandPrintTreeStats(cString cur_string)
 {
@@ -9275,6 +9344,7 @@
   // Population analysis commands...
   AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
   AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
+  AddLibraryDef("PRINT_DISTANCES", &cAnalyze::CommandPrintDistances);
   AddLibraryDef("PRINT_TREE_STATS", &cAnalyze::CommandPrintTreeStats);
   AddLibraryDef("PRINT_CUMULATIVE_STEMMINESS", &cAnalyze::CommandPrintCumulativeStemminess);
   AddLibraryDef("PRINT_GAMMA", &cAnalyze::CommandPrintGamma);

Modified: development/source/analyze/cAnalyze.h
===================================================================
--- development/source/analyze/cAnalyze.h	2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/analyze/cAnalyze.h	2009-08-24 14:06:23 UTC (rev 3378)
@@ -253,6 +253,7 @@
   // Population Analysis Commands...
   void CommandPrintPhenotypes(cString cur_string);
   void CommandPrintDiversity(cString cur_string);
+  void CommandPrintDistances(cString cur_String);
   void CommandPrintTreeStats(cString cur_string);
   void CommandPrintCumulativeStemminess(cString cur_string);
   void CommandPrintGamma(cString cur_string);

Modified: development/source/main/cGenomeUtil.cc
===================================================================
--- development/source/main/cGenomeUtil.cc	2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/main/cGenomeUtil.cc	2009-08-24 14:06:23 UTC (rev 3378)
@@ -179,8 +179,12 @@
   if (!size1) return size2;
   if (!size2) return size1;
 
+  // Count how many direct matches we have at the front and rear.
+  int match_front = 0;
+  int match_rear = 0;
+
   int * cur_row  = new int[size1];  // The row we are calculating
-  int * prev_row = new int[size1];  // The last row we calculater
+  int * prev_row = new int[size1];  // The last row we calculated
 
   // Initialize the previous row to record the differece from nothing.
   for (int i = 0; i < size1; i++)  prev_row[i] = i + 1;

Modified: development/source/main/cPopulation.cc
===================================================================
--- development/source/main/cPopulation.cc	2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/main/cPopulation.cc	2009-08-24 14:06:23 UTC (rev 3378)
@@ -641,7 +641,7 @@
   m_world->GetStats().RecordBirth(target_cell.GetID(), in_genotype->GetID(),
                                   in_organism->GetPhenotype().ParentTrue());
   
-  // @MRR Do coalescence clade set up for new organisms.
+  // @MRR Do coalescence clade setup for new organisms.
   CCladeSetupOrganism(in_organism ); 
   
   //count how many times MERIT_BONUS_INST (rewarded instruction) is in the genome

Modified: development/source/main/cTaskLib.h
===================================================================
--- development/source/main/cTaskLib.h	2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/main/cTaskLib.h	2009-08-24 14:06:23 UTC (rev 3378)
@@ -104,7 +104,11 @@
 
   inline double FractionalReward(unsigned int supplied, unsigned int correct);  
 
-  
+  // All tasks must be declared here, taking a cTaskContext reference as the sole input and
+  // returning a double between 0.0 and 1.0 indicating the quality of how well the task was
+  // performed.
+
+  // Basic Tasks
   double Task_Echo(cTaskContext& ctx) const;
   double Task_Add(cTaskContext& ctx) const;
   double Task_Add3(cTaskContext& ctx) const;




More information about the Avida-cvs mailing list