[Avida-SVN] r3378 - in development/source: analyze main
charles at myxo.css.msu.edu
charles at myxo.css.msu.edu
Mon Aug 24 07:06:24 PDT 2009
Author: charles
Date: 2009-08-24 10:06:23 -0400 (Mon, 24 Aug 2009)
New Revision: 3378
Modified:
development/source/analyze/cAnalyze.cc
development/source/analyze/cAnalyze.h
development/source/main/cGenomeUtil.cc
development/source/main/cPopulation.cc
development/source/main/cTaskLib.h
Log:
Created an analyze command PRINT_DISTANCES that compares all pairs of organisms in the loaded
population and writes summary statistics on their edit distances to a data file. The first argument
is a threshold distance; the output reports the fraction of comparisons whose edit distance is
greater than or equal to this threshold. The second argument is the output filename (default:
data/edit_distance.dat).
Modified: development/source/analyze/cAnalyze.cc
===================================================================
--- development/source/analyze/cAnalyze.cc 2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/analyze/cAnalyze.cc 2009-08-24 14:06:23 UTC (rev 3378)
@@ -3289,6 +3289,75 @@
}
+// PRINT_DISTANCES [threshold] [filename]: write all-pairs edit distance stats (pair count, mean, max, fraction >= threshold) for the current batch to a data file.
+void cAnalyze::CommandPrintDistances(cString cur_string)
+{
+ cout << "Calculating Edit Distance between all pairs of genotypes." << endl;
+
+ // Get the distance threshold; pairs at or above it are counted separately below.
+ int dist_threshold = cur_string.PopWord().AsInt();
+
+ // Get the output file name; fall back to "edit_distance.dat" when none is given.
+ cString filename = cur_string.PopWord();
+ if (filename.IsEmpty()) {
+ filename = "edit_distance.dat";
+ }
+
+ ofstream & fout = m_world->GetDataFileOFStream(filename); // NOTE(review): stream is obtained from m_world and not closed here -- presumably owned by the world; confirm
+
+ fout << "# All pairs edit distance" << endl;
+ fout << "# 1: Num organism pairs" << endl;
+ fout << "# 2: Mean distance" << endl;
+ fout << "# 3: Max distance" << endl;
+ fout << "# 4: Frac distances above threshold (" << dist_threshold << ")" << endl;
+ fout << endl;
+
+ // Accumulators for the all-pairs statistics; each genotype pair is weighted by its organism counts.
+ int dist_total = 0; // Sum of distances over all counted pairs. NOTE(review): plain int, may overflow for large populations -- confirm
+ int dist_max = 0; // Largest distance found between any two genotypes.
+ int pair_count = 0; // Number of organism pairs counted so far.
+ int threshold_pair_count = 0; // Number of counted pairs with distance >= dist_threshold.
+
+ cAnalyzeGenotype * genotype1 = NULL;
+ cAnalyzeGenotype * genotype2 = NULL;
+ tListIterator<cAnalyzeGenotype> batch_it1(batch[cur_batch].List());
+
+ int watermark = 0; // Next pair_count level at which progress is printed to cout.
+
+ while ((genotype1 = batch_it1.Next()) != NULL) {
+ const int gen1_count = genotype1->GetNumCPUs(); // presumably the number of organisms sharing this genotype -- confirm
+
+ // Pairs within this genotype: n*(n-1)/2 of them, each contributing 0 to dist_total.
+ pair_count += gen1_count * (gen1_count - 1) / 2;
+
+ // Pair with the genotypes after this one; the second iterator starts as a copy of the first (presumably so each genotype pair is visited once).
+ tListIterator<cAnalyzeGenotype> batch_it2(batch_it1);
+ while ((genotype2 = batch_it2.Next()) != NULL) {
+ const int gen2_count = genotype2->GetNumCPUs();
+ const int cur_pairs = gen1_count * gen2_count; // Organism pairs represented by this genotype pair.
+ const int cur_dist = cGenomeUtil::FindEditDistance(genotype1->GetGenome(), genotype2->GetGenome());
+ dist_total += cur_pairs * cur_dist;
+ if (cur_dist > dist_max) dist_max = cur_dist;
+ pair_count += cur_pairs;
+ if (cur_dist >= dist_threshold) threshold_pair_count += cur_pairs;
+
+ if (pair_count > watermark) { // Progress output: print the watermark that was exceeded, then raise it by 100000.
+ cout << watermark << endl;
+ watermark += 100000;
+ }
+ }
+ }
+
+ fout << pair_count << " " // Columns follow the order of the header written above.
+ << ((double) dist_total) / (double) pair_count << " " // NOTE(review): 0.0/0.0 when no pairs were counted (pair_count == 0)
+ << dist_max << " "
+ << ((double) threshold_pair_count) / (double) pair_count << " "
+ << endl;
+
+ return;
+}
+
+
// Calculate various stats for trees in population.
void cAnalyze::CommandPrintTreeStats(cString cur_string)
{
@@ -9275,6 +9344,7 @@
// Population analysis commands...
AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
+ AddLibraryDef("PRINT_DISTANCES", &cAnalyze::CommandPrintDistances);
AddLibraryDef("PRINT_TREE_STATS", &cAnalyze::CommandPrintTreeStats);
AddLibraryDef("PRINT_CUMULATIVE_STEMMINESS", &cAnalyze::CommandPrintCumulativeStemminess);
AddLibraryDef("PRINT_GAMMA", &cAnalyze::CommandPrintGamma);
Modified: development/source/analyze/cAnalyze.h
===================================================================
--- development/source/analyze/cAnalyze.h 2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/analyze/cAnalyze.h 2009-08-24 14:06:23 UTC (rev 3378)
@@ -253,6 +253,7 @@
// Population Analysis Commands...
void CommandPrintPhenotypes(cString cur_string);
void CommandPrintDiversity(cString cur_string);
+ void CommandPrintDistances(cString cur_string); // Handler for PRINT_DISTANCES (all-pairs edit distance stats).
void CommandPrintTreeStats(cString cur_string);
void CommandPrintCumulativeStemminess(cString cur_string);
void CommandPrintGamma(cString cur_string);
Modified: development/source/main/cGenomeUtil.cc
===================================================================
--- development/source/main/cGenomeUtil.cc 2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/main/cGenomeUtil.cc 2009-08-24 14:06:23 UTC (rev 3378)
@@ -179,8 +179,12 @@
if (!size1) return size2;
if (!size2) return size1;
+ // Count how many direct matches we have at the front and rear.
+ int match_front = 0;
+ int match_rear = 0;
+
int * cur_row = new int[size1]; // The row we are calculating
- int * prev_row = new int[size1]; // The last row we calculater
+ int * prev_row = new int[size1]; // The last row we calculated
// Initialize the previous row to record the difference from nothing.
for (int i = 0; i < size1; i++) prev_row[i] = i + 1;
Modified: development/source/main/cPopulation.cc
===================================================================
--- development/source/main/cPopulation.cc 2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/main/cPopulation.cc 2009-08-24 14:06:23 UTC (rev 3378)
@@ -641,7 +641,7 @@
m_world->GetStats().RecordBirth(target_cell.GetID(), in_genotype->GetID(),
in_organism->GetPhenotype().ParentTrue());
- // @MRR Do coalescence clade set up for new organisms.
+ // @MRR Do coalescence clade setup for new organisms.
CCladeSetupOrganism(in_organism );
//count how many times MERIT_BONUS_INST (rewarded instruction) is in the genome
Modified: development/source/main/cTaskLib.h
===================================================================
--- development/source/main/cTaskLib.h 2009-08-24 03:27:01 UTC (rev 3377)
+++ development/source/main/cTaskLib.h 2009-08-24 14:06:23 UTC (rev 3378)
@@ -104,7 +104,11 @@
inline double FractionalReward(unsigned int supplied, unsigned int correct);
-
+ // All tasks must be declared here. Each task takes a cTaskContext reference as its
+ // sole input and returns a double between 0.0 and 1.0 indicating how well the task
+ // was performed.
+
+ // Basic Tasks
double Task_Echo(cTaskContext& ctx) const;
double Task_Add(cTaskContext& ctx) const;
double Task_Add3(cTaskContext& ctx) const;
More information about the Avida-cvs
mailing list