[Avida-SVN] r3499 - in development: source/analyze source/tools tests/analyze_printphenotypes
blwalker at myxo.css.msu.edu
blwalker at myxo.css.msu.edu
Fri Oct 16 11:43:39 PDT 2009
Author: blwalker
Date: 2009-10-16 14:43:39 -0400 (Fri, 16 Oct 2009)
New Revision: 3499
Removed:
development/tests/analyze_printphenotypes/expected/
Modified:
development/source/analyze/cAnalyze.cc
development/source/analyze/cAnalyze.h
development/source/tools/tHashTable.h
Log:
New version of CommandPrintPhenotypes:
- handles any number of tasks without crashing, by using a cBitArray rather than an int to encode phenotype (where "phenotype" means "set of tasks an organism can do")
- outputs phenotypes sorted by the number of organisms belonging to each phenotype (greatest to least) and, within that, by the number of genotypes belonging to it
Because this version of CommandPrintPhenotypes uses a different sort order than the previous one, its output no longer matches the old expected results (although the output is really all the same information, just sorted a little more reasonably among phenotypes that have the same number of organisms). Thus, the expected results for the analyze_printphenotypes test are also being replaced.
Modified: development/source/analyze/cAnalyze.cc
===================================================================
--- development/source/analyze/cAnalyze.cc 2009-10-16 03:20:07 UTC (rev 3498)
+++ development/source/analyze/cAnalyze.cc 2009-10-16 18:43:39 UTC (rev 3499)
@@ -2591,6 +2591,24 @@
///// Population Analysis Commands ////
+// qsort-style comparator for two p_stats records; elem1 and elem2 are read as p_stats* (the casts drop const, nothing is written).
+// Higher cpu_count is considered "less" in order to sort greatest-to-least:
+// returns -1 when elem1 has the higher count and 1 when elem2 does.
+// Within the same cpu_count the same rule is applied to genotype_count; 0 means both counts tie.
+int cAnalyze::PStatsComparator(const void * elem1, const void * elem2)
+{
+ if (((p_stats*)elem2)->cpu_count > ((p_stats*)elem1)->cpu_count) return 1; // elem2 has more organisms, so elem1 sorts after it
+ if (((p_stats*)elem2)->cpu_count < ((p_stats*)elem1)->cpu_count) return -1; // elem1 has more organisms, so elem1 sorts first
+
+ // the cpu_counts are the same here, so break the tie greatest-to-least
+ // on genotype_count
+ if (((p_stats*)elem2)->genotype_count > ((p_stats*)elem1)->genotype_count) return 1;
+ if (((p_stats*)elem2)->genotype_count < ((p_stats*)elem1)->genotype_count) return -1;
+
+ // same cpu_count and genotype_count: report equal (phen_id and the totals are never compared)
+ return 0;
+}
+
void cAnalyze::CommandPrintPhenotypes(cString cur_string)
{
if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing phenotypes in batch "
@@ -2616,41 +2634,59 @@
// Setup the phenotype categories...
const int num_tasks = batch[cur_batch].List().GetFirst()->GetNumTasks();
const int num_phenotypes = 1 << (num_tasks + 1);
- tArray<int> phenotype_counts(num_phenotypes);
- tArray<int> genotype_counts(num_phenotypes);
- tArray<double> total_length(num_phenotypes);
- tArray<double> total_gest(num_phenotypes);
- tArray<int> total_task_count(num_phenotypes);
- tArray<int> total_task_performance_count(num_phenotypes);
- phenotype_counts.SetAll(0);
- genotype_counts.SetAll(0);
- total_length.SetAll(0.0);
- total_gest.SetAll(0.0);
- total_task_count.SetAll(0);
- total_task_performance_count.SetAll(0);
+ tHashTable<cBitArray, p_stats> phenotype_table(HASH_TABLE_SIZE_MEDIUM);
// Loop through all of the genotypes in this batch...
tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
cAnalyzeGenotype * genotype = NULL;
while ((genotype = batch_it.Next()) != NULL) {
- int phen_id = 0;
+ cBitArray phen_id(num_tasks + 1); // + 1 because phenotype also depends on viability
+ phen_id.Clear();
if (genotype->GetViable() == true) phen_id++;
for (int i = 0; i < num_tasks; i++) {
- if (genotype->GetTaskCount(i) > 0) phen_id += 1 << (i+1);
+ if (genotype->GetTaskCount(i) > 0) phen_id.Set(i + 1, true); // again, +1 because we used 0th bit for viability
}
- phenotype_counts[phen_id] += genotype->GetNumCPUs();
- genotype_counts[phen_id]++;
- total_length[phen_id] += genotype->GetNumCPUs() * genotype->GetLength();
- total_gest[phen_id] += genotype->GetNumCPUs() * genotype->GetGestTime();
- for (int i = 0; i < num_tasks; i++) {
- total_task_count[phen_id] += ((genotype->GetTaskCount(i) > 0) ? 1 : 0);
- total_task_performance_count[phen_id] += genotype->GetTaskCount(i);
+
+ p_stats phenotype_stats;
+
+ if (phenotype_table.Find(phen_id, phenotype_stats)) {
+ phenotype_stats.cpu_count += genotype->GetNumCPUs();
+ phenotype_stats.genotype_count += 1;
+ phenotype_stats.total_length += genotype->GetNumCPUs() * genotype->GetLength();
+ phenotype_stats.total_gest += genotype->GetNumCPUs() * genotype->GetGestTime();
+
+ // don't bother tracking these unless asked for
+ if (print_ttc || print_ttpc) {
+ for (int i = 0; i < num_tasks; i++) {
+ phenotype_stats.total_task_count += ((genotype->GetTaskCount(i) > 0) ? 1 : 0);
+ phenotype_stats.total_task_performance_count += genotype->GetTaskCount(i);
+ }
+ }
}
+ else {
+ phenotype_stats.phen_id = phen_id; // this is for ease of printing and sorting
+ phenotype_stats.cpu_count = genotype->GetNumCPUs();
+ phenotype_stats.genotype_count = 1;
+ phenotype_stats.total_length = genotype->GetNumCPUs() * genotype->GetLength();
+ phenotype_stats.total_gest = genotype->GetNumCPUs() * genotype->GetGestTime();
+
+ phenotype_stats.total_task_count = 0;
+ phenotype_stats.total_task_performance_count = 0;
+
+ // don't bother actually tracking these unless asked for
+ if (print_ttc || print_ttpc) {
+ for (int i = 0; i < num_tasks; i++) {
+ phenotype_stats.total_task_count += ((genotype->GetTaskCount(i) > 0) ? 1 : 0);
+ phenotype_stats.total_task_performance_count += genotype->GetTaskCount(i);
+ }
+ }
+ }
+
+ // add to / update table
+ phenotype_table.SetValue(phen_id, phenotype_stats);
}
-
- // Print out the results...
-
+
ofstream& fp = m_world->GetDataFileOFStream(filename);
fp << "# 1: Number of organisms of this phenotype" << endl
@@ -2674,40 +2710,34 @@
else { fp << "# 6+: Tasks performed in this phenotype" << endl; }
fp << endl;
- // @CAO Lets do this inefficiently for the moment, but print out the
- // phenotypes in order.
+ // Print the phenotypes in order from greatest cpu count to least
+ // Within cpu_count, print in order from greatest genotype count to least
+ tArray<p_stats> phenotype_array;
+ phenotype_table.GetValues(phenotype_array);
+ phenotype_array.MergeSort(&cAnalyze::PStatsComparator); // sort by cpu_count, greatest to least
- while (true) {
- // Find the next phenotype to print...
- int max_count = phenotype_counts[0];
- int max_position = 0;
- for (int i = 0; i < num_phenotypes; i++) {
- if (phenotype_counts[i] > max_count) {
- max_count = phenotype_counts[i];
- max_position = i;
- }
+ for (int i = 0; i < phenotype_array.GetSize(); i++) {
+ fp << phenotype_array[i].cpu_count << " "
+ << phenotype_array[i].genotype_count << " "
+ << phenotype_array[i].total_length / phenotype_array[i].cpu_count << " "
+ << phenotype_array[i].total_gest / phenotype_array[i].cpu_count << " "
+ << phenotype_array[i].phen_id.Get(0) << " "; // viability
+
+ if (print_ttc) {
+ fp << phenotype_array[i].total_task_count / phenotype_array[i].genotype_count << " ";
}
+ if (print_ttpc) {
+ fp << phenotype_array[i].total_task_performance_count / phenotype_array[i].genotype_count << " ";
+ }
- if (max_count == 0) break; // we're done!
+ // not using cBitArray::Print because it would print viability bit too
+ for (int j = 1; j <= num_tasks; j++) { fp << phenotype_array[i].phen_id.Get(j) << " "; }
- fp << phenotype_counts[max_position] << " "
- << genotype_counts[max_position] << " "
- << total_length[max_position] / phenotype_counts[max_position] << " "
- << total_gest[max_position] / phenotype_counts[max_position] << " "
- << (max_position & 1) << " ";
- if (print_ttc) { fp << total_task_count[max_position] / genotype_counts[max_position] << " "; }
- if (print_ttpc) {
- fp << total_task_performance_count[max_position] / genotype_counts[max_position] << " ";
- }
- for (int i = 1; i <= num_tasks; i++) {
- if ((max_position >> i) & 1 > 0) fp << "1 ";
- else fp << "0 ";
- }
fp << endl;
- phenotype_counts[max_position] = 0;
}
m_world->GetDataFileManager().Remove(filename);
+
}
Modified: development/source/analyze/cAnalyze.h
===================================================================
--- development/source/analyze/cAnalyze.h 2009-10-16 03:20:07 UTC (rev 3498)
+++ development/source/analyze/cAnalyze.h 2009-10-16 18:43:39 UTC (rev 3499)
@@ -55,6 +55,9 @@
#ifndef tMatrix_h
#include "tMatrix.h"
#endif
+#ifndef tHashTable_h
+#include "tHashTable.h"
+#endif
#if USE_tMemTrack
# ifndef tMemTrack_h
@@ -170,6 +173,17 @@
static void PopCommonCPUTestParameters(cWorld* in_world, cString& cur_string, cCPUTestInfo& test_info,
cResourceHistory* in_resource_history = NULL, int in_resource_time_spent_offset = 0);
+
+ // Per-phenotype running totals, filled in and printed by CommandPrintPhenotypes
+ struct p_stats {
+ cBitArray phen_id; // phenotype key: bit 0 = viability, bit i+1 = task i has a count > 0
+ int cpu_count; // sum of GetNumCPUs() over the genotypes with this phenotype
+ int genotype_count; // number of genotypes with this phenotype
+ double total_length; // sum of GetNumCPUs() * GetLength(); divided by cpu_count when printed
+ double total_gest; // sum of GetNumCPUs() * GetGestTime(); divided by cpu_count when printed
+ int total_task_count; // per-genotype number of tasks with a count > 0, summed; only accumulated if print_ttc or print_ttpc
+ int total_task_performance_count; // GetTaskCount(i) summed over all tasks and genotypes; same condition as above
+ };
private:
// Pop specific types of arguments from an arg list.
@@ -199,7 +213,7 @@
// Batch management...
int BatchUtil_GetMaxLength(int batch_id = -1);
- // Comamnd helpers...
+ // Command helpers...
void CommandDetail_Header(std::ostream& fp, int format_type,
tListIterator< tDataEntryCommand<cAnalyzeGenotype> >& output_it, int time_step = -1);
void CommandDetail_Body(std::ostream& fp, int format_type,
@@ -211,6 +225,7 @@
tListIterator< tDataEntryCommand<cAnalyzeGenotype> >& output_it);
void CommandHistogram_Body(std::ostream& fp, int format_type,
tListIterator< tDataEntryCommand<cAnalyzeGenotype> >& output_it);
+ static int PStatsComparator(const void * elem1, const void * elem2); // must be static for qsort to accept it
// Loading methods...
void LoadOrganism(cString cur_string);
Modified: development/source/tools/tHashTable.h
===================================================================
--- development/source/tools/tHashTable.h 2009-10-16 03:20:07 UTC (rev 3498)
+++ development/source/tools/tHashTable.h 2009-10-16 18:43:39 UTC (rev 3499)
@@ -73,6 +73,9 @@
#ifndef tList_h
#include "tList.h"
#endif
+#ifndef BIT_ARRAY_H
+#include "cBitArray.h"
+#endif
#include <cstdlib>
@@ -124,6 +127,20 @@
out_hash += (unsigned int) key[i];
return out_hash % table_size;
}
+
+ // HASH_TYPE = cBitArray
+ // We hash a bit array by calculating the sum of the squared values of the
+ // positions where bits are on, then modding this number by the size of
+ // the hash table. Position 0 adds 0*0 = 0, so bit 0 never changes the hash.
+ template<> inline int HashKey<cBitArray>(const cBitArray& key, int table_size)
+ {
+ unsigned int out_hash = 0;
+ for (int i = 0; i < key.GetSize(); i++) {
+ if (key.Get(i)) { out_hash += i*i; } // i*i is evaluated as int before being added to the unsigned sum
+ }
+ return out_hash % table_size; // NOTE(review): assumes table_size > 0 -- confirm at call sites
+ }
+
}
More information about the Avida-cvs
mailing list