[Avida-cvs] [Avida2-svn] r368 - branches/brysonda/source/main
brysonda@myxo.css.msu.edu
brysonda at myxo.css.msu.edu
Thu Nov 3 16:39:31 PST 2005
Author: brysonda
Date: 2005-11-03 19:39:30 -0500 (Thu, 03 Nov 2005)
New Revision: 368
Modified:
branches/brysonda/source/main/cAnalyze.cc
branches/brysonda/source/main/cAnalyze.h
branches/brysonda/source/main/cAnalyzeGenotype.cc
branches/brysonda/source/main/cAnalyzeGenotype.h
branches/brysonda/source/main/cLandscape.cc
branches/brysonda/source/main/cLandscape.h
Log:
Merge in changes to analyze from trunk revision 364.
Modified: branches/brysonda/source/main/cAnalyze.cc
===================================================================
--- branches/brysonda/source/main/cAnalyze.cc 2005-11-01 05:59:21 UTC (rev 367)
+++ branches/brysonda/source/main/cAnalyze.cc 2005-11-04 00:39:30 UTC (rev 368)
@@ -1300,43 +1300,71 @@
}
else cout << "Sampling Organisms..." << endl;
- int old_total = 0;
- int new_total = 0;
-
cAnalyzeGenotype * genotype = NULL;
tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
- // of only interested in viables, remove them first.
- if (test_viable==1) {
- while ((genotype = batch_it.Next()) != NULL) {
- if ((genotype->GetViable())==0) {
- batch_it.Remove();
- delete genotype;
- }
- }
+ // Loop through all genotypes to perform a census
+ int org_count = 0;
+ while ((genotype = batch_it.Next()) != NULL) {
+ // If we require viables, reduce all non-viables to zero organisms.
+ if (test_viable == 1 && genotype->GetViable() == 0) {
+ genotype->SetNumCPUs(0);
+ }
+
+ // Count the number of organisms in this genotype.
+ org_count += genotype->GetNumCPUs();
}
+ // Create an array to store pointers to the genotypes and fill it in
+ // while temporarily resetting all of the organism counts to zero.
+ tArray<cAnalyzeGenotype *> org_array(org_count);
+ int cur_org = 0;
batch_it.Reset();
+ while ((genotype = batch_it.Next()) != NULL) {
+ for (int i = 0; i < genotype->GetNumCPUs(); i++) {
+ org_array[cur_org] = genotype;
+ cur_org++;
+ }
+ genotype->SetNumCPUs(0);
+ }
+ assert(cur_org == org_count);
+
+ // Determine how many organisms we want to keep.
+ int new_org_count = (int) fraction;
+ if (fraction < 1.0) new_org_count = (int) (fraction * (double) org_count);
+ if (new_org_count > org_count) {
+ cerr << "Warning: Trying to sample " << new_org_count
+ << "organisms from a population of " << org_count
+ << endl;
+ new_org_count = org_count;
+ }
+
+ // Now pick those that we are keeping.
+ tArray<int> keep_ids(new_org_count);
+ random.Choose(org_count, keep_ids);
+
+ // And increment the org counts for the associated genotypes.
+ for (int i = 0; i < new_org_count; i++) {
+ genotype = org_array[ keep_ids[i] ];
+ genotype->SetNumCPUs(genotype->GetNumCPUs() + 1);
+ }
+
+
+ // Delete all genotypes with no remaining organisms...
+ batch_it.Reset();
while ((genotype = batch_it.Next()) != NULL) {
- const int old_count = genotype->GetNumCPUs();
- const int new_count = random.GetRandBinomial(old_count, fraction);
- old_total += old_count;
- new_total += new_count;
-
- if (new_count == 0) {
+ if (genotype->GetNumCPUs() == 0) {
batch_it.Remove();
delete genotype;
- } else {
- genotype->SetNumCPUs(new_count);
}
}
int num_genotypes = batch[cur_batch].List().GetSize();
if (verbose) {
- cout << " Removed " << old_total - new_total
+ cout << " Removed " << org_count - new_org_count
<< " organisms (" << init_genotypes - num_genotypes
- << " genotypes); " << new_total
+ << " genotypes); " << new_org_count
<< " orgs (" << num_genotypes << " gens) remaining."
<< endl;
}
@@ -2904,7 +2932,7 @@
}
-void cAnalyze::CommunityComplexity(cString cur_string)
+void cAnalyze::AnalyzeCommunityComplexity(cString cur_string)
{
/////////////////////////////////////////////////////////////////////
// Calculate the mutual information between community and environment
@@ -3480,6 +3508,226 @@
}
+void cAnalyze::AnalyzeComplexityDelta(cString cur_string)
+{
+ // This command will examine the current population, and sample mutations
+ // to see what the distribution of complexity changes is. Only genotypes
+ // with a certain abundance (default=3) will be tested to make sure that
+ // the organism didn't already have hidden complexity due to a downward
+ // step.
+ cout << "Testing complexity delta." << endl;
+
+ cString filename = "complexity_delta.dat";
+ int num_tests = 10;
+ double copy_mut_prob = m_world->GetConfig().COPY_MUT_PROB.Get();
+ double ins_mut_prob = m_world->GetConfig().DIVIDE_INS_PROB.Get();
+ double del_mut_prob = m_world->GetConfig().DIVIDE_DEL_PROB.Get();
+ int count_threshold = 3;
+
+ if (cur_string.GetSize() > 0) filename = cur_string.PopWord();
+ if (cur_string.GetSize() > 0) num_tests = cur_string.PopWord().AsInt();
+ if (cur_string.GetSize() > 0) copy_mut_prob = cur_string.PopWord().AsDouble();
+ if (cur_string.GetSize() > 0) ins_mut_prob = cur_string.PopWord().AsDouble();
+ if (cur_string.GetSize() > 0) del_mut_prob = cur_string.PopWord().AsDouble();
+ if (cur_string.GetSize() > 0) count_threshold = cur_string.PopWord().AsInt();
+
+ if (verbose == true) {
+ cout << "...using:"
+ << " filename='" << filename << "'"
+ << " num_tests=" << num_tests
+ << " copy_mut_prob=" << copy_mut_prob
+ << " ins_mut_prob=" << ins_mut_prob
+ << " del_mut_prob=" << del_mut_prob
+ << " count_threshold=" << count_threshold
+ << endl;
+ }
+
+ // Create an array of all of the genotypes above threshold.
+ cAnalyzeGenotype * genotype = NULL;
+ tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+
+ // Loop through all genotypes to perform a census
+ int org_count = 0;
+ while ((genotype = batch_it.Next()) != NULL) {
+ // Only count genotypes above threshold
+ if (genotype->GetNumCPUs() >= count_threshold) {
+ org_count += genotype->GetNumCPUs();
+ }
+ }
+
+ // Create an array to store pointers to the genotypes and fill it in.
+ tArray<cAnalyzeGenotype *> org_array(org_count);
+ int cur_org = 0;
+ batch_it.Reset();
+ while ((genotype = batch_it.Next()) != NULL) {
+ // Ignore genotypes below threshold.
+ if (genotype->GetNumCPUs() < count_threshold) continue;
+
+ // Insert the remaining genotypes into the array.
+ for (int i = 0; i < genotype->GetNumCPUs(); i++) {
+ org_array[cur_org] = genotype;
+ cur_org++;
+ }
+ }
+
+ // Open up the file and prepare it for output.
+ cDataFile & df = data_file_manager.Get(filename);
+ df.WriteComment( "An analyze of expected complexity changes between parent and offspring" );
+ df.WriteTimeStamp();
+
+ // Next check the appropriate number of organisms, perform mutations, and
+ // store the results.
+ for (int cur_test = 0; cur_test < num_tests; cur_test++) {
+ // Pick the genotype to test.
+ int test_org_id = m_world->GetRandom().GetInt(org_count);
+ genotype = org_array[test_org_id];
+
+ // Create a copy of the genome.
+ cCPUMemory mod_genome = genotype->GetGenome();
+
+ if (copy_mut_prob == 0.0 &&
+ ins_mut_prob == 0.0 &&
+ del_mut_prob == 0.0) {
+ cerr << "ERROR: All mutation rates are zero! No complexity delta analysis possible." << endl;
+ return;
+ }
+
+ // Perform the per-site mutations -- we are going to keep looping until
+ // we trigger at least one mutation.
+ int num_mutations = 0;
+ while (num_mutations == 0) {
+ if (copy_mut_prob > 0.0) {
+ for (int i = 0; i < mod_genome.GetSize(); i++) {
+ if (m_world->GetRandom().P(copy_mut_prob)) {
+ mod_genome[i] = inst_set.GetRandomInst();
+ num_mutations++;
+ }
+ }
+ }
+
+ // Perform an Insertion if it has one.
+ if (m_world->GetRandom().P(ins_mut_prob)) {
+ int ins_line = m_world->GetRandom().GetInt(mod_genome.GetSize() + 1);
+ mod_genome.Insert(ins_line, inst_set.GetRandomInst());
+ num_mutations++;
+ }
+
+ // Perform a Deletion if it has one.
+ if (m_world->GetRandom().P(del_mut_prob)) {
+ int del_line = m_world->GetRandom().GetInt(mod_genome.GetSize());
+ mod_genome.Remove(del_line);
+ num_mutations++;
+ }
+ }
+
+ // Calculate the complexities....
+ genotype->Recalculate();
+ double start_complexity = genotype->GetComplexity();
+ double start_fitness = genotype->GetFitness();
+ int start_length = genotype->GetLength();
+ int start_gest = genotype->GetGestTime();
+
+ cAnalyzeGenotype new_genotype(m_world, mod_genome, inst_set);
+ new_genotype.Recalculate();
+ double end_complexity = new_genotype.GetComplexity();
+ double complexity_change = end_complexity - start_complexity;
+ double end_fitness = new_genotype.GetFitness();
+ int end_length = new_genotype.GetLength();
+ int end_gest = new_genotype.GetGestTime();
+
+ df.Write(num_mutations, "Number of mutational differences between original organism and mutant.");
+ df.Write(complexity_change, "Complexity difference between original organism and mutant.");
+ df.Write(start_complexity, "Complexity of initial organism.");
+ df.Write(end_complexity, "Complexity of mutant.");
+ df.Write(start_fitness, "Fitness of initial organism.");
+ df.Write(end_fitness, "Fitness of mutant.");
+ df.Write(start_length, "Length of initial organism.");
+ df.Write(end_length, "Length of mutant.");
+ df.Write(start_gest, "Gestation Time of initial organism.");
+ df.Write(end_gest, "Gestation Time of mutant.");
+ df.Write(genotype->GetID(), "ID of initial genotype.");
+ df.Endl();
+ }
+}
+
+void cAnalyze::AnalyzeKnockouts(cString cur_string)
+{
+ cout << "Analyzing the effects of knockouts..." << endl;
+
+ cString filename = "knockouts.dat";
+ if (cur_string.GetSize() > 0) filename = cur_string.PopWord();
+
+ int max_knockouts = 1;
+ if (cur_string.GetSize() > 0) max_knockouts = cur_string.PopWord().AsInt();
+
+ // Open up the data file...
+ cDataFile & df = data_file_manager.Get(filename);
+ df.WriteComment( "Analysis of knockouts in genomes" );
+ df.WriteTimeStamp();
+
+
+ // Setup a NULL instruction in a special inst set.
+ cInstSet ko_inst_set(inst_set);
+ // Locate the instruction corresponding to "NULL" in the instruction library.
+ {
+ cInstruction lib_null_inst = ko_inst_set.GetInstLib()->GetInst("NULL");
+ if (lib_null_inst == ko_inst_set.GetInstLib()->GetInstError()) {
+ cout << "<cAnalyze::AnalyzeKnockouts> got error:" << endl
+ << " instruction 'NULL' not in current hardware type" << endl;
+ exit(1);
+ }
+ // Add mapping to located instruction.
+ ko_inst_set.Add2(lib_null_inst.GetOp());
+ }
+ const cInstruction null_inst = ko_inst_set.GetInst("NULL");
+
+ // Loop through all of the genotypes in this batch...
+ tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+ cAnalyzeGenotype * genotype = NULL;
+ while ((genotype = batch_it.Next()) != NULL) {
+ if (verbose == true) cout << " Knockout: " << genotype->GetName() << endl;
+
+ // Calculate the stats for the genotype we're working with...
+ genotype->Recalculate();
+ const double base_fitness = genotype->GetFitness();
+
+ const int max_line = genotype->GetLength();
+ const cGenome & base_genome = genotype->GetGenome();
+ cGenome mod_genome(base_genome);
+
+ // Loop through all the lines of code, testing the removal of each.
+ int dead_count = 0;
+ int neg_count = 0;
+ int neut_count = 0;
+ int pos_count = 0;
+ for (int line_num = 0; line_num < max_line; line_num++) {
+ // Save a copy of the current instruction and replace it with "NULL"
+ int cur_inst = base_genome[line_num].GetOp();
+ mod_genome[line_num] = null_inst;
+ cAnalyzeGenotype ko_genotype(m_world, mod_genome, ko_inst_set);
+ ko_genotype.Recalculate();
+
+ double ko_fitness = ko_genotype.GetFitness();
+ if (ko_fitness == 0.0) dead_count++;
+ else if (ko_fitness < base_fitness) neg_count++;
+ else if (ko_fitness == base_fitness) neut_count++;
+ else if (ko_fitness > base_fitness) pos_count++;
+ else cerr << "ERROR: illegal state in AnalyzeKnockouts()" << endl;
+
+ // Reset the mod_genome back to the original sequence.
+ mod_genome[line_num].SetOp(cur_inst);
+ }
+
+ df.Write(genotype->GetID(), "Genotype ID");
+ df.Write(dead_count, "Count of lethal knockouts");
+ df.Write(neg_count, "Count of detrimental knockouts");
+ df.Write(neut_count, "Count of neutral knockouts");
+ df.Write(pos_count, "Count of beneficial knockouts");
+ df.Endl();
+ }
+}
+
+
void cAnalyze::CommandFitnessMatrix(cString cur_string)
{
if (verbose == true) cout << "Calculating fitness matrix for batch " << cur_batch << endl;
@@ -3728,11 +3976,8 @@
const cInstruction inst_lib_null_inst = map_inst_set.GetInstLib()->GetInst("NULL");
if(inst_lib_null_inst == map_inst_set.GetInstLib()->GetInstError()){
cout << "<cAnalyze::CommandMapTasks> got error:" << endl;
- cout << " --- instruction \"NULL\" isn't in the instruction library;" << endl;
- cout << " --- get somebody to map a function to \"NULL\" in the library." << endl;
- cout << " --- (probably to class method \"cHardware-of-some-type::initInstLib\"" << endl;
- cout << " --- in file named \"cpu/hardware-of-some-type.cc\".)" << endl;
- cout << " --- bailing-out." << endl;
+ cout << " --- instruction \"NULL\" isn't in the instruction library for the" << endl;
+ cout << " --- current hardware type." << endl;
exit(1);
}
// Add mapping to located instruction.
@@ -3811,8 +4056,7 @@
delete [] col_pass_count;
delete [] col_fail_count;
-
- }
+ }
}
void cAnalyze::CommandAverageModularity(cString cur_string)
@@ -5705,7 +5949,7 @@
// This command requires at least on arguement
int words = cur_string.CountNumWords();
if(words < 1) {
- cout << "AnalyzeComplexity has no parameters, skipping." << endl;
+ cout << "Error: AnalyzeComplexity has no parameters, skipping." << endl;
return;
}
@@ -6718,7 +6962,7 @@
("viable", "Is Viable (0/1)", &cAnalyzeGenotype::GetViable,
&cAnalyzeGenotype::SetViable));
genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, int>
- ("id", "Genome ID", &cAnalyzeGenotype::GetID,
+ ("id", "Genotype ID", &cAnalyzeGenotype::GetID,
&cAnalyzeGenotype::SetID));
genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, const cString &>
("tag", "Genotype Tag", &cAnalyzeGenotype::GetTag,
@@ -6816,6 +7060,10 @@
("frac_pos", "Fraction Mutations Beneficial",
&cAnalyzeGenotype::GetFracPos,
(void (cAnalyzeGenotype::*)(double)) NULL));
+ genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, double>
+ ("complexity", "Basic Complexity (all neutral/beneficial muts are equal)",
+ &cAnalyzeGenotype::GetComplexity,
+ (void (cAnalyzeGenotype::*)(double)) NULL));
genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, const cString &>
("parent_muts", "Mutations from Parent",
&cAnalyzeGenotype::GetParentMuts, &cAnalyzeGenotype::SetParentMuts,
@@ -6998,7 +7246,7 @@
// Population analysis commands...
AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
- AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::CommunityComplexity);
+ AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::AnalyzeCommunityComplexity);
// Individual organism analysis...
AddLibraryDef("LANDSCAPE", &cAnalyze::CommandLandscape);
@@ -7008,6 +7256,7 @@
AddLibraryDef("AVERAGE_MODULARITY", &cAnalyze::CommandAverageModularity);
AddLibraryDef("MAP_MUTATIONS", &cAnalyze::CommandMapMutations);
AddLibraryDef("ANALYZE_COMPLEXITY", &cAnalyze::AnalyzeComplexity);
+ AddLibraryDef("ANALYZE_KNOCKOUTS", &cAnalyze::AnalyzeKnockouts);
AddLibraryDef("ANALYZE_POP_COMPLEXITY", &cAnalyze::AnalyzePopComplexity);
AddLibraryDef("MAP_DEPTH", &cAnalyze::CommandMapDepth);
@@ -7035,6 +7284,7 @@
&cAnalyze::AnalyzeMutationTraceback);
AddLibraryDef("ANALYZE_EPISTASIS", &cAnalyze::AnalyzeEpistasis);
AddLibraryDef("ANALYZE_MATE_SELECTION", &cAnalyze::AnalyzeMateSelection);
+ AddLibraryDef("ANALYZE_COMPLEXITY_DELTA", &cAnalyze::AnalyzeComplexityDelta);
// Environment manipulation
AddLibraryDef("ENVIRONMENT", &cAnalyze::EnvironmentSetup);
Modified: branches/brysonda/source/main/cAnalyze.h
===================================================================
--- branches/brysonda/source/main/cAnalyze.h 2005-11-01 05:59:21 UTC (rev 367)
+++ branches/brysonda/source/main/cAnalyze.h 2005-11-04 00:39:30 UTC (rev 368)
@@ -158,7 +158,7 @@
void CommandPrintPhenotypes(cString cur_string);
void CommandPrintDiversity(cString cur_string);
void PhyloCommunityComplexity(cString cur_string);
- void CommunityComplexity(cString cur_string);
+ void AnalyzeCommunityComplexity(cString cur_string);
// Individual Organism Analysis...
void CommandLandscape(cString cur_string);
@@ -190,9 +190,11 @@
void AnalyzeBranching(cString cur_string);
void AnalyzeMutationTraceback(cString cur_string);
void AnalyzeComplexity(cString cur_string);
+ void AnalyzeKnockouts(cString cur_string);
void AnalyzePopComplexity(cString cur_string);
void AnalyzeEpistasis(cString cur_string);
void AnalyzeMateSelection(cString cur_string);
+ void AnalyzeComplexityDelta(cString cur_string);
// Environment Manipulation
void EnvironmentSetup(cString cur_string);
Modified: branches/brysonda/source/main/cAnalyzeGenotype.cc
===================================================================
--- branches/brysonda/source/main/cAnalyzeGenotype.cc 2005-11-01 05:59:21 UTC (rev 367)
+++ branches/brysonda/source/main/cAnalyzeGenotype.cc 2005-11-04 00:39:30 UTC (rev 368)
@@ -159,6 +159,7 @@
landscape_stats->frac_neg = landscape.GetProbNeg();
landscape_stats->frac_neut = landscape.GetProbNeut();
landscape_stats->frac_pos = landscape.GetProbPos();
+ landscape_stats->complexity = landscape.GetComplexity();
}
void cAnalyzeGenotype::Recalculate(cAnalyzeGenotype * parent_genotype)
@@ -249,6 +250,11 @@
return landscape_stats->frac_pos;
}
+double cAnalyzeGenotype::GetComplexity() const
+{
+ CalcLandscape(); // Make sure the landscape is calculated...
+ return landscape_stats->complexity;
+}
cString cAnalyzeGenotype::GetTaskList() const
{
Modified: branches/brysonda/source/main/cAnalyzeGenotype.h
===================================================================
--- branches/brysonda/source/main/cAnalyzeGenotype.h 2005-11-01 05:59:21 UTC (rev 367)
+++ branches/brysonda/source/main/cAnalyzeGenotype.h 2005-11-04 00:39:30 UTC (rev 368)
@@ -89,8 +89,9 @@
double frac_neg;
double frac_neut;
double frac_pos;
+ double complexity;
cAnalyzeLandscape() : frac_dead(0.0), frac_neg(0.0),
- frac_neut(0.0), frac_pos(0.0) { ; }
+ frac_neut(0.0), frac_pos(0.0), complexity(0.0) { ; }
};
mutable cAnalyzeLandscape * landscape_stats;
@@ -194,10 +195,12 @@
const cString & GetParentMuts() const { return parent_muts; }
+ // Landscape accessors
double GetFracDead() const;
double GetFracNeg() const;
double GetFracNeut() const;
double GetFracPos() const;
+ double GetComplexity() const;
double GetFitnessRatio() const { return fitness_ratio; }
double GetEfficiencyRatio() const { return efficiency_ratio; }
Modified: branches/brysonda/source/main/cLandscape.cc
===================================================================
--- branches/brysonda/source/main/cLandscape.cc 2005-11-01 05:59:21 UTC (rev 367)
+++ branches/brysonda/source/main/cLandscape.cc 2005-11-04 00:39:30 UTC (rev 368)
@@ -139,7 +139,10 @@
// Calculate the complexity...
double max_ent = log((double) inst_set.GetSize());
+ total_entropy = 0;
for (int i = 0; i < base_genome.GetSize(); i++) {
+ // Per-site entropy is the log of the number of legal states for that
+ // site. Add one to account for the unmutated state.
total_entropy += (log((double) site_count[i] + 1) / max_ent);
}
complexity = base_genome.GetSize() - total_entropy;
Modified: branches/brysonda/source/main/cLandscape.h
===================================================================
--- branches/brysonda/source/main/cLandscape.h 2005-11-01 05:59:21 UTC (rev 367)
+++ branches/brysonda/source/main/cLandscape.h 2005-11-04 00:39:30 UTC (rev 368)
@@ -165,6 +165,9 @@
else return no_epi_size/no_epi_count;}
inline int GetNumTrials() const { return trials; }
+
+ inline double GetTotalEntropy() const { return total_entropy; }
+ inline double GetComplexity() const { return complexity; }
};
#endif
More information about the Avida-cvs
mailing list