[Avida-cvs] [Avida2-svn] r305 - trunk/source/main
huangw10@myxo.css.msu.edu
huangw10 at myxo.css.msu.edu
Wed Aug 31 07:14:22 PDT 2005
Author: huangw10
Date: 2005-08-31 10:14:22 -0400 (Wed, 31 Aug 2005)
New Revision: 305
Modified:
trunk/source/main/analyze.cc
trunk/source/main/analyze.hh
Log:
Added new community complexity measure
Modified: trunk/source/main/analyze.cc
===================================================================
--- trunk/source/main/analyze.cc 2005-08-30 19:06:34 UTC (rev 304)
+++ trunk/source/main/analyze.cc 2005-08-31 14:14:22 UTC (rev 305)
@@ -39,6 +39,7 @@
#include "task_entry.hh"
#include "tDataEntry.hh"
#include "tDataEntryCommand.hh"
+#include "tMatrix.hh"
#include "cTestCPU.h"
#include "cCPUTestInfo.h"
#include "cTestUtil.h"
@@ -437,6 +438,7 @@
return;
}
+
// Looks up the resource concentrations that are the closest to the
// given update and then fill in those concentrations into the environment.
void cAnalyze::FillResources(int update)
@@ -2733,86 +2735,85 @@
cpx_fp << genotype->GetID() << " ";
}
+ ///////////////////////////////////////////////////////////////////
+ // Point mutation at all lines of code to look for neutral mutation
+ // and the mutations that can make organism alive
+ cout << "Test point mutation." << endl;
+ vector<bool> one_line_neutral(num_insts, false);
+ vector< vector<bool> > neutral_mut(length_genome, one_line_neutral);
+ vector< vector<bool> > alive_mut(length_genome, one_line_neutral);
+ if (verbose == true) {
+ PrintTestCPUResources("");
+ }
+
+ genotype->Recalculate();
+ double base_fitness = genotype->GetFitness();
+ cout << base_fitness << endl;
+ const cGenome & base_genome = genotype->GetGenome();
+ cGenome mod_genome(base_genome);
+
+ for (int line = 0; line < length_genome; ++ line) {
+ int cur_inst = base_genome[line].GetOp();
+
+ for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
+ mod_genome[line].SetOp(mod_inst);
+ cAnalyzeGenotype test_genotype(mod_genome, inst_set);
+ test_genotype.Recalculate();
+ if (test_genotype.GetFitness() >= base_fitness) {
+ neutral_mut[line][mod_inst] = true;
+ }
+ if (test_genotype.GetFitness() > 0) {
+ alive_mut[line][mod_inst] = true;
+ }
+ }
+
+ mod_genome[line].SetOp(cur_inst);
+ }
+
/////////////////////////////////////////
// Normalize the probability at each line
+ vector< vector<double> > prob_before_env(length_genome, one_line_prob);
for (int line = 0; line < length_genome; ++ line) {
double cur_total_prob = 0.0;
+ int num_alive = 0;
for (int inst = 0; inst < num_insts; ++ inst) {
- cur_total_prob += prob[line][inst];
+ if (alive_mut[line][inst] == true) {
+ cur_total_prob += prob[line][inst];
+ num_alive ++;
+ }
}
if (cur_total_prob > 1) {
- for (int inst = 0; inst < num_insts; ++ inst) {
- prob[line][inst] = prob[line][inst] / cur_total_prob;
- }
- cur_total_prob = 1.0;
+ cout << "Total probability at " << line << " is greater than 0." << endl;
+ exit(1);
}
double left_prob = 1 - cur_total_prob;
- // Check the number of insts that could make genotype alive ...
- int num_alive = 0;
- const cGenome & base_genome = genotype->GetGenome();
- cGenome mod_genome(base_genome);
for (int inst = 0; inst < num_insts; ++ inst) {
- mod_genome[line].SetOp(inst);
- cAnalyzeGenotype test_genotype(mod_genome, inst_set);
- test_genotype.Recalculate();
-
- // Only when given inst make the genotype alive
- if (test_genotype.GetFitness() > 0) {
- num_alive ++;
+ if (alive_mut[line][inst] == true) {
+ prob_before_env[line][inst] = prob[line][inst] + left_prob / num_alive;
} else {
- prob[line][inst] = -1;
- }
- }
-
- for (int inst = 0; inst < num_insts; ++ inst) {
- if (prob[line][inst] == -1) {
- prob[line][inst] = 0;
- } else {
- prob[line][inst] += left_prob / num_alive;
+ prob_before_env[line][inst] = 0;
}
}
-
+
}
/////////////////////////////////
// Calculate entropy of each line
vector<double> entropy(length_genome, 0.0);
for (int line = 0; line < length_genome; ++ line) {
+ double sum = 0;
for (int inst = 0; inst < num_insts; ++ inst) {
- if (prob[line][inst] > 0) {
- entropy[line] -= prob[line][inst] * log(prob[line][inst]) / log(num_insts*1.0);
+ sum += prob_before_env[line][inst];
+ if (prob_before_env[line][inst] > 0) {
+ entropy[line] -= prob_before_env[line][inst] * log(prob_before_env[line][inst]) / log(num_insts*1.0);
}
}
- }
-
-
- ///////////////////////////////////////////////////////////////////
- // Point mutation at all lines of code to look for neutral mutation
- cout << "Test point mutation." << endl;
- vector<bool> one_line_neutral(num_insts, false);
- vector< vector<bool> > neutral_mut(length_genome, one_line_neutral);
-
- genotype->Recalculate();
- double base_fitness = genotype->GetFitness();
- cout << base_fitness << endl;
- const cGenome & base_genome = genotype->GetGenome();
- cGenome mod_genome(base_genome);
-
- for (int line = 0; line < length_genome; ++ line) {
- int cur_inst = base_genome[line].GetOp();
-
- for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
- mod_genome[line].SetOp(mod_inst);
- cAnalyzeGenotype test_genotype(mod_genome, inst_set);
- test_genotype.Recalculate();
- if (test_genotype.GetFitness() >= base_fitness) {
- neutral_mut[line][mod_inst] = true;
- }
+ if (sum > 1.001 || sum < 0.999) {
+ cout << "Sum of probability is not 1 at line " << line << endl;
+ exit(1);
}
-
- mod_genome[line].SetOp(cur_inst);
}
@@ -2822,20 +2823,19 @@
for (int line = 0; line < length_genome; ++ line) {
double total_prob = 0.0;
+ int num_neutral = 0;
for (int inst = 0; inst < num_insts; ++ inst) {
if (neutral_mut[line][inst] == true) {
+ num_neutral ++;
total_prob += prob[line][inst];
}
}
- if (total_prob == 0) {
- cout << "total prob is zero." ;
- exit(1);
- }
+ double left = 1 - total_prob;
for (int inst = 0; inst < num_insts; ++ inst) {
if (neutral_mut[line][inst] == true) {
- prob_given_env[line][inst] = prob[line][inst] / total_prob;
+ prob_given_env[line][inst] = prob[line][inst] + left / num_neutral;
} else {
prob_given_env[line][inst] = 0.0;
}
@@ -2848,12 +2848,18 @@
vector<double> entropy_given_env(length_genome, 0.0);
for (int line = 0; line < length_genome; ++ line) {
+ double sum = 0;
for (int inst = 0; inst < num_insts; ++ inst) {
+ sum += prob_given_env[line][inst];
if (prob_given_env[line][inst] > 0) {
entropy_given_env[line] -= prob_given_env[line][inst] * log(prob_given_env[line][inst]) /
log(num_insts*1.0);
}
}
+ if (sum > 1.001 || sum < 0.999) {
+ cout << "Sum of probability is not 1 at line " << line << " " << sum << endl;
+ exit(1);
+ }
}
@@ -2868,12 +2874,12 @@
if (entropy[line] >= entropy_given_env[line]) {
information += entropy[line] - entropy_given_env[line];
- } else { // Negative information is deemed as new information ...
+ } else { // Negative information is because given condition is not related with this genotype ...
// Count the number of insts that can make genotype alive
int num_inst_alive = 0;
for (int inst = 0; inst < num_insts; ++ inst) {
- if (prob[line][inst] > 0) {
+ if (alive_mut[line][inst] == true) {
num_inst_alive ++;
}
}
@@ -2885,7 +2891,7 @@
exit(1);
}
}
-
+
}
complexity += information;
@@ -2899,7 +2905,7 @@
// This genotype becomes the given condition of next genotype
given_genotypes.push_back(genotype);
-
+
}
// Set the test CPU back to the state it was
@@ -2911,6 +2917,332 @@
}
+void cAnalyze::CharlesCommunityComplexity(cString cur_string)
+{
+ /////////////////////////////////////////////////////////////////////
+ // Calculate the mutual information between community and environment
+ /////////////////////////////////////////////////////////////////////
+
+ cout << "Analyze community complexity of current population about environment with Charles method ...\n";
+
+ // Get the number of genotypes that are gonna be analyzed.
+ int max_genotypes = cur_string.PopWord().AsInt();
+
+ // Get update
+ int update = cur_string.PopWord().AsInt();
+
+ // Get the directory
+ cString dir = cur_string.PopWord();
+ cString defaultDir = "community_cpx/";
+ cString directory = PopDirectory(dir, defaultDir);
+
+ // Get the file name that saves the result
+ cString filename = cur_string.PopWord();
+ if (filename.IsEmpty()) {
+ filename = "community.complexity.dat";
+ }
+
+ filename.Set("%s%s", directory(), filename.GetData());
+ ofstream cpx_fp(filename());
+
+ cpx_fp << "# Legend:" << endl;
+ cpx_fp << "# 1: Genotype ID" << endl;
+ cpx_fp << "# 2: Entropy given Known Genotypes" << endl;
+ cpx_fp << "# 3: Entropy given Both Known Genotypes and Env" << endl;
+ cpx_fp << "# 4: New Information about Environment" << endl;
+ cpx_fp << "# 5: Total Complexity" << endl;
+ cpx_fp << endl;
+
+ ///////////////////////
+ // Backup test CPU data
+ bool backupUsage = cTestCPU::UseResources();
+ tArray<double> backupResources(cTestCPU::GetResources());
+ cTestCPU::UseResources() = true;
+ FillResources(update);
+
+ ///////////////////////////////////////////////////////////////////////
+ // Choose the first n most abundant genotypes and put them in community
+
+ vector<cAnalyzeGenotype *> community;
+ cAnalyzeGenotype * genotype = NULL;
+ tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+
+ while (((genotype = batch_it.Next()) != NULL) && (community.size() < max_genotypes)) {
+ community.push_back(genotype);
+ }
+
+ //////////////////////////////////////////////////////
+ // Test point mutation of each genotype in environment
+
+ int num_insts = inst_set.GetSize();
+ map<int, tMatrix<double> > point_mut;
+ int size_community = community.size();
+ int length_genome;
+ if (size_community > 1) {
+ length_genome = community[0]->GetLength();
+ }
+
+ for (int i = 0; i < size_community; ++ i) {
+ genotype = community[i];
+
+ ///////////////////////////////////////////////////////////////////
+ // Point mutation at all lines of code to look for neutral mutation
+ cout << "Test point mutation for genotype " << genotype->GetID() << endl;
+ tMatrix<double> prob(length_genome, num_insts);
+ if (verbose == true) {
+ PrintTestCPUResources("");
+ }
+
+ genotype->Recalculate();
+ double base_fitness = genotype->GetFitness();
+ const cGenome & base_genome = genotype->GetGenome();
+ cGenome mod_genome(base_genome);
+
+ for (int line = 0; line < length_genome; ++ line) {
+ int cur_inst = base_genome[line].GetOp();
+ int num_neutral = 0;
+
+ for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
+ mod_genome[line].SetOp(mod_inst);
+ cAnalyzeGenotype test_genotype(mod_genome, inst_set);
+ test_genotype.Recalculate();
+ if (test_genotype.GetFitness() >= base_fitness) {
+ prob[line][mod_inst] = 1.0;
+ num_neutral ++;
+ } else {
+ prob[line][mod_inst] = 0.0;
+ }
+ }
+
+ for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
+ prob[line][mod_inst] /= num_neutral;
+ }
+
+
+ mod_genome[line].SetOp(cur_inst);
+ }
+
+ point_mut.insert(make_pair(genotype->GetID(), prob));
+ }
+
+ //////////////////////////////////////
+ // Loop through genotypes in community
+
+ double complexity = 0.0;
+ double complexity2 = 0.0;
+ vector<cAnalyzeGenotype *> given_genotypes;
+
+ // Deal with first gentoype
+ genotype = community[0];
+ double oo_first_entropy = length_genome;
+ double oo_conditional_entropy = 0.0;
+ tMatrix<double> this_prob = point_mut.find(genotype->GetID())->second;
+
+ for (int line = 0; line < length_genome; ++ line) {
+ double oneline_entropy = 0.0;
+ for (int inst = 0; inst < num_insts; ++ inst) {
+ if (this_prob[line][inst] > 0) {
+ oneline_entropy -= this_prob[line][inst] * (log(this_prob[line][inst]) /
+ log(1.0*num_insts));
+ }
+ }
+ oo_conditional_entropy += oneline_entropy;
+ }
+ double new_info = oo_first_entropy - oo_conditional_entropy;
+ complexity += new_info;
+ complexity2 += new_info;
+ given_genotypes.push_back(genotype);
+ cpx_fp << genotype->GetID() << " " << oo_first_entropy << " " << oo_conditional_entropy << " "
+ << new_info << " " << complexity << " ";
+ genotype->Recalculate();
+ genotype->PrintTasks(cpx_fp, 0, -1);
+ cpx_fp << endl;
+
+ // Other genotypes in community ...
+ for (int i = 1; i < size_community; ++ i) {
+ genotype = community[i];
+ if (genotype->GetLength() != length_genome) {
+ cerr << "Genotypes in the community do not same genome length.\n";
+ cpx_fp.close();
+ exit(1);
+ }
+
+ // Skip the dead organisms
+ genotype->Recalculate();
+ cout << genotype->GetID() << " " << genotype->GetFitness() << endl;
+ if (genotype->GetFitness() == 0) {
+ continue;
+ }
+
+ double min_new_info = length_genome;
+ double oo_first_entropy, oo_conditional_entropy;
+ tMatrix<double> this_prob = point_mut.find(genotype->GetID())->second;
+
+ // For any given genotype, calculate the new information in genotype
+ for (int j = 0; j < given_genotypes.size(); ++ j) {
+
+ tMatrix<double> given_prob = point_mut.find(given_genotypes[j]->GetID())->second;
+ double new_info = 0.0;
+ double total_first_entropy = 0.0;
+ double total_conditional_entropy = 0.0;
+
+ for (int line = 0; line < length_genome; ++ line) {
+
+ // H(genotype|known_genotype)
+ double prob_overlap = 0;
+ tArray<double> normalized_overlap(num_insts);
+ for (int inst = 0; inst < num_insts; ++ inst) {
+ if (this_prob[line][inst] < given_prob[line][inst]) {
+ prob_overlap += this_prob[line][inst];
+ normalized_overlap[inst] = this_prob[line][inst];
+ } else {
+ prob_overlap += given_prob[line][inst];
+ normalized_overlap[inst] = given_prob[line][inst];
+ }
+ }
+
+ double overlap_entropy = 0.0;
+ for (int inst = 0; inst < num_insts; ++ inst) {
+ normalized_overlap[inst] /= prob_overlap;
+ if (normalized_overlap[inst] > 0) {
+ overlap_entropy -= normalized_overlap[inst] * (log(normalized_overlap[inst]) /
+ log(1.0*num_insts));
+ }
+ }
+
+ double entropy_overlap = 0.0;
+ if (prob_overlap > 0 && (1 - prob_overlap > 0)) {
+ entropy_overlap = (- prob_overlap * log(prob_overlap) - (1-prob_overlap) * log(1 - prob_overlap)) / log(1.0*num_insts);
+ } else {
+ entropy_overlap = 0;
+ }
+
+ double first_entropy = prob_overlap * overlap_entropy + (1 - prob_overlap) * 1 + entropy_overlap;
+ total_first_entropy += first_entropy;
+
+ // H(genotype|E, known_genotype) = H(genotype|Env)
+ double conditional_entropy = 0.0;
+ for (int inst = 0; inst < num_insts; ++ inst) {
+ if (this_prob[line][inst] > 0) {
+ conditional_entropy -= this_prob[line][inst] * (log(this_prob[line][inst]) /
+ log(1.0*num_insts));
+ }
+ }
+ total_conditional_entropy += conditional_entropy;
+
+ if (conditional_entropy > first_entropy + 0.001) {
+ cerr << "Negative Information.\n";
+ cout << line << endl;
+ exit(1);
+ }
+
+ new_info += first_entropy - conditional_entropy;
+ }
+
+ if (new_info < min_new_info) {
+ min_new_info = new_info;
+ oo_first_entropy = total_first_entropy;
+ oo_conditional_entropy = total_conditional_entropy;
+ }
+
+ }
+ complexity += min_new_info;
+ cpx_fp << genotype->GetID() << " " << oo_first_entropy << " " << oo_conditional_entropy << " "
+ << min_new_info << " " << complexity << " ";
+
+ // Second method of Charles
+ /*min_new_info = length_genome;
+
+ for (int j = 0; j < given_genotypes.size(); ++ j) {
+
+ tMatrix<double> given_prob = point_mut.find(given_genotypes[j]->GetID())->second;
+ double new_info = 0.0;
+ double total_first_entropy = 0.0;
+ double total_conditional_entropy = 0.0;
+
+ for (int line = 0; line < length_genome; ++ line) {
+
+ // H(genotype|known_genotype)
+ double prob_overlap = 0;
+ tArray<double> normalized_overlap(num_insts);
+ for (int inst = 0; inst < num_insts; ++ inst) {
+ if (this_prob[line][inst] < given_prob[line][inst]) {
+ prob_overlap += this_prob[line][inst];
+ normalized_overlap[inst] = this_prob[line][inst];
+ } else {
+ prob_overlap += given_prob[line][inst];
+ normalized_overlap[inst] = given_prob[line][inst];
+ }
+ }
+
+ double first_entropy = 0.0;
+ for (int inst = 0; inst < num_insts; ++ inst) {
+ normalized_overlap[inst] += (1-prob_overlap) / num_insts;
+ if (normalized_overlap[inst] > 0) {
+ first_entropy -= normalized_overlap[inst] * (log(normalized_overlap[inst]) /
+ log(1.0*num_insts));
+ }
+ }
+ total_first_entropy += first_entropy;
+
+ // H(genotype|E, known_genotype) = H(genotype|Env)
+ double conditional_entropy = 0.0;
+ for (int inst = 0; inst < num_insts; ++ inst) {
+ if (this_prob[line][inst] > 0) {
+ conditional_entropy -= this_prob[line][inst] * (log(this_prob[line][inst]) /
+ log(1.0*num_insts));
+ }
+ }
+ total_conditional_entropy += conditional_entropy;
+
+ if (conditional_entropy > first_entropy + 0.001) {
+ cout << "This probability is:\n";
+ for (int inst = 0; inst < num_insts; inst ++) {
+ cout << this_prob[line][inst] << " ";
+ }
+ cout << endl;
+ cout << "Given probability is:\n";
+ for (int inst = 0; inst < num_insts; inst ++) {
+ cout << given_prob[line][inst] << " ";
+ }
+ cout << endl;
+ cerr << "Negative Information of second method at line " << line << endl;;
+ cerr << "Given genotype is " << given_genotypes[j]->GetID() << endl;
+ exit(1);
+ }
+
+ new_info += first_entropy - conditional_entropy;
+ }
+
+ if (new_info < min_new_info) {
+ min_new_info = new_info;
+ oo_first_entropy = total_first_entropy;
+ oo_conditional_entropy = total_conditional_entropy;
+ }
+
+ }
+ complexity2 += min_new_info;
+ cpx_fp << oo_first_entropy << " " << oo_conditional_entropy << " "
+ << min_new_info << " " << complexity2 << " ";*/
+
+
+ genotype->PrintTasks(cpx_fp, 0, -1);
+ cpx_fp << endl;
+ given_genotypes.push_back(genotype);
+ }
+
+
+ // Set the test CPU back to the state it was
+ cTestCPU::UseResources() = backupUsage;
+ cTestCPU::SetupResourceArray(backupResources);
+
+ cpx_fp.close();
+ return;
+
+
+
+}
+
void cAnalyze::CommandLandscape(cString cur_string)
{
if (verbose == true) cout << "Landscaping batch " << cur_batch << endl;
@@ -5809,6 +6141,7 @@
{
int words = cur_string.CountNumWords();
int useResources = 0;
+ int update = -1;
if(words >= 1) {
useResources = cur_string.PopWord().AsInt();
// All non-zero values are considered false
@@ -5816,6 +6149,9 @@
useResources = 0;
}
}
+ if (words >= 2) {
+ update = cur_string.PopWord().AsInt();
+ }
bool backupUsage;
tArray<double> backupResources;
@@ -5837,14 +6173,18 @@
<< "parent and ancestor distances may not be correct" << endl;
}
+ if (useResources && update > -1) {
+ FillResources(update);
+ }
+
tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
cAnalyzeGenotype * genotype = NULL;
cAnalyzeGenotype * last_genotype = NULL;
while ((genotype = batch_it.Next()) != NULL) {
// If use resources, load proper resource according to update_born
- int updateBorn = -1;
- if(useResources) {
+ if(useResources && update == -1) {
+ int updateBorn = -1;
updateBorn = genotype->GetUpdateBorn();
FillResources(updateBorn);
}
@@ -6639,6 +6979,7 @@
AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::CommunityComplexity);
+ AddLibraryDef("CHARLES_COMMUNITY_COMPLEXITY", &cAnalyze::CharlesCommunityComplexity);
// Individual organism analysis...
AddLibraryDef("LANDSCAPE", &cAnalyze::CommandLandscape);
Modified: trunk/source/main/analyze.hh
===================================================================
--- trunk/source/main/analyze.hh 2005-08-30 19:06:34 UTC (rev 304)
+++ trunk/source/main/analyze.hh 2005-08-31 14:14:22 UTC (rev 305)
@@ -156,6 +156,7 @@
void CommandPrintPhenotypes(cString cur_string);
void CommandPrintDiversity(cString cur_string);
void CommunityComplexity(cString cur_string);
+ void CharlesCommunityComplexity(cString cur_string);
// Individual Organism Analysis...
void CommandLandscape(cString cur_string);
More information about the Avida-cvs
mailing list