[Avida-cvs] [Avida2-svn] r310 - trunk/source/main
huangw10@myxo.css.msu.edu
huangw10 at myxo.css.msu.edu
Wed Sep 14 07:08:20 PDT 2005
Author: huangw10
Date: 2005-09-14 10:08:20 -0400 (Wed, 14 Sep 2005)
New Revision: 310
Modified:
trunk/source/main/analyze.cc
Log:
Calculate community complexity.
Modified: trunk/source/main/analyze.cc
===================================================================
--- trunk/source/main/analyze.cc 2005-09-08 20:53:21 UTC (rev 309)
+++ trunk/source/main/analyze.cc 2005-09-14 14:08:20 UTC (rev 310)
@@ -2423,7 +2423,7 @@
}
-void cAnalyze::CommunityComplexity(cString cur_string)
+void cAnalyze::PhyloCommunityComplexity(cString cur_string)
{
/////////////////////////////////////////////////////////////////////////
@@ -2917,7 +2917,7 @@
}
-void cAnalyze::CharlesCommunityComplexity(cString cur_string)
+void cAnalyze::CommunityComplexity(cString cur_string)
{
/////////////////////////////////////////////////////////////////////
// Calculate the mutual information between community and environment
@@ -2951,6 +2951,8 @@
cpx_fp << "# 3: Entropy given Both Known Genotypes and Env" << endl;
cpx_fp << "# 4: New Information about Environment" << endl;
cpx_fp << "# 5: Total Complexity" << endl;
+ cpx_fp << "# 6: Hamming Distance to Closest Given Genotype" << endl;
+ cpx_fp << "# 7 - : Tasks Implemented" << endl;
cpx_fp << endl;
///////////////////////
@@ -3032,9 +3034,9 @@
double complexity2 = 0.0;
vector<cAnalyzeGenotype *> given_genotypes;
- // Deal with first gentoype
+ // Calculate the complexity of the first gentoype
genotype = community[0];
- double oo_first_entropy = length_genome;
+ double oo_initial_entropy = length_genome;
double oo_conditional_entropy = 0.0;
tMatrix<double> this_prob = point_mut.find(genotype->GetID())->second;
@@ -3048,17 +3050,21 @@
}
oo_conditional_entropy += oneline_entropy;
}
- double new_info = oo_first_entropy - oo_conditional_entropy;
+ double new_info = oo_initial_entropy - oo_conditional_entropy;
complexity += new_info;
complexity2 += new_info;
given_genotypes.push_back(genotype);
- cpx_fp << genotype->GetID() << " " << oo_first_entropy << " " << oo_conditional_entropy << " "
- << new_info << " " << complexity << " ";
+ cpx_fp << genotype->GetID() << " "
+ << oo_initial_entropy << " "
+ << oo_conditional_entropy << " "
+ << new_info << " "
+ << complexity << " "
+ << 0 << " ";
genotype->Recalculate();
genotype->PrintTasks(cpx_fp, 0, -1);
cpx_fp << endl;
- // Other genotypes in community ...
+ // New information in other genotypes in community ...
for (int i = 1; i < size_community; ++ i) {
genotype = community[i];
if (genotype->GetLength() != length_genome) {
@@ -3075,7 +3081,8 @@
}
double min_new_info = length_genome;
- double oo_first_entropy, oo_conditional_entropy;
+ double oo_initial_entropy, oo_conditional_entropy;
+ cAnalyzeGenotype * used_genotype;
tMatrix<double> this_prob = point_mut.find(genotype->GetID())->second;
// For any given genotype, calculate the new information in genotype
@@ -3083,42 +3090,41 @@
tMatrix<double> given_prob = point_mut.find(given_genotypes[j]->GetID())->second;
double new_info = 0.0;
- double total_first_entropy = 0.0;
+ double total_initial_entropy = 0.0;
double total_conditional_entropy = 0.0;
for (int line = 0; line < length_genome; ++ line) {
// H(genotype|known_genotype)
double prob_overlap = 0;
- tArray<double> normalized_overlap(num_insts);
for (int inst = 0; inst < num_insts; ++ inst) {
if (this_prob[line][inst] < given_prob[line][inst]) {
prob_overlap += this_prob[line][inst];
- normalized_overlap[inst] = this_prob[line][inst];
} else {
prob_overlap += given_prob[line][inst];
- normalized_overlap[inst] = given_prob[line][inst];
}
}
- double overlap_entropy = 0.0;
+ double given_site_entropy = 0.0;
for (int inst = 0; inst < num_insts; ++ inst) {
- normalized_overlap[inst] /= prob_overlap;
- if (normalized_overlap[inst] > 0) {
- overlap_entropy -= normalized_overlap[inst] * (log(normalized_overlap[inst]) /
- log(1.0*num_insts));
+ if (given_prob[line][inst] > 0) {
+ given_site_entropy -= given_prob[line][inst] * (log(given_prob[line][inst]) /
+ log(1.0*num_insts));
}
}
+
double entropy_overlap = 0.0;
if (prob_overlap > 0 && (1 - prob_overlap > 0)) {
- entropy_overlap = (- prob_overlap * log(prob_overlap) - (1-prob_overlap) * log(1 - prob_overlap)) / log(1.0*num_insts);
+ entropy_overlap = (- prob_overlap * log(prob_overlap)
+ - (1-prob_overlap) * log(1 - prob_overlap)) / log(1.0*num_insts);
} else {
entropy_overlap = 0;
}
- double first_entropy = prob_overlap * overlap_entropy + (1 - prob_overlap) * 1 + entropy_overlap;
- total_first_entropy += first_entropy;
+ double initial_entropy = prob_overlap * given_site_entropy
+ + (1 - prob_overlap) * 1 + entropy_overlap;
+ total_initial_entropy += initial_entropy;
// H(genotype|E, known_genotype) = H(genotype|Env)
double conditional_entropy = 0.0;
@@ -3130,108 +3136,48 @@
}
total_conditional_entropy += conditional_entropy;
- if (conditional_entropy > first_entropy + 0.001) {
+ if (conditional_entropy > initial_entropy + 0.00001) {
cerr << "Negative Information.\n";
cout << line << endl;
- exit(1);
- }
-
- new_info += first_entropy - conditional_entropy;
- }
-
- if (new_info < min_new_info) {
- min_new_info = new_info;
- oo_first_entropy = total_first_entropy;
- oo_conditional_entropy = total_conditional_entropy;
- }
-
- }
- complexity += min_new_info;
- cpx_fp << genotype->GetID() << " " << oo_first_entropy << " " << oo_conditional_entropy << " "
- << min_new_info << " " << complexity << " ";
-
- // Second method of Charles
- /*min_new_info = length_genome;
-
- for (int j = 0; j < given_genotypes.size(); ++ j) {
-
- tMatrix<double> given_prob = point_mut.find(given_genotypes[j]->GetID())->second;
- double new_info = 0.0;
- double total_first_entropy = 0.0;
- double total_conditional_entropy = 0.0;
-
- for (int line = 0; line < length_genome; ++ line) {
-
- // H(genotype|known_genotype)
- double prob_overlap = 0;
- tArray<double> normalized_overlap(num_insts);
- for (int inst = 0; inst < num_insts; ++ inst) {
- if (this_prob[line][inst] < given_prob[line][inst]) {
- prob_overlap += this_prob[line][inst];
- normalized_overlap[inst] = this_prob[line][inst];
- } else {
- prob_overlap += given_prob[line][inst];
- normalized_overlap[inst] = given_prob[line][inst];
- }
- }
-
- double first_entropy = 0.0;
- for (int inst = 0; inst < num_insts; ++ inst) {
- normalized_overlap[inst] += (1-prob_overlap) / num_insts;
- if (normalized_overlap[inst] > 0) {
- first_entropy -= normalized_overlap[inst] * (log(normalized_overlap[inst]) /
- log(1.0*num_insts));
- }
- }
- total_first_entropy += first_entropy;
-
- // H(genotype|E, known_genotype) = H(genotype|Env)
- double conditional_entropy = 0.0;
- for (int inst = 0; inst < num_insts; ++ inst) {
- if (this_prob[line][inst] > 0) {
- conditional_entropy -= this_prob[line][inst] * (log(this_prob[line][inst]) /
- log(1.0*num_insts));
- }
- }
- total_conditional_entropy += conditional_entropy;
-
- if (conditional_entropy > first_entropy + 0.001) {
- cout << "This probability is:\n";
- for (int inst = 0; inst < num_insts; inst ++) {
+ for (int inst = 0; inst < num_insts; ++ inst) {
cout << this_prob[line][inst] << " ";
}
cout << endl;
- cout << "Given probability is:\n";
- for (int inst = 0; inst < num_insts; inst ++) {
- cout << given_prob[line][inst] << " ";
+ for (int inst = 0; inst < num_insts; ++ inst) {
+ cout << given_prob[line][inst] << " ";
}
cout << endl;
- cerr << "Negative Information of second method at line " << line << endl;;
- cerr << "Given genotype is " << given_genotypes[j]->GetID() << endl;
+
exit(1);
}
- new_info += first_entropy - conditional_entropy;
+ new_info += initial_entropy - conditional_entropy;
}
if (new_info < min_new_info) {
min_new_info = new_info;
- oo_first_entropy = total_first_entropy;
+ oo_initial_entropy = total_initial_entropy;
oo_conditional_entropy = total_conditional_entropy;
+ used_genotype = given_genotypes[j];
}
-
+
}
- complexity2 += min_new_info;
- cpx_fp << oo_first_entropy << " " << oo_conditional_entropy << " "
- << min_new_info << " " << complexity2 << " ";*/
+ complexity += min_new_info;
+ cpx_fp << genotype->GetID() << " "
+ << oo_initial_entropy << " "
+ << oo_conditional_entropy << " "
+ << min_new_info << " " << complexity << " ";
+ cpx_fp << cGenomeUtil::FindHammingDistance(genotype->GetGenome(),
+ used_genotype->GetGenome()) << " ";
+
genotype->PrintTasks(cpx_fp, 0, -1);
cpx_fp << endl;
given_genotypes.push_back(genotype);
}
-
-
+
+
// Set the test CPU back to the state it was
cTestCPU::UseResources() = backupUsage;
cTestCPU::SetupResourceArray(backupResources);
@@ -6995,7 +6941,6 @@
AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::CommunityComplexity);
- AddLibraryDef("CHARLES_COMMUNITY_COMPLEXITY", &cAnalyze::CharlesCommunityComplexity);
// Individual organism analysis...
AddLibraryDef("LANDSCAPE", &cAnalyze::CommandLandscape);
More information about the Avida-cvs
mailing list