[Avida-cvs] [avida-svn] r412 - in trunk/source: main utils/hist_map
huangw10@myxo.css.msu.edu
huangw10 at myxo.css.msu.edu
Wed Dec 7 11:54:53 PST 2005
Author: huangw10
Date: 2005-12-07 14:44:08 -0500 (Wed, 07 Dec 2005)
New Revision: 412
Modified:
trunk/source/main/cAnalyze.cc
trunk/source/main/cAnalyze.h
trunk/source/main/cEnvironment.cc
trunk/source/main/cEnvironment.h
trunk/source/utils/hist_map/hist_map2.cc
Log:
community complexity
Modified: trunk/source/main/cAnalyze.cc
===================================================================
--- trunk/source/main/cAnalyze.cc 2005-12-05 17:49:49 UTC (rev 411)
+++ trunk/source/main/cAnalyze.cc 2005-12-07 19:44:08 UTC (rev 412)
@@ -63,7 +63,7 @@
cAnalyze::cAnalyze(cString filename, cEnvironment *environment)
: cur_batch(0)
, d_environment(environment)
-, verbose(nAnalyze::VERBOSE_QUIET)
+, verbose(false)
, interactive_depth(0)
, inst_set(cHardwareUtil::DefaultInstSet(cConfig::GetInstFilename()))
{
@@ -269,7 +269,7 @@
<< endl;
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Loading in " << num_steps
<< " detail files from update " << start_UD
<< " to update " << stop_UD
@@ -489,332 +489,9 @@
return;
}
-double cAnalyze::AnalyzeEntropy(cAnalyzeGenotype * genotype, double mu)
-{
- double entropy = 0.0;
- // If the fitness is 0, the entropy is the length of genotype ...
- genotype->Recalculate();
- if (genotype->GetFitness() == 0) {
- return genotype->GetLength();
- }
- // Calculate the stats for the genotype we're working with ...
- const int num_insts = inst_set.GetSize();
- const int num_lines = genotype->GetLength();
- const cGenome & base_genome = genotype->GetGenome();
- cGenome mod_genome(base_genome);
- double base_fitness = genotype->GetFitness();
-
- // Loop through all the lines of code, testing all mutations...
- tArray<double> test_fitness(num_insts);
- tArray<double> prob(num_insts);
- for (int line_no = 0; line_no < num_lines; line_no ++) {
- int cur_inst = base_genome[line_no].GetOp();
-
- // Test fitness of each mutant.
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
- mod_genome[line_no].SetOp(mod_inst);
- cAnalyzeGenotype test_genotype(mod_genome, inst_set);
- test_genotype.Recalculate();
- // Ajust fitness ...
- if (test_genotype.GetFitness() <= base_fitness) {
- test_fitness[mod_inst] = test_genotype.GetFitness();
- } else {
- test_fitness[mod_inst] = base_fitness;
- }
- }
-
- // Calculate probabilities at mut-sel balance
- double w_bar = 1;
-
- // Normalize fitness values
- double maxFitness = 0.0;
- for(int i=0; i<num_insts; i++) {
- if(test_fitness[i] > maxFitness) {
- maxFitness = test_fitness[i];
- }
- }
-
-
- for(int i=0; i<num_insts; i++) {
- test_fitness[i] /= maxFitness;
- }
-
- while(1) {
- double sum = 0.0;
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
- prob[mod_inst] = (mu * w_bar) /
- ((double)num_insts *
- (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
- sum = sum + prob[mod_inst];
- }
- if ((sum-1.0)*(sum-1.0) <= 0.0001)
- break;
- else
- w_bar = w_bar - 0.000001;
- }
-
- // Calculate entropy ...
- double this_entropy = 0.0;
- for (int i = 0; i < num_insts; i ++) {
- this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
- }
- entropy += this_entropy;
-
- // Reset the mod_genome back to the original sequence.
- mod_genome[line_no].SetOp(cur_inst);
- }
- return entropy;
-}
-double cAnalyze::AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype,
- cAnalyzeGenotype * parent, double mut_rate)
-{
- double entropy = 0.0;
-
- // Calculate the stats for the genotype we're working with ...
- genotype->Recalculate();
- const int num_insts = inst_set.GetSize();
- const int num_lines = genotype->GetLength();
- const cGenome & base_genome = genotype->GetGenome();
- const cGenome & parent_genome = parent->GetGenome();
- cGenome mod_genome(base_genome);
-
- // Loop through all the lines of code, testing all mutations ...
- tArray<double> test_fitness(num_insts);
- tArray<double> prob(num_insts);
- for (int line_no = 0; line_no < num_lines; line_no ++) {
- int cur_inst = base_genome[line_no].GetOp();
- int parent_inst = parent_genome[line_no].GetOp();
-
- // Test fitness of each mutant.
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
- mod_genome[line_no].SetOp(mod_inst);
- cAnalyzeGenotype test_genotype(mod_genome, inst_set);
- test_genotype.Recalculate();
- test_fitness[mod_inst] = test_genotype.GetFitness();
- }
-
-
- // Calculate probabilities at mut-sel balance
- double w_bar = 1;
-
- // Normalize fitness values, assert if they are all zero
- double maxFitness = 0.0;
- for(int i=0; i<num_insts; i++) {
- if ( i == parent_inst) { continue; }
- if (test_fitness[i] > maxFitness) {
- maxFitness = test_fitness[i];
- }
- }
-
- if(maxFitness > 0) {
- for(int i = 0; i < num_insts; i ++) {
- if (i == parent_inst) { continue; }
- test_fitness[i] /= maxFitness;
- }
- } else {
- // every other inst is equally likely to be mutated to
- for (int i = 0; i < num_insts; i ++) {
- if (i == parent_inst) { continue; }
- test_fitness[i] = 1;
- }
- }
-
- double double_num_insts = num_insts * 1.0;
- while(1) {
- double sum = 0.0;
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
- if (mod_inst == parent_inst) { continue; }
- prob[mod_inst] = (mut_rate * w_bar) /
- (double_num_insts-2) /
- (w_bar + test_fitness[mod_inst] * mut_rate * (double_num_insts-1) / (double_num_insts - 2)
- - test_fitness[mod_inst]);
- sum = sum + prob[mod_inst];
- }
- if ((sum-1.0)*(sum-1.0) <= 0.0001)
- break;
- else
- w_bar = w_bar - 0.000001;
- }
-
- // Calculate entropy ...
- double this_entropy = 0.0;
- this_entropy -= (1.0 - mut_rate) * log(1.0 - mut_rate) / log(static_cast<double>(num_insts));
- for (int i = 0; i < num_insts; i ++) {
- if (i == parent_inst) { continue; }
- prob[i] = prob[i] * mut_rate;
- this_entropy += prob[i] * log(static_cast<double>(1.0/prob[i])) / log (static_cast<double>(num_insts));
- }
- entropy += this_entropy;
-
- // Reset the mod_genome back to the base_genome.
- mod_genome[line_no].SetOp(cur_inst);
- }
- return entropy;
-}
-
-double cAnalyze::IncreasedInfo(cAnalyzeGenotype * genotype1,
- cAnalyzeGenotype * genotype2,
- double mu)
-{
- double increased_info = 0.0;
-
- // Calculate the stats for the genotypes we're working with ...
- if ( genotype1->GetLength() != genotype2->GetLength() ) {
- cerr << "Error: Two genotypes don't have same length.(cAnalyze::IncreasedInfo)" << endl;
- exit(1);
- }
-
- genotype1->Recalculate();
- if (genotype1->GetFitness() == 0) {
- return 0.0;
- }
-
- const int num_insts = inst_set.GetSize();
- const int num_lines = genotype1->GetLength();
- const cGenome & genotype1_base_genome = genotype1->GetGenome();
- cGenome genotype1_mod_genome(genotype1_base_genome);
- double genotype1_base_fitness = genotype1->GetFitness();
- vector<double> genotype1_info(num_lines, 0.0);
-
- // Loop through all the lines of code, calculate genotype1 information
- tArray<double> test_fitness(num_insts);
- tArray<double> prob(num_insts);
- for (int line_no = 0; line_no < num_lines; line_no ++) {
- int cur_inst = genotype1_base_genome[line_no].GetOp();
-
- // Test fitness of each mutant.
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
- genotype1_mod_genome[line_no].SetOp(mod_inst);
- cAnalyzeGenotype test_genotype(genotype1_mod_genome, inst_set);
- test_genotype.Recalculate();
- // Ajust fitness ...
- if (test_genotype.GetFitness() <= genotype1_base_fitness) {
- test_fitness[mod_inst] = test_genotype.GetFitness();
- } else {
- test_fitness[mod_inst] = genotype1_base_fitness;
- }
- }
-
- // Calculate probabilities at mut-sel balance
- double w_bar = 1;
-
- // Normalize fitness values
- double maxFitness = 0.0;
- for(int i=0; i<num_insts; i++) {
- if(test_fitness[i] > maxFitness) {
- maxFitness = test_fitness[i];
- }
- }
-
- for(int i=0; i<num_insts; i++) {
- test_fitness[i] /= maxFitness;
- }
-
- while(1) {
- double sum = 0.0;
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
- prob[mod_inst] = (mu * w_bar) /
- ((double)num_insts *
- (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
- sum = sum + prob[mod_inst];
- }
- if ((sum-1.0)*(sum-1.0) <= 0.0001)
- break;
- else
- w_bar = w_bar - 0.000001;
- }
-
- // Calculate entropy ...
- double this_entropy = 0.0;
- for (int i = 0; i < num_insts; i ++) {
- this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
- }
- genotype1_info[line_no] = 1 - this_entropy;
-
- // Reset the mod_genome back to the original sequence.
- genotype1_mod_genome[line_no].SetOp(cur_inst);
- }
-
- genotype2->Recalculate();
- if (genotype2->GetFitness() == 0) {
- for (int line_no = 0; line_no < num_lines; ++ line_no) {
- increased_info += genotype1_info[line_no];
- }
- return increased_info;
- }
-
- const cGenome & genotype2_base_genome = genotype2->GetGenome();
- cGenome genotype2_mod_genome(genotype2_base_genome);
- double genotype2_base_fitness = genotype2->GetFitness();
-
- // Loop through all the lines of code, calculate increased information
- for (int line_no = 0; line_no < num_lines; line_no ++) {
- int cur_inst = genotype2_base_genome[line_no].GetOp();
-
- // Test fitness of each mutant.
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
- genotype2_mod_genome[line_no].SetOp(mod_inst);
- cAnalyzeGenotype test_genotype(genotype2_mod_genome, inst_set);
- test_genotype.Recalculate();
- // Ajust fitness ...
- if (test_genotype.GetFitness() <= genotype2_base_fitness) {
- test_fitness[mod_inst] = test_genotype.GetFitness();
- } else {
- test_fitness[mod_inst] = genotype2_base_fitness;
- }
- }
-
- // Calculate probabilities at mut-sel balance
- double w_bar = 1;
-
- // Normalize fitness values, assert if they are all zero
- double maxFitness = 0.0;
- for(int i=0; i<num_insts; i++) {
- if(test_fitness[i] > maxFitness) {
- maxFitness = test_fitness[i];
- }
- }
-
- for(int i=0; i<num_insts; i++) {
- test_fitness[i] /= maxFitness;
- }
-
- while(1) {
- double sum = 0.0;
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
- prob[mod_inst] = (mu * w_bar) /
- ((double)num_insts *
- (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
- sum = sum + prob[mod_inst];
- }
- if ((sum-1.0)*(sum-1.0) <= 0.0001)
- break;
- else
- w_bar = w_bar - 0.000001;
- }
-
- // Calculate entropy ...
- double this_entropy = 0.0;
- for (int i = 0; i < num_insts; i ++) {
- this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
- }
-
- // Compare information
- if (genotype1_info[line_no] > 1 - this_entropy) {
- increased_info += genotype1_info[line_no] - (1 - this_entropy);
- } // else increasing is 0, do nothing
-
- // Reset the mod_genome back to the original sequence.
- genotype2_mod_genome[line_no].SetOp(cur_inst);
- }
-
-
- return increased_info;
-}
-
void cAnalyze::LoadFile(cString cur_string)
{
// LOAD
@@ -840,7 +517,7 @@
exit(1);
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Loading file of type: " << filetype << endl;
}
@@ -896,7 +573,7 @@
// If no arguments are passed in, just find max num_cpus.
if (cur_string.GetSize() == 0) cur_string = "num_cpus";
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Reducing batch " << cur_batch << " to genotypes: ";
}
@@ -904,7 +581,7 @@
tListPlus<cAnalyzeGenotype> found_list;
while (cur_string.CountNumWords() > 0) {
cString gen_desc(cur_string.PopWord());
- if (verbose >= nAnalyze::VERBOSE_ON) cout << gen_desc << " ";
+ if (verbose) cout << gen_desc << " ";
// Determine by lin_type which genotype were are tracking...
cAnalyzeGenotype * found_gen = PopGenotype(gen_desc, cur_batch);
@@ -938,7 +615,7 @@
return;
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Reducing batch " << cur_batch << " to organisms: " << endl;
}
@@ -950,7 +627,7 @@
while (cur_string.CountNumWords() > 0) {
cString org_desc(cur_string.PopWord());
- if (verbose >= nAnalyze::VERBOSE_ON) cout << org_desc << " ";
+ if (verbose) cout << org_desc << " ";
// Determine by org_desc which genotype were are tracking...
if (org_desc == "random") {
@@ -1007,7 +684,7 @@
cString lin_type = "num_cpus";
if (cur_string.CountNumWords() > 0) lin_type = cur_string.PopWord();
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Reducing batch " << cur_batch
<< " to " << lin_type << " lineage " << endl;
} else cout << "Performing lineage scan..." << endl;
@@ -1060,7 +737,7 @@
batch[cur_batch].List().PushRear(found_list.Pop());
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " Lineage has " << total_kept << " genotypes; "
<< total_removed << " were removed." << endl;
}
@@ -1087,7 +764,7 @@
cString parent_method = "rec_region_size";
if (cur_string.CountNumWords() > 0) parent_method = cur_string.PopWord();
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Reducing batch " << cur_batch
<< " to " << lin_type << " sexual lineage "
<< " using " << parent_method << " criteria." << endl;
@@ -1215,7 +892,7 @@
batch[cur_batch].List().PushRear(found_list.Pop());
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " Sexual lineage has " << total_kept << " genotypes; "
<< total_removed << " were removed." << endl;
}
@@ -1234,7 +911,7 @@
cString clade_type( cur_string.PopWord() );
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Reducing batch " << cur_batch
<< " to clade " << clade_type << "." << endl;
} else cout << "Performing clade scan..." << endl;
@@ -1288,7 +965,7 @@
batch[cur_batch].List().PushRear(found_list.Pop());
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " Clade has " << total_kept << " genotypes; "
<< total_removed << " were removed." << endl;
}
@@ -1308,77 +985,49 @@
test_viable = cur_string.PopWord().AsDouble();
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Sampling " << fraction << " organisms from batch "
<< cur_batch << "." << endl;
}
else cout << "Sampling Organisms..." << endl;
+ int old_total = 0;
+ int new_total = 0;
+
cAnalyzeGenotype * genotype = NULL;
tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
-
- // Loop through all genotypes to perform a census
- int org_count = 0;
- while ((genotype = batch_it.Next()) != NULL) {
- // If we require viables, reduce all non-viables to zero organisms.
- if (test_viable == 1 && genotype->GetViable() == 0) {
- genotype->SetNumCPUs(0);
- }
-
- // Count the number of organisms in this genotype.
- org_count += genotype->GetNumCPUs();
+
+ // of only interested in viables, remove them first.
+ if (test_viable==1) {
+ while ((genotype = batch_it.Next()) != NULL) {
+ if ((genotype->GetViable())==0) {
+ batch_it.Remove();
+ delete genotype;
+ }
+ }
}
-
- // Create an array to store pointers to the genotypes and fill it in
- // while temporarily resetting all of the organism counts to zero.
- tArray<cAnalyzeGenotype *> org_array(org_count);
- int cur_org = 0;
+
batch_it.Reset();
+
while ((genotype = batch_it.Next()) != NULL) {
- for (int i = 0; i < genotype->GetNumCPUs(); i++) {
- org_array[cur_org] = genotype;
- cur_org++;
- }
- genotype->SetNumCPUs(0);
- }
-
- assert(cur_org == org_count);
-
- // Determine how many organisms we want to keep.
- int new_org_count = (int) fraction;
- if (fraction < 1.0) new_org_count = (int) (fraction * (double) org_count);
- if (new_org_count > org_count) {
- cerr << "Warning: Trying to sample " << new_org_count
- << "organisms from a population of " << org_count
- << endl;
- new_org_count = org_count;
- }
-
- // Now pick those that we are keeping.
- tArray<int> keep_ids(new_org_count);
- random.Choose(org_count, keep_ids);
-
- // And increment the org counts for the associated genotypes.
- for (int i = 0; i < new_org_count; i++) {
- genotype = org_array[ keep_ids[i] ];
- genotype->SetNumCPUs(genotype->GetNumCPUs() + 1);
- }
-
-
- // Delete all genotypes with no remaining organisms...
- batch_it.Reset();
- while ((genotype = batch_it.Next()) != NULL) {
- if (genotype->GetNumCPUs() == 0) {
+ const int old_count = genotype->GetNumCPUs();
+ const int new_count = random.GetRandBinomial(old_count, fraction);
+ old_total += old_count;
+ new_total += new_count;
+
+ if (new_count == 0) {
batch_it.Remove();
delete genotype;
+ } else {
+ genotype->SetNumCPUs(new_count);
}
}
int num_genotypes = batch[cur_batch].List().GetSize();
- if (verbose >= nAnalyze::VERBOSE_ON) {
- cout << " Removed " << org_count - new_org_count
+ if (verbose) {
+ cout << " Removed " << old_total - new_total
<< " organisms (" << init_genotypes - num_genotypes
- << " genotypes); " << new_org_count
+ << " genotypes); " << new_total
<< " orgs (" << num_genotypes << " gens) remaining."
<< endl;
}
@@ -1399,7 +1048,7 @@
test_viable = cur_string.PopWord().AsDouble();
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Sampling " << fraction << " genotypes from batch "
<< cur_batch << "." << endl;
}
@@ -1418,7 +1067,7 @@
}
int num_genotypes = batch[cur_batch].List().GetSize();
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " Removed " << init_genotypes - num_genotypes
<< " genotypes; " << num_genotypes << " remaining."
<< endl;
@@ -1450,7 +1099,7 @@
void cAnalyze::CommandPrint(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Printing batch " << cur_batch << endl;
+ if (verbose == true) cout << "Printing batch " << cur_batch << endl;
else cout << "Printing organisms..." << endl;
cString directory = PopDirectory(cur_string, "genebank/");
@@ -1469,15 +1118,13 @@
}
cTestUtil::PrintGenome(genotype->GetGenome(), filename);
- if (verbose >= nAnalyze::VERBOSE_ON) {
- cout << "Printing: " << filename << endl;
- }
+ if (verbose) cout << "Printing: " << filename << endl;
}
}
void cAnalyze::CommandTrace(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Tracing batch " << cur_batch << endl;
+ if (verbose == true) cout << "Tracing batch " << cur_batch << endl;
else cout << "Tracing organisms..." << endl;
int words = cur_string.CountNumWords();
@@ -1530,7 +1177,7 @@
cTestCPU::TestGenome(test_info, genotype->GetGenome());
- if (verbose >= nAnalyze::VERBOSE_ON) cout << " Tracing: " << filename << endl;
+ if (verbose) cout << " Tracing: " << filename << endl;
trace_fp.close();
}
@@ -1545,7 +1192,7 @@
void cAnalyze::CommandPrintTasks(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Printing tasks in batch " << cur_batch << endl;
+ if (verbose == true) cout << "Printing tasks in batch " << cur_batch << endl;
else cout << "Printing tasks..." << endl;
// Load in the variables...
@@ -1566,7 +1213,7 @@
void cAnalyze::CommandDetail(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Detailing batch " << cur_batch << endl;
+ if (verbose == true) cout << "Detailing batch " << cur_batch << endl;
else cout << "Detailing..." << endl;
// Load in the variables...
@@ -1601,7 +1248,7 @@
void cAnalyze::CommandDetailTimeline(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Detailing batch "
+ if (verbose == true) cout << "Detailing batch "
<< cur_batch << " based on time" << endl;
else cout << "Detailing..." << endl;
@@ -1613,7 +1260,7 @@
if (cur_string.GetSize() != 0) time_step = cur_string.PopWord().AsInt();
if (cur_string.GetSize() != 0) max_time = cur_string.PopWord().AsInt();
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " Time step = " << time_step << endl
<< " Max time = " << max_time << endl;
}
@@ -1705,12 +1352,6 @@
int cur_time = 0;
while (cur_genotype != NULL && cur_time <= max_time) {
- if (verbose >= nAnalyze::VERBOSE_DETAILS) {
- cout << "Detailing genotype " << cur_genotype->GetID()
- << " at depth " << cur_genotype->GetDepth()
- << endl;
- }
-
output_it.Reset();
tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
if (format_type == FILE_TYPE_HTML) {
@@ -1802,7 +1443,7 @@
void cAnalyze::CommandDetailAverage(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Average detailing batch " << cur_batch << endl;
+ if (verbose == true) cout << "Average detailing batch " << cur_batch << endl;
else cout << "Detailing..." << endl;
// Load in the variables...
@@ -1837,7 +1478,7 @@
if (cur_string.GetSize() != 0) keyword = cur_string.PopWord();
if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Detailing batches for " << keyword << endl;
+ if (verbose == true) cout << "Detailing batches for " << keyword << endl;
else cout << "Detailing Batches..." << endl;
// Scan the functions list for the keyword we need...
@@ -2108,7 +1749,7 @@
void cAnalyze::CommandHistogram(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Histogram batch " << cur_batch << endl;
+ if (verbose == true) cout << "Histogram batch " << cur_batch << endl;
else cout << "Histograming..." << endl;
// Load in the variables...
@@ -2282,7 +1923,7 @@
void cAnalyze::CommandPrintPhenotypes(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Printing phenotypes in batch "
+ if (verbose == true) cout << "Printing phenotypes in batch "
<< cur_batch << endl;
else cout << "Printing phenotypes..." << endl;
@@ -2369,7 +2010,7 @@
// Print various diversity metrics from the current batch of genotypes...
void cAnalyze::CommandPrintDiversity(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Printing diversity data for batch "
+ if (verbose == true) cout << "Printing diversity data for batch "
<< cur_batch << endl;
else cout << "Printing diversity data..." << endl;
@@ -2459,24 +2100,25 @@
}
}
-
-void cAnalyze::PhyloCommunityComplexity(cString cur_string)
+void cAnalyze::CommunityComplexity(cString cur_string)
{
+ /////////////////////////////////////////////////////////////////////
+ // Calculate the mutual information between community and environment
+ /////////////////////////////////////////////////////////////////////
- /////////////////////////////////////////////////////////////////////////
- // Calculate the mutual information between all genotypes and environment
- /////////////////////////////////////////////////////////////////////////
+ cout << "Analyze community complexity of current population ...\n";
- cout << "Analyze biocomplexity of current population about environment ...\n";
-
// Get the number of genotypes that are gonna be analyzed.
- int max_genotypes = cur_string.PopWord().AsInt();
+ int max_genotypes = cur_string.PopWord().AsInt(); // If it is 0, we sample
+ //two genotypes for each task.
// Get update
int update = cur_string.PopWord().AsInt();
// Get the directory
- cString directory = PopDirectory(cur_string, "community_cpx/");
+ cString dir = cur_string.PopWord();
+ cString defaultDir = "community_cpx/";
+ cString directory = PopDirectory(dir, defaultDir);
// Get the file name that saves the result
cString filename = cur_string.PopWord();
@@ -2493,35 +2135,17 @@
cpx_fp << "# 3: Entropy given Both Known Genotypes and Env" << endl;
cpx_fp << "# 4: New Information about Environment" << endl;
cpx_fp << "# 5: Total Complexity" << endl;
+
+ cpx_fp << "# 6: Information about Environment" << endl;
+ cpx_fp << "# 7: Sum of Individual Information" << endl;
+
+ cpx_fp << "# 8: Hamming Distance to Closest Given Genotype" << endl;
+ cpx_fp << "# 9: Total Hamming Distance to Closest Neighbor" << endl;
+ cpx_fp << "# 10: Number of Organisms" << endl;
+ cpx_fp << "# 11: Total Number of Organisms" << endl;
+ cpx_fp << "# 12 - : Tasks Implemented" << endl;
cpx_fp << endl;
- ////////////////////////////////////////////////////////////////////////////
- // Loop through all genotypes in all batches and build id vs. genotype map
-
- map<int, cAnalyzeGenotype *> genotype_database;
- for (int i = 0; i < MAX_BATCHES; ++ i) {
- tListIterator<cAnalyzeGenotype> batch_it(batch[i].List());
- cAnalyzeGenotype * genotype = NULL;
- while ((genotype = batch_it.Next()) != NULL) {
- genotype_database.insert(make_pair(genotype->GetID(), genotype));
- }
- }
-
- ////////////////////////////////////////////////
- // Check if all the genotypes having same length
-
- int length_genome;
- if (genotype_database.size() > 0) {
- length_genome = genotype_database.begin()->second->GetLength();
- }
- map<int, cAnalyzeGenotype *>::iterator gen_iterator = genotype_database.begin();
- for (; gen_iterator != genotype_database.end(); ++ gen_iterator) {
- if (gen_iterator->second->GetLength() != length_genome) {
- cerr << "Genotype " << gen_iterator->first << " has different genome length." << endl;
- exit(1);
- }
- }
-
///////////////////////
// Backup test CPU data
bool backupUsage = cTestCPU::UseResources();
@@ -2529,560 +2153,132 @@
cTestCPU::UseResources() = true;
FillResources(update);
- ///////////////////////////////////////////////////////////////////////
- // Choose the first n most abundant genotypes and put them in community
-
+
vector<cAnalyzeGenotype *> community;
cAnalyzeGenotype * genotype = NULL;
tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
- while (((genotype = batch_it.Next()) != NULL) && (community.size() < max_genotypes)) {
- community.push_back(genotype);
- }
-
- ///////////////////////////
- // Measure hamming distance
- int size_community = community.size();
- if (size_community == 0) {
- cerr << "There is no genotype in this community." << endl;
- cpx_fp.close();
- exit(1);
- }
- typedef pair<int,int> gen_pair;
- map<gen_pair, int> hamming_dist;
-
- for (int i = 0; i< size_community; ++ i) {
- for (int j = i+1; j < size_community; ++ j) {
- int dist = cGenomeUtil::FindHammingDistance(community[i]->GetGenome(),
- community[j]->GetGenome());
- int id1 = community[i]->GetID();
- int id2 = community[j]->GetID();
-
- hamming_dist.insert(make_pair(gen_pair(id1, id2), dist));
- hamming_dist.insert(make_pair(gen_pair(id2, id1), dist));
+ if (max_genotypes > 0) {
+
+ /////////////////////////////////////////////////////////////////
+ // Random pick up some genotypes from population with replacement
+
+ while (((genotype = batch_it.Next()) != NULL) && (community.size() < max_genotypes)) {
+ community.push_back(genotype);
}
- }
- //////////////////////////////////
- // Get Most Recent Common Ancestor
+ // Put all alive genotypes in a vector
+ vector<cAnalyzeGenotype *> all_live_genotypes;
- map<gen_pair, cAnalyzeGenotype *> mrca;
- map<gen_pair, int> raw_dist;
- for (int i = 0; i< size_community; ++ i) {
- for (int j = i+1; j < size_community; ++ j) {
-
- cAnalyzeGenotype * lineage1_genotype = community[i];
- cAnalyzeGenotype * lineage2_genotype = community[j];
- int total_dist = 0;
+ while ((genotype = batch_it.Next()) != NULL) {
+ genotype->Recalculate();
- while (lineage1_genotype->GetID() != lineage2_genotype->GetID()) {
- if (lineage1_genotype->GetID() > lineage2_genotype->GetID()) {
- int parent_id = lineage1_genotype->GetParentID();
- cAnalyzeGenotype * parent = genotype_database.find(parent_id)->second;
-
- total_dist += cGenomeUtil::FindHammingDistance(lineage1_genotype->GetGenome(),
- parent->GetGenome());
- lineage1_genotype = parent;
- } else {
- int parent_id = lineage2_genotype->GetParentID();
- cAnalyzeGenotype * parent = genotype_database.find(parent_id)->second;
- total_dist += cGenomeUtil::FindHammingDistance(lineage2_genotype->GetGenome(),
- parent->GetGenome());
-
- lineage2_genotype = parent;
- }
- }
-
- int id1 = community[i]->GetID();
- int id2 = community[j]->GetID();
- mrca.insert(make_pair(gen_pair(id1, id2), lineage1_genotype));
- mrca.insert(make_pair(gen_pair(id2, id1), lineage1_genotype));
- raw_dist.insert(make_pair(gen_pair(id1, id2), total_dist));
- raw_dist.insert(make_pair(gen_pair(id2, id1), total_dist));
+ double fitness = genotype->GetFitness();
+ if (genotype->GetFitness() == 0) continue; // Dead Genotype
+ all_live_genotypes.push_back(genotype);
}
- }
- /*
- ////////////////////////////////////////////////////////////////////////////////////////////
- // Sort the genotype that is next genotype is the most closest one to all previous genotypes
-
- vector<cAnalyzeGenotype *> sorted_community;
- vector<cAnalyzeGenotype *> left_genotypes = community;
-
- // Put the first genotype in left to sorted.
- sorted_community.push_back(*left_genotypes.begin());
- left_genotypes.erase(left_genotypes.begin());
-
- while (left_genotypes.size() > 0) {
- int min_total_hamming = size_community * length_genome;
- int index_next;
-
- for (int next = 0; next < left_genotypes.size(); ++ next) {
- int total_hamming = 0;
- int id1 = left_genotypes[next]->GetID();
-
- for (int given = 0; given < sorted_community.size(); ++ given) {
- int id2 = sorted_community[given]->GetID();
- total_hamming += hamming_dist.find(gen_pair(id1, id2))->second;
- }
-
- if (total_hamming < min_total_hamming) {
- min_total_hamming = total_hamming;
- index_next = next;
- }
+ // Random sample genotypes from alive population
+ while (community.size() < max_genotypes) {
+ int index = random.GetUInt(all_live_genotypes.size());
+ community.push_back(all_live_genotypes[index]);
}
- sorted_community.push_back(left_genotypes[index_next]);
- left_genotypes.erase(left_genotypes.begin() + index_next);
- }
- */
+ } else if (max_genotypes == -1) {
- vector<cAnalyzeGenotype *> sorted_community = community;
- /////////////////////////////////////////////
- // Loop through genotypes in sorted community
+ // Classify the genotype by task implemented (phenotype) ...
+ map<string, vector<cAnalyzeGenotype *> > genotype_classBytask;
+ int num_tasks = d_environment->GetTaskLib().GetSize();
+ vector<cAnalyzeGenotype *> all_live_genotypes;
- double complexity = 0.0;
- vector<cAnalyzeGenotype *> given_genotypes;
- int num_insts = inst_set.GetSize();
+ while ((genotype = batch_it.Next()) != NULL) {
+ genotype->Recalculate();
- for (int i = 0; i < size_community; ++ i) {
- genotype = sorted_community[i];
+ double fitness = genotype->GetFitness();
+ if (genotype->GetFitness() == 0) continue; // Dead Genotype
+ all_live_genotypes.push_back(genotype);
- // Skip the dead organisms
- genotype->Recalculate();
- if (genotype->GetFitness() == 0) {
- continue;
- }
-
- vector<double> one_line_prob(num_insts, 0.0);
- vector< vector<double> > prob(length_genome, one_line_prob);
-
- cout << endl << genotype->GetID() << endl;
-
- /*if (given_genotypes.size() >= 2) {
-
- ///////////////////////////////////////////////////////////////////
- // Look for two given genotypes that has minimun depth dist with it
-
- cAnalyzeGenotype * min_depth_gen = given_genotypes[0];
- cAnalyzeGenotype * tmrca = mrca.find(gen_pair(genotype->GetID(),
- given_genotypes[0]->GetID()))->second;
- int min_depth_dist = genotype->GetDepth() + given_genotypes[0]->GetDepth() - 2 * tmrca->GetDepth();
-
- cAnalyzeGenotype * second_min_gen = given_genotypes[1];
- tmrca = mrca.find(gen_pair(genotype->GetID(), given_genotypes[1]->GetID()))->second;
- int second_min_depth = genotype->GetDepth() + given_genotypes[1]->GetDepth() - 2 * tmrca->GetDepth();
-
- for (int i = 2; i < given_genotypes.size(); ++ i) {
- cAnalyzeGenotype * given_genotype = given_genotypes[i];
- cAnalyzeGenotype * tmrca = mrca.find(gen_pair(genotype->GetID(),
- given_genotype->GetID()))->second;
- int dist = genotype->GetDepth() + given_genotype->GetDepth() - 2 * tmrca->GetDepth();
-
- if (dist < min_depth_dist) {
- second_min_depth = min_depth_dist;
- second_min_gen = min_depth_gen;
- min_depth_dist = dist;
- min_depth_gen = given_genotype;
- } else if (dist >= min_depth_dist && dist < second_min_depth) {
- second_min_depth = dist;
- second_min_gen = given_genotype;
+ string tasks;
+ for (int task_id = 0; task_id < num_tasks; ++ task_id) {
+ int count = genotype->GetTaskCount(task_id);
+ if (count > 0) {
+ tasks += '1';
+ } else {
+ tasks += '0';
}
}
- const cGenome & given_genome1 = min_depth_gen->GetGenome();
- const cGenome & given_genome2 = second_min_gen->GetGenome();
- for (int line = 0; line < length_genome; ++ line) {
- int given_inst = given_genome1[line].GetOp();
- prob[line][given_inst] += pow(1 - 1.0/length_genome, min_depth_dist);
- given_inst = given_genome2[line].GetOp();
- prob[line][given_inst] += pow(1 - 1.0/length_genome, min_depth_dist);
+ map<string, vector<cAnalyzeGenotype *> >::iterator classBytask_it;
+ classBytask_it = genotype_classBytask.find(tasks);
+ if (classBytask_it == genotype_classBytask.end()) {
+ vector<cAnalyzeGenotype *> genotypes;
+ genotypes.push_back(genotype);
+ genotype_classBytask.insert(make_pair(tasks, genotypes));
+ } else {
+ classBytask_it->second.push_back(genotype);
}
-
- cpx_fp << genotype->GetID() << " " << min_depth_dist << " " << second_min_depth
- << " " << raw_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second
- << " " << raw_dist.find(gen_pair(genotype->GetID(), second_min_gen->GetID()))->second
- << " ";
-
-
- } else if (given_genotypes.size() == 1) {
- //////////////////////////////////////////////////////
- // Calculate the probability of each inst at each line
- cAnalyzeGenotype * tmrca = mrca.find(gen_pair(genotype->GetID(),
- given_genotypes[0]->GetID()))->second;
- int dist = genotype->GetDepth() + given_genotypes[0]->GetDepth() - 2 * tmrca->GetDepth();
- const cGenome & given_genome = given_genotypes[0]->GetGenome();
-
- for (int line = 0; line < length_genome; ++ line) {
- int given_inst = given_genome[line].GetOp();
- prob[line][given_inst] += pow(1 - 1.0/length_genome, dist);
- }
-
- cpx_fp << genotype->GetID() << " " << dist << " "
- << raw_dist.find(gen_pair(genotype->GetID(), given_genotypes[0]->GetID()))->second << " ";
- } else {
- cpx_fp << genotype->GetID() << " ";
- }*/
-
- if (given_genotypes.size() >= 1) {
- //////////////////////////////////////////////////
- // Look for a genotype that is closest to this one
-
- cAnalyzeGenotype * min_depth_gen = given_genotypes[0];
- cAnalyzeGenotype * tmrca = mrca.find(gen_pair(genotype->GetID(),
- given_genotypes[0]->GetID()))->second;
- int min_depth_dist = genotype->GetDepth() + given_genotypes[0]->GetDepth() - 2 * tmrca->GetDepth();
-
- for (int i = 1; i < given_genotypes.size() ; ++ i) {
- cAnalyzeGenotype * given_genotype = given_genotypes[i];
- cAnalyzeGenotype * tmrca = mrca.find(gen_pair(genotype->GetID(),
- given_genotype->GetID()))->second;
- int dist = genotype->GetDepth() + given_genotype->GetDepth() - 2 * tmrca->GetDepth();
-
- if (dist < min_depth_dist) {
- min_depth_dist = dist;
- min_depth_gen = given_genotype;
- }
- }
-
- const cGenome & given_genome = min_depth_gen->GetGenome();
- const cGenome & base_genome = genotype->GetGenome();
- cGenome mod_genome(base_genome);
- for (int line = 0; line < length_genome; ++ line) {
- int given_inst = given_genome[line].GetOp();
- mod_genome = base_genome;
- mod_genome[line].SetOp(given_inst);
- cAnalyzeGenotype test_genotype(mod_genome, inst_set);
- test_genotype.Recalculate();
-
- // Only when given inst make the genotype alive
- if (test_genotype.GetFitness() > 0) {
- prob[line][given_inst] += pow(1 - 1.0/length_genome, min_depth_dist);
- }
- }
-
- cpx_fp << genotype->GetID() << " " << min_depth_dist << " "
- << raw_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second << " "
- << hamming_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second << " ";
- } else {
- cpx_fp << genotype->GetID() << " ";
}
- ///////////////////////////////////////////////////////////////////
- // Point mutation at all lines of code to look for neutral mutation
- // and the mutations that can make organism alive
- cout << "Test point mutation." << endl;
- vector<bool> one_line_neutral(num_insts, false);
- vector< vector<bool> > neutral_mut(length_genome, one_line_neutral);
- vector< vector<bool> > alive_mut(length_genome, one_line_neutral);
-// if (verbose >= nAnalyze::VERBOSE_ON) {
-// PrintTestCPUResources("");
-// }
- genotype->Recalculate();
- double base_fitness = genotype->GetFitness();
- cout << base_fitness << endl;
- const cGenome & base_genome = genotype->GetGenome();
- cGenome mod_genome(base_genome);
+ // Choose one organism from each major group (num of organisms >= 3)
+
+ map<string, vector<cAnalyzeGenotype*> >::iterator classBytask_it;
+ for (classBytask_it = genotype_classBytask.begin();
+ classBytask_it != genotype_classBytask.end(); ++ classBytask_it) {
- for (int line = 0; line < length_genome; ++ line) {
- int cur_inst = base_genome[line].GetOp();
-
- for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
- mod_genome[line].SetOp(mod_inst);
- cAnalyzeGenotype test_genotype(mod_genome, inst_set);
- test_genotype.Recalculate();
- if (test_genotype.GetFitness() >= base_fitness) {
- neutral_mut[line][mod_inst] = true;
- }
- if (test_genotype.GetFitness() > 0) {
- alive_mut[line][mod_inst] = true;
- }
- }
-
- mod_genome[line].SetOp(cur_inst);
- }
+ vector<cAnalyzeGenotype *> & genotype_vector = classBytask_it->second;
+ int num_genotypes = genotype_vector.size();
+ int num_cpus = 0;
- /////////////////////////////////////////
- // Normalize the probability at each line
- vector< vector<double> > prob_before_env(length_genome, one_line_prob);
-
- for (int line = 0; line < length_genome; ++ line) {
- double cur_total_prob = 0.0;
- int num_alive = 0;
- for (int inst = 0; inst < num_insts; ++ inst) {
- if (alive_mut[line][inst] == true) {
- cur_total_prob += prob[line][inst];
- num_alive ++;
- }
+ for (int gen = 0; gen < num_genotypes; ++ gen) {
+ num_cpus += genotype_vector[gen]->GetNumCPUs();
}
- if (cur_total_prob > 1) {
- cout << "Total probability at " << line << " is greater than 0." << endl;
- exit(1);
- }
- double left_prob = 1 - cur_total_prob;
- for (int inst = 0; inst < num_insts; ++ inst) {
- if (alive_mut[line][inst] == true) {
- prob_before_env[line][inst] = prob[line][inst] + left_prob / num_alive;
- } else {
- prob_before_env[line][inst] = 0;
- }
- }
-
+ // Don't count the class which has too few organisms.
+ if (num_cpus < 3) continue;
+
+ community.push_back(genotype_vector[0]);
+ cout << "Task " << classBytask_it->first << " " << num_cpus << endl;
}
- /////////////////////////////////
- // Calculate entropy of each line
- vector<double> entropy(length_genome, 0.0);
- for (int line = 0; line < length_genome; ++ line) {
- double sum = 0;
- for (int inst = 0; inst < num_insts; ++ inst) {
- sum += prob_before_env[line][inst];
- if (prob_before_env[line][inst] > 0) {
- entropy[line] -= prob_before_env[line][inst] * log(prob_before_env[line][inst]) / log(num_insts*1.0);
- }
- }
- if (sum > 1.001 || sum < 0.999) {
- cout << "Sum of probability is not 1 at line " << line << endl;
- exit(1);
- }
- }
-
- /////////////////////////////////////////////////////
- // Redistribute the probability of insts at each line
- vector< vector<double> > prob_given_env(length_genome, one_line_prob);
-
- for (int line = 0; line < length_genome; ++ line) {
- double total_prob = 0.0;
- int num_neutral = 0;
- for (int inst = 0; inst < num_insts; ++ inst) {
- if (neutral_mut[line][inst] == true) {
- num_neutral ++;
- total_prob += prob[line][inst];
- }
- }
-
- double left = 1 - total_prob;
-
- for (int inst = 0; inst < num_insts; ++ inst) {
- if (neutral_mut[line][inst] == true) {
- prob_given_env[line][inst] = prob[line][inst] + left / num_neutral;
- } else {
- prob_given_env[line][inst] = 0.0;
- }
- }
+ while (community.size() < 50) {
+ // padding the community up to 50 genotypes ...
+ int index = random.GetUInt(all_live_genotypes.size());
+ community.push_back(all_live_genotypes[index]);
}
- ////////////////////////////////////////////////
- // Calculate the entropy given environment
-
- vector<double> entropy_given_env(length_genome, 0.0);
- for (int line = 0; line < length_genome; ++ line) {
- double sum = 0;
- for (int inst = 0; inst < num_insts; ++ inst) {
- sum += prob_given_env[line][inst];
- if (prob_given_env[line][inst] > 0) {
- entropy_given_env[line] -= prob_given_env[line][inst] * log(prob_given_env[line][inst]) /
- log(num_insts*1.0);
- }
- }
- if (sum > 1.001 || sum < 0.999) {
- cout << "Sum of probability is not 1 at line " << line << " " << sum << endl;
- exit(1);
- }
- }
-
-
- ///////////////////////////////////////////////////////////////////////////
- // Calculate the information between genotype and env given other genotypes
- double information = 0.0;
- double entropy_before = 0.0;
- double entropy_after = 0.0;
- for (int line = 0; line < length_genome; ++ line) {
- entropy_before += entropy[line];
- entropy_after += entropy_given_env[line];
-
- if (entropy[line] >= entropy_given_env[line]) {
- information += entropy[line] - entropy_given_env[line];
- } else { // Negative information is because given condition is not related with this genotype ...
-
- // Count the number of insts that can make genotype alive
- int num_inst_alive = 0;
- for (int inst = 0; inst < num_insts; ++ inst) {
- if (alive_mut[line][inst] == true) {
- num_inst_alive ++;
- }
- }
-
- double entropy_before = - log(1.0/num_inst_alive) / log(num_insts*1.0);
- information += entropy_before - entropy_given_env[line];
- if (information < 0) {
- cout << "Negative information at site " << line << endl;
- exit(1);
- }
- }
-
- }
- complexity += information;
-
- cpx_fp << entropy_before << " " << entropy_after << " "
- << information << " " << complexity << " ";
-
- genotype->PrintTasks(cpx_fp, 0, -1);
- cpx_fp << endl;
-
- /////////////////////////////////////////////////////////////
- // This genotype becomes the given condition of next genotype
-
- given_genotypes.push_back(genotype);
-
}
-
- // Set the test CPU back to the state it was
- cTestCPU::UseResources() = backupUsage;
- cTestCPU::SetupResourceArray(backupResources);
-
- cpx_fp.close();
- return;
-
-}
-
-void cAnalyze::AnalyzeCommunityComplexity(cString cur_string)
-{
- /////////////////////////////////////////////////////////////////////
- // Calculate the mutual information between community and environment
- /////////////////////////////////////////////////////////////////////
-
- cout << "Analyze community complexity of current population about environment with Charles method ...\n";
-
- // Get the number of genotypes that are gonna be analyzed.
- int max_genotypes = cur_string.PopWord().AsInt(); // If it is 0, we sample
- //two genotypes for each task.
-
- // Get update
- int update = cur_string.PopWord().AsInt();
-
- // Get the directory
- cString dir = cur_string.PopWord();
- cString defaultDir = "community_cpx/";
- cString directory = PopDirectory(dir, defaultDir);
-
- // Get the file name that saves the result
- cString filename = cur_string.PopWord();
- if (filename.IsEmpty()) {
- filename = "community.complexity.dat";
- }
-
- filename.Set("%s%s", directory(), filename.GetData());
- ofstream cpx_fp(filename());
-
- cpx_fp << "# Legend:" << endl;
- cpx_fp << "# 1: Genotype ID" << endl;
- cpx_fp << "# 2: Entropy given Known Genotypes" << endl;
- cpx_fp << "# 3: Entropy given Both Known Genotypes and Env" << endl;
- cpx_fp << "# 4: New Information about Environment" << endl;
- cpx_fp << "# 5: Total Complexity" << endl;
- cpx_fp << "# 6: Hamming Distance to Closest Given Genotype" << endl;
- cpx_fp << "# 7: Total Hamming Distance to Closest Neighbor" << endl;
- cpx_fp << "# 8: Number of Organisms" << endl;
- cpx_fp << "# 9: Total Number of Organisms" << endl;
- cpx_fp << "# 10 - : Tasks Implemented" << endl;
- cpx_fp << endl;
-
- ///////////////////////
- // Backup test CPU data
- bool backupUsage = cTestCPU::UseResources();
- tArray<double> backupResources(cTestCPU::GetResources());
- cTestCPU::UseResources() = true;
- FillResources(update);
-
- vector<cAnalyzeGenotype *> community;
- cAnalyzeGenotype * genotype = NULL;
- tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
-
- if (max_genotypes > 0) {
-
- ///////////////////////////////////////////////////////////////////////
- // Choose the first n most abundant genotypes and put them in community
-
- while (((genotype = batch_it.Next()) != NULL) && (community.size() < max_genotypes)) {
- community.push_back(genotype);
- }
- } else if (max_genotypes == 0) {
-
- /////////////////////////////////////
- // Choose two genotypes for each task
-
- genotype = batch_it.Next();
- if (genotype == NULL) {
- cpx_fp.close();
- return;
- }
- genotype->Recalculate();
- int num_tasks = genotype->GetNumTasks();
- vector< vector<cAnalyzeGenotype *> > genotype_class(num_tasks);
- do {
- for (int task_id = 0; task_id < num_tasks; ++ task_id) {
- int count = genotype->GetTaskCount(task_id);
- if (count > 0) {
- genotype_class[task_id].push_back(genotype);
- }
- }
- } while ((genotype = batch_it.Next()) != NULL);
-
- cRandom random;
- for (int task_id = 0; task_id < num_tasks; ++ task_id) {
- int num_genotype = genotype_class[task_id].size();
- if (num_genotype > 0) {
- int index = random.GetUInt(num_genotype);
- community.push_back(genotype_class[task_id][index]);
- index = random.GetUInt(num_genotype);
- community.push_back(genotype_class[task_id][index]);
- } else {
- // Pick up a class that is not empty
- int class_id = random.GetUInt(num_tasks);
- while (genotype_class[class_id].size() == 0) {
- class_id ++;
- if (class_id >= num_tasks) {
- class_id = 0;
- }
- }
- int num_genotype = genotype_class[class_id].size();
- int index = random.GetUInt(num_genotype);
- community.push_back(genotype_class[task_id][index]);
- index = random.GetUInt(num_genotype);
- community.push_back(genotype_class[task_id][index]);
- }
- }
-
- }
-
+
////////////////////////////////////////////////////
// Test point mutation of each genotype in community
int num_insts = inst_set.GetSize();
map<int, tMatrix<double> > point_mut;
- int size_community = community.size();
+ int com_size = community.size();
int length_genome;
- if (size_community > 1) {
+ if (com_size > 0) {
length_genome = community[0]->GetLength();
+ } else {
+ cpx_fp.close();
+ return;
}
- for (int i = 0; i < size_community; ++ i) {
+ for (int i = 0; i < com_size; ++ i) {
+
genotype = community[i];
///////////////////////////////////////////////////////////////////
// Point mutation at all lines of code to look for neutral mutation
+
cout << "Test point mutation for genotype " << genotype->GetID() << endl;
tMatrix<double> prob(length_genome, num_insts);
-// if (verbose >= nAnalyze::VERBOSE_ON) {
+// if (verbose == true) {
// PrintTestCPUResources("");
// }
@@ -3124,6 +2320,7 @@
double complexity = 0.0;
int total_dist = 0;
int total_cpus = 0;
+ double total_ind_info = 0.0;
vector<cAnalyzeGenotype *> given_genotypes;
////////////////////////////////////////
@@ -3147,16 +2344,19 @@
double new_info = oo_initial_entropy - oo_conditional_entropy;
complexity += new_info;
given_genotypes.push_back(genotype);
+ total_ind_info = length_genome - oo_conditional_entropy;
cpx_fp << genotype->GetID() << " "
<< oo_initial_entropy << " "
<< oo_conditional_entropy << " "
<< new_info << " "
- << complexity << " "
- << "0 0" << " ";
+ << complexity << " "
+ << total_ind_info << " "
+ << total_ind_info << " "
+ << "0 0" << " ";
int num_cpus = genotype->GetNumCPUs();
total_cpus += num_cpus;
- cpx_fp << num_cpus << " " << total_cpus << " ";
+ cpx_fp << num_cpus << " " << total_cpus << " ";
genotype->Recalculate();
genotype->PrintTasks(cpx_fp, 0, -1);
cpx_fp << endl;
@@ -3164,7 +2364,7 @@
//////////////////////////////////////////////////////
// New information in other genotypes in community ...
- for (int i = 1; i < size_community; ++ i) {
+ for (int i = 1; i < com_size; ++ i) {
genotype = community[i];
if (genotype->GetLength() != length_genome) {
cerr << "Genotypes in the community do not same genome length.\n";
@@ -3207,8 +2407,8 @@
double given_site_entropy = 0.0;
for (int inst = 0; inst < num_insts; ++ inst) {
if (given_prob[line][inst] > 0) {
- given_site_entropy -= given_prob[line][inst] * (log(given_prob[line][inst]) /
- log(1.0*num_insts));
+ given_site_entropy -= given_prob[line][inst] *
+ (log(given_prob[line][inst]) / log(1.0*num_insts));
}
}
@@ -3216,7 +2416,8 @@
double entropy_overlap = 0.0;
if (prob_overlap > 0 && (1 - prob_overlap > 0)) {
entropy_overlap = (- prob_overlap * log(prob_overlap)
- - (1-prob_overlap) * log(1 - prob_overlap)) / log(1.0*num_insts);
+ - (1-prob_overlap) * log(1 - prob_overlap)) /
+ log(1.0*num_insts);
} else {
entropy_overlap = 0;
}
@@ -3264,19 +2465,22 @@
}
complexity += min_new_info;
+ total_ind_info += length_genome - oo_conditional_entropy;
cpx_fp << genotype->GetID() << " "
<< oo_initial_entropy << " "
<< oo_conditional_entropy << " "
- << min_new_info << " " << complexity << " ";
+ << min_new_info << " " << complexity << " "
+ << length_genome - oo_conditional_entropy << " "
+ << total_ind_info << " ";
int hamm_dist = cGenomeUtil::FindHammingDistance(genotype->GetGenome(),
used_genotype->GetGenome());
total_dist += hamm_dist;
- cpx_fp << hamm_dist << " " << total_dist << " ";
+ cpx_fp << hamm_dist << " " << total_dist << " ";
- int num_cpus = genotype->GetNumCPUs();
- total_cpus += num_cpus;
- cpx_fp << num_cpus << " " << total_cpus << " ";
+ int num_cpus = genotype->GetNumCPUs();
+ total_cpus += num_cpus;
+ cpx_fp << num_cpus << " " << total_cpus << " ";
genotype->PrintTasks(cpx_fp, 0, -1);
@@ -3296,9 +2500,120 @@
}
+void cAnalyze::PopDiversity(cString cur_string)
+{
+ ////////////////////////////////////////////////
+ // Calculate Shannon Index of the Population ...
+ ////////////////////////////////////////////////
+
+ cout << "Analyze biological diversity of current population ...\n";
+
+ // Get the directory
+ cString dir = cur_string.PopWord();
+ cString defaultDir = "pop_diversity/";
+ cString directory = PopDirectory(dir, defaultDir);
+
+ // Get the file name that saves the result
+ cString filename = cur_string.PopWord();
+ if (filename.IsEmpty()) {
+ filename = "diversity.dat";
+ }
+
+ filename.Set("%s%s", directory(), filename.GetData());
+ ofstream diversity_fp(filename());
+
+
+ tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+ cAnalyzeGenotype * genotype;
+
+ // Classify the genotype by task implemented ...
+ map<string, vector<cAnalyzeGenotype *> > genotype_classBytask;
+ int num_tasks = d_environment->GetTaskLib().GetSize();
+
+ while ((genotype = batch_it.Next()) != NULL) {
+ genotype->Recalculate();
+
+ double fitness = genotype->GetFitness();
+ if (genotype->GetFitness() == 0) continue; // Dead Genotype
+
+ string tasks;
+ for (int task_id = 0; task_id < num_tasks; ++ task_id) {
+ int count = genotype->GetTaskCount(task_id);
+ if (count > 0) {
+ tasks += '1';
+ } else {
+ tasks += '0';
+ }
+ }
+
+ map<string, vector<cAnalyzeGenotype *> >::iterator classBytask_it;
+ classBytask_it = genotype_classBytask.find(tasks);
+ if (classBytask_it == genotype_classBytask.end()) {
+ vector<cAnalyzeGenotype *> genotypes;
+ genotypes.push_back(genotype);
+ genotype_classBytask.insert(make_pair(tasks, genotypes));
+ } else {
+ classBytask_it->second.push_back(genotype);
+ }
+ }
+
+ diversity_fp << "# Biological diversity of the population" << endl;
+ diversity_fp << "# Column 1: number of species" << endl;
+ diversity_fp << "# Column 2: Shannon index" << endl;
+ diversity_fp << "# Column 3: Simpson index" << endl << endl;
+
+ // Calculate Shannon index for classes by tasks ...
+ double entropy = 0.0;
+ int num_class = 0;
+
+ // count total organisms belonging to major species ...
+ int total_organisms = 0;
+ vector<int> speciesbytasks;
+ map<string, vector<cAnalyzeGenotype*> >::iterator classBytask_it;
+ for (classBytask_it = genotype_classBytask.begin();
+ classBytask_it != genotype_classBytask.end(); ++ classBytask_it) {
+
+ vector<cAnalyzeGenotype *> & genotype_vector = classBytask_it->second;
+ int num_genotypes = genotype_vector.size();
+ int num_cpus = 0;
+
+ for (int gen = 0; gen < num_genotypes; ++ gen) {
+ num_cpus += genotype_vector[gen]->GetNumCPUs();
+ }
+
+ cout << classBytask_it->first << " " << num_cpus << endl;
+ // Don't count the class which has too few organisms.
+ if (num_cpus < 3) continue;
+ num_class ++;
+ total_organisms += num_cpus;
+ speciesbytasks.push_back(num_cpus);
+
+ }
+
+ for (int pos = 0; pos < speciesbytasks.size(); ++ pos) {
+ double prob = speciesbytasks[pos] * 1.0 / total_organisms;
+ entropy -= prob * log(prob);
+ }
+
+ // Calculate Simpson's index ...
+ double D = 0;
+ for (int pos = 0; pos < speciesbytasks.size(); ++ pos) {
+ int num_orgs = speciesbytasks[pos];
+ D += num_orgs * (num_orgs -1) * 1.0 / (total_organisms*(total_organisms-1));
+ }
+
+ diversity_fp << num_class << " "
+ << entropy << " "
+ << 1-D << endl;
+ diversity_fp.close();
+
+}
+
+
+
void cAnalyze::CommandLandscape(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Landscaping batch " << cur_batch << endl;
+ if (verbose == true) cout << "Landscaping batch " << cur_batch << endl;
else cout << "Landscapping..." << endl;
// Load in the variables...
@@ -3325,7 +2640,7 @@
void cAnalyze::AnalyzeEpistasis(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Epistasis on " << cur_batch << endl;
+ if (verbose == true) cout << "Epistasis on " << cur_batch << endl;
else cout << "Calculating epistasis values..." << endl;
// Load in the variables...
@@ -3528,295 +2843,10 @@
df.Endl();
}
-void cAnalyze::AnalyzeComplexityDelta(cString cur_string)
-{
- // This command will examine the current population, and sample mutations
- // to see what the distribution of complexity changes is. Only genotypes
- // with a certain abundance (default=3) will be tested to make sure that
- // the organism didn't already have hidden complexity due to a downward
- // step.
- cout << "Testing complexity delta." << endl;
-
- cString filename = "complexity_delta.dat";
- int num_tests = 10;
- double copy_mut_prob = cConfig::GetCopyMutProb();
- double ins_mut_prob = cConfig::GetDivideInsProb();
- double del_mut_prob = cConfig::GetDivideDelProb();
- int count_threshold = 3;
-
- if (cur_string.GetSize() > 0) filename = cur_string.PopWord();
- if (cur_string.GetSize() > 0) num_tests = cur_string.PopWord().AsInt();
- if (cur_string.GetSize() > 0) copy_mut_prob = cur_string.PopWord().AsDouble();
- if (cur_string.GetSize() > 0) ins_mut_prob = cur_string.PopWord().AsDouble();
- if (cur_string.GetSize() > 0) del_mut_prob = cur_string.PopWord().AsDouble();
- if (cur_string.GetSize() > 0) count_threshold = cur_string.PopWord().AsInt();
-
- if (verbose >= nAnalyze::VERBOSE_ON) {
- cout << "...using:"
- << " filename='" << filename << "'"
- << " num_tests=" << num_tests
- << " copy_mut_prob=" << copy_mut_prob
- << " ins_mut_prob=" << ins_mut_prob
- << " del_mut_prob=" << del_mut_prob
- << " count_threshold=" << count_threshold
- << endl;
- }
-
- // Create an array of all of the genotypes above threshold.
- cAnalyzeGenotype * genotype = NULL;
- tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
-
- // Loop through all genotypes to perform a census
- int org_count = 0;
- while ((genotype = batch_it.Next()) != NULL) {
- // Only count genotypes above threshold
- if (genotype->GetNumCPUs() >= count_threshold) {
- org_count += genotype->GetNumCPUs();
- }
- }
-
- // Create an array to store pointers to the genotypes and fill it in.
- tArray<cAnalyzeGenotype *> org_array(org_count);
- int cur_org = 0;
- batch_it.Reset();
- while ((genotype = batch_it.Next()) != NULL) {
- // Ignore genotypes below threshold.
- if (genotype->GetNumCPUs() < count_threshold) continue;
-
- // Insert the remaining genotypes into the array.
- for (int i = 0; i < genotype->GetNumCPUs(); i++) {
- org_array[cur_org] = genotype;
- cur_org++;
- }
- }
-
- // Open up the file and prepare it for output.
- cDataFile & df = data_file_manager.Get(filename);
- df.WriteComment( "An analyze of expected complexity changes between parent and offspring" );
- df.WriteTimeStamp();
- // Next check the appropriate number of organisms, perform mutations, and
- // store the results.
- for (int cur_test = 0; cur_test < num_tests; cur_test++) {
- // Pick the genotype to test.
- int test_org_id = g_random.GetInt(org_count);
- genotype = org_array[test_org_id];
-
- // Create a copy of the genome.
- cCPUMemory mod_genome = genotype->GetGenome();
-
- if (copy_mut_prob == 0.0 &&
- ins_mut_prob == 0.0 &&
- del_mut_prob == 0.0) {
- cerr << "ERROR: All mutation rates are zero! No complexity delta analysis possible." << endl;
- return;
- }
-
- // Perform the per-site mutations -- we are going to keep looping until
- // we trigger at least one mutation.
- int num_mutations = 0;
- while (num_mutations == 0) {
- if (copy_mut_prob > 0.0) {
- for (int i = 0; i < mod_genome.GetSize(); i++) {
- if (g_random.P(copy_mut_prob)) {
- mod_genome[i] = inst_set.GetRandomInst();
- num_mutations++;
- }
- }
- }
-
- // Perform an Insertion if it has one.
- if (g_random.P(ins_mut_prob)) {
- int ins_line = g_random.GetInt(mod_genome.GetSize() + 1);
- mod_genome.Insert(ins_line, inst_set.GetRandomInst());
- num_mutations++;
- }
-
- // Perform a Deletion if it has one.
- if (g_random.P(del_mut_prob)) {
- int del_line = g_random.GetInt(mod_genome.GetSize());
- mod_genome.Remove(del_line);
- num_mutations++;
- }
- }
-
- // Calculate the complexities....
- genotype->Recalculate();
- double start_complexity = genotype->GetKO_Complexity(); // genotype->GetComplexity();
- double start_fitness = genotype->GetFitness();
- int start_length = genotype->GetLength();
- int start_gest = genotype->GetGestTime();
-
- cAnalyzeGenotype new_genotype(mod_genome, inst_set);
- new_genotype.Recalculate();
- double end_complexity = new_genotype.GetKO_Complexity(); // GetComplexity();
- double complexity_change = end_complexity - start_complexity;
- double end_fitness = new_genotype.GetFitness();
- int end_length = new_genotype.GetLength();
- int end_gest = new_genotype.GetGestTime();
-
- df.Write(num_mutations, "Number of mutational differences between original organism and mutant.");
- df.Write(complexity_change, "Complexity difference between original organism and mutant.");
- df.Write(start_complexity, "Complexity of initial organism.");
- df.Write(end_complexity, "Complexity of mutant.");
- df.Write(start_fitness, "Fitness of initial organism.");
- df.Write(end_fitness, "Fitness of mutant.");
- df.Write(start_length, "Length of initial organism.");
- df.Write(end_length, "Length of mutant.");
- df.Write(start_gest, "Gestation Time of initial organism.");
- df.Write(end_gest, "Gestation Time of mutant.");
- df.Write(genotype->GetID(), "ID of initial genotype.");
- df.Endl();
- }
-}
-
-void cAnalyze::AnalyzeKnockouts(cString cur_string)
-{
- cout << "Analyzing the effects of knockouts..." << endl;
-
- cString filename = "knockouts.dat";
- if (cur_string.GetSize() > 0) filename = cur_string.PopWord();
-
- int max_knockouts = 1;
- if (cur_string.GetSize() > 0) max_knockouts = cur_string.PopWord().AsInt();
-
- // Open up the data file...
- cDataFile & df = data_file_manager.Get(filename);
- df.WriteComment( "Analysis of knockouts in genomes" );
- df.WriteTimeStamp();
-
-
- // Setup a NULL instruction in a special inst set.
- cInstSet ko_inst_set(inst_set);
- // Locate the instruction corresponding to "NULL" in the instruction library.
- {
- cInstruction lib_null_inst = ko_inst_set.GetInstLib()->GetInst("NULL");
- if (lib_null_inst == ko_inst_set.GetInstLib()->GetInstError()) {
- cout << "<cAnalyze::AnalyzeKnockouts> got error:" << endl
- << " instruction 'NULL' not in current hardware type" << endl;
- exit(1);
- }
- // Add mapping to located instruction.
- ko_inst_set.Add2(lib_null_inst.GetOp());
- }
- const cInstruction null_inst = ko_inst_set.GetInst("NULL");
-
- // Loop through all of the genotypes in this batch...
- tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
- cAnalyzeGenotype * genotype = NULL;
- while ((genotype = batch_it.Next()) != NULL) {
- if (verbose >= nAnalyze::VERBOSE_ON) cout << " Knockout: " << genotype->GetName() << endl;
-
- // Calculate the stats for the genotype we're working with...
- genotype->Recalculate();
- const double base_fitness = genotype->GetFitness();
-
- const int max_line = genotype->GetLength();
- const cGenome & base_genome = genotype->GetGenome();
- cGenome mod_genome(base_genome);
-
- // Loop through all the lines of code, testing the removal of each.
- // -2=lethal, -1=detrimental, 0=neutral, 1=beneficial
- int dead_count = 0;
- int neg_count = 0;
- int neut_count = 0;
- int pos_count = 0;
- tArray<int> ko_effect(max_line);
- for (int line_num = 0; line_num < max_line; line_num++) {
- // Save a copy of the current instruction and replace it with "NULL"
- int cur_inst = base_genome[line_num].GetOp();
- mod_genome[line_num] = null_inst;
- cAnalyzeGenotype ko_genotype(mod_genome, ko_inst_set);
- ko_genotype.Recalculate();
-
- double ko_fitness = ko_genotype.GetFitness();
- if (ko_fitness == 0.0) {
- dead_count++;
- ko_effect[line_num] = -2;
- } else if (ko_fitness < base_fitness) {
- neg_count++;
- ko_effect[line_num] = -1;
- } else if (ko_fitness == base_fitness) {
- neut_count++;
- ko_effect[line_num] = 0;
- } else if (ko_fitness > base_fitness) {
- pos_count++;
- ko_effect[line_num] = 1;
- } else {
- cerr << "ERROR: illegal state in AnalyzeKnockouts()" << endl;
- }
-
- // Reset the mod_genome back to the original sequence.
- mod_genome[line_num].SetOp(cur_inst);
- }
-
- tArray<int> ko_pair_effect(ko_effect);
- if (max_knockouts > 1) {
- for (int line1 = 0; line1 < max_line; line1++) {
- for (int line2 = line1+1; line2 < max_line; line2++) {
- int cur_inst1 = base_genome[line1].GetOp();
- int cur_inst2 = base_genome[line2].GetOp();
- mod_genome[line1] = null_inst;
- mod_genome[line2] = null_inst;
- cAnalyzeGenotype ko_genotype(mod_genome, ko_inst_set);
- ko_genotype.Recalculate();
-
- double ko_fitness = ko_genotype.GetFitness();
-
- // If both individual knockouts are both harmful, but in combination
- // they are neutral or even beneficial, they should not count as
- // information.
- if (ko_fitness >= base_fitness &&
- ko_effect[line1] < 0 && ko_effect[line2] < 0) {
- ko_pair_effect[line1] = 0;
- ko_pair_effect[line2] = 0;
- }
-
- // If the individual knockouts are both neutral (or beneficial?),
- // but in combination they are harmful, they are likely redundant
- // to each other. For now, count them both as information.
- if (ko_fitness < base_fitness &&
- ko_effect[line1] >= 0 && ko_effect[line2] >= 0) {
- ko_pair_effect[line1] = -1;
- ko_pair_effect[line2] = -1;
- }
-
- // Reset the mod_genome back to the original sequence.
- mod_genome[line1].SetOp(cur_inst1);
- mod_genome[line2].SetOp(cur_inst2);
- }
- }
- }
-
- int pair_dead_count = 0;
- int pair_neg_count = 0;
- int pair_neut_count = 0;
- int pair_pos_count = 0;
- for (int i = 0; i < max_line; i++) {
- if (ko_pair_effect[i] == -2) pair_dead_count++;
- else if (ko_pair_effect[i] == -1) pair_neg_count++;
- else if (ko_pair_effect[i] == 0) pair_neut_count++;
- else if (ko_pair_effect[i] == 1) pair_pos_count++;
- }
-
- // Output data...
- df.Write(genotype->GetID(), "Genotype ID");
- df.Write(dead_count, "Count of lethal knockouts");
- df.Write(neg_count, "Count of detrimental knockouts");
- df.Write(neut_count, "Count of neutral knockouts");
- df.Write(pos_count, "Count of beneficial knockouts");
- df.Write(pair_dead_count, "Count of lethal knockouts after paired knockout tests.");
- df.Write(pair_neg_count, "Count of detrimental knockouts after paired knockout tests.");
- df.Write(pair_neut_count, "Count of neutral knockouts after paired knockout tests.");
- df.Write(pair_pos_count, "Count of beneficial knockouts after paired knockout tests.");
- df.Endl();
- }
-}
-
-
void cAnalyze::CommandFitnessMatrix(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Calculating fitness matrix for batch " << cur_batch << endl;
+ if (verbose == true) cout << "Calculating fitness matrix for batch " << cur_batch << endl;
else cout << "Calculating fitness matrix..." << endl;
cout << "Warning: considering only first genotype of the batch!" << endl;
@@ -3903,7 +2933,7 @@
const int num_cols = output_list.GetSize();
// Give some information in verbose mode.
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " outputing as ";
if (print_mode == 1) cout << "boolean ";
if (file_type == FILE_TYPE_TEXT) {
@@ -3930,7 +2960,7 @@
tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
cAnalyzeGenotype * genotype = NULL;
while ((genotype = batch_it.Next()) != NULL) {
- if (verbose >= nAnalyze::VERBOSE_ON) cout << " Mapping " << genotype->GetName() << endl;
+ if (verbose == true) cout << " Mapping " << genotype->GetName() << endl;
// Construct this filename...
cString filename;
@@ -4062,8 +3092,11 @@
const cInstruction inst_lib_null_inst = map_inst_set.GetInstLib()->GetInst("NULL");
if(inst_lib_null_inst == map_inst_set.GetInstLib()->GetInstError()){
cout << "<cAnalyze::CommandMapTasks> got error:" << endl;
- cout << " --- instruction \"NULL\" isn't in the instruction library for the" << endl;
- cout << " --- current hardware type." << endl;
+ cout << " --- instruction \"NULL\" isn't in the instruction library;" << endl;
+ cout << " --- get somebody to map a function to \"NULL\" in the library." << endl;
+ cout << " --- (probably to class method \"cHardware-of-some-type::initInstLib\"" << endl;
+ cout << " --- in file named \"cpu/hardware-of-some-type.cc\".)" << endl;
+ cout << " --- bailing-out." << endl;
exit(1);
}
// Add mapping to located instruction.
@@ -4143,7 +3176,7 @@
delete [] col_pass_count;
delete [] col_fail_count;
- }
+ }
}
void cAnalyze::CommandAverageModularity(cString cur_string)
@@ -4177,7 +3210,7 @@
const int num_cols = output_list.GetSize();
// Give some information in verbose mode.
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " outputing as ";
if (print_mode == 1) cout << "boolean ";
cout << "text files." << endl;
@@ -4262,7 +3295,7 @@
int num_cpus = genotype->GetNumCPUs();
- if (verbose >= nAnalyze::VERBOSE_ON) cout << " Mapping " << genotype->GetName() << endl;
+ if (verbose == true) cout << " Mapping " << genotype->GetName() << endl;
// Calculate the stats for the genotype we're working with...
genotype->Recalculate();
@@ -4529,7 +3562,7 @@
if (arg_list.PopString("html") != "") file_type = FILE_TYPE_HTML;
// Give some information in verbose mode.
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " outputing as ";
if (file_type == FILE_TYPE_TEXT) cout << "text files." << endl;
else cout << "HTML files." << endl;
@@ -4542,7 +3575,7 @@
tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
cAnalyzeGenotype * genotype = NULL;
while ((genotype = batch_it.Next()) != NULL) {
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << " Creating mutation map for " << genotype->GetName() << endl;
}
@@ -4837,7 +3870,7 @@
int tmp = batch1; batch1 = batch2; batch2 = tmp;
}
- if (verbose <= nAnalyze::VERBOSE_QUIET) {
+ if (verbose == false) {
cout << "Calculating Hamming Distance... ";
cout.flush();
} else {
@@ -4904,7 +3937,7 @@
int tmp = batch1; batch1 = batch2; batch2 = tmp;
}
- if (verbose <= nAnalyze::VERBOSE_QUIET) {
+ if (verbose == false) {
cout << "Calculating Levenstein Distance... ";
cout.flush();
} else {
@@ -4971,7 +4004,7 @@
int tmp = batch1; batch1 = batch2; batch2 = tmp;
}
- if (verbose <= nAnalyze::VERBOSE_QUIET) cout << "Calculating Species Distance... " << endl;
+ if (verbose == false) cout << "Calculating Species Distance... " << endl;
else cout << "Calculating Species Distance between batch "
<< batch1 << " and " << batch2 << endl;
@@ -5087,7 +4120,7 @@
int tmp = batch1; batch1 = batch2; batch2 = tmp;
}
- if (verbose <= nAnalyze::VERBOSE_QUIET) cout << "Creating recombinants... " << endl;
+ if (verbose == false) cout << "Creating recombinants... " << endl;
else cout << "Creating recombinants between batch "
<< batch1 << " and " << batch2 << endl;
@@ -5181,7 +4214,7 @@
cout << "Aligning sequences..." << endl;
- if (batch[cur_batch].IsLineage() == false && verbose >= nAnalyze::VERBOSE_ON) {
+ if (batch[cur_batch].IsLineage() == false && verbose == true) {
cerr << " Warning: sequences may not be a consecutive lineage."
<< endl;
}
@@ -5245,104 +4278,9 @@
batch[cur_batch].SetAligned(true);
}
-// Now this command do not consider changing environment
-// and only work for lineage and fixed-length runs.
-void cAnalyze::AnalyzeNewInfo(cString cur_string)
-{
- cout << "Analyze new information in child about environment ..." << endl;
-
- // Load in the variables
- int words = cur_string.CountNumWords();
- if (words < 1) {
- cout << "This command requires mutation rate, skipping." << endl;
- return;
- }
-
- // Get the mutation rate ...
- double mu = cur_string.PopWord().AsDouble();
-
- // Create the directory using the string given as the second argument
- cString dir = cur_string.PopWord();
- cString defaultDir = "newinfo/";
- cString directory = PopDirectory(dir, defaultDir);
-
- ///////////////////////////////////////////////////////
- // Loop through all of the genotypes in this batch...
-
- cString newinfo_fn;
- ofstream newinfo_fp;
- if (batch[cur_batch].IsLineage() == true) {
- newinfo_fn.Set("%s%s.newinfo.dat", directory(), "lineage");
- newinfo_fp.open(newinfo_fn);
- } else {
- cout << "This command requires the lineage in the batch, skipping.\n";
- return;
- }
- newinfo_fp << "# Legend:" << endl;
- newinfo_fp << "# 1:Child Genotype ID" << endl;
- newinfo_fp << "# 2:Parent Genotype ID" << endl;
- newinfo_fp << "# 3:Information of Child about Environment I(C:E)" << endl;
- newinfo_fp << "# 4:Information of Parent about Environment I(P:E)" << endl;
- newinfo_fp << "# 5:I(C:E)-I(P:E)" << endl;
- newinfo_fp << "# 6:Information Gained in Child" << endl;
- newinfo_fp << "# 7:Information Decreased in Child" << endl;
- newinfo_fp << "# 8:Net Increasing of Information in Child" << endl;
- newinfo_fp << endl;
-
- tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
- cAnalyzeGenotype * parent_genotype = batch_it.Next();
- if (parent_genotype == NULL) {
- newinfo_fp.close();
- return;
- }
- cAnalyzeGenotype * child_genotype = NULL;
- double I_P_E; // Information of parent about environment
- double H_P_E = AnalyzeEntropy(parent_genotype, mu);
- I_P_E = parent_genotype->GetLength() - H_P_E;
- while ((child_genotype = batch_it.Next()) != NULL) {
-
- if (verbose >= nAnalyze::VERBOSE_ON) {
- cout << "Analyze new information for " << child_genotype->GetName() << endl;
- }
-
- // Information of parent about its environment should not be zero.
- if (I_P_E == 0) {
- cerr << "Error: Information between parent and its enviroment is zero."
- << "(cAnalyze::AnalyzeNewInfo)" << endl;
- exit(1);
- }
- double H_C_E = AnalyzeEntropy(child_genotype, mu);
- double I_C_E = child_genotype->GetLength() - H_C_E;
- double net_gain = I_C_E - I_P_E;
-
- // Increased information in child compared to parent
- double child_increased_info = IncreasedInfo(child_genotype, parent_genotype, mu);
-
- // Lost information in child compared to parent
- double child_lost_info = IncreasedInfo(parent_genotype, child_genotype, mu);
- // Write information to file ...
- newinfo_fp << child_genotype->GetID() << " ";
- newinfo_fp << parent_genotype->GetID() << " ";
- newinfo_fp << I_C_E << " ";
- newinfo_fp << I_P_E << " ";
- newinfo_fp << net_gain << " ";
- newinfo_fp << child_increased_info << " ";
- newinfo_fp << child_lost_info << " ";
- newinfo_fp << child_increased_info - child_lost_info << endl;
-
- parent_genotype = child_genotype;
- I_P_E = I_C_E;
- }
-
- newinfo_fp.close();
- return;
-}
-
-
-
void cAnalyze::WriteClone(cString cur_string)
{
// Load in the variables...
@@ -5490,7 +4428,7 @@
for (grid_side = 5; grid_side < 100; grid_side += 5) {
if (grid_side * grid_side >= max_count) break;
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "...assuming population size "
<< grid_side << "x" << grid_side << "." << endl;
}
@@ -5603,7 +4541,7 @@
// Count the number of diffs between the two strings we're interested in.
const int total_diffs = cStringUtil::Distance(first_seq, last_seq);
- if (verbose >= nAnalyze::VERBOSE_ON) cout << " " << total_diffs << " mutations being tested."
+ if (verbose) cout << " " << total_diffs << " mutations being tested."
<< endl;
// Locate each difference.
@@ -5725,7 +4663,7 @@
void cAnalyze::AnalyzeInstructions(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Analyzing Instructions in batch " << cur_batch << endl;
}
else cout << "Analyzeing Instructions..." << endl;
@@ -5865,7 +4803,7 @@
void cAnalyze::AnalyzeInstPop(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Analyzing Instructions in batch " << cur_batch << endl;
}
else cout << "Analyzeing Instructions..." << endl;
@@ -5923,7 +4861,7 @@
void cAnalyze::AnalyzeBranching(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Analyzing branching patterns in batch " << cur_batch << endl;
}
else cout << "Analyzeing Branches..." << endl;
@@ -5941,13 +4879,13 @@
void cAnalyze::AnalyzeMutationTraceback(cString cur_string)
{
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Analyzing mutation traceback in batch " << cur_batch << endl;
}
else cout << "Analyzing mutation traceback..." << endl;
// This works best on lineages, so warn if we don't have one.
- if (batch[cur_batch].IsLineage() == false && verbose >= nAnalyze::VERBOSE_ON) {
+ if (batch[cur_batch].IsLineage() == false && verbose == true) {
cerr << " Warning: trying to traceback mutations outside of lineage."
<< endl;
}
@@ -6030,13 +4968,13 @@
void cAnalyze::AnalyzeComplexity(cString cur_string)
{
- cout << "Analyzing genome complexity..." << endl;
+ cout << "Analyzing genome complexity..." << endl;
// Load in the variables...
// This command requires at least on arguement
int words = cur_string.CountNumWords();
if(words < 1) {
- cout << "ERROR: AnalyzeComplexity has no parameters, skipping." << endl;
+ cout << "AnalyzeComplexity has no parameters, skipping." << endl;
return;
}
@@ -6095,25 +5033,25 @@
ofstream lineage_fp;
ofstream non_lineage_fp;
if (batch[cur_batch].IsLineage() == true) {
- lineage_filename.Set("%s%s.complexity.dat", directory(), "lineage");
+ lineage_filename.Set("%s%s_cpx.dat", directory(), "lineage");
lineage_fp.open(lineage_filename);
islineage = true;
} else {
cString non_lineage_file;
- non_lineage_file.Set("%s%s.complexity.dat", directory(), "nonlineage");
+ non_lineage_file.Set("%s%s_cpx.dat", directory(), "nonlineage");
non_lineage_fp.open(non_lineage_file);
islineage = false;
}
while ((genotype = batch_it.Next()) != NULL) {
- if (verbose >= nAnalyze::VERBOSE_ON) {
- cout << " Analyzing complexity for " << genotype->GetName() << endl;
+ if (verbose == true) {
+ cout << "Analyzing complexity for " << genotype->GetName() << endl;
}
// Construct this filename...
- cString filename;
- filename.Set("%s%s.complexity.dat", directory(), genotype->GetName()());
- ofstream fp(filename());
+ //cString filename;
+ //filename.Set("%s%s.complexity.dat", directory(), genotype->GetName()());
+ //ofstream fp(filename());
if (islineage) {
lineage_fp << genotype->GetID() << " ";
} else {
@@ -6128,12 +5066,21 @@
// Calculate the stats for the genotype we're working with ...
genotype->Recalculate();
- cout << genotype->GetFitness() << endl;
+ cout << genotype->GetID() << " " << genotype->GetFitness() << endl;
const int num_insts = inst_set.GetSize();
const int max_line = genotype->GetLength();
const cGenome & base_genome = genotype->GetGenome();
cGenome mod_genome(base_genome);
+ if (genotype->GetFitness() == 0) {
+ if (islineage) {
+ lineage_fp << 0 << endl;
+ } else {
+ non_lineage_fp << 0 << endl;
+ }
+ continue;
+ }
+
// Loop through all the lines of code, testing all mutations...
tArray<double> test_fitness(num_insts);
tArray<double> prob(num_insts);
@@ -6142,7 +5089,7 @@
//char cur_symbol = base_genome[line_num].GetSymbol();
// Column 1 ... the original instruction in the genome.
- fp << cur_inst << " ";
+ //fp << cur_inst << " ";
// Test fitness of each mutant.
for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
@@ -6171,13 +5118,13 @@
}
}
- if(maxFitness > 0) {
+ if (maxFitness > 0) {
for(int i=0; i<num_insts; i++) {
test_fitness[i] /= maxFitness;
}
} else {
- fp << "All zero fitness, ERROR." << endl;
- continue;
+ cerr << "All zero fitness, ERROR." << endl;
+ exit(1);
}
while(1) {
@@ -6193,9 +5140,9 @@
w_bar = w_bar - 0.000001;
}
// Write probability
- for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
- fp << prob[mod_inst] << " ";
- }
+ //for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
+ // fp << prob[mod_inst] << " ";
+ //}
// Calculate complexity
double entropy = 0;
@@ -6203,7 +5150,7 @@
entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
}
double complexity = 1 - entropy;
- fp << complexity << endl;
+ //fp << complexity << endl;
if (islineage) {
lineage_fp << complexity << " ";
@@ -6214,7 +5161,8 @@
// Reset the mod_genome back to the original sequence.
mod_genome[line_num].SetOp(cur_inst);
}
- fp.close();
+
+ //fp.close();
if (islineage) {
lineage_fp << endl;
} else {
@@ -6226,7 +5174,7 @@
// where i is the batchFrequency
for(int count=0; genotype != NULL && count < batchFrequency - 1; count++) {
genotype = batch_it.Next();
- if(genotype != NULL && verbose >= nAnalyze::VERBOSE_ON) {
+ if(genotype != NULL && verbose == true) {
cout << "Skipping: " << genotype->GetName() << endl;
}
}
@@ -6257,28 +5205,6 @@
cString directory = PopDirectory(cur_string, "pop_complexity/");
cString file = cur_string;
- // Get sample index...
- /*if (sample_size > batch_size) {
- cout << " Sample size is greater than batch size." << endl;
- return;
- }
-int *index_array;
-int *flag_array;
-index_array = (int*) malloc (batch_size*sizeof(int));
-flag_array = (int*) malloc (batch_size*sizeof(int));
-for (int i=0; i<batch_size; i++) {
- index_array[i] = i;
- flag_array[i] = 0;
-}
-int max_num = batch_size;
-cRandom random;
-for (int i=0; i<sample_size; i++) {
- int index = random.GetUInt(max_num);
- flag_array[index_array[index]] = 1;
- index_array[index] = index_array[max_num-1];
- max_num--;
-}*/
-
// Construct filename...
cString filename;
filename.Set("%spop%s.complexity.dat", directory(), file());
@@ -6326,22 +5252,21 @@
-// Calculate complexity
-for (int line_num = 0; line_num < seq_length; line_num ++) {
- double entropy = 0.0;
- for (int inst_num = 0; inst_num < num_insts; inst_num ++) {
- if (inst_stat(line_num, inst_num) == 0) continue;
- float prob = (float) (inst_stat(line_num, inst_num)) / (float) (actural_samples);
- entropy += prob * log((double) 1.0/prob) / log((double) num_insts);
- }
- double complexity = 1 - entropy;
- fp << complexity << " ";
-};
-fp << endl;
-fp.close();
-//free(index_array);
-//free(flag_array);
-return;
+ // Calculate complexity
+ for (int line_num = 0; line_num < seq_length; line_num ++) {
+ double entropy = 0.0;
+ for (int inst_num = 0; inst_num < num_insts; inst_num ++) {
+ if (inst_stat(line_num, inst_num) == 0) continue;
+ float prob = (float) (inst_stat(line_num, inst_num)) / (float) (actural_samples);
+ entropy += prob * log((double) 1.0/prob) / log((double) num_insts);
+ }
+ double complexity = 1 - entropy;
+ fp << complexity << " ";
+ };
+ fp << endl;
+ fp.close();
+
+ return;
}
@@ -6359,7 +5284,7 @@
cout << "Printing helpfiles in: " << cur_string << endl;
cHelpManager help_control;
- if (verbose >= nAnalyze::VERBOSE_ON) help_control.SetVerbose();
+ if (verbose == true) help_control.SetVerbose();
while (cur_string.GetSize() > 0) {
help_control.LoadFile(cur_string.PopWord());
}
@@ -6372,7 +5297,7 @@
cout << "Printing documentation files in: " << cur_string << endl;
cHelpManager help_control;
- if (verbose >= nAnalyze::VERBOSE_ON) help_control.SetVerbose();
+ if (verbose == true) help_control.SetVerbose();
while (cur_string.GetSize() > 0) {
help_control.LoadFile(cur_string.PopWord());
}
@@ -6396,7 +5321,7 @@
cString & cur_variable = GetVariable(var);
cur_variable = cur_string.PopWord();
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Setting " << var << " to " << cur_variable << endl;
}
}
@@ -6407,7 +5332,7 @@
if (cur_string.CountNumWords() > 0) {
next_batch = cur_string.PopWord().AsInt();
}
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Setting current batch to " << next_batch << endl;
+ if (verbose) cout << "Setting current batch to " << next_batch << endl;
if (next_batch >= MAX_BATCHES) {
cerr << " Error: max batches is " << MAX_BATCHES << endl;
exit(1);
@@ -6419,7 +5344,7 @@
void cAnalyze::BatchName(cString cur_string)
{
if (cur_string.CountNumWords() == 0) {
- if (verbose >= nAnalyze::VERBOSE_ON) cout << " Warning: No name given in NAME_BATCH!" << endl;
+ if (verbose) cout << " Warning: No name given in NAME_BATCH!" << endl;
return;
}
@@ -6429,11 +5354,11 @@
void cAnalyze::BatchTag(cString cur_string)
{
if (cur_string.CountNumWords() == 0) {
- if (verbose >= nAnalyze::VERBOSE_ON) cout << " Warning: No tag given in TAG_BATCH!" << endl;
+ if (verbose) cout << " Warning: No tag given in TAG_BATCH!" << endl;
return;
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Tagging batch " << cur_batch
<< " with tag '" << cur_string << "'" << endl;
}
@@ -6451,7 +5376,7 @@
int batch_id = cur_batch;
if (cur_string.CountNumWords() > 0) batch_id = cur_string.PopWord().AsInt();
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Purging batch " << batch_id << endl;
+ if (verbose) cout << "Purging batch " << batch_id << endl;
while (batch[batch_id].List().GetSize() > 0) {
delete batch[batch_id].List().Pop();
@@ -6472,7 +5397,7 @@
int batch_to = cur_batch;
if (cur_string.GetSize() > 0) batch_to = cur_string.PopWord().AsInt();
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Duplicating from batch " << batch_from
<< " to batch " << batch_to << "." << endl;
}
@@ -6515,11 +5440,11 @@
cTestCPU::UseResources() = true;
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Running batch " << cur_batch << " through test CPUs..." << endl;
} else cout << "Running through test CPUs..." << endl;
- if (verbose >= nAnalyze::VERBOSE_ON && batch[cur_batch].IsLineage() == false) {
+ if (verbose == true && batch[cur_batch].IsLineage() == false) {
cerr << " Warning: batch may not be a linege; "
<< "parent and ancestor distances may not be correct" << endl;
}
@@ -6542,7 +5467,7 @@
// If the previous genotype was the parent of this one, pass in a pointer
// to it for improved recalculate (such as distance to parent, etc.)
-// if (verbose >= nAnalyze::VERBOSE_ON) {
+// if (verbose == true) {
// PrintTestCPUResources("");
// }
if (last_genotype != NULL &&
@@ -6564,7 +5489,7 @@
void cAnalyze::BatchRename(cString cur_string)
{
- if (verbose <= nAnalyze::VERBOSE_QUIET) cout << "Renaming organisms..." << endl;
+ if (verbose == false) cout << "Renaming organisms..." << endl;
else cout << "Renaming organisms in batch " << cur_batch << endl;
// If a number is given with rename, start at that number...
@@ -6604,33 +5529,17 @@
cerr << "Debug Args: " << cur_string << endl;
}
-void cAnalyze::CommandVerbose(cString cur_string)
+void cAnalyze::ToggleVerbose(cString cur_string)
{
- cur_string.ToUpper();
-
- // If no arguments are given, assume a basic toggle.
- if (cur_string.GetSize() == 0 && verbose <= nAnalyze::VERBOSE_QUIET) {
- verbose = nAnalyze::VERBOSE_ON;
- }
- else if (cur_string.GetSize() == 0 && verbose >= nAnalyze::VERBOSE_ON) {
- verbose = nAnalyze::VERBOSE_QUIET;
- }
-
- // Otherwise, read in the argument to decide the new mode.
- else if (cur_string == "SILENT") verbose = nAnalyze::VERBOSE_SILENT;
- else if (cur_string == "QUIET") verbose = nAnalyze::VERBOSE_QUIET;
- else if (cur_string == "OFF") verbose = nAnalyze::VERBOSE_QUIET;
- else if (cur_string == "ON") verbose = nAnalyze::VERBOSE_ON;
- else if (cur_string == "DETAILS") verbose = nAnalyze::VERBOSE_DETAILS;
- else if (cur_string == "HIGH") verbose = nAnalyze::VERBOSE_DETAILS;
+ // No Args needed...
+ (void) cur_string;
- // Print out new verbose level (nothing for silent!)
- if (verbose == nAnalyze::VERBOSE_QUIET) {
- cout << "Verbose QUIET: Using minimal log messages..." << endl;
- } else if (verbose == nAnalyze::VERBOSE_ON) {
- cout << "Verbose ON: Using verbose log messages..." << endl;
- } else if (verbose == nAnalyze::VERBOSE_DETAILS) {
- cout << "Verbose DETAILS: Using detailed log messages..." << endl;
+ if (verbose == false) {
+ cout << "Using verbose log messages..." << endl;
+ verbose = true;
+ } else {
+ cout << "Using non-verbose log messages..." << endl;
+ verbose = false;
}
}
@@ -6697,7 +5606,7 @@
exit(1);
}
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Creating function: " << fun_name << endl;
+ if (verbose) cout << "Creating function: " << fun_name << endl;
// Create the new function...
cAnalyzeFunction * new_function = new cAnalyzeFunction(fun_name);
@@ -6711,7 +5620,7 @@
bool cAnalyze::FunctionRun(const cString & fun_name, cString args)
{
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose) {
cout << "Running function: " << fun_name << endl;
// << " with args: " << args << endl;
}
@@ -6769,7 +5678,7 @@
void cAnalyze::CommandForeach(cString cur_string,
tList<cAnalyzeCommand> & clist)
{
- if (verbose >= nAnalyze::VERBOSE_ON) cout << "Initiating Foreach loop..." << endl;
+ if (verbose) cout << "Initiating Foreach loop..." << endl;
cString var = cur_string.PopWord();
int num_args = cur_string.CountNumWords();
@@ -6779,13 +5688,13 @@
for (int i = 0; i < num_args; i++) {
cur_variable = cur_string.PopWord();
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Foreach: setting " << var << " to " << cur_variable << endl;
}
ProcessCommands(clist);
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Ending Foreach on " << var << endl;
}
}
@@ -6794,9 +5703,7 @@
void cAnalyze::CommandForRange(cString cur_string,
tList<cAnalyzeCommand> & clist)
{
- if (verbose >= nAnalyze::VERBOSE_ON) {
- cout << "Initiating FORRANGE loop..." << endl;
- }
+ if (verbose) cout << "Initiating FORRANGE loop..." << endl;
int num_args = cur_string.CountNumWords();
if (num_args < 3) {
@@ -6820,7 +5727,7 @@
for (int i = (int) min_val; i <= (int) max_val; i += (int) step_val) {
cur_variable.Set("%d", i);
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "FORRANGE: setting " << var << " to " << cur_variable << endl;
}
ProcessCommands(clist);
@@ -6829,14 +5736,14 @@
for (double i = min_val; i <= max_val; i += step_val) {
cur_variable.Set("%f", i);
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "FORRANGE: setting " << var << " to " << cur_variable << endl;
}
ProcessCommands(clist);
}
}
- if (verbose >= nAnalyze::VERBOSE_ON) {
+ if (verbose == true) {
cout << "Ending FORRANGE on " << var << endl;
}
}
@@ -7067,7 +5974,7 @@
("viable", "Is Viable (0/1)", &cAnalyzeGenotype::GetViable,
&cAnalyzeGenotype::SetViable));
genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, int>
- ("id", "Genotype ID", &cAnalyzeGenotype::GetID,
+ ("id", "Genome ID", &cAnalyzeGenotype::GetID,
&cAnalyzeGenotype::SetID));
genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, const cString &>
("tag", "Genotype Tag", &cAnalyzeGenotype::GetTag,
@@ -7165,18 +6072,6 @@
("frac_pos", "Fraction Mutations Beneficial",
&cAnalyzeGenotype::GetFracPos,
(void (cAnalyzeGenotype::*)(double)) NULL));
- genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, double>
- ("complexity", "Basic Complexity (all neutral/beneficial muts are equal)",
- &cAnalyzeGenotype::GetComplexity,
- (void (cAnalyzeGenotype::*)(double)) NULL));
- genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, int>
- ("ko_complexity", "Complexity calculated by counting sites you can't knockout",
- &cAnalyzeGenotype::GetKO_Complexity,
- (void (cAnalyzeGenotype::*)(int)) NULL));
- genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, int>
- ("ko_pair_complexity", "Count of sites you can't knockout (including epistatic effects)",
- &cAnalyzeGenotype::GetKOPair_Complexity,
- (void (cAnalyzeGenotype::*)(int)) NULL));
genotype_data_list.PushRear(new tDataEntry<cAnalyzeGenotype, const cString &>
("parent_muts", "Mutations from Parent",
&cAnalyzeGenotype::GetParentMuts, &cAnalyzeGenotype::SetParentMuts,
@@ -7359,7 +6254,8 @@
// Population analysis commands...
AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
- AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::AnalyzeCommunityComplexity);
+ AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::CommunityComplexity);
+ AddLibraryDef("POP_DIVERSITY", &cAnalyze::PopDiversity);
// Individual organism analysis...
AddLibraryDef("LANDSCAPE", &cAnalyze::CommandLandscape);
@@ -7369,7 +6265,6 @@
AddLibraryDef("AVERAGE_MODULARITY", &cAnalyze::CommandAverageModularity);
AddLibraryDef("MAP_MUTATIONS", &cAnalyze::CommandMapMutations);
AddLibraryDef("ANALYZE_COMPLEXITY", &cAnalyze::AnalyzeComplexity);
- AddLibraryDef("ANALYZE_KNOCKOUTS", &cAnalyze::AnalyzeKnockouts);
AddLibraryDef("ANALYZE_POP_COMPLEXITY", &cAnalyze::AnalyzePopComplexity);
AddLibraryDef("MAP_DEPTH", &cAnalyze::CommandMapDepth);
@@ -7381,7 +6276,6 @@
// Lineage analysis commands...
AddLibraryDef("ALIGN", &cAnalyze::CommandAlign);
- AddLibraryDef("ANALYZE_NEWINFO", &cAnalyze::AnalyzeNewInfo);
// Build input files for avida...
AddLibraryDef("WRITE_CLONE", &cAnalyze::WriteClone);
@@ -7397,7 +6291,6 @@
&cAnalyze::AnalyzeMutationTraceback);
AddLibraryDef("ANALYZE_EPISTASIS", &cAnalyze::AnalyzeEpistasis);
AddLibraryDef("ANALYZE_MATE_SELECTION", &cAnalyze::AnalyzeMateSelection);
- AddLibraryDef("ANALYZE_COMPLEXITY_DELTA", &cAnalyze::AnalyzeComplexityDelta);
// Environment manipulation
AddLibraryDef("ENVIRONMENT", &cAnalyze::EnvironmentSetup);
@@ -7417,7 +6310,7 @@
AddLibraryDef("STATUS", &cAnalyze::PrintStatus);
AddLibraryDef("DEBUG", &cAnalyze::PrintDebug);
AddLibraryDef("ECHO", &cAnalyze::PrintDebug);
- AddLibraryDef("VERBOSE", &cAnalyze::CommandVerbose);
+ AddLibraryDef("VERBOSE", &cAnalyze::ToggleVerbose);
AddLibraryDef("INCLUDE", &cAnalyze::IncludeFile);
AddLibraryDef("SYSTEM", &cAnalyze::CommandSystem);
AddLibraryDef("INTERACTIVE", &cAnalyze::CommandInteractive);
Modified: trunk/source/main/cAnalyze.h
===================================================================
--- trunk/source/main/cAnalyze.h 2005-12-05 17:49:49 UTC (rev 411)
+++ trunk/source/main/cAnalyze.h 2005-12-07 19:44:08 UTC (rev 412)
@@ -31,13 +31,6 @@
#define MAX_BATCHES 2000
-namespace nAnalyze {
- const int VERBOSE_SILENT = 0; // No output at all
- const int VERBOSE_QUIET = 1; // Notification at start of commands.
- const int VERBOSE_ON = 2; // Verbose output, detailing progress
- const int VERBOSE_DETAILS = 3; // High level of details, as available.
-}
-
// cAnalyze : The master analyze object.
class cGenotypeBatch; // array
@@ -74,7 +67,7 @@
// is a pair of the update and a vector of the resource concentrations
std::vector<std::pair<int, std::vector<double> > > resources;
- int verbose; // How much information to print?
+ bool verbose; // Should details be output to command line?
int interactive_depth; // How nested are we if in interactive mode?
cDataFileManager data_file_manager;
@@ -162,8 +155,8 @@
// Population Analysis Commands...
void CommandPrintPhenotypes(cString cur_string);
void CommandPrintDiversity(cString cur_string);
- void PhyloCommunityComplexity(cString cur_string);
- void AnalyzeCommunityComplexity(cString cur_string);
+ void CommunityComplexity(cString cur_string);
+ void PopDiversity(cString cur_string);
// Individual Organism Analysis...
void CommandLandscape(cString cur_string);
@@ -181,7 +174,6 @@
// Lineage Analysis Commands...
void CommandAlign(cString cur_string);
- void AnalyzeNewInfo(cString cur_string);
// Build Input Files for Avida
void WriteClone(cString cur_string);
@@ -195,11 +187,9 @@
void AnalyzeBranching(cString cur_string);
void AnalyzeMutationTraceback(cString cur_string);
void AnalyzeComplexity(cString cur_string);
- void AnalyzeKnockouts(cString cur_string);
void AnalyzePopComplexity(cString cur_string);
void AnalyzeEpistasis(cString cur_string);
void AnalyzeMateSelection(cString cur_string);
- void AnalyzeComplexityDelta(cString cur_string);
// Environment Manipulation
void EnvironmentSetup(cString cur_string);
@@ -219,7 +209,7 @@
void BatchRename(cString cur_string);
void PrintStatus(cString cur_string);
void PrintDebug(cString cur_string);
- void CommandVerbose(cString cur_string);
+ void ToggleVerbose(cString cur_string);
void IncludeFile(cString cur_string);
void CommandSystem(cString cur_string);
void CommandInteractive(cString cur_string);
@@ -230,19 +220,7 @@
// Looks up the resource concentrations that are the closest to the
// given update and then fill in those concentrations into the environment.
void FillResources(int update);
- // Analyze the entropy of genotype under default environment
- double AnalyzeEntropy(cAnalyzeGenotype * genotype, double mut_rate);
- // Analyze the entropy of child given parent and default environment
- double AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype,
- cAnalyzeGenotype * parent,
- double mut_rate);
- // Calculate the increased information in genotype1 comparing to genotype2
- // line by line. If information in genotype1 is less than that in genotype2
- // in a line, increasing is 0. Usually this is used for child-parent comparison.
- double IncreasedInfo(cAnalyzeGenotype * genotype1,
- cAnalyzeGenotype * genotype2,
- double mut_rate);
-
+
// Flow Control...
void CommandForeach(cString cur_string, tList<cAnalyzeCommand> & clist);
void CommandForRange(cString cur_string, tList<cAnalyzeCommand> & clist);
Modified: trunk/source/main/cEnvironment.cc
===================================================================
--- trunk/source/main/cEnvironment.cc 2005-12-05 17:49:49 UTC (rev 411)
+++ trunk/source/main/cEnvironment.cc 2005-12-07 19:44:08 UTC (rev 412)
@@ -607,6 +607,35 @@
return true;
}
+
+bool cEnvironment::LoadSetValue(cString desc)
+{
+ cString item_type = desc.PopWord();
+ item_type.ToUpper();
+
+ cString item_name = desc.PopWord();
+
+ double item_value = desc.PopWord().AsDouble();
+
+
+ if (item_type == "REACTION") {
+ cReaction * cur_reaction = reaction_lib.GetReaction(item_name);
+ if (cur_reaction == NULL) {
+ cerr << "Unknown REACTION: '" << item_name << "'" << endl;
+ return false;
+ }
+ cur_reaction->ModifyValue(item_value);
+ } else if (item_type == "") {
+ cerr << "Format: SET_VALUE <type> <name> <new_value>" << endl;
+ } else {
+ cerr << "Error: Cannot modify value of type "
+ << item_type << endl;
+ return false;
+ }
+
+ return true;
+}
+
bool cEnvironment::LoadLine(cString line)
{
cString type = line.PopWord(); // Determine type of this entry.
@@ -617,6 +646,7 @@
else if (type == "REACTION") load_ok = LoadReaction(line);
else if (type == "MUTATION") load_ok = LoadMutation(line);
else if (type == "SET_ACTIVE") load_ok = LoadSetActive(line);
+ else if (type == "SET_VALUE") load_ok = LoadSetValue(line);
else {
cerr << "Error: Unknown environment keyword '" << type << "." << endl;
return false;
Modified: trunk/source/main/cEnvironment.h
===================================================================
--- trunk/source/main/cEnvironment.h 2005-12-05 17:49:49 UTC (rev 411)
+++ trunk/source/main/cEnvironment.h 2005-12-07 19:44:08 UTC (rev 412)
@@ -76,6 +76,7 @@
bool LoadMutation(cString desc);
bool LoadSetActive(cString desc);
+ bool LoadSetValue(cString desc);
bool TestRequisites(const tList<cReactionRequisite> & req_list,
int task_count,
Modified: trunk/source/utils/hist_map/hist_map2.cc
===================================================================
--- trunk/source/utils/hist_map/hist_map2.cc 2005-12-05 17:49:49 UTC (rev 411)
+++ trunk/source/utils/hist_map/hist_map2.cc 2005-12-07 19:44:08 UTC (rev 412)
@@ -51,7 +51,7 @@
int cur_col = 1;
while (cur_col <= max_id) {
cString cur_entry(cur_line.PopWord());
- if (cur_col == col_id) value = 100 + (int) (3.0 * cur_entry.AsDouble());
+ if (cur_col == col_id) value = 100 + (int) (1.0 * cur_entry.AsDouble());
else if (cur_col == count_id) count = cur_entry.AsInt();
cur_col++;
}
More information about the Avida-cvs
mailing list