[Avida-cvs] [avida-svn] r476 - development/source/analyze
ofria@myxo.css.msu.edu
ofria at myxo.css.msu.edu
Sat Feb 18 20:34:42 PST 2006
Author: ofria
Date: 2006-02-18 23:34:42 -0500 (Sat, 18 Feb 2006)
New Revision: 476
Modified:
development/source/analyze/cAnalyze.cc
development/source/analyze/cAnalyze.h
development/source/analyze/cAnalyzeGenotype.h
Log:
Added in an analyze command: ANALYZE_MODULARITY
Modified: development/source/analyze/cAnalyze.cc
===================================================================
--- development/source/analyze/cAnalyze.cc 2006-02-19 01:54:46 UTC (rev 475)
+++ development/source/analyze/cAnalyze.cc 2006-02-19 04:34:42 UTC (rev 476)
@@ -4615,6 +4615,162 @@
}
+void cAnalyze::CommandAnalyzeModularity(cString cur_string)
+{
+ cString filename("analyze_modularity.dat");
+ if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
+
+ cDataFile & df = m_world->GetDataFile(filename);
+ df.WriteComment( "Modularity Analysis" );
+ df.WriteTimeStamp();
+
+ // Determine which phenotypic traits we're working with
+ tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
+ tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
+ cStringList arg_list(cur_string);
+ LoadGenotypeDataList(arg_list, output_list);
+ const int num_traits = output_list.GetSize();
+
+ // Setup the map_inst_set with the NULL instruction
+ cInstSet map_inst_set(inst_set);
+
+ // Locate the NULL instruction in the library
+ const cInstruction lib_null = map_inst_set.GetInstLib()->GetInst("NULL");
+ if (lib_null == map_inst_set.GetInstLib()->GetInstError()) {
+ cerr << "Internal ERROR: Instruction 'NULL' not found." << endl;
+ exit(1);
+ }
+
+ // Add mapping to located instruction.
+ map_inst_set.AddInst(lib_null.GetOp());
+ const cInstruction null_inst = map_inst_set.GetInst("NULL");
+
+
+ // Loop through all genotypes in this batch.
+ tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+ cAnalyzeGenotype * genotype = NULL;
+ while ((genotype = batch_it.Next()) != NULL) {
+ const int base_length = genotype->GetLength();
+ const cGenome & base_genome = genotype->GetGenome();
+ cGenome mod_genome(base_genome);
+ genotype->Recalculate();
+
+ tMatrix<bool> task_matrix(num_traits, base_length);
+ tArray<int> num_inst(num_traits); // Number of instructions for each task
+ tArray<int> num_task(base_length); // Number of traits at each locus
+ task_matrix.SetAll(false);
+ num_inst.SetAll(0);
+ num_task.SetAll(0);
+
+ // Loop through all lines in this genome
+ for (int line_num = 0; line_num < base_length; line_num++) {
+ int cur_inst = base_genome[line_num].GetOp();
+
+ // Determine what happens to this genotype when this line is knocked out
+ mod_genome[line_num] = null_inst;
+ cAnalyzeGenotype test_genotype(m_world, mod_genome, map_inst_set);
+ test_genotype.Recalculate();
+
+ // Loop through the individual traits
+ output_it.Reset();
+ tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
+ int cur_trait = 0;
+ while ((data_command = output_it.Next()) != NULL) {
+ data_command->SetTarget(&test_genotype);
+ test_genotype.SetSpecialArgs(data_command->GetArgs());
+ int compare = data_command->Compare(genotype);
+
+ // If knocking out the instruction turns off this trait, mark it in
+ // the modularity matrix. Only check if the test_genotype replicates,
+ // i.e. if its fitness is not zeros
+ if (compare < 0 && test_genotype.GetFitness() != 0) {
+ task_matrix(cur_trait, line_num) = true;
+ num_inst[cur_trait]++;
+ num_task[line_num]++;
+ // cout << line_num << " : true" << endl;
+ } else {
+ // cout << line_num << " : false" << endl;
+ }
+ cur_trait++;
+ }
+
+ // Reset the mod_genome back to the original sequence.
+ mod_genome[line_num].SetOp(cur_inst);
+ } // end of genotype-phenotype mapping for a single organism
+
+
+ // --== PHYSICAL MODULARITY ==--
+
+ double ave_dist = 0.0; // Average distance between sites in traits.
+
+ // Loop through each task to calculate its physical modularity
+ int trait_count = 0; // Count active traits...
+ int site_count = 0; // Count total sites for all traits...
+ for (int cur_trait = 0; cur_trait < num_traits; cur_trait++) {
+ // cout << "Trait " << cur_trait << ", coded for by "
+ // << num_inst[cur_trait] << " instructions." << endl;
+
+ // Ignore traits not coded for in this genome...
+ if (num_inst[cur_trait] == 0) continue;
+
+ // Keep track of how many traits we're examining...
+ trait_count++;
+
+ double trait_dist = 0.0; // Total distance between sites in this trait.
+ int num_samples = 0; // Count samples we take for this trait.
+
+ // Compare all pairs of positions.
+ for (int pos1 = 0; pos1 < base_length; pos1++) {
+ if (task_matrix(cur_trait, pos1) == false) continue;
+ site_count++;
+ for (int pos2 = pos1+1; pos2 < base_length; pos2++) {
+ if (task_matrix(cur_trait, pos2) == false) continue;
+
+ // We'll only make it this far if both positions code for the trait.
+ num_samples++;
+
+ // Calculate the distance...
+ int cur_dist = pos2 - pos1;
+
+ // Remember to consider that the genome is circular.
+ if (2*cur_dist > base_length) cur_dist = base_length - cur_dist;
+
+ // cout << "Pos " << pos1 << " and " << pos2 << "; distance="
+ // << cur_dist << endl;
+
+ // And add it into the total for this trait.
+ trait_dist += cur_dist;
+ }
+ }
+
+ // Assert that we found the correct number of samples.
+ assert(num_samples = num_inst(cur_trait) * (num_inst(cur_trait)-1) / 2);
+
+ // Now that we have all of the distances for this trait, divide by the
+ // number of samples and add it to the average.
+ ave_dist += trait_dist / num_samples;
+ }
+
+
+ // Now that we've summed up all of the average distances for this
+ // genotype, calculate the physical modularity.
+
+ double PM = 1.0 - (ave_dist / (double) (base_length * trait_count));
+ double ave_sites = ((double) site_count) / (double) trait_count;
+
+ // Write the restults to file...
+ df.Write(PM, "Physical Modularity");
+ df.Write(trait_count, "Number of traits used in calculation");
+ df.Write(ave_sites, "Average num sites associated with traits");
+ df.Write(base_length, "Genome length");
+ df.Write(ave_dist, "Average Distance between trait sites");
+ df.Endl();
+ }
+
+ // @CAO CONTINUE HERE
+}
+
+
void cAnalyze::CommandMapMutations(cString cur_string)
{
cout << "Constructing genome mutations maps..." << endl;
Modified: development/source/analyze/cAnalyze.h
===================================================================
--- development/source/analyze/cAnalyze.h 2006-02-19 01:54:46 UTC (rev 475)
+++ development/source/analyze/cAnalyze.h 2006-02-19 04:34:42 UTC (rev 476)
@@ -170,6 +170,7 @@
void CommandFitnessMatrix(cString cur_string);
void CommandMapTasks(cString cur_string);
void CommandAverageModularity(cString cur_string);
+ void CommandAnalyzeModularity(cString cur_string);
void CommandMapMutations(cString cur_string);
void CommandMapDepth(cString cur_string);
Modified: development/source/analyze/cAnalyzeGenotype.h
===================================================================
--- development/source/analyze/cAnalyzeGenotype.h 2006-02-19 01:54:46 UTC (rev 475)
+++ development/source/analyze/cAnalyzeGenotype.h 2006-02-19 04:34:42 UTC (rev 476)
@@ -83,12 +83,14 @@
// Group 4 : Landscape stats (obtained from testing all possible mutations)
class cAnalyzeKnockouts {
public:
+ // Calculations based off of all single knockouts
int dead_count;
int neg_count;
int neut_count;
int pos_count;
- bool has_pair_info; // Try all pairs of knocks to get epistasis effects?
+ // Extra calculations based off of double knockouts...
+ bool has_pair_info; // Have these calculations been made?
int pair_dead_count;
int pair_neg_count;
int pair_neut_count;
More information about the Avida-cvs
mailing list