[Avida-cvs] [avida-svn] r498 - development/source/analyze

matt@myxo.css.msu.edu matt at myxo.css.msu.edu
Thu Mar 9 12:53:48 PST 2006


Author: matt
Date: 2006-03-09 15:53:48 -0500 (Thu, 09 Mar 2006)
New Revision: 498

Modified:
   development/source/analyze/cAnalyze.cc
   development/source/analyze/cAnalyze.h
Log:
Quick implementation of pairwise mutation testing.


Modified: development/source/analyze/cAnalyze.cc
===================================================================
--- development/source/analyze/cAnalyze.cc	2006-03-08 12:04:32 UTC (rev 497)
+++ development/source/analyze/cAnalyze.cc	2006-03-09 20:53:48 UTC (rev 498)
@@ -555,6 +555,128 @@
   return entropy;
 }
 
+//@ MRR @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+tMatrix< double > cAnalyze::AnalyzeEntropyPairs(cAnalyzeGenotype * genotype, double mu) 
+{
+  
+  double entropy = 0.0;
+
+  genotype->Recalculate();
+
+   // Calculate the stats for the genotype we're working with ...
+  const int num_insts = inst_set.GetSize();
+  const int num_lines = genotype->GetLength();
+  const cGenome & base_genome = genotype->GetGenome();
+  cGenome mod_genome(base_genome);
+  double base_fitness = genotype->GetFitness();
+
+  cout << num_lines << endl;
+  tMatrix< double > pairwiseEntropy(num_lines, num_lines);
+  for (int i=0; i<num_lines; i++)
+    for (int j=-0; j<num_lines; j++)
+      pairwiseEntropy[i][j] = 0.0;
+  
+  cout << pairwiseEntropy.GetNumRows() << endl;
+
+  // If the fitness is 0, return empty matrix
+  
+  if (genotype->GetFitness() == 0) {
+    return pairwiseEntropy;
+  }
+
+
+  tMatrix< double >  test_fitness(num_insts,num_insts);
+  tMatrix< double >  prob(num_insts,num_insts);
+  
+  //Pairwise mutations; the diagonal of the matrix will be the information
+  //stored by that site alone
+  for (int line_1 = 0; line_1 < num_lines; line_1++){ 
+    for (int line_2 = line_1; line_2 < num_lines; line_2++) { 
+
+      cerr << "[ " << line_1 << ", " << line_2 << " ]" << endl;
+
+      int cur_inst_1 = base_genome[line_1].GetOp(); 
+      int cur_inst_2 = base_genome[line_2].GetOp();
+      
+      // Test fitness of each mutant.
+      for (int mod_inst_1 = 0; mod_inst_1 < num_insts; mod_inst_1++){
+	for (int mod_inst_2 = 0; mod_inst_2 < num_insts; mod_inst_2++) {
+	  mod_genome[line_1].SetOp(mod_inst_1);
+	  mod_genome[line_2].SetOp(mod_inst_2);
+	  cAnalyzeGenotype test_genotype(m_world, mod_genome, inst_set);
+	  test_genotype.Recalculate();
+	  // Adjust fitness ...
+	  if (test_genotype.GetFitness() <= base_fitness) {
+	    test_fitness[mod_inst_1][mod_inst_2] = test_genotype.GetFitness();
+	  } else {
+	    test_fitness[mod_inst_1][mod_inst_2] = base_fitness;
+	  }
+	}
+      }
+	
+	// Calculate probabilities at mut-sel balance
+	double w_bar = 1;
+	
+	// Normalize fitness values
+	double maxFitness = 0.0;
+	for(int i=0; i<num_insts; i++) {
+	  for (int j = 0; j < num_insts; j++){
+	    if(test_fitness[i][j] > maxFitness) {
+	      maxFitness = test_fitness[i][j];
+	    }
+	  }
+	}
+	
+	
+	for(int i=0; i<num_insts; i++) {
+	  for (int j=0; j<num_insts; j++){
+	    test_fitness[i][j] /= maxFitness;
+	  }
+	}
+	
+
+	while(1) {
+	  double sum = 0.0;
+	  for (int mod_inst_1 = 0; mod_inst_1 < num_insts; mod_inst_1++) {
+	    for (int mod_inst_2 = 0; mod_inst_2 < num_insts; mod_inst_2++){
+	      
+	      prob[mod_inst_1][mod_inst_2] = 
+		(mu * w_bar) / 
+		               ((double)num_insts * 
+		               (w_bar + test_fitness[mod_inst_1][mod_inst_2] 
+				* mu  - test_fitness[mod_inst_1][mod_inst_2]));
+	      sum = sum + prob[mod_inst_1][mod_inst_2];
+	    }
+	  }
+	  if ((sum-1.0)*(sum-1.0) <= 0.0001) 
+	    break;
+	  else
+	    w_bar = w_bar - 0.000001;
+	}
+    
+	// Calculate entropy ...
+	double this_entropy = 0.0;
+	for (int i = 0; i < num_insts; i++){
+	  for (int j = 0; j < num_insts; j++) {
+	    this_entropy += prob[i][j] * 
+	                    log((double) 1.0/prob[i][j]) / log ((double) num_insts);
+	  }
+	}
+	entropy += this_entropy;
+	pairwiseEntropy[line_1][line_2] = this_entropy;
+
+	// Reset the mod_genome back to the original sequence.
+	mod_genome[line_1].SetOp(cur_inst_1);
+	mod_genome[line_2].SetOp(cur_inst_2);
+	
+      }  
+    }  //End Loops
+  return pairwiseEntropy;
+}
+
+
+
+
 double cAnalyze::AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype,
                                            cAnalyzeGenotype * parent, double mut_rate)
 {
@@ -3302,6 +3424,94 @@
   }
 }
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//@ MRR @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+void cAnalyze::CommandPairwiseEntropy(cString cur_string)
+{
+  if (verbose >= nAnalyze::VERBOSE_ON)
+    cout << "Finding pairwise entropy on batch " << cur_batch << endl;
+  else
+    cout << "Finding pairwise entropy..." << endl;
+  
+  cout << "@MRR-> This command is being tested!" << endl;
+
+  cString directory = PopDirectory(cur_string, "pairwise_data/");
+  if (verbose >= nAnalyze::VERBOSE_ON)
+    cout << "\tUsing directory: " << directory << endl;
+  double mu = cur_string.PopWord().AsDouble();
+  if (verbose >= nAnalyze::VERBOSE_ON)
+    cout << "\tUsing mu=" << mu << endl;
+
+  tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
+  cAnalyzeGenotype * genotype = batch_it.Next();
+
+  cout << genotype->GetName() << endl;
+
+  while(genotype != NULL)
+  { 
+    cString genName = genotype->GetName();
+
+    if (verbose >= nAnalyze::VERBOSE_ON)
+      cout << "\t...on genotype " << genName << endl;
+
+    cString filename;
+    filename.Set("%spairdata.%s.dat", static_cast<const char*>(directory),
+		 static_cast<const char*>(genName));
+
+    ofstream& fp = m_world->GetDataFileOFStream(filename);
+
+    if (verbose >= nAnalyze::VERBOSE_ON)
+	cout << "\t\t...with filename:  " << filename << endl;
+
+    cout << "# Pairwise Entropy Information" << endl;
+
+    tMatrix<double> pairdata = AnalyzeEntropyPairs(genotype, mu);
+
+    cout << pairdata.GetNumRows() << endl;
+
+    for (int i=0;  i < pairdata.GetNumRows(); i++){
+      for (int j=0; j < pairdata.GetNumCols(); j++)
+	cout << pairdata[i][j] << " ";
+      cout << endl;
+    }
+     m_world->GetDataFileManager().Remove(filename);
+    genotype = batch_it.Next();
+  }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 void cAnalyze::AnalyzeEpistasis(cString cur_string)
 {
   if (verbose >= nAnalyze::VERBOSE_ON) cout << "Epistasis on " << cur_batch << endl;
@@ -3328,6 +3538,7 @@
 }
 
 
+
 // This command will take the current batch and analyze how well organisms
 // cross-over with each other, both across the population and between mates.
 
@@ -7568,6 +7779,7 @@
   AddLibraryDef("ANALYZE_KNOCKOUTS", &cAnalyze::AnalyzeKnockouts);
   AddLibraryDef("ANALYZE_POP_COMPLEXITY", &cAnalyze::AnalyzePopComplexity);
   AddLibraryDef("MAP_DEPTH", &cAnalyze::CommandMapDepth);
+  AddLibraryDef("PAIRWISE_ENTROPY", &cAnalyze::CommandPairwiseEntropy); //@MRR
   
   // Population comparison commands...
   AddLibraryDef("HAMMING", &cAnalyze::CommandHamming);

Modified: development/source/analyze/cAnalyze.h
===================================================================
--- development/source/analyze/cAnalyze.h	2006-03-08 12:04:32 UTC (rev 497)
+++ development/source/analyze/cAnalyze.h	2006-03-09 20:53:48 UTC (rev 498)
@@ -28,6 +28,9 @@
 #ifndef tList_h
 #include "tList.h"
 #endif
+#ifndef tMatrix_h
+#include "tMatrix.h"
+#endif
 
 const int MAX_BATCHES = 2000;
 
@@ -173,6 +176,7 @@
   void CommandAnalyzeModularity(cString cur_string);
   void CommandMapMutations(cString cur_string);
   void CommandMapDepth(cString cur_string);
+  void CommandPairwiseEntropy(cString cur_string);
 
   // Population Comparison Commands...
   void CommandHamming(cString cur_string);
@@ -243,6 +247,11 @@
   double IncreasedInfo(cAnalyzeGenotype * genotype1, 
 		       cAnalyzeGenotype * genotype2, 
 		       double mut_rate);
+
+  //Calculate covarying information between pairs of sites
+  tMatrix<double> AnalyzeEntropyPairs(cAnalyzeGenotype * genotype,
+					       double mut_rate);
+
   
   // Flow Control...
   void CommandForeach(cString cur_string, tList<cAnalyzeCommand> & clist);




More information about the Avida-cvs mailing list