[Avida-SVN] r2257 - in branches/matt/PairwiseEpistasis: Avida.xcodeproj source/analyze
matt at myxo.css.msu.edu
matt at myxo.css.msu.edu
Tue Jan 8 13:20:12 PST 2008
Author: matt
Date: 2008-01-08 16:20:12 -0500 (Tue, 08 Jan 2008)
New Revision: 2257
Modified:
branches/matt/PairwiseEpistasis/Avida.xcodeproj/project.pbxproj
branches/matt/PairwiseEpistasis/source/analyze/cAnalyze.cc
branches/matt/PairwiseEpistasis/source/analyze/cAnalyzeGenotype.cc
branches/matt/PairwiseEpistasis/source/analyze/cAnalyzeGenotype.h
Log:
Updated MutationReversion to do all pairwise comparisons on LOD.
Modified: branches/matt/PairwiseEpistasis/Avida.xcodeproj/project.pbxproj
===================================================================
--- branches/matt/PairwiseEpistasis/Avida.xcodeproj/project.pbxproj 2008-01-08 19:20:13 UTC (rev 2256)
+++ branches/matt/PairwiseEpistasis/Avida.xcodeproj/project.pbxproj 2008-01-08 21:20:12 UTC (rev 2257)
@@ -1556,7 +1556,6 @@
DCC30C4D0762532C008F7A48 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 702442D70859E0B00059BD9B /* Build configuration list for PBXProject "Avida" */;
- compatibilityVersion = "Xcode 2.4";
hasScannedForEncodings = 0;
mainGroup = DCC30C490762532C008F7A48;
productRefGroup = DCC3164E07626CF3008F7A48 /* Products */;
Modified: branches/matt/PairwiseEpistasis/source/analyze/cAnalyze.cc
===================================================================
--- branches/matt/PairwiseEpistasis/source/analyze/cAnalyze.cc 2008-01-08 19:20:13 UTC (rev 2256)
+++ branches/matt/PairwiseEpistasis/source/analyze/cAnalyze.cc 2008-01-08 21:20:12 UTC (rev 2257)
@@ -6998,155 +6998,224 @@
/* MRR
- * August 2007
+ * January 2008
* This function will go through the lineage, align the genotypes, and
- * preform mutation reversion a specified number of descendents ahead
- * assuming they keep within a certain alignment distance (specified as well).
- * The output will give fitness information for the mutation-reverted genotypes
- * as described below.
+ * perform pairwise reversions for all pairs of genotypes in the current
+ * batch.
+ * Arguments:
+ * filename [= "pairwise_reversion.dat" ]
+ * num_trials [= DEFAULT_NUM_PP_TRIALS] default if plasticity has not been set
+ * (value defined in cAnalyzeGenotype.h)
*/
void cAnalyze::MutationRevert(cString cur_string)
{
//This function takes in three parameters, all defaulted:
- cString filename("XXX.dat"); //The name of the output file
- int max_dist = -1; //The maximum edit distance allowed in the search
- int max_depth = 5; //The maximum depth forward one wishes to search
+ cString filename; //The name of the output file
+ int num_trials;
- if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
- if (cur_string.GetSize() != 0) max_dist = cur_string.PopWord().AsInt();
- if (cur_string.GetSize() != 0) max_depth = cur_string.PopWord().AsInt();
+ filename = (cur_string.GetSize() > 0) ? "pairwise_reversion.dat" : cur_string.PopWord();
+ num_trials = (cur_string.GetSize() > 0) ? DEFAULT_NUM_PP_TRIALS : cur_string.PopWord().AsInt();
- //Warning notifications
+ //Right now, only perform this on actual lineages
if (!batch[cur_batch].IsLineage())
- {
- cout << "Error: This command requires a lineage. Skipping." << endl;
- return;
- }
-
+ m_world->GetDriver().RaiseFatalException(2, "MutationRevert: Current batch must be a lineage.");
//Request a file
- ofstream& FOT = m_world->GetDataFileOFStream(filename);
- /*
- FOT output per line
- ID
- FITNESS
- BIRTH
- DISTANCE
- PID
- P_FITNESS
- P_BIRTH
- @ea depth past
- CHILDX_ID
- CHILDX_BIRTH
- CHILDX_FITNESS
- CHILDX_DISTANCE
- CHILDX_FITNESS_SANS_MUT
- */
+ cDataFile& df = m_world->GetDataFile(filename);
+ if (!df.Good())
+ m_world->GetDriver().RaiseFatalException(2, "MutationRevert: Unable to open requested file for output.");
+
+
//Align the batch... we're going to keep the fitnesses intact from the runs
CommandAlign("");
//Our edit distance is already stored in the historical dump.
-
- //Test hardware
- cTestCPU* test_cpu = m_world->GetHardwareManager().CreateTestCPU();
- cCPUTestInfo* test_info = new cCPUTestInfo();
- test_info->UseRandomInputs(true);
- tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
- cAnalyzeGenotype* parent_genotype = batch_it.Next();
- cAnalyzeGenotype* other_genotype = NULL;
- cAnalyzeGenotype* genotype = NULL;
-
- while( (genotype = batch_it.Next()) != NULL && parent_genotype != NULL)
- {
- if (true)
- {
- FOT << genotype->GetID() << " "
- << genotype->GetFitness() << " "
- << genotype->GetUpdateBorn() << " "
- << genotype->GetParentDist() << " "
- << parent_genotype->GetID() << " "
- << parent_genotype->GetFitness() << " "
- << parent_genotype->GetUpdateBorn() << " ";
+ const tListPlus<cAnalyzeGenotype>& lineage = batch[cur_batch].List();
+ int batch_size = lineage.GetSize();
+
+ // This isn't the most efficient way to do this...
+ int dist_from_root = 0; // Number of mutations from the start ancestor
+
+ const cAnalyzeGenotype* genotype_0 = NULL; //Initial Genotype
+ const cAnalyzeGenotype* genotype_A = NULL; //Mutant A
+ const cAnalyzeGenotype* genotype_AB = NULL; //Mutant AB
+
+ for (int A = 1; A < batch_size; A++){
+ genotype_0 = lineage.GetPos(A-1);
+ genotype_A = lineage.GetPos(A);
+ dist_from_root += genotype_A->GetParentDist();
+ int dist_from_A = 0; // Number of mutations from genotype_A
+ tArray<bool> mutated_from_A(genotype_0->GetAlignedSequence().GetSize());
+ mutated_from_A.SetAll(false);
+
+ for (int AB = A+1; AB < batch_size; AB++){
+ genotype_AB = lineage.GetPos(AB);
+ dist_from_A += genotype_AB->GetParentDist();
- int cum_dist = 0;
- cString str_parent = parent_genotype->GetSequence();
- cString str_other = "";
- cString str_align_parent = parent_genotype->GetAlignedSequence();
- cString str_align_other = genotype->GetAlignedSequence();
+ // Gather our genotype strings
+ cString str_0 = genotype_0->GetAlignedSequence();
+ cString str_A = genotype_A->GetAlignedSequence();
+ cString str_AB = genotype_AB->GetAlignedSequence();
+ cString str_B = "";
cString reversion = ""; //Reversion mask
-
- //Find what changes to revert
- for (int k = 0; k < str_align_parent.GetSize(); k++)
- {
- char p = str_align_parent[k];
- char c = str_align_other[k];
- if (p == c)
- reversion += " "; //Nothing
- else if (p == '_' && c != '_')
- reversion += "+"; //Insertion
- else if (p != '_' && c == '_')
- reversion += "-"; //Deletion
- else
- reversion += p; //Point Mutation
- }
-
- tListIterator<cAnalyzeGenotype> next_it(batch_it);
- for (int i = 0; i < max_depth; i++)
- {
- if ( (other_genotype = next_it.Next()) != NULL &&
- (cum_dist <= max_dist || max_dist == -1) )
- {
- cum_dist += other_genotype->GetParentDist();
- if (cum_dist > max_dist && max_dist != -1)
- break;
- str_other = other_genotype->GetSequence();
- str_align_other = other_genotype->GetAlignedSequence();
-
- //Revert "background" to parental form
- cString reverted = "";
- for (int k = 0; k < reversion.GetSize(); k++)
- {
- if (reversion[k] == '+') continue; //Insertion, so skip
- else if (reversion[k] == '-') reverted += str_align_parent[k]; //Add del
- else if (reversion[k] != ' ') reverted += reversion[k]; //Revert mut
- else if (str_align_other[k] != '_') reverted += str_align_other[k]; //Keep current
- }
-
- cAnalyzeGenotype new_genotype(m_world, reverted, inst_set); //Get likely fitness
- new_genotype.Recalculate(m_ctx, test_cpu, NULL, test_info, 50);
-
-
- FOT << other_genotype->GetID() << " "
- << other_genotype->GetFitness() << " "
- << other_genotype->GetUpdateBorn() << " "
- << cum_dist << " "
- << new_genotype.GetFitness() << " ";
- }
- else
- {
- FOT << -1 << " "
- << -1 << " "
- << -1 << " "
- << -1 << " "
- << -1 << " ";
- }
- }
- FOT << endl;
- }
- parent_genotype = genotype;
+
+ // For point mutations, has this site been mutated from A before AB?
+ for (int k = 0; k < str_A.GetSize(); k++)
+ if (str_A[k] != str_AB[k])
+ mutated_from_A[k] = true;
+
+ int dist_0_A = 0;
+ // Find Reversion Mask for Mutant A
+ for (int k = 0; k < str_A.GetSize(); k++){
+ char c0 = str_0[k];
+ char cA = str_A[k];
+ if (c0 == cA) // No change
+ reversion += " ";
+ else if (c0 == '_' && cA != '_'){ // Insertion
+ reversion += "+";
+ dist_0_A++;
+ }
+ else if (c0 != '_' && cA == '_'){ // Deletion
+ reversion += "-";
+ dist_0_A++;
+ }
+ else{
+ reversion += "m"; //Point Mutation
+ dist_0_A++;
+ }
+ }
+
+ //Revert "background" to remove mutation from genotype0 to genotypeA
+ cString tmp_B; // Debugging string
+ for (int k = 0; k < reversion.GetSize(); k++){
+ switch(reversion[k]){
+ case '+': // Insertion from 0 to A, so remove site altogether
+ tmp_B += str_AB[k];
+ continue;
+ break;
+
+ case '-': // Deletion from 0 to A, add the site back in
+ str_B += str_0[k];
+ tmp_B += str_AB[k];
+ break;
+
+ case ' ': // No change from 0 to A
+ if (str_AB[k] != '_'){ // If the site still exists
+ str_B += str_AB[k]; // Keep current state
+ }
+ tmp_B += str_AB[k];
+ break;
+
+ case 'm': // Point mutation from 0 to A
+ if (str_AB[k] != '_' && !mutated_from_A[k]){ // If the site still exists and is the
+ str_B += str_0[k]; // same as mutant A, revert to mutant 0
+ tmp_B += str_0[k];
+ }
+ else{
+ str_B += str_AB[k]; // Otherwise keep the AB form.
+ tmp_B += str_AB[k];
+ }
+ break;
+ }
+ }
+
+ // Get our fitness values
+ double fitness_A = genotype_A->GetFitness();
+ double fitness_AB = genotype_AB->GetFitness();
+ double fitness_B = -1.0;
+ double P_H_A = -1.0; //Phenotypic Entropies
+ double P_H_AB = -1.0;
+ double P_H_B = -1.0;
+
+ //Calculate (or recalculate) fitnesses to account for plasticity
+ if (!genotype_A->PhenPlastCalculated()){
+ cPhenPlastGenotype pp(genotype_A->GetGenome(), num_trials, m_world, m_ctx);
+ fitness_A = pp.GetLikelyFitness();
+ P_H_A = pp.GetPhenotypicEntropy();
+ }
+ else
+ P_H_A = genotype_A->GetPhenotypicEntropy();
+
+ if (genotype_AB->PhenPlastCalculated()){
+ cPhenPlastGenotype pp(genotype_AB->GetGenome(), num_trials, m_world, m_ctx);
+ fitness_AB = pp.GetLikelyFitness();
+ P_H_AB = pp.GetPhenotypicEntropy();
+ }
+ else
+ P_H_AB = genotype_AB->GetPhenotypicEntropy();
+
+ cPhenPlastGenotype pp(str_B, num_trials, m_world, m_ctx);
+ fitness_B = pp.GetLikelyFitness();
+ P_H_B = pp.GetPhenotypicEntropy();
+
+
+ /*
+ FOT output per line
+ ID_A
+ DEPTH_A
+ BIRTH_A
+ FITNESS_A
+ PHEN_PLAST_ENTROPY_A
+ ID_AB
+ DEPTH_AB
+ BIRTH_AB
+ FITNESS_AB
+ PHEN_PLAST_ENTROPY_AB
+ DISTANCE_A_AB
+ FITNESS_B
+ PHEN_PLAST_ENTROPY_B
+ */
+
+ df.Write(genotype_A->GetID(), "ID_A");
+ df.Write(genotype_A->GetDepth(), "Depth_A");
+ df.Write(genotype_A->GetUpdateBorn(), "Birth_A");
+ df.Write(fitness_A, "Fitness_A");
+ df.Write(P_H_A, "Phenotypic_Entropy_A");
+ df.Write(dist_0_A, "Mutation_Distance_0_A");
+
+ df.Write(genotype_AB->GetID(), "ID_AB");
+ df.Write(genotype_AB->GetDepth(), "Depth_AB");
+ df.Write(genotype_AB->GetUpdateBorn(), "Birth_AB");
+ df.Write(fitness_AB, "Fitness_AB");
+ df.Write(P_H_AB, "Phenotypic_Entropy_AB");
+ df.Write(dist_from_A, "Mutation_Distance_A_AB");
+
+ df.Write(fitness_AB, "Fitness_B");
+ df.Write(P_H_AB, "Phenotypic_Entropy_B");
+ df.Endl();
+
+ /* Debug
+ cout << "0: " << str_0 << endl;
+ cout << " " << reversion << endl;
+ cout << "A: " << str_A << endl;
+ cout << "AB: " << str_AB << endl;
+ cout << "BX: " << tmp_B << endl;
+ cout << " ";
+ for (int k = 0; k < mutated_from_A.GetSize(); k++){
+ if (!mutated_from_A[k])
+ cout << " ";
+ else
+ cout << "*";
+ }
+ cout << endl;
+ cout << "B: " << str_B << endl;
+ cout << endl;
+ */
+
+
+ }
+
+
}
-
//Clean up
- delete test_cpu;
- delete test_info;
-
+ m_world->GetDataFileManager().Remove(filename);
return;
}
-
+
void cAnalyze::EnvironmentSetup(cString cur_string)
{
cout << "Running environment command: " << endl << " " << cur_string << endl;
Modified: branches/matt/PairwiseEpistasis/source/analyze/cAnalyzeGenotype.cc
===================================================================
--- branches/matt/PairwiseEpistasis/source/analyze/cAnalyzeGenotype.cc 2008-01-08 19:20:13 UTC (rev 2256)
+++ branches/matt/PairwiseEpistasis/source/analyze/cAnalyzeGenotype.cc 2008-01-08 21:20:12 UTC (rev 2257)
@@ -357,7 +357,7 @@
{
// Implicit genotype recalculation if required
if (m_phenplast_stats == NULL){
- cPhenPlastGenotype pp(genome, 1000, m_world, m_world->GetDefaultContext());
+ cPhenPlastGenotype pp(genome, DEFAULT_NUM_PP_TRIALS, m_world, m_world->GetDefaultContext());
SummarizePhenotypicPlasticity(pp);
}
}
Modified: branches/matt/PairwiseEpistasis/source/analyze/cAnalyzeGenotype.h
===================================================================
--- branches/matt/PairwiseEpistasis/source/analyze/cAnalyzeGenotype.h 2008-01-08 19:20:13 UTC (rev 2256)
+++ branches/matt/PairwiseEpistasis/source/analyze/cAnalyzeGenotype.h 2008-01-08 21:20:12 UTC (rev 2257)
@@ -63,7 +63,11 @@
class cTestCPU;
class cWorld;
+// @ MRR Default number of trials for phenotypic
+// plasticity
+# define DEFAULT_NUM_PP_TRIALS 100
+
class cAnalyzeGenotype;
class cAnalyzeGenotypeLink {
private:
@@ -351,8 +355,8 @@
double GetLikelyFrequency() const { CheckPhenPlast(); return m_phenplast_stats->m_likely_frequency; }
double GetLikelyFitness() const { CheckPhenPlast(); return m_phenplast_stats->m_likely_fitness; }
int GetNumTrials() const { CheckPhenPlast(); return m_phenplast_stats->m_recalculate_trials; }
+ bool PhenPlastCalculated() const { return m_phenplast_stats != NULL; }
-
double GetFitnessRatio() const { return fitness_ratio; }
double GetEfficiencyRatio() const { return efficiency_ratio; }
double GetCompMeritRatio() const { return comp_merit_ratio; }
More information about the Avida-cvs
mailing list