[Avida-cvs] [avida-svn] r765 - in trunk: documentation/content/using source/main

dule@myxo.css.msu.edu dule at myxo.css.msu.edu
Thu Jun 22 07:08:14 PDT 2006


Author: dule
Date: 2006-06-22 10:08:13 -0400 (Thu, 22 Jun 2006)
New Revision: 765

Modified:
   trunk/documentation/content/using/analyze_mode.html
   trunk/source/main/cAnalyze.cc
   trunk/source/main/cAnalyze.h
Log:
Added an analyze routine, LEVENSTEIN_SINGLE. While the regular LEVENSTEIN command
calculates the average Levenshtein (edit) distance between sequences from two batches,
the new routine reports the Levenshtein distance between the first (most abundant)
sequence in the batch and all the other ones. This is useful for looking at epistasis.
 


Modified: trunk/documentation/content/using/analyze_mode.html
===================================================================
--- trunk/documentation/content/using/analyze_mode.html	2006-06-21 21:24:27 UTC (rev 764)
+++ trunk/documentation/content/using/analyze_mode.html	2006-06-22 14:08:13 UTC (rev 765)
@@ -336,6 +336,10 @@
 	and b2.  This metric is similar to hamming distance, but calculates
 	the minimum number of single insertions, deletions, and mutations to
 	move from one sequence to the other.
+<tr><td><b>LEVENSTEIN_SINGLE [<font color="#008800">file="lev.dat"</font>]
+	Variation on the above. Calculates the Levenstein distance between the
+	first sequence in the batch (usually the most abundant) and all other
+	sequences in the batch. Useful for calculating "population epistasis".
 <tr><td><b>SPECIES [<font color="#008800">file="species.dat"</font>]
 	[<font color="#0000AA">bach1</font>]
 	[<font color="#0000AA">bach2</font>]

Modified: trunk/source/main/cAnalyze.cc
===================================================================
--- trunk/source/main/cAnalyze.cc	2006-06-21 21:24:27 UTC (rev 764)
+++ trunk/source/main/cAnalyze.cc	2006-06-22 14:08:13 UTC (rev 765)
@@ -5172,6 +5172,39 @@
   df.Endl();
 }
 
+void cAnalyze::CommandLevensteinSingle(cString cur_string)
+{ // Writes the edit distance from the batch's first genotype to each genotype in that batch.
+  cString filename("lev.dat");  // default output file name
+  if (cur_string.GetSize() != 0) filename = cur_string.PopWord();  // first argument overrides it
+  ofstream & fp = data_file_manager.GetOFStream(filename);
+  // The next word selects the batch; how PopBatch treats an empty word is not visible here.
+  int batch1 = PopBatch(cur_string.PopWord());
+  
+  cout << "Calculating Levenstein Distance between most abundant genotype and other genotypes... ";
+  cout << endl; 
+  cout.flush();
+  
+  // Set up the iterator; the first genotype returned becomes the reference.
+  tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
+  cAnalyzeGenotype * reference_genotype = list1_it.Next(); 
+  cAnalyzeGenotype * genotype1; 
+  list1_it.Reset();  // rewind so the loop below starts again from the first genotype
+  // Output format: one header line, then every distance on a single space-separated line.
+  fp << "# Levenstein distance information"; 
+  fp << endl; 
+  // Loop through all of the genotypes in the batch...
+  while ((genotype1 = list1_it.Next()) != NULL) {
+      const int dist = cGenomeUtil::FindEditDistance(reference_genotype->GetGenome(),
+                                                     genotype1->GetGenome());
+      fp << dist; 
+      fp << " "; 
+  }
+  fp << endl; 
+  fp.close();  // NOTE(review): stream came from data_file_manager -- confirm closing it here is intended
+}
+
+
+
 void cAnalyze::CommandSpecies(cString cur_string)
 {
   cString filename("species.dat");
@@ -7578,6 +7611,7 @@
   // Population comparison commands...
   AddLibraryDef("HAMMING", &cAnalyze::CommandHamming);
   AddLibraryDef("LEVENSTEIN", &cAnalyze::CommandLevenstein);
+  AddLibraryDef("LEVENSTEIN_SINGLE", &cAnalyze::CommandLevensteinSingle);
   AddLibraryDef("SPECIES", &cAnalyze::CommandSpecies);
   AddLibraryDef("RECOMBINE", &cAnalyze::CommandRecombine);
   

Modified: trunk/source/main/cAnalyze.h
===================================================================
--- trunk/source/main/cAnalyze.h	2006-06-21 21:24:27 UTC (rev 764)
+++ trunk/source/main/cAnalyze.h	2006-06-22 14:08:13 UTC (rev 765)
@@ -176,6 +176,7 @@
   // Population Comparison Commands...
   void CommandHamming(cString cur_string);
   void CommandLevenstein(cString cur_string);
+  void CommandLevensteinSingle(cString cur_string);
   void CommandSpecies(cString cur_string);
   void CommandRecombine(cString cur_string);
 




More information about the Avida-cvs mailing list