[Avida-SVN] r3506 - development/source/main

Mon Oct 26 07:08:14 PDT 2009

Author: dk
Date: 2009-10-26 10:08:14 -0400 (Mon, 26 Oct 2009)
New Revision: 3506

Modified:
   development/source/main/cGenomeUtil.cc
   development/source/main/cGenomeUtil.h
   development/source/main/cPopulationInterface.cc
Log:
bugfix in hgt code.

Modified: development/source/main/cGenomeUtil.cc
===================================================================

--- development/source/main/cGenomeUtil.cc	2009-10-21 20:47:25 UTC (rev 3505)
+++ development/source/main/cGenomeUtil.cc	2009-10-26 14:08:14 UTC (rev 3506)
@@ -248,66 +248,29 @@
  
  The return value here is somewhat incomplete.  Eventually, the idea is that the
  list of matches include the starting position of the match, the overall length (extent)
- of the match, and the cost of the match.  Right now, however, it only includes the cost.
- 
- \todo Convert this over to using two rows instead of len(substring)+1 rows (easy, but I'm being lazy).
+ of the match, and the cost of the match.  Right now, however, it only includes the cost
+ and ending position (inclusive). 
  */
 cGenomeUtil::substring_match_list_type cGenomeUtil::FindSubstringMatches(const cGenome& base, const cGenome& substring) {
 	substring_match_list_type ssml(base.GetSize());
 	const int rows=substring.GetSize()+1;
 	const int cols=base.GetSize()+1;
-	int costmat[rows][cols];	
-	bzero(costmat, sizeof(int)*rows*cols);
+	int costmat[2][cols];   
+	int* c=costmat[0]; // current row
+	int* p=costmat[1]; // previous row
+	bzero(costmat, sizeof(int)*2*cols);
 	
-	// initialize the first row, first column, and match list starts.
-	for(int i=0; i<rows; ++i) { costmat[i][0] = i; }
-	for(int j=0; j<cols; ++j) { costmat[0][j] = 0; }
-	//	for(int j=0; j<base.GetSize(); ++j) { ssml[j].start = j; ssml[j].extent = 0; }
-		
-	// now do all the rest...
 	for(int i=1; i<rows; ++i) {
+		c[0] = p[0]+1;
 		for(int j=1; j<cols; ++j) {
-			// we're only doing cost at the moment, so we can get away with something simple (i know, i know; space & time, etc., etc.):
-			int l[3] = { costmat[i-1][j-1], costmat[i-1][j], costmat[i][j-1] };
-			costmat[i][j] = *std::min_element(l,l+3) + (substring[i-1] == base[j-1]);
-// if we need to know the relationship among all three elements for tracking extents,
-// then this may be a good starting point:
-// A=above left, B=above, C=left
-//			if(costmat[i-1][j-1] < costmat[i-1][j]) {
-//				// A < B
-//				if(costmat[i-1][j-1] < costmat[i][j-1]) {
-//					// A < C --> A < (B,C)
-//					costmat[i][j] = costmat[i-1][j-1];
-//					++ssml[j-1].extent;
-//				} else {
-//					// A >= C --> C <= (A,B)
-//					costmat[i][j] = costmat[i][j-1];
-//					++ssml[j-1].extent;
-//				}
-//			} else {
-//				// A >= B
-//				if(costmat[i-1][j] < costmat[i][j-1]) {
-//					// B < C --> B <= (A,C)
-//					costmat[i][j] = costmat[i-1][j];
-//					// extent unchanged
-//				} else {
-//					// B >= C --> C <= (A,B)
-//					costmat[i][j] = costmat[i][j-1];
-//					++ssml[j-1].extent;
-//				}
-//			}
-//			
-//			// add the cost of a mismatch:
-//			costmat[i][j] += (substring[j] == base[i]);
+			int l[3] = {p[j-1], p[j], c[j-1]};
+			c[j] = *std::min_element(l,l+3) + (substring[i-1] != base[j-1]);
+			ssml[j-1].cost = c[j];
+			ssml[j-1].position = j-1;
 		}
+		std::swap(c,p);
 	}
 	
-	// copy match costs from the last row, skipping the null column (it can't ever be least):
-	for(int j=0; j<base.GetSize(); ++j) {
-		ssml[j].cost = costmat[rows-1][j+1];
-		ssml[j].position = j;
-	}
-	
 	return ssml;
 }
 

Modified: development/source/main/cGenomeUtil.h
===================================================================
--- development/source/main/cGenomeUtil.h	2009-10-21 20:47:25 UTC (rev 3505)
+++ development/source/main/cGenomeUtil.h	2009-10-26 14:08:14 UTC (rev 3506)
@@ -58,14 +58,23 @@
   static int FindSlidingDistance(const cGenome& gen1, const cGenome& gen2);
   static int FindEditDistance(const cGenome& gen1, const cGenome& gen2);
 
-	//! Substring match record.
+	/*! Substring match record.
+	 
+	 The intent behind a substring match record is that it specifies where (position) a match
+	 between two genomes was found, the length (extent) of that match in the base string,
+	 and the edit distance (cost) of that match.
+	 
+	 The position and extent are inclusive, and describe the matching region in the
+	 base string like so: [position-extent, position]
+	 */
 	struct substring_match {
 		//! Default constructor.
-		substring_match() : position(0), extent(0), cost(0) { }
+		substring_match() : position(0), //extent(0),
+		cost(0) { }
 		//! Operator< overload.
 		bool operator<(const substring_match& sm) { return cost < sm.cost; }
 		int position; //!< Final position in the base string of this match.
-		int extent; //!< Length of the match in the base string.
+		//int extent; //!< Length of the match in the base string.
 		int cost; //!< Cost (edit distance) of this match.
 	};
 	

Modified: development/source/main/cPopulationInterface.cc
===================================================================
--- development/source/main/cPopulationInterface.cc	2009-10-21 20:47:25 UTC (rev 3505)
+++ development/source/main/cPopulationInterface.cc	2009-10-26 14:08:14 UTC (rev 3506)
@@ -635,7 +635,6 @@
  is called.
  
  \todo HGT should prefer more similar and older fragments.
- \todo Enable replacement mutations.
  */
 void cPopulationInterface::DoHGTMutation(cAvidaContext& ctx, cGenome& offspring) {
 	// get this organism's cell:
@@ -670,13 +669,14 @@
 	// there are (currently) two supported types of HGT mutations: insertions & replacements.
 	// which one are we doing?
 	if(ctx.GetRandom().P(m_world->GetConfig().HGT_INSERTION_MUT_P.Get())) {
-		// insertion: insert the fragment at the final location of the match:
-		offspring.Insert(ssm.position, *f);
+		// insertion: insert the fragment just after the final location of the match:
+		offspring.Insert(ssm.position+1, *f);
 	} else {
 		// replacement: replace up to fragment size instructions in the genome.
 		// note that replacement can wrap around from front->back.  replacement counts
 		// forward, so we have to find the start position first.
 		int start = ssm.position - f->GetSize() + 1;
+		//int start = ssm.position - ssm.extent + 1; // this isn't turned on yet.
 		if(start < 0) { start += offspring.GetSize(); }
 		for(int i=0; i<f->GetSize(); ++i) {
 			offspring[start] = (*f)[i];
@@ -688,22 +688,4 @@
 	m_world->GetPopulation().AdjustHGTResource(-f->GetSize());
 	m_world->GetStats().GenomeFragmentInserted(cell.GetOrganism(), *f);
 	fragments.erase(f);
-	
-	// old code from a previous version of hgt, kept around for reference:
-	//	// calculate the best match for each fragment:
-	//	cGenomeUtil::substring_match_list_type match_list;
-	//	for(fragment_list_type::iterator i=fragments.begin(); i!=fragments.end(); ++i) {
-	//		match_list.push_back(cGenomeUtil::FindBestSubstringMatch(cell.GetOrganism()->GetGenome(), *i));
-	//	}	
-	
-	//	for(fragment_list_type::iterator i=fragments.begin(); i!=fragments.end(); ++i) {
-	//		int d = std::max(cGenomeUtil::FindHammingDistance(nearby, *i), 1);
-	//		if(m_world->GetRandom().P(m_world->GetConfig().HGT_INSERTION_PROB.Get() / d)) {
-	//			m_hardware->InsertGenomeFragment(*i);
-	//			m_world->GetPopulation().AdjustHGTResource(-i->GetSize());
-	//			m_world->GetStats().GenomeFragmentInserted(this, *i);
-	//			fragments.erase(i);
-	//			return true;
-	//		}
-	//	}
 }