[Avida-SVN] r3506 - development/source/main
dk at myxo.css.msu.edu
dk at myxo.css.msu.edu
Mon Oct 26 07:08:14 PDT 2009
Author: dk
Date: 2009-10-26 10:08:14 -0400 (Mon, 26 Oct 2009)
New Revision: 3506
Modified:
development/source/main/cGenomeUtil.cc
development/source/main/cGenomeUtil.h
development/source/main/cPopulationInterface.cc
Log:
bugfix in hgt code.
Modified: development/source/main/cGenomeUtil.cc
===================================================================
--- development/source/main/cGenomeUtil.cc 2009-10-21 20:47:25 UTC (rev 3505)
+++ development/source/main/cGenomeUtil.cc 2009-10-26 14:08:14 UTC (rev 3506)
@@ -248,66 +248,29 @@
The return value here is somewhat incomplete. Eventually, the idea is that the
list of matches include the starting position of the match, the overall length (extent)
- of the match, and the cost of the match. Right now, however, it only includes the cost.
-
- \todo Convert this over to using two rows instead of len(substring)+1 rows (easy, but I'm being lazy).
+ of the match, and the cost of the match. Right now, however, it only includes the cost
+ and ending position (inclusive).
*/
cGenomeUtil::substring_match_list_type cGenomeUtil::FindSubstringMatches(const cGenome& base, const cGenome& substring) {
substring_match_list_type ssml(base.GetSize());
const int rows=substring.GetSize()+1;
const int cols=base.GetSize()+1;
- int costmat[rows][cols];
- bzero(costmat, sizeof(int)*rows*cols);
+ int costmat[2][cols];
+ int* c=costmat[0]; // current row
+ int* p=costmat[1]; // previous row
+ bzero(costmat, sizeof(int)*2*cols);
- // initialize the first row, first column, and match list starts.
- for(int i=0; i<rows; ++i) { costmat[i][0] = i; }
- for(int j=0; j<cols; ++j) { costmat[0][j] = 0; }
- // for(int j=0; j<base.GetSize(); ++j) { ssml[j].start = j; ssml[j].extent = 0; }
-
- // now do all the rest...
for(int i=1; i<rows; ++i) {
+ c[0] = p[0]+1;
for(int j=1; j<cols; ++j) {
- // we're only doing cost at the moment, so we can get away with something simple (i know, i know; space & time, etc., etc.):
- int l[3] = { costmat[i-1][j-1], costmat[i-1][j], costmat[i][j-1] };
- costmat[i][j] = *std::min_element(l,l+3) + (substring[i-1] == base[j-1]);
-// if we need to know the relationship among all three elements for tracking extents,
-// then this may be a good starting point:
-// A=above left, B=above, C=left
-// if(costmat[i-1][j-1] < costmat[i-1][j]) {
-// // A < B
-// if(costmat[i-1][j-1] < costmat[i][j-1]) {
-// // A < C --> A < (B,C)
-// costmat[i][j] = costmat[i-1][j-1];
-// ++ssml[j-1].extent;
-// } else {
-// // A >= C --> C <= (A,B)
-// costmat[i][j] = costmat[i][j-1];
-// ++ssml[j-1].extent;
-// }
-// } else {
-// // A >= B
-// if(costmat[i-1][j] < costmat[i][j-1]) {
-// // B < C --> B <= (A,C)
-// costmat[i][j] = costmat[i-1][j];
-// // extent unchanged
-// } else {
-// // B >= C --> C <= (A,B)
-// costmat[i][j] = costmat[i][j-1];
-// ++ssml[j-1].extent;
-// }
-// }
-//
-// // add the cost of a mismatch:
-// costmat[i][j] += (substring[j] == base[i]);
+ int l[3] = {p[j-1], p[j], c[j-1]};
+ c[j] = *std::min_element(l,l+3) + (substring[i-1] != base[j-1]);
+ ssml[j-1].cost = c[j];
+ ssml[j-1].position = j-1;
}
+ std::swap(c,p);
}
- // copy match costs from the last row, skipping the null column (it can't ever be least):
- for(int j=0; j<base.GetSize(); ++j) {
- ssml[j].cost = costmat[rows-1][j+1];
- ssml[j].position = j;
- }
-
return ssml;
}
Modified: development/source/main/cGenomeUtil.h
===================================================================
--- development/source/main/cGenomeUtil.h 2009-10-21 20:47:25 UTC (rev 3505)
+++ development/source/main/cGenomeUtil.h 2009-10-26 14:08:14 UTC (rev 3506)
@@ -58,14 +58,23 @@
static int FindSlidingDistance(const cGenome& gen1, const cGenome& gen2);
static int FindEditDistance(const cGenome& gen1, const cGenome& gen2);
- //! Substring match record.
+ /*! Substring match record.
+
+ The intent behind a substring match record is that it specifies where (position) a match
+ between two genomes was found, the length (extent) of that match in the base string,
+ and the edit distance (cost) of that match.
+
+ The position and extent are inclusive, and describe the matching region in the
+ base string like so: [position-extent, position]
+ */
struct substring_match {
//! Default constructor.
- substring_match() : position(0), extent(0), cost(0) { }
+ substring_match() : position(0), //extent(0),
+ cost(0) { }
//! Operator< overload.
bool operator<(const substring_match& sm) { return cost < sm.cost; }
int position; //!< Final position in the base string of this match.
- int extent; //!< Length of the match in the base string.
+ //int extent; //!< Length of the match in the base string.
int cost; //!< Cost (edit distance) of this match.
};
Modified: development/source/main/cPopulationInterface.cc
===================================================================
--- development/source/main/cPopulationInterface.cc 2009-10-21 20:47:25 UTC (rev 3505)
+++ development/source/main/cPopulationInterface.cc 2009-10-26 14:08:14 UTC (rev 3506)
@@ -635,7 +635,6 @@
is called.
\todo HGT should prefer more similar and older fragments.
- \todo Enable replacement mutations.
*/
void cPopulationInterface::DoHGTMutation(cAvidaContext& ctx, cGenome& offspring) {
// get this organism's cell:
@@ -670,13 +669,14 @@
// there are (currently) two supported types of HGT mutations: insertions & replacements.
// which one are we doing?
if(ctx.GetRandom().P(m_world->GetConfig().HGT_INSERTION_MUT_P.Get())) {
- // insertion: insert the fragment at the final location of the match:
- offspring.Insert(ssm.position, *f);
+ // insertion: insert the fragment just after the final location of the match:
+ offspring.Insert(ssm.position+1, *f);
} else {
// replacement: replace up to fragment size instructions in the genome.
// note that replacement can wrap around from front->back. replacement counts
// forward, so we have to find the start position first.
int start = ssm.position - f->GetSize() + 1;
+ //int start = ssm.position - ssm.extent + 1; // this isn't turned on yet.
if(start < 0) { start += offspring.GetSize(); }
for(int i=0; i<f->GetSize(); ++i) {
offspring[start] = (*f)[i];
@@ -688,22 +688,4 @@
m_world->GetPopulation().AdjustHGTResource(-f->GetSize());
m_world->GetStats().GenomeFragmentInserted(cell.GetOrganism(), *f);
fragments.erase(f);
-
- // old code from a previous version of hgt, kept around for reference:
- // // calculate the best match for each fragment:
- // cGenomeUtil::substring_match_list_type match_list;
- // for(fragment_list_type::iterator i=fragments.begin(); i!=fragments.end(); ++i) {
- // match_list.push_back(cGenomeUtil::FindBestSubstringMatch(cell.GetOrganism()->GetGenome(), *i));
- // }
-
- // for(fragment_list_type::iterator i=fragments.begin(); i!=fragments.end(); ++i) {
- // int d = std::max(cGenomeUtil::FindHammingDistance(nearby, *i), 1);
- // if(m_world->GetRandom().P(m_world->GetConfig().HGT_INSERTION_PROB.Get() / d)) {
- // m_hardware->InsertGenomeFragment(*i);
- // m_world->GetPopulation().AdjustHGTResource(-i->GetSize());
- // m_world->GetStats().GenomeFragmentInserted(this, *i);
- // fragments.erase(i);
- // return true;
- // }
- // }
}
More information about the Avida-cvs
mailing list