[Avida-cvs] [Avida2-svn] r257 - in trunk/source: main tools
ofria@myxo.css.msu.edu
ofria at myxo.css.msu.edu
Sun Jul 24 09:36:45 PDT 2005
Author: ofria
Date: 2005-07-24 12:36:45 -0400 (Sun, 24 Jul 2005)
New Revision: 257
Modified:
trunk/source/main/environment.cc
trunk/source/tools/tDictionary.hh
trunk/source/tools/tList.hh
Log:
Rebuilt the inside of tDictionary to story all of its data in a single linked
list, with a hash table pointing into that list for rapid access. This will
also allow us to have an iterator that can traverse the entries with relative
ease.
In the process, I also improved the functionality of iterators for tList by
giving them GetPos (which returns a pointer), Set and Reset methods. Finally,
I added a Transfer method onto tList which will move all of the entries from
one list onto the end of another quite rapidly.
Modified: trunk/source/main/environment.cc
===================================================================
--- trunk/source/main/environment.cc 2005-07-23 10:15:26 UTC (rev 256)
+++ trunk/source/main/environment.cc 2005-07-24 16:36:45 UTC (rev 257)
@@ -643,6 +643,7 @@
bool cEnvironment::Load(const cString & filename)
{
+ cerr << "--- ENVIRONMENT SETUP ---" << endl;
cerr << "ENV: Loading file '" << filename << "'." << endl;
cInitFile infile(filename);
@@ -673,6 +674,8 @@
}
}
+ cerr << "--- FINISHED ENVIRONMENT SETUP---" << endl;
+
return true;
}
Modified: trunk/source/tools/tDictionary.hh
===================================================================
--- trunk/source/tools/tDictionary.hh 2005-07-23 10:15:26 UTC (rev 256)
+++ trunk/source/tools/tDictionary.hh 2005-07-24 16:36:45 UTC (rev 257)
@@ -5,6 +5,29 @@
// before continuing. SOME RESTRICTIONS MAY APPLY TO USE OF THIS FILE. //
//////////////////////////////////////////////////////////////////////////////
+/*
+ * This template is used to look up objects of the desired type by name.
+ * I is implemented through use of a linked list and a hash table. The linked
+ * list contains all of the individual entries stored in the dictionary (in an
+ * arbitrary order). The hash table points to the first entry in the list
+ * that fits in its cell. If there are no entries that fit in the cell, the
+ * has table contains a NULL pointer at that location.
+ *
+ * On INSERT: If a cell already has at least one entry in it, the new entry
+ * gets inserted into the linked list before the existing entry. If the cell
+ * is currently empty, the new entry gets placed at the end of the linked
+ * list. In either case, the cell is updated to point at the new entry.
+ *
+ * On DELETE: Start looking at the position in the list where the cell is
+ * pointing and continue until the entry-to-be-deleted is found. If the
+ * entry to be deleted is the one being pointed at, be sure to update the
+ * cell.
+ *
+ * On LOOKUP: If the cell has a NULL pointer, lookup fails. Otherwise search
+ * through list until either correct entry is found (lookup succeeds) or else
+ * the lookup finds an entry not in the current cell (lookup fails).
+ */
+
#ifndef TDICTIONARY_HH
#define TDICTIONARY_HH
@@ -14,6 +37,9 @@
#ifndef STRING_UTIL_HH
#include "string_util.hh"
#endif
+#ifndef TARRAY_HH
+#include "tArray.hh"
+#endif
#ifndef TLIST_HH
#include "tList.hh"
#endif
@@ -29,49 +55,125 @@
template <class T> class tDictionary {
+ // We create a structure with full information about each entry stored in
+ // this dictionary.
template <class U> struct tDictEntry {
cString name;
+ int id;
U data;
};
private:
- int size;
- int hash_size;
- tList< tDictEntry<T> > * hash_table;
+ int dict_size; // How many entries are we storing?
+ int hash_size; // What size hash table are we using?
+
+ tList< tDictEntry<T> > entry_list; // A linked list of ALL entries
+ tArray< tListNode< tDictEntry<T> > * > cell_array; // Pointers to the entry list.
+ tListIterator< tDictEntry<T> > list_it; // Iterator for entry_list
+ // Currently, we hash a string simply by adding up the individual character
+ // values in that string and modding by the hash size. For most applications
+ // this will work fine (and it is fast!) but some patters will cause all
+ // strings to go into the same cell. For example, "ABC"=="CBA"=="BBB".
int HashString(const cString & key) const {
unsigned int out_hash = 0;
for (int i = 0; i < key.GetSize(); i++)
out_hash += (unsigned int) key[i];
return out_hash % hash_size;
}
+
+ // Function to find the appropriate tDictEntry for a string that is passed
+ // in and return it.
tDictEntry<T> * FindEntry(const cString & name) {
const int bin = HashString(name);
- tListIterator< tDictEntry<T> > list_it(hash_table[bin]);
- while (list_it.Next() != NULL) {
+ if (cell_array[bin] == NULL) return NULL;
+
+ // Set the list iterator to the first entry of this bin.
+ list_it.Set(cell_array[bin]);
+
+ // Loop through all entries in this bin to see if any are a perfect match.
+ while (list_it.Get() != NULL && list_it.Get()->id == bin) {
if (list_it.Get()->name == name) return list_it.Get();
+ list_it.Next();
}
+
+ // No matches found.
return NULL;
}
public:
- tDictionary() : size(0), hash_size(DICTIONARY_HASH_DEFAULT) {
- hash_table = new tList< tDictEntry<T> >[hash_size];
+ tDictionary(int in_hash_size=DICTIONARY_HASH_DEFAULT)
+ : dict_size(0)
+ , hash_size(in_hash_size)
+ , cell_array(in_hash_size)
+ , list_it(entry_list)
+ {
+ cell_array.SetAll(NULL);
}
+
~tDictionary() {
- for (int i = 0; i < hash_size; i++)
- while (hash_table[i].GetSize()) delete hash_table[i].Pop();
- delete [] hash_table;
+ while (entry_list.GetSize()) delete entry_list.Pop();
}
- int GetSize() { return size; }
+
+ bool OK() {
+ cout << "DICT_SIZE = " << dict_size << endl;
+ cout << "HASH_SIZE = " << hash_size << endl;
+ int count = 0;
+ cout << "LIST ELEMENTS:" << endl;
+ list_it.Reset();
+ while (list_it.Next() != NULL) {
+ tDictEntry<T> * cur_entry = list_it.Get();
+ cout << " " << count << " : "
+ << cur_entry->id << " "
+ << cur_entry->name << " "
+ << cur_entry->data << " "
+ << endl;
+ }
+ cout << endl;
+ cout << "ARRAY CELLS: "
+ << cell_array.GetSize()
+ << endl;
+ for (int i = 0; i < hash_size; i++) {
+ tListNode< tDictEntry<T> > * cur_list_node = cell_array[i];
+ if (cur_list_node == NULL) {
+ cout << " NULL" << endl;
+ } else {
+ cout << " " << cur_list_node->data->id
+ << " " << cur_list_node->data->name
+ << endl;
+ }
+ }
+
+ return true;
+ }
+
+ int GetSize() { return dict_size; }
void Add(const cString & name, T data) {
+ // Build the new entry...
tDictEntry<T> * new_entry = new tDictEntry<T>;
new_entry->name = name;
new_entry->data = data;
const int bin = HashString(name);
- hash_table[bin].Push(new_entry);
- size++;
+ new_entry->id = bin;
+
+
+ // Determine where this new entry should go; either at the beginning of
+ // the list (if there are no others in the bin) or following another
+ // entry in the bin.
+ if (cell_array[bin] == NULL) {
+ list_it.Reset(); // Reset to list start
+ entry_list.Insert(list_it, new_entry); // Place new entry in the list
+ list_it.Next(); // Advance to new entry
+ cell_array[bin] = list_it.GetPos(); // Record position
+ } else {
+ list_it.Set(cell_array[bin]); // Setup insert point
+ entry_list.Insert(list_it, new_entry); // Place new entry in the list
+ list_it.Next(); // Advance to new entry
+ }
+
+ // Update our entry count...
+ dict_size++;
}
bool HasEntry(const cString & name) {
@@ -88,56 +190,86 @@
}
T Remove(const cString & name) {
+ // Determine the bin that we are going to be using.
const int bin = HashString(name);
+
T out_data = NULL;
- tListIterator< tDictEntry<T> > list_it(hash_table[bin]);
- while (list_it.Next() != NULL) {
- if (list_it.Get()->name == name) {
- out_data = list_it.Get()->data;
- list_it.Remove();
- size--;
- break;
+ assert(cell_array[bin] != NULL);
+ list_it.Set(cell_array[bin]);
+
+ // If we are deleting the first entry in this bin we must clean up...
+ if (list_it.Get()->name == name) {
+ out_data = list_it.Get()->data;
+ list_it.Remove();
+ list_it.Next();
+ dict_size--;
+ // See if the next entry is still part of this cell.
+ if (list_it.AtRoot() == false && list_it.Get()->id == bin) {
+ cell_array[bin] = list_it.GetPos();
+ } else {
+ cell_array[bin] = NULL;
}
}
+
+ // If it was not the first entry in this cell, keep looking!
+ else {
+ while (list_it.Next() != NULL && list_it.Get()->id == bin) {
+ if (list_it.Get()->name == name) {
+ out_data = list_it.Get()->data;
+ list_it.Remove();
+ dict_size--;
+ break;
+ }
+ }
+ }
+
return out_data;
}
cString NearMatch(const cString name) {
cString best_match("");
int best_dist = name.GetSize();
- for (int i = 0; i < hash_size; i++) {
- tListIterator< tDictEntry<T> > list_it(hash_table[i]);
- while (list_it.Next() != NULL) {
- int dist = cStringUtil::EditDistance(name, list_it.Get()->name);
- if (dist < best_dist) {
- best_dist = dist;
- best_match = list_it.Get()->name;
- }
+ list_it.Reset();
+ while (list_it.Next() != NULL) {
+ int dist = cStringUtil::EditDistance(name, list_it.Get()->name);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_match = list_it.Get()->name;
}
}
return best_match;
}
void SetHash(int _hash) {
- const int old_hash_size = hash_size;
-
// Create the new table...
hash_size = _hash;
- tList< tDictEntry<T> > * new_hash_table =
- new tList< tDictEntry<T> >[hash_size];
+ cell_array.ResizeClear(hash_size);
+ cell_array.SetAll(NULL);
- // Move everything over...
- for (int i = 0; i < old_hash_size; i++) {
- while (hash_table[i].GetSize() > 0) {
- tDictEntry<T> * cur_entry = hash_table[i].Pop();
- const int bin = HashString(cur_entry->name);
- new_hash_table[bin].Push(cur_entry);
+ // Backup all of the entries in the list and re-insert them one-by-one.
+ tList< tDictEntry<T> > backup_list;
+ backup_list.Transfer(entry_list);
+
+ while (backup_list.GetSize() > 0) {
+ tDictEntry<T> * cur_entry = backup_list.Pop();
+
+ // determine the new bin for this entry.
+ int bin = HashString(cur_entry->name);
+ cur_entry->id = bin;
+
+ if (cell_array[bin] == NULL) {
+ list_it.Reset(); // Reset to list start
+ entry_list.Insert(list_it, cur_entry); // Place new entry in the list
+ list_it.Next(); // Advance to new entry
+ cell_array[bin] = list_it.GetPos(); // Record position
+ } else {
+ list_it.Set(cell_array[bin]); // Setup insert point
+ entry_list.Insert(list_it, cur_entry); // Place new entry in the list
+ list_it.Next(); // Advance to new entry
}
+
}
- // Cleanup...
- delete [] hash_table;
- hash_table = new_hash_table;
}
};
Modified: trunk/source/tools/tList.hh
===================================================================
--- trunk/source/tools/tList.hh 2005-07-23 10:15:26 UTC (rev 256)
+++ trunk/source/tools/tList.hh 2005-07-24 16:36:45 UTC (rev 257)
@@ -32,6 +32,9 @@
tBaseIterator() { ; }
virtual ~tBaseIterator() { ; }
+ virtual void Set(tListNode<T> * in_node) = 0;
+ virtual void Reset() = 0;
+
virtual const T * GetConst() = 0;
virtual const T * NextConst() = 0;
virtual const T * PrevConst() = 0;
@@ -50,9 +53,12 @@
const tListNode<T> * GetConstNode() { return node; }
public:
explicit tListIterator(tList<T> & _list);
+ explicit tListIterator(tList<T> & _list, tListNode<T> * start_node);
~tListIterator();
+ void Set(tListNode<T> * in_node) { node = in_node; }
void Reset();
+ tListNode<T> * GetPos() { return node; }
T * Get();
T * Next();
@@ -80,8 +86,11 @@
const tListNode<T> * GetConstNode() { return node; }
public:
explicit tConstListIterator(const tList<T> & _list);
+ explicit tConstListIterator(const tList<T> & _list,
+ const tListNode<T> * start_node);
~tConstListIterator();
+ void Set(tListNode<T> * in_node) { node = in_node; }
void Reset();
const T * Get();
@@ -215,19 +224,24 @@
void CircPrev() { if (size > 0) Push(PopRear()); }
T * Remove(tListIterator<T> & other) {
- if (&(other.list) != this) return NULL;
+ if (&(other.list) != this) return NULL; // @CAO make this an assert?
return RemoveNode(other.node);
}
T * Insert(tListIterator<T> & list_it, T * in_data) {
+ tListNode<T> * cur_node = list_it.node;
+
+ // Build the new node for the list...
tListNode<T> * new_node = new tListNode<T>;
new_node->data = in_data;
- new_node->next = list_it.node->next;
- new_node->prev = list_it.node;
- list_it.node->next->prev = new_node;
- list_it.node->next = new_node;
+ // Insert the new node after the iterator...
+ new_node->next = cur_node->next;
+ new_node->prev = cur_node;
+ cur_node->next->prev = new_node;
+ cur_node->next = new_node;
size++;
+
return in_data;
}
@@ -249,11 +263,39 @@
int GetSize() const { return size; }
+ // Copy another list onto the end of this one.
void Append(tList<T> & other_list) {
tListIterator<T> other_it(other_list);
while (other_it.Next() != NULL) PushRear(other_it.Get());
}
+ // Empty out another list, transferring its contents to the end of this one.
+ void Transfer(tList<T> & other_list) {
+ // If the other list is empty, stop here.
+ if (other_list.GetSize() == 0) return;
+
+ // Hook this list into the other one.
+ other_list.root.next->prev = root.prev;
+ other_list.root.prev->next = &root;
+ root.prev->next = other_list.root.next;
+ root.prev = other_list.root.prev;
+
+ // Clean up the other list so it has no entries.
+ other_list.root.next = &(other_list.root);
+ other_list.root.prev = &(other_list.root);
+
+ // Update the size
+ size += other_list.size;
+ other_list.size = 0;
+
+ // Update all iterators in the other list to point at the root.
+ tListNode< tBaseIterator<T> > * test_it = other_list.it_root.next;
+ while (test_it != &other_list.it_root) {
+ test_it->data->Reset();
+ test_it = test_it->next;
+ }
+ }
+
// Find by value
T * Find(T * _in) const {
tListNode<T> * test = root.next;
@@ -384,6 +426,13 @@
list.AddIterator(this);
}
+template <class T> tListIterator<T>::tListIterator(tList<T> & _list,
+ tListNode<T> * start_node)
+ : list(_list), node(start_node)
+{
+ list.AddIterator(this);
+}
+
template <class T> tListIterator<T>::~tListIterator()
{
list.RemoveIterator(this);
@@ -445,6 +494,12 @@
list.AddIterator(this);
}
+template <class T> tConstListIterator<T>::tConstListIterator(const tList<T> & _list, const tListNode<T> * start_node)
+ : list(_list), node(start_node)
+{
+ list.AddIterator(this);
+}
+
template <class T> tConstListIterator<T>::~tConstListIterator()
{
list.RemoveIterator(this);
More information about the Avida-cvs
mailing list