[Avida-cvs] [Avida2-svn] r294 - trunk/source/tools
ofria@myxo.css.msu.edu
ofria at myxo.css.msu.edu
Thu Aug 25 14:51:51 PDT 2005
Author: ofria
Date: 2005-08-25 17:51:51 -0400 (Thu, 25 Aug 2005)
New Revision: 294
Added:
trunk/source/tools/tHashTable.hh
Log:
Added a tHashTable template to the tool section. This is needed for my
previous commit...
Added: trunk/source/tools/tHashTable.hh
===================================================================
--- trunk/source/tools/tHashTable.hh 2005-08-25 21:51:05 UTC (rev 293)
+++ trunk/source/tools/tHashTable.hh 2005-08-25 21:51:51 UTC (rev 294)
@@ -0,0 +1,330 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 1993 - 2005 California Institute of Technology //
+// //
+// Read the COPYING and README files, or contact 'avida at alife.org', //
+// before continuing. SOME RESTRICTIONS MAY APPLY TO USE OF THIS FILE. //
+//////////////////////////////////////////////////////////////////////////////
+
+/*
+ * This template is used to look up objects of the desired type by an integer
+ * hash ID. It is implemented through use of a linked list that contains all
+ * of the individual entries stored in the table (in an arbitrary order).
+ * The hash table points to the first entry in the list that fits in its
+ * cell. If there are no entries that fit in the cell, the table contains a
+ * NULL pointer at that location.
+ *
+ * The inputs are HASH_TYPE and DATA_TYPE
+ *
+ * INTERFACE:
+ * tHashTable(int in_hash_size=DICTIONARY_HASH_DEFAULT) // Constructor
+ * ~tHashTable() // Destructor
+ * int GetSize() // Get num entries
+ * void Add(const HASH_TYPE & key, DATA_TYPE data) // Add new entry
+ * void SetValue(const HASH_TYPE & key, DATA_TYPE data) // Add/modify entry
+ * bool HasEntry(const HASH_TYPE & key) // Test if key exists
+ * bool Find(const HASH_TYPE & key, T & out_data) // Find entry for key
+ * T Remove(const HASH_TYPE & key) // Remove entry
+ * void SetTableSize(int _hash) // Change hash table size
+ *
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * On INSERT: If a cell already has at least one entry in it, the new entry
+ * gets inserted into the linked list before the existing entry. If the cell
+ * is currently empty, the new entry gets placed at the end of the linked
+ * list. In either case, the cell is updated to point at the new entry.
+ *
+ * On DELETE: Start looking at the position in the list where the cell is
+ * pointing and continue until the entry-to-be-deleted is found. If the
+ * entry to be deleted is the one being pointed at, be sure to update the
+ * cell.
+ *
+ * On LOOKUP: If the cell has a NULL pointer, lookup fails. Otherwise search
+ * through list until either correct entry is found (lookup succeeds) or else
+ * the lookup finds an entry not in the current cell (lookup fails).
+ */
+
+#ifndef THASH_TABLE_HH
+#define THASH_TABLE_HH
+
+#ifndef STRING_HH
+#include "string.hh"
+#endif
+#ifndef STRING_UTIL_HH
+#include "string_util.hh"
+#endif
+#ifndef TARRAY_HH
+#include "tArray.hh"
+#endif
+#ifndef TLIST_HH
+#include "tList.hh"
+#endif
+
+#define HASH_TABLE_SIZE_DEFAULT 23
+#define HASH_TABLE_SIZE_MEDIUM 331
+#define HASH_TABLE_SIZE_LARGE 2311
+
+template <class DATA_TYPE> class tList; // access
+template <class DATA_TYPE> class tListIterator; // aggregate
+
+template <class HASH_TYPE, class DATA_TYPE> class tHashTable {
+
+ // We create a structure with full information about each entry stored in
+ // this dictionary.
+ template <class E_HASH_TYPE, class E_DATA_TYPE> struct tHashEntry {
+ E_HASH_TYPE key;
+ int id;
+ E_DATA_TYPE data;
+ };
+
+private:
+ int entry_count; // How many entries are we storing?
+ int table_size; // What size hash table are we using?
+
+ // Create a linked list of all hash entries in the table, as well as a
+ // companion array with pointers into the list that will give the start of
+ // each hash entry.
+ tList< tHashEntry<HASH_TYPE, DATA_TYPE> > entry_list;
+ tArray< tListNode< tHashEntry<HASH_TYPE, DATA_TYPE> > * > cell_array;
+
+ // Create an iterator for entry_list
+ tListIterator< tHashEntry<HASH_TYPE, DATA_TYPE> > list_it;
+
+ // Create a set of HashKey methods for each of the basic data types that
+ // we allow:
+
+ // HASH_TYPE = int
+ // Simply mod the into by the size of the hash table and hope for the best
+ int HashKey(const int & key) const {
+ return key % table_size;
+ }
+
+ // HASH_TYPE = cString
+ // We hash a string simply by adding up the individual character values in
+ // that string and modding by the hash size. For most applications this
+ // will work fine (and reasonably fast!) but some patterns will cause all
+ // strings to go into the same cell. For example, "ABC"=="CBA"=="BBB".
+ int HashKey(const cString & key) const {
+ unsigned int out_hash = 0;
+ for (int i = 0; i < key.GetSize(); i++)
+ out_hash += (unsigned int) key[i];
+ return out_hash % table_size;
+ }
+
+ // Function to find the appropriate tHashEntry for a key that is passed
+ // in and return it.
+ tHashEntry<HASH_TYPE, DATA_TYPE> * FindEntry(const HASH_TYPE & key) {
+ const int bin = HashKey(key);
+ if (cell_array[bin] == NULL) return NULL;
+
+ // Set the list iterator to the first entry of this bin.
+ list_it.Set(cell_array[bin]);
+
+ // Loop through all entries in this bin to see if any are a perfect match.
+ while (list_it.Get() != NULL && list_it.Get()->id == bin) {
+ if (list_it.Get()->key == key) return list_it.Get();
+ list_it.Next();
+ }
+
+ // No matches found.
+ return NULL;
+ }
+private:
+ // disabled copy constructor.
+ tHashTable(const tHashTable &);
+public:
+ tHashTable(int in_table_size=DICTIONARY_HASH_DEFAULT)
+ : entry_count(0)
+ , table_size(in_table_size)
+ , cell_array(in_table_size)
+ , list_it(entry_list)
+ {
+ cell_array.SetAll(NULL);
+ }
+
+ ~tHashTable() {
+ while (entry_list.GetSize()) delete entry_list.Pop();
+ }
+
+
+ bool OK() {
+ std::cout << "ENTRY_COUNT = " << entry_count << std::endl;
+ std::cout << "TABLE_SIZE = " << table_size << std::endl;
+ int count = 0;
+ std::cout << "LIST ELEMENTS:" << std::endl;
+ list_it.Reset();
+ while (list_it.Next() != NULL) {
+ tHashEntry<HASH_TYPE, DATA_TYPE> * cur_entry = list_it.Get();
+ std::cout << " " << count << " : "
+ << cur_entry->id << " "
+ << cur_entry->key << " "
+ << cur_entry->data << " "
+ << std::endl;
+ }
+ std::cout << std::endl;
+ std::cout << "ARRAY CELLS: "
+ << cell_array.GetSize()
+ << std::endl;
+ for (int i = 0; i < table_size; i++) {
+ tListNode< tHashEntry<HASH_TYPE, DATA_TYPE> > * cur_list_node =
+ cell_array[i];
+ if (cur_list_node == NULL) {
+ std::cout << " NULL" << std::endl;
+ } else {
+ std::cout << " " << cur_list_node->data->id
+ << " " << cur_list_node->data->key
+ << std::endl;
+ }
+ }
+
+ return true;
+ }
+
+ int GetSize() { return entry_count; }
+
+ // This function is used to add a new entry...
+ void Add(const HASH_TYPE & key, DATA_TYPE data) {
+ // Build the new entry...
+ tHashEntry<HASH_TYPE, DATA_TYPE> * new_entry =
+ new tHashEntry<HASH_TYPE, DATA_TYPE>;
+ new_entry->key = key;
+ new_entry->data = data;
+ const int bin = HashKey(key);
+ new_entry->id = bin;
+
+
+ // Determine where this new entry should go; either at the end of
+ // the list (if there are no others in the bin) or following another
+ // entry in the bin.
+ if (cell_array[bin] == NULL) { list_it.Reset(); } // Reset to list start
+ else { list_it.Set(cell_array[bin]); } // Else find insert point
+
+ entry_list.Insert(list_it, new_entry); // Place new entry in the list
+ list_it.Prev(); // Back up to new entry
+ cell_array[bin] = list_it.GetPos(); // Record position
+
+ // Update our entry count...
+ entry_count++;
+ }
+
+
+ // This function will change the value of an entry that exists, or add it
+ // if it doesn't exist.
+ void SetValue(const HASH_TYPE & key, DATA_TYPE data) {
+ tHashEntry<HASH_TYPE, DATA_TYPE> * cur_entry = FindEntry(key);
+ if (cur_entry == NULL) {
+ Add(key, data);
+ return;
+ }
+ cur_entry->data = data;
+ }
+
+
+ bool HasEntry(const HASH_TYPE & key) {
+ return FindEntry(key) != NULL;
+ }
+
+ bool Find(const HASH_TYPE & key, DATA_TYPE & out_data) {
+ tHashEntry<HASH_TYPE, DATA_TYPE> * found_entry = FindEntry(key);
+ if (found_entry != NULL) {
+ out_data = found_entry->data;
+ return true;
+ }
+ return false;
+ }
+
+ DATA_TYPE Remove(const HASH_TYPE & key) {
+ // Determine the bin that we are going to be using.
+ const int bin = HashKey(key);
+
+ DATA_TYPE out_data;
+ assert(cell_array[bin] != NULL);
+ list_it.Set(cell_array[bin]);
+
+ // If we are deleting the first entry in this bin we must clean up...
+ if (list_it.Get()->key == key) {
+ out_data = list_it.Get()->data;
+ list_it.Remove();
+ list_it.Next();
+ entry_count--;
+ // See if the next entry is still part of this cell.
+ if (list_it.AtRoot() == false && list_it.Get()->id == bin) {
+ cell_array[bin] = list_it.GetPos();
+ } else {
+ cell_array[bin] = NULL;
+ }
+ }
+
+ // If it was not the first entry in this cell, keep looking!
+ else {
+ while (list_it.Next() != NULL && list_it.Get()->id == bin) {
+ if (list_it.Get()->key == key) {
+ out_data = list_it.Get()->data;
+ list_it.Remove();
+ entry_count--;
+ break;
+ }
+ }
+ }
+
+ return out_data;
+ }
+
+ void SetTableSize(int _hash) {
+ // Create the new table...
+ table_size = _hash;
+ cell_array.ResizeClear(table_size);
+ cell_array.SetAll(NULL);
+
+ // Backup all of the entries in the list and re-insert them one-by-one.
+ tList< tHashEntry<HASH_TYPE, DATA_TYPE> > backup_list;
+ backup_list.Transfer(entry_list);
+
+ while (backup_list.GetSize() > 0) {
+ tHashEntry<HASH_TYPE, DATA_TYPE> * cur_entry = backup_list.Pop();
+
+ // determine the new bin for this entry.
+ int bin = HashKey(cur_entry->key);
+ cur_entry->id = bin;
+
+ if (cell_array[bin] == NULL) { list_it.Reset(); } // Reset to list start
+ else { list_it.Set(cell_array[bin]); } // Else find insert point
+
+ entry_list.Insert(list_it, cur_entry); // Place new entry in the list
+ list_it.Prev(); // Back up to new entry
+ cell_array[bin] = list_it.GetPos(); // Record position
+ }
+ }
+
+ // The following method allows the user to convert the dictionary contents
+ // into lists. Empty lists show be passed in as arguments and the method
+ // will fill in their contents.
+ void AsLists(tList<HASH_TYPE> & key_list, tList<DATA_TYPE> & value_list) {
+ // Setup the lists to fill in.
+ assert(key_list.GetSize() == 0);
+ assert(value_list.GetSize() == 0);
+ tListIterator<HASH_TYPE> key_it(key_list);
+ tListIterator<DATA_TYPE> value_it(value_list);
+
+ // Loop through the current entries and included them into the output
+ // list one at a time.
+ list_it.Reset();
+ while (list_it.Next() != NULL) {
+ // Grab the info about the current entry.
+ HASH_TYPE & cur_key = list_it.Get()->key;
+ DATA_TYPE & cur_value = list_it.Get()->data;
+
+ // Find the position to place this in the lists.
+ key_it.Reset();
+ value_it.Reset();
+ value_it.Next();
+ while (key_it.Next() != NULL && cur_key > *(key_it.Get())) {
+ value_it.Next();
+ }
+ key_list.Insert(key_it, &cur_key);
+ value_list.Insert(value_it, &cur_value);
+ }
+ }
+};
+
+#endif
More information about the Avida-cvs
mailing list