[pygr-notify] [pygr commit] r117 - contrib/benchmark

codesite-noreply at google.com codesite-noreply at google.com
Tue Dec 23 14:07:28 PST 2008


Author: istvan.albert
Date: Tue Dec 23 12:34:34 2008
New Revision: 117

Added:
    contrib/benchmark/dbm_dict.py
Modified:
    contrib/benchmark/bench.py
    contrib/benchmark/results.txt

Log:
added dbm-dict, updated statistics for 10 million items

Modified: contrib/benchmark/bench.py
==============================================================================
--- contrib/benchmark/bench.py	(original)
+++ contrib/benchmark/bench.py	Tue Dec 23 12:34:34 2008
@@ -2,9 +2,10 @@
  from itertools import *
  import sq_dict
  import sq_dict2
+import dbm_dict

  # number of elements
-ELEM_NUM = 10**5
+ELEM_NUM = 10**7

  # data size
  DATA_SIZE = 100
@@ -46,7 +47,7 @@
  @Timer
  def indexing( func, fname ):
      "Loads rows into the database"
-    db = func( fname, 'w')
+    db = func( fname, 'c')
      if hasattr(db, 'create_index'):
          db.create_index()
          db.sync()
@@ -91,20 +92,23 @@
      #
      # enable the cdb benchmarks below
      #
-    #import cdb_dict
+    import cdb_dict
      #
-    #func0 = cdb_dict.cdb_open
+    func0 = cdb_dict.cdb_open

      func1 = bsddb.btopen
      func2 = bsddb.hashopen
      func3 = sq_dict.sq_dict_open
      func4 = sq_dict2.sq_dict2_open
-
-    funcs = [ func1, func2, func3, func4 ]
+    func5 = dbm_dict.dbm_open
+
+    funcs = [ func0, func1, func2, func3, func4, func5 ]
      tests = [ loading, indexing, forward_iter, reverse_iter, update]

      print
-
+    print 'Data: %s elements of %s size' % (ELEM_NUM, DATA_SIZE)
+    print
+
      # delete existing databases
      for func in funcs:
          fname = get_name( func )

Added: contrib/benchmark/dbm_dict.py
==============================================================================
--- (empty file)
+++ contrib/benchmark/dbm_dict.py	Tue Dec 23 12:34:34 2008
@@ -0,0 +1,43 @@
+# dbm based dictionary
+
+# uncomment this for windows
+#import dumbdbm as dbm_lib
+
+# use this for unix (the default)
+import gdbm as dbm_lib
+
+def dbm_open( filename, mode='c'):
+    db = DbmShelve(filename, mode=mode)
+    return db
+
+class DbmShelve( object ):
+
+    def __init__ (self, filename, mode):
+        # will switch modes of operation depending on the type of access
+        self.db = dbm_lib.open( filename, mode) # ahem
+
+    def create_index(self):
+        pass
+
+    def sync(self):
+        if hasattr(self.db, 'sync'):
+            self.db.sync()
+
+    def close(self):
+        if hasattr(self.db, 'close'):
+            self.db.close()
+
+    def __setitem__(self, key, value):
+        self.db[key] = value
+
+    def __getitem__(self, key):
+        return self.db[key]
+
+    def __iter__(self):
+        return iter( self.db.keys() )
+
+    def keys(self):
+        return self.db.keys()
+
+if __name__ == '__main__':
+    db = DbmShelve('test.db', mode='c')

Modified: contrib/benchmark/results.txt
==============================================================================
--- contrib/benchmark/results.txt	(original)
+++ contrib/benchmark/results.txt	Tue Dec 23 12:34:34 2008
@@ -1,32 +1,38 @@
-Data: 1 million rows each 100 character long
-
-elapsed= 16.5s, test=loading, func=cdb_open
-elapsed= 35.9s, test=loading, func=btopen
-elapsed= 63.6s, test=loading, func=hashopen
-elapsed= 46.3s, test=loading, func=sq_dict_open
-elapsed= 24.4s, test=loading, func=sq_dict2_open
-----------
-elapsed=  0.0s, test=indexing, func=cdb_open
-elapsed=  0.0s, test=indexing, func=btopen
-elapsed=  0.0s, test=indexing, func=hashopen
-elapsed=  0.0s, test=indexing, func=sq_dict_open
-elapsed= 10.6s, test=indexing, func=sq_dict2_open
-----------
-elapsed= 14.1s, test=forward_iter, func=cdb_open
-elapsed= 34.7s, test=forward_iter, func=btopen
-elapsed= 17.8s, test=forward_iter, func=hashopen
-elapsed= 36.4s, test=forward_iter, func=sq_dict_open
-elapsed= 24.0s, test=forward_iter, func=sq_dict2_open
-----------
-elapsed=  2.7s, test=reverse_iter, func=cdb_open
-elapsed= 13.5s, test=reverse_iter, func=btopen
-elapsed= 27.7s, test=reverse_iter, func=hashopen
-elapsed= 38.3s, test=reverse_iter, func=sq_dict_open
-elapsed= 37.8s, test=reverse_iter, func=sq_dict2_open
-----------
-elapsed= 17.1s, test=update, func=cdb_open
-elapsed= 84.9s, test=update, func=btopen
-elapsed= 86.2s, test=update, func=hashopen
-elapsed= 89.5s, test=update, func=sq_dict_open
-elapsed= 91.5s, test=update, func=sq_dict2_open
-----------
+
+Data: 10000000 elements of 100 size
+
+elapsed= 29.1s, test=loading, func=cdb_open
+elapsed=145.2s, test=loading, func=btopen
+elapsed=9161.6s, test=loading, func=hashopen
+elapsed=320.2s, test=loading, func=sq_dict_open
+elapsed=125.5s, test=loading, func=sq_dict2_open
+elapsed=15438.2s, test=loading, func=dbm_open
+----------
+elapsed=  0.0s, test=indexing, func=cdb_open
+elapsed=  0.0s, test=indexing, func=btopen
+elapsed=  0.0s, test=indexing, func=hashopen
+elapsed=  0.0s, test=indexing, func=sq_dict_open
+elapsed= 93.2s, test=indexing, func=sq_dict2_open
+elapsed=  0.0s, test=indexing, func=dbm_open
+----------
+elapsed= 17.7s, test=forward_iter, func=cdb_open
+elapsed=145.1s, test=forward_iter, func=btopen
+elapsed=146.3s, test=forward_iter, func=hashopen
+elapsed=237.6s, test=forward_iter, func=sq_dict_open
+elapsed=202.0s, test=forward_iter, func=sq_dict2_open
+elapsed= 36.4s, test=forward_iter, func=dbm_open
+----------
+elapsed= 21.7s, test=reverse_iter, func=cdb_open
+elapsed=119.7s, test=reverse_iter, func=btopen
+elapsed=233.4s, test=reverse_iter, func=hashopen
+elapsed=335.7s, test=reverse_iter, func=sq_dict_open
+elapsed=293.5s, test=reverse_iter, func=sq_dict2_open
+elapsed= 63.7s, test=reverse_iter, func=dbm_open
+----------
+elapsed= 49.1s, test=update, func=cdb_open
+elapsed=264.2s, test=update, func=btopen
+elapsed=7894.6s, test=update, func=hashopen
+elapsed=479.7s, test=update, func=sq_dict_open
+elapsed=593.6s, test=update, func=sq_dict2_open
+elapsed=16387.3s, test=update, func=dbm_open
+----------



More information about the pygr-notify mailing list