[bip] pygr: reload AnnotationDB

Brent Pedersen bpederse at gmail.com
Wed Mar 26 09:25:02 PDT 2008


Hi, I'm assuming others are using pygr here. I want to create an
AnnotationDB, save it on disk, then access it later without having to
rebuild it.
Presumably I'm doing something silly, but I don't know what it is. For
the simplified case below, I expect load() and read() to print
the same result. What step am I missing to make that the case? Thanks
for any pointers.
-brent

========

from pygr import cnestedlist, seqdb

def load():
    """Build the example annotation alignment on disk and query it.

    Writes a single-sequence FASTA file to /tmp/seq.fa, opens it as a
    BlastDB, wraps three fake annotations on 'chr1' in an AnnotationDB,
    adds each annotation to a pairwise-mode NLMSA stored at /tmp/test,
    builds the NLMSA with saveSeqDict=True, and finally prints the name of
    the first key returned when the alignment is indexed with chr1[60:70].
    """
    seq = "a" * 80 + "c" * 80 + "t" * 80 + "g" * 80
    fh = open('/tmp/seq.fa', 'w')
    try:
        fh.write(">chr1\n")
        fh.write(seq + "\n")
    finally:
        # Close the handle even if a write fails, so the file is flushed
        # before BlastDB reads it.
        fh.close()

    seq_db = seqdb.BlastDB('/tmp/seq.fa')
    # some fake features: key -> (id, name, start, stop)
    # NOTE(review): the key 'gene40to500' does not match its name
    # 'gene50to400' or its coordinates (50, 400) -- presumably a typo;
    # left unchanged here.
    anno_dict = {
        'gene40to500': ('chr1', 'gene50to400', 50, 400),
        'gene90to100': ('chr1', 'gene90to100', 90, 100),
        'gene10to50': ('chr1', 'gene10to50', 10, 50),
    }

    # Maps each slice attribute to its index in the tuples above.
    slice_dict = dict(id=0, name=1, start=2, stop=3)
    anno_db = seqdb.AnnotationDB(anno_dict, seq_db, sliceAttrDict=slice_dict)

    # Earlier attempt, kept for reference:
    # alignment = cnestedlist.NLMSA('/tmp/test', mode='w',
    #                               seqDict=seqdb, pairwiseMode=True)
    alignment = cnestedlist.NLMSA('/tmp/test', mode='w', pairwiseMode=True)

    for v in anno_db.values():
        alignment.addAnnotation(v)

    alignment.build(saveSeqDict=True)

    query = seq_db['chr1'][60:70]
    a = alignment[query]

    print(a.keys()[0].name)

def read():
    """Re-open the alignment saved at /tmp/test and repeat load()'s query.

    Opens the NLMSA stored at /tmp/test, opens the FASTA file as a fresh
    BlastDB, indexes the alignment with chr1[60:70], and prints the name of
    the first key returned.
    """
    alignment = cnestedlist.NLMSA('/tmp/test')
    # Use the same absolute path that load() writes; the bare 'seq.fa'
    # only resolved when the working directory happened to be /tmp.
    # NOTE(review): the post reports 'seq not in PrefixUnionDict' from this
    # function; the path fix alone may not resolve that -- confirm how pygr
    # expects the sequence dictionary to be supplied when re-opening an NLMSA.
    seq_db = seqdb.BlastDB('/tmp/seq.fa')

    query = seq_db['chr1'][60:70]
    a = alignment[query]
    print(a.keys()[0].name)


# Build the annotation DB and alignment, save them to disk, and query them.
load()  # works as expected !

# Re-open the saved alignment from disk and run the same query.
read() # 'error: seq not in PrefixUnionDict'



More information about the biology-in-python mailing list