[pygr-notify] Issue 56 in pygr: unable to access the sequence object via a saved bound schema attribute :(

codesite-noreply at google.com codesite-noreply at google.com
Sun Dec 21 22:10:18 PST 2008


Status: New
Owner: jqian.ubc
CC: cjlee112
Labels: Type-Defect Priority-High

New issue 56 by jqian.ubc: unable to access the sequence object via a saved  
bound schema attribute :(
http://code.google.com/p/pygr/issues/detail?id=56

What steps will reproduce the problem?

qing at 1[ensembl]$ python -i
Python 2.5.2 (r252:60911, Aug  8 2008, 09:22:44)
[GCC 4.3.1] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pygr.Data
>>> pygr.Data.dir('Bio.Annotation.Ensembl')
['Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon.sqltable',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exonTranscript',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.transcript.sqltable']
>>> from pygr import sqlgraph
>>> from pygr import seqdb
>>> conn = pygr.Data.Bio.Server.Ensembl.Ensembldb()
>>> exonTB =
pygr.Data.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon.sqltable()
>>> transcriptTB =
pygr.Data.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.transcript.sqltable()
>>> genomeName = 'HUMAN.hg18'
>>> genomeResourceID = 'Bio.Seq.Genome.' + genomeName
>>> genomeResourceID
'Bio.Seq.Genome.HUMAN.hg18'
>>> genome = pygr.Data.getResource(genomeResourceID)
>>> genome['chr1']
chr1[0:247249719]
>>> from ensembl import seqregion
>>> dnaTB = sqlgraph.SQLTable('homo_sapiens_core_47_36i.dna',
itemSliceClass=seqdb.SeqDBSlice, attrAlias=dict(seq='sequence'),
itemClass=seqregion.EnsemblDNA, serverInfo=conn)
>>> dnaTB.__doc__ = 'ensembl dna sql table (homo_sapiens_core_47_36i)'
>>>
pygr.Data.addResource('Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.dna.sqltable',
dnaTB)
>>> seqregionTB = sqlgraph.SQLTable('homo_sapiens_core_47_36i.seq_region',
itemClass=sqlgraph.TupleO, serverInfo=conn)
>>> sr = seqregion.SeqRegion(seqregionTB, {17:genome, 4:dnaTB}, {17:'chr',
4:None})
>>> srID = 'Bio.Seq.Ensembl.homo_sapiens_core_47_36i.seq'
>>> sr.__doc__ = 'ensembl seqregion (homo_sapiens_core_47_36i)'
>>> pygr.Data.addResource(srID, sr)
>>> pygr.Data.save()
>>>

qing at 1[ensembl]$ python -i
Python 2.5.2 (r252:60911, Aug  8 2008, 09:22:44)
[GCC 4.3.1] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pygr.Data
>>> pygr.Data.dir('Bio.Annotation.Ensembl.homo_sapiens_core_47_36i')
['Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.dna.sqltable',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon.sqltable',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exonTranscript',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.transcript.sqltable']
>>> conn = pygr.Data.Bio.Server.Ensembl.Ensembldb()
>>> exonTB =
pygr.Data.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon.sqltable()
>>> transcriptTB =
pygr.Data.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.transcript.sqltable()
>>> sr = pygr.Data.Bio.Seq.Ensembl.homo_sapiens_core_47_36i.seq()
>>> from pygr import seqdb

>>> exonAnnoDB = seqdb.AnnotationDB(exonTB, sr,
sliceAttrDict=dict(id='seq_region_id', stop='seq_region_end',
orientation='seq_region_strand'))
>>> transcriptAnnoDB =seqdb.AnnotationDB(transcriptTB, sr,
sliceAttrDict=dict(id='seq_region_id', stop='seq_region_end',
orientation='seq_region_strand'))

>>> exonAnnoDB.__doc__ = 'ensembl annotationDB for exon
(homo_sapiens_core_47_36i)'
>>> transcriptAnnoDB.__doc__ = 'ensembl annotationDB for transcript
(homo_sapiens_core_47_36i)'
>>>
pygr.Data.addResource('Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon',
exonAnnoDB)
>>>
pygr.Data.addResource('Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.transcript',
transcriptAnnoDB)
>>> from pygr import sqlgraph
>>> exon_transcript =
sqlgraph.SQLGraph('homo_sapiens_core_47_36i.exon_transcript',
serverInfo=conn, sourceDB = exonAnnoDB, targetDB=transcriptAnnoDB,
attrAlias=dict(source_id='exon_id', target_id='transcript_id'))
>>> exon_transcript.__doc__='ensembl exonAnnoDB -> transcriptAnnoDB
(homo_sapiens_core_47_36i)'

>>>
pygr.Data.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon_transcript =
exon_transcript
>>>
pygr.Data.schema.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon_transcript
= pygr.Data.ManyToManyRelation(exonAnnoDB, transcriptAnnoDB,
bindAttrs=('transcripts', 'exons'))
>>> pygr.Data.save()
>>>

qing at 1[ensembl]$ python -i
Python 2.5.2 (r252:60911, Aug  8 2008, 09:22:44)
[GCC 4.3.1] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pygr.Data
>>> pygr.Data.dir('Bio.Annotation.Ensembl')
['Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.dna.sqltable',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon.sqltable',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exonTranscript',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon_transcript',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.transcript',
'Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.transcript.sqltable']
>>> exonDB =  
pygr.Data.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon()


>>> transcriptDB =
pygr.Data.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.transcript()
>>> transcript = transcriptDB[1]
>>> exons = transcript.exons
>>> for e in exons:
...     print e.id, e.seq_region_start
...
1 19397
2 14600
3 8131
4 7778
5 7465
6 7096
7 6721
8 6611
9 6470
10 5767
11 5659
12 4863
13 4274

# I didn't misspell *sequence* this time, yet accessing it on exons obtained
# via the bound schema attribute (transcript.exons) fails:
>>> for e in exons:
...     print e.id, len(e.sequence)
...
1
Traceback (most recent call last):
   File "<stdin>", line 2, in <module>
AttributeError: 'EnsemblRow_homo_sapiens_core_47_36i.exon' object has no
attribute 'sequence'

# An exon object fetched directly from exonDB does have a *sequence* attribute!
>>> exon = exonDB[1]
>>> len(exon.sequence)
273
>>> repr(exon)
'annot1[0:273]'

# In addition, I am able to access the sequence attribute of an exon via
# the saved exon_transcript graph.
>>> exon_transcript =
pygr.Data.Bio.Annotation.Ensembl.homo_sapiens_core_47_36i.exon_transcript()
>>> exons = (~exon_transcript)[transcript]
>>> for e in exons:
...     print e.id, e.seq_region_start, len(e.sequence)
...
1 19397 273
2 14600 155
3 8131 99
4 7778 147
5 7465 141
6 7096 132
7 6721 198
8 6611 18
9 6470 139
10 5767 44
11 5659 106
12 4863 39
13 4274 92
>>>







--
You received this message because you are listed in the owner
or CC fields of this issue, or because you starred this issue.
You may adjust your issue notification preferences at:
http://code.google.com/hosting/settings



More information about the pygr-notify mailing list