[twill] mail monitor memory leak?

John Mudd johnbmudd at gmail.com
Tue Apr 4 05:36:47 PDT 2006


Here's the guts of a script that monitors a SquirrelMail web mail account
and forwards new emails to my cell phone.  It's working well, but the
process's memory usage grows by about 35 MB per day.  Any suggestions on
how to track down the leak?

John



# Poll a SquirrelMail account once a minute and forward each newly seen
# message through sendMail().
#
# This fragment relies on names defined outside it: the base address `url`,
# the twill commands (go, show, follow, showlinks), `fromAddrPattern`,
# `stripHTML` and `sendMail`.

# URLs of emails that have already been handled.  None means "first pass".
# NOTE(review): entries are only ever added to this set, never removed, so it
# grows for as long as the script runs.
savedEmailURLs = None

# Matches one line of showlinks() output for an email on the INBOX page and
# captures the link target.
# Example text:
# 25. test email ==> read_body.php?mailbox=INBOX&passed_id=314&startMessage=1
emailLinkPattern = (r'\d+\. .* ==> '
                    r'(read_body.php\?mailbox=INBOX&passed_id=\d+&startMessage=\d+)')

# NOTE(review): the twill browser state persists across iterations of this
# loop; if it retains per-page data (e.g. a navigation history), that may be
# the source of the reported memory growth -- confirm against twill.
while True:

    # Get HTML from SquirrelMail menu page.
    output = StringIO.StringIO()
    twill.set_output(output)
    go(url + '/src/left_main.php')
    show()
    html = output.getvalue()

    # Verify the INBOX link is present.
    assert 'INBOX' in html, 'INBOX link not found on menu page'

    # Get twill's list of links from the INBOX folder page.
    output = StringIO.StringIO()
    twill.set_output(output)
    follow('INBOX')
    showlinks()
    links = output.getvalue()

    # Verify this is the INBOX folder page.
    assert 'Sign Out' in links, 'INBOX folder page not recognised'

    # Extract the list of links for each email on the first page.
    emailUrlList = re.findall(emailLinkPattern, links, re.M)

    # Is this the first pass?
    if savedEmailURLs is None:
        # Treat all current email as if already processed.
        savedEmailURLs = set(emailUrlList)
        #savedEmailURLs = set(emailUrlList[1:])  # All but first, for test only.
        #savedEmailURLs = set([])  # None, for test only.

    # Repeat for each email that hasn't already been processed.
    for emailUrl in emailUrlList:
        # Checked on every iteration (not up front) so that a link listed
        # twice is only forwarded once.
        if emailUrl in savedEmailURLs:
            continue

        # Get the HTML for this email.
        output = StringIO.StringIO()
        twill.set_output(output)
        go(url + '/src/' + emailUrl)
        show()
        html = output.getvalue()

        # Extract the subject, fromAddr and body.
        m = re.search(fromAddrPattern, html, re.MULTILINE | re.DOTALL)
        assert m, 'could not extract subject/from/body from email page'
        (subject, fromAddr, body) = m.groups()

        subject = stripHTML(subject)
        fromAddr = stripHTML(fromAddr)
        body = stripHTML(body)

        body = re.sub(r'\n{3,}', '\n\n', body)  # Collapse repeated blank lines.
        body = re.sub(r'^\s*', '', body)  # Strip leading white space.
        # Shorten long repetitions: a run of 5+ identical characters becomes 3.
        body = re.sub(r'(.)\1{4,}', r'\g<1>' * 3, body)
        body = body[:300]  # Truncate if too long for SMS.

        # Forward email.
        sendMail(subject, fromAddr, body)

        # Remember not to process this email again.
        savedEmailURLs.add(emailUrl)

    time.sleep(60)



More information about the twill mailing list