# (stray "Blob Blame History Raw" web-UI residue commented out — it was a syntax error before the shebang)
#!/usr/bin/python
#
# Download the latest gpsbabel source tarball by simulating a browser
# visit and a HTTP POST form submission (there is no other way to
# download those files).

import re
import urllib2
import subprocess
from BeautifulSoup import BeautifulSoup

download_url = "http://www.gpsbabel.org/download.html"

print "Loading HTML page", download_url

page = urllib2.urlopen(download_url)
# print "page:", page

soup = BeautifulSoup(page)
# print "soup:", soup

rexp = re.compile('\.tar\.gz$')

def test_tag(tag):
    """Return a truthy value if *tag* is the gpsbabel download form.

    Accepts a <form> that POSTs multipart data to plan9.php and
    contains an <input> whose value ends in '.tar.gz'.  Returns the
    matching <input> tag (truthy) or a falsy value otherwise.
    """
    if tag.name != 'form':
        return False
    # Use .get() so forms lacking any of these attributes are simply
    # rejected instead of raising KeyError inside soup.find().
    if tag.get('action') != 'plan9.php':
        return False
    if tag.get('method') != 'post':
        return False
    if tag.get('enctype') != 'multipart/form-data':
        return False
    return tag.find('input', value=rexp)

# Locate the download form; fail with a clear message if the page
# layout changed and the form can no longer be found (soup.find
# returns None in that case, which would otherwise surface as an
# opaque AttributeError below).
tarball_form = soup.find(test_tag)
if tarball_form is None:
    raise SystemExit("Could not find the tarball download form on " + download_url)
form_inputs = tarball_form.findAll('input')

# Map each input's name to its value; 'inp' avoids shadowing the
# builtin input().
d = dict([(inp['name'], inp['value']) for inp in form_inputs])

tarball = d[u'dl']      # tarball filename; also used as the local output name
token = d[u'token']     # one-shot token the server expects in the POST

# Python has no library functions which handle 'multipart/form-data'
# encoding, and the pycurl interface is non-trivial, so we just run
# the 'curl' program as a separate process.
cmd = [ 'curl',
        '-F', "=".join(['dl', tarball]),
        '-F', "=".join(['token', token]),
        '-e', download_url,
        'http://www.gpsbabel.org/plan9.php',
        '-o', tarball ]
# print "Command:", cmd

print "Running curl to get tarball via HTTP POST:", tarball
retcode = subprocess.call(cmd, shell=False)

if retcode == 0:
    print "Successfully downloaded tarball:", tarball
else:
    print "Error downloading tarball (%d):" % retcode, tarball