#!/usr/bin/python
"""Write the next train departures of one station to /tmp/bahn.

The Deutsche Bahn departure board configured in ``url`` is downloaded, every
departure row is reduced to plain text, and one line per train
(``HH:MM | destination | status``) is written to ``/tmp/bahn`` and echoed on
stdout.  The text file is meant to be displayed with lcd4linux.

The script only uses constructs that are valid on both Python 2.6+ and
Python 3.4+; all work happens in ``main()`` so importing the module has no
side effects.
"""
import os  # not used by the code below; import kept from the original file
import re
import sys
import time

try:  # Python 2 module layout (what the script was originally written for)
    from HTMLParser import HTMLParser
    from urllib import urlopen

    _unescape = HTMLParser().unescape
except ImportError:  # Python 3 module layout
    from html import unescape as _unescape
    from urllib.request import urlopen

# Change this for your station
# See http://reiseauskunft.bahn.de/bin/bhftafel.exe
url = "http://reiseauskunft.bahn.de/bin/bhftafel.exe/dn?ld=&country=DEU&rt=1&input=Berlin&boardType=dep&time=actual&productsFilter=11111&REQTrain_name=S1&start=yes"

# Note: This script creates a text file, /tmp/bahn
# For use with lcd4linux
OUTPUT_PATH = '/tmp/bahn'

# NOTE(review): the row delimiter and the markup around the capture groups
# below are empty in this copy of the script -- it looks as if HTML tags were
# stripped out of the literals at some point; confirm against a known-good
# copy and restore them.  As written, str.split('') raises
# "ValueError: empty separator" and the three '(.*?)' patterns are identical.
ROW_SEPARATOR = ''
TIME_RE = re.compile(r'(\d\d:\d\d)')
DEST_RE = re.compile(r'(.*?)', re.S)
PLATFORM_RE = re.compile(r'(.*?)', re.S)
STATUS_RE = re.compile(r'(.*?)', re.S)

_TAG_RE = re.compile('<[^>]*>')
_BLANKS_RE = re.compile('[ \t]+')
_STATUS_PREFIX_RE = re.compile(r'ca\...')

# German umlauts and the ASCII spelling they are replaced with.
_TRANSLITERATIONS = (
    (u'\xfc', 'ue'),
    (u'\xdc', 'Ue'),
    (u'\xe4', 'ae'),
    (u'\xc4', 'Ae'),
    (u'\xf6', 'oe'),
    (u'\xd6', 'Oe'),
)


def remove_html(str):
    """Return *str* as plain text.

    HTML tags are replaced by a blank, entities are unescaped, surrounding
    whitespace is trimmed, German umlauts are transliterated to ASCII and runs
    of spaces/tabs are collapsed to one space.  Newlines inside the text are
    kept; the caller relies on them to separate destination and stations.
    """
    # The parameter keeps its original name (which shadows the builtin) so
    # existing callers are unaffected; the body works on a local alias.
    text = _unescape(_TAG_RE.sub(' ', str)).strip()
    for umlaut, ascii_form in _TRANSLITERATIONS:
        text = text.replace(umlaut, ascii_form)
    return _BLANKS_RE.sub(' ', text)


def abbreviate_station(str):
    """Return the station name *str* without the ' Hbf' marker."""
    return str.replace(' Hbf', '')


def abbreviate_status(str):
    """Shorten the status text *str*.

    Every occurrence of 'ca.' together with the two characters following it
    is removed, and ' , Grund:' is replaced by ';'.
    """
    shortened = _STATUS_PREFIX_RE.sub('', str)
    return shortened.replace(' , Grund:', ';')


def fetch_source(board_url):
    """Download *board_url* and return the page as a native string."""
    handle = urlopen(board_url)
    try:  # close the connection even when reading fails
        raw = handle.read()
    finally:
        handle.close()
    if isinstance(raw, str):
        # Python 2: read() already yields the native string type.
        return raw
    # Python 3: decode the bytes.  ISO-8859-1 is the historic HTTP default
    # for text content and can decode any byte sequence.
    charset = handle.headers.get_content_charset() or 'iso-8859-1'
    try:
        return raw.decode(charset, 'replace')
    except LookupError:  # server announced a charset Python does not know
        return raw.decode('iso-8859-1')


def parse_train(single_train):
    """Extract one departure from the HTML of a single board row.

    Returns ``(arrival_time, dest, stations, platform, status)`` where
    ``arrival_time`` is a ``time.struct_time``, or ``None`` when the time,
    destination or platform cannot be found.  In the latter case the match
    results are printed for diagnosis.
    """
    time_str = TIME_RE.search(single_train)
    dest_html = DEST_RE.search(single_train)
    platform_html = PLATFORM_RE.search(single_train)
    status_html = STATUS_RE.search(single_train)

    if not all([time_str, dest_html, platform_html]):
        print('= INVALID ENTRY =')
        print(time_str)
        print(dest_html)
        print(platform_html)
        print(status_html)
        print('')
        return None

    if status_html:
        status = remove_html(status_html.group(1))
    else:
        status = '(no status)'
    arrival_time = time.strptime(time_str.group(1), '%H:%M')
    # First line is the destination, the remainder the intermediate stations.
    # partition() tolerates a cell without a newline (stations is then ''),
    # where unpacking split('\n', 1) would raise ValueError.
    dest, _, stations = remove_html(dest_html.group(1)).partition('\n')
    platform = remove_html(platform_html.group(1))
    return (arrival_time, dest, stations, platform, status)


def parse_trains(source):
    """Return the departures found in the board page *source*.

    The page is cut at ``ROW_SEPARATOR``; the first two pieces and the last
    one are skipped, every other piece is handed to ``parse_train``.
    """
    next_trains = []
    for single_train in source.split(ROW_SEPARATOR)[2:-1]:
        train = parse_train(single_train)
        if train is not None:
            next_trains.append(train)
    return next_trains


def format_board(next_trains):
    """Return the text written to the output file for *next_trains*.

    One ``HH:MM | destination | status`` line per train, destinations
    right-aligned to the longest abbreviated name; ``'No trains found'`` when
    the list is empty.  Stations and platform are parsed but not displayed.
    """
    # Truthiness check: a single train is a valid board (the former
    # ``len(...) > 1`` test reported "No trains found" for it).
    if not next_trains:
        return 'No trains found'

    dest_max_width = max(
        len(abbreviate_station(dest)) for _, dest, _, _, _ in next_trains
    )
    lines = []
    for arrival, dest, _stations, _platform, status in next_trains:
        lines.append('%s | %s | %s\n' % (
            time.strftime('%H:%M', arrival),
            abbreviate_station(dest).rjust(dest_max_width),
            abbreviate_status(status),
        ))
    return ''.join(lines)


def main():
    """Fetch the board, write it to ``OUTPUT_PATH`` and echo the train lines."""
    next_trains = parse_trains(fetch_source(url))
    payload = format_board(next_trains).encode('utf8')

    with open(OUTPUT_PATH, 'wb') as out:
        out.write(payload)

    if next_trains:
        # Echo the same UTF-8 bytes on stdout.  Python 3 exposes the byte
        # stream as sys.stdout.buffer; flush first so earlier print() output
        # stays in order.
        sys.stdout.flush()
        getattr(sys.stdout, 'buffer', sys.stdout).write(payload)


if __name__ == '__main__':
    main()