I spent the whole day yesterday looking into an interesting problem brought to my attention by Wayne Purcell after he read an article by Simon Behar. I took Simon's code, signed up for a Google Maps API key, got the relation ids for a few cities from OpenStreetMap.org and the poly files from Polygons.OpenStreetMap.fr, updated my Visual Studio Code and Python installations, and ended up with this code:
Note: if you are reading this on a phone, the horizontal view may be better.
import googlemaps
import random
import csv
import time
from shapely import geometry as geo
from datetime import datetime, timedelta
# Travel modes requested for every trip, in query order.
MODES = ["driving", "transit"]

# City name -> base name of its boundary (.poly) file.
CITIES = {
    "Gold Coast": "GoldCoast",
    "Warsaw": "Warsaw",
    "Brisbane": "Brisbane",
    "Berlin": "Berlin",
}

# City name -> UTC offset in hours.
# The offset may change on a daylight saving time (DST) change.
GMT = {
    "Gold Coast": +10,
    "Warsaw": +2,
    "Brisbane": +10,
    "Berlin": +2,
}

# Cache of polygons that have already been built.
POLYS = {}
# Module-level Google Maps client; the key below is a placeholder to be replaced with a real API key.
gmaps = googlemaps.Client(key = "your api key goes here")
def get_directions(_from, _to, _mode, _time):
    """Request directions from the module-level ``gmaps`` client.

    Origin, destination, travel mode and departure time are forwarded to
    ``gmaps.directions`` and its result is returned unchanged.
    """
    response = gmaps.directions(
        _from,
        _to,
        mode=_mode,
        departure_time=_time,
    )
    return response
def get_travel_time(_dir):
    """Return the travel time of the first leg of the first route, in minutes.

    The leg's ``duration`` value is divided by 60 and rounded to two decimals.
    """
    first_leg = _dir[0]['legs'][0]
    duration_value = first_leg['duration']['value']
    minutes = duration_value / 60
    return round(minutes, 2)
def get_travel_distance(_dir):
    """Return the distance of the first leg of the first route, in kilometers.

    The leg's ``distance`` value is divided by 1000; no rounding is applied.
    """
    first_leg = _dir[0]['legs'][0]
    distance_value = first_leg['distance']['value']
    return distance_value / 1000
def get_poly(_city):
    """Return the boundary polygon for ``_city``, building it on first use.

    The city name is mapped to a file base name through ``CITIES``; the
    polygon is stored in ``POLYS`` under that base name so the file is
    parsed only once.
    """
    cache_key = CITIES[_city]
    if cache_key not in POLYS:
        POLYS[cache_key] = create_polygon_from_file(cache_key)
    return POLYS[cache_key]
def create_polygon_from_file(_file_name):
    """Build a polygon from the file ``polys/<_file_name>.poly.txt``.

    The file is read as one header line followed by sections. Each section
    is a section-name line, then coordinate lines, then a closing ``END``
    line; a further ``END`` outside any section closes the file.

    Args:
        _file_name: Base name of the file, without directory or extension.

    Returns:
        A ``geo.Polygon`` built from every coordinate line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a coordinate field is not a valid float.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # os.path.join instead of a hard-coded backslash, so the path also
    # resolves on non-Windows systems.
    path = os.path.join("polys", _file_name + ".poly.txt")

    poly_points = []
    with open(path, "r") as raw_poly:
        raw_poly.readline()  # the first line is a header, not data
        in_section = False
        # Iterating the file stops cleanly at EOF, so a truncated file no
        # longer fails with an IndexError on an empty line.
        for line in raw_poly:
            text = line.strip()
            if not text:
                continue
            if text == "END":
                if not in_section:
                    break  # the END outside any section closes the file
                in_section = False
            elif in_section:
                # Split on any whitespace so both tab- and space-indented
                # coordinate lines parse.
                poly_points.append([float(p) for p in text.split()])
            else:
                # A section-name line: the following lines are coordinates.
                in_section = True
    # NOTE(review): points of all sections are merged into a single ring,
    # as before; files with several rings or holes may need separate
    # handling - confirm against the data used.
    return geo.Polygon(poly_points)
def generate_random_point(polygon):
    """Return a random ``geo.Point`` that ``polygon`` contains.

    Candidates are drawn uniformly from the polygon's bounding box and
    discarded until one passes ``polygon.contains``.
    """
    west, south, east, north = polygon.bounds
    while True:
        x = random.uniform(west, east)
        y = random.uniform(south, north)
        candidate = geo.Point(x, y)
        if polygon.contains(candidate):
            return candidate
def point_to_dict(point):
    """Convert a point into a ``{"lat": ..., "lng": ...}`` dictionary.

    ``point.y`` becomes the latitude and ``point.x`` the longitude.
    """
    latitude = point.y
    longitude = point.x
    return dict(lat=latitude, lng=longitude)
def dict_to_string(_dict):
    """Format a lat/lng dictionary as ``"<lat> <lng>"``."""
    parts = (str(_dict[key]) for key in ('lat', 'lng'))
    return " ".join(parts)
def get_test_point(poly):
    """Pick a random point inside ``poly`` and return it as a lat/lng dict."""
    random_point = generate_random_point(poly)
    return point_to_dict(random_point)
def route(_city, localTime, attempt):
    """Measure one random trip inside ``_city`` for every mode in ``MODES``.

    Two random points inside the city polygon are chosen and directions
    between them are requested for each mode in turn. If a mode other than
    transit returns no result, new points are drawn and the trip is started
    over. If transit returns no result, the trip is abandoned.

    Args:
        _city: City name; must be a key of ``CITIES`` and ``GMT``.
        localTime: Departure time as a ``datetime``. A naive value is taken
            as wall-clock time in the city and shifted to UTC using ``GMT``;
            a timezone-aware value is used as given.
        attempt: Attempt counter kept by the caller. Accepted for interface
            compatibility; it is not used here.

    Returns:
        A list ``[city, start, end]`` followed by a travel time and a
        travel distance for each mode in ``MODES``, or ``[]`` when the
        transit request returned no result.
    """
    # Function-scope import keeps this block self-contained.
    from datetime import timezone

    # The departure time does not depend on the mode, so compute it once.
    if localTime.tzinfo is None:
        # Attach UTC explicitly: a naive datetime is interpreted as the
        # machine's local time when converted to a Unix timestamp, which
        # would shift the departure on any host not running in UTC.
        departure_time = (
            localTime - timedelta(hours=GMT[_city])
        ).replace(tzinfo=timezone.utc)
    else:
        departure_time = localTime

    poly = get_poly(_city)

    # Loop instead of recursing so repeated failures cannot exhaust the
    # call stack.
    while True:
        _from = get_test_point(poly)
        _to = get_test_point(poly)
        out = [_city, dict_to_string(_from), dict_to_string(_to)]
        for mode in MODES:
            print(mode + " directions in " + _city + " from " +
                  dict_to_string(_from) + " to " + dict_to_string(_to) +
                  " at " + str(departure_time))
            res = get_directions(_from, _to, mode, departure_time)
            if not res:
                time.sleep(1)
                if mode == 'transit':
                    print("Route failed at " + mode + ", retrying...")
                    return []
                # Not counted as a failed attempt: draw new points and
                # start the trip over.
                break
            out.extend([get_travel_time(res), get_travel_distance(res)])
        else:
            # Every mode produced a result.
            return out
def run(cities, routes, outputFile, localTime):
    """Collect routing results for several cities and append them to a CSV file.

    For every city, ``routes`` trips are measured with ``route``. A trip for
    which ``route`` returns an empty list is repeated until it succeeds, and
    the number of attempts is recorded in the output row.

    Args:
        cities: Iterable of city names passed to ``route``.
        routes: Number of successful trips to record per city.
        outputFile: Path of the CSV file; rows are appended to it.
        localTime: Departure time passed through to ``route``.

    Each row holds the values returned by ``route`` plus the attempt count
    and the ratio of transit time to driving time. The ratio is left empty
    when the driving time is 0.
    """
    with open(outputFile, 'a', newline='') as out:
        writer = csv.writer(out, dialect='excel')
        # Write the header only into a new or empty file, so repeated runs
        # do not insert header rows between data rows.
        out.seek(0, 2)  # whence=2: move to the end of the file
        if out.tell() == 0:
            writer.writerow(['city', 'start', 'end', 'driving_time[m]',
                             'driving_dist[km]', 'transit_time[m]',
                             'transit_dist[km]', 'attempts', 'ratio'])
        for city in cities:
            for i in range(routes):
                attempt = 1
                print("Progress: " + str(i) + "/" + str(routes))
                time.sleep(1)
                res = route(city, localTime, attempt)
                while not res:
                    attempt += 1
                    res = route(city, localTime, attempt)
                driving_time = res[3]
                transit_time = res[5]
                # A driving time of 0 would otherwise raise
                # ZeroDivisionError and abort the whole run.
                if driving_time:
                    ratio = round(transit_time / driving_time, 2)
                else:
                    ratio = ''
                res.extend([str(attempt), ratio])
                writer.writerow(res)
# Script entry: record 10 trips per configured city, departing at 08:00 local time on 2020-10-07.
run(
    CITIES.keys(),
    routes=10,
    outputFile="4cities.csv",
    localTime=datetime.fromisoformat("2020-10-07T08:00:00"),
)
This, I think, makes it possible to measure the availability and relative speed of public transport at 8am local time on a particular workday.
What was missing in Simon's article and data was the number of failed attempts at routing by public transport between points A and B. The results for Brisbane and Gold Coast may be skewed by their boundaries including large national parks without any public transport nearby. Warsaw and Berlin also have large forests within city boundaries, but they seem to be more accessible by public transport. Still, in Brisbane and Gold Coast you CAN get there by car - driving is checked first - so the data still stands as a measure of the availability of public transport.