123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- '''This code is part of the final project.
- After I have audited and cleaned the data and transfered everything into table
- in my database, I can start running queries on it.
- '''
- import sqlite3
- import csv
- from pprint import pprint
- # Put the path to your sqlite database.
- # If no database is available, a new one will be created.
- sqlite_file = 'openstreetmap_sf_db.sqlite'
- # Connecting to the database.
- conn = sqlite3.connect(sqlite_file)
- cur = conn.cursor()
- # Number of Nodes
- def number_of_nodes():
- output = cur.execute('SELECT COUNT(*) FROM nodes')
- return output.fetchone()[0]
- print('Number of nodes: \n' , number_of_nodes())
- # Number of Ways
- def number_of_ways():
- output = cur.execute('SELECT COUNT(*) FROM ways')
- return output.fetchone()[0]
- print('Number of ways: \n', number_of_ways())
- # Number of Unique Users
- def number_of_unique_users():
- output = cur.execute('SELECT COUNT(DISTINCT e.uid) FROM \
- (SELECT uid FROM nodes UNION ALL SELECT uid FROM ways) e')
- return output.fetchone()[0]
- print('Number of unique users: \n' , number_of_unique_users())
- # Most Contributing Users
- def most_contributing_users():
- output = cur.execute('SELECT e.user, COUNT(*) as num FROM \
- (SELECT user FROM nodes UNION ALL SELECT user FROM ways) e \
- GROUP BY e.user \
- ORDER BY num DESC \
- LIMIT 10 ')
- print("Most contributing users: \n")
- pprint(output.fetchall())
- return None
- print('Most contributing users: \n', most_contributing_users())
- # Number of Users Who Contributed Once
- def number_of_users_contributed_once():
- output = cur.execute('SELECT COUNT(*) FROM \
- (SELECT e.user, COUNT(*) as num FROM \
- (SELECT user FROM nodes UNION ALL SELECT user FROM ways) e \
- GROUP BY e.user \
- HAVING num = 1) u')
- return output.fetchone()[0]
- print('Number of users who have contributed once: \n', number_of_users_contributed_once())
- # Top 10 Cuisines in San Francisco
- query = "SELECT value, COUNT(*) as num FROM nodes_tags \
- WHERE key=\"b'amenity'\" \
- GROUP BY value \
- ORDER BY num DESC \
- LIMIT 20"
- # Top 10 Amenities in San Francisco
- def top_ten_amenities_in_sf():
- output = cur.execute(query)
- pprint(output.fetchall())
- return None
- print('Top 10 Amenities:\n')
- top_ten_amenities_in_sf()
- # Top 10 Cuisines in San Francisco
- query = "SELECT value, COUNT(*) as num FROM ways_tags \
- WHERE key=\"b'cuisine'\" \
- GROUP BY value \
- ORDER BY num DESC \
- LIMIT 10"
- def cuisines_in_sf():
- output = cur.execute(query)
- pprint(output.fetchall())
- return None
- print('Top 10 Cuisines in San Francisco:\n')
- cuisines_in_sf()
- # Different Types of Shops
- query = "SELECT value, COUNT(*) as num FROM nodes_tags \
- WHERE key=\"b'shop'\" \
- GROUP BY value \
- ORDER BY num DESC \
- LIMIT 10"
- def shops_in_sf():
- output = cur.execute(query)
- pprint(output.fetchall())
- return None
- print('Different types of shops:\n')
- shops_in_sf()
- # Popular Cafes in San Francisco
- def most_popular_cafes():
- output = cur.execute('SELECT nodes_tags.value, COUNT(*) as num \
- FROM nodes_tags \
- JOIN (SELECT DISTINCT(id) FROM nodes_tags WHERE value="coffee_shop") AS cafes \
- ON nodes_tags.id = cafes.id \
- WHERE nodes_tags.key="name"\
- GROUP BY nodes_tags.value \
- ORDER BY num DESC \
- LIMIT 10' ) # Remove this limit to see the complete list of postcodes
- pprint(output.fetchall())
- return output.fetchall()
- print('Most popular cafes in San Francisco: \n')
- most_popular_cafes()
|