query_on_database.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. '''This code is part of the final project.
  2. After I have audited and cleaned the data and transfered everything into table
  3. in my database, I can start running queries on it.
  4. '''
  5. import sqlite3
  6. import csv
  7. from pprint import pprint
  8. # Put the path to your sqlite database.
  9. # If no database is available, a new one will be created.
  10. sqlite_file = 'openstreetmap_sf_db.sqlite'
  11. # Connecting to the database.
  12. conn = sqlite3.connect(sqlite_file)
  13. cur = conn.cursor()
  14. # Number of Nodes
  15. def number_of_nodes():
  16. output = cur.execute('SELECT COUNT(*) FROM nodes')
  17. return output.fetchone()[0]
  18. print('Number of nodes: \n' , number_of_nodes())
  19. # Number of Ways
  20. def number_of_ways():
  21. output = cur.execute('SELECT COUNT(*) FROM ways')
  22. return output.fetchone()[0]
  23. print('Number of ways: \n', number_of_ways())
  24. # Number of Unique Users
  25. def number_of_unique_users():
  26. output = cur.execute('SELECT COUNT(DISTINCT e.uid) FROM \
  27. (SELECT uid FROM nodes UNION ALL SELECT uid FROM ways) e')
  28. return output.fetchone()[0]
  29. print('Number of unique users: \n' , number_of_unique_users())
  30. # Most Contributing Users
  31. def most_contributing_users():
  32. output = cur.execute('SELECT e.user, COUNT(*) as num FROM \
  33. (SELECT user FROM nodes UNION ALL SELECT user FROM ways) e \
  34. GROUP BY e.user \
  35. ORDER BY num DESC \
  36. LIMIT 10 ')
  37. print("Most contributing users: \n")
  38. pprint(output.fetchall())
  39. return None
  40. print('Most contributing users: \n', most_contributing_users())
  41. # Number of Users Who Contributed Once
  42. def number_of_users_contributed_once():
  43. output = cur.execute('SELECT COUNT(*) FROM \
  44. (SELECT e.user, COUNT(*) as num FROM \
  45. (SELECT user FROM nodes UNION ALL SELECT user FROM ways) e \
  46. GROUP BY e.user \
  47. HAVING num = 1) u')
  48. return output.fetchone()[0]
  49. print('Number of users who have contributed once: \n', number_of_users_contributed_once())
  50. # Top 10 Cuisines in San Francisco
  51. query = "SELECT value, COUNT(*) as num FROM nodes_tags \
  52. WHERE key=\"b'amenity'\" \
  53. GROUP BY value \
  54. ORDER BY num DESC \
  55. LIMIT 20"
  56. # Top 10 Amenities in San Francisco
  57. def top_ten_amenities_in_sf():
  58. output = cur.execute(query)
  59. pprint(output.fetchall())
  60. return None
  61. print('Top 10 Amenities:\n')
  62. top_ten_amenities_in_sf()
  63. # Top 10 Cuisines in San Francisco
  64. query = "SELECT value, COUNT(*) as num FROM ways_tags \
  65. WHERE key=\"b'cuisine'\" \
  66. GROUP BY value \
  67. ORDER BY num DESC \
  68. LIMIT 10"
  69. def cuisines_in_sf():
  70. output = cur.execute(query)
  71. pprint(output.fetchall())
  72. return None
  73. print('Top 10 Cuisines in San Francisco:\n')
  74. cuisines_in_sf()
  75. # Different Types of Shops
  76. query = "SELECT value, COUNT(*) as num FROM nodes_tags \
  77. WHERE key=\"b'shop'\" \
  78. GROUP BY value \
  79. ORDER BY num DESC \
  80. LIMIT 10"
  81. def shops_in_sf():
  82. output = cur.execute(query)
  83. pprint(output.fetchall())
  84. return None
  85. print('Different types of shops:\n')
  86. shops_in_sf()
  87. # Popular Cafes in San Francisco
  88. def most_popular_cafes():
  89. output = cur.execute('SELECT nodes_tags.value, COUNT(*) as num \
  90. FROM nodes_tags \
  91. JOIN (SELECT DISTINCT(id) FROM nodes_tags WHERE value="coffee_shop") AS cafes \
  92. ON nodes_tags.id = cafes.id \
  93. WHERE nodes_tags.key="name"\
  94. GROUP BY nodes_tags.value \
  95. ORDER BY num DESC \
  96. LIMIT 10' ) # Remove this limit to see the complete list of postcodes
  97. pprint(output.fetchall())
  98. return output.fetchall()
  99. print('Most popular cafes in San Francisco: \n')
  100. most_popular_cafes()