# nhtsa_japan_domestic.py
# Seeds MongoDB with makes from Japan-based manufacturers, and their models
# per model year, using the NHTSA vPIC vehicle API.
|
||
|
||
import os
|
||
import requests
|
||
from pymongo import MongoClient, ASCENDING
|
||
from typing import List, Dict, Set
|
||
|
||
# Root of the NHTSA vPIC vehicle API and the query fragment that asks for JSON.
BASE_URL = 'https://vpic.nhtsa.dot.gov/api/vehicles'
FMT = 'format=json'

# — MongoDB setup
# The connection string carries a username and password, so it must be
# supplied through the environment rather than committed as a default.
MONGO_URI = os.getenv('MONGO_URI')
if not MONGO_URI:
    raise RuntimeError(
        'MONGO_URI environment variable is not set; export the MongoDB '
        'connection string before running this script.'
    )
MONGO_DB = os.getenv('MONGO_DB', 'RevSocial')
client = MongoClient(MONGO_URI)
db = client[MONGO_DB]
|
||
|
||
# — Collections & Indexes
makes_col = db['jdm_makes']
models_col = db['jdm_models']

# A make name may appear only once in jdm_makes.
makes_col.create_index('make_name', unique=True)

# A (make, year, model) combination may appear only once in jdm_models.
models_col.create_index(
    [(field, ASCENDING) for field in ('make', 'year', 'model')],
    unique=True,
)
|
||
|
||
def get_all_manufacturers(page: int = 1, timeout: float = 30.0) -> List[Dict]:
    """Fetch one page of the vPIC manufacturer list.

    Endpoint: /vehicles/GetAllManufacturers?format=json&page={page}

    Args:
        page: Page number to request.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` blocks indefinitely on a stalled connection.

    Returns:
        The ``Results`` list from the JSON response; an empty list when the
        API reports ``Results`` as null.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout``.
        KeyError: If the response has no ``Results`` field.
    """
    r = requests.get(
        f'{BASE_URL}/GetAllManufacturers?{FMT}&page={page}',
        timeout=timeout,
    )
    r.raise_for_status()
    # Normalise a null payload so callers can always iterate the result.
    return r.json()['Results'] or []
|
||
|
||
def get_makes_for_manufacturer(mfr_id: int, timeout: float = 30.0) -> List[Dict]:
    """Fetch the makes registered under one manufacturer.

    Endpoint: /vehicles/GetMakeForManufacturer/{mfr_id}?format=json

    Args:
        mfr_id: Manufacturer identifier placed in the request path.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` blocks indefinitely on a stalled connection.

    Returns:
        The ``Results`` list from the JSON response; an empty list when the
        API reports ``Results`` as null.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout``.
        KeyError: If the response has no ``Results`` field.
    """
    r = requests.get(
        f'{BASE_URL}/GetMakeForManufacturer/{mfr_id}?{FMT}',
        timeout=timeout,
    )
    r.raise_for_status()
    # Normalise a null payload so callers can always iterate the result.
    return r.json()['Results'] or []
|
||
|
||
def produces_strict_passenger_car(make: str, timeout: float = 30.0) -> bool:
    """Report whether a make lists "Passenger Car" among its vehicle types.

    Endpoint: /vehicles/GetVehicleTypesForMake/{make}?format=json

    The type name must equal "passenger car" after trimming surrounding
    whitespace and ignoring case; names that merely contain the phrase do
    not count.

    Args:
        make: Make name; it is URL-quoted before being placed in the path.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` blocks indefinitely on a stalled connection.

    Returns:
        True if any returned vehicle type matches, False otherwise.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout``.
        KeyError: If the response has no ``Results`` field.
    """
    r = requests.get(
        f'{BASE_URL}/GetVehicleTypesForMake/{requests.utils.quote(make)}?{FMT}',
        timeout=timeout,
    )
    r.raise_for_status()
    results = r.json()['Results'] or []
    # A missing or null VehicleTypeName is treated as "no type", not a crash.
    return any(
        (v.get('VehicleTypeName') or '').strip().lower() == 'passenger car'
        for v in results
    )
|
||
|
||
def seed_jdm_makes() -> List[str]:
    """Collect and store passenger-car makes of Japan-based manufacturers.

    Walks every page of the manufacturer list, keeps manufacturers whose
    ``Country`` is "japan" (case-insensitive), and for each of their makes
    asks ``produces_strict_passenger_car``. Qualifying makes are upserted
    into ``makes_col`` keyed by ``make_name``.

    Returns:
        The qualifying make names, sorted.

    Raises:
        requests.HTTPError: If any API request fails; seeding stops there.
    """
    jdm_makes: Set[str] = set()
    # Makes already evaluated, so a make shared by several manufacturers
    # costs only one vehicle-type request.
    checked: Set[str] = set()

    page = 1
    while True:
        mfrs = get_all_manufacturers(page)
        if not mfrs:
            # An empty page is the only reliable end-of-list signal.
            break
        page += 1

        for m in mfrs:
            # Manufacturers are not grouped by country, so a page without
            # any Japanese entry must not end the scan; just skip the rest.
            if (m.get('Country') or '').strip().lower() != 'japan':
                continue

            for mk in get_makes_for_manufacturer(m['Mfr_ID']):
                name = mk['Make_Name']
                if name in checked:
                    continue
                checked.add(name)

                if produces_strict_passenger_car(name):
                    jdm_makes.add(name)
                    makes_col.update_one(
                        {'make_name': name},
                        {'$set': {'make_name': name}},
                        upsert=True,
                    )

    return sorted(jdm_makes)
|
||
|
||
def seed_models_for_years(
    makes: List[str],
    years: List[int],
    timeout: float = 30.0,
) -> None:
    """Upsert one document per (make, year, model) into ``models_col``.

    Endpoint:
    /vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json

    Args:
        makes: Make names to query; each is URL-quoted for the request path.
        years: Model years to query for every make.
        timeout: Seconds to wait for each API response. Without a timeout
            ``requests`` blocks indefinitely on a stalled connection.

    Raises:
        requests.HTTPError: If any API request fails; seeding stops there.
        requests.Timeout: If a response does not arrive within ``timeout``.
    """
    for make in makes:
        quoted_make = requests.utils.quote(make)
        for yr in years:
            url = (
                f'{BASE_URL}/GetModelsForMakeYear/'
                f'make/{quoted_make}/modelyear/{yr}?{FMT}'
            )
            r = requests.get(url, timeout=timeout)
            r.raise_for_status()

            # Entries without a model name would only create junk documents.
            models = [
                m['Model_Name']
                for m in (r.json()['Results'] or [])
                if m.get('Model_Name')
            ]
            for model in models:
                doc = {'make': make, 'year': yr, 'model': model}
                # The filter equals the document, so re-running is idempotent.
                models_col.update_one(doc, {'$set': doc}, upsert=True)
            print(f"{yr} {make}: {len(models)} models")
|
||
|
||
if __name__ == '__main__':
    # Step 1: discover and store the makes.
    print("Seeding JDM makes…")
    jdm = seed_jdm_makes()
    print(f"→ {len(jdm)} Japanese domestic‐market makes stored in '{MONGO_DB}.jdm_makes'")

    # Step 2: store the models of every make for model years 2015 through 2025.
    YEARS = [*range(2015, 2026)]
    seed_models_for_years(jdm, YEARS)

    print("Done seeding Japanese domestic cars.")