# nhtsa_japan_domestic.py
"""Seed MongoDB with makes of Japan-based manufacturers and their models.

Data comes from the NHTSA vPIC API at ``BASE_URL``.  Makes are stored in the
``jdm_makes`` collection and (make, year, model) triples in ``jdm_models``.

Importing this module connects to MongoDB and ensures the unique indexes
exist; the actual seeding only runs when the file is executed as a script.
"""
import os
from typing import Dict, List, Set

import requests
from pymongo import ASCENDING, MongoClient

BASE_URL = 'https://vpic.nhtsa.dot.gov/api/vehicles'
FMT = 'format=json'
# Seconds to wait on each vPIC request; requests applies no timeout by
# default, so a stalled connection would otherwise block the script forever.
REQUEST_TIMEOUT = 30

# — MongoDB setup
# SECURITY(review): the fallback URI below embeds a username and password in
# source control.  It is kept only so existing runs without MONGO_URI set keep
# working; rotate this credential and supply MONGO_URI via the environment.
MONGO_URI = os.getenv('MONGO_URI', 'mongodb+srv://goldfishjonny:Tt0BBMbzai477RIM@csc491.g36va.mongodb.net/')
MONGO_DB = os.getenv('MONGO_DB', 'RevSocial')
client = MongoClient(MONGO_URI)
db = client[MONGO_DB]

# — Collections & Indexes
makes_col = db['jdm_makes']
models_col = db['jdm_models']
# Unique indexes make the upserts below idempotent across repeated runs.
makes_col.create_index('make_name', unique=True)
models_col.create_index(
    [('make', ASCENDING), ('year', ASCENDING), ('model', ASCENDING)],
    unique=True,
)


def _fetch_results(path: str) -> List[Dict]:
    """GET ``{BASE_URL}/{path}`` and return the ``Results`` list of its JSON body.

    Raises:
        requests.HTTPError: if the response status is 4xx/5xx.
        requests.Timeout: if the server does not answer within REQUEST_TIMEOUT.
    """
    response = requests.get(f'{BASE_URL}/{path}', timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()['Results']


def get_all_manufacturers(page: int = 1) -> List[Dict]:
    """Return one page of manufacturers.

    Endpoint: /vehicles/GetAllManufacturers?format=json&page={page}
    """
    return _fetch_results(f'GetAllManufacturers?{FMT}&page={page}')


def get_makes_for_manufacturer(mfr_id: int) -> List[Dict]:
    """Return the makes registered for the manufacturer with id *mfr_id*.

    Endpoint: /vehicles/GetMakeForManufacturer/{mfr_id}?format=json
    """
    return _fetch_results(f'GetMakeForManufacturer/{mfr_id}?{FMT}')


def produces_strict_passenger_car(make: str) -> bool:
    """Return True if *make* has a vehicle type named exactly "Passenger Car".

    The comparison ignores case and surrounding whitespace.

    Endpoint: /vehicles/GetVehicleTypesForMake/{make}?format=json
    """
    results = _fetch_results(
        f'GetVehicleTypesForMake/{requests.utils.quote(make)}?{FMT}'
    )
    # `or ''` guards against a null VehicleTypeName in the JSON payload.
    return any(
        (v.get('VehicleTypeName') or '').strip().lower() == 'passenger car'
        for v in results
    )


def seed_jdm_makes() -> List[str]:
    """Store every passenger-car make of a Japan-based manufacturer.

    Walks all pages of the manufacturer list, keeps manufacturers whose
    ``Country`` is "japan" (case-insensitive), and upserts each of their makes
    that passes `produces_strict_passenger_car` into ``jdm_makes``.

    Returns:
        The stored make names, sorted.
    """
    jdm_makes: Set[str] = set()
    # Make names already examined, so a make listed under several
    # manufacturers costs only one vehicle-type lookup and one upsert.
    checked: Set[str] = set()
    page = 1
    while True:
        mfrs = get_all_manufacturers(page)
        if not mfrs:
            # An empty page is the only signal that the listing is exhausted.
            break
        for m in mfrs:
            # Country may be missing or null; treat both as "not Japan".
            if (m.get('Country') or '').lower() != 'japan':
                continue
            for mk in get_makes_for_manufacturer(m['Mfr_ID']):
                name = mk['Make_Name']
                if name in checked:
                    continue
                checked.add(name)
                if produces_strict_passenger_car(name):
                    jdm_makes.add(name)
                    makes_col.update_one(
                        {'make_name': name},
                        {'$set': {'make_name': name}},
                        upsert=True,
                    )
        # Do NOT stop on a page without Japanese manufacturers: nothing here
        # orders results by country, so later pages can still contain them.
        page += 1
    return sorted(jdm_makes)


def seed_models_for_years(makes: List[str], years: List[int]) -> None:
    """Upsert one ``jdm_models`` document per (make, year, model).

    Prints a one-line summary for every (year, make) pair processed.

    Endpoint: /vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json
    """
    for make in makes:
        quoted_make = requests.utils.quote(make)
        for yr in years:
            results = _fetch_results(
                f'GetModelsForMakeYear/make/{quoted_make}/modelyear/{yr}?{FMT}'
            )
            models = [m['Model_Name'] for m in results]
            for model in models:
                doc = {'make': make, 'year': yr, 'model': model}
                models_col.update_one(doc, {'$set': doc}, upsert=True)
            print(f"{yr} {make}: {len(models)} models")


def main() -> None:
    """Seed the makes, then their models for model years 2015 through 2025."""
    print("Seeding JDM makes…")
    jdm = seed_jdm_makes()
    print(f"→ {len(jdm)} Japanese domestic‐market makes stored in '{MONGO_DB}.jdm_makes'")
    years = list(range(2015, 2026))
    seed_models_for_years(jdm, years)
    print("Done seeding Japanese domestic cars.")


if __name__ == '__main__':
    main()