RevSocial/nhtsa_jp.py
2025-05-03 16:54:30 -07:00

112 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# nhtsa_japan_domestic.py
import os
import requests
from pymongo import MongoClient, ASCENDING
from typing import List, Dict, Set
# Root of the NHTSA vPIC vehicle API and the query fragment requesting JSON.
BASE_URL = 'https://vpic.nhtsa.dot.gov/api/vehicles'
FMT = 'format=json'

# MongoDB setup
# The connection string is read from the environment only. Credentials must
# never be committed as a fallback default: anyone with read access to the
# repository could use them. NOTE(review): the credentials that were
# previously embedded here should be treated as leaked and rotated.
MONGO_URI = os.getenv('MONGO_URI')
if not MONGO_URI:
    raise RuntimeError(
        'MONGO_URI is not set; export the MongoDB connection string in the '
        'environment instead of hard-coding credentials in this file.'
    )
MONGO_DB = os.getenv('MONGO_DB', 'RevSocial')
client = MongoClient(MONGO_URI)
db = client[MONGO_DB]

# Collections & indexes
makes_col = db['jdm_makes']
models_col = db['jdm_models']
# One document per make name.
makes_col.create_index('make_name', unique=True)
# One document per (make, year, model) combination.
models_col.create_index(
    [('make', ASCENDING), ('year', ASCENDING), ('model', ASCENDING)],
    unique=True,
)
def get_all_manufacturers(page: int = 1, *, timeout: float = 30.0) -> List[Dict]:
    """Fetch one page of the vPIC manufacturer list.

    Requests ``{BASE_URL}/GetAllManufacturers?format=json&page={page}``.

    Args:
        page: Page number to request.
        timeout: Seconds to wait for the server. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        The ``Results`` list of the JSON payload, or an empty list when the
        payload carries a null/empty ``Results``.

    Raises:
        requests.HTTPError: The response status was 4xx/5xx.
        requests.Timeout: The server did not answer within ``timeout``.
        KeyError: The payload has no ``Results`` key.
    """
    r = requests.get(
        f'{BASE_URL}/GetAllManufacturers?{FMT}&page={page}',
        timeout=timeout,
    )
    r.raise_for_status()
    # Normalise a null "Results" to [] so callers can always iterate.
    return r.json()['Results'] or []
def get_makes_for_manufacturer(mfr_id: int, *, timeout: float = 30.0) -> List[Dict]:
    """Fetch the makes that vPIC associates with one manufacturer.

    Requests ``{BASE_URL}/GetMakeForManufacturer/{mfr_id}?format=json``.

    Args:
        mfr_id: Manufacturer identifier placed in the URL path.
        timeout: Seconds to wait for the server. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        The ``Results`` list of the JSON payload, or an empty list when the
        payload carries a null/empty ``Results``.

    Raises:
        requests.HTTPError: The response status was 4xx/5xx.
        requests.Timeout: The server did not answer within ``timeout``.
        KeyError: The payload has no ``Results`` key.
    """
    r = requests.get(
        f'{BASE_URL}/GetMakeForManufacturer/{mfr_id}?{FMT}',
        timeout=timeout,
    )
    r.raise_for_status()
    # Normalise a null "Results" to [] so callers can always iterate.
    return r.json()['Results'] or []
def produces_strict_passenger_car(make: str, *, timeout: float = 30.0) -> bool:
    """Report whether vPIC lists "Passenger Car" among a make's vehicle types.

    Requests ``{BASE_URL}/GetVehicleTypesForMake/{make}?format=json`` with
    the make name percent-encoded. Each returned ``VehicleTypeName`` is
    compared to ``"passenger car"`` after trimming whitespace and
    lower-casing, so the match is whole-name but case-insensitive.

    Args:
        make: Make name to look up.
        timeout: Seconds to wait for the server. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        True if any returned vehicle type is "Passenger Car"; False
        otherwise, including when no types are returned.

    Raises:
        requests.HTTPError: The response status was 4xx/5xx.
        requests.Timeout: The server did not answer within ``timeout``.
        KeyError: The payload has no ``Results`` key.
    """
    url = (
        f'{BASE_URL}/GetVehicleTypesForMake/'
        f'{requests.utils.quote(make)}?{FMT}'
    )
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # A missing or null type name is treated as "not a passenger car"
    # instead of crashing on None.strip().
    return any(
        (v.get('VehicleTypeName') or '').strip().lower() == 'passenger car'
        for v in (r.json()['Results'] or [])
    )
def seed_jdm_makes() -> List[str]:
    """Collect and store passenger-car makes of Japan-based manufacturers.

    Pages through ``get_all_manufacturers`` until an empty page is returned.
    For every manufacturer whose ``Country`` equals "japan"
    (case-insensitive), each of its makes is checked with
    ``produces_strict_passenger_car``; matching make names are upserted into
    ``makes_col`` as ``{'make_name': name}``.

    Returns:
        The matching make names, de-duplicated and sorted.

    Raises:
        Whatever the underlying HTTP helpers or the MongoDB upsert raise;
        nothing is caught here.
    """
    jdm_makes: Set[str] = set()
    # Makes already looked up, so a make shared by several manufacturers
    # costs only one vehicle-type request.
    checked: Set[str] = set()
    page = 1
    while True:
        mfrs = get_all_manufacturers(page)
        if not mfrs:
            # An empty page is the only end-of-list signal. A page that
            # merely lacks Japan entries must NOT stop the scan, because
            # later pages may still contain some.
            break
        page += 1
        for m in mfrs:
            # `Country` may be present but null; treat that as no match.
            if (m.get('Country') or '').lower() != 'japan':
                continue
            for mk in get_makes_for_manufacturer(m['Mfr_ID']):
                name = mk['Make_Name']
                if name in checked:
                    continue
                checked.add(name)
                if produces_strict_passenger_car(name):
                    jdm_makes.add(name)
                    makes_col.update_one(
                        {'make_name': name},
                        {'$set': {'make_name': name}},
                        upsert=True,
                    )
    return sorted(jdm_makes)
def seed_models_for_years(
    makes: List[str],
    years: List[int],
    *,
    timeout: float = 30.0,
) -> None:
    """Upsert one ``models_col`` document per (make, year, model).

    For every make/year pair, requests
    ``{BASE_URL}/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json``
    and upserts ``{'make', 'year', 'model'}`` for each returned
    ``Model_Name``. A progress line is printed per make/year pair.

    Args:
        makes: Make names to query; each is percent-encoded into the URL.
        years: Model years to query for every make.
        timeout: Seconds to wait for each response. Without a timeout a
            stalled connection would block the run indefinitely.

    Raises:
        requests.HTTPError: A response status was 4xx/5xx (aborts the run).
        requests.Timeout: The server did not answer within ``timeout``.
        KeyError: A payload has no ``Results`` key or an entry has no
            ``Model_Name``.
    """
    # One session for the whole run so the HTTP connection is reused
    # across the len(makes) * len(years) requests.
    with requests.Session() as session:
        for make in makes:
            quoted_make = requests.utils.quote(make)
            for yr in years:
                url = (
                    f'{BASE_URL}/GetModelsForMakeYear/'
                    f'make/{quoted_make}/modelyear/{yr}?{FMT}'
                )
                r = session.get(url, timeout=timeout)
                r.raise_for_status()
                # A null "Results" is treated as "no models".
                models = [m['Model_Name'] for m in (r.json()['Results'] or [])]
                for model in models:
                    models_col.update_one(
                        {'make': make, 'year': yr, 'model': model},
                        {'$set': {'make': make, 'year': yr, 'model': model}},
                        upsert=True,
                    )
                print(f"{yr} {make}: {len(models)} models")
if __name__ == '__main__':
    # Step 1: discover and store the makes of Japan-based manufacturers.
    print("Seeding JDM makes…")
    jdm = seed_jdm_makes()
    print(f"{len(jdm)} Japanese domestic-market makes stored in '{MONGO_DB}.jdm_makes'")
    # Step 2: store the models of those makes for model years 2015-2025
    # (the range end, 2026, is exclusive).
    YEARS = list(range(2015, 2026))
    seed_models_for_years(jdm, YEARS)
    print("Done seeding Japanese domestic cars.")