Source code for get_weather_data.weather.lookup

"""Weather data lookup by ZIP code."""

import logging
from dataclasses import dataclass, field
from datetime import date, timedelta
from functools import lru_cache

from get_weather_data.core.database import Database
from get_weather_data.core.distance import find_closest
from get_weather_data.weather.ghcn import GHCN_ELEMENTS, get_ghcn_data
from get_weather_data.weather.gsod import get_gsod_data

logger = logging.getLogger("get_weather_data")


[docs] @dataclass class WeatherResult: """Weather data result for a ZIP code and date.""" zipcode: str date: date station_id: str | None = None station_name: str | None = None station_type: str | None = None station_distance_meters: int | None = None tmax: float | None = None tmin: float | None = None tavg: float | None = None prcp: float | None = None snow: float | None = None snwd: float | None = None awnd: float | None = None
@lru_cache(maxsize=1024) def _cached_ghcn_data( station_id: str, year: int, month: int, day: int ) -> dict[str, float | None]: """Cached GHCN data lookup.""" return get_ghcn_data(station_id, date(year, month, day)) @lru_cache(maxsize=1024) def _cached_gsod_data( station_id: str, year: int, month: int, day: int ) -> dict[str, float | None]: """Cached GSOD data lookup.""" return get_gsod_data(station_id, date(year, month, day)) @dataclass class WeatherLookup: """Look up weather data for ZIP codes. Uses caching for improved performance on repeated queries. """ db: Database = field(default_factory=Database) max_stations: int = 10 # Try more stations before giving up (fallback) max_distance_meters: int | None = None use_ghcn: bool = True use_gsod: bool = True use_cache: bool = True def __post_init__(self) -> None: """Preload caches for efficiency.""" if self.db.exists(): self.db.preload_caches() def get_weather( self, zipcode: str, target_date: date, elements: list[str] | None = None, ) -> WeatherResult: """Get weather data for a ZIP code and date. Searches closest stations until data is found. Args: zipcode: 5-digit US ZIP code. target_date: Date to get weather for. elements: List of elements to retrieve. Returns: WeatherResult with available data. """ zipcode = zipcode.zfill(5) if elements is None: elements = GHCN_ELEMENTS result = WeatherResult(zipcode=zipcode, date=target_date) coords = self.db.get_zipcode(zipcode) if coords is None: logger.warning(f"ZIP code {zipcode} not found in database") return result lat, lon = coords closest = self.db.get_closest_stations(zipcode) if not closest: ghcn_stations = self.db.get_stations(station_type="GHCND") usaf_stations = self.db.get_stations(station_type="USAF-WBAN") ghcn_closest = find_closest(lat, lon, ghcn_stations, n=3) usaf_closest = find_closest(lat, lon, usaf_stations, n=2) closest = [(sd.station.id, sd.distance_meters) for sd in ghcn_closest] closest.extend([(sd.station.id, sd.distance_meters) for sd in usaf_closest]) closest.sort(key=lambda x: x[1]) values: dict[str, float | None] = {} found_elements: set[str] = set() for station_id, distance in closest[: self.max_stations]: if self.max_distance_meters and distance > self.max_distance_meters: break if elements and len(found_elements) >= len(elements): break station_info = self.db.get_station_info(station_id) if not station_info: continue station_name, station_type = station_info if station_type == "GHCND" and self.use_ghcn: if self.use_cache: data = _cached_ghcn_data( station_id, target_date.year, target_date.month, target_date.day, ) else: data = get_ghcn_data(station_id, target_date, elements) elif station_type == "USAF-WBAN" and self.use_gsod: if self.use_cache: gsod_data = _cached_gsod_data( station_id, target_date.year, target_date.month, target_date.day, ) else: gsod_data = get_gsod_data(station_id, target_date) data = { "TMAX": gsod_data.get("max_temp"), "TMIN": gsod_data.get("min_temp"), "TAVG": gsod_data.get("temp"), "PRCP": gsod_data.get("precipitation"), "SNWD": gsod_data.get("snow_depth"), "AWND": gsod_data.get("wind_speed"), } data = {k: v * 10 if v else None for k, v in data.items()} else: continue for elem, val in data.items(): if elem not in found_elements and val is not None: values[elem] = val found_elements.add(elem) if result.station_id is None: result.station_id = station_id result.station_name = station_name result.station_type = station_type result.station_distance_meters = distance result.tmax = values.get("TMAX") result.tmin = values.get("TMIN") result.tavg = values.get("TAVG") result.prcp = values.get("PRCP") result.snow = values.get("SNOW") result.snwd = values.get("SNWD") result.awnd = values.get("AWND") return result def get_weather_range( self, zipcode: str, start_date: date, end_date: date, elements: list[str] | None = None, ) -> list[WeatherResult]: """Get weather data for a ZIP code over a date range. Args: zipcode: 5-digit US ZIP code. start_date: Start date. end_date: End date. elements: List of elements to retrieve. Returns: List of WeatherResult objects, one per day. """ results = [] current = start_date while current <= end_date: results.append(self.get_weather(zipcode, current, elements)) current += timedelta(days=1) return results def clear_cache(self) -> None: """Clear the weather data cache.""" _cached_ghcn_data.cache_clear() _cached_gsod_data.cache_clear() def cache_info(self) -> dict: """Get cache statistics.""" return { "ghcn": _cached_ghcn_data.cache_info(), "gsod": _cached_gsod_data.cache_info(), }