Source code for lost_years.utils
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Any
import pandas as pd
import requests
if TYPE_CHECKING:
import numpy as np
import numpy.typing as npt
# Setup logger
logger = logging.getLogger(__name__)
[docs]
def isstring(s: Any) -> bool:
return isinstance(s, str)
[docs]
def column_exists(df: pd.DataFrame, col: str | None) -> bool:
"""Check the column name exists in the DataFrame.
Args:
df: Pandas DataFrame.
col: Column name.
Returns:
bool: True if exists, False if not exists.
"""
if col and (col not in df.columns):
logger.warning(f"The specify column `{col!s}` not found in the input file")
return False
else:
return True
[docs]
def fixup_columns(cols: list[Any]) -> list[str]:
"""Replace index location column to name with `col` prefix
Args:
cols: List of original columns
Returns:
List of column names
"""
out_cols = []
for col in cols:
if isinstance(col, int):
out_cols.append(f"col{col:d}")
else:
out_cols.append(col)
return out_cols
[docs]
def closest(lst: "list[float] | npt.NDArray[np.floating[Any]]", c: float) -> float:
"""Find closest value in list or array.
Args:
lst: List of floats or numpy array
c: Target value to find closest match for
Returns:
Closest value in the list/array
"""
# Convert numpy array to list if needed
working_list: list[float]
if hasattr(lst, "tolist"): # numpy array
working_list = lst.tolist() # type: ignore[attr-defined]
else:
working_list = lst # type: ignore[assignment]
return working_list[min(range(len(working_list)), key=lambda i: abs(working_list[i] - c))]
[docs]
def download_file(url: str, local_path: str | Path | None = None) -> None:
match local_path:
case None:
local_path = Path(url.split("/")[-1])
case str():
local_path = Path(local_path)
case _:
pass # Already a Path object
r = requests.get(url)
with local_path.open("wb") as f:
for chunk in r.iter_content(chunk_size=512 * 1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)