Source code for search_and_replace.cli

"""Command-line interface."""

from __future__ import annotations

import argparse
import logging
import sys
from pathlib import Path

from search_and_replace import __version__
from search_and_replace.batch import load_patterns, load_replacements, process_directory


def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``search-and-replace`` command."""
    parser = argparse.ArgumentParser(
        prog="search-and-replace",
        description="High-performance text correction for OCR output",
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    parser.add_argument("source_dir", type=Path, help="Source directory")
    parser.add_argument(
        "-o", "--outdir", type=Path, default=Path("postprocessed"), help="Output directory"
    )
    # The two rules files default to None (not to their conventional paths) so
    # that main() can tell "option omitted" apart from "option given explicitly".
    parser.add_argument("-w", "--patterns", type=Path, default=None, help="Patterns CSV")
    parser.add_argument(
        "-r", "--replacements", type=Path, default=None, help="Replacements CSV"
    )
    parser.add_argument("--resume", action="store_true", help="Skip existing files")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("-p", "--pattern", default="*.txt", help="File glob")
    parser.add_argument("-j", "--jobs", type=int, help="Worker count")
    return parser


def _resolve_rules_file(requested: Path | None, fallback: Path, label: str) -> Path | None:
    """Pick the rules file to load, or return ``None`` when there is none.

    Args:
        requested: Path given on the command line, or ``None`` if the option
            was omitted.
        fallback: Conventional path tried when the option was omitted.
        label: Human-readable name of the file kind, used in the warning.

    Returns:
        The existing path to load, or ``None`` if the chosen path does not exist.
    """
    candidate = fallback if requested is None else requested
    if candidate.exists():
        return candidate
    # A missing default file is normal (the feature is simply unused), but a
    # path the user typed out and that is absent is almost certainly a mistake.
    if requested is not None:
        logging.warning("%s file not found, continuing without it: %s", label, candidate)
    return None


def main(argv: list[str] | None = None) -> int:
    """Run the command-line interface.

    Parses the arguments, loads the optional patterns and replacements files,
    and hands the source directory to ``process_directory``.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``1`` if the source directory is not an existing directory, ``0``
        after ``process_directory`` has returned.

    Raises:
        SystemExit: Raised by argparse for ``--version``, ``--help`` and
            invalid arguments.
    """
    args = _build_parser().parse_args(argv)

    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        format="%(message)s",
    )

    if not args.source_dir.is_dir():
        logging.error("Source directory does not exist: %s", args.source_dir)
        return 1

    patterns_path = _resolve_rules_file(args.patterns, Path("patterns.csv"), "Patterns")
    patterns = load_patterns(patterns_path) if patterns_path is not None else None

    replacements_path = _resolve_rules_file(
        args.replacements, Path("replacements.csv"), "Replacements"
    )
    replacements = (
        load_replacements(replacements_path) if replacements_path is not None else None
    )

    if patterns:
        logging.info("Loaded %d patterns from %s", len(patterns), patterns_path)
    if replacements:
        logging.info("Loaded %d replacements from %s", len(replacements), replacements_path)

    processed, skipped = process_directory(
        args.source_dir,
        args.outdir,
        patterns,
        replacements,
        pattern=args.pattern,
        resume=args.resume,
        jobs=args.jobs,
    )

    logging.info("Processed %d files, skipped %d files", processed, skipped)
    return 0
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)