Workflow Examples

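Common analysis workflows and patterns: running the complete pipeline through the CLI, and composing a custom workflow from the parser, analyzer, and Q/A generator classes.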

Complete Analysis Pipeline

Survey Data Analysis

from statqa.cli.main import main
import sys

# Run the complete pipeline through the CLI entry point.
# main() is called without arguments, so the command line is supplied by
# overwriting sys.argv (presumably main() parses sys.argv itself — confirm
# against statqa.cli.main).
sys.argv = (
    ['statqa', 'pipeline']                       # program name + subcommand
    + ['survey_data.csv', 'survey_codebook.csv']  # positional input files
    + ['--output-dir', 'results/']                # output location option
    + ['--enrich', '--qa']                        # optional pipeline flags
)
main()

Custom Analysis Workflow

from statqa.metadata.parsers.csv import CSVParser
from statqa.analysis.univariate import UnivariateAnalyzer
from statqa.analysis.bivariate import BivariateAnalyzer
from statqa.qa.generator import QAGenerator
import pandas as pd

# 1. Parse metadata
parser = CSVParser()
codebook = parser.parse("codebook.csv")

# 2. Load data
data = pd.read_csv("data.csv")

# 3. Run analyses
uni_analyzer = UnivariateAnalyzer()
bi_analyzer = BivariateAnalyzer()

# Univariate and bivariate results are collected in one list, in the order
# they are produced.
results = []
for var_name, variable in codebook.variables.items():
    # The codebook may describe variables that are absent from the data file;
    # skip those instead of failing on a missing column.
    if var_name not in data.columns:
        continue

    # Univariate analysis
    results.append(uni_analyzer.analyze(data[var_name], variable))

    # Bivariate with other numeric variables
    # NOTE: when both variables are numeric, the pair is analyzed in both
    # orders (A, B) and (B, A). If BivariateAnalyzer.analyze is symmetric this
    # is redundant — TODO confirm before deduplicating.
    for other_name, other_var in codebook.variables.items():
        # Apply the same presence check as the outer loop: a codebook-only
        # variable has no column in `data` to analyze against.
        if other_name == var_name or other_name not in data.columns:
            continue
        if other_var.is_numeric():
            results.append(bi_analyzer.analyze(data, variable, other_var))

# 4. Generate Q/A pairs
# Every analysis result is passed to the generator together with the same
# fixed second argument; the pairs returned for each result are flattened
# into a single list.
generator = QAGenerator()
all_qa = [
    qa_pair
    for result in results
    for qa_pair in generator.generate_qa_pairs(result, "Statistical analysis")
]

print(f"Generated {len(all_qa)} Q/A pairs")

For More Examples

See the Examples section for complete walkthroughs on real-world datasets.