#!/usr/bin/env python3
"""
Protocol Bicorder Analysis Script

Processes a two-column CSV file (protocol descriptor and description) and adds
columns for each diagnostic gradient from bicorder.json. Values to be filled
by LLM commands.
"""

import argparse
import csv
import json
import sys
from pathlib import Path


def load_bicorder_config(bicorder_path):
    """Load and parse the bicorder.json configuration file.

    Args:
        bicorder_path: Path to the JSON configuration file.

    Returns:
        The parsed JSON document.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(bicorder_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def extract_gradients(bicorder_data):
    """Extract all gradients from the diagnostic sets.

    Args:
        bicorder_data: Parsed configuration with a 'diagnostic' list; each
            entry has a 'set_name' and a 'gradients' list.

    Returns:
        A list of dicts, one per gradient, each carrying the generated
        'column_name', the 'set_name', and both terms with their descriptions.

    Raises:
        KeyError: If a required key is missing from the configuration.
    """
    gradients = []
    for diagnostic_set in bicorder_data['diagnostic']:
        set_name = diagnostic_set['set_name']
        for gradient in diagnostic_set['gradients']:
            # Column name combines the set name with both poles of the gradient.
            col_name = (
                f"{set_name}_{gradient['term_left']}_vs_{gradient['term_right']}"
            )
            gradients.append({
                'column_name': col_name,
                'set_name': set_name,
                'term_left': gradient['term_left'],
                'term_left_description': gradient['term_left_description'],
                'term_right': gradient['term_right'],
                'term_right_description': gradient['term_right_description'],
            })
    return gradients


def process_csv(input_csv, output_csv, bicorder_path, analyst=None, standpoint=None):
    """
    Process the input CSV and add gradient columns.

    Every original (non-blank) column is preserved, one empty column is added
    per gradient, and optional 'analyst' / 'standpoint' columns are appended
    when the corresponding argument is given. A progress line is printed per
    row, followed by a summary.

    Args:
        input_csv: Path to input CSV file
        output_csv: Path to output CSV file
        bicorder_path: Path to bicorder.json file
        analyst: Optional analyst name
        standpoint: Optional standpoint description

    Raises:
        ValueError: If input_csv and output_csv resolve to the same file.
    """
    # Opening the output for writing truncates it, which would destroy the
    # input before a single row is read. Refuse up front.
    if Path(input_csv).resolve() == Path(output_csv).resolve():
        raise ValueError("output_csv must be a different file from input_csv")

    # Load bicorder configuration
    bicorder_data = load_bicorder_config(bicorder_path)
    gradients = extract_gradients(bicorder_data)
    gradient_columns = [g['column_name'] for g in gradients]

    row_count = 0
    # newline='' lets the csv module handle line endings itself, including
    # newlines embedded inside quoted fields.
    with open(input_csv, 'r', newline='', encoding='utf-8') as infile, \
            open(output_csv, 'w', newline='', encoding='utf-8') as outfile:
        reader = csv.DictReader(infile)

        # fieldnames is None for an empty input file; treat that as no columns.
        # Blank/whitespace-only header cells are dropped.
        original_fields = [f for f in (reader.fieldnames or []) if f and f.strip()]

        output_fields = original_fields + gradient_columns

        # Add metadata columns if provided
        if analyst is not None:
            output_fields.append('analyst')
        if standpoint is not None:
            output_fields.append('standpoint')

        writer = csv.DictWriter(outfile, fieldnames=output_fields)
        writer.writeheader()

        # Process each protocol row
        for protocol_row in reader:
            # Keep original data; drop the None key (surplus cells) and
            # blank-named columns so the row matches output_fields.
            output_row = {
                k: v for k, v in protocol_row.items() if k and k.strip()
            }

            # Initialize all gradient columns as empty (to be filled by LLM)
            for col_name in gradient_columns:
                output_row[col_name] = ''

            # Add metadata if provided
            if analyst is not None:
                output_row['analyst'] = analyst
            if standpoint is not None:
                output_row['standpoint'] = standpoint

            writer.writerow(output_row)
            row_count += 1

            # A row shorter than the header yields None for missing cells,
            # so guard before calling strip().
            descriptor = (protocol_row.get('Descriptor') or '').strip()
            print(f"Processed protocol {row_count}: {descriptor}")

    print(f"\nOutput written to: {output_csv}")
    print(f"Total protocols: {row_count}")
    print(f"Gradient columns added: {len(gradients)}")
    print("\nGradient columns:")
    for i, gradient in enumerate(gradients, 1):
        print(f" {i}. {gradient['column_name']}")


def main():
    """Parse command-line arguments, validate the paths, and run process_csv.

    Exits with status 1 (message on stderr) when the input CSV or the
    bicorder config does not exist, or when the output path is the input path.
    """
    parser = argparse.ArgumentParser(
        description='Process protocol CSV and add bicorder diagnostic columns',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example usage:
  python3 bicorder_analyze.py protocols_edited.csv -o output.csv
  python3 bicorder_analyze.py protocols_raw.csv -o output.csv -a "Jane Doe" -s "Researcher perspective"

The script will preserve all original columns and add one column per diagnostic gradient.
Each gradient column will be empty, ready to be filled by LLM commands.
        """
    )

    parser.add_argument('input_csv', help='Input CSV file with protocol data')
    parser.add_argument('-o', '--output', required=True, help='Output CSV file')
    parser.add_argument('-b', '--bicorder', default='../bicorder.json',
                        help='Path to bicorder.json (default: ../bicorder.json)')
    parser.add_argument('-a', '--analyst', help='Analyst name (adds analyst column)')
    parser.add_argument('-s', '--standpoint',
                        help='Analyst standpoint (adds standpoint column)')

    args = parser.parse_args()

    # Validate input file exists
    if not Path(args.input_csv).exists():
        print(f"Error: Input file '{args.input_csv}' not found", file=sys.stderr)
        sys.exit(1)

    # Validate bicorder.json exists
    if not Path(args.bicorder).exists():
        print(f"Error: Bicorder config '{args.bicorder}' not found", file=sys.stderr)
        sys.exit(1)

    # Writing onto the input would truncate it before it is read.
    if Path(args.input_csv).resolve() == Path(args.output).resolve():
        print("Error: Output file must be different from the input file",
              file=sys.stderr)
        sys.exit(1)

    # Process the CSV
    process_csv(
        args.input_csv,
        args.output,
        args.bicorder,
        args.analyst,
        args.standpoint
    )


if __name__ == '__main__':
    main()