Files
protocol-bicorder/analysis/bicorder_analyze.py
Nathan Schneider fa527bd1f1 Initial analysis
2025-11-21 19:34:33 -07:00

156 lines
5.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Protocol Bicorder Analysis Script
Processes a two-column CSV file (protocol descriptor and description) and adds
one column for each diagnostic gradient from bicorder.json. The added columns
are left empty, to be filled in later by LLM commands.
"""
import csv
import json
import sys
import argparse
from pathlib import Path
def load_bicorder_config(bicorder_path):
    """Load and parse the bicorder.json configuration file.

    Args:
        bicorder_path: Path to the bicorder JSON file.

    Returns:
        The parsed JSON document, exactly as produced by ``json.load``.
    """
    # Pin UTF-8 instead of relying on the platform's locale default, so a
    # config containing non-ASCII text loads the same way on every system
    # (the CSV files in this script are opened as UTF-8 as well).
    with open(bicorder_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def extract_gradients(bicorder_data):
    """Flatten every diagnostic set of the config into one list of gradients.

    Args:
        bicorder_data: Parsed bicorder config. It must contain a
            'diagnostic' list whose items each have a 'set_name' and a
            'gradients' list.

    Returns:
        A list with one dict per gradient, in config order. Each dict has a
        'column_name' of the form "<set_name>_<term_left>_vs_<term_right>",
        the 'set_name', and both terms together with their descriptions.
    """
    # Fields copied verbatim from each gradient entry, in output order.
    copied_keys = (
        'term_left',
        'term_left_description',
        'term_right',
        'term_right_description',
    )
    flattened = []
    for group in bicorder_data['diagnostic']:
        group_name = group['set_name']
        for entry in group['gradients']:
            record = {
                # Set name plus both terms keeps the CSV column name unique.
                'column_name': f"{group_name}_{entry['term_left']}_vs_{entry['term_right']}",
                'set_name': group_name,
            }
            record.update((key, entry[key]) for key in copied_keys)
            flattened.append(record)
    return flattened
def process_csv(input_csv, output_csv, bicorder_path, analyst=None, standpoint=None):
    """
    Process the input CSV and add gradient columns.

    Every input row is copied to the output with one empty column per
    diagnostic gradient appended, plus optional constant 'analyst' and
    'standpoint' columns. Progress and a summary are printed to stdout.

    Args:
        input_csv: Path to input CSV file (read as UTF-8; first row is the header)
        output_csv: Path to output CSV file (created or overwritten)
        bicorder_path: Path to bicorder.json file
        analyst: Optional analyst name; an 'analyst' column is added when not None
        standpoint: Optional standpoint description; a 'standpoint' column is
            added when not None
    """
    # Load bicorder configuration
    bicorder_data = load_bicorder_config(bicorder_path)
    gradients = extract_gradients(bicorder_data)
    gradient_columns = [g['column_name'] for g in gradients]

    # Constant per-row metadata, limited to the values actually supplied.
    metadata = {}
    if analyst is not None:
        metadata['analyst'] = analyst
    if standpoint is not None:
        metadata['standpoint'] = standpoint

    # newline='' on the input as well: the csv module needs it to interpret
    # newlines embedded inside quoted fields correctly.
    with open(input_csv, 'r', newline='', encoding='utf-8') as infile, \
            open(output_csv, 'w', newline='', encoding='utf-8') as outfile:
        reader = csv.DictReader(infile)

        # fieldnames is None when the input file is completely empty; treat
        # that as "no original columns" instead of crashing. Unnamed (empty
        # or whitespace-only) header cells are dropped.
        original_fields = [f for f in (reader.fieldnames or []) if f and f.strip()]

        output_fields = original_fields + gradient_columns + list(metadata)
        writer = csv.DictWriter(outfile, fieldnames=output_fields)
        writer.writeheader()

        # Process each protocol row
        row_count = 0
        for row_count, protocol_row in enumerate(reader, 1):
            # Keep original data, dropping the None key DictReader uses for
            # surplus cells and any unnamed columns.
            output_row = {k: v for k, v in protocol_row.items() if k and k.strip()}

            # Gradient columns start empty (to be filled by LLM)
            for column in gradient_columns:
                output_row[column] = ''
            output_row.update(metadata)
            writer.writerow(output_row)

            # A short row yields None for missing cells, so fall back to ''
            # before stripping.
            descriptor = (protocol_row.get('Descriptor') or '').strip()
            print(f"Processed protocol {row_count}: {descriptor}")

    print(f"\nOutput written to: {output_csv}")
    print(f"Total protocols: {row_count}")
    print(f"Gradient columns added: {len(gradients)}")
    print("\nGradient columns:")
    for i, gradient in enumerate(gradients, 1):
        print(f" {i}. {gradient['column_name']}")
def main(argv=None):
    """Command-line entry point: parse arguments, validate paths, run process_csv.

    Args:
        argv: Optional list of argument strings. None (the default) makes
            argparse read the arguments from sys.argv[1:].

    Exits with status 1 and a message on stderr when the input CSV or the
    bicorder config does not exist, or when the output path refers to the
    same file as the input.
    """
    parser = argparse.ArgumentParser(
        description='Process protocol CSV and add bicorder diagnostic columns',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example usage:
python3 bicorder_analyze.py protocols_edited.csv -o output.csv
python3 bicorder_analyze.py protocols_raw.csv -o output.csv -a "Jane Doe" -s "Researcher perspective"
The script will preserve all original columns and add one column per diagnostic gradient.
Each gradient column will be empty, ready to be filled by LLM commands.
"""
    )
    parser.add_argument('input_csv', help='Input CSV file with protocol data')
    parser.add_argument('-o', '--output', required=True, help='Output CSV file')
    parser.add_argument('-b', '--bicorder',
                        default='../bicorder.json',
                        help='Path to bicorder.json (default: ../bicorder.json)')
    parser.add_argument('-a', '--analyst', help='Analyst name (adds analyst column)')
    parser.add_argument('-s', '--standpoint', help='Analyst standpoint (adds standpoint column)')
    args = parser.parse_args(argv)

    # Validate input file exists
    input_path = Path(args.input_csv)
    if not input_path.exists():
        print(f"Error: Input file '{args.input_csv}' not found", file=sys.stderr)
        sys.exit(1)

    # Refuse to write over the input: the output is opened in 'w' mode, which
    # would truncate the input before any row has been read. samefile() also
    # catches symlinks and differently spelled paths to the same file.
    output_path = Path(args.output)
    if output_path.exists() and output_path.samefile(input_path):
        print(f"Error: Output file '{args.output}' is the same file as the input",
              file=sys.stderr)
        sys.exit(1)

    # Validate bicorder.json exists
    if not Path(args.bicorder).exists():
        print(f"Error: Bicorder config '{args.bicorder}' not found", file=sys.stderr)
        sys.exit(1)

    # Process the CSV
    process_csv(
        args.input_csv,
        args.output,
        args.bicorder,
        args.analyst,
        args.standpoint
    )
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()