156 lines
5.3 KiB
Python
156 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Protocol Bicorder Analysis Script
|
|
|
|
Processes a two-column CSV file (protocol descriptor and description) and adds
|
|
columns for each diagnostic gradient from bicorder.json. The new columns are
left empty so that LLM commands can fill them in later.
|
|
"""
|
|
|
|
import csv
|
|
import json
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
|
|
def load_bicorder_config(bicorder_path):
    """Load and parse the bicorder.json configuration file.

    Args:
        bicorder_path: Path to the JSON configuration file.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON is UTF-8 by specification. Without an explicit encoding, open()
    # falls back to the locale default, which mis-decodes (or rejects)
    # non-ASCII terms on platforms whose default is not UTF-8.
    with open(bicorder_path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
|
|
|
|
|
def extract_gradients(bicorder_data):
    """Flatten the gradients of every diagnostic set into one ordered list.

    Args:
        bicorder_data: Mapping with a ``'diagnostic'`` entry. Each item of
            that entry must provide ``'set_name'`` and ``'gradients'``, and
            each gradient must provide ``'term_left'``,
            ``'term_left_description'``, ``'term_right'`` and
            ``'term_right_description'``.

    Returns:
        A list of dicts, in configuration order, each holding the gradient's
        terms and descriptions together with its ``'set_name'`` and a
        ``'column_name'`` of the form ``<set_name>_<left>_vs_<right>``.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    def _as_record(group_label, entry):
        # Build the column name first so it combines set and both terms.
        left = entry['term_left']
        right = entry['term_right']
        return {
            'column_name': f"{group_label}_{left}_vs_{right}",
            'set_name': group_label,
            'term_left': left,
            'term_left_description': entry['term_left_description'],
            'term_right': right,
            'term_right_description': entry['term_right_description'],
        }

    flattened = []
    for group in bicorder_data['diagnostic']:
        label = group['set_name']
        flattened.extend(_as_record(label, entry) for entry in group['gradients'])
    return flattened
|
|
|
|
|
|
def process_csv(input_csv, output_csv, bicorder_path, analyst=None, standpoint=None):
    """
    Process the input CSV and add gradient columns.

    Every input row is copied to the output with one extra, empty column per
    gradient found in the bicorder configuration, followed by optional
    ``analyst`` and ``standpoint`` columns. A progress line per row and a
    final summary are printed to stdout.

    Args:
        input_csv: Path to input CSV file
        output_csv: Path to output CSV file
        bicorder_path: Path to bicorder.json file
        analyst: Optional analyst name
        standpoint: Optional standpoint description

    Raises:
        ValueError: If input_csv and output_csv resolve to the same file.
    """
    # The output is opened in 'w' mode, which truncates it immediately.
    # Writing onto the input would therefore destroy the data before a
    # single row has been read.
    if Path(input_csv).resolve() == Path(output_csv).resolve():
        raise ValueError(
            f"Output path '{output_csv}' is the same file as the input; "
            "refusing to overwrite the input CSV"
        )

    # Load bicorder configuration
    bicorder_data = load_bicorder_config(bicorder_path)
    gradients = extract_gradients(bicorder_data)
    gradient_columns = [g['column_name'] for g in gradients]

    # Values appended to every row: empty gradient cells (to be filled by
    # LLM) plus the optional metadata. Built once, outside the row loop.
    appended_values = dict.fromkeys(gradient_columns, '')
    if analyst is not None:
        appended_values['analyst'] = analyst
    if standpoint is not None:
        appended_values['standpoint'] = standpoint

    # newline='' on both files lets the csv module do its own newline
    # handling, so quoted fields containing line breaks survive intact.
    with open(input_csv, 'r', newline='', encoding='utf-8') as infile, \
            open(output_csv, 'w', newline='', encoding='utf-8') as outfile:

        reader = csv.DictReader(infile)

        # fieldnames is None for a completely empty file; treat that as
        # "no original columns". Unnamed/blank header cells are dropped.
        original_fields = [f for f in (reader.fieldnames or []) if f and f.strip()]

        # Original columns first, then gradients, then metadata if provided
        output_fields = original_fields + gradient_columns
        if analyst is not None:
            output_fields.append('analyst')
        if standpoint is not None:
            output_fields.append('standpoint')

        writer = csv.DictWriter(outfile, fieldnames=output_fields)
        writer.writeheader()

        # Process each protocol row
        row_count = 0
        for row_count, protocol_row in enumerate(reader, 1):
            # Drop the None key (overflow cells) and blank-named columns,
            # matching the header filtering above.
            output_row = {k: v for k, v in protocol_row.items() if k and k.strip()}
            output_row.update(appended_values)
            writer.writerow(output_row)

            # Short rows are padded with None by DictReader, so the key can
            # be present with a None value; `or ''` covers that case too.
            descriptor = (protocol_row.get('Descriptor') or '').strip()
            print(f"Processed protocol {row_count}: {descriptor}")

    print(f"\nOutput written to: {output_csv}")
    print(f"Total protocols: {row_count}")
    print(f"Gradient columns added: {len(gradients)}")
    print("\nGradient columns:")
    for i, gradient in enumerate(gradients, 1):
        print(f" {i}. {gradient['column_name']}")
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments, validate paths, run process_csv.

    Prints a message to stderr and exits with status 1 when the input CSV or
    the bicorder configuration does not exist or is a directory, or when the
    output path is the same file as the input.
    """
    parser = argparse.ArgumentParser(
        description='Process protocol CSV and add bicorder diagnostic columns',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example usage:
python3 bicorder_analyze.py protocols_edited.csv -o output.csv
python3 bicorder_analyze.py protocols_raw.csv -o output.csv -a "Jane Doe" -s "Researcher perspective"

The script will preserve all original columns and add one column per diagnostic gradient.
Each gradient column will be empty, ready to be filled by LLM commands.
"""
    )

    parser.add_argument('input_csv', help='Input CSV file with protocol data')
    parser.add_argument('-o', '--output', required=True, help='Output CSV file')
    parser.add_argument('-b', '--bicorder',
                        default='../bicorder.json',
                        help='Path to bicorder.json (default: ../bicorder.json)')
    parser.add_argument('-a', '--analyst', help='Analyst name (adds analyst column)')
    parser.add_argument('-s', '--standpoint', help='Analyst standpoint (adds standpoint column)')

    args = parser.parse_args()

    # Validate both paths up front (input first, then config) so the user
    # gets a one-line error instead of a traceback from deep inside open().
    checks = (('Input file', args.input_csv), ('Bicorder config', args.bicorder))
    for label, candidate in checks:
        path = Path(candidate)
        if not path.exists():
            print(f"Error: {label} '{candidate}' not found", file=sys.stderr)
            sys.exit(1)
        # Only directories are rejected: exists() accepts them, but open()
        # does not. Pipes and device files (e.g. /dev/stdin) stay usable.
        if path.is_dir():
            print(f"Error: {label} '{candidate}' is a directory, not a file",
                  file=sys.stderr)
            sys.exit(1)

    # The output is opened for writing (and truncated) while the input is
    # still being read, so pointing both at one file would wipe the input.
    if Path(args.input_csv).resolve() == Path(args.output).resolve():
        print(f"Error: Output file '{args.output}' must differ from the input file",
              file=sys.stderr)
        sys.exit(1)

    # Process the CSV
    process_csv(
        args.input_csv,
        args.output,
        args.bicorder,
        args.analyst,
        args.standpoint
    )
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script; importing it as a module must not trigger any processing.
if __name__ == "__main__":
    main()
|