Set up analysis scripts

2025-10-30 10:56:21 -06:00
parent d2da0425c6
commit 815ed9d6f4
14 changed files with 1427 additions and 651 deletions
--- a/analysis/bicorder_init.py
+++ b/analysis/bicorder_init.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+"""
+Initialize LLM conversation with bicorder framework and protocol context.
+
+This script reads a protocol from the CSV and the bicorder.json framework,
+then generates a prompt to initialize the LLM conversation.
+"""
+
+import csv
+import json
+import sys
+import argparse
+from pathlib import Path
+
+
+def load_bicorder_config(bicorder_path):
+    """Load and parse the bicorder.json configuration file.
+
+    Args:
+        bicorder_path: Path to the bicorder JSON file.
+
+    Returns:
+        The decoded JSON document, exactly as produced by ``json.load``.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the file does not contain valid JSON.
+    """
+    # Decode explicitly as UTF-8 instead of relying on the platform's
+    # locale encoding, so non-ASCII text reads the same on every machine.
+    with open(bicorder_path, 'r', encoding='utf-8') as f:
+        return json.load(f)
+
+
+def get_protocol_by_row(csv_path, row_number):
+    """Get protocol data from CSV by row number (1-indexed).
+
+    Rows are counted over the records yielded by ``csv.DictReader``, so the
+    header line is not counted: row 1 is the first data row.
+
+    Args:
+        csv_path: Path to a UTF-8 CSV file with a header row.
+        row_number: 1-indexed position of the data row to fetch.
+
+    Returns:
+        A dict with the keys ``'descriptor'`` and ``'description'`` holding
+        the whitespace-stripped ``Descriptor`` and ``Description`` cells
+        (an empty string when a cell or column is missing), or ``None`` when
+        the file has no such row.
+    """
+    with open(csv_path, 'r', encoding='utf-8') as f:
+        reader = csv.DictReader(f)
+        for i, row in enumerate(reader, start=1):
+            if i == row_number:
+                # DictReader fills cells missing from a short row with None
+                # (its default restval), so a .get() default alone does not
+                # protect .strip(); coalesce None to '' before stripping.
+                return {
+                    'descriptor': (row.get('Descriptor') or '').strip(),
+                    'description': (row.get('Description') or '').strip()
+                }
+    return None
+
+
+def generate_init_prompt(protocol, bicorder_data):
+    """Build the text that opens the LLM conversation for one protocol.
+
+    Args:
+        protocol: Mapping with ``'descriptor'`` and ``'description'`` entries.
+        bicorder_data: Parsed bicorder configuration. Accepted for interface
+            compatibility; this minimal prompt does not read it.
+
+    Returns:
+        The prompt: the protocol line, its description, and the rating task,
+        separated by blank lines.
+    """
+    # Ultra-minimal version for system prompt: three paragraphs, no framework
+    # details.
+    name = protocol['descriptor']
+    summary = protocol['description']
+    paragraphs = [
+        f'Analyze this protocol: "{name}"',
+        f"Description: {summary}",
+        "Task: Rate this protocol on diagnostic gradients using scale 1-9 (1=left term, 5=neutral/balanced, 9=right term). Respond with just the number and brief explanation.",
+    ]
+    return "\n\n".join(paragraphs)
+
+
+def main():
+    """Command-line entry point: print the init prompt for one CSV row.
+
+    Parses the CSV path, the 1-indexed row number and the optional bicorder
+    config path, then writes the generated prompt to stdout. Prints a message
+    to stderr and exits with status 1 when the CSV file, the bicorder config,
+    or the requested row cannot be found.
+    """
+    usage_examples = """
+Example usage:
+  # Initialize conversation for protocol in row 1
+  python3 bicorder_init.py protocols_edited.csv 1 | llm -m mistral --save init_1
+
+  # Initialize for row 5
+  python3 bicorder_init.py protocols_edited.csv 5 | llm -m mistral --save init_5
+        """
+    cli = argparse.ArgumentParser(
+        description='Initialize LLM conversation with protocol and bicorder framework',
+        # Raw formatter keeps the line breaks of the usage examples intact.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=usage_examples,
+    )
+    cli.add_argument('input_csv', help='Input CSV file with protocol data')
+    cli.add_argument('row_number', type=int, help='Row number to analyze (1-indexed)')
+    cli.add_argument(
+        '-b', '--bicorder',
+        default='../bicorder.json',
+        help='Path to bicorder.json (default: ../bicorder.json)',
+    )
+    opts = cli.parse_args()
+
+    # Both files must exist before any work starts; the CSV is checked first.
+    required_files = (
+        (opts.input_csv, f"Error: Input file '{opts.input_csv}' not found"),
+        (opts.bicorder, f"Error: Bicorder config '{opts.bicorder}' not found"),
+    )
+    for location, complaint in required_files:
+        if not Path(location).exists():
+            print(complaint, file=sys.stderr)
+            sys.exit(1)
+
+    # Fetch the requested protocol; None means the row is past the end.
+    selected = get_protocol_by_row(opts.input_csv, opts.row_number)
+    if selected is None:
+        print(f"Error: Row {opts.row_number} not found in CSV", file=sys.stderr)
+        sys.exit(1)
+
+    # Load the framework config, then emit the prompt on stdout.
+    framework = load_bicorder_config(opts.bicorder)
+    print(generate_init_prompt(selected, framework))
+
+
+# Run the CLI only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()