protocol-bicorder/analysis/scripts/bicorder_init.py

#!/usr/bin/env python3
"""
Initialize LLM conversation with bicorder framework and protocol context.

This script reads a protocol from the CSV and the bicorder.json framework,
then generates a prompt to initialize the LLM conversation.
"""

import csv
import json
import sys
import argparse
from pathlib import Path


def load_bicorder_config(bicorder_path):
    """Load and parse the bicorder.json configuration file.

    Args:
        bicorder_path: Path to the JSON file (string or path-like).

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Pin the encoding: without it open() falls back to the locale default,
    # which mis-decodes non-ASCII text on platforms whose default is not UTF-8.
    with open(bicorder_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def get_protocol_by_row(csv_path, row_number):
    """Get protocol data from CSV by row number (1-indexed).

    The first line of the file is consumed as the header, so row 1 is the
    first data row after it.

    Args:
        csv_path: Path to the CSV file; it is expected to have 'Descriptor'
            and 'Description' columns.
        row_number: 1-indexed number of the data row to fetch.

    Returns:
        A dict with the keys 'descriptor' and 'description', both stripped of
        surrounding whitespace. A value is '' when its column is absent or
        the row is too short to contain it. Returns None when the file has no
        row with that number.
    """
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation requires (matters for quoted fields containing newlines).
    # utf-8-sig drops a leading byte-order mark, which would otherwise become
    # part of the first header name and hide that column from the lookup.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader, start=1):
            if i == row_number:
                # DictReader fills fields missing from a short row with None,
                # so fall back to '' before calling strip().
                return {
                    'descriptor': (row.get('Descriptor') or '').strip(),
                    'description': (row.get('Description') or '').strip(),
                }
    return None


def generate_init_prompt(protocol, bicorder_data):
    """Build the prompt text that opens the LLM conversation.

    Args:
        protocol: Mapping with the keys 'descriptor' and 'description'.
        bicorder_data: Parsed bicorder configuration. It is accepted for
            interface compatibility but not read by this minimal prompt.

    Returns:
        The prompt as a single string: the protocol name, its description and
        the rating task, separated by blank lines.
    """
    name = protocol['descriptor']
    summary = protocol['description']

    # Kept deliberately short so it can serve as a system prompt.
    sections = (
        f'Analyze this protocol: "{name}"',
        f'Description: {summary}',
        'Task: Rate this protocol on diagnostic gradients using scale 1-9 '
        '(1=left term, 5=neutral/balanced, 9=right term). '
        'Respond with just the number and brief explanation.',
    )
    return '\n\n'.join(sections)


def main():
    """Command-line entry point: print the initialization prompt for one row.

    Parses the command line, checks that both input files are present, loads
    the requested protocol row and the bicorder configuration, then writes
    the generated prompt to stdout. Every failure is reported on stderr as an
    ``Error: ...`` line and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Initialize LLM conversation with protocol and bicorder framework',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example usage:
  # Initialize conversation for protocol in row 1
  python3 bicorder_init.py protocols_edited.csv 1 | llm -m mistral --save init_1

  # Initialize for row 5
  python3 bicorder_init.py protocols_edited.csv 5 | llm -m mistral --save init_5
        """
    )

    parser.add_argument('input_csv', help='Input CSV file with protocol data')
    parser.add_argument('row_number', type=int, help='Row number to analyze (1-indexed)')
    parser.add_argument('-b', '--bicorder',
                        default='../bicorder.json',
                        help='Path to bicorder.json (default: ../bicorder.json)')

    args = parser.parse_args()

    # Validate input file exists. is_file() rather than exists(): a directory
    # passes exists() and would then fail with a traceback inside open().
    if not Path(args.input_csv).is_file():
        print(f"Error: Input file '{args.input_csv}' not found", file=sys.stderr)
        sys.exit(1)

    # Validate bicorder.json exists
    if not Path(args.bicorder).is_file():
        print(f"Error: Bicorder config '{args.bicorder}' not found", file=sys.stderr)
        sys.exit(1)

    # Load protocol; report unreadable or malformed CSV input the same way as
    # every other failure instead of letting a traceback escape.
    try:
        protocol = get_protocol_by_row(args.input_csv, args.row_number)
    except (OSError, UnicodeDecodeError, csv.Error) as exc:
        print(f"Error: Could not read '{args.input_csv}': {exc}", file=sys.stderr)
        sys.exit(1)
    if protocol is None:
        print(f"Error: Row {args.row_number} not found in CSV", file=sys.stderr)
        sys.exit(1)

    # Load bicorder config; invalid JSON gets a clean error message too.
    try:
        bicorder_data = load_bicorder_config(args.bicorder)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        print(f"Error: Could not load bicorder config '{args.bicorder}': {exc}",
              file=sys.stderr)
        sys.exit(1)

    # Generate and output prompt
    prompt = generate_init_prompt(protocol, bicorder_data)
    print(prompt)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()