Set up analysis scripts

This commit is contained in:
Nathan Schneider
2025-10-30 10:56:21 -06:00
parent d2da0425c6
commit 815ed9d6f4
14 changed files with 1427 additions and 651 deletions

95
analysis/bicorder_init.py Normal file
View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""
Initialize LLM conversation with bicorder framework and protocol context.
This script reads a protocol from the CSV and the bicorder.json framework,
then generates a prompt to initialize the LLM conversation.
"""
import csv
import json
import sys
import argparse
from pathlib import Path
def load_bicorder_config(bicorder_path):
    """Load and parse the bicorder.json configuration file.

    Args:
        bicorder_path: Path to the JSON configuration file.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale encoding, matching how the protocol CSV is read.
    with open(bicorder_path, 'r', encoding='utf-8') as config_file:
        return json.load(config_file)
def get_protocol_by_row(csv_path, row_number):
    """Get protocol data from CSV by row number (1-indexed).

    The header line is consumed by ``csv.DictReader`` and is not counted, so
    row 1 is the first data row.

    Args:
        csv_path: Path to a CSV file with ``Descriptor`` and ``Description``
            columns.
        row_number: 1-indexed data row to fetch.

    Returns:
        A dict with the whitespace-stripped ``descriptor`` and ``description``
        of the requested row (an empty string for a missing or empty field),
        or ``None`` when the file has no such row.
    """
    # newline='' lets the csv module handle line endings itself (required for
    # quoted fields containing newlines); utf-8-sig drops a leading BOM, which
    # would otherwise become part of the first column name, and reads
    # BOM-less UTF-8 unchanged.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader, start=1):
            if i == row_number:
                # DictReader fills fields missing from a short row with None,
                # so ``or ''`` is needed in addition to .get().
                return {
                    'descriptor': (row.get('Descriptor') or '').strip(),
                    'description': (row.get('Description') or '').strip(),
                }
    return None
def generate_init_prompt(protocol, bicorder_data):
    """Build the short prompt that opens the LLM conversation.

    Args:
        protocol: Mapping with ``descriptor`` and ``description`` entries.
        bicorder_data: Parsed bicorder configuration. Accepted for interface
            compatibility; this minimal prompt does not read it.

    Returns:
        A three-line prompt: the protocol name, its description, and the
        rating task instructions.
    """
    name = protocol['descriptor']
    summary = protocol['description']
    # Ultra-minimal wording, intended for use as a system prompt.
    prompt_lines = (
        f'Analyze this protocol: "{name}"',
        f'Description: {summary}',
        'Task: Rate this protocol on diagnostic gradients using scale 1-9 '
        '(1=left term, 5=neutral/balanced, 9=right term). '
        'Respond with just the number and brief explanation.',
    )
    return "\n".join(prompt_lines)
def main():
    """Command-line entry point: print the initialization prompt for one row.

    Parses the arguments, checks that the CSV and bicorder config paths
    exist, looks up the requested protocol row, loads the bicorder config,
    and writes the generated prompt to stdout. Each failed check prints an
    error message to stderr and exits with status 1.
    """

    def bail(message):
        # Report the problem on stderr and stop with a failing exit status.
        print(message, file=sys.stderr)
        sys.exit(1)

    usage_examples = """
Example usage:
# Initialize conversation for protocol in row 1
python3 bicorder_init.py protocols_edited.csv 1 | llm -m mistral --save init_1
# Initialize for row 5
python3 bicorder_init.py protocols_edited.csv 5 | llm -m mistral --save init_5
"""
    cli = argparse.ArgumentParser(
        description='Initialize LLM conversation with protocol and bicorder framework',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument('input_csv', help='Input CSV file with protocol data')
    cli.add_argument('row_number', type=int, help='Row number to analyze (1-indexed)')
    cli.add_argument(
        '-b', '--bicorder',
        default='../bicorder.json',
        help='Path to bicorder.json (default: ../bicorder.json)',
    )
    opts = cli.parse_args()

    # Both input paths are checked up front, CSV first, before anything is read.
    if not Path(opts.input_csv).exists():
        bail(f"Error: Input file '{opts.input_csv}' not found")
    if not Path(opts.bicorder).exists():
        bail(f"Error: Bicorder config '{opts.bicorder}' not found")

    # Look up the requested protocol row.
    selected = get_protocol_by_row(opts.input_csv, opts.row_number)
    if selected is None:
        bail(f"Error: Row {opts.row_number} not found in CSV")

    # Load the framework config, then emit the prompt on stdout.
    framework = load_bicorder_config(opts.bicorder)
    print(generate_init_prompt(selected, framework))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()