- Move all scripts to scripts/, web assets to web/, analysis results into self-contained data/readings/<type>_<YYYYMMDD>/ directories - Add data/readings/manual_20260320/ with 32 JSON readings from git.medlab.host/ntnsndr/protocol-bicorder-data - Add scripts/json_to_csv.py to convert bicorder JSON files to CSV - Add scripts/sync_readings.sh for one-command sync + re-analysis of any dataset backed by a .sync_source config file - Add scripts/classify_readings.py to apply the LDA classifier to all readings and save per-reading cluster assignments - Add --min-coverage flag to multivariate_analysis.py for sparse/shortform datasets; also applies in lda_visualization.py - Fix lda_visualization.py NaN handling and 0-d array annotation bug - Update README.md and WORKFLOW.md to document datasets, sync workflow, shortform handling, and new scripts Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
96 lines
3.0 KiB
Python
96 lines
3.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Initialize LLM conversation with bicorder framework and protocol context.
|
|
|
|
This script reads a protocol from the CSV and the bicorder.json framework,
|
|
then generates a prompt to initialize the LLM conversation.
|
|
"""
|
|
|
|
import csv
|
|
import json
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
|
|
def load_bicorder_config(bicorder_path):
    """Load and parse the bicorder.json configuration file.

    Args:
        bicorder_path: Path to the JSON file to read.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale encoding, matching how the CSV input is read in this script.
    with open(bicorder_path, 'r', encoding='utf-8') as config_file:
        return json.load(config_file)
|
|
|
|
|
|
def get_protocol_by_row(csv_path, row_number):
    """Get protocol data from CSV by row number (1-indexed).

    Rows are counted as ``csv.DictReader`` yields them, so the header line
    is not counted and row 1 is the first data row.

    Args:
        csv_path: Path to the CSV file; expected to have ``Descriptor`` and
            ``Description`` columns.
        row_number: 1-indexed data row to fetch.

    Returns:
        A dict with ``'descriptor'`` and ``'description'`` keys holding the
        whitespace-stripped cell values (empty string when a cell or column
        is missing), or ``None`` if the file has no such row.
    """
    # newline='' lets the csv module handle line endings itself (required by
    # the csv docs for quoted fields containing newlines); utf-8-sig drops a
    # leading BOM so the first header is read as 'Descriptor' rather than
    # '\ufeffDescriptor'. Files without a BOM decode exactly as plain UTF-8.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as csv_file:
        reader = csv.DictReader(csv_file)
        for index, row in enumerate(reader, start=1):
            if index != row_number:
                continue
            # DictReader fills missing trailing fields with None, so
            # row.get(key, '') can still return None for a short row;
            # `or ''` keeps .strip() from raising AttributeError.
            return {
                'descriptor': (row.get('Descriptor') or '').strip(),
                'description': (row.get('Description') or '').strip(),
            }
    return None
|
|
|
|
|
|
def generate_init_prompt(protocol, bicorder_data):
    """Generate the initialization prompt for the LLM.

    Args:
        protocol: Mapping with ``'descriptor'`` and ``'description'`` keys.
        bicorder_data: Loaded bicorder configuration. Not used by this
            ultra-minimal prompt; kept so the call signature stays stable.

    Returns:
        The prompt text: three paragraphs (protocol name, description, task)
        separated by single blank lines.

    Raises:
        KeyError: If ``protocol`` lacks either expected key.
    """
    descriptor = protocol['descriptor']
    description = protocol['description']

    # Ultra-minimal version for system prompt
    sections = (
        f'Analyze this protocol: "{descriptor}"',
        f'Description: {description}',
        'Task: Rate this protocol on diagnostic gradients using scale 1-9 '
        '(1=left term, 5=neutral/balanced, 9=right term). '
        'Respond with just the number and brief explanation.',
    )
    return '\n\n'.join(sections)
|
|
|
|
|
|
def main():
    """Command-line entry point: print the LLM init prompt for one CSV row.

    Parses the arguments, checks that both input files exist, loads the
    requested protocol row and the bicorder configuration, then writes the
    generated prompt to stdout.

    Exits with status 1 (message on stderr) when the CSV or bicorder file is
    missing, the requested row does not exist, or the bicorder file cannot
    be read or parsed.
    """
    parser = argparse.ArgumentParser(
        description='Initialize LLM conversation with protocol and bicorder framework',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example usage:
# Initialize conversation for protocol in row 1
python3 bicorder_init.py protocols_edited.csv 1 | llm -m mistral --save init_1

# Initialize for row 5
python3 bicorder_init.py protocols_edited.csv 5 | llm -m mistral --save init_5
""",
    )

    parser.add_argument('input_csv', help='Input CSV file with protocol data')
    parser.add_argument('row_number', type=int, help='Row number to analyze (1-indexed)')
    parser.add_argument('-b', '--bicorder',
                        default='../bicorder.json',
                        help='Path to bicorder.json (default: ../bicorder.json)')

    args = parser.parse_args()

    # Validate input file exists
    if not Path(args.input_csv).exists():
        print(f"Error: Input file '{args.input_csv}' not found", file=sys.stderr)
        sys.exit(1)

    # Validate bicorder.json exists
    if not Path(args.bicorder).exists():
        print(f"Error: Bicorder config '{args.bicorder}' not found", file=sys.stderr)
        sys.exit(1)

    # Load protocol
    protocol = get_protocol_by_row(args.input_csv, args.row_number)
    if protocol is None:
        print(f"Error: Row {args.row_number} not found in CSV", file=sys.stderr)
        sys.exit(1)

    # Load bicorder config. Report unreadable or malformed files the same way
    # as the other failures instead of dumping a traceback;
    # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
    try:
        bicorder_data = load_bicorder_config(args.bicorder)
    except (OSError, ValueError) as err:
        print(f"Error: Could not load bicorder config '{args.bicorder}': {err}",
              file=sys.stderr)
        sys.exit(1)

    # Generate and output prompt
    prompt = generate_init_prompt(protocol, bicorder_data)
    print(prompt)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|