Reorganize directory, add manual dataset and sync tooling
- Move all scripts to scripts/, web assets to web/, analysis results into self-contained data/readings/<type>_<YYYYMMDD>/ directories
- Add data/readings/manual_20260320/ with 32 JSON readings from git.medlab.host/ntnsndr/protocol-bicorder-data
- Add scripts/json_to_csv.py to convert bicorder JSON files to CSV
- Add scripts/sync_readings.sh for one-command sync + re-analysis of any dataset backed by a .sync_source config file
- Add scripts/classify_readings.py to apply the LDA classifier to all readings and save per-reading cluster assignments
- Add --min-coverage flag to multivariate_analysis.py for sparse/shortform datasets; also applies in lda_visualization.py
- Fix lda_visualization.py NaN handling and 0-d array annotation bug
- Update README.md and WORKFLOW.md to document datasets, sync workflow, shortform handling, and new scripts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
95
analysis/scripts/bicorder_init.py
Normal file
95
analysis/scripts/bicorder_init.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Initialize LLM conversation with bicorder framework and protocol context.
|
||||
|
||||
This script reads a protocol from the CSV and the bicorder.json framework,
|
||||
then generates a prompt to initialize the LLM conversation.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_bicorder_config(bicorder_path):
    """Load and parse the bicorder.json configuration file.

    Args:
        bicorder_path: Path to the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Pin the encoding so decoding does not depend on the platform's locale
    # default; this also matches how the CSV input is opened in this script.
    with open(bicorder_path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
||||
|
||||
|
||||
def get_protocol_by_row(csv_path, row_number):
    """Get protocol data from CSV by row number (1-indexed).

    Row 1 is the first data row after the header line.

    Args:
        csv_path: Path to a CSV file with a header row; the ``Descriptor``
            and ``Description`` columns are read.
        row_number: 1-indexed data row to fetch.

    Returns:
        A dict with keys ``'descriptor'`` and ``'description'`` holding the
        whitespace-stripped cell values (an empty string when a column or
        cell is absent), or ``None`` if the file has no such row.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' is what the csv module requires so that quoted fields with
    # embedded line breaks are parsed correctly. 'utf-8-sig' reads plain
    # UTF-8 unchanged but drops a leading BOM, which would otherwise become
    # part of the first header name and hide the 'Descriptor' column.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader, start=1):
            if i == row_number:
                # DictReader fills cells missing from a short row with None,
                # so `or ''` is needed before calling .strip().
                return {
                    'descriptor': (row.get('Descriptor') or '').strip(),
                    'description': (row.get('Description') or '').strip(),
                }
    return None
|
||||
|
||||
|
||||
def generate_init_prompt(protocol, bicorder_data):
    """Build the initialization prompt text for the LLM.

    Args:
        protocol: Mapping with ``'descriptor'`` and ``'description'`` entries.
        bicorder_data: Accepted for interface compatibility; this
            ultra-minimal prompt does not read it.

    Returns:
        The prompt as a single string of three paragraphs separated by
        blank lines.
    """
    descriptor = protocol['descriptor']
    description = protocol['description']

    # Ultra-minimal version for system prompt
    sections = (
        f'Analyze this protocol: "{descriptor}"',
        f'Description: {description}',
        'Task: Rate this protocol on diagnostic gradients using scale 1-9 '
        '(1=left term, 5=neutral/balanced, 9=right term). '
        'Respond with just the number and brief explanation.',
    )
    return '\n\n'.join(sections)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: print the initialization prompt for one CSV row.

    Parses ``input_csv``, ``row_number`` and the optional ``-b/--bicorder``
    path, checks that both paths are regular files, looks up the requested
    row, loads the bicorder config, and prints the generated prompt to
    stdout. Each of those checks, on failure, prints an ``Error: ...`` line
    to stderr and exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Initialize LLM conversation with protocol and bicorder framework',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example usage:
# Initialize conversation for protocol in row 1
python3 bicorder_init.py protocols_edited.csv 1 | llm -m mistral --save init_1

# Initialize for row 5
python3 bicorder_init.py protocols_edited.csv 5 | llm -m mistral --save init_5
"""
    )

    parser.add_argument('input_csv', help='Input CSV file with protocol data')
    parser.add_argument('row_number', type=int, help='Row number to analyze (1-indexed)')
    parser.add_argument('-b', '--bicorder',
                        default='../bicorder.json',
                        help='Path to bicorder.json (default: ../bicorder.json)')

    args = parser.parse_args()

    # Validate input file exists. is_file() rather than exists(): a directory
    # passes exists() and would then fail with a traceback when opened.
    if not Path(args.input_csv).is_file():
        print(f"Error: Input file '{args.input_csv}' not found", file=sys.stderr)
        sys.exit(1)

    # Validate bicorder.json exists (same reasoning as above)
    if not Path(args.bicorder).is_file():
        print(f"Error: Bicorder config '{args.bicorder}' not found", file=sys.stderr)
        sys.exit(1)

    # Load protocol
    protocol = get_protocol_by_row(args.input_csv, args.row_number)
    if protocol is None:
        print(f"Error: Row {args.row_number} not found in CSV", file=sys.stderr)
        sys.exit(1)

    # Load bicorder config. json.JSONDecodeError and UnicodeDecodeError are
    # both ValueError subclasses; report them like the other failures instead
    # of letting a traceback escape.
    try:
        bicorder_data = load_bicorder_config(args.bicorder)
    except (OSError, ValueError) as err:
        print(f"Error: Could not load bicorder config '{args.bicorder}': {err}",
              file=sys.stderr)
        sys.exit(1)

    # Generate and output prompt
    prompt = generate_init_prompt(protocol, bicorder_data)
    print(prompt)
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user