Added classifier analysis to bicorder ascii and web app

This commit is contained in:
Nathan Schneider
2025-12-21 21:38:39 -07:00
parent b541f85553
commit 1b508b911f
17 changed files with 2795 additions and 49 deletions

View File

@@ -6,6 +6,181 @@ Generate bicorder.txt from bicorder.json
import json
import argparse
import sys
import os
from pathlib import Path
# Simple version-based approach
#
# The model includes a 'bicorder_version' field indicating which version of
# bicorder.json it was trained on. The code checks that versions match before
# calculating. This ensures the gradient structure is compatible.
#
# When bicorder.json changes (gradients added/removed/reordered), update the
# version number and retrain the model.
def load_classifier_model():
    """
    Load the LDA model from bicorder_model.json.

    Candidate locations are tried in order: an 'analysis' folder next to
    this script, the script's own folder, then the same two locations
    relative to the current working directory.

    Returns:
        The parsed JSON content of the first candidate that is a regular
        file, or None if no candidate is found.

    Raises:
        json.JSONDecodeError: if the file that was found is not valid JSON.
        OSError: if the file that was found cannot be read.
    """
    script_dir = Path(__file__).parent
    candidates = (
        script_dir / 'analysis' / 'bicorder_model.json',
        script_dir / 'bicorder_model.json',
        Path('analysis/bicorder_model.json'),
        Path('bicorder_model.json'),
    )
    for candidate in candidates:
        # is_file() rather than exists(): a directory that happens to carry
        # the model's name must be skipped, not passed to open().
        if candidate.is_file():
            # Decode explicitly as UTF-8 so the result does not depend on
            # the platform's default locale encoding.
            with open(candidate, 'r', encoding='utf-8') as f:
                return json.load(f)
    return None
def calculate_lda_score(values_array, model):
    """
    Project a vector of gradient values onto the model's LDA axis.

    Args:
        values_array: list of numeric values, one per entry of
            model['dimensions'] and in the order the model expects
        model: loaded classifier model (dict), or None

    Returns:
        LDA score (coef . standardized_values + intercept), or None when
        there is no model or the number of values does not match the
        number of model dimensions
    """
    if model is None or len(values_array) != len(model['dimensions']):
        return None

    # Standardize each value with the scaler stored in the model.
    scaler = model['scaler']
    means = scaler['mean']
    scales = scaler['scale']
    standardized = [
        (raw - means[idx]) / scales[idx]
        for idx, raw in enumerate(values_array)
    ]

    # Linear discriminant: dot product with the coefficients plus intercept.
    lda = model['lda']
    weights = lda['coefficients']
    bias = lda['intercept']
    projection = sum(weights[idx] * z for idx, z in enumerate(standardized))
    return projection + bias
def lda_score_to_scale(lda_score):
    """
    Map a raw LDA score onto the integer 1-9 scale.

    The mapping is linear, value = 5 + (lda_score * 4/3), then clamped to
    the 1-9 range and rounded with the built-in round():
    - LDA -3 or less -> 1 (bureaucratic)
    - LDA 0 -> 5 (boundary)
    - LDA +3 or more -> 9 (relational)

    Returns None when lda_score is None.
    """
    if lda_score is not None:
        unclamped = 5 + (lda_score * 4.0 / 3.0)
        # Cap at the top of the scale first, then lift to the bottom.
        capped = min(9, unclamped)
        return round(max(1, capped))
    return None
def calculate_hardness(diagnostic_values):
    """
    Calculate hardness/softness as the rounded mean of the diagnostic values.

    None entries are ignored. Returns None when the input is empty/None or
    contains no usable values.
    """
    present = [v for v in diagnostic_values if v is not None] if diagnostic_values else []
    if present:
        return round(sum(present) / len(present))
    return None
def calculate_polarization(diagnostic_values):
    """
    Calculate polarization on a 1-9 scale (1 = extreme, 9 = centrist).

    The score is derived from the mean absolute distance of the non-None
    values from the center (5). Returns None when the input is empty/None
    or contains no usable values.
    """
    present = [v for v in (diagnostic_values or []) if v is not None]
    if not present:
        return None

    mean_offset = sum(abs(v - 5) for v in present) / len(present)

    # The largest possible offset is 4 (a value of 1 or 9), which is
    # stretched over the 8-point span and inverted: a large offset gives a
    # low (polarized) score, no offset gives 9.
    score = 9 - (mean_offset / 4 * 8)
    return round(max(1, min(9, score)))
def calculate_automated_analysis(json_data):
    """
    Calculate values for automated analysis fields.

    Modifies json_data in place: every entry of json_data["analysis"] whose
    "automated" flag is truthy gets its "value" set according to its
    "term_left" ("hardness", "polarized" or "bureaucratic"). Nothing is
    changed when no gradient under json_data["diagnostic"] has a value.

    Args:
        json_data: parsed bicorder document (dict)

    Returns:
        None
    """
    # Collect all diagnostic values in order.
    diagnostic_values = []  # only the values that are actually set
    values_array = []       # one float per gradient, for the classifier
    for diagnostic_set in json_data.get("diagnostic", []):
        for gradient in diagnostic_set.get("gradients", []):
            value = gradient.get("value")
            if value is not None:
                diagnostic_values.append(value)
                values_array.append(float(value))
            else:
                # Fill missing with neutral value so the vector keeps one
                # slot per gradient.
                values_array.append(5.0)

    # Only calculate if we have diagnostic values.
    if not diagnostic_values:
        return

    # Load classifier model and check version compatibility.
    model = load_classifier_model()
    bicorder_version = json_data.get("version", "unknown")
    model_version = model.get("bicorder_version", "unknown") if model else "unknown"
    version_mismatch = bool(model) and bicorder_version != model_version

    # Calculate each automated analysis field.
    for analysis_item in json_data.get("analysis", []):
        if not analysis_item.get("automated", False):
            continue

        term_left = analysis_item.get("term_left", "")
        if term_left == "hardness":
            analysis_item["value"] = calculate_hardness(diagnostic_values)
        elif term_left == "polarized":
            analysis_item["value"] = calculate_polarization(diagnostic_values)
        elif term_left == "bureaucratic":
            if version_mismatch:
                # Skip calculation if versions don't match. The warning is a
                # diagnostic, so it goes to stderr rather than stdout.
                print(f"Warning: Model version ({model_version}) doesn't match bicorder version ({bicorder_version}). Skipping bureaucratic/relational calculation.", file=sys.stderr)
                analysis_item["value"] = None
            elif model:
                lda_score = calculate_lda_score(values_array, model)
                analysis_item["value"] = lda_score_to_scale(lda_score)
            # With no model available the existing value is left untouched.
def center_text(text, width):
@@ -218,6 +393,9 @@ def main():
print(f"Error: Invalid JSON in '{args.input_json}': {e}", file=sys.stderr)
sys.exit(1)
# Calculate automated analysis values
calculate_automated_analysis(data)
# Generate the formatted text
output = generate_bicorder_text(data)