Added classifier analysis to bicorder ascii and web app
This commit is contained in:
@@ -6,6 +6,181 @@ Generate bicorder.txt from bicorder.json
|
||||
import json
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Simple version-based approach
|
||||
#
|
||||
# The model includes a 'bicorder_version' field indicating which version of
|
||||
# bicorder.json it was trained on. The code checks that versions match before
|
||||
# calculating. This ensures the gradient structure is compatible.
|
||||
#
|
||||
# When bicorder.json changes (gradients added/removed/reordered), update the
|
||||
# version number and retrain the model.
|
||||
|
||||
|
||||
def load_classifier_model():
    """Load the LDA classifier model from bicorder_model.json.

    Candidate locations are tried in order: the ``analysis`` subdirectory of
    this script's directory, the script's directory itself, and then the same
    two locations relative to the current working directory.

    Returns:
        The parsed JSON content of the first candidate that is a regular
        file, or None when no candidate is found.
    """
    script_dir = Path(__file__).parent
    model_paths = [
        script_dir / 'analysis' / 'bicorder_model.json',
        script_dir / 'bicorder_model.json',
        Path('analysis/bicorder_model.json'),
        Path('bicorder_model.json'),
    ]

    for path in model_paths:
        # is_file() rather than exists(): a directory that happens to carry
        # the model's name must be skipped, not opened.
        if path.is_file():
            # Decode as UTF-8 explicitly so the result does not depend on
            # the platform's locale encoding.
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)

    return None
|
||||
|
||||
|
||||
def calculate_lda_score(values_array, model):
    """
    Calculate the LDA score for a list of gradient values.

    Each value is standardized with the model's scaler,
    (value - mean) / scale, and the score is the dot product of the
    standardized values with the LDA coefficients, plus the intercept.

    Args:
        values_array: list of numeric gradient values, one per entry of
            model['dimensions'], in the order expected by the model
        model: loaded classifier model, or None. The keys read here are
            'dimensions', 'scaler' ('mean', 'scale') and 'lda'
            ('coefficients', 'intercept').

    Returns:
        LDA score (float), or None if the model is missing, the number of
        values does not match the model's dimensions, or the model's
        scaler/coefficient lists are too short to cover every value
    """
    if model is None:
        return None

    expected = len(model['dimensions'])
    if len(values_array) != expected:
        return None

    mean = model['scaler']['mean']
    scale = model['scaler']['scale']
    coef = model['lda']['coefficients']
    intercept = model['lda']['intercept']

    # A model whose parameter lists are shorter than its own dimension list
    # cannot score every value; report "no score" rather than failing with
    # an IndexError part-way through the calculation.
    if min(len(mean), len(scale), len(coef)) < expected:
        return None

    # Dot product of the coefficients with the standardized values.
    # zip stops at the end of values_array, so any surplus model entries
    # are ignored.
    weighted = sum(
        c * ((value - m) / s)
        for value, m, s, c in zip(values_array, mean, scale, coef)
    )
    return weighted + intercept
|
||||
|
||||
|
||||
def lda_score_to_scale(lda_score):
    """
    Map a raw LDA score onto the integer 1-9 scale.

    The mapping is linear, value = 5 + (lda_score * 4/3), then clamped to
    the 1-9 range and rounded with the built-in round():
    - LDA -3 or less → 1 (bureaucratic)
    - LDA 0 → 5 (boundary)
    - LDA +3 or more → 9 (relational)

    Returns None when lda_score is None.
    """
    if lda_score is None:
        return None

    # Linear rescale centred on 5.
    rescaled = 5 + (lda_score * 4.0 / 3.0)

    # Clamp into [1, 9]: cap the top first, then lift the bottom.
    capped = rescaled if rescaled < 9 else 9
    bounded = capped if capped > 1 else 1

    return round(bounded)
|
||||
|
||||
|
||||
def calculate_hardness(diagnostic_values):
    """Return hardness/softness: the rounded mean of the diagnostic values.

    None entries are ignored. Returns None when diagnostic_values is empty
    (or otherwise falsy) or contains no non-None entry. Rounding uses the
    built-in round(), so an exact .5 mean goes to the nearest even integer.
    """
    if not diagnostic_values:
        return None

    total = 0
    count = 0
    for reading in diagnostic_values:
        if reading is None:
            continue
        total += reading
        count += 1

    if count == 0:
        return None

    return round(total / count)
|
||||
|
||||
|
||||
def calculate_polarization(diagnostic_values):
    """
    Return polarization on the 1-9 scale (1 = extreme, 9 = centrist).

    The score is based on the mean absolute distance of the non-None values
    from the centre value 5. Returns None when diagnostic_values is empty
    (or otherwise falsy) or contains no non-None entry.
    """
    if not diagnostic_values:
        return None

    # Distance of every present reading from the centre of the scale.
    offsets = [
        abs(reading - 5)
        for reading in diagnostic_values
        if reading is not None
    ]
    if not offsets:
        return None

    mean_distance = sum(offsets) / len(offsets)

    # Inverted mapping: a mean distance of 4 (everything at 1 or 9) gives 1,
    # a mean distance of 0 (everything at 5) gives 9.
    raw_score = 9 - (mean_distance / 4 * 8)

    # Clamp into [1, 9] before rounding.
    capped = raw_score if raw_score < 9 else 9
    bounded = capped if capped > 1 else 1

    return round(bounded)
|
||||
|
||||
|
||||
def calculate_automated_analysis(json_data):
    """
    Calculate values for automated analysis fields.

    Modifies json_data in place and returns nothing.

    Every gradient under json_data["diagnostic"] is visited in order. The
    values that are present feed the hardness and polarization calculations.
    For the classifier input, a missing value is replaced by the neutral
    value 5.0 so the input keeps one entry per gradient. If no gradient has
    a value, json_data is left unchanged.

    Each item of json_data["analysis"] whose "automated" flag is true is then
    updated according to its "term_left":
    - "hardness": result of calculate_hardness on the present values
    - "polarized": result of calculate_polarization on the present values
    - "bureaucratic": LDA classifier score mapped onto the 1-9 scale. It is
      set to None, with a warning on stderr, when the model's
      "bicorder_version" differs from json_data["version"]. It is left
      untouched when no model could be loaded.
    """
    # Collect all diagnostic values in order
    diagnostic_values = []
    values_array = []

    for diagnostic_set in json_data.get("diagnostic", []):
        for gradient in diagnostic_set.get("gradients", []):
            value = gradient.get("value")
            if value is not None:
                diagnostic_values.append(value)
                values_array.append(float(value))
            else:
                # Fill missing with neutral value so positions still line up
                # with the order the classifier expects
                values_array.append(5.0)

    # Only calculate if we have diagnostic values
    if not diagnostic_values:
        return

    # Load classifier model
    model = load_classifier_model()

    # Check version compatibility
    bicorder_version = json_data.get("version", "unknown")
    model_version = model.get("bicorder_version", "unknown") if model else "unknown"

    version_mismatch = (model and bicorder_version != model_version)

    # Calculate each automated analysis field
    for analysis_item in json_data.get("analysis", []):
        if not analysis_item.get("automated", False):
            continue

        term_left = analysis_item.get("term_left", "")

        # Calculate based on the type
        if term_left == "hardness":
            analysis_item["value"] = calculate_hardness(diagnostic_values)
        elif term_left == "polarized":
            analysis_item["value"] = calculate_polarization(diagnostic_values)
        elif term_left == "bureaucratic":
            if version_mismatch:
                # Skip calculation if versions don't match. The warning goes
                # to stderr, like the script's other diagnostics, so it does
                # not mix with anything written to stdout.
                print(
                    f"Warning: Model version ({model_version}) doesn't match "
                    f"bicorder version ({bicorder_version}). "
                    "Skipping bureaucratic/relational calculation.",
                    file=sys.stderr,
                )
                analysis_item["value"] = None
            elif model:
                lda_score = calculate_lda_score(values_array, model)
                analysis_item["value"] = lda_score_to_scale(lda_score)
|
||||
|
||||
|
||||
def center_text(text, width):
|
||||
@@ -218,6 +393,9 @@ def main():
|
||||
print(f"Error: Invalid JSON in '{args.input_json}': {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Calculate automated analysis values
|
||||
calculate_automated_analysis(data)
|
||||
|
||||
# Generate the formatted text
|
||||
output = generate_bicorder_text(data)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user