Added classifier analysis to bicorder ascii and web app

2025-12-21 21:38:39 -07:00
parent b541f85553
commit 1b508b911f
17 changed files with 2795 additions and 49 deletions
--- a/ascii_bicorder.py
+++ b/ascii_bicorder.py
@@ -6,6 +6,181 @@ Generate bicorder.txt from bicorder.json
 import json
 import argparse
 import sys
+import os
+from pathlib import Path
+
+
+# Simple version-based approach
+#
+# The model includes a 'bicorder_version' field indicating which version of
+# bicorder.json it was trained on. The code checks that versions match before
+# calculating. This ensures the gradient structure is compatible.
+#
+# When bicorder.json changes (gradients added/removed/reordered), update the
+# version number and retrain the model.
+
+
+def load_classifier_model():
+    """Load the LDA model from bicorder_model.json"""
+    # Try to find the model file
+    script_dir = Path(__file__).parent
+    model_paths = [
+        script_dir / 'analysis' / 'bicorder_model.json',
+        script_dir / 'bicorder_model.json',
+        Path('analysis/bicorder_model.json'),
+        Path('bicorder_model.json'),
+    ]
+
+    for path in model_paths:
+        if path.exists():
+            with open(path, 'r') as f:
+                return json.load(f)
+
+    return None
+
+
+def calculate_lda_score(values_array, model):
+    """
+    Calculate LDA score from an array of values using the model.
+
+    Args:
+        values_array: list of 23 values (1-9) in the order expected by the model
+        model: loaded classifier model
+
+    Returns:
+        LDA score (float), or None if model is None or the value count != len(model['dimensions'])
+    """
+    if model is None:
+        return None
+
+    if len(values_array) != len(model['dimensions']):
+        return None
+
+    # Standardize using model scaler
+    mean = model['scaler']['mean']
+    scale = model['scaler']['scale']
+    scaled = [(values_array[i] - mean[i]) / scale[i] for i in range(len(values_array))]
+
+    # Calculate LDA score: coef · x + intercept
+    coef = model['lda']['coefficients']
+    intercept = model['lda']['intercept']
+
+    # Dot product
+    lda_score = sum(coef[i] * scaled[i] for i in range(len(scaled))) + intercept
+
+    return lda_score
+
+
+def lda_score_to_scale(lda_score):
+    """
+    Convert LDA score to 1-9 scale.
+    LDA scores typically range from -4 to +4; only the span -3 to +3 (6 range)
+    is mapped linearly onto the target scale 1 to 9 (8 range), the rest is clamped.
+
+    Formula: value = 5 + (lda_score * 4/3)
+    - LDA -3 or less → 1 (bureaucratic)
+    - LDA 0 → 5 (boundary)
+    - LDA +3 or more → 9 (relational)
+    """
+    if lda_score is None:
+        return None
+
+    # Scale: value = 5 + (lda_score * 4/3), i.e. a slope of about 1.33
+    value = 5 + (lda_score * 4.0 / 3.0)
+
+    # Clamp to 1-9 range and round
+    value = max(1, min(9, value))
+    return round(value)
+
+
+def calculate_hardness(diagnostic_values):
+    """Calculate hardness/softness (rounded mean of all non-None diagnostic values)"""
+    if not diagnostic_values:
+        return None
+
+    valid_values = [v for v in diagnostic_values if v is not None]
+    if not valid_values:
+        return None
+
+    return round(sum(valid_values) / len(valid_values))
+
+
+def calculate_polarization(diagnostic_values):
+    """
+    Calculate polarization (1 = extreme, 9 = centrist).
+    Measures how far values are from the center (5).
+    """
+    if not diagnostic_values:
+        return None
+
+    valid_values = [v for v in diagnostic_values if v is not None]
+    if not valid_values:
+        return None
+
+    # Calculate mean distance from center
+    distances = [abs(v - 5) for v in valid_values]
+    mean_distance = sum(distances) / len(distances)
+
+    # Convert to 1-9 scale (inverted: high distance = low value = polarized)
+    # Maximum possible distance is 4 (from 1 or 9 to 5)
+    # Scale: 1 (all at extremes) to 9 (all at center)
+    polarization = 9 - (mean_distance / 4 * 8)
+
+    return round(max(1, min(9, polarization)))
+
+
+def calculate_automated_analysis(json_data):
+    """
+    Calculate values for automated analysis fields.
+    Modifies json_data in place.
+    """
+    # Collect all diagnostic values in order
+    diagnostic_values = []
+    values_array = []
+
+    for diagnostic_set in json_data.get("diagnostic", []):
+        for gradient in diagnostic_set.get("gradients", []):
+            value = gradient.get("value")
+            if value is not None:
+                diagnostic_values.append(value)
+                values_array.append(float(value))
+            else:
+                # Fill missing with neutral value
+                values_array.append(5.0)
+
+    # Only calculate if we have diagnostic values
+    if not diagnostic_values:
+        return
+
+    # Load classifier model
+    model = load_classifier_model()
+
+    # Check version compatibility
+    bicorder_version = json_data.get("version", "unknown")
+    model_version = model.get("bicorder_version", "unknown") if model else "unknown"
+
+    version_mismatch = (model and bicorder_version != model_version)
+
+    # Calculate each automated analysis field
+    for analysis_item in json_data.get("analysis", []):
+        if not analysis_item.get("automated", False):
+            continue
+
+        term_left = analysis_item.get("term_left", "")
+
+        # Calculate based on the type
+        if term_left == "hardness":
+            analysis_item["value"] = calculate_hardness(diagnostic_values)
+        elif term_left == "polarized":
+            analysis_item["value"] = calculate_polarization(diagnostic_values)
+        elif term_left == "bureaucratic":
+            if version_mismatch:
+                # Skip calculation if versions don't match
+                print(f"Warning: Model version ({model_version}) doesn't match bicorder version ({bicorder_version}). Skipping bureaucratic/relational calculation.")
+                analysis_item["value"] = None
+            elif model:
+                lda_score = calculate_lda_score(values_array, model)
+                analysis_item["value"] = lda_score_to_scale(lda_score)


 def center_text(text, width):
@@ -218,6 +393,9 @@ def main():
        print(f"Error: Invalid JSON in '{args.input_json}': {e}", file=sys.stderr)
        sys.exit(1)

+    # Calculate automated analysis values
+    calculate_automated_analysis(data)
+
    # Generate the formatted text
    output = generate_bicorder_text(data)