From f1ae9cac1ffe021853397046e756c73715b890a3 Mon Sep 17 00:00:00 2001 From: Nathan Schneider Date: Fri, 20 Mar 2026 15:13:54 -0600 Subject: [PATCH] Derive classifier dimensions from bicorder.json automatically Both export_model_for_js.py and bicorder_classifier.py now read DIMENSIONS and KEY_DIMENSIONS directly from bicorder.json at runtime, so the model stays in sync whenever gradient terms are renamed or added. A COLUMN_RENAMES dict handles historical CSV column name changes. The model now includes bicorder_version so the app's version check works correctly. Regenerated bicorder_model.json against bicorder.json v1.2.6 with correct dimension names, 9 key dimensions from shortform flags, and updated thresholds. Co-Authored-By: Claude Sonnet 4.6 --- analysis/bicorder_classifier.py | 74 +++++++++++------------ analysis/bicorder_model.json | 31 +++++----- analysis/export_model_for_js.py | 100 ++++++++++++++++++-------------- 3 files changed, 108 insertions(+), 97 deletions(-) diff --git a/analysis/bicorder_classifier.py b/analysis/bicorder_classifier.py index 3b571e1..0368081 100644 --- a/analysis/bicorder_classifier.py +++ b/analysis/bicorder_classifier.py @@ -30,58 +30,46 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis import json from pathlib import Path +# Path to bicorder.json (relative to this script) +_BICORDER_JSON = Path(__file__).parent.parent / 'bicorder.json' + +# Historical column renames: maps old CSV column names → current bicorder.json names. +# Add an entry here whenever gradient terms are renamed in bicorder.json. +_COLUMN_RENAMES = { + 'Design_elite_vs_vernacular': 'Design_institutional_vs_vernacular', + 'Entanglement_exclusive_vs_non-exclusive': 'Entanglement_monopolistic_vs_pluralistic', + 'Experience_sufficient_vs_insufficient': 'Experience_sufficient_vs_limited', + 'Experience_Kafka_vs_Whitehead': 'Experience_restraining_vs_liberating', +} + + +def _load_bicorder_dimensions(bicorder_path=_BICORDER_JSON): + """Read DIMENSIONS and KEY_DIMENSIONS from bicorder.json.""" + with open(bicorder_path) as f: + data = json.load(f) + dimensions = [] + key_dimensions = [] + for category in data['diagnostic']: + set_name = category['set_name'] + for gradient in category['gradients']: + dim_name = f"{set_name}_{gradient['term_left']}_vs_{gradient['term_right']}" + dimensions.append(dim_name) + if gradient.get('shortform', False): + key_dimensions.append(dim_name) + return dimensions, key_dimensions + class BicorderClassifier: """ Classifies protocols into one of two families and recommends form type. """ - # Dimension names (in order) - DIMENSIONS = [ - 'Design_explicit_vs_implicit', - 'Design_precise_vs_interpretive', - 'Design_elite_vs_vernacular', - 'Design_documenting_vs_enabling', - 'Design_static_vs_malleable', - 'Design_technical_vs_social', - 'Design_universal_vs_particular', - 'Design_durable_vs_ephemeral', - 'Entanglement_macro_vs_micro', - 'Entanglement_sovereign_vs_subsidiary', - 'Entanglement_self-enforcing_vs_enforced', - 'Entanglement_abstract_vs_embodied', - 'Entanglement_obligatory_vs_voluntary', - 'Entanglement_flocking_vs_swarming', - 'Entanglement_defensible_vs_exposed', - 'Entanglement_exclusive_vs_non-exclusive', - 'Experience_sufficient_vs_insufficient', - 'Experience_crystallized_vs_contested', - 'Experience_trust-evading_vs_trust-inducing', - 'Experience_predictable_vs_emergent', - 'Experience_exclusion_vs_inclusion', - 'Experience_Kafka_vs_Whitehead', - 'Experience_dead_vs_alive', - ] - # Cluster names CLUSTER_NAMES = { 1: "Relational/Cultural", 2: "Institutional/Bureaucratic" } - # Key dimensions for short form (most discriminative) - # Based on LDA analysis - top differentiating dimensions - KEY_DIMENSIONS = [ - 'Design_elite_vs_vernacular', # 4.602 difference - 'Entanglement_flocking_vs_swarming', # 4.079 difference - 'Design_static_vs_malleable', # 3.775 difference - 'Entanglement_obligatory_vs_voluntary', # 3.648 difference - 'Entanglement_self-enforcing_vs_enforced', # 3.628 difference - 'Design_explicit_vs_implicit', # High importance - 'Entanglement_sovereign_vs_subsidiary', # High importance - 'Design_technical_vs_social', # High importance - ] - def __init__(self, model_path='analysis_results/data'): """Initialize classifier with pre-computed model data.""" self.model_path = Path(model_path) @@ -89,6 +77,9 @@ class BicorderClassifier: self.lda = None self.cluster_centroids = None + # Derive dimension lists from bicorder.json + self.DIMENSIONS, self.KEY_DIMENSIONS = _load_bicorder_dimensions() + # Load training data to fit scaler and LDA self._load_model() @@ -98,6 +89,9 @@ class BicorderClassifier: df = pd.read_csv('diagnostic_output.csv') clusters = pd.read_csv(self.model_path / 'kmeans_clusters.csv') + # Rename old column names to match current bicorder.json + df = df.rename(columns=_COLUMN_RENAMES) + # Remove duplicates df = df.drop_duplicates(subset='Descriptor', keep='first') diff --git a/analysis/bicorder_model.json b/analysis/bicorder_model.json index 7899e17..99a47a2 100644 --- a/analysis/bicorder_model.json +++ b/analysis/bicorder_model.json @@ -1,10 +1,11 @@ { "version": "1.0", - "generated": "2025-12-19T11:46:23.367069", + "bicorder_version": "1.2.6", + "generated": "2026-03-20T15:08:23.160614", "dimensions": [ "Design_explicit_vs_implicit", "Design_precise_vs_interpretive", - "Design_elite_vs_vernacular", + "Design_institutional_vs_vernacular", "Design_documenting_vs_enabling", "Design_static_vs_malleable", "Design_technical_vs_social", @@ -17,24 +18,25 @@ "Entanglement_obligatory_vs_voluntary", "Entanglement_flocking_vs_swarming", "Entanglement_defensible_vs_exposed", - "Entanglement_exclusive_vs_non-exclusive", - "Experience_sufficient_vs_insufficient", + "Entanglement_monopolistic_vs_pluralistic", + "Experience_sufficient_vs_limited", "Experience_crystallized_vs_contested", "Experience_trust-evading_vs_trust-inducing", "Experience_predictable_vs_emergent", "Experience_exclusion_vs_inclusion", - "Experience_Kafka_vs_Whitehead", + "Experience_restraining_vs_liberating", "Experience_dead_vs_alive" ], "key_dimensions": [ - "Design_elite_vs_vernacular", - "Entanglement_flocking_vs_swarming", + "Design_precise_vs_interpretive", + "Design_institutional_vs_vernacular", "Design_static_vs_malleable", - "Entanglement_obligatory_vs_voluntary", - "Entanglement_self-enforcing_vs_enforced", - "Design_explicit_vs_implicit", "Entanglement_sovereign_vs_subsidiary", - "Design_technical_vs_social" + "Entanglement_self-enforcing_vs_enforced", + "Entanglement_obligatory_vs_voluntary", + "Entanglement_flocking_vs_swarming", + "Experience_predictable_vs_emergent", + "Experience_exclusion_vs_inclusion" ], "cluster_names": { "1": "Relational/Cultural", @@ -229,14 +231,13 @@ ] }, "thresholds": { - "confidence_low": 0.5, + "confidence_low": 0.6, "completeness_low": 0.5, - "boundary_distance_low": 0.3 + "boundary_distance_low": 0.5 }, "metadata": { "total_protocols": 406, "cluster_1_count": 216, "cluster_2_count": 190 - }, - "bicorder_version": "1.2.3" + } } \ No newline at end of file diff --git a/analysis/export_model_for_js.py b/analysis/export_model_for_js.py index 24cc5c9..96996f8 100644 --- a/analysis/export_model_for_js.py +++ b/analysis/export_model_for_js.py @@ -1,45 +1,67 @@ #!/usr/bin/env python3 """ Export the cluster classification model to JSON for use in JavaScript. + +Reads dimension names directly from bicorder.json so the model always +stays in sync with the current bicorder structure. + +When gradients are renamed in bicorder.json, add the old→new mapping to +COLUMN_RENAMES so the training CSV columns are correctly aligned. """ +import json +from pathlib import Path + import pandas as pd import numpy as np from sklearn.preprocessing import StandardScaler from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -import json -# Dimension names -DIMENSIONS = [ - 'Design_explicit_vs_implicit', - 'Design_precise_vs_interpretive', - 'Design_elite_vs_vernacular', - 'Design_documenting_vs_enabling', - 'Design_static_vs_malleable', - 'Design_technical_vs_social', - 'Design_universal_vs_particular', - 'Design_durable_vs_ephemeral', - 'Entanglement_macro_vs_micro', - 'Entanglement_sovereign_vs_subsidiary', - 'Entanglement_self-enforcing_vs_enforced', - 'Entanglement_abstract_vs_embodied', - 'Entanglement_obligatory_vs_voluntary', - 'Entanglement_flocking_vs_swarming', - 'Entanglement_defensible_vs_exposed', - 'Entanglement_exclusive_vs_non-exclusive', - 'Experience_sufficient_vs_insufficient', - 'Experience_crystallized_vs_contested', - 'Experience_trust-evading_vs_trust-inducing', - 'Experience_predictable_vs_emergent', - 'Experience_exclusion_vs_inclusion', - 'Experience_Kafka_vs_Whitehead', - 'Experience_dead_vs_alive', -] +# Path to bicorder.json (relative to this script) +BICORDER_JSON = Path(__file__).parent.parent / 'bicorder.json' + +# Historical column renames: maps old CSV column names → current bicorder.json names. +# Add an entry here whenever gradient terms are renamed in bicorder.json. +COLUMN_RENAMES = { + 'Design_elite_vs_vernacular': 'Design_institutional_vs_vernacular', + 'Entanglement_exclusive_vs_non-exclusive': 'Entanglement_monopolistic_vs_pluralistic', + 'Experience_sufficient_vs_insufficient': 'Experience_sufficient_vs_limited', + 'Experience_Kafka_vs_Whitehead': 'Experience_restraining_vs_liberating', +} + + +def load_bicorder_dimensions(bicorder_path): + """Read DIMENSIONS and KEY_DIMENSIONS from bicorder.json.""" + with open(bicorder_path) as f: + data = json.load(f) + + dimensions = [] + key_dimensions = [] + + for category in data['diagnostic']: + set_name = category['set_name'] + for gradient in category['gradients']: + dim_name = f"{set_name}_{gradient['term_left']}_vs_{gradient['term_right']}" + dimensions.append(dim_name) + if gradient.get('shortform', False): + key_dimensions.append(dim_name) + + return dimensions, key_dimensions, data['version'] + + +# Derive dimensions and version from bicorder.json +DIMENSIONS, KEY_DIMENSIONS, BICORDER_VERSION = load_bicorder_dimensions(BICORDER_JSON) + +print(f"Loaded bicorder.json v{BICORDER_VERSION}") +print(f"Dimensions: {len(DIMENSIONS)}, key dimensions: {len(KEY_DIMENSIONS)}") # Load data df = pd.read_csv('diagnostic_output.csv') clusters = pd.read_csv('analysis_results/data/kmeans_clusters.csv') +# Rename old column names to match current bicorder.json +df = df.rename(columns=COLUMN_RENAMES) + # Remove duplicates df = df.drop_duplicates(subset='Descriptor', keep='first') @@ -47,6 +69,8 @@ df = df.drop_duplicates(subset='Descriptor', keep='first') merged = df.merge(clusters, on='Descriptor') merged_clean = merged.dropna(subset=DIMENSIONS) +print(f"Training on {len(merged_clean)} protocols") + # Prepare training data X = merged_clean[DIMENSIONS].values y = merged_clean['cluster'].values @@ -59,7 +83,7 @@ X_scaled = scaler.fit_transform(X) lda = LinearDiscriminantAnalysis(n_components=1) lda.fit(X_scaled, y) -# Calculate cluster centroids +# Calculate cluster centroids in scaled space cluster_centroids = {} for cluster_id in [1, 2]: cluster_data = X_scaled[y == cluster_id] @@ -71,21 +95,10 @@ for cluster_id in [1, 2]: cluster_data_original = X[y == cluster_id] cluster_means_original[cluster_id] = cluster_data_original.mean(axis=0).tolist() -# Key dimensions (most discriminative) -KEY_DIMENSIONS = [ - 'Design_elite_vs_vernacular', - 'Entanglement_flocking_vs_swarming', - 'Design_static_vs_malleable', - 'Entanglement_obligatory_vs_voluntary', - 'Entanglement_self-enforcing_vs_enforced', - 'Design_explicit_vs_implicit', - 'Entanglement_sovereign_vs_subsidiary', - 'Design_technical_vs_social', -] - # Build model export model = { 'version': '1.0', + 'bicorder_version': BICORDER_VERSION, 'generated': pd.Timestamp.now().isoformat(), 'dimensions': DIMENSIONS, 'key_dimensions': KEY_DIMENSIONS, @@ -124,7 +137,10 @@ output_path = 'bicorder_model.json' with open(output_path, 'w') as f: json.dump(model, f, indent=2) -print(f"Model exported to {output_path}") +print(f"\nModel exported to {output_path}") +print(f"Bicorder version: {BICORDER_VERSION}") print(f"Total dimensions: {len(DIMENSIONS)}") -print(f"Key dimensions for short form: {len(KEY_DIMENSIONS)}") +print(f"Key dimensions (short form):") +for dim in KEY_DIMENSIONS: + print(f" - {dim}") print(f"Model size: {len(json.dumps(model))} bytes")