Derive classifier dimensions from bicorder.json automatically

Both export_model_for_js.py and bicorder_classifier.py now read
DIMENSIONS and KEY_DIMENSIONS directly from bicorder.json at runtime,
so the model stays in sync whenever gradient terms are renamed or
added. A COLUMN_RENAMES dict handles historical CSV column name
changes. The model now includes bicorder_version so the app's version
check works correctly.

Regenerated bicorder_model.json against bicorder.json v1.2.6 with
correct dimension names, 9 key dimensions from shortform flags, and
updated thresholds.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Nathan Schneider
2026-03-20 15:13:54 -06:00
parent 5232e760be
commit f1ae9cac1f
3 changed files with 108 additions and 97 deletions

View File

@@ -30,58 +30,46 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import json
from pathlib import Path
# Path to bicorder.json (relative to this script)
_BICORDER_JSON = Path(__file__).parent.parent / 'bicorder.json'
# Historical column renames: maps old CSV column names → current bicorder.json names.
# Add an entry here whenever gradient terms are renamed in bicorder.json.
_COLUMN_RENAMES = {
'Design_elite_vs_vernacular': 'Design_institutional_vs_vernacular',
'Entanglement_exclusive_vs_non-exclusive': 'Entanglement_monopolistic_vs_pluralistic',
'Experience_sufficient_vs_insufficient': 'Experience_sufficient_vs_limited',
'Experience_Kafka_vs_Whitehead': 'Experience_restraining_vs_liberating',
}
def _load_bicorder_dimensions(bicorder_path=_BICORDER_JSON):
"""Read DIMENSIONS and KEY_DIMENSIONS from bicorder.json."""
with open(bicorder_path) as f:
data = json.load(f)
dimensions = []
key_dimensions = []
for category in data['diagnostic']:
set_name = category['set_name']
for gradient in category['gradients']:
dim_name = f"{set_name}_{gradient['term_left']}_vs_{gradient['term_right']}"
dimensions.append(dim_name)
if gradient.get('shortform', False):
key_dimensions.append(dim_name)
return dimensions, key_dimensions
class BicorderClassifier:
"""
Classifies protocols into one of two families and recommends form type.
"""
# Dimension names (in order)
DIMENSIONS = [
'Design_explicit_vs_implicit',
'Design_precise_vs_interpretive',
'Design_elite_vs_vernacular',
'Design_documenting_vs_enabling',
'Design_static_vs_malleable',
'Design_technical_vs_social',
'Design_universal_vs_particular',
'Design_durable_vs_ephemeral',
'Entanglement_macro_vs_micro',
'Entanglement_sovereign_vs_subsidiary',
'Entanglement_self-enforcing_vs_enforced',
'Entanglement_abstract_vs_embodied',
'Entanglement_obligatory_vs_voluntary',
'Entanglement_flocking_vs_swarming',
'Entanglement_defensible_vs_exposed',
'Entanglement_exclusive_vs_non-exclusive',
'Experience_sufficient_vs_insufficient',
'Experience_crystallized_vs_contested',
'Experience_trust-evading_vs_trust-inducing',
'Experience_predictable_vs_emergent',
'Experience_exclusion_vs_inclusion',
'Experience_Kafka_vs_Whitehead',
'Experience_dead_vs_alive',
]
# Cluster names
CLUSTER_NAMES = {
1: "Relational/Cultural",
2: "Institutional/Bureaucratic"
}
# Key dimensions for short form (most discriminative)
# Based on LDA analysis - top differentiating dimensions
KEY_DIMENSIONS = [
'Design_elite_vs_vernacular', # 4.602 difference
'Entanglement_flocking_vs_swarming', # 4.079 difference
'Design_static_vs_malleable', # 3.775 difference
'Entanglement_obligatory_vs_voluntary', # 3.648 difference
'Entanglement_self-enforcing_vs_enforced', # 3.628 difference
'Design_explicit_vs_implicit', # High importance
'Entanglement_sovereign_vs_subsidiary', # High importance
'Design_technical_vs_social', # High importance
]
def __init__(self, model_path='analysis_results/data'):
"""Initialize classifier with pre-computed model data."""
self.model_path = Path(model_path)
@@ -89,6 +77,9 @@ class BicorderClassifier:
self.lda = None
self.cluster_centroids = None
# Derive dimension lists from bicorder.json
self.DIMENSIONS, self.KEY_DIMENSIONS = _load_bicorder_dimensions()
# Load training data to fit scaler and LDA
self._load_model()
@@ -98,6 +89,9 @@ class BicorderClassifier:
df = pd.read_csv('diagnostic_output.csv')
clusters = pd.read_csv(self.model_path / 'kmeans_clusters.csv')
# Rename old column names to match current bicorder.json
df = df.rename(columns=_COLUMN_RENAMES)
# Remove duplicates
df = df.drop_duplicates(subset='Descriptor', keep='first')

View File

@@ -1,10 +1,11 @@
{
"version": "1.0",
"generated": "2025-12-19T11:46:23.367069",
"bicorder_version": "1.2.6",
"generated": "2026-03-20T15:08:23.160614",
"dimensions": [
"Design_explicit_vs_implicit",
"Design_precise_vs_interpretive",
"Design_elite_vs_vernacular",
"Design_institutional_vs_vernacular",
"Design_documenting_vs_enabling",
"Design_static_vs_malleable",
"Design_technical_vs_social",
@@ -17,24 +18,25 @@
"Entanglement_obligatory_vs_voluntary",
"Entanglement_flocking_vs_swarming",
"Entanglement_defensible_vs_exposed",
"Entanglement_exclusive_vs_non-exclusive",
"Experience_sufficient_vs_insufficient",
"Entanglement_monopolistic_vs_pluralistic",
"Experience_sufficient_vs_limited",
"Experience_crystallized_vs_contested",
"Experience_trust-evading_vs_trust-inducing",
"Experience_predictable_vs_emergent",
"Experience_exclusion_vs_inclusion",
"Experience_Kafka_vs_Whitehead",
"Experience_restraining_vs_liberating",
"Experience_dead_vs_alive"
],
"key_dimensions": [
"Design_elite_vs_vernacular",
"Entanglement_flocking_vs_swarming",
"Design_precise_vs_interpretive",
"Design_institutional_vs_vernacular",
"Design_static_vs_malleable",
"Entanglement_obligatory_vs_voluntary",
"Entanglement_self-enforcing_vs_enforced",
"Design_explicit_vs_implicit",
"Entanglement_sovereign_vs_subsidiary",
"Design_technical_vs_social"
"Entanglement_self-enforcing_vs_enforced",
"Entanglement_obligatory_vs_voluntary",
"Entanglement_flocking_vs_swarming",
"Experience_predictable_vs_emergent",
"Experience_exclusion_vs_inclusion"
],
"cluster_names": {
"1": "Relational/Cultural",
@@ -229,14 +231,13 @@
]
},
"thresholds": {
"confidence_low": 0.5,
"confidence_low": 0.6,
"completeness_low": 0.5,
"boundary_distance_low": 0.3
"boundary_distance_low": 0.5
},
"metadata": {
"total_protocols": 406,
"cluster_1_count": 216,
"cluster_2_count": 190
},
"bicorder_version": "1.2.3"
}
}

View File

@@ -1,45 +1,67 @@
#!/usr/bin/env python3
"""
Export the cluster classification model to JSON for use in JavaScript.
Reads dimension names directly from bicorder.json so the model always
stays in sync with the current bicorder structure.
When gradients are renamed in bicorder.json, add the old→new mapping to
COLUMN_RENAMES so the training CSV columns are correctly aligned.
"""
import json
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import json
# Dimension names
DIMENSIONS = [
'Design_explicit_vs_implicit',
'Design_precise_vs_interpretive',
'Design_elite_vs_vernacular',
'Design_documenting_vs_enabling',
'Design_static_vs_malleable',
'Design_technical_vs_social',
'Design_universal_vs_particular',
'Design_durable_vs_ephemeral',
'Entanglement_macro_vs_micro',
'Entanglement_sovereign_vs_subsidiary',
'Entanglement_self-enforcing_vs_enforced',
'Entanglement_abstract_vs_embodied',
'Entanglement_obligatory_vs_voluntary',
'Entanglement_flocking_vs_swarming',
'Entanglement_defensible_vs_exposed',
'Entanglement_exclusive_vs_non-exclusive',
'Experience_sufficient_vs_insufficient',
'Experience_crystallized_vs_contested',
'Experience_trust-evading_vs_trust-inducing',
'Experience_predictable_vs_emergent',
'Experience_exclusion_vs_inclusion',
'Experience_Kafka_vs_Whitehead',
'Experience_dead_vs_alive',
]
# Path to bicorder.json (relative to this script)
BICORDER_JSON = Path(__file__).parent.parent / 'bicorder.json'
# Historical column renames: maps old CSV column names → current bicorder.json names.
# Add an entry here whenever gradient terms are renamed in bicorder.json.
COLUMN_RENAMES = {
'Design_elite_vs_vernacular': 'Design_institutional_vs_vernacular',
'Entanglement_exclusive_vs_non-exclusive': 'Entanglement_monopolistic_vs_pluralistic',
'Experience_sufficient_vs_insufficient': 'Experience_sufficient_vs_limited',
'Experience_Kafka_vs_Whitehead': 'Experience_restraining_vs_liberating',
}
def load_bicorder_dimensions(bicorder_path):
    """Read DIMENSIONS, KEY_DIMENSIONS, and the version from bicorder.json.

    Args:
        bicorder_path: Path to the bicorder.json file to read.

    Returns:
        A ``(dimensions, key_dimensions, version)`` tuple: the full ordered
        list of ``"<set_name>_<term_left>_vs_<term_right>"`` dimension
        names, the ordered subset whose gradient has a truthy ``shortform``
        flag, and the file's top-level ``version`` string.

    Raises:
        OSError: If ``bicorder_path`` cannot be opened.
        KeyError: If the JSON lacks ``diagnostic`` or ``version`` keys.
    """
    # Explicit encoding: JSON is UTF-8 by specification, and the platform
    # default text encoding is not guaranteed to be UTF-8 (e.g. Windows).
    with open(bicorder_path, encoding='utf-8') as f:
        data = json.load(f)
    dimensions = []
    key_dimensions = []
    for category in data['diagnostic']:
        set_name = category['set_name']
        for gradient in category['gradients']:
            dim_name = f"{set_name}_{gradient['term_left']}_vs_{gradient['term_right']}"
            dimensions.append(dim_name)
            if gradient.get('shortform', False):
                key_dimensions.append(dim_name)
    return dimensions, key_dimensions, data['version']
# Derive dimensions and version from bicorder.json
DIMENSIONS, KEY_DIMENSIONS, BICORDER_VERSION = load_bicorder_dimensions(BICORDER_JSON)
print(f"Loaded bicorder.json v{BICORDER_VERSION}")
print(f"Dimensions: {len(DIMENSIONS)}, key dimensions: {len(KEY_DIMENSIONS)}")
# Load data
df = pd.read_csv('diagnostic_output.csv')
clusters = pd.read_csv('analysis_results/data/kmeans_clusters.csv')
# Rename old column names to match current bicorder.json
df = df.rename(columns=COLUMN_RENAMES)
# Remove duplicates
df = df.drop_duplicates(subset='Descriptor', keep='first')
@@ -47,6 +69,8 @@ df = df.drop_duplicates(subset='Descriptor', keep='first')
merged = df.merge(clusters, on='Descriptor')
merged_clean = merged.dropna(subset=DIMENSIONS)
print(f"Training on {len(merged_clean)} protocols")
# Prepare training data
X = merged_clean[DIMENSIONS].values
y = merged_clean['cluster'].values
@@ -59,7 +83,7 @@ X_scaled = scaler.fit_transform(X)
lda = LinearDiscriminantAnalysis(n_components=1)
lda.fit(X_scaled, y)
# Calculate cluster centroids
# Calculate cluster centroids in scaled space
cluster_centroids = {}
for cluster_id in [1, 2]:
cluster_data = X_scaled[y == cluster_id]
@@ -71,21 +95,10 @@ for cluster_id in [1, 2]:
cluster_data_original = X[y == cluster_id]
cluster_means_original[cluster_id] = cluster_data_original.mean(axis=0).tolist()
# Key dimensions (most discriminative)
KEY_DIMENSIONS = [
'Design_elite_vs_vernacular',
'Entanglement_flocking_vs_swarming',
'Design_static_vs_malleable',
'Entanglement_obligatory_vs_voluntary',
'Entanglement_self-enforcing_vs_enforced',
'Design_explicit_vs_implicit',
'Entanglement_sovereign_vs_subsidiary',
'Design_technical_vs_social',
]
# Build model export
model = {
'version': '1.0',
'bicorder_version': BICORDER_VERSION,
'generated': pd.Timestamp.now().isoformat(),
'dimensions': DIMENSIONS,
'key_dimensions': KEY_DIMENSIONS,
@@ -124,7 +137,10 @@ output_path = 'bicorder_model.json'
with open(output_path, 'w') as f:
json.dump(model, f, indent=2)
print(f"Model exported to {output_path}")
print(f"\nModel exported to {output_path}")
print(f"Bicorder version: {BICORDER_VERSION}")
print(f"Total dimensions: {len(DIMENSIONS)}")
print(f"Key dimensions for short form: {len(KEY_DIMENSIONS)}")
print(f"Key dimensions (short form):")
for dim in KEY_DIMENSIONS:
print(f" - {dim}")
print(f"Model size: {len(json.dumps(model))} bytes")