Derive classifier dimensions from bicorder.json automatically
Both export_model_for_js.py and bicorder_classifier.py now read DIMENSIONS and KEY_DIMENSIONS directly from bicorder.json at runtime, so the model stays in sync whenever gradient terms are renamed or added. A COLUMN_RENAMES dict handles historical CSV column name changes. The model now includes bicorder_version so the app's version check works correctly.

Regenerated bicorder_model.json against bicorder.json v1.2.6 with correct dimension names, 9 key dimensions from shortform flags, and updated thresholds.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -30,58 +30,46 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
|
|||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Path to bicorder.json (relative to this script)
_BICORDER_JSON = Path(__file__).parent.parent / 'bicorder.json'

# Historical column renames: maps old CSV column names → current bicorder.json names.
# Add an entry here whenever gradient terms are renamed in bicorder.json.
_COLUMN_RENAMES = {
    'Design_elite_vs_vernacular': 'Design_institutional_vs_vernacular',
    'Entanglement_exclusive_vs_non-exclusive': 'Entanglement_monopolistic_vs_pluralistic',
    'Experience_sufficient_vs_insufficient': 'Experience_sufficient_vs_limited',
    'Experience_Kafka_vs_Whitehead': 'Experience_restraining_vs_liberating',
}


def _load_bicorder_dimensions(bicorder_path=_BICORDER_JSON):
    """Read DIMENSIONS and KEY_DIMENSIONS from bicorder.json.

    Each dimension name is built as ``<set_name>_<term_left>_vs_<term_right>``
    from every gradient of every entry under the top-level ``diagnostic`` key,
    in file order.

    Args:
        bicorder_path: Path to the bicorder JSON file. Defaults to the
            ``bicorder.json`` located two directories above this script.

    Returns:
        A ``(dimensions, key_dimensions)`` tuple of lists of names.
        ``key_dimensions`` is the subset of ``dimensions`` whose gradient has a
        truthy ``shortform`` value; gradients without that key are not
        included in it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``diagnostic``, ``set_name``, ``gradients``,
            ``term_left`` or ``term_right`` is missing.
    """
    # JSON is UTF-8 by specification; being explicit avoids depending on the
    # platform's locale encoding when terms contain non-ASCII characters.
    with open(bicorder_path, encoding='utf-8') as f:
        data = json.load(f)

    dimensions = []
    key_dimensions = []
    for category in data['diagnostic']:
        set_name = category['set_name']
        for gradient in category['gradients']:
            dim_name = f"{set_name}_{gradient['term_left']}_vs_{gradient['term_right']}"
            dimensions.append(dim_name)
            if gradient.get('shortform', False):
                key_dimensions.append(dim_name)
    return dimensions, key_dimensions
|
||||||
|
|
||||||
|
|
||||||
class BicorderClassifier:
|
class BicorderClassifier:
|
||||||
"""
|
"""
|
||||||
Classifies protocols into one of two families and recommends form type.
|
Classifies protocols into one of two families and recommends form type.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Dimension names (in order)
|
|
||||||
DIMENSIONS = [
|
|
||||||
'Design_explicit_vs_implicit',
|
|
||||||
'Design_precise_vs_interpretive',
|
|
||||||
'Design_elite_vs_vernacular',
|
|
||||||
'Design_documenting_vs_enabling',
|
|
||||||
'Design_static_vs_malleable',
|
|
||||||
'Design_technical_vs_social',
|
|
||||||
'Design_universal_vs_particular',
|
|
||||||
'Design_durable_vs_ephemeral',
|
|
||||||
'Entanglement_macro_vs_micro',
|
|
||||||
'Entanglement_sovereign_vs_subsidiary',
|
|
||||||
'Entanglement_self-enforcing_vs_enforced',
|
|
||||||
'Entanglement_abstract_vs_embodied',
|
|
||||||
'Entanglement_obligatory_vs_voluntary',
|
|
||||||
'Entanglement_flocking_vs_swarming',
|
|
||||||
'Entanglement_defensible_vs_exposed',
|
|
||||||
'Entanglement_exclusive_vs_non-exclusive',
|
|
||||||
'Experience_sufficient_vs_insufficient',
|
|
||||||
'Experience_crystallized_vs_contested',
|
|
||||||
'Experience_trust-evading_vs_trust-inducing',
|
|
||||||
'Experience_predictable_vs_emergent',
|
|
||||||
'Experience_exclusion_vs_inclusion',
|
|
||||||
'Experience_Kafka_vs_Whitehead',
|
|
||||||
'Experience_dead_vs_alive',
|
|
||||||
]
|
|
||||||
|
|
||||||
# Cluster names
|
# Cluster names
|
||||||
CLUSTER_NAMES = {
|
CLUSTER_NAMES = {
|
||||||
1: "Relational/Cultural",
|
1: "Relational/Cultural",
|
||||||
2: "Institutional/Bureaucratic"
|
2: "Institutional/Bureaucratic"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Key dimensions for short form (most discriminative)
|
|
||||||
# Based on LDA analysis - top differentiating dimensions
|
|
||||||
KEY_DIMENSIONS = [
|
|
||||||
'Design_elite_vs_vernacular', # 4.602 difference
|
|
||||||
'Entanglement_flocking_vs_swarming', # 4.079 difference
|
|
||||||
'Design_static_vs_malleable', # 3.775 difference
|
|
||||||
'Entanglement_obligatory_vs_voluntary', # 3.648 difference
|
|
||||||
'Entanglement_self-enforcing_vs_enforced', # 3.628 difference
|
|
||||||
'Design_explicit_vs_implicit', # High importance
|
|
||||||
'Entanglement_sovereign_vs_subsidiary', # High importance
|
|
||||||
'Design_technical_vs_social', # High importance
|
|
||||||
]
|
|
||||||
|
|
||||||
def __init__(self, model_path='analysis_results/data'):
|
def __init__(self, model_path='analysis_results/data'):
|
||||||
"""Initialize classifier with pre-computed model data."""
|
"""Initialize classifier with pre-computed model data."""
|
||||||
self.model_path = Path(model_path)
|
self.model_path = Path(model_path)
|
||||||
@@ -89,6 +77,9 @@ class BicorderClassifier:
|
|||||||
self.lda = None
|
self.lda = None
|
||||||
self.cluster_centroids = None
|
self.cluster_centroids = None
|
||||||
|
|
||||||
|
# Derive dimension lists from bicorder.json
|
||||||
|
self.DIMENSIONS, self.KEY_DIMENSIONS = _load_bicorder_dimensions()
|
||||||
|
|
||||||
# Load training data to fit scaler and LDA
|
# Load training data to fit scaler and LDA
|
||||||
self._load_model()
|
self._load_model()
|
||||||
|
|
||||||
@@ -98,6 +89,9 @@ class BicorderClassifier:
|
|||||||
df = pd.read_csv('diagnostic_output.csv')
|
df = pd.read_csv('diagnostic_output.csv')
|
||||||
clusters = pd.read_csv(self.model_path / 'kmeans_clusters.csv')
|
clusters = pd.read_csv(self.model_path / 'kmeans_clusters.csv')
|
||||||
|
|
||||||
|
# Rename old column names to match current bicorder.json
|
||||||
|
df = df.rename(columns=_COLUMN_RENAMES)
|
||||||
|
|
||||||
# Remove duplicates
|
# Remove duplicates
|
||||||
df = df.drop_duplicates(subset='Descriptor', keep='first')
|
df = df.drop_duplicates(subset='Descriptor', keep='first')
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
{
|
{
|
||||||
"version": "1.0",
|
"version": "1.0",
|
||||||
"generated": "2025-12-19T11:46:23.367069",
|
"bicorder_version": "1.2.6",
|
||||||
|
"generated": "2026-03-20T15:08:23.160614",
|
||||||
"dimensions": [
|
"dimensions": [
|
||||||
"Design_explicit_vs_implicit",
|
"Design_explicit_vs_implicit",
|
||||||
"Design_precise_vs_interpretive",
|
"Design_precise_vs_interpretive",
|
||||||
"Design_elite_vs_vernacular",
|
"Design_institutional_vs_vernacular",
|
||||||
"Design_documenting_vs_enabling",
|
"Design_documenting_vs_enabling",
|
||||||
"Design_static_vs_malleable",
|
"Design_static_vs_malleable",
|
||||||
"Design_technical_vs_social",
|
"Design_technical_vs_social",
|
||||||
@@ -17,24 +18,25 @@
|
|||||||
"Entanglement_obligatory_vs_voluntary",
|
"Entanglement_obligatory_vs_voluntary",
|
||||||
"Entanglement_flocking_vs_swarming",
|
"Entanglement_flocking_vs_swarming",
|
||||||
"Entanglement_defensible_vs_exposed",
|
"Entanglement_defensible_vs_exposed",
|
||||||
"Entanglement_exclusive_vs_non-exclusive",
|
"Entanglement_monopolistic_vs_pluralistic",
|
||||||
"Experience_sufficient_vs_insufficient",
|
"Experience_sufficient_vs_limited",
|
||||||
"Experience_crystallized_vs_contested",
|
"Experience_crystallized_vs_contested",
|
||||||
"Experience_trust-evading_vs_trust-inducing",
|
"Experience_trust-evading_vs_trust-inducing",
|
||||||
"Experience_predictable_vs_emergent",
|
"Experience_predictable_vs_emergent",
|
||||||
"Experience_exclusion_vs_inclusion",
|
"Experience_exclusion_vs_inclusion",
|
||||||
"Experience_Kafka_vs_Whitehead",
|
"Experience_restraining_vs_liberating",
|
||||||
"Experience_dead_vs_alive"
|
"Experience_dead_vs_alive"
|
||||||
],
|
],
|
||||||
"key_dimensions": [
|
"key_dimensions": [
|
||||||
"Design_elite_vs_vernacular",
|
"Design_precise_vs_interpretive",
|
||||||
"Entanglement_flocking_vs_swarming",
|
"Design_institutional_vs_vernacular",
|
||||||
"Design_static_vs_malleable",
|
"Design_static_vs_malleable",
|
||||||
"Entanglement_obligatory_vs_voluntary",
|
|
||||||
"Entanglement_self-enforcing_vs_enforced",
|
|
||||||
"Design_explicit_vs_implicit",
|
|
||||||
"Entanglement_sovereign_vs_subsidiary",
|
"Entanglement_sovereign_vs_subsidiary",
|
||||||
"Design_technical_vs_social"
|
"Entanglement_self-enforcing_vs_enforced",
|
||||||
|
"Entanglement_obligatory_vs_voluntary",
|
||||||
|
"Entanglement_flocking_vs_swarming",
|
||||||
|
"Experience_predictable_vs_emergent",
|
||||||
|
"Experience_exclusion_vs_inclusion"
|
||||||
],
|
],
|
||||||
"cluster_names": {
|
"cluster_names": {
|
||||||
"1": "Relational/Cultural",
|
"1": "Relational/Cultural",
|
||||||
@@ -229,14 +231,13 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"thresholds": {
|
"thresholds": {
|
||||||
"confidence_low": 0.5,
|
"confidence_low": 0.6,
|
||||||
"completeness_low": 0.5,
|
"completeness_low": 0.5,
|
||||||
"boundary_distance_low": 0.3
|
"boundary_distance_low": 0.5
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"total_protocols": 406,
|
"total_protocols": 406,
|
||||||
"cluster_1_count": 216,
|
"cluster_1_count": 216,
|
||||||
"cluster_2_count": 190
|
"cluster_2_count": 190
|
||||||
},
|
}
|
||||||
"bicorder_version": "1.2.3"
|
|
||||||
}
|
}
|
||||||
@@ -1,45 +1,67 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Export the cluster classification model to JSON for use in JavaScript.
|
Export the cluster classification model to JSON for use in JavaScript.
|
||||||
|
|
||||||
|
Reads dimension names directly from bicorder.json so the model always
|
||||||
|
stays in sync with the current bicorder structure.
|
||||||
|
|
||||||
|
When gradients are renamed in bicorder.json, add the old→new mapping to
|
||||||
|
COLUMN_RENAMES so the training CSV columns are correctly aligned.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.preprocessing import StandardScaler
|
from sklearn.preprocessing import StandardScaler
|
||||||
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
|
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
|
||||||
import json
|
|
||||||
|
|
||||||
# Dimension names
|
# Path to bicorder.json (relative to this script)
|
||||||
DIMENSIONS = [
|
BICORDER_JSON = Path(__file__).parent.parent / 'bicorder.json'
|
||||||
'Design_explicit_vs_implicit',
|
|
||||||
'Design_precise_vs_interpretive',
|
# Historical column renames: maps old CSV column names → current bicorder.json names.
|
||||||
'Design_elite_vs_vernacular',
|
# Add an entry here whenever gradient terms are renamed in bicorder.json.
|
||||||
'Design_documenting_vs_enabling',
|
COLUMN_RENAMES = {
|
||||||
'Design_static_vs_malleable',
|
'Design_elite_vs_vernacular': 'Design_institutional_vs_vernacular',
|
||||||
'Design_technical_vs_social',
|
'Entanglement_exclusive_vs_non-exclusive': 'Entanglement_monopolistic_vs_pluralistic',
|
||||||
'Design_universal_vs_particular',
|
'Experience_sufficient_vs_insufficient': 'Experience_sufficient_vs_limited',
|
||||||
'Design_durable_vs_ephemeral',
|
'Experience_Kafka_vs_Whitehead': 'Experience_restraining_vs_liberating',
|
||||||
'Entanglement_macro_vs_micro',
|
}
|
||||||
'Entanglement_sovereign_vs_subsidiary',
|
|
||||||
'Entanglement_self-enforcing_vs_enforced',
|
|
||||||
'Entanglement_abstract_vs_embodied',
|
def load_bicorder_dimensions(bicorder_path):
|
||||||
'Entanglement_obligatory_vs_voluntary',
|
"""Read DIMENSIONS and KEY_DIMENSIONS from bicorder.json."""
|
||||||
'Entanglement_flocking_vs_swarming',
|
with open(bicorder_path) as f:
|
||||||
'Entanglement_defensible_vs_exposed',
|
data = json.load(f)
|
||||||
'Entanglement_exclusive_vs_non-exclusive',
|
|
||||||
'Experience_sufficient_vs_insufficient',
|
dimensions = []
|
||||||
'Experience_crystallized_vs_contested',
|
key_dimensions = []
|
||||||
'Experience_trust-evading_vs_trust-inducing',
|
|
||||||
'Experience_predictable_vs_emergent',
|
for category in data['diagnostic']:
|
||||||
'Experience_exclusion_vs_inclusion',
|
set_name = category['set_name']
|
||||||
'Experience_Kafka_vs_Whitehead',
|
for gradient in category['gradients']:
|
||||||
'Experience_dead_vs_alive',
|
dim_name = f"{set_name}_{gradient['term_left']}_vs_{gradient['term_right']}"
|
||||||
]
|
dimensions.append(dim_name)
|
||||||
|
if gradient.get('shortform', False):
|
||||||
|
key_dimensions.append(dim_name)
|
||||||
|
|
||||||
|
return dimensions, key_dimensions, data['version']
|
||||||
|
|
||||||
|
|
||||||
|
# Derive dimensions and version from bicorder.json
|
||||||
|
DIMENSIONS, KEY_DIMENSIONS, BICORDER_VERSION = load_bicorder_dimensions(BICORDER_JSON)
|
||||||
|
|
||||||
|
print(f"Loaded bicorder.json v{BICORDER_VERSION}")
|
||||||
|
print(f"Dimensions: {len(DIMENSIONS)}, key dimensions: {len(KEY_DIMENSIONS)}")
|
||||||
|
|
||||||
# Load data
|
# Load data
|
||||||
df = pd.read_csv('diagnostic_output.csv')
|
df = pd.read_csv('diagnostic_output.csv')
|
||||||
clusters = pd.read_csv('analysis_results/data/kmeans_clusters.csv')
|
clusters = pd.read_csv('analysis_results/data/kmeans_clusters.csv')
|
||||||
|
|
||||||
|
# Rename old column names to match current bicorder.json
|
||||||
|
df = df.rename(columns=COLUMN_RENAMES)
|
||||||
|
|
||||||
# Remove duplicates
|
# Remove duplicates
|
||||||
df = df.drop_duplicates(subset='Descriptor', keep='first')
|
df = df.drop_duplicates(subset='Descriptor', keep='first')
|
||||||
|
|
||||||
@@ -47,6 +69,8 @@ df = df.drop_duplicates(subset='Descriptor', keep='first')
|
|||||||
merged = df.merge(clusters, on='Descriptor')
|
merged = df.merge(clusters, on='Descriptor')
|
||||||
merged_clean = merged.dropna(subset=DIMENSIONS)
|
merged_clean = merged.dropna(subset=DIMENSIONS)
|
||||||
|
|
||||||
|
print(f"Training on {len(merged_clean)} protocols")
|
||||||
|
|
||||||
# Prepare training data
|
# Prepare training data
|
||||||
X = merged_clean[DIMENSIONS].values
|
X = merged_clean[DIMENSIONS].values
|
||||||
y = merged_clean['cluster'].values
|
y = merged_clean['cluster'].values
|
||||||
@@ -59,7 +83,7 @@ X_scaled = scaler.fit_transform(X)
|
|||||||
lda = LinearDiscriminantAnalysis(n_components=1)
|
lda = LinearDiscriminantAnalysis(n_components=1)
|
||||||
lda.fit(X_scaled, y)
|
lda.fit(X_scaled, y)
|
||||||
|
|
||||||
# Calculate cluster centroids
|
# Calculate cluster centroids in scaled space
|
||||||
cluster_centroids = {}
|
cluster_centroids = {}
|
||||||
for cluster_id in [1, 2]:
|
for cluster_id in [1, 2]:
|
||||||
cluster_data = X_scaled[y == cluster_id]
|
cluster_data = X_scaled[y == cluster_id]
|
||||||
@@ -71,21 +95,10 @@ for cluster_id in [1, 2]:
|
|||||||
cluster_data_original = X[y == cluster_id]
|
cluster_data_original = X[y == cluster_id]
|
||||||
cluster_means_original[cluster_id] = cluster_data_original.mean(axis=0).tolist()
|
cluster_means_original[cluster_id] = cluster_data_original.mean(axis=0).tolist()
|
||||||
|
|
||||||
# Key dimensions (most discriminative)
|
|
||||||
KEY_DIMENSIONS = [
|
|
||||||
'Design_elite_vs_vernacular',
|
|
||||||
'Entanglement_flocking_vs_swarming',
|
|
||||||
'Design_static_vs_malleable',
|
|
||||||
'Entanglement_obligatory_vs_voluntary',
|
|
||||||
'Entanglement_self-enforcing_vs_enforced',
|
|
||||||
'Design_explicit_vs_implicit',
|
|
||||||
'Entanglement_sovereign_vs_subsidiary',
|
|
||||||
'Design_technical_vs_social',
|
|
||||||
]
|
|
||||||
|
|
||||||
# Build model export
|
# Build model export
|
||||||
model = {
|
model = {
|
||||||
'version': '1.0',
|
'version': '1.0',
|
||||||
|
'bicorder_version': BICORDER_VERSION,
|
||||||
'generated': pd.Timestamp.now().isoformat(),
|
'generated': pd.Timestamp.now().isoformat(),
|
||||||
'dimensions': DIMENSIONS,
|
'dimensions': DIMENSIONS,
|
||||||
'key_dimensions': KEY_DIMENSIONS,
|
'key_dimensions': KEY_DIMENSIONS,
|
||||||
@@ -124,7 +137,10 @@ output_path = 'bicorder_model.json'
|
|||||||
with open(output_path, 'w') as f:
|
with open(output_path, 'w') as f:
|
||||||
json.dump(model, f, indent=2)
|
json.dump(model, f, indent=2)
|
||||||
|
|
||||||
print(f"Model exported to {output_path}")
|
print(f"\nModel exported to {output_path}")
|
||||||
|
print(f"Bicorder version: {BICORDER_VERSION}")
|
||||||
print(f"Total dimensions: {len(DIMENSIONS)}")
|
print(f"Total dimensions: {len(DIMENSIONS)}")
|
||||||
print(f"Key dimensions for short form: {len(KEY_DIMENSIONS)}")
|
print(f"Key dimensions (short form):")
|
||||||
|
for dim in KEY_DIMENSIONS:
|
||||||
|
print(f" - {dim}")
|
||||||
print(f"Model size: {len(json.dumps(model))} bytes")
|
print(f"Model size: {len(json.dumps(model))} bytes")
|
||||||
|
|||||||
Reference in New Issue
Block a user