Derive classifier dimensions from bicorder.json automatically

Both export_model_for_js.py and bicorder_classifier.py now read
DIMENSIONS and KEY_DIMENSIONS directly from bicorder.json at runtime,
so the model stays in sync whenever gradient terms are renamed or
added. A COLUMN_RENAMES dict handles historical CSV column name
changes. The model now includes bicorder_version so the app's version
check works correctly.

Regenerated bicorder_model.json against bicorder.json v1.2.6 with
correct dimension names, 9 key dimensions from shortform flags, and
updated thresholds.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Nathan Schneider
2026-03-20 15:13:54 -06:00
parent 5232e760be
commit f1ae9cac1f
3 changed files with 108 additions and 97 deletions

View File

@@ -30,58 +30,46 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import json
from pathlib import Path
# bicorder.json is expected one directory above the folder containing this script.
_BICORDER_JSON = Path(__file__).parent.parent.joinpath('bicorder.json')

# Legacy CSV column name -> column name currently derived from bicorder.json.
# Whenever a gradient term is renamed in bicorder.json, record the old column
# name here so data that still uses it can be renamed to match.
_COLUMN_RENAMES = dict((
    ('Design_elite_vs_vernacular', 'Design_institutional_vs_vernacular'),
    ('Entanglement_exclusive_vs_non-exclusive', 'Entanglement_monopolistic_vs_pluralistic'),
    ('Experience_sufficient_vs_insufficient', 'Experience_sufficient_vs_limited'),
    ('Experience_Kafka_vs_Whitehead', 'Experience_restraining_vs_liberating'),
))
def _load_bicorder_dimensions(bicorder_path=_BICORDER_JSON):
    """Read DIMENSIONS and KEY_DIMENSIONS from bicorder.json.

    Walks every category under the top-level ``diagnostic`` key and builds one
    dimension name per gradient, formatted as
    ``<set_name>_<term_left>_vs_<term_right>``, in file order.

    Args:
        bicorder_path: Path of the bicorder JSON file to read. Defaults to
            the module-level ``_BICORDER_JSON``.

    Returns:
        A ``(dimensions, key_dimensions)`` tuple of lists of strings.
        ``dimensions`` holds the name of every gradient; ``key_dimensions``
        holds only the names whose gradient has a truthy ``shortform`` value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If ``diagnostic``, ``set_name``, ``gradients``,
            ``term_left`` or ``term_right`` is missing where expected.
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) instead of
    # relying on the platform's locale-dependent default.
    with open(bicorder_path, encoding='utf-8') as f:
        data = json.load(f)

    dimensions = []
    key_dimensions = []
    for category in data['diagnostic']:
        set_name = category['set_name']
        for gradient in category['gradients']:
            dim_name = f"{set_name}_{gradient['term_left']}_vs_{gradient['term_right']}"
            dimensions.append(dim_name)
            # A missing 'shortform' flag means the gradient is not a key dimension.
            if gradient.get('shortform', False):
                key_dimensions.append(dim_name)
    return dimensions, key_dimensions
class BicorderClassifier:
"""
Classifies protocols into one of two families and recommends form type.
"""
# Dimension names (in order)
DIMENSIONS = [
'Design_explicit_vs_implicit',
'Design_precise_vs_interpretive',
'Design_elite_vs_vernacular',
'Design_documenting_vs_enabling',
'Design_static_vs_malleable',
'Design_technical_vs_social',
'Design_universal_vs_particular',
'Design_durable_vs_ephemeral',
'Entanglement_macro_vs_micro',
'Entanglement_sovereign_vs_subsidiary',
'Entanglement_self-enforcing_vs_enforced',
'Entanglement_abstract_vs_embodied',
'Entanglement_obligatory_vs_voluntary',
'Entanglement_flocking_vs_swarming',
'Entanglement_defensible_vs_exposed',
'Entanglement_exclusive_vs_non-exclusive',
'Experience_sufficient_vs_insufficient',
'Experience_crystallized_vs_contested',
'Experience_trust-evading_vs_trust-inducing',
'Experience_predictable_vs_emergent',
'Experience_exclusion_vs_inclusion',
'Experience_Kafka_vs_Whitehead',
'Experience_dead_vs_alive',
]
# Cluster names
CLUSTER_NAMES = {
1: "Relational/Cultural",
2: "Institutional/Bureaucratic"
}
# Key dimensions for short form (most discriminative)
# Based on LDA analysis - top differentiating dimensions
KEY_DIMENSIONS = [
'Design_elite_vs_vernacular', # 4.602 difference
'Entanglement_flocking_vs_swarming', # 4.079 difference
'Design_static_vs_malleable', # 3.775 difference
'Entanglement_obligatory_vs_voluntary', # 3.648 difference
'Entanglement_self-enforcing_vs_enforced', # 3.628 difference
'Design_explicit_vs_implicit', # High importance
'Entanglement_sovereign_vs_subsidiary', # High importance
'Design_technical_vs_social', # High importance
]
def __init__(self, model_path='analysis_results/data'):
"""Initialize classifier with pre-computed model data."""
self.model_path = Path(model_path)
@@ -89,6 +77,9 @@ class BicorderClassifier:
self.lda = None
self.cluster_centroids = None
# Derive dimension lists from bicorder.json
self.DIMENSIONS, self.KEY_DIMENSIONS = _load_bicorder_dimensions()
# Load training data to fit scaler and LDA
self._load_model()
@@ -98,6 +89,9 @@ class BicorderClassifier:
df = pd.read_csv('diagnostic_output.csv')
clusters = pd.read_csv(self.model_path / 'kmeans_clusters.csv')
# Rename old column names to match current bicorder.json
df = df.rename(columns=_COLUMN_RENAMES)
# Remove duplicates
df = df.drop_duplicates(subset='Descriptor', keep='first')