Reorganize directory, add manual dataset and sync tooling

- Move all scripts to scripts/, web assets to web/, analysis results
  into self-contained data/readings/<type>_<YYYYMMDD>/ directories
- Add data/readings/manual_20260320/ with 32 JSON readings from
  git.medlab.host/ntnsndr/protocol-bicorder-data
- Add scripts/json_to_csv.py to convert bicorder JSON files to CSV
- Add scripts/sync_readings.sh for one-command sync + re-analysis of
  any dataset backed by a .sync_source config file
- Add scripts/classify_readings.py to apply the LDA classifier to all
  readings and save per-reading cluster assignments
- Add --min-coverage flag to multivariate_analysis.py for sparse/shortform
  datasets; also applies in lda_visualization.py
- Fix lda_visualization.py NaN handling and 0-d array annotation bug
- Update README.md and WORKFLOW.md to document datasets, sync workflow,
  shortform handling, and new scripts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Nathan Schneider
2026-03-20 17:35:13 -06:00
parent 0c794dddae
commit 897c30406b
545 changed files with 10715 additions and 718 deletions

98
analysis/web/bicorder-classifier.d.ts vendored Normal file
View File

@@ -0,0 +1,98 @@
/**
* Type definitions for Bicorder Cluster Classifier
*/
/**
 * Shape of the trained model JSON consumed by `BicorderClassifier`.
 *
 * The arrays in `scaler` and `lda` are positional: index `i` belongs to
 * `dimensions[i]`.
 */
export interface ModelData {
  version: string;
  generated: string;
  /**
   * Version of the bicorder definition the model was trained against.
   * Optional: the classifier only compares it with the version passed to
   * its constructor when both are present, and logs a warning on mismatch.
   */
  bicorder_version?: string;
  /** Ordered dimension names; defines the feature order used everywhere else. */
  dimensions: string[];
  /** Subset of dimension names used for the short form. */
  key_dimensions: string[];
  /** Human-readable name per cluster, keyed by cluster number. */
  cluster_names: {
    '1': string;
    '2': string;
  };
  cluster_descriptions: {
    '1': string;
    '2': string;
  };
  /** Standardization parameters: scaled = (value - mean[i]) / scale[i]. */
  scaler: {
    mean: number[];
    scale: number[];
  };
  /** Linear discriminant: score = coefficients · scaled + intercept. */
  lda: {
    coefficients: number[];
    intercept: number;
  };
  /** Cluster centroids in standardized space, used for centroid distances. */
  cluster_centroids_scaled: {
    '1': number[];
    '2': number[];
  };
  cluster_means_original: {
    '1': number[];
    '2': number[];
  };
  /**
   * Cut-offs below which the long form is recommended. `confidence_low` and
   * `completeness_low` are compared against fractions (0-1), and
   * `boundary_distance_low` against the absolute LDA score.
   */
  thresholds: {
    confidence_low: number;
    completeness_low: number;
    boundary_distance_low: number;
  };
  metadata: {
    total_protocols: number;
    cluster_1_count: number;
    cluster_2_count: number;
  };
}
/**
 * Ratings keyed by dimension name. May be partial: the classifier treats a
 * missing key, `null`, or `undefined` as "dimension not provided".
 */
export interface Ratings {
[dimensionName: string]: number | null | undefined;
}
/** Basic result returned by `BicorderClassifier.predict`. */
export interface PredictionResult {
/** Predicted cluster: 2 when the LDA score is positive, otherwise 1. */
cluster: 1 | 2;
/** Name looked up in the model's `cluster_names` for `cluster`. */
clusterName: string;
/** Completeness-adjusted confidence as a rounded percentage (0-100). */
confidence: number;
/** Share of model dimensions that were provided, as a rounded percentage (0-100). */
completeness: number;
/** 'long' when confidence, completeness, or boundary distance falls below the model thresholds. */
recommendedForm: 'short' | 'long';
}
/** Basic prediction plus the intermediate quantities it was derived from. */
export interface DetailedPredictionResult extends PredictionResult {
/** Signed position on the discriminant axis (coefficients · scaled + intercept). */
ldaScore: number;
/** Absolute value of `ldaScore`; the decision boundary is at 0. */
distanceToBoundary: number;
/** Number of model dimensions with a rating. */
dimensionsProvided: number;
/** Number of dimensions in the model. */
dimensionsTotal: number;
/** Number of key dimensions with a rating. */
keyDimensionsProvided: number;
/** Number of key dimensions in the model. */
keyDimensionsTotal: number;
/** Euclidean distance from the standardized ratings to each cluster centroid. */
distancesToCentroids: {
'1': number;
'2': number;
};
/** Confidence before the completeness adjustment, as a rounded percentage (0-100). */
rawConfidence: number;
}
/** Result of `BicorderClassifier.assessShortFormReadiness`. */
export interface ShortFormAssessment {
/** True when at least 75% of the key dimensions have a rating. */
ready: boolean;
/** Number of key dimensions with a rating. */
keyDimensionsProvided: number;
/** Number of key dimensions in the model. */
keyDimensionsTotal: number;
/** Key-dimension coverage as a rounded percentage (0-100). */
coverage: number;
/** Names of key dimensions that still need a rating. */
missingKeyDimensions: string[];
}
/** Options accepted by `BicorderClassifier.predict`. */
export interface PredictOptions {
/**
 * When `true`, `predict` returns a `DetailedPredictionResult`; when `false`,
 * the basic `PredictionResult`. If no options object is passed at all, the
 * implementation defaults to `detailed: true`.
 */
detailed?: boolean;
}
/**
 * Two-cluster LDA classifier for bicorder ratings.
 */
export class BicorderClassifier {
  /**
   * @param model - Parsed contents of the model JSON.
   * @param bicorderVersion - Optional version of the bicorder definition in
   *   use. When both it and the model's `bicorder_version` are present and
   *   differ, the implementation logs a warning.
   */
  constructor(model: ModelData, bicorderVersion?: string | null);
  /**
   * Classify a (possibly partial) set of ratings.
   *
   * The implementation defaults to `{ detailed: true }` when `options` is
   * omitted, so the no-options call is typed as the detailed result.
   * `DetailedPredictionResult` extends `PredictionResult`, so code that
   * only reads the basic fields keeps type-checking.
   */
  predict(ratings: Ratings, options?: { detailed: true }): DetailedPredictionResult;
  predict(ratings: Ratings, options: { detailed: false }): PredictionResult;
  predict(ratings: Ratings, options?: PredictOptions): PredictionResult | DetailedPredictionResult;
  /** Build a multi-line, human-readable explanation of the classification. */
  explainClassification(ratings: Ratings): string;
  /** Return a copy of the model's key (short-form) dimension names. */
  getKeyDimensions(): string[];
  /** Report how many key dimensions are rated and which are still missing. */
  assessShortFormReadiness(ratings: Ratings): ShortFormAssessment;
}
/**
 * Fetch the model JSON from `url` and construct a `BicorderClassifier` from it.
 * The implementation defaults `url` to './bicorder_model.json'.
 */
export function loadClassifier(url?: string): Promise<BicorderClassifier>;

View File

@@ -0,0 +1,335 @@
/**
* Bicorder Cluster Classifier
*
* Real-time protocol classification for the Bicorder web app.
* Predicts which protocol family (Relational/Cultural vs Institutional/Bureaucratic)
* a protocol belongs to based on dimension ratings.
*
* Usage:
* import { BicorderClassifier } from './bicorder-classifier.js';
*
* const classifier = new BicorderClassifier(modelData);
* const result = classifier.predict(ratings);
* console.log(`Cluster: ${result.clusterName} (${result.confidence}% confidence)`);
*/
export class BicorderClassifier {
  /**
   * @param {Object} model - Model data loaded from bicorder_model.json
   * @param {string|null} [bicorderVersion=null] - Version of bicorder.json being used
   *
   * Simple version-matching approach: the model may include a
   * bicorder_version field. When both versions are known and differ, a
   * warning is logged; classification still proceeds.
   */
  constructor(model, bicorderVersion = null) {
    this.model = model;
    this.dimensions = model.dimensions;
    this.keyDimensions = model.key_dimensions;
    this.bicorderVersion = bicorderVersion;
    // Check version compatibility (only when both sides declare a version)
    if (bicorderVersion && model.bicorder_version && bicorderVersion !== model.bicorder_version) {
      console.warn(`Model version (${model.bicorder_version}) doesn't match bicorder version (${bicorderVersion}). Results may be inaccurate.`);
    }
  }

  /**
   * Standardize values using the fitted scaler: (value - mean) / scale,
   * position by position. Null/undefined entries are passed through as null.
   * @private
   */
  _standardize(values) {
    return values.map((val, i) => {
      if (val === null || val === undefined) return null;
      return (val - this.model.scaler.mean[i]) / this.model.scaler.scale[i];
    });
  }

  /**
   * Calculate LDA score (position on discriminant axis)
   * @private
   */
  _ldaScore(scaledValues) {
    // Fill missing values with 0 (mean in scaled space)
    const filled = scaledValues.map(v => v === null ? 0 : v);
    // Calculate: coef · x + intercept
    let score = this.model.lda.intercept;
    for (let i = 0; i < filled.length; i++) {
      score += this.model.lda.coefficients[i] * filled[i];
    }
    return score;
  }

  /**
   * Calculate Euclidean distance between two equal-length vectors
   * @private
   */
  _distance(a, b) {
    let sum = 0;
    for (let i = 0; i < a.length; i++) {
      const diff = a[i] - b[i];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  }

  /**
   * Whether a rating was supplied for the given dimension.
   * Only null/undefined count as missing, so a numeric 0 is "provided".
   * Every "provided vs. missing" decision in this class goes through here
   * so the counts and the missing list can never disagree.
   * @private
   */
  _hasRating(ratings, dim) {
    return ratings[dim] !== null && ratings[dim] !== undefined;
  }

  /**
   * Confidence as fractions (0-1), before and after the completeness adjustment.
   * @param {number} distanceToBoundary - Absolute LDA score
   * @param {number} completeness - Fraction of dimensions provided (0-1)
   * @private
   */
  _confidence(distanceToBoundary, completeness) {
    // Normalize based on typical strong separation (3.0)
    const raw = Math.min(1.0, distanceToBoundary / 3.0);
    // Adjust for completeness: a fully empty reading halves the confidence
    return { raw, adjusted: raw * (0.5 + 0.5 * completeness) };
  }

  /**
   * Evaluate each long-form trigger against the model thresholds.
   * All inputs are unrounded so that predict() and explainClassification()
   * reach the same verdict.
   * @private
   */
  _longFormReasons(adjustedConfidence, completeness, distanceToBoundary) {
    const { thresholds } = this.model;
    return {
      lowConfidence: adjustedConfidence < thresholds.confidence_low,
      lowCompleteness: completeness < thresholds.completeness_low,
      nearBoundary: distanceToBoundary < thresholds.boundary_distance_low,
    };
  }

  /**
   * Predict cluster for given ratings
   *
   * @param {Object} ratings - Map of dimension names to values (1-9)
   * Can be partial - missing dimensions are filled with the neutral value 5
   * @param {Object} [options] - Options
   * @param {boolean} [options.detailed=true] - Return detailed information.
   * The default also applies when an options object is passed without a
   * `detailed` property.
   *
   * @returns {Object} Prediction result with:
   * - cluster: Cluster number (1 or 2)
   * - clusterName: Human-readable name
   * - confidence: Confidence percentage (0-100)
   * - completeness: Percentage of dimensions provided (0-100)
   * - recommendedForm: 'short' or 'long'
   * and, when detailed:
   * - ldaScore: Position on discriminant axis
   * - distanceToBoundary: Distance from cluster boundary
   * - dimensionsProvided / dimensionsTotal
   * - keyDimensionsProvided / keyDimensionsTotal
   * - distancesToCentroids: Euclidean distance to each cluster centroid
   * - rawConfidence: Confidence percentage before the completeness adjustment
   */
  predict(ratings, options = { detailed: true }) {
    // Honour the documented default even for `{}` or `null` options
    const detailed = options?.detailed ?? true;

    // Convert ratings object to array in model dimension order
    const values = this.dimensions.map(dim => ratings[dim] ?? null);
    const providedCount = values.filter(v => v !== null).length;
    const completeness = providedCount / this.dimensions.length;

    // Fill missing with neutral value (5 = middle of 1-9 scale), then standardize
    const filled = values.map(v => v ?? 5);
    const scaled = this._standardize(filled);

    // Predict cluster (LDA boundary at 0)
    // Positive score = cluster 2 (Institutional)
    // Negative score = cluster 1 (Relational)
    const ldaScore = this._ldaScore(scaled);
    const cluster = ldaScore > 0 ? 2 : 1;
    const clusterName = this.model.cluster_names[cluster];

    // Confidence: higher when further from boundary
    const distanceToBoundary = Math.abs(ldaScore);
    const confidence = this._confidence(distanceToBoundary, completeness);

    // Recommend the long form when any threshold is tripped
    const reasons = this._longFormReasons(confidence.adjusted, completeness, distanceToBoundary);
    const shouldUseLongForm =
      reasons.lowConfidence || reasons.lowCompleteness || reasons.nearBoundary;

    const basicResult = {
      cluster,
      clusterName,
      confidence: Math.round(confidence.adjusted * 100),
      completeness: Math.round(completeness * 100),
      recommendedForm: shouldUseLongForm ? 'long' : 'short',
    };
    if (!detailed) {
      return basicResult;
    }

    // Calculate distances to cluster centroids (scaled has no gaps: missing
    // ratings were filled before standardizing)
    const distances = {};
    for (const [clusterId, centroid] of Object.entries(this.model.cluster_centroids_scaled)) {
      distances[clusterId] = this._distance(scaled, centroid);
    }

    // Count key dimensions provided
    const keyDimensionsProvided = this.keyDimensions.filter(
      dim => this._hasRating(ratings, dim)
    ).length;

    return {
      ...basicResult,
      ldaScore,
      distanceToBoundary,
      dimensionsProvided: providedCount,
      dimensionsTotal: this.dimensions.length,
      keyDimensionsProvided,
      keyDimensionsTotal: this.keyDimensions.length,
      distancesToCentroids: distances,
      rawConfidence: Math.round(confidence.raw * 100),
    };
  }

  /**
   * Get explanation of classification
   *
   * The boundary warning and the "use long form" reasons are evaluated with
   * the same model thresholds and unrounded values that predict() uses, so
   * the listed reasons always match the recommendation.
   *
   * @param {Object} ratings - Dimension ratings
   * @returns {string} Human-readable explanation
   */
  explainClassification(ratings) {
    const result = this.predict(ratings, { detailed: true });

    // Recover the unrounded quantities predict() based its recommendation on
    const completeness = result.dimensionsProvided / result.dimensionsTotal;
    const { adjusted } = this._confidence(result.distanceToBoundary, completeness);
    const reasons = this._longFormReasons(adjusted, completeness, result.distanceToBoundary);

    const lines = [];
    lines.push(`Protocol Classification: ${result.clusterName}`);
    lines.push(`Confidence: ${result.confidence}%`);
    lines.push('');
    if (result.cluster === 2) {
      lines.push('This protocol leans toward Institutional/Bureaucratic characteristics:');
      lines.push(' • More likely to be formal, standardized, top-down');
      lines.push(' • May involve state/corporate enforcement');
      lines.push(' • Tends toward precise, documented procedures');
    } else {
      lines.push('This protocol leans toward Relational/Cultural characteristics:');
      lines.push(' • More likely to be emergent, community-based');
      lines.push(' • May involve voluntary participation');
      lines.push(' • Tends toward interpretive, flexible practices');
    }
    lines.push('');
    lines.push(`Distance from boundary: ${result.distanceToBoundary.toFixed(2)}`);
    if (reasons.nearBoundary) {
      lines.push('⚠️ This protocol is near the boundary between families.');
      lines.push(' It may exhibit characteristics of both types.');
    }
    lines.push('');
    lines.push(`Completeness: ${result.completeness}% (${result.dimensionsProvided}/${result.dimensionsTotal} dimensions)`);
    if (result.completeness < 100) {
      lines.push('Note: Missing dimensions filled with neutral values (5)');
      lines.push(' Confidence improves with complete data');
    }
    lines.push('');
    lines.push(`Recommended form: ${result.recommendedForm.toUpperCase()}`);
    if (result.recommendedForm === 'long') {
      lines.push('Reason: Use long form for:');
      if (reasons.lowConfidence) {
        lines.push(' • Low classification confidence');
      }
      if (reasons.lowCompleteness) {
        lines.push(' • Incomplete data');
      }
      if (reasons.nearBoundary) {
        lines.push(' • Ambiguous positioning between families');
      }
    } else {
      lines.push(`Reason: High confidence classification with ${result.completeness}% data`);
    }
    return lines.join('\n');
  }

  /**
   * Get the list of key dimensions for short form
   * @returns {Array<string>} Dimension names (a copy; safe to mutate)
   */
  getKeyDimensions() {
    return [...this.keyDimensions];
  }

  /**
   * Check if enough key dimensions are provided for reliable short-form classification
   * @param {Object} ratings - Current ratings
   * @returns {Object} Assessment with:
   * - ready: true when at least 75% of key dimensions are rated
   * - keyDimensionsProvided / keyDimensionsTotal
   * - coverage: Rounded percentage of key dimensions rated (0-100)
   * - missingKeyDimensions: Names of key dimensions without a rating
   */
  assessShortFormReadiness(ratings) {
    // Same null/undefined test for both the count and the missing list, so
    // a falsy-but-present rating (0) is never reported as missing.
    const missingKeyDimensions = this.keyDimensions.filter(
      dim => !this._hasRating(ratings, dim)
    );
    const providedCount = this.keyDimensions.length - missingKeyDimensions.length;
    const coverage = providedCount / this.keyDimensions.length;
    const isReady = coverage >= 0.75; // 75% of key dimensions
    return {
      ready: isReady,
      keyDimensionsProvided: providedCount,
      keyDimensionsTotal: this.keyDimensions.length,
      coverage: Math.round(coverage * 100),
      missingKeyDimensions,
    };
  }
}
/**
 * Load model from JSON file
 *
 * @param {string} url - URL to bicorder_model.json
 * @returns {Promise<BicorderClassifier>} Initialized classifier
 * @throws {Error} If the server answers with a non-2xx status. fetch() only
 * rejects on network failure, so without this check an error page would be
 * parsed as if it were the model.
 */
export async function loadClassifier(url = './bicorder_model.json') {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to load classifier model from ${url}: ${response.status} ${response.statusText}`);
  }
  const model = await response.json();
  return new BicorderClassifier(model);
}
// Example usage (for testing in Node.js or browser console)
// Demo, guarded so it only runs where `window` is undefined and a `module`
// binding exists.
// NOTE(review): this file also uses ES `export` statements. When it is loaded
// as an ES module, `module` and `require` are normally not defined, so this
// guard would be false and the demo would never run - confirm how this file
// is meant to be executed before relying on it.
if (typeof window === 'undefined' && typeof module !== 'undefined') {
// Node.js example
const fs = require('fs');
/**
 * Load bicorder_model.json (path is relative to the current working
 * directory), then print two example explanations and one short-form
 * readiness assessment to the console.
 */
function demo() {
const modelData = JSON.parse(fs.readFileSync('bicorder_model.json', 'utf8'));
const classifier = new BicorderClassifier(modelData);
console.log('='.repeat(80));
console.log('BICORDER CLASSIFIER - DEMO');
console.log('='.repeat(80));
// Example 1: Community protocol (five ratings, all 8 or 9)
console.log('\nExample 1: Community-Based Protocol');
console.log('-'.repeat(80));
const communityRatings = {
'Design_elite_vs_vernacular': 9,
'Design_explicit_vs_implicit': 8,
'Entanglement_flocking_vs_swarming': 9,
'Entanglement_obligatory_vs_voluntary': 9,
'Design_static_vs_malleable': 8,
};
console.log(classifier.explainClassification(communityRatings));
// Example 2: Institutional protocol (four ratings, all 1)
console.log('\n\n' + '='.repeat(80));
console.log('Example 2: Institutional Protocol');
console.log('-'.repeat(80));
const institutionalRatings = {
'Design_elite_vs_vernacular': 1,
'Design_explicit_vs_implicit': 1,
'Entanglement_flocking_vs_swarming': 1,
'Entanglement_obligatory_vs_voluntary': 1,
};
console.log(classifier.explainClassification(institutionalRatings));
// Example 3: Check short form readiness with only two ratings supplied
console.log('\n\n' + '='.repeat(80));
console.log('Example 3: Short Form Readiness Assessment');
console.log('-'.repeat(80));
const partialRatings = {
'Design_elite_vs_vernacular': 5,
'Entanglement_flocking_vs_swarming': 6,
};
const assessment = classifier.assessShortFormReadiness(partialRatings);
console.log(`Ready for reliable classification: ${assessment.ready}`);
console.log(`Key dimensions coverage: ${assessment.coverage}% (${assessment.keyDimensionsProvided}/${assessment.keyDimensionsTotal})`);
console.log(`Missing key dimensions: ${assessment.missingKeyDimensions.length}`);
}
// Run the demo only when this file is the entry script, not when it is required
if (require.main === module) {
demo();
}
}

View File

@@ -0,0 +1,41 @@
import { BicorderClassifier } from './bicorder-classifier.js';
import { fileURLToPath } from 'url';
import path from 'path';
import fs from 'fs';
// Smoke test: load the trained model from the parent directory of this
// script, classify two hand-written readings, and print the raw results.
// Nothing is asserted; the output is meant to be inspected by a person.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const modelPath = path.join(scriptDir, '..', 'bicorder_model.json');
const classifier = new BicorderClassifier(
  JSON.parse(fs.readFileSync(modelPath, 'utf8'))
);

const RULE_WIDTH = 80;
const heavyRule = '='.repeat(RULE_WIDTH);
const lightRule = '-'.repeat(RULE_WIDTH);

/**
 * Print a case heading and separator, then classify the ratings produced by
 * `buildRatings` and dump the prediction as pretty-printed JSON.
 */
function runCase(heading, buildRatings) {
  console.log(heading);
  console.log(lightRule);
  const prediction = classifier.predict(buildRatings());
  console.log(JSON.stringify(prediction, null, 2));
}

console.log(heavyRule);
console.log('BICORDER CLASSIFIER - TEST');
console.log(heavyRule);

// Test 1
runCase('\nTest 1: Institutional Protocol (e.g., Airport Security)', () => ({
  'Design_elite_vs_vernacular': 1,
  'Design_explicit_vs_implicit': 1,
  'Entanglement_flocking_vs_swarming': 1,
  'Entanglement_obligatory_vs_voluntary': 1,
}));

// Test 2
runCase('\n\nTest 2: Relational Protocol (e.g., Indigenous Practices)', () => ({
  'Design_elite_vs_vernacular': 9,
  'Entanglement_flocking_vs_swarming': 9,
  'Entanglement_obligatory_vs_voluntary': 9,
}));

console.log('\n\n' + heavyRule);
console.log('✓ JavaScript classifier working correctly!');
const modelSizeKb = Math.round(fs.statSync(modelPath).size / 1024);
console.log(' Model size:', modelSizeKb, 'KB');
console.log(heavyRule);