// protocol-bicorder/analysis/bicorder-classifier.js

/**
 * Bicorder Cluster Classifier
 *
 * Real-time protocol classification for the Bicorder web app.
 * Predicts which protocol family (Relational/Cultural vs Institutional/Bureaucratic)
 * a protocol belongs to based on dimension ratings.
 *
 * Usage:
 *   import { BicorderClassifier } from './bicorder-classifier.js';
 *
 *   const classifier = new BicorderClassifier(modelData);
 *   const result = classifier.predict(ratings);
 *   console.log(`Cluster: ${result.clusterName} (${result.confidence}% confidence)`);
 */

export class BicorderClassifier {
  /**
   * @param {Object} model - Model data loaded from bicorder_model.json.
   *   This class reads: dimensions, key_dimensions, scaler.{mean,scale},
   *   lda.{coefficients,intercept}, cluster_names, cluster_centroids_scaled,
   *   thresholds.{confidence_low,completeness_low,boundary_distance_low}
   *   and (optionally) bicorder_version.
   * @param {string} bicorderVersion - Version of bicorder.json being used
   *
   * Simple version-matching approach: The model includes a bicorder_version
   * field. When bicorder structure changes, update the version and retrain.
   */
  constructor(model, bicorderVersion = null) {
    this.model = model;
    this.dimensions = model.dimensions;
    this.keyDimensions = model.key_dimensions;
    this.bicorderVersion = bicorderVersion;

    // Only warn when both versions are known and they disagree.
    if (bicorderVersion && model.bicorder_version && bicorderVersion !== model.bicorder_version) {
      console.warn(`Model version (${model.bicorder_version}) doesn't match bicorder version (${bicorderVersion}). Results may be inaccurate.`);
    }
  }

  /**
   * Whether a rating was supplied for a dimension.
   * Only null/undefined count as missing, so a falsy value such as 0 is
   * still treated as "provided" everywhere in this class.
   * @private
   */
  _isRated(ratings, dim) {
    return ratings[dim] !== null && ratings[dim] !== undefined;
  }

  /**
   * Standardize values using the fitted scaler: (value - mean) / scale,
   * element by element. null/undefined entries are passed through as null.
   * @private
   */
  _standardize(values) {
    const { mean, scale } = this.model.scaler;
    return values.map((val, i) => {
      if (val === null || val === undefined) return null;
      return (val - mean[i]) / scale[i];
    });
  }

  /**
   * Calculate LDA score (position on discriminant axis): coef · x + intercept.
   * Missing (null/undefined) entries contribute 0, i.e. the mean in scaled space.
   * @private
   */
  _ldaScore(scaledValues) {
    const { coefficients, intercept } = this.model.lda;
    let score = intercept;
    for (let i = 0; i < scaledValues.length; i++) {
      score += coefficients[i] * (scaledValues[i] ?? 0);
    }
    return score;
  }

  /**
   * Calculate Euclidean distance between two equally indexed vectors.
   * @private
   */
  _distance(a, b) {
    let sum = 0;
    for (let i = 0; i < a.length; i++) {
      const diff = a[i] - b[i];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  }

  /**
   * Run the numeric part of the classification once and return the unrounded
   * intermediate values, so that predict() and explainClassification() apply
   * exactly the same thresholds.
   * @private
   */
  _evaluate(ratings) {
    // Convert ratings object to array ordered like the model's dimensions
    const values = this.dimensions.map((dim) => ratings[dim] ?? null);
    const providedCount = values.filter((v) => v !== null).length;
    const completeness = providedCount / this.dimensions.length;

    // Fill missing with neutral value (5 = middle of 1-9 scale), then standardize.
    // After this step `scaled` contains no null entries.
    const scaled = this._standardize(values.map((v) => v ?? 5));
    const ldaScore = this._ldaScore(scaled);

    // LDA boundary at 0: positive score = cluster 2, otherwise cluster 1
    const cluster = ldaScore > 0 ? 2 : 1;
    const distanceToBoundary = Math.abs(ldaScore);

    // Confidence grows with distance from the boundary, saturating at 3.0
    const confidence = Math.min(1.0, distanceToBoundary / 3.0);

    // Scale confidence down when data is incomplete (factor between 0.5 and 1)
    const adjustedConfidence = confidence * (0.5 + 0.5 * completeness);

    // Each flag is one independent reason to recommend the long form.
    const { thresholds } = this.model;
    const longFormReasons = {
      lowConfidence: adjustedConfidence < thresholds.confidence_low,
      lowCompleteness: completeness < thresholds.completeness_low,
      nearBoundary: distanceToBoundary < thresholds.boundary_distance_low,
    };

    return {
      providedCount,
      completeness,
      scaled,
      ldaScore,
      cluster,
      distanceToBoundary,
      confidence,
      adjustedConfidence,
      longFormReasons,
    };
  }

  /**
   * Build the public result object from an evaluation.
   * @private
   */
  _toResult(ratings, evaluation, detailed) {
    const { lowConfidence, lowCompleteness, nearBoundary } = evaluation.longFormReasons;
    const shouldUseLongForm = lowConfidence || lowCompleteness || nearBoundary;

    const basicResult = {
      cluster: evaluation.cluster,
      clusterName: this.model.cluster_names[evaluation.cluster],
      confidence: Math.round(evaluation.adjustedConfidence * 100),
      completeness: Math.round(evaluation.completeness * 100),
      recommendedForm: shouldUseLongForm ? 'long' : 'short',
    };

    if (!detailed) {
      return basicResult;
    }

    // Distances from the (filled, scaled) rating vector to each cluster centroid
    const distances = {};
    for (const [clusterId, centroid] of Object.entries(this.model.cluster_centroids_scaled)) {
      distances[clusterId] = this._distance(evaluation.scaled, centroid);
    }

    const keyDimensionsProvided = this.keyDimensions.filter(
      (dim) => this._isRated(ratings, dim)
    ).length;

    return {
      ...basicResult,
      ldaScore: evaluation.ldaScore,
      distanceToBoundary: evaluation.distanceToBoundary,
      dimensionsProvided: evaluation.providedCount,
      dimensionsTotal: this.dimensions.length,
      keyDimensionsProvided,
      keyDimensionsTotal: this.keyDimensions.length,
      distancesToCentroids: distances,
      rawConfidence: Math.round(evaluation.confidence * 100),
    };
  }

  /**
   * Predict cluster for given ratings
   *
   * @param {Object} ratings - Map of dimension names to values (1-9)
   *                          Can be partial - missing dimensions are filled
   *                          with the neutral value 5
   * @param {Object} options - Options
   * @param {boolean} options.detailed - Return detailed information (default: true,
   *                          also when `options` is passed without a `detailed` key)
   *
   * @returns {Object} Prediction result with:
   *   - cluster: Cluster number (1 or 2)
   *   - clusterName: Human-readable name
   *   - confidence: Confidence percentage (0-100)
   *   - completeness: Percentage of dimensions provided (0-100)
   *   - recommendedForm: 'short' or 'long'
   *   and, when detailed:
   *   - ldaScore: Position on discriminant axis
   *   - distanceToBoundary: Distance from cluster boundary
   *   - dimensionsProvided / dimensionsTotal
   *   - keyDimensionsProvided / keyDimensionsTotal
   *   - distancesToCentroids: Map of cluster id to Euclidean distance
   *   - rawConfidence: Confidence percentage before the completeness adjustment
   */
  predict(ratings, options = {}) {
    // Default per key, so `predict(r, {})` is detailed as documented.
    const { detailed = true } = options ?? {};
    return this._toResult(ratings, this._evaluate(ratings), detailed);
  }

  /**
   * Get explanation of classification
   *
   * The "long form" reasons and the boundary warning use the same
   * model.thresholds comparisons that decide `recommendedForm`, so the
   * explanation always agrees with the recommendation.
   *
   * @param {Object} ratings - Dimension ratings
   * @returns {string} Human-readable explanation
   */
  explainClassification(ratings) {
    const evaluation = this._evaluate(ratings);
    const result = this._toResult(ratings, evaluation, true);
    const { lowConfidence, lowCompleteness, nearBoundary } = evaluation.longFormReasons;
    const lines = [];

    lines.push(`Protocol Classification: ${result.clusterName}`);
    lines.push(`Confidence: ${result.confidence}%`);
    lines.push('');

    if (result.cluster === 2) {
      lines.push('This protocol leans toward Institutional/Bureaucratic characteristics:');
      lines.push('  • More likely to be formal, standardized, top-down');
      lines.push('  • May involve state/corporate enforcement');
      lines.push('  • Tends toward precise, documented procedures');
    } else {
      lines.push('This protocol leans toward Relational/Cultural characteristics:');
      lines.push('  • More likely to be emergent, community-based');
      lines.push('  • May involve voluntary participation');
      lines.push('  • Tends toward interpretive, flexible practices');
    }

    lines.push('');
    lines.push(`Distance from boundary: ${result.distanceToBoundary.toFixed(2)}`);

    if (nearBoundary) {
      lines.push('⚠️  This protocol is near the boundary between families.');
      lines.push('   It may exhibit characteristics of both types.');
    }

    lines.push('');
    lines.push(`Completeness: ${result.completeness}% (${result.dimensionsProvided}/${result.dimensionsTotal} dimensions)`);

    // Compare counts rather than the rounded percentage so that a nearly
    // complete rating set is not reported as 100% complete.
    if (result.dimensionsProvided < result.dimensionsTotal) {
      lines.push('Note: Missing dimensions filled with neutral values (5)');
      lines.push('      Confidence improves with complete data');
    }

    lines.push('');
    lines.push(`Recommended form: ${result.recommendedForm.toUpperCase()}`);

    if (result.recommendedForm === 'long') {
      lines.push('Reason: Use long form for:');
      if (lowConfidence) {
        lines.push('  • Low classification confidence');
      }
      if (lowCompleteness) {
        lines.push('  • Incomplete data');
      }
      if (nearBoundary) {
        lines.push('  • Ambiguous positioning between families');
      }
    } else {
      lines.push(`Reason: High confidence classification with ${result.completeness}% data`);
    }

    return lines.join('\n');
  }

  /**
   * Get the list of key dimensions for short form
   * @returns {Array<string>} Copy of the dimension names (safe to mutate)
   */
  getKeyDimensions() {
    return [...this.keyDimensions];
  }

  /**
   * Check if enough key dimensions are provided for reliable short-form classification
   * @param {Object} ratings - Current ratings
   * @returns {Object} Assessment with:
   *   - ready: true when at least 75% of key dimensions are rated
   *   - keyDimensionsProvided / keyDimensionsTotal
   *   - coverage: Percentage of key dimensions rated (0-100)
   *   - missingKeyDimensions: Names of key dimensions without a rating
   */
  assessShortFormReadiness(ratings) {
    // "Provided" and "missing" use the same null/undefined test so the two
    // lists always partition the key dimensions.
    const missingKeyDimensions = this.keyDimensions.filter(
      (dim) => !this._isRated(ratings, dim)
    );
    const providedCount = this.keyDimensions.length - missingKeyDimensions.length;
    const coverage = providedCount / this.keyDimensions.length;

    return {
      ready: coverage >= 0.75, // 75% of key dimensions
      keyDimensionsProvided: providedCount,
      keyDimensionsTotal: this.keyDimensions.length,
      coverage: Math.round(coverage * 100),
      missingKeyDimensions,
    };
  }
}

/**
 * Load model from JSON file
 *
 * @param {string} url - URL to bicorder_model.json
 * @param {string} bicorderVersion - Optional version of bicorder.json in use;
 *   forwarded to the BicorderClassifier constructor for its version check
 * @returns {Promise<BicorderClassifier>} Initialized classifier
 * @throws {Error} When the HTTP response status is not OK (fetch itself only
 *   rejects on network failures, so the status must be checked explicitly)
 */
export async function loadClassifier(url = './bicorder_model.json', bicorderVersion = null) {
  const response = await fetch(url);
  if (!response.ok) {
    // Fail here with the status instead of a confusing JSON parse error below.
    throw new Error(
      `Failed to load classifier model from ${url}: ${response.status} ${response.statusText}`
    );
  }
  const model = await response.json();
  return new BicorderClassifier(model, bicorderVersion);
}

// Demo harness: prints three worked examples to the console when this file is
// executed directly by Node.js (no `window`, CommonJS `module` available).
// NOTE(review): the guard relies on the CommonJS globals `module`/`require`,
// while this file also uses ES-module `export` syntax — confirm how the file
// is meant to be run (e.g. transpiled to CommonJS) for the demo to execute.
if (typeof window === 'undefined' && typeof module !== 'undefined') {
  const fs = require('fs');

  /**
   * Load bicorder_model.json from the current working directory and print
   * the classification of two sample protocols plus one readiness check.
   */
  function demo() {
    const classifier = new BicorderClassifier(
      JSON.parse(fs.readFileSync('bicorder_model.json', 'utf8'))
    );

    // 80-column separators reused by every section header
    const heavyRule = '='.repeat(80);
    const lightRule = '-'.repeat(80);

    console.log(heavyRule);
    console.log('BICORDER CLASSIFIER - DEMO');
    console.log(heavyRule);

    // Example 1: ratings at the high end of the scale on five dimensions
    console.log('\nExample 1: Community-Based Protocol');
    console.log(lightRule);
    console.log(
      classifier.explainClassification({
        'Design_elite_vs_vernacular': 9,
        'Design_explicit_vs_implicit': 8,
        'Entanglement_flocking_vs_swarming': 9,
        'Entanglement_obligatory_vs_voluntary': 9,
        'Design_static_vs_malleable': 8,
      })
    );

    // Example 2: ratings at the low end of the scale on four dimensions
    console.log(`\n\n${heavyRule}`);
    console.log('Example 2: Institutional Protocol');
    console.log(lightRule);
    console.log(
      classifier.explainClassification({
        'Design_elite_vs_vernacular': 1,
        'Design_explicit_vs_implicit': 1,
        'Entanglement_flocking_vs_swarming': 1,
        'Entanglement_obligatory_vs_voluntary': 1,
      })
    );

    // Example 3: only two ratings supplied, to show the readiness report
    console.log(`\n\n${heavyRule}`);
    console.log('Example 3: Short Form Readiness Assessment');
    console.log(lightRule);
    const readiness = classifier.assessShortFormReadiness({
      'Design_elite_vs_vernacular': 5,
      'Entanglement_flocking_vs_swarming': 6,
    });
    console.log(`Ready for reliable classification: ${readiness.ready}`);
    console.log(`Key dimensions coverage: ${readiness.coverage}% (${readiness.keyDimensionsProvided}/${readiness.keyDimensionsTotal})`);
    console.log(`Missing key dimensions: ${readiness.missingKeyDimensions.length}`);
  }

  // Run only when invoked as the entry script, not when loaded by another module.
  if (require.main === module) {
    demo();
  }
}