Reorganize directory, add manual dataset and sync tooling

- Move all scripts to scripts/, web assets to web/, and analysis results into self-contained data/readings/<type>_<YYYYMMDD>/ directories
- Add data/readings/manual_20260320/ with 32 JSON readings from git.medlab.host/ntnsndr/protocol-bicorder-data
- Add scripts/json_to_csv.py to convert bicorder JSON files to CSV
- Add scripts/sync_readings.sh for one-command sync + re-analysis of any dataset backed by a .sync_source config file
- Add scripts/classify_readings.py to apply the LDA classifier to all readings and save per-reading cluster assignments
- Add --min-coverage flag to multivariate_analysis.py for sparse/shortform datasets; also applies in lda_visualization.py
- Fix lda_visualization.py NaN handling and 0-d array annotation bug
- Update README.md and WORKFLOW.md to document datasets, sync workflow, shortform handling, and new scripts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-20 17:35:13 -06:00
parent 0c794dddae
commit 897c30406b
545 changed files with 10715 additions and 718 deletions
--- a/analysis/web/bicorder-classifier.d.ts
+++ b/analysis/web/bicorder-classifier.d.ts
@@ -0,0 +1,98 @@
+/**
+ * Type definitions for Bicorder Cluster Classifier
+ */
+
+/**
+ * Shape of the model object accepted by the `BicorderClassifier` constructor.
+ *
+ * Per-cluster values are keyed by the string cluster ids `'1'` and `'2'`.
+ * NOTE(review): the field descriptions below are inferred from field names
+ * only; confirm them against the tooling that generates the model.
+ */
+export interface ModelData {
+  version: string;
+  generated: string;
+  /** Dimension names; presumably aligned index-by-index with the numeric arrays below (TODO confirm). */
+  dimensions: string[];
+  /** Presumably the subset of `dimensions` relevant to the short form (TODO confirm). */
+  key_dimensions: string[];
+  cluster_names: Record<'1' | '2', string>;
+  cluster_descriptions: Record<'1' | '2', string>;
+  /** Presumably per-dimension standardization parameters (TODO confirm). */
+  scaler: { mean: number[]; scale: number[] };
+  /** Presumably the linear discriminant weights and bias term (TODO confirm). */
+  lda: { coefficients: number[]; intercept: number };
+  cluster_centroids_scaled: Record<'1' | '2', number[]>;
+  cluster_means_original: Record<'1' | '2', number[]>;
+  /** Cut-off values; how each one is applied is not visible in this file. */
+  thresholds: {
+    confidence_low: number;
+    completeness_low: number;
+    boundary_distance_low: number;
+  };
+  /** Counts describing the data the model was built from (presumed; confirm). */
+  metadata: {
+    total_protocols: number;
+    cluster_1_count: number;
+    cluster_2_count: number;
+  };
+}
+
+/**
+ * Ratings supplied to the classifier, keyed by dimension name.
+ *
+ * A value may be a number, `null`, or `undefined`; presumably `null` and
+ * `undefined` both mean "not rated" (confirm against the implementation).
+ */
+export interface Ratings {
+  [dimensionName: string]: number | null | undefined;
+}
+
+/**
+ * Result type of `BicorderClassifier.predict` when detailed output is not
+ * requested.
+ *
+ * NOTE(review): the numeric ranges of `confidence` and `completeness` are not
+ * visible in this declaration file; confirm against the implementation.
+ */
+export interface PredictionResult {
+  /** Predicted cluster id, either 1 or 2. */
+  cluster: 1 | 2;
+  /** Name of the predicted cluster. */
+  clusterName: string;
+  confidence: number;
+  completeness: number;
+  /** Which form is recommended: `'short'` or `'long'`. */
+  recommendedForm: 'short' | 'long';
+}
+
+/**
+ * Extended result type of `BicorderClassifier.predict` when called with
+ * `{ detailed: true }`; carries every `PredictionResult` field plus the
+ * diagnostic values below.
+ *
+ * NOTE(review): units and ranges of the numeric fields are not visible in
+ * this declaration file; confirm against the implementation.
+ */
+export interface DetailedPredictionResult extends PredictionResult {
+  ldaScore: number;
+  distanceToBoundary: number;
+  /** Count of dimensions provided, alongside the total it is measured against. */
+  dimensionsProvided: number;
+  dimensionsTotal: number;
+  /** Count of key dimensions provided, alongside the total it is measured against. */
+  keyDimensionsProvided: number;
+  keyDimensionsTotal: number;
+  /** One distance per cluster, keyed by the string cluster ids `'1'` and `'2'`. */
+  distancesToCentroids: Record<'1' | '2', number>;
+  rawConfidence: number;
+}
+
+/**
+ * Result type of `BicorderClassifier.assessShortFormReadiness`.
+ *
+ * NOTE(review): how `ready` and `coverage` are derived is not visible in this
+ * declaration file; presumably `coverage` relates `keyDimensionsProvided` to
+ * `keyDimensionsTotal` (confirm against the implementation).
+ */
+export interface ShortFormAssessment {
+  ready: boolean;
+  keyDimensionsProvided: number;
+  keyDimensionsTotal: number;
+  coverage: number;
+  /** Names of key dimensions that are missing from the supplied ratings. */
+  missingKeyDimensions: string[];
+}
+
+/** Options accepted by `BicorderClassifier.predict`. */
+export interface PredictOptions {
+  /**
+   * When `true`, `predict` is typed to return a `DetailedPredictionResult`;
+   * when `false`, a plain `PredictionResult`.
+   */
+  detailed?: boolean;
+}
+
+/**
+ * Classifier that assigns a set of ratings to one of two clusters using the
+ * supplied model data.
+ */
+export class BicorderClassifier {
+  /**
+   * @param model Model data the classifier operates on.
+   */
+  constructor(model: ModelData);
+
+  /**
+   * Classify a set of ratings.
+   *
+   * The return type follows the `detailed` option: omitted, `undefined`, or
+   * `false` yields a `PredictionResult`; `true` yields a
+   * `DetailedPredictionResult`. When the flag is only known to be a
+   * `boolean`, the result is the union of the two.
+   *
+   * The first overload declares `detailed` as optional so that an options
+   * object without the key (for example `{}`) resolves to `PredictionResult`
+   * rather than falling through to the union overload.
+   *
+   * @param ratings Ratings keyed by dimension name.
+   * @param options Optional settings; see `PredictOptions`.
+   */
+  predict(ratings: Ratings, options?: { detailed?: false }): PredictionResult;
+  predict(ratings: Ratings, options: { detailed: true }): DetailedPredictionResult;
+  predict(ratings: Ratings, options?: PredictOptions): PredictionResult | DetailedPredictionResult;
+
+  /**
+   * Produce a textual explanation of the classification for the given ratings.
+   *
+   * @param ratings Ratings keyed by dimension name.
+   */
+  explainClassification(ratings: Ratings): string;
+
+  /** Return the names of the key dimensions. */
+  getKeyDimensions(): string[];
+
+  /**
+   * Assess the given ratings for short-form readiness.
+   *
+   * @param ratings Ratings keyed by dimension name.
+   * @returns Readiness flag, key-dimension counts, coverage, and the key
+   *   dimensions that are missing.
+   */
+  assessShortFormReadiness(ratings: Ratings): ShortFormAssessment;
+}
+
+/**
+ * Asynchronously obtain a `BicorderClassifier`.
+ *
+ * @param url Optional location to load from; presumably the model data file.
+ *   The default used when omitted is not visible in this declaration file
+ *   (confirm against the implementation).
+ * @returns A promise that resolves to the classifier.
+ */
+export function loadClassifier(url?: string): Promise<BicorderClassifier>;