Reorganize directory, add manual dataset and sync tooling

- Move all scripts to scripts/, web assets to web/, analysis results
  into self-contained data/readings/<type>_<YYYYMMDD>/ directories
- Add data/readings/manual_20260320/ with 32 JSON readings from
  git.medlab.host/ntnsndr/protocol-bicorder-data
- Add scripts/json_to_csv.py to convert bicorder JSON files to CSV
- Add scripts/sync_readings.sh for one-command sync + re-analysis of
  any dataset backed by a .sync_source config file
- Add scripts/classify_readings.py to apply the LDA classifier to all
  readings and save per-reading cluster assignments
- Add --min-coverage flag to multivariate_analysis.py for sparse/shortform
  datasets; also applies in lda_visualization.py
- Fix lda_visualization.py NaN handling and 0-d array annotation bug
- Update README.md and WORKFLOW.md to document datasets, sync workflow,
  shortform handling, and new scripts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Nathan Schneider
2026-03-20 17:35:13 -06:00
parent 0c794dddae
commit 897c30406b
545 changed files with 10715 additions and 718 deletions

98
analysis/web/bicorder-classifier.d.ts vendored Normal file
View File

@@ -0,0 +1,98 @@
/**
* Type definitions for Bicorder Cluster Classifier
*/
/**
 * Shape of the serialized model object that `BicorderClassifier` is
 * constructed from.
 *
 * Every per-cluster table is keyed by the string cluster ids `'1'` and `'2'`.
 * NOTE(review): the units and exact meaning of the numeric fields are not
 * visible in this declaration file — confirm against whatever generates the
 * model before relying on the hedged descriptions below.
 */
export interface ModelData {
  /** Version string of the model. */
  version: string;
  /** Generation stamp; presumably a date/time string — format not shown here. */
  generated: string;

  /** Dimension names; presumably the full feature list in model order — TODO confirm. */
  dimensions: string[];
  /** Names of the key dimensions; presumably a subset of `dimensions` — TODO confirm. */
  key_dimensions: string[];

  /** Name of each cluster. */
  cluster_names: Record<'1' | '2', string>;
  /** Description of each cluster. */
  cluster_descriptions: Record<'1' | '2', string>;

  /** Scaler parameters; presumably one `mean`/`scale` entry per dimension — TODO confirm. */
  scaler: { mean: number[]; scale: number[] };
  /** LDA parameters: a coefficient vector and a scalar intercept. */
  lda: { coefficients: number[]; intercept: number };

  /** Per-cluster centroid vectors; presumably in scaled feature space — TODO confirm. */
  cluster_centroids_scaled: Record<'1' | '2', number[]>;
  /** Per-cluster mean vectors; presumably in the original rating units — TODO confirm. */
  cluster_means_original: Record<'1' | '2', number[]>;

  /**
   * Threshold values for confidence, completeness and boundary distance.
   * NOTE(review): how each threshold is compared is not visible here.
   */
  thresholds: {
    confidence_low: number;
    completeness_low: number;
    boundary_distance_low: number;
  };

  /** Counts recorded with the model: overall total and one per cluster. */
  metadata: {
    total_protocols: number;
    cluster_1_count: number;
    cluster_2_count: number;
  };
}
/**
 * Ratings passed to the classifier, keyed by dimension name.
 *
 * Each value may be a number, `null`, or `undefined`. Presumably the latter
 * two mark a dimension that has not been rated — confirm against the
 * classifier implementation.
 */
export interface Ratings {
[dimensionName: string]: number | null | undefined;
}
/**
 * Summary result type of `BicorderClassifier.predict` when detailed output
 * is not requested.
 */
export interface PredictionResult {
/** Predicted cluster id: `1` or `2`. */
cluster: 1 | 2;
/** Name of the predicted cluster; presumably taken from the model's cluster names — TODO confirm. */
clusterName: string;
/** Confidence value for the prediction. NOTE(review): scale (0–1 vs. percent) is not visible here. */
confidence: number;
/** Completeness value for the supplied ratings. NOTE(review): scale and definition are not visible here. */
completeness: number;
/** Recommended form: either `'short'` or `'long'`. */
recommendedForm: 'short' | 'long';
}
/**
 * Extended result type of `BicorderClassifier.predict` when called with
 * `{ detailed: true }`. Carries every `PredictionResult` field plus the
 * diagnostic values below.
 *
 * NOTE(review): units and scales of the numeric diagnostics are not visible
 * in this declaration file — confirm against the classifier implementation.
 */
export interface DetailedPredictionResult extends PredictionResult {
  /** LDA score; presumably the raw discriminant value — TODO confirm. */
  ldaScore: number;
  /** Distance to the decision boundary. */
  distanceToBoundary: number;
  /** Distance to each cluster centroid, keyed by cluster id `'1'` / `'2'`. */
  distancesToCentroids: Record<'1' | '2', number>;
  /** Confidence value before any adjustment — presumably; TODO confirm how it differs from `confidence`. */
  rawConfidence: number;

  /** Count of dimensions provided in the ratings. */
  dimensionsProvided: number;
  /** Total count of dimensions. */
  dimensionsTotal: number;
  /** Count of key dimensions provided in the ratings. */
  keyDimensionsProvided: number;
  /** Total count of key dimensions. */
  keyDimensionsTotal: number;
}
/**
 * Result type of `BicorderClassifier.assessShortFormReadiness`.
 */
export interface ShortFormAssessment {
/** Readiness flag. NOTE(review): the exact criterion is not visible in this declaration file. */
ready: boolean;
/** Count of key dimensions provided in the ratings. */
keyDimensionsProvided: number;
/** Total count of key dimensions. */
keyDimensionsTotal: number;
/** Coverage value; presumably `keyDimensionsProvided / keyDimensionsTotal` — TODO confirm scale. */
coverage: number;
/** Names of the key dimensions that are missing. */
missingKeyDimensions: string[];
}
/**
 * General options type accepted by `BicorderClassifier.predict`.
 */
export interface PredictOptions {
/** When `true`, `predict` is typed to return a `DetailedPredictionResult`. Optional. */
detailed?: boolean;
}
/**
 * Cluster classifier constructed from a serialized `ModelData` object.
 *
 * This is a declaration only; the implementation lives in the accompanying
 * JavaScript module.
 */
export class BicorderClassifier {
  /**
   * @param model Serialized model the classifier is built from.
   */
  constructor(model: ModelData);

  /**
   * Classifies a set of ratings.
   *
   * The overloads select the return type from `options.detailed`:
   * - options omitted, or `detailed` absent / `false` → `PredictionResult`
   * - `detailed: true` → `DetailedPredictionResult`
   * - `detailed` only known as `boolean` → the union of both
   *
   * The first overload accepts an options object without a `detailed` key
   * (for example `{}`), so such calls get the precise `PredictionResult`
   * type instead of falling through to the union overload.
   * NOTE(review): this assumes the implementation treats a missing
   * `detailed` key the same as omitting `options` — confirm against the
   * JavaScript source.
   *
   * @param ratings Ratings keyed by dimension name.
   * @param options Optional prediction options.
   */
  predict(ratings: Ratings, options?: { detailed?: false }): PredictionResult;
  predict(ratings: Ratings, options: { detailed: true }): DetailedPredictionResult;
  predict(ratings: Ratings, options?: PredictOptions): PredictionResult | DetailedPredictionResult;

  /**
   * Produces a textual explanation for the given ratings.
   *
   * @param ratings Ratings keyed by dimension name.
   * @returns The explanation as a string; its format is not visible here.
   */
  explainClassification(ratings: Ratings): string;

  /**
   * @returns The key dimension names; presumably the model's
   * `key_dimensions` — TODO confirm.
   */
  getKeyDimensions(): string[];

  /**
   * Assesses short-form readiness for the given ratings.
   *
   * @param ratings Ratings keyed by dimension name.
   * @returns A `ShortFormAssessment` describing readiness and key-dimension coverage.
   */
  assessShortFormReadiness(ratings: Ratings): ShortFormAssessment;
}
/**
 * Asynchronously obtains a `BicorderClassifier`.
 *
 * @param url Optional location to load from — presumably the serialized
 * model; the default used when omitted is not visible in this declaration
 * file.
 * @returns A promise that resolves to the classifier.
 */
export function loadClassifier(url?: string): Promise<BicorderClassifier>;