#!/usr/bin/env python3
"""Detailed Analysis with Output Files.

Reads coded virtue data from coding.csv and emits:
  * cooccurrence_matrix.csv        -- pairwise co-occurrence counts
  * jaccard_similarity_matrix.csv  -- pairwise Jaccard similarity
  * virtue_profiles.json           -- per-text virtue profile
  * strong_associations.csv        -- top-50 co-occurring pairs with stats
plus console reports on network centrality and per-source complexity.
"""
import csv
import json  # moved to top level: was imported mid-function, inside a `with` block
import math
from collections import defaultdict, Counter
from itertools import combinations

# Columns in coding.csv that may hold a virtue label (blank when unused).
VIRTUE_COLS = ['Virtue_1', 'Virtue_2', 'Virtue_3', 'Virtue_4', 'Virtue_5']


def load_data(filename):
    """Return the rows of *filename* (a CSV with a header row) as a list of dicts."""
    # newline='' is the csv-module requirement; explicit utf-8 keeps reads portable.
    with open(filename, 'r', newline='', encoding='utf-8') as f:
        return list(csv.DictReader(f))


def _extract_virtues(rows):
    """Return one list per row containing its non-empty, stripped virtue labels."""
    return [
        [v for v in ((row.get(col) or '').strip() for col in VIRTUE_COLS) if v]
        for row in rows
    ]


def _jaccard(set1, set2):
    """Jaccard similarity |A & B| / |A | B|; 0 when both sets are empty."""
    union = set1 | set2
    return len(set1 & set2) / len(union) if union else 0


def _pair_counts(virtue_lists):
    """Count co-occurring virtue pairs; pairs are sorted so (a, b) == (b, a)."""
    pairs = Counter()
    for virtues in virtue_lists:
        pairs.update(combinations(sorted(virtues), 2))
    return pairs


def _write_cooccurrence_matrix(top_virtues, cooccurrence):
    """Write the symmetric co-occurrence count matrix over *top_virtues*."""
    with open('cooccurrence_matrix.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Virtue'] + top_virtues)
        for v1 in top_virtues:
            row = [v1]
            for v2 in top_virtues:
                if v1 == v2:
                    row.append('')  # blank diagonal
                else:
                    # pair keys are stored sorted, so normalize the lookup order
                    row.append(cooccurrence.get((min(v1, v2), max(v1, v2)), 0))
            writer.writerow(row)
    print("Created: cooccurrence_matrix.csv")


def _write_jaccard_matrix(top_virtues, virtue_sets):
    """Write the Jaccard similarity matrix over *top_virtues*."""
    with open('jaccard_similarity_matrix.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Virtue'] + top_virtues)
        for v1 in top_virtues:
            row = [v1]
            for v2 in top_virtues:
                if v1 == v2:
                    row.append('1.0')  # a virtue is always identical to itself
                else:
                    row.append(f"{_jaccard(virtue_sets[v1], virtue_sets[v2]):.3f}")
            writer.writerow(row)
    print("Created: jaccard_similarity_matrix.csv")


def _print_centrality(virtue_freq, cooccurrence):
    """Report degree centrality: how many distinct virtues each connects to."""
    print("\n" + "=" * 70)
    print("VIRTUE NETWORK CENTRALITY ANALYSIS")
    print("=" * 70)
    connections = defaultdict(set)
    for v1, v2 in cooccurrence:  # every stored pair has count >= 1 by construction
        connections[v1].add(v2)
        connections[v2].add(v1)
    centrality = [(v, len(connections[v])) for v in virtue_freq.keys()]
    centrality.sort(key=lambda x: x[1], reverse=True)
    print("\nTop 'Hub' Virtues (connect to most other virtue types):")
    print(f"{'Virtue':<40} {'Connections':<12}")
    print("-" * 55)
    for virtue, degree in centrality[:15]:
        nearby = list(connections[virtue])[:5]  # NOTE: set order is arbitrary
        print(f"{virtue:<40} {degree:<12} → {', '.join(nearby)}")


def _print_source_complexity(rows, all_virtues_per_row, n_virtues):
    """Compare virtue-network size and density between the AFP and PR sources."""
    print("\n" + "=" * 70)
    print("NETWORK COMPLEXITY BY SOURCE")
    print("=" * 70)
    source_per_row = [row.get('Source', 'Unknown') for row in rows]
    # total possible undirected pairs over all observed virtues
    possible_pairs = n_virtues * (n_virtues - 1) / 2
    for source in ['AFP', 'PR']:
        indices = [i for i, s in enumerate(source_per_row) if s == source]
        source_pairs = _pair_counts(all_virtues_per_row[i] for i in indices)
        unique_connections = len(source_pairs)
        total_texts = len(indices)
        avg_pairs = sum(source_pairs.values()) / total_texts if total_texts else 0
        # guard: possible_pairs is 0 when fewer than two distinct virtues exist
        density = unique_connections / possible_pairs * 100 if possible_pairs else 0.0
        print(f"\n{source}:")
        print(f" Texts: {total_texts}")
        print(f" Unique virtue pairs: {unique_connections}")
        print(f" Avg pairs per text: {avg_pairs:.2f}")
        print(f" Network density: {density:.1f}%")


def _export_profiles(rows, all_virtues_per_row):
    """Dump one {id, source, virtues, virtue_count} record per text as JSON."""
    profiles = [
        {
            'id': i,
            'source': row.get('Source', ''),
            'virtues': virtues,
            'virtue_count': len(virtues),
        }
        for i, (row, virtues) in enumerate(zip(rows, all_virtues_per_row))
    ]
    with open('virtue_profiles.json', 'w', encoding='utf-8') as f:
        json.dump(profiles, f, indent=2)
    print("\nCreated: virtue_profiles.json")


def _write_strong_associations(rows, cooccurrence, virtue_sets):
    """Write the 50 most frequent pairs with Jaccard and expected-if-random counts."""
    n_rows = len(rows)
    with open('strong_associations.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # 'Co_count' and 'Observed' intentionally carry the same value; the
        # schema is preserved as-is for downstream compatibility.
        writer.writerow(['Virtue_1', 'Virtue_2', 'Co_count', 'Jaccard', 'Observed', 'Expected'])
        for (v1, v2), count in cooccurrence.most_common(50):
            jaccard = _jaccard(virtue_sets[v1], virtue_sets[v2])
            # expected co-occurrence count if the two virtues were independent
            p1 = len(virtue_sets[v1]) / n_rows
            p2 = len(virtue_sets[v2]) / n_rows
            expected = n_rows * p1 * p2
            writer.writerow([v1, v2, count, f"{jaccard:.3f}", count, f"{expected:.2f}"])
    print("Created: strong_associations.csv")


def main():
    """Run the full analysis pipeline over coding.csv."""
    rows = load_data('coding.csv')
    all_virtues_per_row = _extract_virtues(rows)

    # Frequency of every virtue label; the 25 most common head the matrices.
    virtue_freq = Counter(v for virtues in all_virtues_per_row for v in virtues)
    top_virtues = [v for v, _ in virtue_freq.most_common(25)]

    cooccurrence = _pair_counts(all_virtues_per_row)
    _write_cooccurrence_matrix(top_virtues, cooccurrence)

    # For each virtue, the set of text indices it appears in (drives Jaccard).
    virtue_sets = defaultdict(set)
    for idx, virtues in enumerate(all_virtues_per_row):
        for v in virtues:
            virtue_sets[v].add(idx)
    _write_jaccard_matrix(top_virtues, virtue_sets)

    _print_centrality(virtue_freq, cooccurrence)
    _print_source_complexity(rows, all_virtues_per_row, len(virtue_freq))
    _export_profiles(rows, all_virtues_per_row)
    _write_strong_associations(rows, cooccurrence, virtue_sets)


if __name__ == "__main__":
    main()