#!/usr/bin/env python3
"""
Detailed Analysis with Output Files

Reads coding.csv, writes co-occurrence / similarity / association output
files, and prints virtue network statistics.
"""
import csv
import json
import math
from collections import Counter, defaultdict
from itertools import combinations
def load_data(filename):
    """Read *filename* as CSV and return its rows as a list of dicts.

    Opens with ``newline=''`` (required by the csv module for correct
    quoted-newline handling) and explicit UTF-8 encoding.

    Args:
        filename: Path to a CSV file with a header row.

    Returns:
        list[dict]: One dict per data row, keyed by header column names.
    """
    with open(filename, 'r', newline='', encoding='utf-8') as f:
        return list(csv.DictReader(f))
def _extract_virtues(rows):
    """Return, for each input row, the list of non-empty Virtue_1..Virtue_5 values."""
    virtue_cols = ['Virtue_1', 'Virtue_2', 'Virtue_3', 'Virtue_4', 'Virtue_5']
    per_row = []
    for row in rows:
        # (row.get(col) or '') guards against both a missing key and a None value
        # with a single lookup, then strip() normalizes surrounding whitespace.
        virtues = [v for v in ((row.get(col) or '').strip() for col in virtue_cols) if v]
        per_row.append(virtues)
    return per_row


def _count_cooccurrences(all_virtues_per_row):
    """Count unordered virtue pairs that co-occur within a row.

    Keys are 2-tuples with members in sorted order, so (a, b) and (b, a)
    collapse into one entry.
    """
    cooccurrence = Counter()
    for virtues in all_virtues_per_row:
        for pair in combinations(sorted(virtues), 2):
            cooccurrence[pair] += 1
    return cooccurrence


def _build_virtue_sets(all_virtues_per_row):
    """Map each virtue to the set of row indices in which it appears."""
    virtue_sets = defaultdict(set)
    for idx, virtues in enumerate(all_virtues_per_row):
        for v in virtues:
            virtue_sets[v].add(idx)
    return virtue_sets


def _write_cooccurrence_matrix(top_virtues, cooccurrence):
    """Write a square co-occurrence count matrix over *top_virtues*."""
    with open('cooccurrence_matrix.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Virtue'] + top_virtues)
        for v1 in top_virtues:
            row = [v1]
            for v2 in top_virtues:
                if v1 == v2:
                    row.append('')  # Diagonal left blank
                else:
                    # Pair keys are stored with members in sorted order.
                    row.append(cooccurrence.get((min(v1, v2), max(v1, v2)), 0))
            writer.writerow(row)
    print("Created: cooccurrence_matrix.csv")


def _jaccard(set1, set2):
    """Jaccard similarity of two sets; 0 when both are empty."""
    union = set1 | set2
    return len(set1 & set2) / len(union) if union else 0


def _write_jaccard_matrix(top_virtues, virtue_sets):
    """Write a square Jaccard-similarity matrix over *top_virtues*."""
    with open('jaccard_similarity_matrix.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Virtue'] + top_virtues)
        for v1 in top_virtues:
            row = [v1]
            for v2 in top_virtues:
                if v1 == v2:
                    row.append('1.0')
                else:
                    row.append(f"{_jaccard(virtue_sets[v1], virtue_sets[v2]):.3f}")
            writer.writerow(row)
    print("Created: jaccard_similarity_matrix.csv")


def _print_centrality(virtue_freq, cooccurrence):
    """Print degree centrality: how many distinct virtues each virtue co-occurs with."""
    print("\n" + "=" * 70)
    print("VIRTUE NETWORK CENTRALITY ANALYSIS")
    print("=" * 70)

    # Degree centrality (how many different virtues each connects to).
    connections = defaultdict(set)
    for (v1, v2), count in cooccurrence.items():
        if count >= 1:
            connections[v1].add(v2)
            connections[v2].add(v1)

    centrality = [(v, len(connections[v])) for v in virtue_freq.keys()]
    centrality.sort(key=lambda x: x[1], reverse=True)

    print("\nTop 'Hub' Virtues (connect to most other virtue types):")
    print(f"{'Virtue':<40} {'Connections':<12}")
    print("-" * 55)
    for virtue, degree in centrality[:15]:
        nearby = list(connections[virtue])[:5]
        print(f"{virtue:<40} {degree:<12} → {', '.join(nearby)}")


def _print_density_by_source(rows, all_virtues_per_row, virtue_freq):
    """Print per-source virtue-network complexity statistics."""
    print("\n" + "=" * 70)
    print("NETWORK COMPLEXITY BY SOURCE")
    print("=" * 70)

    source_per_row = [row.get('Source', 'Unknown') for row in rows]

    for source in ['AFP', 'PR']:
        source_indices = [i for i, s in enumerate(source_per_row) if s == source]
        source_pairs = Counter()
        for idx in source_indices:
            for pair in combinations(sorted(all_virtues_per_row[idx]), 2):
                source_pairs[pair] += 1

        unique_connections = len(source_pairs)
        total_texts = len(source_indices)
        avg_pairs = sum(source_pairs.values()) / total_texts if total_texts else 0
        # Guard against a zero denominator when fewer than two virtue types exist.
        possible_pairs = len(virtue_freq) * (len(virtue_freq) - 1) / 2
        density = unique_connections / possible_pairs * 100 if possible_pairs else 0.0

        print(f"\n{source}:")
        print(f"  Texts: {total_texts}")
        print(f"  Unique virtue pairs: {unique_connections}")
        print(f"  Avg pairs per text: {avg_pairs:.2f}")
        print(f"  Network density: {density:.1f}%")


def _write_profiles(rows, all_virtues_per_row):
    """Export one JSON profile per text: id, source, virtues, virtue count."""
    profiles = [
        {
            'id': i,
            'source': row.get('Source', ''),
            'virtues': virtues,
            'virtue_count': len(virtues),
        }
        for i, (row, virtues) in enumerate(zip(rows, all_virtues_per_row))
    ]
    with open('virtue_profiles.json', 'w') as f:
        json.dump(profiles, f, indent=2)
    print("\nCreated: virtue_profiles.json")


def _write_strong_associations(rows, cooccurrence, virtue_sets):
    """Write the 50 strongest pairs with observed count, Jaccard, and random-baseline expectation."""
    with open('strong_associations.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Virtue_1', 'Virtue_2', 'Co_count', 'Jaccard', 'Observed', 'Expected'])

        for (v1, v2), count in cooccurrence.most_common(50):
            set1 = virtue_sets[v1]
            set2 = virtue_sets[v2]
            jaccard = _jaccard(set1, set2)

            # Expected co-occurrence if the two virtues were assigned independently.
            p1 = len(set1) / len(rows)
            p2 = len(set2) / len(rows)
            expected = len(rows) * p1 * p2

            writer.writerow([v1, v2, count, f"{jaccard:.3f}", count, f"{expected:.2f}"])
    print("Created: strong_associations.csv")


def main():
    """Run the full virtue co-occurrence analysis over coding.csv.

    Writes cooccurrence_matrix.csv, jaccard_similarity_matrix.csv,
    virtue_profiles.json, and strong_associations.csv, and prints
    centrality and per-source density reports.
    """
    rows = load_data('coding.csv')

    all_virtues_per_row = _extract_virtues(rows)

    # Restrict the matrices to the 25 most frequent virtues.
    virtue_freq = Counter(v for virtues in all_virtues_per_row for v in virtues)
    top_virtues = [v for v, _ in virtue_freq.most_common(25)]

    cooccurrence = _count_cooccurrences(all_virtues_per_row)
    _write_cooccurrence_matrix(top_virtues, cooccurrence)

    virtue_sets = _build_virtue_sets(all_virtues_per_row)
    _write_jaccard_matrix(top_virtues, virtue_sets)

    _print_centrality(virtue_freq, cooccurrence)
    _print_density_by_source(rows, all_virtues_per_row, virtue_freq)
    _write_profiles(rows, all_virtues_per_row)
    _write_strong_associations(rows, cooccurrence, virtue_sets)
# Script entry point: run the analysis only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()