The analysis module provides comprehensive statistical and behavioral analysis tools.
- analysis.core - Statistical utilities and streaming algorithms
- analysis.deception - Deception detection and belief calibration
- analysis.social - Coalition detection and temporal analysis
- analysis.models - Model comparison and hypothesis testing
- analysis.visualization - Publication-quality figure generation
Calculate confidence interval for proportions using Wilson score.
from analysis import calculate_proportion_ci
prop, lower, upper = calculate_proportion_ci(
successes=58,
total=100,
confidence=0.95,
method='wilson'
)
# Returns: (0.58, 0.48, 0.67)
Parameters:
- successes (int): Number of successes
- total (int): Total trials
- confidence (float): Confidence level (default: 0.95)
- method (str): 'wilson', 'normal', or 'clopper-pearson'
Returns: Tuple of (proportion, lower_bound, upper_bound)
Calculate confidence interval for mean values.
from analysis import calculate_confidence_interval
import numpy as np
data = np.array([0.5, 0.6, 0.55, 0.62, 0.58])
mean, lower, upper = calculate_confidence_interval(data, confidence=0.95)
Container for hypothesis test results.
from analysis import StatisticalResult
result = StatisticalResult(
statistic=2.34,
p_value=0.019,
significance="*",
effect_size=0.45,
confidence_interval=(0.12, 0.78),
interpretation="Significant difference detected"
)
Detect contradictions between private reasoning and public statements.
from analysis import DeceptionDetector
detector = DeceptionDetector()
is_deceptive, confidence, summary = detector.detect_deception(
reasoning="I know Alice is a fascist based on her voting pattern",
statement="I trust Alice completely, she's definitely liberal"
)
# is_deceptive: True
# confidence: 0.85
# summary: "Sentiment contradiction detected..."
Methods:
- detect_deception(reasoning, statement, context=None) - Main detection method
- get_deception_score(reasoning, statement) - Numeric score only
Get singleton detector instance.
from analysis import get_detector
detector = get_detector() # Returns cached instance
Detect voting coalitions using community detection algorithms.
from analysis import CoalitionDetector
detector = CoalitionDetector()
result = detector.detect_coalitions(
votes=vote_matrix,
player_names=['Alice', 'Bob', 'Carol', 'David', 'Eve']
)
print(result.partitions) # List of player groups
print(result.purity) # Alignment with true teams
print(result.modularity) # Network modularity score
Generate network data for visualization.
from analysis import get_alignment_network_for_visualization
nodes, edges = get_alignment_network_for_visualization(
votes=vote_matrix,
player_names=player_names,
roles=roles
)
Divide game into early/mid/late phases.
from analysis import segment_game_into_phases, GamePhase
phases = segment_game_into_phases(
total_turns=15,
early_threshold=0.25,
late_threshold=0.75
)
# Returns: [GamePhase.EARLY, GamePhase.EARLY, GamePhase.MID, ...]
Find critical moments in game progression.
from analysis import detect_turning_points
turning_points = detect_turning_points(
trust_scores=trust_trajectory,
threshold=0.2
)
# Returns: [TurningPoint(turn=5, type='trust_collapse'), ...]
Statistical comparison of model win rates.
from analysis import compare_win_rates
result = compare_win_rates(
games_df,
model_a='deepseek/deepseek-v3.2-exp',
model_b='anthropic/claude-3.5-sonnet'
)
print(f"Chi-square: {result.statistic:.2f}")
print(f"p-value: {result.p_value:.4f}")
print(f"Effect size (Cohen's h): {result.effect_size:.3f}")
Calculate Elo ratings across models.
from analysis import calculate_elo_ratings
ratings = calculate_elo_ratings(games_df)
# Returns: {'model_a': 1523, 'model_b': 1489, ...}
Run batteries of hypothesis tests.
from analysis import HypothesisTester
tester = HypothesisTester(games_df)
results = tester.run_all_tests()
for name, result in results.items():
    print(f"{name}: p={result.p_value:.4f} {result.significance}")
from analysis import (
test_model_win_rates,
test_deception_by_role,
test_game_length_deception_correlation,
)
# Test if models differ in win rates
result = test_model_win_rates(games_df, 'model_a', 'model_b')
# Test if fascists lie more than liberals
result = test_deception_by_role(decisions_df)
Apply consistent matplotlib styling.
from analysis import apply_publication_style
apply_publication_style()
# All subsequent plots use publication formatting
Save figure in multiple formats.
from analysis import save_figure
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot([1, 2, 3], [1, 4, 9])
save_figure(fig, "output/plot.png", formats=['svg', 'pdf'])
# Creates: output/plot.png, output/plot.svg, output/plot.pdf
Colorblind-safe color palette.
from analysis import COLORBLIND_PALETTE
liberal_color = COLORBLIND_PALETTE['liberal'] # '#0077BB'
fascist_color = COLORBLIND_PALETTE['fascist'] # '#CC3311'
Online mean/variance calculation.
from analysis import WelfordAccumulator
acc = WelfordAccumulator()
for value in data_stream:
    acc.update(value)
print(f"Mean: {acc.mean}, Std: {acc.std}")
Accumulate game statistics without loading all data.
from analysis import StreamingGameStats
stats = StreamingGameStats()
for game in game_stream:
    stats.add_game(game)
print(stats.liberal_win_rate)
print(stats.average_game_length)