Visualizations for Grevy's Zebra Behavioral Analysis¶
includes confidence intervals, p-values, and effect sizes
In [12]:
Copied!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
# Set publication-quality style
plt.style.use('default') # Clean, publication-ready style
sns.set_palette("colorblind") # Colorblind-friendly palette
# Custom color palette for consistency
# COLORS = {
# 'Walk': '#0173B2', # Blue
# 'Head Up': '#DE8F05', # Orange
# 'Graze': '#029E73', # Green
# 'Other': '#CC78BC' # Purple
# }
ethogram = ['Out of Sight', 'Fight','Urinate','Mutual-Groom','Defecate','Auto-Groom','Browse','Trot/Run','Head Up','Walk','Graze', 'Other']
# custom_colors = ['#e41a1c','#377eb8','#4daf4a','#984ea3','#d62728','#2ca02c','#8c564b','#e377c2','#7f7f7f','#17becf']
# color_map = {behavior: custom_colors[i] for i, behavior in enumerate(ethogram)}
color = sns.color_palette("tab20", len(ethogram))
COLORS = {behavior: color[i] for i, behavior in enumerate(ethogram)}
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
# Set publication-quality style
plt.style.use('default') # Clean, publication-ready style
sns.set_palette("colorblind") # Colorblind-friendly palette
# Custom color palette for consistency
# COLORS = {
# 'Walk': '#0173B2', # Blue
# 'Head Up': '#DE8F05', # Orange
# 'Graze': '#029E73', # Green
# 'Other': '#CC78BC' # Purple
# }
ethogram = ['Out of Sight', 'Fight','Urinate','Mutual-Groom','Defecate','Auto-Groom','Browse','Trot/Run','Head Up','Walk','Graze', 'Other']
# custom_colors = ['#e41a1c','#377eb8','#4daf4a','#984ea3','#d62728','#2ca02c','#8c564b','#e377c2','#7f7f7f','#17becf']
# color_map = {behavior: custom_colors[i] for i, behavior in enumerate(ethogram)}
color = sns.color_palette("tab20", len(ethogram))
COLORS = {behavior: color[i] for i, behavior in enumerate(ethogram)}
In [4]:
Copied!
# ============================================================================
# DATA PREPARATION
# ============================================================================
# Load and prepare data
df = pd.read_csv('data/grevystimebudgetscleaned.csv')
# Encode categorical variables
le_habitat = LabelEncoder()
le_herd_size = LabelEncoder()
df['Habitat_encoded'] = le_habitat.fit_transform(df['Habitat'])
df['HerdSize_encoded'] = le_herd_size.fit_transform(df['Herd Size'])
behaviors = ['Walk', 'Head Up', 'Graze', 'Other']
X = df[['Habitat_encoded', 'HerdSize_encoded']]
# Calculate regression results with confidence intervals
def calculate_regression_stats(X, y):
"""Calculate regression statistics including confidence intervals"""
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
n = len(y)
k = X.shape[1]
# Residuals and MSE
residuals = y - y_pred
mse = np.sum(residuals**2) / (n - k - 1)
# Design matrix with intercept
X_with_intercept = np.column_stack([np.ones(n), X])
try:
# Covariance matrix
cov_matrix = mse * np.linalg.inv(X_with_intercept.T @ X_with_intercept)
se_coefs = np.sqrt(np.diag(cov_matrix))
# t-statistics and p-values
coefs_with_intercept = np.append(model.intercept_, model.coef_)
t_stats = coefs_with_intercept / se_coefs
p_values = 2 * (1 - stats.t.cdf(np.abs(t_stats), n - k - 1))
# 95% Confidence intervals
t_critical = stats.t.ppf(0.975, n - k - 1)
ci_lower = coefs_with_intercept - t_critical * se_coefs
ci_upper = coefs_with_intercept + t_critical * se_coefs
return {
'model': model,
'r2': r2_score(y, y_pred),
'coefficients': model.coef_,
'intercept': model.intercept_,
'se_coefs': se_coefs[1:], # Exclude intercept
'se_intercept': se_coefs[0],
't_stats': t_stats[1:], # Exclude intercept
'p_values': p_values[1:], # Exclude intercept
'ci_lower': ci_lower[1:], # Exclude intercept
'ci_upper': ci_upper[1:], # Exclude intercept
'predictions': y_pred
}
except:
return None
# Calculate stats for all behaviors
results = {}
for behavior in behaviors:
results[behavior] = calculate_regression_stats(X, df[behavior])
# ============================================================================
# DATA PREPARATION
# ============================================================================
# Load and prepare data
df = pd.read_csv('data/grevystimebudgetscleaned.csv')
# Encode categorical variables
le_habitat = LabelEncoder()
le_herd_size = LabelEncoder()
df['Habitat_encoded'] = le_habitat.fit_transform(df['Habitat'])
df['HerdSize_encoded'] = le_herd_size.fit_transform(df['Herd Size'])
behaviors = ['Walk', 'Head Up', 'Graze', 'Other']
X = df[['Habitat_encoded', 'HerdSize_encoded']]
# Calculate regression results with confidence intervals
def calculate_regression_stats(X, y):
"""Calculate regression statistics including confidence intervals"""
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
n = len(y)
k = X.shape[1]
# Residuals and MSE
residuals = y - y_pred
mse = np.sum(residuals**2) / (n - k - 1)
# Design matrix with intercept
X_with_intercept = np.column_stack([np.ones(n), X])
try:
# Covariance matrix
cov_matrix = mse * np.linalg.inv(X_with_intercept.T @ X_with_intercept)
se_coefs = np.sqrt(np.diag(cov_matrix))
# t-statistics and p-values
coefs_with_intercept = np.append(model.intercept_, model.coef_)
t_stats = coefs_with_intercept / se_coefs
p_values = 2 * (1 - stats.t.cdf(np.abs(t_stats), n - k - 1))
# 95% Confidence intervals
t_critical = stats.t.ppf(0.975, n - k - 1)
ci_lower = coefs_with_intercept - t_critical * se_coefs
ci_upper = coefs_with_intercept + t_critical * se_coefs
return {
'model': model,
'r2': r2_score(y, y_pred),
'coefficients': model.coef_,
'intercept': model.intercept_,
'se_coefs': se_coefs[1:], # Exclude intercept
'se_intercept': se_coefs[0],
't_stats': t_stats[1:], # Exclude intercept
'p_values': p_values[1:], # Exclude intercept
'ci_lower': ci_lower[1:], # Exclude intercept
'ci_upper': ci_upper[1:], # Exclude intercept
'predictions': y_pred
}
except:
return None
# Calculate stats for all behaviors
results = {}
for behavior in behaviors:
results[behavior] = calculate_regression_stats(X, df[behavior])
In [69]:
Copied!
# ============================================================================
# FIGURE 1: COEFFICIENT PLOT WITH CONFIDENCE INTERVALS
# ============================================================================
def create_coefficient_plot():
"""Create coefficient plot with confidence intervals"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 8))
# Prepare data for plotting
coef_data = []
for behavior in behaviors:
if results[behavior]:
# Habitat coefficient
coef_data.append({
'Behavior': behavior,
'Predictor': 'Habitat\n(Open vs Closed)',
'Coefficient': results[behavior]['coefficients'][0],
'CI_Lower': results[behavior]['ci_lower'][0],
'CI_Upper': results[behavior]['ci_upper'][0],
'P_Value': results[behavior]['p_values'][0],
'SE': results[behavior]['se_coefs'][0]
})
# Herd Size coefficient
coef_data.append({
'Behavior': behavior,
'Predictor': 'Herd Size\n(Small vs Large)',
'Coefficient': results[behavior]['coefficients'][1],
'CI_Lower': results[behavior]['ci_lower'][1],
'CI_Upper': results[behavior]['ci_upper'][1],
'P_Value': results[behavior]['p_values'][1],
'SE': results[behavior]['se_coefs'][1]
})
coef_df = pd.DataFrame(coef_data)
# Plot 1: Habitat Effects
habitat_data = coef_df[coef_df['Predictor'] == 'Habitat\n(Open vs Closed)']
y_pos = np.arange(len(behaviors))
colors_list = [COLORS[behavior] for behavior in behaviors]
# Plot coefficients with error bars
for i, (behavior, row) in enumerate(zip(behaviors, habitat_data.iterrows())):
_, data = row
# Plot coefficient (no alpha variation)
ax1.barh(i, data['Coefficient'], color=colors_list[i], alpha=0.8,
height=0.6, edgecolor='black', linewidth=0.5)
# Plot confidence interval
ax1.errorbar(data['Coefficient'], i,
xerr=[[data['Coefficient'] - data['CI_Lower']],
[data['CI_Upper'] - data['Coefficient']]],
fmt='none', color='black', capsize=4, capthick=1)
ax1.set_yticks(y_pos)
ax1.set_yticklabels(behaviors)
ax1.set_xlabel('Coefficient (Open - Closed)', fontweight='bold')
ax1.set_title('A) Habitat Effects', fontweight='bold', fontsize=14)
ax1.axvline(x=0, color='black', linestyle='--', alpha=0.5)
ax1.grid(axis='x', alpha=0.3)
# print p-values in bottom left corner of each plot
for i, (behavior, row) in enumerate(zip(behaviors, habitat_data.iterrows())):
_, data = row
p_val = data['P_Value']
if p_val < 0.001:
sig = '***'
elif p_val < 0.01:
sig = '**'
elif p_val < 0.05:
sig = '*'
else:
sig = 'ns'
# Plot 2: Herd Size Effects
herdsize_data = coef_df[coef_df['Predictor'] == 'Herd Size\n(Small vs Large)']
for i, (behavior, row) in enumerate(zip(behaviors, herdsize_data.iterrows())):
_, data = row
# Plot coefficient (no alpha variation)
ax2.barh(i, data['Coefficient'], color=colors_list[i], alpha=0.8,
height=0.6, edgecolor='black', linewidth=0.5)
# Plot confidence interval
ax2.errorbar(data['Coefficient'], i,
xerr=[[data['Coefficient'] - data['CI_Lower']],
[data['CI_Upper'] - data['Coefficient']]],
fmt='none', color='black', capsize=4, capthick=1)
ax2.set_yticks(y_pos)
ax2.set_yticklabels(behaviors)
ax2.set_xlabel('Coefficient (Small - Large)', fontweight='bold')
ax2.set_title('B) Herd Size Effects', fontweight='bold', fontsize=14)
ax2.axvline(x=0, color='black', linestyle='--', alpha=0.5)
ax2.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.subplots_adjust(bottom=0.1, top=0.9) # Add top padding for title
return fig
# ============================================================================
# FIGURE 1: COEFFICIENT PLOT WITH CONFIDENCE INTERVALS
# ============================================================================
def create_coefficient_plot():
"""Create coefficient plot with confidence intervals"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 8))
# Prepare data for plotting
coef_data = []
for behavior in behaviors:
if results[behavior]:
# Habitat coefficient
coef_data.append({
'Behavior': behavior,
'Predictor': 'Habitat\n(Open vs Closed)',
'Coefficient': results[behavior]['coefficients'][0],
'CI_Lower': results[behavior]['ci_lower'][0],
'CI_Upper': results[behavior]['ci_upper'][0],
'P_Value': results[behavior]['p_values'][0],
'SE': results[behavior]['se_coefs'][0]
})
# Herd Size coefficient
coef_data.append({
'Behavior': behavior,
'Predictor': 'Herd Size\n(Small vs Large)',
'Coefficient': results[behavior]['coefficients'][1],
'CI_Lower': results[behavior]['ci_lower'][1],
'CI_Upper': results[behavior]['ci_upper'][1],
'P_Value': results[behavior]['p_values'][1],
'SE': results[behavior]['se_coefs'][1]
})
coef_df = pd.DataFrame(coef_data)
# Plot 1: Habitat Effects
habitat_data = coef_df[coef_df['Predictor'] == 'Habitat\n(Open vs Closed)']
y_pos = np.arange(len(behaviors))
colors_list = [COLORS[behavior] for behavior in behaviors]
# Plot coefficients with error bars
for i, (behavior, row) in enumerate(zip(behaviors, habitat_data.iterrows())):
_, data = row
# Plot coefficient (no alpha variation)
ax1.barh(i, data['Coefficient'], color=colors_list[i], alpha=0.8,
height=0.6, edgecolor='black', linewidth=0.5)
# Plot confidence interval
ax1.errorbar(data['Coefficient'], i,
xerr=[[data['Coefficient'] - data['CI_Lower']],
[data['CI_Upper'] - data['Coefficient']]],
fmt='none', color='black', capsize=4, capthick=1)
ax1.set_yticks(y_pos)
ax1.set_yticklabels(behaviors)
ax1.set_xlabel('Coefficient (Open - Closed)', fontweight='bold')
ax1.set_title('A) Habitat Effects', fontweight='bold', fontsize=14)
ax1.axvline(x=0, color='black', linestyle='--', alpha=0.5)
ax1.grid(axis='x', alpha=0.3)
# print p-values in bottom left corner of each plot
for i, (behavior, row) in enumerate(zip(behaviors, habitat_data.iterrows())):
_, data = row
p_val = data['P_Value']
if p_val < 0.001:
sig = '***'
elif p_val < 0.01:
sig = '**'
elif p_val < 0.05:
sig = '*'
else:
sig = 'ns'
# Plot 2: Herd Size Effects
herdsize_data = coef_df[coef_df['Predictor'] == 'Herd Size\n(Small vs Large)']
for i, (behavior, row) in enumerate(zip(behaviors, herdsize_data.iterrows())):
_, data = row
# Plot coefficient (no alpha variation)
ax2.barh(i, data['Coefficient'], color=colors_list[i], alpha=0.8,
height=0.6, edgecolor='black', linewidth=0.5)
# Plot confidence interval
ax2.errorbar(data['Coefficient'], i,
xerr=[[data['Coefficient'] - data['CI_Lower']],
[data['CI_Upper'] - data['Coefficient']]],
fmt='none', color='black', capsize=4, capthick=1)
ax2.set_yticks(y_pos)
ax2.set_yticklabels(behaviors)
ax2.set_xlabel('Coefficient (Small - Large)', fontweight='bold')
ax2.set_title('B) Herd Size Effects', fontweight='bold', fontsize=14)
ax2.axvline(x=0, color='black', linestyle='--', alpha=0.5)
ax2.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.subplots_adjust(bottom=0.1, top=0.9) # Add top padding for title
return fig
In [73]:
Copied!
fig1 = create_coefficient_plot()
fig1.suptitle('Linear Regression Coefficients with 95% Confidence Intervals',
fontsize=16, fontweight='bold', y=0.98)
plt.show()
fig1 = create_coefficient_plot()
fig1.suptitle('Linear Regression Coefficients with 95% Confidence Intervals',
fontsize=16, fontweight='bold', y=0.98)
plt.show()
In [74]:
Copied!
# ============================================================================
# FIGURE 2: MODEL PERFORMANCE AND EFFECT SIZES
# ============================================================================
def create_performance_plot():
"""Create model performance visualization with R² and effect sizes"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
# Prepare data
r2_values = [results[behavior]['r2'] for behavior in behaviors]
colors_list = [COLORS[behavior] for behavior in behaviors]
# Plot 1: R² values with confidence intervals
y_pos = np.arange(len(behaviors))
bars = ax1.barh(y_pos, r2_values, color=colors_list, alpha=0.8,
height=0.6, edgecolor='black', linewidth=0.5)
# Add value labels
for i, (bar, r2_val) in enumerate(zip(bars, r2_values)):
ax1.text(r2_val + 0.01, bar.get_y() + bar.get_height()/2,
f'{r2_val:.3f}', va='center', ha='left', fontweight='bold')
ax1.set_yticks(y_pos)
ax1.set_yticklabels(behaviors)
ax1.set_xlabel('R² (Variance Explained)', fontweight='bold')
ax1.set_title('A) Model Performance', fontweight='bold', fontsize=14)
ax1.set_xlim(0, max(r2_values) * 1.2)
ax1.grid(axis='x', alpha=0.3)
# Plot 2: Effect Size Comparison (Cohen's f²)
# Calculate Cohen's f² = R² / (1 - R²)
cohens_f2 = [r2 / (1 - r2) if r2 < 1 else np.inf for r2 in r2_values]
bars2 = ax2.barh(y_pos, cohens_f2, color=colors_list, alpha=0.8,
height=0.6, edgecolor='black', linewidth=0.5)
# Add value labels and effect size interpretation
for i, (bar, f2_val) in enumerate(zip(bars2, cohens_f2)):
if f2_val < 0.02:
effect_size = "trivial"
elif f2_val < 0.15:
effect_size = "small"
elif f2_val < 0.35:
effect_size = "medium"
else:
effect_size = "large"
ax2.text(f2_val + 0.01, bar.get_y() + bar.get_height()/2,
f'{f2_val:.3f}\n({effect_size})', va='center', ha='left', fontsize=9)
ax2.set_yticks(y_pos)
ax2.set_yticklabels(behaviors)
ax2.set_xlabel('Cohen\'s f² (Effect Size)', fontweight='bold')
ax2.set_title('B) Effect Size', fontweight='bold', fontsize=14)
ax2.set_xlim(0, max(cohens_f2) * 1.3)
ax2.grid(axis='x', alpha=0.3)
# Add effect size reference lines
ax2.axvline(x=0.02, color='gray', linestyle=':', alpha=0.7, label='Small (0.02)')
ax2.axvline(x=0.15, color='gray', linestyle='--', alpha=0.7, label='Medium (0.15)')
ax2.axvline(x=0.35, color='gray', linestyle='-', alpha=0.7, label='Large (0.35)')
ax2.legend(loc='lower right', fontsize=8)
plt.tight_layout()
plt.subplots_adjust(bottom=0.1, top=0.9)
return fig
# ============================================================================
# FIGURE 2: MODEL PERFORMANCE AND EFFECT SIZES
# ============================================================================
def create_performance_plot():
"""Create model performance visualization with R² and effect sizes"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
# Prepare data
r2_values = [results[behavior]['r2'] for behavior in behaviors]
colors_list = [COLORS[behavior] for behavior in behaviors]
# Plot 1: R² values with confidence intervals
y_pos = np.arange(len(behaviors))
bars = ax1.barh(y_pos, r2_values, color=colors_list, alpha=0.8,
height=0.6, edgecolor='black', linewidth=0.5)
# Add value labels
for i, (bar, r2_val) in enumerate(zip(bars, r2_values)):
ax1.text(r2_val + 0.01, bar.get_y() + bar.get_height()/2,
f'{r2_val:.3f}', va='center', ha='left', fontweight='bold')
ax1.set_yticks(y_pos)
ax1.set_yticklabels(behaviors)
ax1.set_xlabel('R² (Variance Explained)', fontweight='bold')
ax1.set_title('A) Model Performance', fontweight='bold', fontsize=14)
ax1.set_xlim(0, max(r2_values) * 1.2)
ax1.grid(axis='x', alpha=0.3)
# Plot 2: Effect Size Comparison (Cohen's f²)
# Calculate Cohen's f² = R² / (1 - R²)
cohens_f2 = [r2 / (1 - r2) if r2 < 1 else np.inf for r2 in r2_values]
bars2 = ax2.barh(y_pos, cohens_f2, color=colors_list, alpha=0.8,
height=0.6, edgecolor='black', linewidth=0.5)
# Add value labels and effect size interpretation
for i, (bar, f2_val) in enumerate(zip(bars2, cohens_f2)):
if f2_val < 0.02:
effect_size = "trivial"
elif f2_val < 0.15:
effect_size = "small"
elif f2_val < 0.35:
effect_size = "medium"
else:
effect_size = "large"
ax2.text(f2_val + 0.01, bar.get_y() + bar.get_height()/2,
f'{f2_val:.3f}\n({effect_size})', va='center', ha='left', fontsize=9)
ax2.set_yticks(y_pos)
ax2.set_yticklabels(behaviors)
ax2.set_xlabel('Cohen\'s f² (Effect Size)', fontweight='bold')
ax2.set_title('B) Effect Size', fontweight='bold', fontsize=14)
ax2.set_xlim(0, max(cohens_f2) * 1.3)
ax2.grid(axis='x', alpha=0.3)
# Add effect size reference lines
ax2.axvline(x=0.02, color='gray', linestyle=':', alpha=0.7, label='Small (0.02)')
ax2.axvline(x=0.15, color='gray', linestyle='--', alpha=0.7, label='Medium (0.15)')
ax2.axvline(x=0.35, color='gray', linestyle='-', alpha=0.7, label='Large (0.35)')
ax2.legend(loc='lower right', fontsize=8)
plt.tight_layout()
plt.subplots_adjust(bottom=0.1, top=0.9)
return fig
In [75]:
Copied!
# ============================================================================
# FIGURE 3: PREDICTED MEANS WITH CONFIDENCE INTERVALS
# ============================================================================
def create_predicted_means_plot():
"""Create plot showing predicted means for each combination with CIs"""
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Calculate predicted means and confidence intervals
combinations = [
('Closed', 'Large', 0, 0), # Baseline
('Open', 'Large', 1, 0), # Habitat effect only
('Closed', 'Small', 0, 1), # Herd size effect only
('Open', 'Small', 1, 1) # Both effects
]
for i, behavior in enumerate(behaviors):
ax = axes[i//2, i%2]
if results[behavior] is None:
continue
model = results[behavior]['model']
intercept = results[behavior]['intercept']
habitat_coef = results[behavior]['coefficients'][0]
herdsize_coef = results[behavior]['coefficients'][1]
# Calculate predictions and standard errors
x_positions = []
predictions = []
ci_lowers = []
ci_uppers = []
labels = []
for j, (habitat, herd_size, h_code, hs_code) in enumerate(combinations):
# Predicted value
pred = intercept + habitat_coef * h_code + herdsize_coef * hs_code
# For simplicity, use pooled SE (could be more sophisticated)
# Standard error of prediction
X_combo = np.array([[h_code, hs_code]])
# Calculate actual observed values for this combination
mask = (df['Habitat'] == habitat) & (df['Herd Size'] == herd_size)
observed = df[mask][behavior]
if len(observed) > 1:
se_pred = stats.sem(observed) # Standard error of the mean
ci_lower = pred - 1.96 * se_pred
ci_upper = pred + 1.96 * se_pred
else:
ci_lower = pred
ci_upper = pred
x_positions.append(j)
predictions.append(pred)
ci_lowers.append(ci_lower)
ci_uppers.append(ci_upper)
labels.append(f'{habitat}\n{herd_size}')
# Plot predictions with confidence intervals
ax.bar(x_positions, predictions, color=COLORS[behavior], alpha=0.7,
width=0.6, edgecolor='black', linewidth=0.5)
# Add confidence intervals
ax.errorbar(x_positions, predictions,
yerr=[np.array(predictions) - np.array(ci_lowers),
np.array(ci_uppers) - np.array(predictions)],
fmt='none', color='black', capsize=5, capthick=1.5)
# Add value labels
for j, (x, pred) in enumerate(zip(x_positions, predictions)):
ax.text(x, pred + (max(ci_uppers) - min(ci_lowers)) * 0.05,
f'{pred:.3f}', ha='center', va='bottom', fontweight='bold', fontsize=9)
# Add observed data points as overlay
for j, (habitat, herd_size, _, _) in enumerate(combinations):
mask = (df['Habitat'] == habitat) & (df['Herd Size'] == herd_size)
observed = df[mask][behavior]
if len(observed) > 0:
jittered_x = j + np.random.normal(0, 0.08, len(observed))
ax.scatter(jittered_x, observed, color='red', alpha=0.6, s=20,
edgecolor='darkred', linewidth=0.5, zorder=10)
ax.set_xticks(x_positions)
ax.set_xticklabels(labels, rotation=0)
ax.set_ylabel('Proportion of Time', fontweight='bold')
ax.set_title(f'{behavior}\n(R² = {results[behavior]["r2"]:.3f})',
fontweight='bold', color=COLORS[behavior])
ax.grid(axis='y', alpha=0.3)
# Set y-axis to start from 0
ax.set_ylim(0, max(ci_uppers) * 1.15)
# Add overall legend
from matplotlib.lines import Line2D
legend_elements = [
Line2D([0], [0], marker='o', color='red', linestyle='', markersize=8,
alpha=0.6, label='Observed Data'),
Line2D([0], [0], color='black', linestyle='-', label='95% Confidence Intervals')
]
fig.legend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, 0.02), ncol=2)
plt.suptitle('Predicted Behavior by Habitat and Herd Size Combinations',
fontsize=16, fontweight='bold', y=0.98)
plt.tight_layout()
plt.subplots_adjust(bottom=0.1, top=0.92)
return fig
# ============================================================================
# FIGURE 3: PREDICTED MEANS WITH CONFIDENCE INTERVALS
# ============================================================================
def create_predicted_means_plot():
"""Create plot showing predicted means for each combination with CIs"""
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Calculate predicted means and confidence intervals
combinations = [
('Closed', 'Large', 0, 0), # Baseline
('Open', 'Large', 1, 0), # Habitat effect only
('Closed', 'Small', 0, 1), # Herd size effect only
('Open', 'Small', 1, 1) # Both effects
]
for i, behavior in enumerate(behaviors):
ax = axes[i//2, i%2]
if results[behavior] is None:
continue
model = results[behavior]['model']
intercept = results[behavior]['intercept']
habitat_coef = results[behavior]['coefficients'][0]
herdsize_coef = results[behavior]['coefficients'][1]
# Calculate predictions and standard errors
x_positions = []
predictions = []
ci_lowers = []
ci_uppers = []
labels = []
for j, (habitat, herd_size, h_code, hs_code) in enumerate(combinations):
# Predicted value
pred = intercept + habitat_coef * h_code + herdsize_coef * hs_code
# For simplicity, use pooled SE (could be more sophisticated)
# Standard error of prediction
X_combo = np.array([[h_code, hs_code]])
# Calculate actual observed values for this combination
mask = (df['Habitat'] == habitat) & (df['Herd Size'] == herd_size)
observed = df[mask][behavior]
if len(observed) > 1:
se_pred = stats.sem(observed) # Standard error of the mean
ci_lower = pred - 1.96 * se_pred
ci_upper = pred + 1.96 * se_pred
else:
ci_lower = pred
ci_upper = pred
x_positions.append(j)
predictions.append(pred)
ci_lowers.append(ci_lower)
ci_uppers.append(ci_upper)
labels.append(f'{habitat}\n{herd_size}')
# Plot predictions with confidence intervals
ax.bar(x_positions, predictions, color=COLORS[behavior], alpha=0.7,
width=0.6, edgecolor='black', linewidth=0.5)
# Add confidence intervals
ax.errorbar(x_positions, predictions,
yerr=[np.array(predictions) - np.array(ci_lowers),
np.array(ci_uppers) - np.array(predictions)],
fmt='none', color='black', capsize=5, capthick=1.5)
# Add value labels
for j, (x, pred) in enumerate(zip(x_positions, predictions)):
ax.text(x, pred + (max(ci_uppers) - min(ci_lowers)) * 0.05,
f'{pred:.3f}', ha='center', va='bottom', fontweight='bold', fontsize=9)
# Add observed data points as overlay
for j, (habitat, herd_size, _, _) in enumerate(combinations):
mask = (df['Habitat'] == habitat) & (df['Herd Size'] == herd_size)
observed = df[mask][behavior]
if len(observed) > 0:
jittered_x = j + np.random.normal(0, 0.08, len(observed))
ax.scatter(jittered_x, observed, color='red', alpha=0.6, s=20,
edgecolor='darkred', linewidth=0.5, zorder=10)
ax.set_xticks(x_positions)
ax.set_xticklabels(labels, rotation=0)
ax.set_ylabel('Proportion of Time', fontweight='bold')
ax.set_title(f'{behavior}\n(R² = {results[behavior]["r2"]:.3f})',
fontweight='bold', color=COLORS[behavior])
ax.grid(axis='y', alpha=0.3)
# Set y-axis to start from 0
ax.set_ylim(0, max(ci_uppers) * 1.15)
# Add overall legend
from matplotlib.lines import Line2D
legend_elements = [
Line2D([0], [0], marker='o', color='red', linestyle='', markersize=8,
alpha=0.6, label='Observed Data'),
Line2D([0], [0], color='black', linestyle='-', label='95% Confidence Intervals')
]
fig.legend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, 0.02), ncol=2)
plt.suptitle('Predicted Behavior by Habitat and Herd Size Combinations',
fontsize=16, fontweight='bold', y=0.98)
plt.tight_layout()
plt.subplots_adjust(bottom=0.1, top=0.92)
return fig
In [76]:
Copied!
# ============================================================================
# GENERATE ALL FIGURES
# ============================================================================
# Create all figures
print("Generating figures...")
fig2 = create_performance_plot()
fig2.suptitle('Model Performance and Effect Sizes',
fontsize=16, fontweight='bold', y=0.98)
plt.show()
fig3 = create_predicted_means_plot()
plt.show()
# ============================================================================
# GENERATE ALL FIGURES
# ============================================================================
# Create all figures
print("Generating figures...")
fig2 = create_performance_plot()
fig2.suptitle('Model Performance and Effect Sizes',
fontsize=16, fontweight='bold', y=0.98)
plt.show()
fig3 = create_predicted_means_plot()
plt.show()
In [77]:
Copied!
print("\nSaving high-resolution figures for publication...")
# Save as both PNG (for presentations) and PDF (for publication)
fig1.savefig('figures/Figure1_Coefficients.png', dpi=300, bbox_inches='tight', facecolor='white')
fig1.savefig('figures/Figure1_Coefficients.pdf', dpi=300, bbox_inches='tight', facecolor='white')
fig2.savefig('figures/Figure2_Performance.png', dpi=300, bbox_inches='tight', facecolor='white')
fig2.savefig('figures/Figure2_Performance.pdf', dpi=300, bbox_inches='tight', facecolor='white')
fig3.savefig('figures/Figure3_PredictedMeans.png', dpi=300, bbox_inches='tight', facecolor='white')
fig3.savefig('figures/Figure3_PredictedMeans.pdf', dpi=300, bbox_inches='tight', facecolor='white')
print("figures/Figures saved successfully!")
print("\nFigure descriptions for manuscript:")
print("="*50)
print("""
Figure 1: Linear regression coefficients with 95% confidence intervals showing the effect of
habitat (Open vs Closed) and herd size (Small vs Large) on four behaviors. Asterisks indicate
statistical significance: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant.
Solid bars indicate significant effects (p<0.05), transparent bars indicate non-significant effects.
Figure 2: Model performance metrics showing (A) proportion of variance explained (R²) and
(B) effect sizes (Cohen's f²) for each behavioral model. Reference lines in panel B indicate
conventional thresholds for small (0.02), medium (0.15), and large (0.35) effect sizes.
Figure 3: Predicted behavioral proportions for all habitat-herd size combinations with 95%
confidence intervals. Red dots show observed individual data points with jittering to reduce
overlap. Error bars represent confidence intervals around predicted means.
""")
print("\nSaving high-resolution figures for publication...")
# Save as both PNG (for presentations) and PDF (for publication)
fig1.savefig('figures/Figure1_Coefficients.png', dpi=300, bbox_inches='tight', facecolor='white')
fig1.savefig('figures/Figure1_Coefficients.pdf', dpi=300, bbox_inches='tight', facecolor='white')
fig2.savefig('figures/Figure2_Performance.png', dpi=300, bbox_inches='tight', facecolor='white')
fig2.savefig('figures/Figure2_Performance.pdf', dpi=300, bbox_inches='tight', facecolor='white')
fig3.savefig('figures/Figure3_PredictedMeans.png', dpi=300, bbox_inches='tight', facecolor='white')
fig3.savefig('figures/Figure3_PredictedMeans.pdf', dpi=300, bbox_inches='tight', facecolor='white')
print("figures/Figures saved successfully!")
print("\nFigure descriptions for manuscript:")
print("="*50)
print("""
Figure 1: Linear regression coefficients with 95% confidence intervals showing the effect of
habitat (Open vs Closed) and herd size (Small vs Large) on four behaviors. Asterisks indicate
statistical significance: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant.
Solid bars indicate significant effects (p<0.05), transparent bars indicate non-significant effects.
Figure 2: Model performance metrics showing (A) proportion of variance explained (R²) and
(B) effect sizes (Cohen's f²) for each behavioral model. Reference lines in panel B indicate
conventional thresholds for small (0.02), medium (0.15), and large (0.35) effect sizes.
Figure 3: Predicted behavioral proportions for all habitat-herd size combinations with 95%
confidence intervals. Red dots show observed individual data points with jittering to reduce
overlap. Error bars represent confidence intervals around predicted means.
""")
Saving high-resolution figures for publication... figures/Figures saved successfully! Figure descriptions for manuscript: ================================================== Figure 1: Linear regression coefficients with 95% confidence intervals showing the effect of habitat (Open vs Closed) and herd size (Small vs Large) on four behaviors. Asterisks indicate statistical significance: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant. Solid bars indicate significant effects (p<0.05), transparent bars indicate non-significant effects. Figure 2: Model performance metrics showing (A) proportion of variance explained (R²) and (B) effect sizes (Cohen's f²) for each behavioral model. Reference lines in panel B indicate conventional thresholds for small (0.02), medium (0.15), and large (0.35) effect sizes. Figure 3: Predicted behavioral proportions for all habitat-herd size combinations with 95% confidence intervals. Red dots show observed individual data points with jittering to reduce overlap. Error bars represent confidence intervals around predicted means.