Generate Gantt charts showing behavior gathered from focal sampling vs. drones
In [2]:
Copied!
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pysrt
import json
from pathlib import Path
import more_itertools as mit
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pysrt
import json
from pathlib import Path
import more_itertools as mit
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
Copied!
# get the start and end times for each behavior duration
def get_start_end_times(df):
    """Collapse per-row behavior labels into runs of identical consecutive labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``behavior`` and a ``time`` column; rows are walked in
        their positional order.

    Returns
    -------
    list of [behavior, start, end]
        One entry per run. ``start`` is the ``time`` of the first row of the
        run and ``end`` is the ``time`` of the last row of the same run (not
        the first row of the next one). An empty frame yields an empty list.
    """
    segments = []
    previous_row = None
    for position in range(len(df)):
        row = df.iloc[position]
        label = row['behavior']
        if previous_row is None:
            # Very first row: open the first run (0 is a placeholder end).
            segments.append([label, row['time'], 0])
        elif label != previous_row['behavior']:
            # Label changed: close the running segment on the previous row
            # and open a new one on the current row.
            segments[-1][2] = previous_row['time']
            segments.append([label, row['time'], 0])
        previous_row = row

    # The run that is still open when the data ends stops at the final row.
    if previous_row is not None:
        segments[-1][2] = previous_row['time']
    return segments
def calculate_duration(df):
    """Add ``duration`` and ``rel_start`` columns (both in seconds) to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide datetime-like ``start`` and ``end`` columns.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; it is modified in place.
        ``duration`` is ``end - start`` and ``rel_start`` is the offset of each
        ``start`` from the ``start`` of the first row.
    """
    df['duration'] = (df['end'] - df['start']).dt.total_seconds()

    # A frame without rows has no first start to measure from; indexing
    # ``.iloc[0]`` would raise IndexError, so give it an empty float column.
    if df.empty:
        df['rel_start'] = df['duration']
        return df

    first_start = df['start'].iloc[0]
    df['rel_start'] = (df['start'] - first_start).dt.total_seconds()
    return df
# get the start and end times for each behavior duration
def get_start_end_times(df):
    """Collapse per-row behavior labels into runs of identical consecutive labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``behavior`` and a ``time`` column; rows are walked in
        their positional order.

    Returns
    -------
    list of [behavior, start, end]
        One entry per run. ``start`` is the ``time`` of the first row of the
        run and ``end`` is the ``time`` of the last row of the same run (not
        the first row of the next one). An empty frame yields an empty list.
    """
    segments = []
    previous_row = None
    for position in range(len(df)):
        row = df.iloc[position]
        label = row['behavior']
        if previous_row is None:
            # Very first row: open the first run (0 is a placeholder end).
            segments.append([label, row['time'], 0])
        elif label != previous_row['behavior']:
            # Label changed: close the running segment on the previous row
            # and open a new one on the current row.
            segments[-1][2] = previous_row['time']
            segments.append([label, row['time'], 0])
        previous_row = row

    # The run that is still open when the data ends stops at the final row.
    if previous_row is not None:
        segments[-1][2] = previous_row['time']
    return segments
def calculate_duration(df):
    """Add ``duration`` and ``rel_start`` columns (both in seconds) to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide datetime-like ``start`` and ``end`` columns.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; it is modified in place.
        ``duration`` is ``end - start`` and ``rel_start`` is the offset of each
        ``start`` from the ``start`` of the first row.
    """
    df['duration'] = (df['end'] - df['start']).dt.total_seconds()

    # A frame without rows has no first start to measure from; indexing
    # ``.iloc[0]`` would raise IndexError, so give it an empty float column.
    if df.empty:
        df['rel_start'] = df['duration']
        return df

    first_start = df['start'].iloc[0]
    df['rel_start'] = (df['start'] - first_start).dt.total_seconds()
    return df
In [4]:
Copied!
def clean_categories(name):
    """Map a raw behavior label onto its canonical category name.

    The label is lower-cased and searched for a list of known substrings; the
    first substring that matches decides the category. Labels that match
    nothing are returned unchanged.

    Parameters
    ----------
    name : str or any
        Raw label. Non-string values (e.g. the NaN pandas produces for a blank
        cell, or None) are returned as-is instead of raising on ``.lower()``.

    Returns
    -------
    The canonical category string, or ``name`` itself when nothing matches.
    """
    if not isinstance(name, str):
        return name

    # (substring, category) pairs. Order matters: the first hit wins, so a
    # label containing both "head up" and "walk" becomes "Head Up".
    substring_to_category = (
        ("head up", "Head Up"),
        ("walk", "Walk"),
        ("out of focus", "Out of Sight"),
        ("fighting", "Fight"),
        ("trot/run", "Trot/Run"),
        ("out of sight", "Out of Sight"),
        ("no data", "Out of Sight"),
        ("out of frame", "Out of Sight"),
        ("occluded", "Out of Sight"),
        ("sniff", "Graze"),  # sniff is replaced with graze
    )

    lowercased = name.lower()
    for substring, category in substring_to_category:
        if substring in lowercased:
            return category
    return name
def clean_categories(name):
    """Map a raw behavior label onto its canonical category name.

    The label is lower-cased and searched for a list of known substrings; the
    first substring that matches decides the category. Labels that match
    nothing are returned unchanged.

    Parameters
    ----------
    name : str or any
        Raw label. Non-string values (e.g. the NaN pandas produces for a blank
        cell, or None) are returned as-is instead of raising on ``.lower()``.

    Returns
    -------
    The canonical category string, or ``name`` itself when nothing matches.
    """
    if not isinstance(name, str):
        return name

    # (substring, category) pairs. Order matters: the first hit wins, so a
    # label containing both "head up" and "walk" becomes "Head Up".
    substring_to_category = (
        ("head up", "Head Up"),
        ("walk", "Walk"),
        ("out of focus", "Out of Sight"),
        ("fighting", "Fight"),
        ("trot/run", "Trot/Run"),
        ("out of sight", "Out of Sight"),
        ("no data", "Out of Sight"),
        ("out of frame", "Out of Sight"),
        ("occluded", "Out of Sight"),
        ("sniff", "Graze"),  # sniff is replaced with graze
    )

    lowercased = name.lower()
    for substring, category in substring_to_category:
        if substring in lowercased:
            return category
    return name
In [5]:
Copied!
# Data source: every CSV in this folder is turned into one two-panel Gantt
# chart (field focal observations on top, drone observations below).
data_dir = Path('focalvsdrone')

# Ethogram rows; the behavior at index i is drawn at y = i on both panels.
ethogram = ['Out of Sight', 'Fight', 'Urinate', 'Mutual-Groom', 'Defecate', 'Auto-Groom',
            'Browse', 'Trot/Run', 'Head Up', 'Walk', 'Graze']
colors = sns.color_palette("tab20", len(ethogram))
color_map = {behavior: colors[i] for i, behavior in enumerate(ethogram)}
behavior_positions = {behavior: i for i, behavior in enumerate(ethogram)}

# glob() yields files in filesystem order; sort so every run processes them
# in the same order.
for file in sorted(data_dir.glob('*.csv')):
    focal_drone_dfs = pd.read_csv(file)
    print(f'Processing file: {file.name}')

    # Clean the behavior categories before processing
    focal_drone_dfs['behavior'] = focal_drone_dfs['behavior'].apply(clean_categories)
    focal_drone_dfs['focal_behavior'] = focal_drone_dfs['focal_behavior'].apply(clean_categories)

    # Both sources share the 'time' column; give them the same column names.
    drone = focal_drone_dfs[['behavior', 'time']]
    focal = focal_drone_dfs[['focal_behavior', 'time']].rename(
        columns={'focal_behavior': 'behavior'})

    # Collapse the per-row labels into [behavior, start, end] runs.
    focal_times = get_start_end_times(focal)
    drone_times = get_start_end_times(drone)
    focal = pd.DataFrame(focal_times, columns=['behavior', 'start', 'end'])
    drone = pd.DataFrame(drone_times, columns=['behavior', 'start', 'end'])

    # Convert to datetime, then derive duration / relative start in seconds.
    for segments in (focal, drone):
        segments['start'] = pd.to_datetime(segments['start'])
        segments['end'] = pd.to_datetime(segments['end'])
    focal = calculate_duration(focal)
    drone = calculate_duration(drone)

    # Behaviors that were actually observed; anything outside the ethogram has
    # no row on the chart, so report it instead of dropping it silently.
    focal_behaviors = set(focal['behavior'].unique()) if not focal.empty else set()
    drone_behaviors = set(drone['behavior'].unique()) if not drone.empty else set()
    unplotted = sorted({str(b) for b in (focal_behaviors | drone_behaviors)
                        if b not in behavior_positions})
    if unplotted:
        print(f'Warning: behaviors not in the ethogram are not plotted for {file.name}: {unplotted}')

    # Shared time range: the latest segment end is max(rel_start + duration),
    # not max(rel_start) + max(duration).
    max_time_focal = (focal['rel_start'] + focal['duration']).max() if len(focal) > 0 else 0
    max_time_drone = (drone['rel_start'] + drone['duration']).max() if len(drone) > 0 else 0
    max_time = max(max_time_focal, max_time_drone)
    print(f"Max time for {file.name}: {max_time} seconds")

    earliest_starts = [segments['rel_start'].min()
                       for segments in (focal, drone) if not segments.empty]
    start_time = min(earliest_starts) if earliest_starts else 0
    total_duration = max_time - start_time

    # Plot the Gantt chart: identical layout on both panels.
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 4))
    for ax, segments in ((ax1, focal), (ax2, drone)):
        # Same y-axis labels on both panels, taken from the ethogram.
        ax.set_yticks(range(len(ethogram)))
        ax.set_yticklabels(ethogram)

        # Light gray background bar on every row so the full ethogram shows.
        for y_pos in range(len(ethogram)):
            ax.barh(y=y_pos, width=total_duration, left=start_time,
                    color='lightgray', alpha=0.2, zorder=0)

        # One colored bar per observed run; labels outside the ethogram are skipped.
        for behavior, rel_start, duration in zip(
                segments['behavior'], segments['rel_start'], segments['duration']):
            if behavior not in behavior_positions:
                continue
            ax.barh(y=behavior_positions[behavior], width=duration, left=rel_start,
                    color=color_map[behavior], alpha=0.8, zorder=2)

        # Subtle vertical grid lines for readability.
        ax.grid(True, axis='x', alpha=0.3, linestyle='-', linewidth=0.5)
        # Same x-range on both panels so they can be compared directly.
        ax.set_xlim(0, max_time)
        # Half a row of padding below the first and above the last behavior.
        ax.set_ylim(-0.5, len(ethogram) - 0.5)

    ax1.set(title='Field Focal Data', ylabel='Behavior')
    ax2.set(title='Drone Focal Data', xlabel='Time (seconds)', ylabel='Behavior')
    fig.tight_layout()

    # Save the plot next to its CSV and release the figure's memory.
    output_file = file.with_suffix('.png')
    fig.savefig(output_file, dpi=300)
    plt.close(fig)
# Data source: every CSV in this folder is turned into one two-panel Gantt
# chart (field focal observations on top, drone observations below).
data_dir = Path('focalvsdrone')

# Ethogram rows; the behavior at index i is drawn at y = i on both panels.
ethogram = ['Out of Sight', 'Fight', 'Urinate', 'Mutual-Groom', 'Defecate', 'Auto-Groom',
            'Browse', 'Trot/Run', 'Head Up', 'Walk', 'Graze']
colors = sns.color_palette("tab20", len(ethogram))
color_map = {behavior: colors[i] for i, behavior in enumerate(ethogram)}
behavior_positions = {behavior: i for i, behavior in enumerate(ethogram)}

# glob() yields files in filesystem order; sort so every run processes them
# in the same order.
for file in sorted(data_dir.glob('*.csv')):
    focal_drone_dfs = pd.read_csv(file)
    print(f'Processing file: {file.name}')

    # Clean the behavior categories before processing
    focal_drone_dfs['behavior'] = focal_drone_dfs['behavior'].apply(clean_categories)
    focal_drone_dfs['focal_behavior'] = focal_drone_dfs['focal_behavior'].apply(clean_categories)

    # Both sources share the 'time' column; give them the same column names.
    drone = focal_drone_dfs[['behavior', 'time']]
    focal = focal_drone_dfs[['focal_behavior', 'time']].rename(
        columns={'focal_behavior': 'behavior'})

    # Collapse the per-row labels into [behavior, start, end] runs.
    focal_times = get_start_end_times(focal)
    drone_times = get_start_end_times(drone)
    focal = pd.DataFrame(focal_times, columns=['behavior', 'start', 'end'])
    drone = pd.DataFrame(drone_times, columns=['behavior', 'start', 'end'])

    # Convert to datetime, then derive duration / relative start in seconds.
    for segments in (focal, drone):
        segments['start'] = pd.to_datetime(segments['start'])
        segments['end'] = pd.to_datetime(segments['end'])
    focal = calculate_duration(focal)
    drone = calculate_duration(drone)

    # Behaviors that were actually observed; anything outside the ethogram has
    # no row on the chart, so report it instead of dropping it silently.
    focal_behaviors = set(focal['behavior'].unique()) if not focal.empty else set()
    drone_behaviors = set(drone['behavior'].unique()) if not drone.empty else set()
    unplotted = sorted({str(b) for b in (focal_behaviors | drone_behaviors)
                        if b not in behavior_positions})
    if unplotted:
        print(f'Warning: behaviors not in the ethogram are not plotted for {file.name}: {unplotted}')

    # Shared time range: the latest segment end is max(rel_start + duration),
    # not max(rel_start) + max(duration).
    max_time_focal = (focal['rel_start'] + focal['duration']).max() if len(focal) > 0 else 0
    max_time_drone = (drone['rel_start'] + drone['duration']).max() if len(drone) > 0 else 0
    max_time = max(max_time_focal, max_time_drone)
    print(f"Max time for {file.name}: {max_time} seconds")

    earliest_starts = [segments['rel_start'].min()
                       for segments in (focal, drone) if not segments.empty]
    start_time = min(earliest_starts) if earliest_starts else 0
    total_duration = max_time - start_time

    # Plot the Gantt chart: identical layout on both panels.
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 4))
    for ax, segments in ((ax1, focal), (ax2, drone)):
        # Same y-axis labels on both panels, taken from the ethogram.
        ax.set_yticks(range(len(ethogram)))
        ax.set_yticklabels(ethogram)

        # Light gray background bar on every row so the full ethogram shows.
        for y_pos in range(len(ethogram)):
            ax.barh(y=y_pos, width=total_duration, left=start_time,
                    color='lightgray', alpha=0.2, zorder=0)

        # One colored bar per observed run; labels outside the ethogram are skipped.
        for behavior, rel_start, duration in zip(
                segments['behavior'], segments['rel_start'], segments['duration']):
            if behavior not in behavior_positions:
                continue
            ax.barh(y=behavior_positions[behavior], width=duration, left=rel_start,
                    color=color_map[behavior], alpha=0.8, zorder=2)

        # Subtle vertical grid lines for readability.
        ax.grid(True, axis='x', alpha=0.3, linestyle='-', linewidth=0.5)
        # Same x-range on both panels so they can be compared directly.
        ax.set_xlim(0, max_time)
        # Half a row of padding below the first and above the last behavior.
        ax.set_ylim(-0.5, len(ethogram) - 0.5)

    ax1.set(title='Field Focal Data', ylabel='Behavior')
    ax2.set(title='Drone Focal Data', xlabel='Time (seconds)', ylabel='Behavior')
    fig.tight_layout()

    # Save the plot next to its CSV and release the figure's memory.
    output_file = file.with_suffix('.png')
    fig.savefig(output_file, dpi=300)
    plt.close(fig)
Processing file: focal_drone_df_12_01_23_female_grevy.csv Max time for focal_drone_df_12_01_23_female_grevy.csv: 372.641667 seconds Processing file: focal_drone_df_17_01_23_scar_cleaned.csv Max time for focal_drone_df_17_01_23_scar_cleaned.csv: 193.933333 seconds Processing file: focal_drone_df_16_01_23_white_female.csv Max time for focal_drone_df_16_01_23_white_female.csv: 405.80766600000004 seconds Processing file: focal_drone_df_16_01_23_thick_neck_stripes.csv Max time for focal_drone_df_16_01_23_thick_neck_stripes.csv: 351.075333 seconds
In [ ]:
Copied!