Generate Gantt charts showing behavior gathered from focal sampling vs. drones

In [2]:

Copied!





import os
import pandas as pd 
import numpy as np
from datetime import datetime, timedelta
import pysrt
import json
from pathlib import Path
import more_itertools as mit
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pandas as pd 
import numpy as np
from datetime import datetime, timedelta
import pysrt
import json
from pathlib import Path
import more_itertools as mit
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:

Copied!





# get the start and end times for each behavior duration

def get_start_end_times(df):
    """Collapse consecutive rows with the same behavior into runs.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 'behavior' column and a 'time' column. Rows are read in
        their existing positional order; the index is ignored.

    Returns
    -------
    list of [behavior, start, end]
        One entry per run of identical consecutive behaviors. ``start`` is the
        'time' of the first row of the run and ``end`` is the 'time' of the
        LAST row of that same run (not the start of the next run), so a run
        made of a single row has ``start == end``. An empty frame yields ``[]``.
    """
    # Pull both columns out once; building a row Series with df.iloc[i] on
    # every step is far slower than indexing plain lists.
    behaviors = df['behavior'].tolist()
    stamps = df['time'].tolist()

    times = []
    for i, (behavior, stamp) in enumerate(zip(behaviors, stamps)):
        if i == 0 or behavior != behaviors[i - 1]:
            if times:
                # close the previous run at the last sample that belonged to it
                times[-1][2] = stamps[i - 1]
            # open a new run; the 0 placeholder is overwritten when the run closes
            times.append([behavior, stamp, 0])

    # the final run ends at the last sample in the frame
    if times:
        times[-1][2] = stamps[-1]
    return times

def calculate_duration(df):
    df['duration'] = df['end'] - df['start']
    # convert to seconds
    df['duration'] = df['duration'].dt.total_seconds()
    
    df['rel_start'] = df['start'] - df['start'].iloc[0]
    df['rel_start'] = df['rel_start'].dt.total_seconds()
    return df
# get the start and end times for each behavior duration

def get_start_end_times(df):
    """Collapse consecutive rows with the same behavior into runs.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 'behavior' column and a 'time' column. Rows are read in
        their existing positional order; the index is ignored.

    Returns
    -------
    list of [behavior, start, end]
        One entry per run of identical consecutive behaviors. ``start`` is the
        'time' of the first row of the run and ``end`` is the 'time' of the
        LAST row of that same run (not the start of the next run), so a run
        made of a single row has ``start == end``. An empty frame yields ``[]``.
    """
    # Pull both columns out once; building a row Series with df.iloc[i] on
    # every step is far slower than indexing plain lists.
    behaviors = df['behavior'].tolist()
    stamps = df['time'].tolist()

    times = []
    for i, (behavior, stamp) in enumerate(zip(behaviors, stamps)):
        if i == 0 or behavior != behaviors[i - 1]:
            if times:
                # close the previous run at the last sample that belonged to it
                times[-1][2] = stamps[i - 1]
            # open a new run; the 0 placeholder is overwritten when the run closes
            times.append([behavior, stamp, 0])

    # the final run ends at the last sample in the frame
    if times:
        times[-1][2] = stamps[-1]
    return times

def calculate_duration(df):
    df['duration'] = df['end'] - df['start']
    # convert to seconds
    df['duration'] = df['duration'].dt.total_seconds()
    
    df['rel_start'] = df['start'] - df['start'].iloc[0]
    df['rel_start'] = df['rel_start'].dt.total_seconds()
    return df

In [4]:

Copied!





def clean_categories(name):
    """Map a raw behavior label onto the canonical category used for plotting.

    Matching is a case-insensitive substring test, tried in the order listed
    below; the first rule that matches wins (so "head up walk" becomes
    "Head Up"). Labels that match no rule are returned unchanged.

    Parameters
    ----------
    name : str or any
        Raw label. Non-string values (for example NaN from a blank CSV cell)
        are returned as-is instead of raising on ``.lower()``.

    Returns
    -------
    The canonical label, or ``name`` itself when nothing matches.
    """
    if not isinstance(name, str):
        return name

    # (substring to look for, canonical label) - order is significant
    rules = (
        ("head up", "Head Up"),
        ("walk", "Walk"),
        ("out of focus", "Out of Sight"),
        ("fighting", "Fight"),
        ("trot/run", "Trot/Run"),
        ("out of sight", "Out of Sight"),
        ("no data", "Out of Sight"),
        ("out of frame", "Out of Sight"),
        ("occluded", "Out of Sight"),
        # Replace sniff with graze
        ("sniff", "Graze"),
    )

    lowercased = name.lower()
    for needle, label in rules:
        if needle in lowercased:
            return label
    return name
def clean_categories(name):
    """Map a raw behavior label onto the canonical category used for plotting.

    Matching is a case-insensitive substring test, tried in the order listed
    below; the first rule that matches wins (so "head up walk" becomes
    "Head Up"). Labels that match no rule are returned unchanged.

    Parameters
    ----------
    name : str or any
        Raw label. Non-string values (for example NaN from a blank CSV cell)
        are returned as-is instead of raising on ``.lower()``.

    Returns
    -------
    The canonical label, or ``name`` itself when nothing matches.
    """
    if not isinstance(name, str):
        return name

    # (substring to look for, canonical label) - order is significant
    rules = (
        ("head up", "Head Up"),
        ("walk", "Walk"),
        ("out of focus", "Out of Sight"),
        ("fighting", "Fight"),
        ("trot/run", "Trot/Run"),
        ("out of sight", "Out of Sight"),
        ("no data", "Out of Sight"),
        ("out of frame", "Out of Sight"),
        ("occluded", "Out of Sight"),
        # Replace sniff with graze
        ("sniff", "Graze"),
    )

    lowercased = name.lower()
    for needle, label in rules:
        if needle in lowercased:
            return label
    return name

In [5]:

Copied!





# data source: every CSV in this directory produces one two-panel Gantt chart,
# saved next to the CSV with a .png suffix
data_dir = Path('focalvsdrone')

# Ethogram in display order: a behavior's index in this list is its y-position
# on both panels. Behaviors that are not listed here are not drawn.
ethogram = ['Out of Sight', 'Fight','Urinate','Mutual-Groom','Defecate','Auto-Groom','Browse','Trot/Run','Head Up','Walk','Graze']

# One fixed color per behavior so both panels (and all files) use the same key
colors = sns.color_palette("tab20", len(ethogram))
color_map = {behavior: colors[i] for i, behavior in enumerate(ethogram)}

# Map behaviors to y-positions based on ethogram
behavior_positions = {behavior: i for i, behavior in enumerate(ethogram)}

for file in data_dir.glob('*.csv'):
    focal_drone_dfs = pd.read_csv(file)

    print(f'Processing file: {file.name}')

    if focal_drone_dfs.empty:
        # Nothing to plot for a file without rows
        print(f'Skipping {file.name}: no rows')
        continue

    # Clean the behavior categories before processing
    focal_drone_dfs['behavior'] = focal_drone_dfs['behavior'].apply(clean_categories)
    focal_drone_dfs['focal_behavior'] = focal_drone_dfs['focal_behavior'].apply(clean_categories)

    # Split into the two observation streams; both share the same 'time' column
    drone = focal_drone_dfs[['behavior', 'time']]
    # rename focal behavior to behavior
    focal = focal_drone_dfs[['focal_behavior', 'time']].rename(columns={'focal_behavior':'behavior'})

    # get the start and end times for each behavior duration
    focal_times = get_start_end_times(focal)
    drone_times = get_start_end_times(drone)

    # convert to dataframe
    focal = pd.DataFrame(focal_times, columns=['behavior','start', 'end'])
    drone = pd.DataFrame(drone_times, columns=['behavior','start', 'end'])

    # convert to datetime
    for bouts in (focal, drone):
        bouts['start'] = pd.to_datetime(bouts['start'])
        bouts['end'] = pd.to_datetime(bouts['end'])

    focal = calculate_duration(focal)
    drone = calculate_duration(drone)

    # Latest bout end (seconds from the first sample) across both streams.
    # This is the max of (start + duration) per bout, not max start + max duration.
    max_time_focal = (focal['rel_start'] + focal['duration']).max()
    max_time_drone = (drone['rel_start'] + drone['duration']).max()
    max_time = max(max_time_focal, max_time_drone)
    print(f"Max time for {file.name}: {max_time} seconds")

    # Extent of the light background bars that show the full ethogram
    start_time = min(focal['rel_start'].min(), drone['rel_start'].min())
    total_duration = max_time - start_time

    # plot the gantt chart
    fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12,4))

    panels = (
        (ax1, focal, 'Field Focal Data'),
        (ax2, drone, 'Drone Focal Data'),
    )
    for ax, bouts, title in panels:
        # Same y-axis labels on both panels, taken from the ethogram
        ax.set_yticks(range(len(ethogram)))
        ax.set_yticklabels(ethogram)

        # Light gray background bar for every behavior, observed or not
        for y_pos in range(len(ethogram)):
            ax.barh(y=y_pos, width=total_duration, left=start_time,
                    color='lightgray', alpha=0.2, zorder=0)

        # One colored bar per bout
        for _, row in bouts.iterrows():
            if row['behavior'] in behavior_positions:  # Only plot if behavior is in ethogram
                ax.barh(y=behavior_positions[row['behavior']], width=row['duration'],
                        left=row['rel_start'], color=color_map[row['behavior']],
                        alpha=0.8, zorder=2)

        # Add subtle grid lines for better readability
        ax.grid(True, axis='x', alpha=0.3, linestyle='-', linewidth=0.5)

        # Set x-axis limits to match the data range
        ax.set_xlim(0, max_time)

        # Set y-axis limits to show a bit of padding
        ax.set_ylim(-0.5, len(ethogram) - 0.5)

        ax.set(title=title, ylabel='Behavior')

    # only the bottom panel carries the x-axis label
    ax2.set_xlabel('Time (seconds)')
    fig.tight_layout()

    # Save the plot
    output_file = file.with_suffix('.png')
    fig.savefig(output_file, dpi=300)
    plt.close(fig)
# data source: every CSV in this directory produces one two-panel Gantt chart,
# saved next to the CSV with a .png suffix
data_dir = Path('focalvsdrone')

# Ethogram in display order: a behavior's index in this list is its y-position
# on both panels. Behaviors that are not listed here are not drawn.
ethogram = ['Out of Sight', 'Fight','Urinate','Mutual-Groom','Defecate','Auto-Groom','Browse','Trot/Run','Head Up','Walk','Graze']

# One fixed color per behavior so both panels (and all files) use the same key
colors = sns.color_palette("tab20", len(ethogram))
color_map = {behavior: colors[i] for i, behavior in enumerate(ethogram)}

# Map behaviors to y-positions based on ethogram
behavior_positions = {behavior: i for i, behavior in enumerate(ethogram)}

for file in data_dir.glob('*.csv'):
    focal_drone_dfs = pd.read_csv(file)

    print(f'Processing file: {file.name}')

    if focal_drone_dfs.empty:
        # Nothing to plot for a file without rows
        print(f'Skipping {file.name}: no rows')
        continue

    # Clean the behavior categories before processing
    focal_drone_dfs['behavior'] = focal_drone_dfs['behavior'].apply(clean_categories)
    focal_drone_dfs['focal_behavior'] = focal_drone_dfs['focal_behavior'].apply(clean_categories)

    # Split into the two observation streams; both share the same 'time' column
    drone = focal_drone_dfs[['behavior', 'time']]
    # rename focal behavior to behavior
    focal = focal_drone_dfs[['focal_behavior', 'time']].rename(columns={'focal_behavior':'behavior'})

    # get the start and end times for each behavior duration
    focal_times = get_start_end_times(focal)
    drone_times = get_start_end_times(drone)

    # convert to dataframe
    focal = pd.DataFrame(focal_times, columns=['behavior','start', 'end'])
    drone = pd.DataFrame(drone_times, columns=['behavior','start', 'end'])

    # convert to datetime
    for bouts in (focal, drone):
        bouts['start'] = pd.to_datetime(bouts['start'])
        bouts['end'] = pd.to_datetime(bouts['end'])

    focal = calculate_duration(focal)
    drone = calculate_duration(drone)

    # Latest bout end (seconds from the first sample) across both streams.
    # This is the max of (start + duration) per bout, not max start + max duration.
    max_time_focal = (focal['rel_start'] + focal['duration']).max()
    max_time_drone = (drone['rel_start'] + drone['duration']).max()
    max_time = max(max_time_focal, max_time_drone)
    print(f"Max time for {file.name}: {max_time} seconds")

    # Extent of the light background bars that show the full ethogram
    start_time = min(focal['rel_start'].min(), drone['rel_start'].min())
    total_duration = max_time - start_time

    # plot the gantt chart
    fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12,4))

    panels = (
        (ax1, focal, 'Field Focal Data'),
        (ax2, drone, 'Drone Focal Data'),
    )
    for ax, bouts, title in panels:
        # Same y-axis labels on both panels, taken from the ethogram
        ax.set_yticks(range(len(ethogram)))
        ax.set_yticklabels(ethogram)

        # Light gray background bar for every behavior, observed or not
        for y_pos in range(len(ethogram)):
            ax.barh(y=y_pos, width=total_duration, left=start_time,
                    color='lightgray', alpha=0.2, zorder=0)

        # One colored bar per bout
        for _, row in bouts.iterrows():
            if row['behavior'] in behavior_positions:  # Only plot if behavior is in ethogram
                ax.barh(y=behavior_positions[row['behavior']], width=row['duration'],
                        left=row['rel_start'], color=color_map[row['behavior']],
                        alpha=0.8, zorder=2)

        # Add subtle grid lines for better readability
        ax.grid(True, axis='x', alpha=0.3, linestyle='-', linewidth=0.5)

        # Set x-axis limits to match the data range
        ax.set_xlim(0, max_time)

        # Set y-axis limits to show a bit of padding
        ax.set_ylim(-0.5, len(ethogram) - 0.5)

        ax.set(title=title, ylabel='Behavior')

    # only the bottom panel carries the x-axis label
    ax2.set_xlabel('Time (seconds)')
    fig.tight_layout()

    # Save the plot
    output_file = file.with_suffix('.png')
    fig.savefig(output_file, dpi=300)
    plt.close(fig)

Processing file: focal_drone_df_12_01_23_female_grevy.csv
Max time for focal_drone_df_12_01_23_female_grevy.csv: 372.641667 seconds
Processing file: focal_drone_df_17_01_23_scar_cleaned.csv
Max time for focal_drone_df_17_01_23_scar_cleaned.csv: 193.933333 seconds
Processing file: focal_drone_df_16_01_23_white_female.csv
Max time for focal_drone_df_16_01_23_white_female.csv: 405.80766600000004 seconds
Processing file: focal_drone_df_16_01_23_thick_neck_stripes.csv
Max time for focal_drone_df_16_01_23_thick_neck_stripes.csv: 351.075333 seconds

In [ ]: