Use bounding boxes to detect inter- and intra-species interactions¶
In [1]:
Copied!
import os
import warnings
import xml.etree.ElementTree as ET
from collections import defaultdict, Counter
from itertools import combinations

import pandas as pd
from shapely.geometry import box
# Silence every warning category from this point on. The filter is installed
# after the imports above have run, so warnings raised while importing are
# not affected by it.
warnings.filterwarnings('ignore')
import xml.etree.ElementTree as ET
from shapely.geometry import box
from itertools import combinations
from collections import defaultdict, Counter
import os
import pandas as pd
# Silence every warning category from this point on. The filter is installed
# after the imports above have run, so warnings raised while importing are
# not affected by it.
warnings.filterwarnings('ignore')
/users/PAS2136/kline377/.conda/envs/auto_drone/lib/python3.9/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). from pandas.core import (
First, count the number of mini-scenes for each species¶
In [11]:
Copied!
from collections import Counter
import xml.etree.ElementTree as ET
from pathlib import Path
import csv # only needed if you want CSV output
# ---------------------------------------------------------------------
# 1. CONFIGURE THESE TWO LINES
# ---------------------------------------------------------------------
input_dir = "detections"
data_root = Path(input_dir)   # top-level directory
xml_pattern = "**/*.xml"      # recursive search


# ---------------------------------------------------------------------
# 2. GATHER COUNTS ACROSS ALL FILES
# ---------------------------------------------------------------------
def count_tracks_by_label(root_dir, pattern="**/*.xml"):
    """Count <track> elements per ``label`` attribute across annotation files.

    Parameters
    ----------
    root_dir : str or Path
        Directory that is searched with ``Path.glob``.
    pattern : str
        Glob pattern relative to ``root_dir``; the default searches recursively.

    Returns
    -------
    collections.Counter
        Maps each track label to the number of tracks carrying it, summed
        over every file that could be parsed.

    Files that raise ``ET.ParseError`` are reported on stdout and skipped.
    A track without a ``label`` attribute raises ``KeyError``.
    """
    totals = Counter()
    # Glob order depends on the filesystem; sorting keeps the insertion order
    # (and therefore the order of tied labels in most_common()) reproducible.
    for xml_file in sorted(Path(root_dir).glob(pattern)):
        try:
            root = ET.parse(xml_file).getroot()
        except ET.ParseError as e:
            print(f"⚠️ Skipped {xml_file.name} → parse error: {e}")
            continue
        # ".//track" matches track elements at any depth below the root.
        totals.update(track.attrib["label"] for track in root.findall(".//track"))
    return totals


species_totals = count_tracks_by_label(data_root, xml_pattern)

# ---------------------------------------------------------------------
# 3. DISPLAY RESULTS
# ---------------------------------------------------------------------
print("\n=== Total track counts across all files ===")
for label, n in species_totals.most_common():
    print(f"{label:<20} : {n}")
from collections import Counter
import xml.etree.ElementTree as ET
from pathlib import Path
import csv # only needed if you want CSV output
# ---------------------------------------------------------------------
# 1. CONFIGURE THESE TWO LINES
# ---------------------------------------------------------------------
input_dir = "detections"
data_root = Path(input_dir)   # top-level directory
xml_pattern = "**/*.xml"      # recursive search


# ---------------------------------------------------------------------
# 2. GATHER COUNTS ACROSS ALL FILES
# ---------------------------------------------------------------------
def count_tracks_by_label(root_dir, pattern="**/*.xml"):
    """Count <track> elements per ``label`` attribute across annotation files.

    Parameters
    ----------
    root_dir : str or Path
        Directory that is searched with ``Path.glob``.
    pattern : str
        Glob pattern relative to ``root_dir``; the default searches recursively.

    Returns
    -------
    collections.Counter
        Maps each track label to the number of tracks carrying it, summed
        over every file that could be parsed.

    Files that raise ``ET.ParseError`` are reported on stdout and skipped.
    A track without a ``label`` attribute raises ``KeyError``.
    """
    totals = Counter()
    # Glob order depends on the filesystem; sorting keeps the insertion order
    # (and therefore the order of tied labels in most_common()) reproducible.
    for xml_file in sorted(Path(root_dir).glob(pattern)):
        try:
            root = ET.parse(xml_file).getroot()
        except ET.ParseError as e:
            print(f"⚠️ Skipped {xml_file.name} → parse error: {e}")
            continue
        # ".//track" matches track elements at any depth below the root.
        totals.update(track.attrib["label"] for track in root.findall(".//track"))
    return totals


species_totals = count_tracks_by_label(data_root, xml_pattern)

# ---------------------------------------------------------------------
# 3. DISPLAY RESULTS
# ---------------------------------------------------------------------
print("\n=== Total track counts across all files ===")
for label, n in species_totals.most_common():
    print(f"{label:<20} : {n}")
=== Total track counts across all files === Grevys Zebra : 101 Giraffe : 56 Plains Zebra : 5
Parse the files to find where species overlap¶
In [2]:
Copied!
# --- CONFIGURATION ---
iou_threshold = 0.5 # Minimum IoU (intersection area / union area) two same-frame boxes must reach to be counted as an overlap
species_of_interest = ["Grevys Zebra", "Plains Zebra", "Giraffe"] # Track labels kept by parse_xml; tracks with any other label are skipped
input_dir = "detections" # Directory whose *.xml files are analysed (top level only) <-- update this
# --- PARSE XML ---
def parse_xml(filepath):
    """Read one annotation XML file and group its visible boxes by frame.

    Only ``track`` elements that are direct children of the document root are
    read, and only those whose ``label`` is listed in the module-level
    ``species_of_interest``. Boxes whose ``outside`` attribute equals "1" are
    left out.

    Parameters
    ----------
    filepath : str or path-like
        Location of the XML file, handed to ``ET.parse``.

    Returns
    -------
    collections.defaultdict
        Maps the integer ``frame`` attribute to a list of
        ``{"species": <label>, "bbox": <shapely box>}`` dicts, where the box
        is built from the ``xtl``, ``ytl``, ``xbr`` and ``ybr`` attributes.
    """
    boxes_by_frame = defaultdict(list)
    annotations = ET.parse(filepath).getroot()

    for track in annotations.findall("track"):
        label = track.attrib["label"]
        if label in species_of_interest:
            for node in track.findall("box"):
                attrs = node.attrib
                if attrs["outside"] != "1":  # "1" marks a box to be skipped
                    frame_no = int(attrs["frame"])
                    corners = [float(attrs[key]) for key in ("xtl", "ytl", "xbr", "ybr")]
                    boxes_by_frame[frame_no].append(
                        {"species": label, "bbox": box(*corners)}
                    )

    return boxes_by_frame
# --- COMPUTE OVERLAPS ---
def compute_overlaps(frame_data, threshold=None):
    """Count, per species pair, the same-frame box pairs whose IoU reaches a threshold.

    Parameters
    ----------
    frame_data : mapping
        Frame number -> list of ``{"species": str, "bbox": geometry}`` dicts,
        as returned by ``parse_xml``. Each ``bbox`` must offer ``intersects``,
        ``intersection`` and ``union``, the latter two returning objects with
        an ``area`` attribute.
    threshold : float, optional
        Minimum intersection-over-union for a pair to be counted. When left
        as ``None`` the module-level ``iou_threshold`` is used, which keeps
        existing one-argument calls working unchanged.

    Returns
    -------
    collections.Counter
        Keys are alphabetically sorted ``(species_a, species_b)`` tuples.
        Each value is the number of (frame, box pair) combinations that met
        the threshold, so a pair that overlaps in k frames contributes k.
    """
    if threshold is None:
        threshold = iou_threshold

    overlap_counts = Counter()
    for objects in frame_data.values():
        for a, b in combinations(objects, 2):
            if not a["bbox"].intersects(b["bbox"]):
                continue
            union_area = a["bbox"].union(b["bbox"]).area
            if union_area <= 0:
                # Two zero-area boxes can still intersect; IoU is undefined
                # there and dividing would raise ZeroDivisionError.
                continue
            inter_area = a["bbox"].intersection(b["bbox"]).area
            if inter_area / union_area >= threshold:
                # Sort so (A, B) and (B, A) land on the same key.
                pair = tuple(sorted((a["species"], b["species"])))
                overlap_counts[pair] += 1
    return overlap_counts
# --- MAIN LOOP ---
# Accumulate the per-file overlap counters into one Counter for the whole directory.
# NOTE(review): only the top level of input_dir is scanned here, whereas the
# species-count cell searches recursively ("**/*.xml"); confirm both are meant
# to cover the same set of files.
total_counts = Counter()
# os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".xml"):
        continue
    path = os.path.join(input_dir, filename)
    try:
        frame_data = parse_xml(path)
    except ET.ParseError as e:
        # Same policy as the species-count cell: report the bad file and move on.
        print(f"⚠️ Skipped {filename} → parse error: {e}")
        continue
    overlap_counts = compute_overlaps(frame_data)
    total_counts.update(overlap_counts)

# --- FORMAT RESULTS ---
# Naming the columns up front keeps the frame well-formed when no overlaps were
# found; otherwise the empty frame has no "Overlap Count" column and
# sort_values raises KeyError.
df = pd.DataFrame(
    [{"Species Pair": f"{a}–{b}", "Overlap Count": count} for (a, b), count in total_counts.items()],
    columns=["Species Pair", "Overlap Count"],
)
df = df.sort_values(by="Overlap Count", ascending=False).reset_index(drop=True)

# --- DISPLAY OR SAVE ---
print(df)
# Optionally save:
# df.to_csv("overlap_summary.csv", index=False)
# --- CONFIGURATION ---
iou_threshold = 0.5 # Minimum IoU (intersection area / union area) two same-frame boxes must reach to be counted as an overlap
species_of_interest = ["Grevys Zebra", "Plains Zebra", "Giraffe"] # Track labels kept by parse_xml; tracks with any other label are skipped
input_dir = "detections" # Directory whose *.xml files are analysed (top level only) <-- update this
# --- PARSE XML ---
def parse_xml(filepath):
    """Read one annotation XML file and group its visible boxes by frame.

    Only ``track`` elements that are direct children of the document root are
    read, and only those whose ``label`` is listed in the module-level
    ``species_of_interest``. Boxes whose ``outside`` attribute equals "1" are
    left out.

    Parameters
    ----------
    filepath : str or path-like
        Location of the XML file, handed to ``ET.parse``.

    Returns
    -------
    collections.defaultdict
        Maps the integer ``frame`` attribute to a list of
        ``{"species": <label>, "bbox": <shapely box>}`` dicts, where the box
        is built from the ``xtl``, ``ytl``, ``xbr`` and ``ybr`` attributes.
    """
    boxes_by_frame = defaultdict(list)
    annotations = ET.parse(filepath).getroot()

    for track in annotations.findall("track"):
        label = track.attrib["label"]
        if label in species_of_interest:
            for node in track.findall("box"):
                attrs = node.attrib
                if attrs["outside"] != "1":  # "1" marks a box to be skipped
                    frame_no = int(attrs["frame"])
                    corners = [float(attrs[key]) for key in ("xtl", "ytl", "xbr", "ybr")]
                    boxes_by_frame[frame_no].append(
                        {"species": label, "bbox": box(*corners)}
                    )

    return boxes_by_frame
# --- COMPUTE OVERLAPS ---
def compute_overlaps(frame_data, threshold=None):
    """Count, per species pair, the same-frame box pairs whose IoU reaches a threshold.

    Parameters
    ----------
    frame_data : mapping
        Frame number -> list of ``{"species": str, "bbox": geometry}`` dicts,
        as returned by ``parse_xml``. Each ``bbox`` must offer ``intersects``,
        ``intersection`` and ``union``, the latter two returning objects with
        an ``area`` attribute.
    threshold : float, optional
        Minimum intersection-over-union for a pair to be counted. When left
        as ``None`` the module-level ``iou_threshold`` is used, which keeps
        existing one-argument calls working unchanged.

    Returns
    -------
    collections.Counter
        Keys are alphabetically sorted ``(species_a, species_b)`` tuples.
        Each value is the number of (frame, box pair) combinations that met
        the threshold, so a pair that overlaps in k frames contributes k.
    """
    if threshold is None:
        threshold = iou_threshold

    overlap_counts = Counter()
    for objects in frame_data.values():
        for a, b in combinations(objects, 2):
            if not a["bbox"].intersects(b["bbox"]):
                continue
            union_area = a["bbox"].union(b["bbox"]).area
            if union_area <= 0:
                # Two zero-area boxes can still intersect; IoU is undefined
                # there and dividing would raise ZeroDivisionError.
                continue
            inter_area = a["bbox"].intersection(b["bbox"]).area
            if inter_area / union_area >= threshold:
                # Sort so (A, B) and (B, A) land on the same key.
                pair = tuple(sorted((a["species"], b["species"])))
                overlap_counts[pair] += 1
    return overlap_counts
# --- MAIN LOOP ---
# Accumulate the per-file overlap counters into one Counter for the whole directory.
# NOTE(review): only the top level of input_dir is scanned here, whereas the
# species-count cell searches recursively ("**/*.xml"); confirm both are meant
# to cover the same set of files.
total_counts = Counter()
# os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".xml"):
        continue
    path = os.path.join(input_dir, filename)
    try:
        frame_data = parse_xml(path)
    except ET.ParseError as e:
        # Same policy as the species-count cell: report the bad file and move on.
        print(f"⚠️ Skipped {filename} → parse error: {e}")
        continue
    overlap_counts = compute_overlaps(frame_data)
    total_counts.update(overlap_counts)

# --- FORMAT RESULTS ---
# Naming the columns up front keeps the frame well-formed when no overlaps were
# found; otherwise the empty frame has no "Overlap Count" column and
# sort_values raises KeyError.
df = pd.DataFrame(
    [{"Species Pair": f"{a}–{b}", "Overlap Count": count} for (a, b), count in total_counts.items()],
    columns=["Species Pair", "Overlap Count"],
)
df = df.sort_values(by="Overlap Count", ascending=False).reset_index(drop=True)

# --- DISPLAY OR SAVE ---
print(df)
# Optionally save:
# df.to_csv("overlap_summary.csv", index=False)
/users/PAS2136/kline377/.conda/envs/auto_drone/lib/python3.9/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). from pandas.core import (
Species Pair Overlap Count 0 Grevys Zebra–Grevys Zebra 4836 1 Plains Zebra–Plains Zebra 93 2 Giraffe–Giraffe 78 3 Grevys Zebra–Plains Zebra 28
In [4]:
Copied!
# Add up the tally of every species pair to get one grand total of counted overlaps.
total_interactions = sum(pair_count for pair_count in total_counts.values())
print(f"\nTotal interactions across all files: {total_interactions}")
# Add up the tally of every species pair to get one grand total of counted overlaps.
total_interactions = sum(pair_count for pair_count in total_counts.values())
print(f"\nTotal interactions across all files: {total_interactions}")
Total interactions across all files: 5035