Install the Required Libraries
You will need to install a few libraries and dependencies to run the Python script. In the terminal, enter:
sudo apt install -y python3-opencv
sudo apt install -y opencv-data
pip install piper-tts
python3 -m piper.download_voices en_US-joe-medium
pip install anthropic
This installs OpenCV, the Piper text-to-speech (TTS) local model, the Piper voice used in the project, and the Anthropic Python library.
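If you'd like to verify the installation before continuing, you can run a quick import check in Python. This snippet is just a sanity check and isn't part of the project code:

# Quick sanity check: confirm each dependency imports cleanly
import cv2
import anthropic
from piper import PiperVoice

print(f"OpenCV version: {cv2.__version__}")
print("All libraries imported successfully")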
Download the Project Bundle
Once you've finished setting up your Raspberry Pi with Blinka and the library dependencies, you can access the Python code file by downloading the Project Bundle.
To do this, click on the Download Project Bundle button in the window below. It will download as a zipped folder.
#!/usr/bin/python3
# SPDX-FileCopyrightText: 2025 Liz Clark for Adafruit Industries
#
# SPDX-License-Identifier: MIT
'''Raspberry Pi Halloween Costume Detector with OpenCV and Claude Vision API'''
import os
import subprocess
import time
import base64
import wave
import cv2
from picamera2 import Picamera2
import anthropic
from piper import PiperVoice, SynthesisConfig
import board
import digitalio

ANTHROPIC_API_KEY = "your-api-key-here"

try:
    username = os.getlogin()
    print(f"The current user is: {username}")
except OSError:
    print("Could not determine the login name.")
    print("Consider checking environment variables like USER or LOGNAME.")
    # Fallback to environment variables if os.getlogin() fails
    username = os.environ.get('USER') or os.environ.get('LOGNAME')
    if username:
        print(f"The user from environment variable is: {username}")
    else:
        print("User information not found in environment variables either.")

# Initialize LED
led = digitalio.DigitalInOut(board.D5)
led.direction = digitalio.Direction.OUTPUT
led.value = False  # Start with LED off

# Initialize detectors
upperbody_detector = cv2.CascadeClassifier(
    "/usr/share/opencv4/haarcascades/haarcascade_upperbody.xml")
fgbg = cv2.createBackgroundSubtractorMOG2(detectShadows=False)

# Initialize camera
cv2.startWindowThread()
picam2 = Picamera2()
picam2.configure(picam2.create_preview_configuration(
    main={"format": 'XRGB8888', "size": (640, 480)}))
picam2.start()

# Initialize Piper voice
voice = PiperVoice.load("/home/pi/en_US-joe-medium.onnx")
syn_config = SynthesisConfig(
    volume=1.0,
    length_scale=1.5,
    noise_scale=1.2,
    noise_w_scale=1.5,
    normalize_audio=False,
)

# Check for and create audio files if they don't exist
def create_audio_file_if_needed(filename, text):
    """Create audio file if it doesn't exist"""
    if not os.path.exists(filename):
        print(f"Creating {filename}...")
        with wave.open(filename, "wb") as wav:
            voice.synthesize_wav(text, wav, syn_config=syn_config)
        print(f"{filename} created!")
    else:
        print(f"{filename} already exists.")

# Create the hello and halloween audio files
create_audio_file_if_needed("hello.wav", "Hello? Who goes there?")
create_audio_file_if_needed("halloween.wav", "Happy Halloween!")

# Initialize Anthropic client
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
# set API key via the terminal as an environment variable:
# export ANTHROPIC_API_KEY="your-api-key-here"

# Detection parameters
MOTION_THRESHOLD = 100000
COOLDOWN_SECONDS = 15
last_capture_time = 0
capture_delay = 5
motion_frame_count = 0
MOTION_FRAMES_REQUIRED = 5

def encode_image(image_path):
    """Encode image to base64 for Claude API"""
    with open(image_path, "rb") as image_file:
        return base64.standard_b64encode(image_file.read()).decode("utf-8")

def get_costume_joke(image_path):
    """Send image to Claude and get dad joke about costume"""
    print("Analyzing costume with Claude...")
    # Encode the image
    image_data = encode_image(image_path)
    # Send to Claude API
    message = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=250,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/jpeg",
                            "data": image_data,
                        },
                    },
                    {
                        "type": "text",
                        "text": """Look at this image.
Your response must follow these rules exactly:
1. If you see a person in a Halloween costume, respond with ONLY a single-sentence cute,
family-friendly, encouraging, dad joke about the costume. Nothing else.
2. If you do NOT see a clear Halloween costume (empty room, unclear image, person in regular clothes, etc.),
respond with ONLY the character: 0
Examples of good responses:
- "Looks like that ghost costume is really boo-tiful!"
- "0"
- "I guess you could say that vampire costume really sucks... in a good way!"
Do not add any explanations, commentary, or descriptions. Just the joke or just 0.
"""
                    }
                ],
            }
        ],
    )
    # Extract the joke from the response
    joke = message.content[0].text
    print(f"Claude's joke: {joke}")
    return joke

# pylint: disable=subprocess-run-check, broad-except
def play_audio_file(filename):
    """Play a pre-existing audio file"""
    print(f"Playing {filename}...")
    subprocess.run(["su", username, "-c", f"aplay {filename}"])

def speak_joke(joke_text):
    """Convert joke to speech and play it"""
    print("Generating speech...")
    wav_file = "joke.wav"
    # Generate audio with Piper
    with wave.open(wav_file, "wb") as wav:
        voice.synthesize_wav(joke_text, wav, syn_config=syn_config)
    print("Playing audio...")
    # Play the audio file (using aplay for Raspberry Pi)
    subprocess.run(["su", username, "-c", f"aplay {wav_file}"])
    # Optional: clean up the audio file after playing
    # os.remove(wav_file)

# Main loop
while True:
    im = picam2.capture_array()
    grey = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    # Check for motion
    fgmask = fgbg.apply(im)
    motion_amount = cv2.countNonZero(fgmask)
    motion_detected = motion_amount > MOTION_THRESHOLD
    if motion_detected:
        motion_frame_count += 1
    else:
        motion_frame_count = 0
    if motion_frame_count >= MOTION_FRAMES_REQUIRED:
        # Detect upperbody
        bodies = upperbody_detector.detectMultiScale(grey, 1.1, 3)
        person_detected = len(bodies) > 0
        # Draw rectangles
        for (x, y, w, h) in bodies:
            cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Process if person detected and cooldown passed
        current_time = time.time()
        if person_detected and (current_time - last_capture_time) > COOLDOWN_SECONDS:
            print("Person detected!")
            # Turn on LED and play hello message
            led.value = True
            play_audio_file("hello.wav")
            im = picam2.capture_array()
            # Capture image
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            file = f"costume_{timestamp}.jpg"
            cv2.imwrite(file, im)
            print(f"\nPicture saved: {file}")
            try:
                # Get joke from Claude
                the_joke = get_costume_joke(file)
                # If Claude returns 0, use the halloween fallback
                if the_joke.strip() == "0":
                    print("No costume detected, playing halloween.wav")
                    play_audio_file("halloween.wav")
                else:
                    # Speak the joke
                    speak_joke(the_joke)
            except Exception as e:
                print(f"Error: {e}")
                # Fallback to halloween.wav if something goes wrong
                play_audio_file("halloween.wav")
            # Turn off LED after processing
            led.value = False
            last_capture_time = current_time
            motion_frame_count = 0
    # Show motion amount
    cv2.putText(im, f"Motion: {motion_amount}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    cv2.imshow("Camera", im)
    cv2.waitKey(1)
Anthropic API Key
You'll need to add the API key that you created on the Claude API Key page of this guide to the code.py file. At the top of the file, update the ANTHROPIC_API_KEY string:
ANTHROPIC_API_KEY = "your-api-key-here"
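Alternatively, as the comments in the code suggest, you can export the key in the terminal (export ANTHROPIC_API_KEY="your-api-key-here") and read it at startup instead of hardcoding it. A minimal sketch of that approach:

import os

# Read the API key from the environment; fall back to the placeholder
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "your-api-key-here")

If you go this route, note that the run command below uses sudo -E, which preserves your environment variables.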
Run the Script
After downloading the Project Bundle, move the folder to your /home/user directory. Then, unzip the folder by right-clicking on it in the File Manager and selecting Extract, or use your preferred command line tool. Keep the following file in the /home/user directory:
- code.py
To run code.py from your Python virtual environment, enter this line in the terminal:
sudo -E env PATH=$PATH python code.py
How the Code Works
OpenCV is used to detect movement and identify when a person has entered the view of the camera. A preview screen is created to stream the camera feed to the desktop on the Pi.
# Initialize detectors
upperbody_detector = cv2.CascadeClassifier(
    "/usr/share/opencv4/haarcascades/haarcascade_upperbody.xml")
fgbg = cv2.createBackgroundSubtractorMOG2(detectShadows=False)

# Initialize camera
cv2.startWindowThread()
picam2 = Picamera2()
picam2.configure(picam2.create_preview_configuration(
    main={"format": 'XRGB8888', "size": (640, 480)}))
picam2.start()
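In the loop, the detector's detectMultiScale(grey, 1.1, 3) call uses a scale factor of 1.1 and a minimum neighbor count of 3. If you get too many false positives, you can tighten these; the values below (including the minSize) are illustrative, not from the project code:

# Stricter detection: bigger scale steps, more required neighbors,
# and a minimum detection size to filter out small false positives
bodies = upperbody_detector.detectMultiScale(
    grey,
    scaleFactor=1.2,
    minNeighbors=5,
    minSize=(60, 60),
)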
Piper
The Piper TTS model is used for voice synthesis. The parameters passed to SynthesisConfig() can be adjusted to customize how the voice sounds. You can try out different voices from Piper with this browser tool.
Two audio files, hello.wav and halloween.wav, are created before the loop since they're used multiple times throughout the code. If these files already exist, new ones aren't created.
# Initialize Piper voice
voice = PiperVoice.load("/home/pi/en_US-joe-medium.onnx")
syn_config = SynthesisConfig(
    volume=1.0,
    length_scale=1.5,
    noise_scale=1.2,
    noise_w_scale=1.5,
    normalize_audio=False,
)

# Check for and create audio files if they don't exist
def create_audio_file_if_needed(filename, text):
    """Create audio file if it doesn't exist"""
    if not os.path.exists(filename):
        print(f"Creating {filename}...")
        with wave.open(filename, "wb") as wav:
            voice.synthesize_wav(text, wav, syn_config=syn_config)
        print(f"{filename} created!")
    else:
        print(f"{filename} already exists.")

# Create the hello and halloween audio files
create_audio_file_if_needed("hello.wav", "Hello? Who goes there?")
create_audio_file_if_needed("halloween.wav", "Happy Halloween!")
Functions
A few functions are used in the loop. The first, encode_image(), encodes the JPEG image from the camera to base64 to send to the Anthropic API.
def encode_image(image_path):
    """Encode image to base64 for Claude API"""
    with open(image_path, "rb") as image_file:
        return base64.standard_b64encode(image_file.read()).decode("utf-8")
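You can test the function on any saved capture; the filename below is hypothetical. The result is a plain string that drops straight into the JSON request:

# Hypothetical quick test with a previously saved capture
encoded = encode_image("costume_20251031-183000.jpg")
print(f"Encoded image is {len(encoded)} characters long")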
get_costume_joke() sends the image to the API and prompts the API to return a dad joke about the costume. You can tweak the prompt to customize the response from Claude.
def get_costume_joke(image_path):
    """Send image to Claude and get dad joke about costume"""
    print("Analyzing costume with Claude...")
    # Encode the image
    image_data = encode_image(image_path)
    # Send to Claude API
    message = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=250,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/jpeg",
                            "data": image_data,
                        },
                    },
                    {
                        "type": "text",
                        "text": """Look at this image.
Your response must follow these rules exactly:
1. If you see a person in a Halloween costume, respond with ONLY a single-sentence cute,
family-friendly, encouraging, dad joke about the costume. Nothing else.
2. If you do NOT see a clear Halloween costume (empty room, unclear image, person in regular clothes, etc.),
respond with ONLY the character: 0
Examples of good responses:
- "Looks like that ghost costume is really boo-tiful!"
- "0"
- "I guess you could say that vampire costume really sucks... in a good way!"
Do not add any explanations, commentary, or descriptions. Just the joke or just 0.
"""
                    }
                ],
            }
        ],
    )
    # Extract the joke from the response
    joke = message.content[0].text
    print(f"Claude's joke: {joke}")
    return joke
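Because the prompt is an ordinary string, customizing Claude's persona is just a matter of editing the text. The variation below is an example of the kind of change you could make, not part of the project code; swap it into the "text" field to use it:

# Hypothetical alternate prompt: same response rules, spookier persona
SPOOKY_PROMPT = """Look at this image.
Your response must follow these rules exactly:
1. If you see a person in a Halloween costume, respond with ONLY a
single-sentence spooky-but-friendly compliment about the costume,
in the voice of a haunted house narrator. Nothing else.
2. If you do NOT see a clear Halloween costume, respond with ONLY
the character: 0
Do not add any explanations, commentary, or descriptions.
"""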
play_audio_file() plays audio files, like the pre-made hello.wav and halloween.wav. speak_joke() takes the returned text from Claude, passes it through Piper, and then plays the created audio file.
def play_audio_file(filename):
    """Play a pre-existing audio file"""
    print(f"Playing {filename}...")
    subprocess.run(["su", username, "-c", f"aplay {filename}"])

def speak_joke(joke_text):
    """Convert joke to speech and play it"""
    print("Generating speech...")
    wav_file = "joke.wav"
    # Generate audio with Piper
    with wave.open(wav_file, "wb") as wav:
        voice.synthesize_wav(joke_text, wav, syn_config=syn_config)
    print("Playing audio...")
    # Play the audio file (using aplay for Raspberry Pi)
    subprocess.run(["su", username, "-c", f"aplay {wav_file}"])
The Loop
In the loop, the camera feed is streamed to the OpenCV window. When motion is detected, the frame is checked for a person with the upper body detector. If a person is detected, the LED turns on and the hello.wav file plays. A picture is taken and sent to Claude with the costume joke prompt. When the joke is returned, it's run through Piper and the generated audio file plays. If no joke is returned (API connection issue, no costume identified, etc.), the halloween.wav file plays instead. After this process, the LED turns off.
while True:
    im = picam2.capture_array()
    grey = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    # Check for motion
    fgmask = fgbg.apply(im)
    motion_amount = cv2.countNonZero(fgmask)
    motion_detected = motion_amount > MOTION_THRESHOLD
    if motion_detected:
        motion_frame_count += 1
    else:
        motion_frame_count = 0
    if motion_frame_count >= MOTION_FRAMES_REQUIRED:
        # Detect upperbody
        bodies = upperbody_detector.detectMultiScale(grey, 1.1, 3)
        person_detected = len(bodies) > 0
        # Draw rectangles
        for (x, y, w, h) in bodies:
            cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Process if person detected and cooldown passed
        current_time = time.time()
        if person_detected and (current_time - last_capture_time) > COOLDOWN_SECONDS:
            print("Person detected!")
            # Turn on LED and play hello message
            led.value = True
            play_audio_file("hello.wav")
            im = picam2.capture_array()
            # Capture image
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            file = f"costume_{timestamp}.jpg"
            cv2.imwrite(file, im)
            print(f"\nPicture saved: {file}")
            try:
                # Get joke from Claude
                the_joke = get_costume_joke(file)
                # If Claude returns 0, use the halloween fallback
                if the_joke.strip() == "0":
                    print("No costume detected, playing halloween.wav")
                    play_audio_file("halloween.wav")
                else:
                    # Speak the joke
                    speak_joke(the_joke)
            except Exception as e:
                print(f"Error: {e}")
                # Fallback to halloween.wav if something goes wrong
                play_audio_file("halloween.wav")
            # Turn off LED after processing
            led.value = False
            last_capture_time = current_time
            motion_frame_count = 0
    # Show motion amount
    cv2.putText(im, f"Motion: {motion_amount}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    cv2.imshow("Camera", im)
    cv2.waitKey(1)
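The loop runs until you stop the script. If you'd like Ctrl+C to release the camera and turn off the LED cleanly on exit, one option is to wrap the loop in a try/finally; this sketch isn't part of the project code:

# Hypothetical cleanup wrapper around the existing loop
try:
    while True:
        ...  # existing loop body from above
except KeyboardInterrupt:
    print("Stopping...")
finally:
    led.value = False        # make sure the LED is off
    picam2.stop()            # release the camera
    cv2.destroyAllWindows()  # close the preview window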