This is intended to be a high level overview of the code, since it is relatively complex. There are two files that handle the storybook code. These are the story.py file, which handles the majority of the code and the listener.py file, which handles the listening and speech recognition tasks.
# SPDX-FileCopyrightText: 2023 Melissa LeBlanc-Williams for Adafruit Industries
#
# SPDX-License-Identifier: MIT
import threading
import sys
import os
import re
import time
import argparse
import math
import configparser
from enum import Enum
from collections import deque
import board
import digitalio
import neopixel
from openai import OpenAI
import pygame
from rpi_backlight import Backlight
from adafruit_led_animation.animation.pulse import Pulse
from listener import Listener
# Base Path is the folder the script resides in
BASE_PATH = os.path.dirname(sys.argv[0])
if BASE_PATH != "":
BASE_PATH += "/"
# General Settings
STORY_WORD_LENGTH = 800
REED_SWITCH_PIN = board.D17
NEOPIXEL_PIN = board.D18
API_KEYS_FILE = "~/keys.txt"
PROMPT_FILE = "/boot/bookprompt.txt"
# Quit Settings (Close book QUIT_CLOSES within QUIT_TIME_PERIOD to quit)
QUIT_CLOSES = 3
QUIT_TIME_PERIOD = 5 # Time period in Seconds
QUIT_DEBOUNCE_DELAY = 0.25 # Time to wait before counting next closeing
# Neopixel Settings
NEOPIXEL_COUNT = 1
NEOPIXEL_BRIGHTNESS = 0.2
NEOPIXEL_ORDER = neopixel.GRBW
NEOPIXEL_LOADING_COLOR = (0, 255, 0, 0) # Loading/Dreaming (Green)
NEOPIXEL_SLEEP_COLOR = (0, 0, 0, 0) # Sleeping (Off)
NEOPIXEL_WAITING_COLOR = (255, 255, 0, 0) # Waiting for Input (Yellow)
NEOPIXEL_READING_COLOR = (0, 0, 255, 0) # Reading (Blue)
NEOPIXEL_PULSE_SPEED = 0.1
# Image Settings
WELCOME_IMAGE = "welcome.png"
BACKGROUND_IMAGE = "paper_background.png"
LOADING_IMAGE = "loading.png"
BUTTON_BACK_IMAGE = "button_back.png"
BUTTON_NEXT_IMAGE = "button_next.png"
BUTTON_NEW_IMAGE = "button_new.png"
# Asset Paths
IMAGES_PATH = BASE_PATH + "images/"
FONTS_PATH = BASE_PATH + "fonts/"
# Font Path & Size
TITLE_FONT = (FONTS_PATH + "Desdemona Black Regular.otf", 48)
TITLE_COLOR = (0, 0, 0)
TEXT_FONT = (FONTS_PATH + "times new roman.ttf", 24)
TEXT_COLOR = (0, 0, 0)
# Delays Settings
# Used to control the speed of the text
WORD_DELAY = 0.1
TITLE_FADE_TIME = 0.05
TITLE_FADE_STEPS = 25
TEXT_FADE_TIME = 0.25
TEXT_FADE_STEPS = 51
ALSA_ERROR_DELAY = 0.5 # Delay to wait after an ALSA errors
# Whitespace Settings (in Pixels)
PAGE_TOP_MARGIN = 20
PAGE_SIDE_MARGIN = 20
PAGE_BOTTOM_MARGIN = 0
PAGE_NAV_HEIGHT = 100
EXTRA_LINE_SPACING = 0
PARAGRAPH_SPACING = 30
# ChatGPT Parameters
SYSTEM_ROLE = "You are a master AI Storyteller that can tell a story of any length."
CHATGPT_MODEL = "gpt-3.5-turbo" # You can also use "gpt-4", which is slower, but more accurate
WHISPER_MODEL = "whisper-1"
# Speech Recognition Parameters
ENERGY_THRESHOLD = 300 # Energy level for mic to detect
RECORD_TIMEOUT = 30 # Maximum time in seconds to wait for speech
# Do some checks and Import API keys from API_KEYS_FILE
config = configparser.ConfigParser()
if os.geteuid() != 0:
print("Please run this script as root.")
sys.exit(1)
username = os.environ["SUDO_USER"]
user_homedir = os.path.expanduser(f"~{username}")
API_KEYS_FILE = API_KEYS_FILE.replace("~", user_homedir)
print(os.path.expanduser(API_KEYS_FILE))
config.read(os.path.expanduser(API_KEYS_FILE))
if not config.has_section("openai"):
print("Please make sure API_KEYS_FILE points to a valid file.")
sys.exit(1)
if "OPENAI_API_KEY" not in config["openai"]:
print(
"Please make sure your API keys file contains an OPENAI_API_KEY under the openai section."
)
sys.exit(1)
if len(config["openai"]["OPENAI_API_KEY"]) < 10:
print("Please set OPENAI_API_KEY in your API keys file with a valid key.")
sys.exit(1)
openai = OpenAI(
# This is the default and can be omitted
api_key=config["openai"]["OPENAI_API_KEY"],
)
# Check that the prompt file exists and load it
if not os.path.isfile(PROMPT_FILE):
print("Please make sure PROMPT_FILE points to a valid file.")
sys.exit(1)
def strip_fancy_quotes(text):
text = re.sub(r"[\u2018\u2019]", "'", text)
text = re.sub(r"[\u201C\u201D]", '"', text)
return text
class Position(Enum):
TOP = 0
CENTER = 1
BOTTOM = 2
LEFT = 3
RIGHT = 4
class Button:
def __init__(self, x, y, image, action, draw_function):
self.x = x
self.y = y
self.image = image
self.action = action
self._width = self.image.get_width()
self._height = self.image.get_height()
self._visible = False
self._draw_function = draw_function
def is_in_bounds(self, position):
x, y = position
return (
self.x <= x <= self.x + self.width and self.y <= y <= self.y + self.height
)
def show(self):
self._draw_function(self.image, self.x, self.y)
self._visible = True
@property
def width(self):
return self._width
@property
def height(self):
return self._height
@property
def visible(self):
return self._visible
class Textarea:
def __init__(self, x, y, width, height):
self.x = x
self.y = y
self.width = width
self.height = height
@property
def size(self):
return {"width": self.width, "height": self.height}
class Book:
def __init__(self, rotation=0):
self.paragraph_number = 0
self.page = 0
self.pages = []
self.stories = []
self.story = 0
self.rotation = rotation
self.images = {}
self.fonts = {}
self.buttons = {}
self.width = 0
self.height = 0
self.textarea = None
self.screen = None
self.saved_screen = None
self._sleeping = False
self.sleep_check_delay = 0.1
self._sleep_check_thread = None
self._sleep_request = False
self._running = True
self._busy = False
self._loading = False
# Use a Double Ended Queue to handle the heavy lifting
self._closing_times = deque(maxlen=QUIT_CLOSES)
# Use a cursor to keep track of where we are in the text area
self.cursor = {"x": 0, "y": 0}
self.listener = None
self.backlight = Backlight()
self.pixels = neopixel.NeoPixel(
NEOPIXEL_PIN,
NEOPIXEL_COUNT,
brightness=NEOPIXEL_BRIGHTNESS,
pixel_order=NEOPIXEL_ORDER,
auto_write=False,
)
self._prompt = ""
self._load_thread = threading.Thread(target=self._handle_loading_status)
self._load_thread.start()
def start(self):
# Output to the LCD instead of the console
os.putenv("DISPLAY", ":0")
self._set_status_color(NEOPIXEL_LOADING_COLOR)
# Initialize the display
pygame.init()
self.screen = pygame.display.set_mode((0, 0), pygame.FULLSCREEN)
pygame.mouse.set_visible(False)
self.screen.fill((255, 255, 255))
self.width = self.screen.get_height()
self.height = self.screen.get_width()
# Preload welcome image and display it
self._load_image("welcome", WELCOME_IMAGE)
self.display_welcome()
# Load the prompt file
with open(PROMPT_FILE, "r") as f:
self._prompt = f.read()
# Initialize the Listener
self.listener = Listener(
openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT
)
# Preload remaining images
self._load_image("background", BACKGROUND_IMAGE)
self._load_image("loading", LOADING_IMAGE)
# Preload fonts
self._load_font("title", TITLE_FONT)
self._load_font("text", TEXT_FONT)
# Add buttons
back_button_image = pygame.image.load(IMAGES_PATH + BUTTON_BACK_IMAGE)
next_button_image = pygame.image.load(IMAGES_PATH + BUTTON_NEXT_IMAGE)
new_button_image = pygame.image.load(IMAGES_PATH + BUTTON_NEW_IMAGE)
button_spacing = (
self.width
- (
back_button_image.get_width()
+ next_button_image.get_width()
+ new_button_image.get_width()
)
) // 4
button_ypos = (
self.height
- PAGE_NAV_HEIGHT
+ (PAGE_NAV_HEIGHT - next_button_image.get_height()) // 2
)
self._load_button(
"back",
button_spacing,
button_ypos,
back_button_image,
self.previous_page,
self._display_surface,
)
self._load_button(
"new",
button_spacing * 2 + back_button_image.get_width(),
button_ypos,
new_button_image,
self.new_story,
self._display_surface,
)
self._load_button(
"next",
button_spacing * 3
+ back_button_image.get_width()
+ new_button_image.get_width(),
button_ypos,
next_button_image,
self.next_page,
self._display_surface,
)
# Add Text Area
self.textarea = Textarea(
PAGE_SIDE_MARGIN,
PAGE_TOP_MARGIN,
self.width - PAGE_SIDE_MARGIN * 2,
self.height - PAGE_NAV_HEIGHT - PAGE_TOP_MARGIN - PAGE_BOTTOM_MARGIN,
)
# Start the sleep check thread after everything is initialized
self._sleep_check_thread = threading.Thread(target=self._handle_sleep)
self._sleep_check_thread.start()
self._set_status_color(NEOPIXEL_READING_COLOR)
def deinit(self):
self._running = False
self._sleep_check_thread.join()
self._load_thread.join()
self.backlight.power = True
def _handle_sleep(self):
reed_switch = digitalio.DigitalInOut(REED_SWITCH_PIN)
reed_switch.direction = digitalio.Direction.INPUT
reed_switch.pull = digitalio.Pull.UP
while self._running:
if self._sleeping and reed_switch.value: # Book Open
self._wake()
elif not self._sleeping and not reed_switch.value:
self._sleep()
time.sleep(self.sleep_check_delay)
def _handle_loading_status(self):
pulse = Pulse(
self.pixels,
speed=NEOPIXEL_PULSE_SPEED,
color=NEOPIXEL_LOADING_COLOR,
period=3,
)
while self._running:
if self._loading:
pulse.animate()
time.sleep(0.1)
# Turn off the Neopixels
self.pixels.fill(0)
self.pixels.show()
def _set_status_color(self, status_color):
if status_color not in [
NEOPIXEL_READING_COLOR,
NEOPIXEL_WAITING_COLOR,
NEOPIXEL_SLEEP_COLOR,
NEOPIXEL_LOADING_COLOR,
]:
raise ValueError(f"Invalid status color {status_color}.")
# Handle loading color by setting the loading flag
self._loading = status_color == NEOPIXEL_LOADING_COLOR
# Handle other status colors by setting the neopixels
if status_color != NEOPIXEL_LOADING_COLOR:
self.pixels.fill(status_color)
self.pixels.show()
def handle_events(self):
if not self._sleeping:
for event in pygame.event.get():
if event.type == pygame.QUIT:
raise SystemExit
if event.type == pygame.MOUSEBUTTONDOWN:
self._handle_mousedown_event(event)
time.sleep(0.1)
def _handle_mousedown_event(self, event):
if event.button == 1:
# If button pressed while visible, trigger action
coords = self._rotate_mouse_pos(event.pos)
for button in self.buttons.values():
if button.visible and button.is_in_bounds(coords):
button.action()
def _rotate_mouse_pos(self, point):
# Recalculate the mouse position based on the rotation of the screen
# So that we have the coordinates relative to the upper left corner of the screen
angle = 360 - self.rotation
y, x = point
x -= self.width // 2
y -= self.height // 2
x, y = x * math.sin(math.radians(angle)) + y * math.cos(
math.radians(angle)
), x * math.cos(math.radians(angle)) - y * math.sin(math.radians(angle))
x += self.width // 2
y += self.height // 2
return (round(x), round(y))
def _load_image(self, name, filename):
try:
image = pygame.image.load(IMAGES_PATH + filename)
self.images[name] = image
except pygame.error:
pass
def _load_button(self, name, x, y, image, action, display_surface):
self.buttons[name] = Button(x, y, image, action, display_surface)
def _load_font(self, name, details):
self.fonts[name] = pygame.font.Font(details[0], details[1])
def _display_surface(self, surface, x=0, y=0, target_surface=None):
# Display a surface either positionally or with a specific x,y coordinate
buffer = self._create_transparent_buffer((self.width, self.height))
buffer.blit(surface, (x, y))
if target_surface is None:
buffer = pygame.transform.rotate(buffer, self.rotation)
self.screen.blit(buffer, (0, 0))
else:
target_surface.blit(buffer, (0, 0))
def _fade_in_surface(self, surface, x, y, fade_time, fade_steps=50):
background = self._create_transparent_buffer((self.width, self.height))
self._display_surface(self.images["background"], 0, 0, background)
buffer = self._create_transparent_buffer(surface.get_size())
fade_delay = round(
fade_time / fade_steps * 1000
) # Time to delay in ms between each fade step
def draw_alpha(alpha):
buffer.blit(background, (-x, -y))
surface.set_alpha(alpha)
buffer.blit(surface, (0, 0))
self._display_surface(buffer, x, y)
pygame.display.update()
for alpha in range(0, 255, round(255 / fade_steps)):
draw_alpha(alpha)
pygame.time.wait(fade_delay)
if self._sleep_request:
draw_alpha(255) # Finish up quickly
return
def display_current_page(self):
self._busy = True
self._display_surface(self.images["background"], 0, 0)
pygame.display.update()
print(f"Loading page {self.page} of {len(self.pages)}")
page_data = self.pages[self.page]
# Display the title
if page_data["title"]:
self._display_title_text(page_data["title"])
self._fade_in_surface(
page_data["buffer"],
self.textarea.x,
self.textarea.y + page_data["text_position"],
TEXT_FADE_TIME,
TEXT_FADE_STEPS,
)
# Display the navigation buttons
if self.page > 0 or self.story > 0:
self.buttons["back"].show()
self.buttons["next"].show()
self.buttons["new"].show()
pygame.display.update()
self._busy = False
@staticmethod
def _create_transparent_buffer(size):
if isinstance(size, (tuple, list)):
(width, height) = size
elif isinstance(size, dict):
width = size["width"]
height = size["height"]
else:
raise ValueError(f"Invalid size {size}. Should be tuple, list, or dict.")
buffer = pygame.Surface((width, height), pygame.SRCALPHA, 32)
buffer = buffer.convert_alpha()
return buffer
def _display_title_text(self, text, y=0):
# Render the title as multiple lines if too big
lines = self._wrap_text(text, self.fonts["title"], self.textarea.width)
self.cursor["y"] = y
delay_value = WORD_DELAY
for line in lines:
words = line.split(" ")
self.cursor["x"] = (
self.textarea.width // 2 - self.fonts["title"].size(line)[0] // 2
)
for word in words:
text = self.fonts["title"].render(word + " ", True, TITLE_COLOR)
if self._sleep_request:
delay_value = 0
self._display_surface(
text,
self.cursor["x"] + self.textarea.x,
self.cursor["y"] + self.textarea.y,
)
else:
self._fade_in_surface(
text,
self.cursor["x"] + self.textarea.x,
self.cursor["y"] + self.textarea.y,
TITLE_FADE_TIME,
TITLE_FADE_STEPS,
)
pygame.display.update()
self.cursor["x"] += text.get_width()
time.sleep(delay_value)
self.cursor["y"] += self.fonts["title"].size(line)[1]
def _title_text_height(self, text):
lines = self._wrap_text(text, self.fonts["title"], self.textarea.width)
height = 0
for line in lines:
height += self.fonts["title"].size(line)[1]
return height
@staticmethod
def _wrap_text(text, font, width):
lines = []
line = ""
for word in text.split(" "):
if font.size(line + word)[0] < width:
line += word + " "
else:
lines.append(line)
line = word + " "
lines.append(line)
return lines
def previous_page(self):
if self.page > 0 or self.story > 0:
self.page -= 1
if self.page < 0:
self.story -= 1
self.load_story(self.stories[self.story])
self.page = len(self.pages) - 1
self.display_current_page()
def next_page(self):
self.page += 1
if self.page >= len(self.pages):
if self.story < len(self.stories) - 1:
self.story += 1
self.load_story(self.stories[self.story])
self.page = 0
else:
self.generate_new_story()
self.display_current_page()
def new_story(self):
self.generate_new_story()
self.display_current_page()
def display_loading(self):
self._display_surface(self.images["loading"], 0, 0)
pygame.display.update()
self._set_status_color(NEOPIXEL_LOADING_COLOR)
def display_welcome(self):
self._display_surface(self.images["welcome"], 0, 0)
pygame.display.update()
def display_message(self, message):
self._busy = True
self._display_surface(self.images["background"], 0, 0)
height = self._title_text_height(message)
self._display_title_text(message, self.height // 2 - height // 2)
self._busy = False
def load_story(self, story):
# Parse out the title and story and render into pages
self._busy = True
self.pages = []
if not story.startswith("Title: "):
print("Unexpected story format from ChatGPT. Missing Title.")
title = "A Story"
else:
title = story.split("Title: ")[1].split("\n\n")[0]
page = self._add_page(title)
paragraphs = story.split("\n\n")[1:]
for paragraph in paragraphs:
lines = self._wrap_text(paragraph, self.fonts["text"], self.textarea.width)
for line in lines:
self.cursor["x"] = 0
text = self.fonts["text"].render(line, True, TEXT_COLOR)
if (
self.cursor["y"] + self.fonts["text"].get_height()
> page["buffer"].get_height()
):
page = self._add_page()
self._display_surface(
text, self.cursor["x"], self.cursor["y"], page["buffer"]
)
self.cursor["y"] += self.fonts["text"].size(line)[1]
if self.cursor["y"] > 0:
self.cursor["y"] += PARAGRAPH_SPACING
print(f"Loaded story at index {self.story} with {len(self.pages)} pages")
self._set_status_color(NEOPIXEL_READING_COLOR)
self._busy = False
def _add_page(self, title=None):
page = {
"title": title,
"text_position": 0,
}
if title:
page["text_position"] = self._title_text_height(title) + PARAGRAPH_SPACING
page["buffer"] = self._create_transparent_buffer(
(self.textarea.width, self.textarea.height - page["text_position"])
)
self.cursor["y"] = 0
self.pages.append(page)
return page
def generate_new_story(self):
self._busy = True
self.display_message("Speak aloud the story you wish to read.")
if self._sleep_request:
self._busy = False
time.sleep(0.2)
return
def show_listening():
# Pause for a beat because the listener doesn't
# immediately start listening sometimes
time.sleep(ALSA_ERROR_DELAY)
self.pixels.fill(NEOPIXEL_WAITING_COLOR)
self.pixels.show()
self.listener.listen(ready_callback=show_listening)
if self._sleep_request:
self._busy = False
return
if not self.listener.speech_waiting():
# No response from user, so return
print("No response from user.")
return
story_request = self.listener.recognize()
print(f"Whisper heard: {story_request}")
story_prompt = self._make_story_prompt(story_request)
self.display_loading()
response = self._sendchat(story_prompt)
if self._sleep_request:
self._busy = False
return
print(response)
self._busy = True
self.stories.append(response)
self.story = len(self.stories) - 1
self.page = 0
self._busy = False
self.load_story(response)
def _sleep(self):
# Set a sleep request flag so that any busy threads know to finish up
self._sleep_request = True
if self.listener.is_listening():
self.listener.stop_listening()
while self._busy:
time.sleep(0.1)
self._sleep_request = False
if (
len(self._closing_times) == 0
or (time.monotonic() - self._closing_times[-1]) > QUIT_DEBOUNCE_DELAY
):
self._closing_times.append(time.monotonic())
# Check if we've closed the book a certain number of times
# within a certain number of seconds
if (
len(self._closing_times) == QUIT_CLOSES
and self._closing_times[-1] - self._closing_times[0] < QUIT_TIME_PERIOD
):
self._running = False
return
self._sleeping = True
self._set_status_color(NEOPIXEL_SLEEP_COLOR)
self.sleep_check_delay = 0
self.backlight.power = False
def _wake(self):
# Turn on the screen
self.backlight.power = True
self.sleep_check_delay = 0.1
self._set_status_color(NEOPIXEL_READING_COLOR)
self._sleeping = False
def _make_story_prompt(self, request):
return self._prompt.format(
STORY_WORD_LENGTH=STORY_WORD_LENGTH, STORY_REQUEST=request
)
def _sendchat(self, prompt):
response = ""
print("Sending to chatGPT")
print("Prompt: ", prompt)
# Package up the text to send to ChatGPT
stream = openai.chat.completions.create(
model=CHATGPT_MODEL,
messages=[
{"role": "system", "content": SYSTEM_ROLE},
{"role": "user", "content": prompt},
],
stream=True,
)
for chunk in stream:
if chunk.choices[0].delta.content is not None:
response += chunk.choices[0].delta.content
if self._sleep_request:
return None
# Send the heard text to ChatGPT and return the result
return strip_fancy_quotes(response)
@property
def running(self):
return self._running
@property
def sleeping(self):
return self._sleeping
def parse_args():
parser = argparse.ArgumentParser()
# Book will only be rendered vertically for the sake of simplicity
parser.add_argument(
"--rotation",
type=int,
choices=[90, 270],
dest="rotation",
action="store",
default=90,
help="Rotate everything on the display by this amount",
)
return parser.parse_args()
def main(args):
book = Book(args.rotation)
try:
book.start()
while len(book.pages) == 0:
if not book.sleeping:
book.generate_new_story()
book.display_current_page()
while book.running:
book.handle_events()
except KeyboardInterrupt:
pass
finally:
book.deinit()
pygame.quit()
if __name__ == "__main__":
main(parse_args())
Configuration Options
This project has a lot of configuration options to get it to run exactly as you would like. In order to get through them, they will be discussed in groups.
General Settings
-
STORY_WORD_LENGTH: The approximate number of words used in the generated stories. -
REED_SWITCH_PIN: The pin that your reed switch is wired to in case your wiring varies from the wiring in this guide. -
NEOPIXEL_PIN: The pin your NeoPixels are wired to. There are very few PWM pins on the Pi, so this shouldn't change. -
API_KEYS_FILE: The location of your keys.txt file. By default, it points to your home directory. -
PROMPT_FILE: The location of your bookprompt.txt file. This was placed in the /boot folder for your convenience if you enabled the read-only file system.
Other Setting Groups
- Quit Settings: Gesture settings related to quitting the app
- NeoPixel Settings: NeoPixel parameters and Colors
- Image Settings: The filenames of the images
- Asset Paths: The file locations of the images and fonts. By default they are relative to the main script.
- Font Path & Size: The font files and font sizes for the Title and Text
- Delay Settings: Used to control the animations of the text
- Whitespace Settings: Settings to control the amount of whitespace around the text.
- ChatGPT Parameters: The Basic ChatGPT settings
- Speech Recognition Parameters: Parameters to control the voice input values
External Config Files
Next the script pulls in external configuration files including keys.txt and bookprompt.txt. The keys are read in as a config file, which is a built-in Python mechanism that makes it easy to parse and the prompt is read as a plain text file.
UI Element Classes
The Button and Textarea classes are intended to make handling the buttons and text areas easier. The buttons are the clickable elements and the text area is the area on the page that displays text. They mostly store all the relevant information related to their particular special purpose, though the button also handles displaying and checking if you clicked within its boundaries.
The Book Class
The book class is the main class that handles everything related to the book and handles the majority of the logic. Here are some of the notable functions:
- Start: Handles the starting and initialization of the code. This is what runs while it initially displays the welcome screen. It sets up Pygame. Then it loads the images, buttons, and fonts and starts some subthreads.
- Handle Functions: These run in the background and handle various tasks such as putting the book to sleep when the reed switch is closed, displaying the pulsing green NeoPixels while loading, and handling button presses.
- Display Surface: Handles displaying and rotating the image
- Fade In Surface: Handles the fade in animation of the title text
- Display Current Page: Displays the page of the story we are currently set to as well as the buttons.
- Display Title Text: Handles slicing up and centering the title text
- Previous Page, Next Page, New Story: Button handlers for navigation or making a new story.
- Display Loading, Welcome, Message: Display the appropriate special page
- Load Story: Parse the story output from ChatGPT into paragraphs and a title
- Add Page: Creates a new page in memory which includes a title and the height at which the text should display.
- Generate New Story: Prompts the user for a story. Display a message and use the listener class to listen and recognize the speech.
- Sleep and Wake: Put the book in sleep or wake modes. This turns off the backlight and coordinates with other threads to put the book into a sleep state or wake from it.
- SendChat: Formats the prompt into a data structure that is passed along to ChatGPT. Special characters that may be returned are stripped.
Parse Arguments
The app only takes a single argument, which is rotation. By default it is set to 90 degrees, but can also be set to 270 if the display is installed upside down.
Main
This is the entry point for the application and handles initializing the book and calling handle events in a loop while the book is running.
# SPDX-FileCopyrightText: 2023 Melissa LeBlanc-Williams for Adafruit Industries
#
# SPDX-License-Identifier: MIT
import time
import speech_recognition as sr
class Listener:
def __init__(
self, api_key, energy_threshold=300, record_timeout=30
):
self.listener_handle = None
self.microphone = sr.Microphone()
self.recognizer = sr.Recognizer()
self.recognizer.energy_threshold = energy_threshold
self.recognizer.dynamic_energy_threshold = False
self.recognizer.pause_threshold = 1
self.phrase_time = time.monotonic()
with self.microphone as source:
self.recognizer.adjust_for_ambient_noise(
source
) # we only need to calibrate once, before we start listening
self.record_timeout = record_timeout
self._audio = None
self.listener_handle = None
self.api_key = api_key
def listen(self, ready_callback=None):
print("Start listening...")
self._start_listening()
if ready_callback:
ready_callback()
while (
self.listener_handle and not self.speech_waiting()
):
time.sleep(0.1)
self.stop_listening()
def _save_audio_callback(self, _, audio):
print("Saving audio")
self._audio = audio
def _start_listening(self):
if not self.listener_handle:
self.listener_handle = self.recognizer.listen_in_background(
self.microphone,
self._save_audio_callback,
phrase_time_limit=self.record_timeout,
)
def stop_listening(self, wait_for_stop=False):
if self.listener_handle:
self.listener_handle(wait_for_stop=wait_for_stop)
self.listener_handle = None
print("Stop listening...")
def is_listening(self):
return self.listener_handle is not None
def speech_waiting(self):
return self._audio is not None
def recognize(self):
if self._audio:
# Transcribe the audio data to text using Whisper
print("Recognizing...")
attempts = 0
while attempts < 3:
try:
result = self.recognizer.recognize_whisper_api(
self._audio, api_key=self.api_key
)
self._audio = None
return result.strip()
except sr.RequestError as e:
print(f"Error: {e}")
time.sleep(3)
attempts += 1
print("Retry attempt: ", attempts)
print("Failed to recognize")
return None
return None
The Listener Class is fairly simple. It is mostly a wrapper for the SpeechRecognition library that listens in the background when the listening has started and is able to be stopped early in case the book needs to go to sleep.
The recognize function simply makes use of some recently added functionality in SpeechRecognition that interfaces with OpenAI's whisper API. If there is any communication issue, it will make up to 3 attempts before failing.
Page last edited March 04, 2025
Text editor powered by tinymce.