Reuse the same model instead of instantiating a new one, to conserve VRAM

This commit is contained in:
ultrablob 2025-02-13 19:06:40 -05:00
parent b47e9cddbd
commit 01f13e5ab7

17
main.py
View file

@ -1,6 +1,7 @@
import pyautogui as pg
from pynput import keyboard
import speech_recognition as sr
from faster_whisper import WhisperModel
from string import punctuation
from slang import replacements
import re
@ -8,22 +9,26 @@ import subprocess
from time import sleep
from mss import mss
import numpy as np
from io import BytesIO
import sounddevice # turns off alsa error logging
# Shared speech recognizer and Whisper model, created once at import time so
# recognize_text() can reuse them instead of building new ones per call.
r = sr.Recognizer()
model = WhisperModel("distil-small.en", device="cuda", compute_type="float16")

# Listen to 3 seconds of ambient noise so the recognizer can calibrate itself
# before the first real capture.
print("Testing Sound")
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source, duration=3)
print("ready!")
def recognize_text() -> str:
    """Record one utterance from the microphone and transcribe it.

    Transcription goes through the module-level ``model`` so the Whisper
    weights are not loaded again on every call.

    Returns:
        The text of all transcribed segments, joined by single spaces.
    """
    with sr.Microphone() as source:
        print("Say something!")
        audio = r.listen(source)

    # Hand the captured audio to the shared model as an in-memory WAV stream.
    results, _ = model.transcribe(
        BytesIO(audio.get_wav_data()),
        beam_size=5,
        language="en",
        condition_on_previous_text=False,
    )
    # Segment texts may carry their own leading/trailing whitespace; strip each
    # one so the joined command has exactly one space between segments.
    return " ".join(segment.text.strip() for segment in results)
def chat_type():
@ -49,13 +54,13 @@ def on_press(key):
if key is not keyboard.Key.home:
return
print("triggered!")
print("Listening...")
command = recognize_text()
print(f"Heard: {command}")
# cleanup command
command = command.lower()
command = command.lower().strip()
for char in punctuation:
command = command.replace(char, '')
@ -86,7 +91,7 @@ def on_press(key):
# sleep(0.074)
# pg.keyUp("enter")
elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon"]):
elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon", "obliterate", "delete", "piss"]):
print("MAXIMUM PULSE!!!!")
pg.keyDown("q")
sleep(0.032)