Use the same model instead of instantiating a new one, to preserve VRAM

This commit is contained in:
ultrablob 2025-02-13 19:06:40 -05:00
parent b47e9cddbd
commit 01f13e5ab7

17
main.py
View file

@ -1,6 +1,7 @@
import pyautogui as pg import pyautogui as pg
from pynput import keyboard from pynput import keyboard
import speech_recognition as sr import speech_recognition as sr
from faster_whisper import WhisperModel
from string import punctuation from string import punctuation
from slang import replacements from slang import replacements
import re import re
@ -8,22 +9,26 @@ import subprocess
from time import sleep from time import sleep
from mss import mss from mss import mss
import numpy as np import numpy as np
from io import BytesIO
import sounddevice # turns off alsa error logging import sounddevice # turns off alsa error logging
r = sr.Recognizer() r = sr.Recognizer()
model = WhisperModel("distil-small.en", device="cuda", compute_type="float16")
print("Testing Sound")
with sr.Microphone() as source: with sr.Microphone() as source:
r.adjust_for_ambient_noise(source, duration=3) r.adjust_for_ambient_noise(source, duration=3)
print("ready!")
def recognize_text() -> str: def recognize_text() -> str:
with sr.Microphone() as source: with sr.Microphone() as source:
print("Say something!")
audio = r.listen(source) audio = r.listen(source)
result = r.recognize_faster_whisper(audio, model="distil-small.en", beam_size=5, language="en", condition_on_previous_text=False) results, _ = model.transcribe(BytesIO(audio.get_wav_data()), beam_size=5, language="en", condition_on_previous_text=False)
return result return " ".join([segment.text for segment in results])
def chat_type(): def chat_type():
@ -49,13 +54,13 @@ def on_press(key):
if key is not keyboard.Key.home: if key is not keyboard.Key.home:
return return
print("triggered!") print("Listening...")
command = recognize_text() command = recognize_text()
print(f"Heard: {command}") print(f"Heard: {command}")
# cleanup command # cleanup command
command = command.lower() command = command.lower().strip()
for char in punctuation: for char in punctuation:
command = command.replace(char, '') command = command.replace(char, '')
@ -86,7 +91,7 @@ def on_press(key):
# sleep(0.074) # sleep(0.074)
# pg.keyUp("enter") # pg.keyUp("enter")
elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon"]): elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon", "obliterate", "delete", "piss"]):
print("MAXIMUM PULSE!!!!") print("MAXIMUM PULSE!!!!")
pg.keyDown("q") pg.keyDown("q")
sleep(0.032) sleep(0.032)