Reuse the same model instead of instantiating a new one, to save VRAM

2025-02-13 19:06:40 -05:00 · 2025-02-13 19:06:40 -05:00 · 01f13e5ab7
commit 01f13e5ab7
parent b47e9cddbd
1 changed files with 11 additions and 6 deletions
--- a/main.py
+++ b/main.py
@@ -1,6 +1,7 @@
 import pyautogui as pg
 from pynput import keyboard
 import speech_recognition as sr
+from faster_whisper import WhisperModel
 from string import punctuation
 from slang import replacements
 import re
@@ -8,22 +9,26 @@ import subprocess
 from time import sleep
 from mss import mss
 import numpy as np
+from io import BytesIO
 import sounddevice # turns off alsa error logging

 r = sr.Recognizer()

+model = WhisperModel("distil-small.en", device="cuda", compute_type="float16")
+
+print("Testing Sound")
 with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source, duration=3)
+print("ready!")

 def recognize_text() -> str:

    with sr.Microphone() as source:
-        print("Say something!")
        audio = r.listen(source)

-        result = r.recognize_faster_whisper(audio, model="distil-small.en", beam_size=5, language="en", condition_on_previous_text=False)
+        results, _ = model.transcribe(BytesIO(audio.get_wav_data()), beam_size=5, language="en", condition_on_previous_text=False)

-    return result
+    return " ".join([segment.text for segment in results])

 def chat_type():

@@ -49,13 +54,13 @@ def on_press(key):
    if key is not keyboard.Key.home:
        return
    
-    print("triggered!")
+    print("Listening...")
    command = recognize_text()

    print(f"Heard: {command}")

    # cleanup command
-    command = command.lower()
+    command = command.lower().strip()
    for char in punctuation:
        command = command.replace(char, '')

@@ -86,7 +91,7 @@ def on_press(key):
        # sleep(0.074)
        # pg.keyUp("enter")

-    elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon"]):
+    elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon", "obliterate", "delete", "piss"]):
        print("MAXIMUM PULSE!!!!")
        pg.keyDown("q")
        sleep(0.032)