diff --git a/main.py b/main.py index eb2e8e6..d6a4720 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ import pyautogui as pg from pynput import keyboard import speech_recognition as sr +from faster_whisper import WhisperModel from string import punctuation from slang import replacements import re @@ -8,22 +9,26 @@ import subprocess from time import sleep from mss import mss import numpy as np +from io import BytesIO import sounddevice # turns off alsa error logging r = sr.Recognizer() +model = WhisperModel("distil-small.en", device="cuda", compute_type="float16") + +print("Testing Sound") with sr.Microphone() as source: r.adjust_for_ambient_noise(source, duration=3) +print("ready!") def recognize_text() -> str: with sr.Microphone() as source: - print("Say something!") audio = r.listen(source) - result = r.recognize_faster_whisper(audio, model="distil-small.en", beam_size=5, language="en", condition_on_previous_text=False) + results, _ = model.transcribe(BytesIO(audio.get_wav_data()), beam_size=5, language="en", condition_on_previous_text=False) - return result + return " ".join([segment.text for segment in results]) def chat_type(): @@ -49,13 +54,13 @@ def on_press(key): if key is not keyboard.Key.home: return - print("triggered!") + print("Listening...") command = recognize_text() print(f"Heard: {command}") # cleanup command - command = command.lower() + command = command.lower().strip() for char in punctuation: command = command.replace(char, '') @@ -86,7 +91,7 @@ def on_press(key): # sleep(0.074) # pg.keyUp("enter") - elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon"]): + elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon", "obliterate", "delete", "piss"]): print("MAXIMUM PULSE!!!!") pg.keyDown("q") sleep(0.032)