diff --git a/README.md b/README.md index 7670ac6..c9b7a50 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,4 @@ Press the hotkey (default is Home) and then talk to jarvis - "jarvis, clip that" - "hey jarvis, thats a clip" -Checkout the code to see specific keywords/phrases as all NLP is regex/string based, not generative AI - - -### Resource Usage - -With the `distil-small.en` model, on my system it uses about 500mb of VRAM +Checkout the code to see specific keywords/phrases as all NLP is regex/string based, not generative AI \ No newline at end of file diff --git a/config.py b/config.py deleted file mode 100644 index 399fd8e..0000000 --- a/config.py +++ /dev/null @@ -1,16 +0,0 @@ -model = "distil-small.en" - -replacements = {"gigi": "gg", "heels": "heals", "heeling": "healing", "heel": "heal"} - -maximum_pulse = [ - "maximum", - "pulse", - "ball", - "remove", - "eliminate", - "murder", - "goon", - "obliterate", - "delete", - "piss", -] \ No newline at end of file diff --git a/main.py b/main.py index 6f864c9..eb2e8e6 100644 --- a/main.py +++ b/main.py @@ -1,51 +1,35 @@ import pyautogui as pg from pynput import keyboard import speech_recognition as sr -from faster_whisper import WhisperModel from string import punctuation -import config +from slang import replacements import re import subprocess from time import sleep from mss import mss import numpy as np -from io import BytesIO -import sounddevice # turns off alsa error logging +import sounddevice # turns off alsa error logging r = sr.Recognizer() -r.pause_threshold = 2 - -model = WhisperModel(config.model, device="cuda", compute_type="int8_float16") - -print("Testing Sound") with sr.Microphone() as source: r.adjust_for_ambient_noise(source, duration=3) -print("ready!") - def recognize_text() -> str: with sr.Microphone() as source: + print("Say something!") audio = r.listen(source) - results, _ = model.transcribe( - BytesIO(audio.get_wav_data()), - beam_size=5, - language="en", - condition_on_previous_text=False, - ) - - return " ".join([segment.text for segment in results]) + result = r.recognize_faster_whisper(audio, model="distil-small.en", beam_size=5, language="en", condition_on_previous_text=False) + return result def chat_type(): - screen = mss() #! bad for performance but necessary + screen = mss() #! bad for performance but necessary - screenshot = np.array( - screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100}) - ) + screenshot = np.array(screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100})) try: pg.locate("ui/team-chat.png", screenshot, confidence=0.9) return "team" @@ -60,43 +44,38 @@ def chat_type(): return None - def on_press(key): if key is not keyboard.Key.home: return - - print("Listening...") + + print("triggered!") command = recognize_text() print(f"Heard: {command}") # cleanup command - command = command.lower().strip() + command = command.lower() for char in punctuation: - command = command.replace(char, "") + command = command.replace(char, '') - for original, new in config.replacements.items(): + for original, new in replacements.items(): command = command.replace(original, new) print(f"Cleaned up command: {command}") - if any(keyword in command for keyword in ["type", "say", "write"]): - message = ( - re.search(r"(type|say|write) (.+?)(and |in |\n|$)", command) - .groups(0)[1] - .strip() - ) - print(f"Typing: {message} in chat") - + if "chat" in command: + message = re.search(r"type (.+?)(and |in |\n|$)", command).groups(0)[0].strip() + print(f"Typing: {message} in team chat") + pg.keyDown("enter") sleep(0.041) pg.keyUp("enter") sleep(0.94) current_chat = chat_type() - if current_chat == None or current_chat in command: - pass # no change needed + if current_chat in command: + pass # no change needed elif "match" in command or "team" in command: pg.keyDown("tab") sleep(0.041) @@ -107,7 +86,7 @@ def on_press(key): # sleep(0.074) # pg.keyUp("enter") - elif any(keyword in command for keyword in config.maximum_pulse): + elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon"]): print("MAXIMUM PULSE!!!!") pg.keyDown("q") sleep(0.032) @@ -116,7 +95,10 @@ def on_press(key): elif "clip" in command: subprocess.run("/home/ultrablob/Videos/Clips/save_clip.sh") + # Collect events until released -with keyboard.Listener(on_press=on_press, on_release=lambda event: None) as listener: - listener.join() +with keyboard.Listener( + on_press=on_press, + on_release=lambda event: None) as listener: + listener.join() \ No newline at end of file diff --git a/slang.py b/slang.py new file mode 100644 index 0000000..906ba55 --- /dev/null +++ b/slang.py @@ -0,0 +1,6 @@ +replacements = { + "gigi": "gg", + "heels": "heals", + "heeling": "healing", + "heel": "heal" +} \ No newline at end of file