add config file and minor refactoring

This commit is contained in:
ultrablob 2025-02-13 20:00:21 -05:00
parent 01f13e5ab7
commit 7f5d221c56
4 changed files with 56 additions and 28 deletions

View file

@ -36,3 +36,8 @@ Press the hotkey (default is Home) and then talk to jarvis
- "hey jarvis, thats a clip" - "hey jarvis, thats a clip"
Check out the code to see the specific keywords/phrases, as all NLP is regex/string-based, not generative AI. Check out the code to see the specific keywords/phrases, as all NLP is regex/string-based, not generative AI.
### Resource Usage
With the `distil-small.en` model, it uses about 500 MB of VRAM on my system.

16
config.py Normal file
View file

@ -0,0 +1,16 @@
# Whisper model name that main.py passes to WhisperModel.
model = "distil-small.en"

# Substring substitutions main.py applies, in this order, to the cleaned-up
# command text (presumably to correct common mis-transcriptions).
replacements = {
    "gigi": "gg",
    "heels": "heals",
    "heeling": "healing",
    "heel": "heal",
}

# A command containing any of these substrings triggers the
# "MAXIMUM PULSE!!!!" branch in main.py.
maximum_pulse = [
    "maximum", "pulse", "ball", "remove", "eliminate",
    "murder", "goon", "obliterate", "delete", "piss",
]

43
main.py
View file

@ -3,7 +3,7 @@ from pynput import keyboard
import speech_recognition as sr import speech_recognition as sr
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
from string import punctuation from string import punctuation
from slang import replacements import config
import re import re
import subprocess import subprocess
from time import sleep from time import sleep
@ -14,27 +14,38 @@ import sounddevice # turns off alsa error logging
# Shared speech recognizer used for every capture in this module.
r = sr.Recognizer()
# Per the SpeechRecognition docs: seconds of non-speaking audio before a
# phrase is considered complete.
r.pause_threshold = 2
# The model name is read from config.py so it can be changed without touching
# this file.
model = WhisperModel(config.model, device="cuda", compute_type="int8_float16")

print("Testing Sound")
with sr.Microphone() as source:
    # Sample the microphone for 3 seconds so the recognizer can calibrate
    # itself against the ambient noise level.
    r.adjust_for_ambient_noise(source, duration=3)
print("ready!")
def recognize_text() -> str:
    """Record one phrase from the microphone and transcribe it.

    Captures audio with the module-level recognizer ``r`` and passes it, as
    in-memory WAV data, to the module-level Whisper ``model``.

    Returns:
        The text of every transcribed segment, joined with single spaces.
    """
    with sr.Microphone() as source:
        audio = r.listen(source)

    # Only the captured audio is needed from here on, so transcription runs
    # after the microphone context has been closed.
    results, _ = model.transcribe(
        BytesIO(audio.get_wav_data()),
        beam_size=5,
        language="en",
        condition_on_previous_text=False,
    )
    return " ".join([segment.text for segment in results])
def chat_type(): def chat_type():
screen = mss() #! bad for performance but necessary screen = mss() #! bad for performance but necessary
screenshot = np.array(screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100})) screenshot = np.array(
screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100})
)
try: try:
pg.locate("ui/team-chat.png", screenshot, confidence=0.9) pg.locate("ui/team-chat.png", screenshot, confidence=0.9)
return "team" return "team"
@ -49,6 +60,7 @@ def chat_type():
return None return None
def on_press(key): def on_press(key):
if key is not keyboard.Key.home: if key is not keyboard.Key.home:
@ -62,16 +74,20 @@ def on_press(key):
# cleanup command # cleanup command
command = command.lower().strip() command = command.lower().strip()
for char in punctuation: for char in punctuation:
command = command.replace(char, '') command = command.replace(char, "")
for original, new in replacements.items(): for original, new in config.replacements.items():
command = command.replace(original, new) command = command.replace(original, new)
print(f"Cleaned up command: {command}") print(f"Cleaned up command: {command}")
if "chat" in command: if any(keyword in command for keyword in ["type", "say", "write"]):
message = re.search(r"type (.+?)(and |in |\n|$)", command).groups(0)[0].strip() message = (
print(f"Typing: {message} in team chat") re.search(r"(type|say|write) (.+?)(and |in |\n|$)", command)
.groups(0)[1]
.strip()
)
print(f"Typing: {message} in chat")
pg.keyDown("enter") pg.keyDown("enter")
sleep(0.041) sleep(0.041)
@ -79,7 +95,7 @@ def on_press(key):
sleep(0.94) sleep(0.94)
current_chat = chat_type() current_chat = chat_type()
if current_chat in command: if current_chat == None or current_chat in command:
pass # no change needed pass # no change needed
elif "match" in command or "team" in command: elif "match" in command or "team" in command:
pg.keyDown("tab") pg.keyDown("tab")
@ -91,7 +107,7 @@ def on_press(key):
# sleep(0.074) # sleep(0.074)
# pg.keyUp("enter") # pg.keyUp("enter")
elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon", "obliterate", "delete", "piss"]): elif any(keyword in command for keyword in config.maximum_pulse):
print("MAXIMUM PULSE!!!!") print("MAXIMUM PULSE!!!!")
pg.keyDown("q") pg.keyDown("q")
sleep(0.032) sleep(0.032)
@ -101,9 +117,6 @@ def on_press(key):
subprocess.run("/home/ultrablob/Videos/Clips/save_clip.sh") subprocess.run("/home/ultrablob/Videos/Clips/save_clip.sh")
# Collect events until released.
# on_press does the real work; key releases are deliberately ignored.
with keyboard.Listener(on_press=on_press, on_release=lambda event: None) as listener:
    # Block the main thread for as long as the listener is running.
    listener.join()

View file

@ -1,6 +0,0 @@
# Substring substitutions imported by main.py and applied, in order, to the
# cleaned-up command text (presumably to correct common mis-transcriptions).
replacements = {
    "gigi": "gg",
    "heels": "heals",
    "heeling": "healing",
    "heel": "heal",
}