add config file and minor refactoring

This commit is contained in:
ultrablob 2025-02-13 20:00:21 -05:00
parent 01f13e5ab7
commit 7f5d221c56
4 changed files with 56 additions and 28 deletions

View file

@ -36,3 +36,8 @@ Press the hotkey (default is Home) and then talk to jarvis
- "hey jarvis, thats a clip"
Check out the code to see the specific keywords/phrases; all NLP is regex/string based, not generative AI.
### Resource Usage
With the `distil-small.en` model, it uses about 500 MB of VRAM on my system.

16
config.py Normal file
View file

@ -0,0 +1,16 @@
# Whisper model name; main.py hands this to faster_whisper's WhisperModel.
model = "distil-small.en"

# Transcription fix-ups. main.py applies each pair with str.replace, in this
# order, to the lower-cased, punctuation-stripped command.
replacements = {
    "gigi": "gg",
    "heels": "heals",
    "heeling": "healing",
    "heel": "heal",
}

# Trigger words for the "MAXIMUM PULSE" action. main.py tests each one as a
# substring of the cleaned-up command, so "ball" also matches "balls".
maximum_pulse = [
    "maximum", "pulse", "ball", "remove", "eliminate",
    "murder", "goon", "obliterate", "delete", "piss",
]

43
main.py
View file

@ -3,7 +3,7 @@ from pynput import keyboard
import speech_recognition as sr
from faster_whisper import WhisperModel
from string import punctuation
from slang import replacements
import config
import re
import subprocess
from time import sleep
@ -14,27 +14,38 @@ import sounddevice # turns off alsa error logging
# Shared speech recognizer used by recognize_text().
r = sr.Recognizer()
r.pause_threshold = 2

# Load the Whisper model exactly once, using the name from config.py.
# (Constructing it a second time with the old hard-coded name would only
# waste load time and GPU memory, since the later assignment wins.)
model = WhisperModel(config.model, device="cuda", compute_type="int8_float16")

# Calibrate the recognizer against 3 seconds of ambient noise before
# accepting any commands.
print("Testing Sound")
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source, duration=3)
print("ready!")
def recognize_text() -> str:
    """Record one utterance from the microphone and return its transcript.

    Audio is captured with the module-level recognizer ``r`` and transcribed
    by the module-level Whisper ``model``.

    Returns:
        The text of every transcribed segment, joined with single spaces.
    """
    with sr.Microphone() as source:
        audio = r.listen(source)

    # Transcribe exactly once; a second identical call would only repeat
    # the work and discard the first result.
    results, _ = model.transcribe(
        BytesIO(audio.get_wav_data()),
        beam_size=5,
        language="en",
        condition_on_previous_text=False,
    )
    return " ".join(segment.text for segment in results)
def chat_type():
screen = mss() #! bad for performance but necessary
screenshot = np.array(screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100}))
screenshot = np.array(
screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100})
)
try:
pg.locate("ui/team-chat.png", screenshot, confidence=0.9)
return "team"
@ -49,6 +60,7 @@ def chat_type():
return None
def on_press(key):
if key is not keyboard.Key.home:
@ -62,16 +74,20 @@ def on_press(key):
# cleanup command
command = command.lower().strip()
for char in punctuation:
command = command.replace(char, '')
command = command.replace(char, "")
for original, new in replacements.items():
for original, new in config.replacements.items():
command = command.replace(original, new)
print(f"Cleaned up command: {command}")
if "chat" in command:
message = re.search(r"type (.+?)(and |in |\n|$)", command).groups(0)[0].strip()
print(f"Typing: {message} in team chat")
if any(keyword in command for keyword in ["type", "say", "write"]):
message = (
re.search(r"(type|say|write) (.+?)(and |in |\n|$)", command)
.groups(0)[1]
.strip()
)
print(f"Typing: {message} in chat")
pg.keyDown("enter")
sleep(0.041)
@ -79,7 +95,7 @@ def on_press(key):
sleep(0.94)
current_chat = chat_type()
if current_chat in command:
if current_chat == None or current_chat in command:
pass # no change needed
elif "match" in command or "team" in command:
pg.keyDown("tab")
@ -91,7 +107,7 @@ def on_press(key):
# sleep(0.074)
# pg.keyUp("enter")
elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon", "obliterate", "delete", "piss"]):
elif any(keyword in command for keyword in config.maximum_pulse):
print("MAXIMUM PULSE!!!!")
pg.keyDown("q")
sleep(0.032)
@ -101,9 +117,6 @@ def on_press(key):
subprocess.run("/home/ultrablob/Videos/Clips/save_clip.sh")
# Collect keyboard events until the listener stops. Only key presses matter;
# releases are ignored. A single listener is started so that on_press fires
# once per key press.
with keyboard.Listener(on_press=on_press, on_release=lambda event: None) as listener:
    listener.join()

View file

@ -1,6 +0,0 @@
# Misheard-word fix-ups, keyed by the transcribed text and mapped to the
# intended word. Keyword order is the dict's iteration order.
replacements = dict(
    gigi="gg",
    heels="heals",
    heeling="healing",
    heel="heal",
)