add config file and minor refactoring
This commit is contained in:
parent
01f13e5ab7
commit
7f5d221c56
4 changed files with 56 additions and 28 deletions
|
@ -36,3 +36,8 @@ Press the hotkey (default is Home) and then talk to jarvis
|
||||||
- "hey jarvis, thats a clip"
|
- "hey jarvis, thats a clip"
|
||||||
|
|
||||||
Check out the code to see the specific keywords/phrases, as all NLP is regex/string-based, not generative AI
|
Check out the code to see the specific keywords/phrases, as all NLP is regex/string-based, not generative AI
|
||||||
|
|
||||||
|
|
||||||
|
### Resource Usage
|
||||||
|
|
||||||
|
With the `distil-small.en` model, it uses about 500 MB of VRAM on my system
|
||||||
|
|
16
config.py
Normal file
16
config.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
model = "distil-small.en"
|
||||||
|
|
||||||
|
replacements = {"gigi": "gg", "heels": "heals", "heeling": "healing", "heel": "heal"}
|
||||||
|
|
||||||
|
maximum_pulse = [
|
||||||
|
"maximum",
|
||||||
|
"pulse",
|
||||||
|
"ball",
|
||||||
|
"remove",
|
||||||
|
"eliminate",
|
||||||
|
"murder",
|
||||||
|
"goon",
|
||||||
|
"obliterate",
|
||||||
|
"delete",
|
||||||
|
"piss",
|
||||||
|
]
|
49
main.py
49
main.py
|
@ -3,38 +3,49 @@ from pynput import keyboard
|
||||||
import speech_recognition as sr
|
import speech_recognition as sr
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
from string import punctuation
|
from string import punctuation
|
||||||
from slang import replacements
|
import config
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from mss import mss
|
from mss import mss
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import sounddevice # turns off alsa error logging
|
import sounddevice # turns off alsa error logging
|
||||||
|
|
||||||
r = sr.Recognizer()
|
r = sr.Recognizer()
|
||||||
|
|
||||||
model = WhisperModel("distil-small.en", device="cuda", compute_type="float16")
|
r.pause_threshold = 2
|
||||||
|
|
||||||
|
model = WhisperModel(config.model, device="cuda", compute_type="int8_float16")
|
||||||
|
|
||||||
print("Testing Sound")
|
print("Testing Sound")
|
||||||
with sr.Microphone() as source:
|
with sr.Microphone() as source:
|
||||||
r.adjust_for_ambient_noise(source, duration=3)
|
r.adjust_for_ambient_noise(source, duration=3)
|
||||||
print("ready!")
|
print("ready!")
|
||||||
|
|
||||||
|
|
||||||
def recognize_text() -> str:
|
def recognize_text() -> str:
|
||||||
|
|
||||||
with sr.Microphone() as source:
|
with sr.Microphone() as source:
|
||||||
audio = r.listen(source)
|
audio = r.listen(source)
|
||||||
|
|
||||||
results, _ = model.transcribe(BytesIO(audio.get_wav_data()), beam_size=5, language="en", condition_on_previous_text=False)
|
results, _ = model.transcribe(
|
||||||
|
BytesIO(audio.get_wav_data()),
|
||||||
|
beam_size=5,
|
||||||
|
language="en",
|
||||||
|
condition_on_previous_text=False,
|
||||||
|
)
|
||||||
|
|
||||||
return " ".join([segment.text for segment in results])
|
return " ".join([segment.text for segment in results])
|
||||||
|
|
||||||
|
|
||||||
def chat_type():
|
def chat_type():
|
||||||
|
|
||||||
screen = mss() #! bad for performance but necessary
|
screen = mss() #! bad for performance but necessary
|
||||||
|
|
||||||
screenshot = np.array(screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100}))
|
screenshot = np.array(
|
||||||
|
screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100})
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
pg.locate("ui/team-chat.png", screenshot, confidence=0.9)
|
pg.locate("ui/team-chat.png", screenshot, confidence=0.9)
|
||||||
return "team"
|
return "team"
|
||||||
|
@ -49,6 +60,7 @@ def chat_type():
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def on_press(key):
|
def on_press(key):
|
||||||
|
|
||||||
if key is not keyboard.Key.home:
|
if key is not keyboard.Key.home:
|
||||||
|
@ -62,16 +74,20 @@ def on_press(key):
|
||||||
# cleanup command
|
# cleanup command
|
||||||
command = command.lower().strip()
|
command = command.lower().strip()
|
||||||
for char in punctuation:
|
for char in punctuation:
|
||||||
command = command.replace(char, '')
|
command = command.replace(char, "")
|
||||||
|
|
||||||
for original, new in replacements.items():
|
for original, new in config.replacements.items():
|
||||||
command = command.replace(original, new)
|
command = command.replace(original, new)
|
||||||
|
|
||||||
print(f"Cleaned up command: {command}")
|
print(f"Cleaned up command: {command}")
|
||||||
|
|
||||||
if "chat" in command:
|
if any(keyword in command for keyword in ["type", "say", "write"]):
|
||||||
message = re.search(r"type (.+?)(and |in |\n|$)", command).groups(0)[0].strip()
|
message = (
|
||||||
print(f"Typing: {message} in team chat")
|
re.search(r"(type|say|write) (.+?)(and |in |\n|$)", command)
|
||||||
|
.groups(0)[1]
|
||||||
|
.strip()
|
||||||
|
)
|
||||||
|
print(f"Typing: {message} in chat")
|
||||||
|
|
||||||
pg.keyDown("enter")
|
pg.keyDown("enter")
|
||||||
sleep(0.041)
|
sleep(0.041)
|
||||||
|
@ -79,8 +95,8 @@ def on_press(key):
|
||||||
sleep(0.94)
|
sleep(0.94)
|
||||||
|
|
||||||
current_chat = chat_type()
|
current_chat = chat_type()
|
||||||
if current_chat in command:
|
if current_chat == None or current_chat in command:
|
||||||
pass # no change needed
|
pass # no change needed
|
||||||
elif "match" in command or "team" in command:
|
elif "match" in command or "team" in command:
|
||||||
pg.keyDown("tab")
|
pg.keyDown("tab")
|
||||||
sleep(0.041)
|
sleep(0.041)
|
||||||
|
@ -91,7 +107,7 @@ def on_press(key):
|
||||||
# sleep(0.074)
|
# sleep(0.074)
|
||||||
# pg.keyUp("enter")
|
# pg.keyUp("enter")
|
||||||
|
|
||||||
elif any(keyword in command for keyword in ["maximum", "pulse", "balls", "remove", "eliminate", "murder", "goon", "obliterate", "delete", "piss"]):
|
elif any(keyword in command for keyword in config.maximum_pulse):
|
||||||
print("MAXIMUM PULSE!!!!")
|
print("MAXIMUM PULSE!!!!")
|
||||||
pg.keyDown("q")
|
pg.keyDown("q")
|
||||||
sleep(0.032)
|
sleep(0.032)
|
||||||
|
@ -101,9 +117,6 @@ def on_press(key):
|
||||||
subprocess.run("/home/ultrablob/Videos/Clips/save_clip.sh")
|
subprocess.run("/home/ultrablob/Videos/Clips/save_clip.sh")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Collect events until released
|
# Collect events until released
|
||||||
with keyboard.Listener(
|
with keyboard.Listener(on_press=on_press, on_release=lambda event: None) as listener:
|
||||||
on_press=on_press,
|
|
||||||
on_release=lambda event: None) as listener:
|
|
||||||
listener.join()
|
listener.join()
|
6
slang.py
6
slang.py
|
@ -1,6 +0,0 @@
|
||||||
replacements = {
|
|
||||||
"gigi": "gg",
|
|
||||||
"heels": "heals",
|
|
||||||
"heeling": "healing",
|
|
||||||
"heel": "heal"
|
|
||||||
}
|
|
Loading…
Add table
Add a link
Reference in a new issue