# jarvis-marvel-rivals/main.py
import pyautogui as pg
from pynput import keyboard
import speech_recognition as sr
from faster_whisper import WhisperModel
from string import punctuation
import config
import re
import subprocess
from time import sleep
from mss import mss
import numpy as np
from io import BytesIO
import sounddevice # turns off alsa error logging
r = sr.Recognizer()
2025-02-13 20:00:21 -05:00
r.pause_threshold = 2
model = WhisperModel(config.model, device="cuda", compute_type="int8_float16")
print("Testing Sound")
2025-02-12 21:56:41 -05:00
with sr.Microphone() as source:
r.adjust_for_ambient_noise(source, duration=3)
print("ready!")
def recognize_text() -> str:
with sr.Microphone() as source:
audio = r.listen(source)
2025-02-13 20:00:21 -05:00
results, _ = model.transcribe(
BytesIO(audio.get_wav_data()),
beam_size=5,
language="en",
condition_on_previous_text=False,
)
2025-02-12 21:56:41 -05:00
return " ".join([segment.text for segment in results])
def chat_type():
2025-02-13 20:00:21 -05:00
screen = mss() #! bad for performance but necessary
2025-02-13 20:00:21 -05:00
screenshot = np.array(
screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100})
)
try:
pg.locate("ui/team-chat.png", screenshot, confidence=0.9)
return "team"
except pg.ImageNotFoundException:
pass
try:
pg.locate("ui/match-chat.png", screenshot, confidence=0.9)
return "match"
except pg.ImageNotFoundException:
pass
return None
def on_press(key):
2025-02-12 21:56:41 -05:00
if key is not keyboard.Key.home:
return
2025-02-13 20:00:21 -05:00
print("Listening...")
2025-02-12 21:56:41 -05:00
command = recognize_text()
print(f"Heard: {command}")
# cleanup command
command = command.lower().strip()
2025-02-12 21:56:41 -05:00
for char in punctuation:
2025-02-13 20:00:21 -05:00
command = command.replace(char, "")
2025-02-12 21:56:41 -05:00
2025-02-13 20:00:21 -05:00
for original, new in config.replacements.items():
2025-02-12 21:56:41 -05:00
command = command.replace(original, new)
print(f"Cleaned up command: {command}")
2025-02-13 20:00:21 -05:00
if any(keyword in command for keyword in ["type", "say", "write"]):
message = (
re.search(r"(type|say|write) (.+?)(and |in |\n|$)", command)
.groups(0)[1]
.strip()
)
print(f"Typing: {message} in chat")
2025-02-12 21:56:41 -05:00
pg.keyDown("enter")
sleep(0.041)
pg.keyUp("enter")
sleep(0.94)
current_chat = chat_type()
2025-02-13 20:00:21 -05:00
if current_chat == None or current_chat in command:
pass # no change needed
elif "match" in command or "team" in command:
pg.keyDown("tab")
sleep(0.041)
pg.keyUp("tab")
2025-02-12 21:56:41 -05:00
pg.typewrite(message, 0.048)
# pg.keyDown("enter")
# sleep(0.074)
# pg.keyUp("enter")
2025-02-12 21:56:41 -05:00
2025-02-13 20:00:21 -05:00
elif any(keyword in command for keyword in config.maximum_pulse):
2025-02-12 21:56:41 -05:00
print("MAXIMUM PULSE!!!!")
pg.keyDown("q")
sleep(0.032)
pg.keyUp("q")
2025-02-13 09:36:53 -05:00
elif "clip" in command:
subprocess.run("/home/ultrablob/Videos/Clips/save_clip.sh")
# Collect events until released
2025-02-13 20:00:21 -05:00
with keyboard.Listener(on_press=on_press, on_release=lambda event: None) as listener:
listener.join()