# Voice-controlled game macro: waits for the Home key, transcribes a spoken
# command with faster-whisper, then types chat messages or triggers in-game
# actions via pyautogui.
import pyautogui as pg
|
|
from pynput import keyboard
|
|
import speech_recognition as sr
|
|
from faster_whisper import WhisperModel
|
|
from string import punctuation
|
|
import config
|
|
import re
|
|
import subprocess
|
|
from time import sleep
|
|
from mss import mss
|
|
import numpy as np
|
|
from io import BytesIO
|
|
import sounddevice # turns off alsa error logging
|
|
|
|
# Speech recognizer; a longer pause threshold lets a multi-part spoken
# command be captured as one phrase instead of being cut off early.
r = sr.Recognizer()
r.pause_threshold = 2

# Whisper model (name taken from config) on the GPU with a quantized
# compute type.
model = WhisperModel(config.model, device="cuda", compute_type="int8_float16")

print("Testing Sound")
with sr.Microphone() as source:
    # Calibrate the energy threshold against 3 s of background noise.
    r.adjust_for_ambient_noise(source, duration=3)
print("ready!")
|
|
|
|
|
|
def recognize_text() -> str:
    """Record one phrase from the microphone and return its transcription."""
    with sr.Microphone() as source:
        audio = r.listen(source)

    # Hand the captured WAV bytes to faster-whisper in memory, without
    # writing a temporary file.
    segments, _info = model.transcribe(
        BytesIO(audio.get_wav_data()),
        beam_size=5,
        language="en",
        condition_on_previous_text=False,
    )

    return " ".join(segment.text for segment in segments)
|
|
|
|
|
|
def chat_type():
    """Detect which chat box (team or match) is currently open on screen.

    Grabs a small fixed region of the screen where the chat label sits and
    matches it against the reference images in ui/.

    Returns:
        "team" or "match" when the corresponding label is found, else None.
    """
    screen = mss()  #! bad for performance but necessary

    screenshot = np.array(
        screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100})
    )

    for image, chat in (("ui/team-chat.png", "team"), ("ui/match-chat.png", "match")):
        try:
            # Depending on pyautogui's version / useImageNotFoundException
            # setting, a miss is reported either by returning None or by
            # raising ImageNotFoundException. Handle both, so a miss can
            # never fall through as a false positive.
            if pg.locate(image, screenshot, confidence=0.9) is not None:
                return chat
        except pg.ImageNotFoundException:
            continue

    return None
|
|
|
|
|
|
def extract_chat_message(command: str):
    """Pull the chat message out of a 'type/say/write ...' command.

    The message runs from the trigger word up to a trailing "and "/"in "
    clause, a newline, or the end of the command.

    Returns:
        The stripped message, or None when the trigger word is not
        followed by any message (e.g. the command ends with a bare "say").
    """
    match = re.search(r"(type|say|write) (.+?)(and |in |\n|$)", command)
    if match is None:
        return None
    return match.group(2).strip()


def on_press(key):
    """Hotkey handler: on Home, record a voice command and act on it.

    Supported commands: typing a chat message ("type/say/write ..."),
    firing the ultimate ("q"), and saving a replay clip.
    """
    if key is not keyboard.Key.home:
        return

    print("Listening...")
    command = recognize_text()

    print(f"Heard: {command}")

    # cleanup command: lowercase, strip punctuation in one pass, then
    # apply the user-configured word replacements.
    command = command.lower().strip()
    command = command.translate(str.maketrans("", "", punctuation))

    for original, new in config.replacements.items():
        command = command.replace(original, new)

    print(f"Cleaned up command: {command}")

    if any(keyword in command for keyword in ["type", "say", "write"]):
        message = extract_chat_message(command)
        if message is None:
            # Trigger word heard but no message followed it; don't crash,
            # just ignore the command.
            print("No message found after type/say/write; ignoring")
            return
        print(f"Typing: {message} in chat")

        # Open the chat box.
        pg.keyDown("enter")
        sleep(0.041)
        pg.keyUp("enter")
        sleep(0.94)

        current_chat = chat_type()
        if current_chat is None or current_chat in command:
            pass  # already in the requested chat, no change needed
        elif "match" in command or "team" in command:
            # Tab toggles between team and match chat.
            pg.keyDown("tab")
            sleep(0.041)
            pg.keyUp("tab")

        pg.typewrite(message, 0.048)
        # pg.keyDown("enter")
        # sleep(0.074)
        # pg.keyUp("enter")

    elif any(keyword in command for keyword in config.maximum_pulse):
        print("MAXIMUM PULSE!!!!")
        pg.keyDown("q")
        sleep(0.032)
        pg.keyUp("q")

    elif "clip" in command:
        subprocess.run(["/home/ultrablob/Videos/Clips/save_clip.sh"])
|
|
|
|
|
|
# Block the main thread forever, dispatching every key press to on_press.
# Key releases are irrelevant here, so a no-op lambda swallows them.
with keyboard.Listener(on_press=on_press, on_release=lambda event: None) as listener:
    listener.join()
|