# jarvis-marvel-rivals/main.py
import pyautogui as pg
from pynput import keyboard
import speech_recognition as sr
from faster_whisper import WhisperModel
from string import punctuation
import config
import re
import subprocess
from time import sleep
from mss import mss
import numpy as np
from io import BytesIO
import sounddevice # turns off alsa error logging
r = sr.Recognizer()
2025-02-13 20:00:21 -05:00
r.pause_threshold = 2
model = WhisperModel(config.model, device="cuda", compute_type="int8_float16")
print("Testing Sound")
2025-02-12 21:56:41 -05:00
with sr.Microphone() as source:
r.adjust_for_ambient_noise(source, duration=3)
print("ready!")
def recognize_text() -> str:
with sr.Microphone() as source:
audio = r.listen(source)
2025-02-13 20:00:21 -05:00
results, _ = model.transcribe(
BytesIO(audio.get_wav_data()),
beam_size=5,
language="en",
condition_on_previous_text=False,
)
2025-02-12 21:56:41 -05:00
return " ".join([segment.text for segment in results])
def chat_type():
2025-02-13 20:00:21 -05:00
screen = mss() #! bad for performance but necessary
2025-02-13 20:00:21 -05:00
screenshot = np.array(
screen.grab({"top": 1090, "left": 1110, "width": 100, "height": 100})
)
try:
pg.locate("ui/team-chat.png", screenshot, confidence=0.9)
return "team"
except pg.ImageNotFoundException:
pass
try:
pg.locate("ui/match-chat.png", screenshot, confidence=0.9)
return "match"
except pg.ImageNotFoundException:
pass
return None
def on_press(key):
2025-02-12 21:56:41 -05:00
if key is not keyboard.Key.home:
return
2025-02-13 20:00:21 -05:00
print("Listening...")
2025-02-12 21:56:41 -05:00
command = recognize_text()
print(f"Heard: {command}")
# cleanup command
command = command.lower().strip()
2025-02-12 21:56:41 -05:00
for char in punctuation:
2025-02-13 20:00:21 -05:00
command = command.replace(char, "")
2025-02-12 21:56:41 -05:00
2025-02-13 20:00:21 -05:00
for original, new in config.replacements.items():
2025-02-12 21:56:41 -05:00
command = command.replace(original, new)
print(f"Cleaned up command: {command}")
2025-02-13 20:00:21 -05:00
if any(keyword in command for keyword in ["type", "say", "write"]):
message = (
re.search(r"(type|say|write) (.+?)(and |in |\n|$)", command)
.groups(0)[1]
.strip()
)
print(f"Typing: {message} in chat")
2025-02-12 21:56:41 -05:00
pg.keyDown("enter")
sleep(0.041)
pg.keyUp("enter")
sleep(0.94)
current_chat = chat_type()
2025-02-13 20:00:21 -05:00
if current_chat == None or current_chat in command:
pass # no change needed
elif "match" in command or "team" in command:
pg.keyDown("tab")
sleep(0.041)
pg.keyUp("tab")
2025-02-12 21:56:41 -05:00
pg.typewrite(message, 0.048)
# pg.keyDown("enter")
# sleep(0.074)
# pg.keyUp("enter")
2025-02-12 21:56:41 -05:00
2025-02-13 20:00:21 -05:00
elif any(keyword in command for keyword in config.maximum_pulse):
2025-02-12 21:56:41 -05:00
print("MAXIMUM PULSE!!!!")
pg.keyDown("q")
sleep(0.032)
pg.keyUp("q")
2025-02-13 09:36:53 -05:00
elif "clip" in command:
subprocess.run("/home/ultrablob/Videos/Clips/save_clip.sh")
# Collect events until released
2025-02-13 20:00:21 -05:00
with keyboard.Listener(on_press=on_press, on_release=lambda event: None) as listener:
listener.join()