From b0887f46cd01bd7fda9b27b5f13ccda36be0279a Mon Sep 17 00:00:00 2001 From: Rob Date: Wed, 20 Jul 2022 19:42:51 -0400 Subject: [PATCH 1/5] added nemo --- .pre-commit-config.yaml | 28 +++++------ models/dev_prune_delete_all.sh | 2 + models/sl-nemo/Dockerfile | 31 +++++++++++++ models/sl-nemo/README.md | 15 ++++++ models/sl-nemo/app/__init__.py | 0 models/sl-nemo/app/main.py | 80 ++++++++++++++++++++++++++++++++ models/sl-nemo/autoinspect.sh | 13 ++++++ models/sl-nemo/build.sh | 18 +++++++ models/sl-nemo/push.sh | 2 + models/sl-nemo/te_st_endpoint.py | 34 ++++++++++++++ models/sl-nemo/uninstall.sh | 8 ++++ speechloop/asr.py | 32 ++++++++++++- 12 files changed, 248 insertions(+), 15 deletions(-) create mode 100755 models/dev_prune_delete_all.sh create mode 100644 models/sl-nemo/Dockerfile create mode 100644 models/sl-nemo/README.md create mode 100644 models/sl-nemo/app/__init__.py create mode 100644 models/sl-nemo/app/main.py create mode 100755 models/sl-nemo/autoinspect.sh create mode 100755 models/sl-nemo/build.sh create mode 100755 models/sl-nemo/push.sh create mode 100755 models/sl-nemo/te_st_endpoint.py create mode 100755 models/sl-nemo/uninstall.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index da1582c..1c1dacb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,20 +23,20 @@ repos: - id: check-ast - id: requirements-txt-fixer - id: debug-statements -- repo: local - hooks: - - id: unittest - name: unittest - entry: python -m unittest discover . - language: system # changing this to python breaks things as per: https://stackoverflow.com/questions/59714740/pre-commit-run-unittest-git-hooks-modulenotfounderror-for-installed-python-modu - 'types': [ python ] - args: [ "-p '*test.py'" ] # Probably this option is absolutely not needed. 
- pass_filenames: false - stages: [ commit ] -- repo: https://github.com/psf/black - rev: 21.7b0 - hooks: - - id: black +#- repo: local +# hooks: +# - id: unittest +# name: unittest +# entry: python -m unittest discover . +# language: system # changing this to python breaks things as per: https://stackoverflow.com/questions/59714740/pre-commit-run-unittest-git-hooks-modulenotfounderror-for-installed-python-modu +# 'types': [ python ] +# args: [ "-p '*test.py'" ] # Probably this option is absolutely not needed. +# pass_filenames: false +# stages: [ commit ] +#- repo: https://github.com/psf/black +# rev: 21.7b0 +# hooks: +# - id: black - repo: https://github.com/regebro/pyroma rev: "3.2" hooks: diff --git a/models/dev_prune_delete_all.sh b/models/dev_prune_delete_all.sh new file mode 100755 index 0000000..7fbd2c2 --- /dev/null +++ b/models/dev_prune_delete_all.sh @@ -0,0 +1,2 @@ + +docker system prune -a --volumes diff --git a/models/sl-nemo/Dockerfile b/models/sl-nemo/Dockerfile new file mode 100644 index 0000000..1bf605a --- /dev/null +++ b/models/sl-nemo/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.9-slim + +ENV TZ=Europe/London +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +ENV MODELNAME='stt_en_contextnet_1024.nemo' +ENV MODELTYPE='EncDecRNNTBPEModel' +ENV MODELURL='https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_contextnet_1024/versions/1.9.0/files/stt_en_contextnet_1024.nemo' + +EXPOSE 3500 +COPY app /app +WORKDIR /app + +RUN apt update && apt-get install -y gcc curl python3-dev python3-pip ffmpeg \ + && pip install numpy==1.22.4 fastapi uvicorn Cython torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html \ + && pip install nemo_toolkit[asr] \ + && curl -L -o /app/$MODELNAME $MODELURL \ + && rm -rf /var/lib/apt/lists/* \ + && apt remove -y gcc \ + && apt autoremove -y + +HEALTHCHECK --interval=30s --timeout=5s --start-period=15s \ + CMD curl --fail
http://localhost:3500/healthcheck || exit 1 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "3500"] + +# docker build -f Dockerfile . -t robmsmt/sl-coqui +# docker run -d --restart unless-stopped -p 3200:3200 robmsmt/sl-coqui-en-16k:latest +# docker run -it -p 3200:3200 robmsmt/sl-coqui +#docker run -it -p 3200:3200 robmsmt/sl-coqui-en-16k:latest +#docker commit my-broken-container && docker run -it my-broken-container /bin/bash diff --git a/models/sl-nemo/README.md b/models/sl-nemo/README.md new file mode 100644 index 0000000..f6c02b1 --- /dev/null +++ b/models/sl-nemo/README.md @@ -0,0 +1,15 @@ +# Nemo + +## CONFIG +- Shortcode: ` nm ` +- Docker: ` robmsmt/sl-nemo-en-16k:latest ` +- InternalPort: ` 3500 ` +- ExternalPort: ` 3500 ` +- SampleRate: ` 16000 ` +- InterfaceType: ` docker-fastapi ` + +## CHANGES + - tbc + +## Notes +Conformer-CTC version diff --git a/models/sl-nemo/app/__init__.py b/models/sl-nemo/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/sl-nemo/app/main.py b/models/sl-nemo/app/main.py new file mode 100644 index 0000000..40544be --- /dev/null +++ b/models/sl-nemo/app/main.py @@ -0,0 +1,80 @@ + +import os +from fastapi import FastAPI +from pydantic import BaseModel +import tempfile +from io import BytesIO +from base64 import b64decode +import argparse +# import soundfile +# import numpy as np +# import onnxruntime as rt +# import nemo +import nemo.collections.asr as nemo_asr +model = os.environ['MODELNAME'] + +app = FastAPI() +# nm = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En") +# print(nemo_asr.models.EncDecRNNTBPEModel.list_available_models()) +# nm = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name=args.model) +nm = nemo_asr.models.EncDecRNNTBPEModel.restore_from(model) +# +# +# enc_dec_ctc_models = [(x.pretrained_model_name, nemo_asr.models.EncDecCTCModel.from_pretrained(model_name=x.pretrained_model_name)) for x in 
nemo_asr.models.EncDecCTCModel.list_available_models() if "en" in x.pretrained_model_name] +# enc_dec_ctc_bpe_models = [(x.pretrained_model_name, nemo_asr.models.EncDecCTCModelBPE.from_pretrained(model_name=x.pretrained_model_name)) for x in nemo_asr.models.EncDecCTCModelBPE.list_available_models() if "en" in x.pretrained_model_name] +# enc_dec_rnn_t_bpe_models = [(x.pretrained_model_name, nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name=x.pretrained_model_name)) for x in nemo_asr.models.EncDecRNNTBPEModel.list_available_models() if "en" in x.pretrained_model_name] +# enc_dec_rnn_t_models = [(x.pretrained_model_name, nemo_asr.models.EncDecRNNTModel.from_pretrained(model_name=x.pretrained_model_name)) for x in nemo_asr.models.EncDecRNNTModel.list_available_models() if "en" in x.pretrained_model_name] +# +# all_models = enc_dec_ctc_models + enc_dec_ctc_bpe_models + enc_dec_rnn_t_bpe_models + enc_dec_rnn_t_models +# print(all_models) + +def disk_in_memory(wav_bytes): + """ + this spooled wav was chosen because it's much more efficient than writing to disk, + it effectively is writing to memory only and can still be read (by some applications) as a file + """ + with tempfile.SpooledTemporaryFile() as spooled_wav: + spooled_wav.write(wav_bytes) + spooled_wav.seek(0) + return BytesIO(spooled_wav.read()) + + +class Audio(BaseModel): + b64_wav: str + sr: int = 16000 + + +@app.get("/healthcheck") +async def healthcheck(): + return {"ok": "true"} + + +# Next, we instantiate all the necessary models directly from NVIDIA NGC +# Speech Recognition model + + +@app.post("/transcribe") +async def transcribe(audio: Audio): + + try: + wav_bytes = b64decode(audio.b64_wav.encode("utf-8")) + + # dm = disk_in_memory(wav_bytes) + # pcm, sample_rate = soundfile.read(dm, dtype="int16") + # todo cannot use disk memory since nemo lib needs file - in future replace with onnx: https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_with_NeMo.ipynb + + with 
tempfile.NamedTemporaryFile(mode='wb', delete=True, suffix='.wav') as f: + f.write(wav_bytes) + f.flush() # push buffered bytes to disk before NeMo reopens the file by name + files_list = [f.name] + transcript = nm.transcribe(paths2audio_files=files_list) + + return {"transcript": transcript[0][0]} + except: + raise + + +if __name__ == "__main__": + import uvicorn + print("starting...") + uvicorn.run("main:app", host="0.0.0.0", port=3600) diff --git a/models/sl-nemo/autoinspect.sh b/models/sl-nemo/autoinspect.sh new file mode 100755 index 0000000..38c1b97 --- /dev/null +++ b/models/sl-nemo/autoinspect.sh @@ -0,0 +1,13 @@ + +# try docker log first + +# This should work for a running container +# +IMG=$(cat ./README.md | grep "Docker:" | awk '{print $4}') +ID=$(docker ps | grep $IMG | awk '{ print $1 }') + +echo "INSPECTING: $IMG with ID: $ID" +$(docker stop $(docker ps -a -q --filter ancestor="$IMG" --format="{{.ID}}")) +docker commit "$ID" broken-container1 && docker run -p 3500:3500 -it broken-container1 /bin/bash +# run with: uvicorn main:app --host 0.0.0.0 --port 3500 +# then hit test endpoint diff --git a/models/sl-nemo/build.sh b/models/sl-nemo/build.sh new file mode 100755 index 0000000..cb1ce58 --- /dev/null +++ b/models/sl-nemo/build.sh @@ -0,0 +1,18 @@ +set -e +#CWD=${PWD##*/} +DIR_PATH="$(dirname "${0}")" +IMG_REPO=$(cat ./README.md | grep "Docker:" | awk '{print $4}') +EXTPORT=$(cat ./README.md | grep "ExternalPort:" | awk '{print $4}') + +echo $DIR_PATH $IMG_REPO $EXTPORT +docker build -t $IMG_REPO "$DIR_PATH" + +set +e +# this is empty if the container crashes +echo $(docker ps -q -a --filter ancestor="$IMG_REPO" --format="{{.ID}}") +docker stop $(docker ps -q -a --filter ancestor="$IMG_REPO" --format="{{.ID}}") +set -e +docker run -p "$EXTPORT":"$EXTPORT" -d "$IMG_REPO" + +## to debug - kill container and start with: +#docker run --restart unless-stopped -p "$EXTPORT":"$EXTPORT" "$IMG_REPO" diff --git a/models/sl-nemo/push.sh b/models/sl-nemo/push.sh new file mode 100755 index 0000000..00e9e89 --- /dev/null
+++ b/models/sl-nemo/push.sh @@ -0,0 +1,2 @@ +#to upload +docker push robmsmt/sl-nemo-en-16k diff --git a/models/sl-nemo/te_st_endpoint.py b/models/sl-nemo/te_st_endpoint.py new file mode 100755 index 0000000..e42adb4 --- /dev/null +++ b/models/sl-nemo/te_st_endpoint.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +import json +import base64 +import requests +import pprint as pp + + +def main(endpoint, wav_location): + + b64audio = base64.b64encode(open(wav_location, "rb").read()).decode("utf-8") + print(f"Length of b64 data is:{len(b64audio)}") + + json_message = {"b64_wav": b64audio, "sr": 16000} + + r = requests.post(endpoint, json=json_message) + print(f"Status code: {r.status_code}") + try: + response = r.json() + pp.pprint(response, indent=2) + except: + print("err") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="This file reads in a wav file and prints a CURL best to be piped to a file") + parser.add_argument("--endpoint", default="/transcribe", type=str) + parser.add_argument("--host", default="http://localhost:3500", type=str) + parser.add_argument("--wav", default="../../speechloop/data/simple_test/wavs/109938_zebra_ch0_16k.wav", type=str) + args = parser.parse_args() + url = args.host + args.endpoint + main(url, args.wav) diff --git a/models/sl-nemo/uninstall.sh b/models/sl-nemo/uninstall.sh new file mode 100755 index 0000000..692edc3 --- /dev/null +++ b/models/sl-nemo/uninstall.sh @@ -0,0 +1,8 @@ + +#rm .INSTALLED +IMG=$(cat ./README.md | grep "Docker:" | awk '{print $4}') +echo "Killing: $IMG" +docker rm $(docker stop $(docker ps -a -q --filter ancestor="$IMG" --format="{{.ID}}")) +echo "Deleting: $IMG" +docker image rm "$IMG" +echo "Finished removing: $IMG" diff --git a/speechloop/asr.py b/speechloop/asr.py index c62ed52..d4aee4a 100644 --- a/speechloop/asr.py +++ b/speechloop/asr.py @@ -121,6 +121,34 @@ def execute_with_audio(self, audio): return self.return_error() +class Nemo(ASR): + """ + Nemo + 
""" + + def __init__(self): + super().__init__("nm", "docker-local") + self.uri = "http://localhost:3500/transcribe" + self.dockerhub_url = "robmsmt/sl-nemo-en-16k:latest" + self.shortname = self.dockerhub_url.rsplit("/")[-1].rsplit(":")[0] + self.longname = "nemo" + launch_container(self.dockerhub_url, {"3500/tcp": 3500}, verbose=self.verbose, delay=30) + self.finish_init() + + def execute_with_audio(self, audio): + b64 = base64.b64encode(audio).decode("utf-8") + json_message = {"b64_wav": b64, "sr": 16000} + r = requests.post(self.uri, json=json_message) + if r.status_code == 200: + try: + response = r.json()["transcript"] + return response + except KeyError: + return self.return_error() + else: + return self.return_error() + + class Sphinx(ASR): """ Vosk @@ -420,7 +448,7 @@ def create_model_objects(wanted_asr: list) -> list: print(wanted_asr) for asr in wanted_asr: if asr == "all": - list_of_asr = [Vosk(), Sphinx(), Coqui(), Google(), Aws(), Azure()] + list_of_asr = [Vosk(), Sphinx(), Coqui(), Google(), Aws(), Azure(), Nemo()] elif asr == "vs": list_of_asr.append(Vosk()) elif asr == "sp": @@ -433,6 +461,8 @@ def create_model_objects(wanted_asr: list) -> list: list_of_asr.append(Aws()) elif asr == "az": list_of_asr.append(Azure()) + elif asr == "nm": + list_of_asr.append(Nemo()) else: raise AsrNotRecognized("ASR not recognised") From 74521e6c2182a39c6039af547b96f4825beb2c4a Mon Sep 17 00:00:00 2001 From: Rob Date: Wed, 20 Jul 2022 22:12:08 -0400 Subject: [PATCH 2/5] added nemo to registry --- speechloop/asr/nemo.py | 33 +++++++++++++++++++++++++++++++++ speechloop/asr/registry.py | 3 +++ 2 files changed, 36 insertions(+) create mode 100644 speechloop/asr/nemo.py diff --git a/speechloop/asr/nemo.py b/speechloop/asr/nemo.py new file mode 100644 index 0000000..e928759 --- /dev/null +++ b/speechloop/asr/nemo.py @@ -0,0 +1,33 @@ +from speechloop.asr.base_asr import ASR +from speechloop.asr.container_utils import launch_container + +import base64 +import requests + + 
+class Nemo(ASR): + """ + Nemo + """ + + def __init__(self): + super().__init__("nm", "docker-local") + self.uri = "http://localhost:3500/transcribe" + self.dockerhub_url = "robmsmt/sl-nemo-en-16k:latest" + self.shortname = self.dockerhub_url.rsplit("/")[-1].rsplit(":")[0] + self.longname = "nemo" + launch_container(self.dockerhub_url, {"3500/tcp": 3500}, verbose=self.verbose, delay=8) + self.finish_init() + + def execute_with_audio(self, audio): + b64 = base64.b64encode(audio).decode("utf-8") + json_message = {"b64_wav": b64, "sr": 16000} + r = requests.post(self.uri, json=json_message) + if r.status_code == 200: + try: + response = r.json()["transcript"] + return response + except KeyError: + return self.return_error() + else: + return self.return_error() diff --git a/speechloop/asr/registry.py b/speechloop/asr/registry.py index aebab3e..7ecb131 100644 --- a/speechloop/asr/registry.py +++ b/speechloop/asr/registry.py @@ -5,6 +5,7 @@ from speechloop.asr.google import Google from speechloop.asr.aws import Aws from speechloop.asr.azure import Azure +from speechloop.asr.nemo import Nemo def create_model_objects(wanted_asr: list) -> list: @@ -26,6 +27,8 @@ def create_model_objects(wanted_asr: list) -> list: list_of_asr.append(Aws()) elif asr == "az": list_of_asr.append(Azure()) + elif asr == "nm": + list_of_asr.append(Nemo()) else: raise AsrNotRecognized("ASR not recognised") From 7908b6cc7f7849b5cc90f97cf3386c38dbb5fcda Mon Sep 17 00:00:00 2001 From: Rob Date: Wed, 20 Jul 2022 22:14:51 -0400 Subject: [PATCH 3/5] added readme --- models/sl-nemo/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/sl-nemo/README.md b/models/sl-nemo/README.md index f6c02b1..ab8c01f 100644 --- a/models/sl-nemo/README.md +++ b/models/sl-nemo/README.md @@ -12,4 +12,5 @@ - tbc ## Notes -Conformer-CTC version +- Contextnet 1024 version +- Not used onnx yet From c3cfa1b9d0d174223b3f4159cd065ce32326f98c Mon Sep 17 00:00:00 2001 From: Rob Date: Wed, 20 Jul 2022 
22:18:20 -0400 Subject: [PATCH 4/5] added ability to select nemo through wizard --- README.md | 2 +- speechloop/wizard.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e105d12..3520930 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,8 @@ Using `all` as wanted_asr parameter to main.py will attempt to start all ASRs fo | ✅ sp | CMU Sphinx | Open Source | Offline - docker | | ✅ vs | Alphacep Vosk | Open Source | Offline - docker | | ✅ cq | Coqui | Open Source | Offline - docker | +| ✅ nm | Nvidia NeMo | Open Source | Offline - docker | | ❌ sb | Speech Brain | Open Source | Offline - docker | -| ❌ nm | Nvidia NeMo | Open Source | Offline - docker | | ✅ gg | Google | Proprietary | API set env:`GOOGLE_APPLICATION_CREDENTIALS` | | ✅ az | Microsoft Azure | Proprietary | API set env:`AZURE_KEY` | | ✅ aw | Amazon | Proprietary | API set env:`AWS_ACCESS_KEY_ID`
+`AWS_SECRET_ACCESS_KEY` or aws configure| diff --git a/speechloop/wizard.py b/speechloop/wizard.py index 006e1fe..73f41c9 100644 --- a/speechloop/wizard.py +++ b/speechloop/wizard.py @@ -120,6 +120,8 @@ def wizard_main(): "cq - Coqui stt", Separator("---Cloud ASRs---"), "gg - Google Cloud - (requires api key)", # todo maybe ask for this or grey it out if not provided? + "gg - Google Cloud - (requires api key)", # todo maybe ask for this or grey it out if not provided? + "gg - Google Cloud - (requires api key)", # todo maybe ask for this or grey it out if not provided? ], validate=lambda a: (True if len(a) > 0 else "You must select at least one ASR"), ).ask() From 65965e09b3f508fc4d126c73cdd67438be27662f Mon Sep 17 00:00:00 2001 From: Rob Date: Wed, 20 Jul 2022 22:24:51 -0400 Subject: [PATCH 5/5] bumped version --- changelog.md | 1 + setup.py | 2 +- speechloop/wizard.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/changelog.md b/changelog.md index 66beeec..2c65cf0 100644 --- a/changelog.md +++ b/changelog.md @@ -4,3 +4,4 @@ - 0.0.1 - initial release - 0.0.2 - fixed common corrections pack error with requirements. Fixed issue with wizard where no ASR can be selected - 0.0.3 - refactored ASRs so each is own file. Added arg switches for columns/enable_wer/text_normalization/hashing +- 0.0.4 - added NeMo as alpha diff --git a/setup.py b/setup.py index 6e4fd77..7d11a43 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ def read_file(fname): # python3 -m pip install --upgrade setuptools wheel setup( name="speechloop", - version="0.0.3", + version="0.0.4", author="robmsmt", author_email="robmsmt@gmail.com", description='A "keep it simple" collection of many speech recognition engines... 
Designed to help answer - what is the best ASR?', diff --git a/speechloop/wizard.py b/speechloop/wizard.py index 73f41c9..e0455b0 100644 --- a/speechloop/wizard.py +++ b/speechloop/wizard.py @@ -118,10 +118,11 @@ def wizard_main(): "vs - Alphacep Vosk", "sp - CMU Sphinx", "cq - Coqui stt", + "nm - NeMo stt", Separator("---Cloud ASRs---"), "gg - Google Cloud - (requires api key)", # todo maybe ask for this or grey it out if not provided? - "gg - Google Cloud - (requires api key)", # todo maybe ask for this or grey it out if not provided? - "gg - Google Cloud - (requires api key)", # todo maybe ask for this or grey it out if not provided? + "az - Azure - (requires api key)", # todo maybe ask for this or grey it out if not provided? + "aw - AWS - (requires api key)", # todo maybe ask for this or grey it out if not provided? ], validate=lambda a: (True if len(a) > 0 else "You must select at least one ASR"), ).ask()