Skip to content

Commit 1ac4f57

Browse files
committed
feat:add funasr module
1 parent 376ae8d commit 1ac4f57

File tree

12 files changed

+545
-26
lines changed

12 files changed

+545
-26
lines changed

docker/docker-compose.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,21 @@ services:
9999
timeout: 10s
100100
retries: 3
101101

102+
# FunASR online speech-recognition server container.
funasr:
103+
image: harryliu888/funasr-online-server:latest
104+
container_name: funasr-online-server
105+
# Two host ports are published 1:1 to the container.
# NOTE(review): which port serves which FunASR endpoint is not shown here - confirm against the image docs.
ports:
106+
- 10095:10095
107+
- 10096:10096
108+
# Models are bind-mounted from ./models next to this compose file.
# NOTE(review): the named volume `funasr-volume` declared under the top-level `volumes:` key
# is not referenced by this service - confirm whether the bind mount or the named volume is intended.
volumes:
109+
- ./models:/workspace/models
102110
volumes:
103111
mysql-volume:
104112
redis-volume:
105113
influxdb-volume:
106114
emqx-volume:
107115
chromadb-volume: # 新增 ChromaDB 数据卷
116+
funasr-volume:
108117

109118
networks:
110119
emqx-bridge:

docs/get_started/quick_start.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
物联网平台内核是一个轻量级的物联网平台,包含了物联网最核心的基础模块,
55
包括物模型、监控、AI、MQTT、微信小程序、微信服务号等模块。该平台旨在帮助开发者快速搭建物联网应用,提高开发效率。
66
物联网平台内核并不包括ui组件,只提供基础的api接口,开发者可以自由选择自己喜欢的ui框架来搭建应用。
7+
目前已经提供了一套示例UI界面,基于Vue3,可以直接使用。
78

89
## 什么是Agent智能体
910
Agent智能体可以自我学习。智能体是一种通用问题解决器。从软件工程的角度看来,
@@ -30,6 +31,7 @@ cd docker && docker-compose up -d
3031
- EMQX MQTT消息中间件(https://www.emqx.com/zh)
3132
- InfluxDB 时序数据库
3233
- Chroma 一个开源的向量数据库,用于存储和检索向量数据。(https://docs.trychroma.com/docs/overview/introduction)
34+
- FunASR 语音识别服务。(https://github.com/modelscope/FunASR)
3335

3436
2. 验证容器状态(可选):
3537
```bash
@@ -194,10 +196,24 @@ ai:
194196
audio-temp-url: [项目运行的域名]
195197
glm-key:
196198
deepSeek-key:
197-
dashscope-key:
198-
robot-name: [机器人名称]
199+
dashscope-key:
200+
uniApi-Key:
201+
robot-name: 小创
199202
team-name: 创万联
203+
custom-key: XXX
204+
custom-llm-provider-url: XXX
200205
```
206+
>默认支持以上几种主流供应商,你可以自行添加其他供应商或者本地部署的LLM,只需要配置对应的供应商URL即可。
207+
208+
* 对应的供应商及其名称规则如下:
209+
210+
```yaml
211+
1. 硅基流动(https://siliconflow.cn/): silicon-模型名称(silicon-Qwen/Qwen3-Next-80B-A3B-Instruct)
212+
2. 阿里云百炼(https://www.aliyun.com/product/tongyi?utm_content=se_1021879167): dashscope-模型名称(dashscope-qwen3-max)
213+
3. uniapi(https://uniapi.ai/): uniapi-模型名称(uniapi-qwen3-max)
214+
4. 私有部署或其它供应商: custom-模型名称(custom-模型名称)
215+
```
216+
201217
通过以上步骤即可完成基础环境的搭建与配置,建议通过健康检查接口验证各服务连接状态。如果你需要
202218
控制家里的电器,你可以选择安装 Home Assistant 与本平台集成,它将帮助你管理家庭的电器,并提供流畅的体验。
203219

src/main/java/top/rslly/iot/services/agent/AiServiceImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
import top.rslly.iot.utility.ai.chain.Router;
3636
import top.rslly.iot.utility.ai.llm.LLMFactory;
3737
import top.rslly.iot.utility.ai.mcp.McpWebsocket;
38-
import top.rslly.iot.utility.ai.voice.Audio2Text;
38+
import top.rslly.iot.utility.ai.voice.ASR.Audio2Text;
3939
import top.rslly.iot.utility.ai.voice.AudioUtils;
4040
import top.rslly.iot.utility.ai.voice.TTS.Text2audio;
4141
import top.rslly.iot.utility.ai.voice.TTS.TtsService;
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* Copyright © 2023-2030 The ruanrongman Authors
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
package top.rslly.iot.utility.ai.voice.ASR;
21+
22+
import java.io.File;
23+
24+
/**
 * Common contract for speech-to-text (ASR) backends.
 *
 * <p>Implementations turn audio into a plain {@link String}. How a failed recognition is
 * reported (exception vs. a fixed message) is left to each implementation.
 */
public interface AsrService {

  /**
   * Transcribes the audio referenced by {@code url}.
   *
   * @param url reference to the audio to transcribe; presumably a location the backend can
   *     fetch itself (TODO confirm against the implementations)
   * @return the recognised text
   */
  String getText(String url);

  /**
   * Transcribes a local audio file.
   *
   * @param file the audio file to transcribe
   * @param sampleRate sample rate of the audio (presumably in Hz; confirm with implementations)
   * @param format audio format identifier understood by the backend (exact accepted values are
   *     backend-specific; confirm with implementations)
   * @return the recognised text
   */
  String getTextRealtime(File file, int sampleRate, String format);
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/**
2+
* Copyright © 2023-2030 The ruanrongman Authors
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
package top.rslly.iot.utility.ai.voice.ASR;
21+
22+
import lombok.extern.slf4j.Slf4j;
23+
import org.springframework.beans.factory.annotation.Autowired;
24+
import org.springframework.beans.factory.annotation.Value;
25+
import org.springframework.stereotype.Component;
26+
27+
/**
28+
* ASR服务工厂类
29+
*/
30+
@Slf4j
31+
@Component
32+
public class AsrServiceFactory {
33+
34+
@Value("${ai.asr.provider:funasr}")
35+
private String defaultProvider;
36+
37+
@Autowired
38+
private Audio2Text audio2Text;
39+
40+
@Autowired
41+
private FunAsrClient funAsrClient;
42+
43+
/**
44+
* 获取默认的ASR服务
45+
*/
46+
public AsrService getService() {
47+
return getService(defaultProvider);
48+
}
49+
50+
/**
51+
* 根据提供商名称获取ASR服务
52+
*
53+
* @param provider 提供商名称(dashscope/funasr)
54+
* @return ASR服务实例
55+
*/
56+
public AsrService getService(String provider) {
57+
if ("funasr".equalsIgnoreCase(provider)) {
58+
return funAsrClient;
59+
}
60+
if ("dashscope".equalsIgnoreCase(provider)) {
61+
return audio2Text;
62+
}
63+
log.warn("未知的ASR提供商: {}, 使用默认服务", provider);
64+
return "funasr".equalsIgnoreCase(defaultProvider) ? funAsrClient : audio2Text;
65+
}
66+
67+
}

src/main/java/top/rslly/iot/utility/ai/voice/Audio2Text.java renamed to src/main/java/top/rslly/iot/utility/ai/voice/ASR/Audio2Text.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* See the License for the specific language governing permissions and
1818
* limitations under the License.
1919
*/
20-
package top.rslly.iot.utility.ai.voice;
20+
package top.rslly.iot.utility.ai.voice.ASR;
2121

2222
import com.alibaba.dashscope.audio.asr.recognition.Recognition;
2323
import com.alibaba.dashscope.audio.asr.recognition.RecognitionParam;
@@ -48,7 +48,7 @@
4848

4949
@Component
5050
@Slf4j
51-
public class Audio2Text {
51+
public class Audio2Text implements AsrService {
5252
private String apiKey;
5353

5454
@Value("${ai.audio-tmp-path}")
@@ -126,7 +126,18 @@ public String getTextRealtime(File file, int sampleRate, String format) {
126126

127127
try {
128128
// System.out.println("识别结果:" + recognizer.call(param, file));
129-
return recognizer.call(param, file);
129+
String text = recognizer.call(param, file);
130+
StringBuilder sentences = new StringBuilder();
131+
var jsonObject = JSON.parseObject(text);
132+
var sentencesArray = jsonObject.getJSONArray("sentences");
133+
if (sentencesArray.size() > 0) {
134+
for (int i = 0; i < sentencesArray.size(); i++) {
135+
sentences.append(sentencesArray.getJSONObject(i).getString("text"));
136+
}
137+
} else {
138+
sentences.append("识别结果为空");
139+
}
140+
return sentences.toString();
130141
} catch (Exception e) {
131142
log.error("语音识别失败{}", e.getMessage());
132143
return "语音识别失败";

0 commit comments

Comments
 (0)