灵云语音识别


转载请说明出处!
作者:kqw攻城狮
出处:个人站 | CSDN


源码

GitHub

在线语音识别

SDK下载

灵云SDK下载

SDK集成

下载SDK以后,将jar和so导入工程

权限

1
2
3
4
5
6
7
<!-- Permissions declared in AndroidManifest.xml for the HCI SDK integration described in this article. -->
<!-- Network access: the SDK is configured with a cloud service URL. -->
<uses-permission android:name="android.permission.INTERNET" />
<!-- External storage write: HciUtil places the SDK log directory on external storage. -->
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
<!-- NOTE(review): SYSTEM_ALERT_WINDOW and the two location permissions are not used by any code shown in this article; presumably requested by the SDK itself. Confirm before shipping. -->
<uses-permission android:name="android.permission.SYSTEM_ALERT_WINDOW" />
<uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
<uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
<!-- Microphone capture: AsrUtil drives an ASRRecorder. -->
<uses-permission android:name="android.permission.RECORD_AUDIO" />

封装

灵云配置类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
package kong.qingwei.kqwhcidemo;
/**
 * Static configuration for the HCI (Lingyun) speech SDK.
 *
 * <p>Holds compile-time constants only and is never instantiated.
 * Created by kqw on 2016/8/12.
 *
 * <p>NOTE(review): the application key and developer key are embedded in source;
 * anyone with access to the APK or repository can read them.
 */
public final class ConfigUtil {
    /** HCI application key. */
    public static final String APP_KEY = "3d5d5466";

    /** Developer key that accompanies {@link #APP_KEY}. */
    public static final String DEVELOPER_KEY = "eca643ff7b3c758745d7cf516e808d34";

    /** Host and port of the HCI cloud service. */
    public static final String CLOUD_URL = "test.api.hcicloud.com:8888";

    /**
     * Capability to run for speech synthesis (local engine).
     * The cloud alternative used previously was {@code "tts.cloud.wangjing"}.
     */
    public static final String CAP_KEY = "tts.local.synth";

    /** Capability key for cloud natural-language understanding. */
    public static final String CAP_KEY_NUL = "nlu.cloud";

    /**
     * Capability key for cloud free-talk speech recognition, referenced by {@code AsrUtil}.
     * Use {@code "asr.local.freetalk"} for offline recognition or {@code "asr.cloud.dialog"}
     * for recognition combined with semantic understanding.
     */
    public static final String CAP_KEY_ASR_CLOUD_FREETALK = "asr.cloud.freetalk";

    /** Not instantiable: this class is a constants holder. */
    private ConfigUtil() {
    }
}

初始化灵云语音能力的工具类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
package kong.qingwei.kqwhcidemo;
import android.app.Activity;
import android.os.Environment;
import android.util.Log;
import android.widget.Toast;
import com.sinovoice.hcicloudsdk.api.HciCloudSys;
import com.sinovoice.hcicloudsdk.common.AuthExpireTime;
import com.sinovoice.hcicloudsdk.common.HciErrorCode;
import com.sinovoice.hcicloudsdk.common.InitParam;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
/**
 * Initializes and releases the HCI (Lingyun) SDK on behalf of an activity.
 *
 * <p>The SDK configuration string is built once in the constructor; {@link #initHci()}
 * then initializes the SDK and verifies the authorization.
 * Created by kqw on 2016/8/12.
 */
public class HciUtil {
    private static final String TAG = "HciUtil";

    private final Activity mActivity;
    private final String mConfigStr;

    /**
     * Builds the SDK configuration string for the given activity.
     *
     * @param activity activity used as SDK context, for file paths and for error toasts
     */
    public HciUtil(Activity activity) {
        mActivity = activity;
        // Assemble the init parameters and keep only their string form.
        mConfigStr = getInitParam().getStringConfig();
    }

    /**
     * Initializes the SDK and checks (refreshing if needed) its authorization.
     * Failures are reported to the user with a toast.
     *
     * @return {@code true} when the SDK is initialized and authorized, {@code false} otherwise
     */
    public boolean initHci() {
        int errCode = HciCloudSys.hciInit(mConfigStr, mActivity);
        // "Already initialized" is treated the same as success.
        if (errCode != HciErrorCode.HCI_ERR_NONE && errCode != HciErrorCode.HCI_ERR_SYS_ALREADY_INIT) {
            Toast.makeText(mActivity, "hciInit error: " + HciCloudSys.hciGetErrorInfo(errCode), Toast.LENGTH_SHORT).show();
            return false;
        }

        errCode = checkAuthAndUpdateAuth();
        if (errCode != HciErrorCode.HCI_ERR_NONE) {
            Toast.makeText(mActivity, "CheckAuthAndUpdateAuth error: " + HciCloudSys.hciGetErrorInfo(errCode), Toast.LENGTH_SHORT).show();
            // The system was initialized above, so it must be released before giving up.
            HciCloudSys.hciRelease();
            return false;
        }
        return true;
    }

    /**
     * Releases the SDK.
     */
    public void hciRelease() {
        HciCloudSys.hciRelease();
    }

    /**
     * Assembles the SDK initialization parameters: authorization settings and,
     * when external storage is mounted and the log directory is usable, file logging.
     *
     * @return the system initialization parameters
     */
    private InitParam getInitParam() {
        String authDirPath = mActivity.getFilesDir().getAbsolutePath();
        InitParam initparam = new InitParam();
        // Directory holding the authorization file (required).
        initparam.addParam(InitParam.AuthParam.PARAM_KEY_AUTH_PATH, authDirPath);
        // Automatic cloud authorization is switched off; see checkAuthAndUpdateAuth().
        initparam.addParam(InitParam.AuthParam.PARAM_KEY_AUTO_CLOUD_AUTH, "no");
        // Cloud service address (required).
        initparam.addParam(InitParam.AuthParam.PARAM_KEY_CLOUD_URL, ConfigUtil.CLOUD_URL);
        // Developer key (required).
        initparam.addParam(InitParam.AuthParam.PARAM_KEY_DEVELOPER_KEY, ConfigUtil.DEVELOPER_KEY);
        // Application key (required).
        initparam.addParam(InitParam.AuthParam.PARAM_KEY_APP_KEY, ConfigUtil.APP_KEY);

        // File logging is optional and only configured when external storage is mounted.
        if (Environment.MEDIA_MOUNTED.equals(Environment.getExternalStorageState())) {
            String sdPath = Environment.getExternalStorageDirectory().getAbsolutePath();
            String packageName = mActivity.getPackageName();
            String logPath = sdPath + File.separator + "sinovoice" + File.separator + packageName + File.separator + "log" + File.separator;
            File fileDir = new File(logPath);
            if (fileDir.isDirectory() || fileDir.mkdirs()) {
                // Log directory; per the original SDK note, omitting it disables log output.
                initparam.addParam(InitParam.LogParam.PARAM_KEY_LOG_FILE_PATH, logPath);
                // Number of log files kept before the oldest is overwritten.
                initparam.addParam(InitParam.LogParam.PARAM_KEY_LOG_FILE_COUNT, "5");
                // Size of one log file, in KB.
                initparam.addParam(InitParam.LogParam.PARAM_KEY_LOG_FILE_SIZE, "1024");
                // Level: 0=none, 1=error, 2=warning, 3=info, 4=detail, 5=debug.
                initparam.addParam(InitParam.LogParam.PARAM_KEY_LOG_LEVEL, "5");
            } else {
                // Do not point the SDK at a directory that could not be created
                // (e.g. storage permission not granted); run without file logging instead.
                Log.w(TAG, "getInitParam: cannot create log directory " + logPath + ", SDK file logging disabled");
            }
        }
        return initparam;
    }

    /**
     * Checks the authorization: when the stored expiry time is readable and still in the
     * future nothing else is done; otherwise the authorization is checked via
     * {@code HciCloudSys.hciCheckAuth()}.
     *
     * @return {@code HciErrorCode.HCI_ERR_NONE} on success, otherwise the SDK error code
     */
    private int checkAuthAndUpdateAuth() {
        AuthExpireTime objExpireTime = new AuthExpireTime();
        int initResult = HciCloudSys.hciGetAuthExpireTime(objExpireTime);
        if (initResult == HciErrorCode.HCI_ERR_NONE) {
            // getExpireTime() is multiplied by 1000 to compare against milliseconds.
            long expireMillis = objExpireTime.getExpireTime() * 1000;
            // Logging the expiry date is informational only.
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd", Locale.CHINA);
            Log.i(TAG, "expire time: " + sdf.format(new Date(expireMillis)));
            if (expireMillis > System.currentTimeMillis()) {
                // Authorization has not expired yet. Note that no safety margin is applied:
                // it is accepted right up to the expiry instant.
                Log.i(TAG, "checkAuth success");
                return initResult;
            }
        }

        // Expiry time unavailable or already passed: run the authorization check.
        initResult = HciCloudSys.hciCheckAuth();
        if (initResult == HciErrorCode.HCI_ERR_NONE) {
            Log.i(TAG, "checkAuth success");
        } else {
            Log.e(TAG, "checkAuth failed: " + initResult);
        }
        return initResult;
    }
}

语音识别的类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
package kong.qingwei.kqwhcidemo;
import android.app.Activity;
import android.util.Log;
import com.sinovoice.hcicloudsdk.android.asr.recorder.ASRRecorder;
import com.sinovoice.hcicloudsdk.common.asr.AsrConfig;
import com.sinovoice.hcicloudsdk.common.asr.AsrInitParam;
import com.sinovoice.hcicloudsdk.common.asr.AsrRecogResult;
import com.sinovoice.hcicloudsdk.recorder.ASRRecorderListener;
import com.sinovoice.hcicloudsdk.recorder.RecorderEvent;
/**
 * Speech recognition helper built on the HCI SDK's {@code ASRRecorder}.
 *
 * <p>The recorder and the recognition configuration are prepared in the constructor;
 * {@link #start(OnAsrRecogListener)} begins one recognition session.
 * Created by kqw on 2016/8/15.
 */
public class AsrUtil {
    private static final String TAG = "AsrUtil";

    private final Activity mActivity;
    private ASRRecorder mAsrRecorder;
    private AsrConfig asrConfig;
    private OnAsrRecogListener mOnAsrRecogListener;

    /**
     * @param activity activity used to locate the native libraries and to post results
     *                 to the UI thread
     */
    public AsrUtil(Activity activity) {
        mActivity = activity;
        initAsr();
    }

    /**
     * Creates the recorder, initializes it and builds the recognition configuration.
     */
    private void initAsr() {
        Log.i(TAG, "initAsr: ");
        mAsrRecorder = new ASRRecorder();

        // Initialization parameters.
        AsrInitParam asrInitParam = new AsrInitParam();
        // Resources are loaded as .so files, so point the SDK at the directory where the
        // platform installed the app's native libraries. Rewriting the files-dir path
        // ("files" -> "lib") breaks when that text occurs elsewhere in the path or when
        // the libraries are installed in a different location.
        String dataPath = mActivity.getApplicationInfo().nativeLibraryDir;
        asrInitParam.addParam(AsrInitParam.PARAM_KEY_INIT_CAP_KEYS, ConfigUtil.CAP_KEY_ASR_CLOUD_FREETALK);
        asrInitParam.addParam(AsrInitParam.PARAM_KEY_DATA_PATH, dataPath);
        asrInitParam.addParam(AsrInitParam.PARAM_KEY_FILE_FLAG, AsrInitParam.VALUE_OF_PARAM_FILE_FLAG_ANDROID_SO);
        Log.v(TAG, "init parameters:" + asrInitParam.getStringConfig());
        mAsrRecorder.init(asrInitParam.getStringConfig(), new ASRResultProcess());

        // Recognition (session) parameters.
        asrConfig = new AsrConfig();
        // Capability to use.
        asrConfig.addParam(AsrConfig.SessionConfig.PARAM_KEY_CAP_KEY, ConfigUtil.CAP_KEY_ASR_CLOUD_FREETALK);
        // Audio format; depends on the capability in use.
        asrConfig.addParam(AsrConfig.AudioConfig.PARAM_KEY_AUDIO_FORMAT, AsrConfig.AudioConfig.VALUE_OF_PARAM_AUDIO_FORMAT_PCM_16K16BIT);
        // Audio is compressed with Speex before upload to reduce traffic.
        asrConfig.addParam(AsrConfig.AudioConfig.PARAM_KEY_ENCODE, AsrConfig.AudioConfig.VALUE_OF_PARAM_ENCODE_SPEEX);
        // Scenario for semantic understanding.
        // NOTE(review): presumably only relevant to the dialog capability - confirm.
        asrConfig.addParam("intention", "weather");
        // Realtime recognition is disabled; this never varies between sessions, so it is
        // set once here instead of being re-added on every start().
        asrConfig.addParam(AsrConfig.SessionConfig.PARAM_KEY_REALTIME, "no");
    }

    /**
     * Starts a recognition session if the recorder is idle.
     *
     * <p>When the recorder is busy the call is ignored and the listener of the running
     * session stays in place, so its callbacks are not redirected to {@code listener}.
     *
     * @param listener receiver of results, errors and volume updates; may be {@code null}
     */
    public void start(OnAsrRecogListener listener) {
        if (mAsrRecorder.getRecorderState() == ASRRecorder.RECORDER_STATE_IDLE) {
            mOnAsrRecogListener = listener;
            mAsrRecorder.start(asrConfig.getStringConfig(), null);
        } else {
            Log.i(TAG, "start: 录音机未处于空闲状态,请稍等");
        }
    }

    /**
     * Receives recorder callbacks from the SDK and forwards them to the current listener.
     */
    private class ASRResultProcess implements ASRRecorderListener {
        @Override
        public void onRecorderEventError(RecorderEvent event, int errorCode) {
            Log.i(TAG, "onRecorderEventError: errorCode = " + errorCode);
            OnAsrRecogListener listener = mOnAsrRecogListener;
            if (null != listener) {
                listener.onError(errorCode);
            }
        }

        @Override
        public void onRecorderEventRecogFinsh(RecorderEvent recorderEvent, final AsrRecogResult arg1) {
            if (recorderEvent == RecorderEvent.RECORDER_EVENT_RECOGNIZE_COMPLETE) {
                Log.i(TAG, "onRecorderEventRecogFinsh: 识别结束");
            }
            // Snapshot the listener: the field may be replaced by start() before the
            // posted Runnable executes.
            final OnAsrRecogListener listener = mOnAsrRecogListener;
            if (null != listener) {
                mActivity.runOnUiThread(new Runnable() {
                    @Override
                    public void run() {
                        listener.onAsrRecogResult(arg1);
                    }
                });
            }
        }

        @Override
        public void onRecorderEventStateChange(RecorderEvent recorderEvent) {
            if (recorderEvent == RecorderEvent.RECORDER_EVENT_BEGIN_RECORD) {
                Log.i(TAG, "onRecorderEventStateChange: 开始录音");
            } else if (recorderEvent == RecorderEvent.RECORDER_EVENT_BEGIN_RECOGNIZE) {
                Log.i(TAG, "onRecorderEventStateChange: 开始识别");
            } else if (recorderEvent == RecorderEvent.RECORDER_EVENT_NO_VOICE_INPUT) {
                Log.i(TAG, "onRecorderEventStateChange: 无音频输入");
            } else {
                Log.i(TAG, "onRecorderEventStateChange: recorderEvent = " + recorderEvent);
            }
        }

        @Override
        public void onRecorderRecording(byte[] volumedata, int volume) {
            OnAsrRecogListener listener = mOnAsrRecogListener;
            if (null != listener) {
                listener.onVolume(volume);
            }
        }

        @Override
        public void onRecorderEventRecogProcess(RecorderEvent recorderEvent, AsrRecogResult arg1) {
            if (recorderEvent == RecorderEvent.RECORDER_EVENT_RECOGNIZE_PROCESS) {
                Log.i(TAG, "onRecorderEventRecogProcess: 识别中间反馈");
            }
            // Intermediate results are only logged, never forwarded to the listener.
            if (arg1 != null) {
                if (arg1.getRecogItemList().size() > 0) {
                    Log.i(TAG, "onRecorderEventRecogProcess: 识别中间结果结果为:" + arg1.getRecogItemList().get(0).getRecogResult());
                } else {
                    Log.i(TAG, "onRecorderEventRecogProcess: 未能正确识别,请重新输入");
                }
            }
        }
    }

    /**
     * Callback interface for speech recognition.
     */
    public interface OnAsrRecogListener {
        /**
         * Final recognition result; delivered via {@code Activity.runOnUiThread}.
         * The result is forwarded as received from the SDK and is not null-checked here.
         */
        void onAsrRecogResult(AsrRecogResult asrRecogResult);

        /** Recognition error code; invoked directly from the recorder callback. */
        void onError(int errorCode);

        /** Recording volume; invoked directly from the recorder callback. */
        void onVolume(int volume);
    }
}

使用

初始化灵云的语音能力和语音识别

1
2
3
4
5
6
7
8
9
// HCI SDK helper
mInitTts = new HciUtil(this);
// Initialize the HCI SDK (init + authorization check)
boolean isInitHci = mInitTts.initHci();
if (isInitHci) { // initialization succeeded
……
// Create the speech recognition helper only after the SDK is initialized
mAsrUtil = new AsrUtil(this);
}

语音识别

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/**
 * Starts speech recognition (speech to text) and shows the recognized text in a dialog,
 * one recognition item per line.
 *
 * @param view the view that triggered the call
 */
public void asr(View view) {
    mAsrUtil.start(new AsrUtil.OnAsrRecogListener() {
        @Override
        public void onAsrRecogResult(AsrRecogResult asrRecogResult) {
            StringBuilder text = new StringBuilder();
            // The result is forwarded unchecked by AsrUtil, so guard against a missing
            // result or item list instead of crashing; the dialog is then shown empty,
            // exactly as for a result without items.
            ArrayList<AsrRecogItem> items = (asrRecogResult == null) ? null : asrRecogResult.getRecogItemList();
            if (items == null) {
                Log.w(TAG, "onAsrRecogResult: no recognition result");
            } else {
                for (AsrRecogItem item : items) {
                    String result = item.getRecogResult();
                    Log.i(TAG, "onAsrRecogResult: " + result);
                    text.append(result).append("\n");
                }
            }
            showDialog("识别结果", text.toString());
        }

        @Override
        public void onError(int errorCode) {
            Log.i(TAG, "onError: " + errorCode);
        }

        @Override
        public void onVolume(int volume) {
            Log.i(TAG, "onVolume: " + volume);
        }
    });
}

离线语音识别

离线语音识别和在线语音识别很类似,只需要更改CapKey并导入离线资源包即可。我们先下载离线资源。

下载完解压

解压后的源文件

将里面所有的文件重命名,前面加lib,后面加.so,然后导入工程

重命名

修改CapKey为asr.local.freetalk

注意,灵云的离线语音功能第一次使用需要联网激活,激活以后才可以使用离线功能。

在线语音识别 + 语义理解

直接在在线语音识别的基础上实现语义理解更加简单,只要将CapKey换成asr.cloud.dialog即可

但需要注意的是:想要使用哪个场景,必须先开通该场景,并在参数里加上对应的场景,才能识别出来。

类似这样

1
asrConfig.addParam("intention", "weather");

这里的天气场景对应的参数是weather。了解更多场景及其对应的参数,可以查看《捷通华声灵云公有云能力平台NLU结果开发手册.pdf》。

坚持原创技术分享,您的支持将鼓励我继续创作!