Java录制时检测静音

Java中开始录制操作时,如何检测静音?什么是PCM数据?如何在Java中计算PCM数据?

我找到了解决方案:

package bemukan.voiceRecognition.speechToText;

import javax.sound.sampled.*;

import java.io.*;

/**
 * Captures audio from the default input line into memory, reports the peak
 * level of every captured chunk (useful for silence detection), and can dump
 * the recording to a WAVE file.
 *
 * <p>The capture format is taken from a reference WAVE file
 * ({@code src/Facebook/1.wav}). Typical usage: call {@link #recordAudio()},
 * later {@link #stopAudio()}, then {@link #getAudioFile()}.
 *
 * <p>Threading: capture runs on a background thread started by
 * {@link #recordAudio()}; {@link #stopAudio()} may be called from any thread.
 */
public class RecordAudio {

    /** Largest magnitude of a positive signed 8-bit sample. */
    final static float MAX_8_BITS_SIGNED = Byte.MAX_VALUE;

    /** Largest raw unsigned 8-bit sample; retained for backward compatibility. */
    final static float MAX_8_BITS_UNSIGNED = 0xff;

    /** Largest magnitude of a positive signed 16-bit sample. */
    final static float MAX_16_BITS_SIGNED = Short.MAX_VALUE;

    /** Largest raw unsigned 16-bit sample; retained for backward compatibility. */
    final static float MAX_16_BITS_UNSIGNED = 0xffff;

    /** WAVE file whose header defines the capture format. */
    private static final String FORMAT_REFERENCE_PATH = "src/Facebook/1.wav";

    /** File the recording is written to by {@link #getAudioFile()}. */
    private static final String OUTPUT_PATH = "temp.wav";

    /**
     * Capture-loop flag. Volatile so that a {@link #stopAudio()} issued from
     * another thread is guaranteed to be seen by the capture thread.
     */
    protected volatile boolean running;

    /** Raw PCM bytes of the current/last recording; null until recording starts. */
    private ByteArrayOutputStream out;

    /** Capture format, loaded lazily from the reference file; null if it could not be read. */
    private AudioFormat format;

    /** Peak level (0.0 to 1.0) of the most recently captured chunk. */
    private float level;

    /** Frame size in bytes of {@link #format}. */
    private int frameSize;

    /**
     * Creates a recorder and tries to load the capture format. A missing or
     * unreadable reference file is reported on stderr but does not fail
     * construction; the load is retried when recording starts.
     */
    public RecordAudio() {
        getFormat();
    }

    /**
     * Returns the capture format, reading it from the reference file on first
     * use and caching it afterwards.
     *
     * @return the format, or {@code null} if the reference file cannot be read
     */
    private AudioFormat getFormat() {
        if (format != null) {
            return format;
        }
        AudioInputStream stream = null;
        try {
            stream = AudioSystem.getAudioInputStream(new File(FORMAT_REFERENCE_PATH));
            format = stream.getFormat();
            frameSize = format.getFrameSize();
        } catch (UnsupportedAudioFileException e) {
            System.err.println("Unsupported audio file " + FORMAT_REFERENCE_PATH + ": " + e);
        } catch (IOException e) {
            System.err.println("Cannot read audio file " + FORMAT_REFERENCE_PATH + ": " + e);
        } finally {
            // Only the header is needed; release the file handle right away.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
        return format;
    }

    /**
     * Returns the capture format or fails with a descriptive error instead of
     * letting a {@code null} format surface later as an obscure exception.
     *
     * @throws IllegalStateException if the reference file cannot be read
     */
    private AudioFormat requireFormat() {
        AudioFormat loaded = getFormat();
        if (loaded == null) {
            throw new IllegalStateException(
                    "Audio format unavailable: could not read " + FORMAT_REFERENCE_PATH);
        }
        return loaded;
    }

    /** Asks the capture thread to stop after the chunk it is currently reading. */
    public void stopAudio() {
        running = false;
    }

    /**
     * Opens the input line and starts capturing on a background thread until
     * {@link #stopAudio()} is called. The peak level of every chunk is printed
     * to stdout. Exits the JVM with status -2 if no input line is available.
     *
     * @throws IllegalStateException if the capture format cannot be determined
     */
    public void recordAudio() {
        try {
            final AudioFormat format = requireFormat();
            DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
            final TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
            line.open(format);
            line.start();

            // Publish the sink and the running flag BEFORE the thread starts, so
            // an immediate stopAudio()/getAudioFile() cannot race with thread start-up.
            final ByteArrayOutputStream sink = new ByteArrayOutputStream();
            out = sink;
            running = true;

            Runnable runner = new Runnable() {
                // One second of audio per chunk.
                private final byte[] buffer =
                        new byte[(int) format.getSampleRate() * format.getFrameSize()];

                @Override
                public void run() {
                    try {
                        while (running) {
                            int count = line.read(buffer, 0, buffer.length);
                            int valid = Math.max(count, 0);
                            // Measure only the bytes that were actually read; the
                            // tail of the buffer holds stale data from earlier reads.
                            calculateLevel(buffer, 0, buffer.length - valid);
                            System.out.println(level);
                            if (count > 0) {
                                sink.write(buffer, 0, count);
                            }
                        }
                    } finally {
                        line.stop();
                        line.close(); // release the audio device
                    }
                }
            };
            Thread captureThread = new Thread(runner);
            captureThread.start();
        } catch (LineUnavailableException e) {
            System.err.println("Line unavailable: " + e);
            System.exit(-2);
        }
    }

    /**
     * Writes everything recorded so far to {@code temp.wav}. An I/O failure is
     * reported on stdout and the (possibly missing or incomplete) file handle
     * is still returned.
     *
     * @return the {@code temp.wav} file handle
     * @throws IllegalStateException if nothing has been recorded yet or the
     *         capture format cannot be determined
     */
    public File getAudioFile() {
        if (out == null) {
            throw new IllegalStateException("Nothing recorded yet: call recordAudio() first");
        }
        final AudioFormat format = requireFormat();
        byte[] audio = out.toByteArray();
        File target = new File(OUTPUT_PATH);
        AudioInputStream ais = null;
        try {
            ais = new AudioInputStream(new ByteArrayInputStream(audio), format,
                    audio.length / format.getFrameSize());
            AudioSystem.write(ais, AudioFileFormat.Type.WAVE, target);
            System.out.println("New file created!");
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            if (ais != null) {
                try {
                    ais.close();
                } catch (IOException ignored) {
                    // In-memory stream; a failed close has no consequence.
                }
            }
        }
        return target;
    }

    /**
     * Updates {@link #level} with the peak level of
     * {@code buffer[readPoint .. buffer.length - leftOver)} interpreted in the
     * current capture format.
     *
     * @param buffer    raw PCM bytes
     * @param readPoint index of the first byte to inspect
     * @param leftOver  number of trailing bytes to ignore
     */
    private void calculateLevel(byte[] buffer, int readPoint, int leftOver) {
        level = peakLevel(buffer, readPoint, buffer.length - leftOver,
                format.getSampleSizeInBits(),
                AudioFormat.Encoding.PCM_SIGNED.equals(format.getEncoding()),
                format.isBigEndian());
    }

    /**
     * Computes the peak absolute amplitude of a run of PCM samples, normalised
     * to the range 0.0 (digital silence) to 1.0 (full scale).
     *
     * <p>Package-private and static so the sample arithmetic can be tested
     * without audio hardware.
     *
     * @param buffer           raw PCM bytes
     * @param start            index of the first byte to inspect (inclusive)
     * @param end              index one past the last byte to inspect; clamped
     *                         to {@code buffer.length}
     * @param sampleSizeInBits 16 selects two-byte samples; any other value is
     *                         treated as 8-bit, one byte per sample
     * @param signed           whether samples are two's-complement signed; unsigned
     *                         samples are re-centred on their mid-scale value
     * @param bigEndian        byte order of 16-bit samples
     * @return the peak level in the range 0.0 to 1.0
     */
    static float peakLevel(byte[] buffer, int start, int end,
                           int sampleSizeInBits, boolean signed, boolean bigEndian) {
        final boolean use16Bit = (sampleSizeInBits == 16);
        final int from = Math.max(start, 0);
        final int to = Math.min(end, buffer.length);
        int peak = 0;

        if (use16Bit) {
            // "i + 1 < to" skips a dangling half-sample at the end of the range.
            for (int i = from; i + 1 < to; i += 2) {
                int hiByte = bigEndian ? buffer[i] : buffer[i + 1];
                int loByte = bigEndian ? buffer[i + 1] : buffer[i];
                // Mask both bytes: Java bytes are signed, and an unmasked low
                // byte >= 0x80 would sign-extend and overwrite the high byte.
                int raw = ((hiByte & 0xff) << 8) | (loByte & 0xff);
                int sample = signed ? (short) raw : raw - 0x8000;
                peak = Math.max(peak, Math.abs(sample));
            }
        } else {
            for (int i = from; i < to; i++) {
                int sample = signed ? buffer[i] : (buffer[i] & 0xff) - 0x80;
                peak = Math.max(peak, Math.abs(sample));
            }
        }

        float fullScale = use16Bit ? MAX_16_BITS_SIGNED : MAX_8_BITS_SIGNED;
        // The most negative sample has magnitude fullScale + 1, hence the clamp.
        return Math.min(1.0f, peak / fullScale);
    }
}

回答:

在Java中开始录制操作时,如何检测静音?

计算一组声音帧的dB或RMS值,并确定将其视为“静音”的级别。

什么是PCM数据?

PCM数据是采用脉冲编码调制(Pulse-Code Modulation)格式的音频采样数据。

如何在Java中计算PCM数据?

我不明白这个问题。但是,猜测它与speech-recognition标记有关,我有一些坏消息。从理论上讲,这可以使用Java Speech API来完成。但是,显然没有可用于API的“语音到文本”实现(仅“文字到语音”)。

我必须为语音识别项目计算均方根值。但是我不知道如何用Java计算。

对于采样值为double类型、范围在-1到1之间的单通道信号,可以使用此方法。

/**
 * Computes the RMS volume of a block of samples, each expected to lie in the
 * range -1 to 1. The mean of the block is subtracted from every sample before
 * squaring, so a constant (DC) offset does not contribute to the result.
 *
 * @param raw the samples of a single channel
 * @return the root of the mean squared deviation from the block mean, or 0
 *         for an empty block
 */
public double volumeRMS(double[] raw) {
    final int n = raw.length;
    if (n == 0) {
        return 0d;
    }

    // First pass: mean of the block.
    double total = 0d;
    for (double sample : raw) {
        total += sample;
    }
    final double mean = total / n;

    // Second pass: accumulate squared deviations from that mean.
    double squaredDeviations = 0d;
    for (double sample : raw) {
        squaredDeviations += Math.pow(sample - mean, 2d);
    }

    return Math.sqrt(squaredDeviations / n);
}

有一个字节缓冲区来保存行中的输入值,我应该使用该缓冲区做什么?

如果使用该volumeRMS(double[])方法,则需要先将缓冲区中的byte值转换为取值范围在-1到1之间的double数组。

以上是 Java录制时检测静音 的全部内容, 来源链接: utcz.com/qa/414104.html

回到顶部