Android:如何实时改变输出声音的音调(升降调)

7

我是Android开发的新手。我正在寻找任何一种方法,可以在实时输出声音时应用音调变换。但我找不到任何开始的点。

我找到了这个主题,但我仍然不知道如何应用它。

有什么建议吗?


也许这个Android音高变换库有所帮助:http://stackoverflow.com/questions/15364201/pitch-shifter-sdk-for-android-build-example-app - user2140005
user2140005,你的安卓音高变换库链接已经失效了。能否再次发布一下? - inder
有没有可用的文件进行时间拉伸? - Rohit gupta
3个回答

6
一般来说,这个算法被称为相位声码器(phase vocoder)--在互联网上搜索这个词汇应该会让你有所了解。
现在有一些开源的相位声码器实现,你可以参考它们。
你可以实时地使用相位声码器--主要组件是FFT,所以你需要一个快速的FFT。Android库可以帮助你做到这一点,请看这份文档:http://developer.android.com/reference/android/media/audiofx/Visualizer.html 恰好,我即将发布一个用于ARM的开源FFT,比苹果的vDSP库(迄今为止最快的)更快。几天后我把它上传到github.com时,会再回来回复您。
祝好运。

啊,谢谢你的建议。我发现这个算法有很多数学公式来转换波形。听起来不错!但是对于安卓平台,是否没有任何库或方法可以直接应用这种效果?另一个问题,如果我选择应用相位声码器(phase vocoder)算法,我该如何将其应用于实时安卓输出音频?我可以应用哪种技术将任何公式应用于输出波形?非常感谢你的帮助:D - midnighz
1
关于上述评论:据我所知,Android核心库不提供相位声码器(phase vocoder)功能。但我非常确定有一些开源实现 - 但如果您使用这些实现,您必须公开您的代码。 - Anthony Blake
哇,非常感谢您的提前帮助。所以我必须在这方面进行深入学习。再次非常感谢您。 :) - midnighz
@AnthonyBlake 我对查看你的算法很感兴趣。你已经完成了吗? - Bitcoin Cash - ADA enthusiast

5
在Android SDK中没有内置的音高转换算法,您需要自己编码。音高转换是一种真正的DSP算法;好听的算法是经过多月甚至多年的开发得出的结果...我个人不知道任何Java实现,所以建议您采用一些免费的C++ PS算法,其中最好的一个是我在我的音频应用程序中使用的SoundTouch:

http://www.surina.net/soundtouch/

我稍微玩了一下它的代码,似乎重写成Java并不会太复杂。


1
啊,这个例子真的很好。我会把它留下来作为我的Android编码指南。非常感谢你:)) - midnighz

2

主页链接:http://www.dspdimension.com

/**
 * Real-time pitch shifter built on a phase vocoder: each frame is Hann-windowed, transformed
 * with an FFT, its bins are remapped by the pitch factor, and the result is resynthesised and
 * overlap-added. Ported from the routine published at http://www.dspdimension.com.
 *
 * <p>All streaming state (FIFOs, per-bin phases, output accumulator) lives in static buffers,
 * so this class handles a single audio stream at a time and is NOT thread-safe. The state is
 * cleared automatically whenever {@code fftFrameSize} or {@code osamp} differs from the
 * previous call.
 */
public class AudioPitch {

    /** Largest FFT frame size (in samples) the preallocated work buffers can hold. */
    private static final int MAX_FRAME_LENGTH = 8192;

    // Streaming state carried from one PitchShift call to the next.
    private static final float[] gInFIFO = new float[MAX_FRAME_LENGTH];
    private static final float[] gOutFIFO = new float[MAX_FRAME_LENGTH];
    private static final float[] gFFTworksp = new float[2 * MAX_FRAME_LENGTH];
    private static final float[] gLastPhase = new float[MAX_FRAME_LENGTH / 2 + 1];
    private static final float[] gSumPhase = new float[MAX_FRAME_LENGTH / 2 + 1];
    private static final float[] gOutputAccum = new float[2 * MAX_FRAME_LENGTH];
    private static final float[] gAnaFreq = new float[MAX_FRAME_LENGTH];
    private static final float[] gAnaMagn = new float[MAX_FRAME_LENGTH];
    private static final float[] gSynFreq = new float[MAX_FRAME_LENGTH];
    private static final float[] gSynMagn = new float[MAX_FRAME_LENGTH];

    /** Hann window for the current frame size, cached so cos() is not re-evaluated per frame. */
    private static double[] gWindow = new double[0];
    /** Write position inside {@link #gInFIFO}. */
    private static int gRover;
    /** Frame size and oversampling factor the current state was built for (0 = none yet). */
    private static int gFrameSize;
    private static int gOsamp;

    /**
     * Pitch-shifts {@code numSampsToProcess} samples of {@code indata} in place.
     *
     * <p>The output is delayed relative to the input because samples are only emitted after a
     * full frame has been collected and processed; successive calls continue the same stream.
     *
     * @param pitchShift        frequency scaling factor, must be positive and finite
     *                          (0.5 = one octave down, 2.0 = one octave up)
     * @param numSampsToProcess number of leading samples of {@code indata} to process
     * @param fftFrameSize      FFT frame size; a power of two between 2 and 8192
     * @param osamp             oversampling factor (frame overlap), between 1 and
     *                          {@code fftFrameSize}
     * @param sampleRate        sample rate in Hz, must be positive and finite
     * @param indata            input samples; overwritten with the shifted output
     * @throws IllegalArgumentException if any argument is outside the documented range
     * @throws NullPointerException     if {@code indata} is null
     */
    public static void PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize,
            long osamp, float sampleRate, float[] indata) {
        // Validate everything up front so a bad call can never leave the static state corrupted.
        if (!(pitchShift > 0.0F) || Float.isInfinite(pitchShift)) {
            throw new IllegalArgumentException(
                    "pitchShift must be a positive finite factor: " + pitchShift);
        }
        if (!(sampleRate > 0.0F) || Float.isInfinite(sampleRate)) {
            throw new IllegalArgumentException(
                    "sampleRate must be positive and finite: " + sampleRate);
        }
        if (fftFrameSize < 2 || fftFrameSize > MAX_FRAME_LENGTH
                || (fftFrameSize & (fftFrameSize - 1)) != 0) {
            throw new IllegalArgumentException("fftFrameSize must be a power of two in [2, "
                    + MAX_FRAME_LENGTH + "]: " + fftFrameSize);
        }
        if (osamp < 1 || osamp > fftFrameSize) {
            throw new IllegalArgumentException(
                    "osamp must be in [1, fftFrameSize]: " + osamp);
        }
        if (numSampsToProcess > indata.length) {
            throw new IllegalArgumentException("numSampsToProcess (" + numSampsToProcess
                    + ") exceeds indata.length (" + indata.length + ")");
        }

        final int frameSize = (int) fftFrameSize;
        final int overlap = (int) osamp;
        final int stepSize = frameSize / overlap;
        final int inFifoLatency = frameSize - stepSize;
        final double freqPerBin = sampleRate / (double) frameSize;
        // Phase advance expected for bin 1 between two consecutive frames.
        final double expct = 2.0 * Math.PI * (double) stepSize / (double) frameSize;

        // State built for another frame layout is meaningless (and would index out of bounds).
        if (frameSize != gFrameSize || overlap != gOsamp) {
            resetState(frameSize, overlap);
        }

        for (int i = 0; i < numSampsToProcess; i++) {
            // Queue the input sample and emit the already-processed sample for this slot.
            gInFIFO[gRover] = indata[i];
            indata[i] = gOutFIFO[gRover - inFifoLatency];
            gRover++;

            if (gRover >= frameSize) {
                gRover = inFifoLatency;
                processFrame(pitchShift, frameSize, overlap, stepSize, freqPerBin, expct);
            }
        }
    }

    /** Clears all streaming buffers and prepares the window for a new frame layout. */
    private static void resetState(int frameSize, int overlap) {
        java.util.Arrays.fill(gInFIFO, 0.0F);
        java.util.Arrays.fill(gOutFIFO, 0.0F);
        java.util.Arrays.fill(gFFTworksp, 0.0F);
        java.util.Arrays.fill(gLastPhase, 0.0F);
        java.util.Arrays.fill(gSumPhase, 0.0F);
        java.util.Arrays.fill(gOutputAccum, 0.0F);
        java.util.Arrays.fill(gAnaFreq, 0.0F);
        java.util.Arrays.fill(gAnaMagn, 0.0F);
        java.util.Arrays.fill(gSynFreq, 0.0F);
        java.util.Arrays.fill(gSynMagn, 0.0F);

        gWindow = new double[frameSize];
        for (int k = 0; k < frameSize; k++) {
            gWindow[k] = -.5 * Math.cos(2.0 * Math.PI * (double) k / (double) frameSize) + .5;
        }

        gFrameSize = frameSize;
        gOsamp = overlap;
        gRover = frameSize - frameSize / overlap;
    }

    /** Runs analysis, bin remapping and overlap-add synthesis on the frame held in gInFIFO. */
    private static void processFrame(float pitchShift, int frameSize, int overlap, int stepSize,
            double freqPerBin, double expct) {
        final int halfFrame = frameSize / 2;
        final int inFifoLatency = frameSize - stepSize;

        /* window the frame and interleave it as (re, im) pairs */
        for (int k = 0; k < frameSize; k++) {
            gFFTworksp[2 * k] = (float) (gInFIFO[k] * gWindow[k]);
            gFFTworksp[2 * k + 1] = 0.0F;
        }

        /* ***************** ANALYSIS ******************* */
        ShortTimeFourierTransform(gFFTworksp, frameSize, -1);

        for (int k = 0; k <= halfFrame; k++) {
            final double real = gFFTworksp[2 * k];
            final double imag = gFFTworksp[2 * k + 1];

            final double magn = 2.0 * Math.sqrt(real * real + imag * imag);
            // Math.atan2 covers every quadrant, including imag == 0 with real < 0 (phase pi).
            final double phase = Math.atan2(imag, real);

            /* phase difference to the previous frame, minus the expected advance */
            double tmp = phase - gLastPhase[k];
            gLastPhase[k] = (float) phase;
            tmp -= (double) k * expct;

            /* map delta phase into the +/- pi interval */
            long qpd = (long) (tmp / Math.PI);
            if (qpd >= 0) {
                qpd += qpd & 1;
            } else {
                qpd -= qpd & 1;
            }
            tmp -= Math.PI * (double) qpd;

            /* deviation from the bin centre, then the partial's true frequency */
            tmp = overlap * tmp / (2.0 * Math.PI);
            tmp = (double) k * freqPerBin + tmp * freqPerBin;

            gAnaMagn[k] = (float) magn;
            gAnaFreq[k] = (float) tmp;
        }

        /* ***************** PROCESSING ******************* */
        java.util.Arrays.fill(gSynMagn, 0, frameSize, 0.0F);
        java.util.Arrays.fill(gSynFreq, 0, frameSize, 0.0F);
        for (int k = 0; k <= halfFrame; k++) {
            final long index = (long) (k * pitchShift);
            if (index <= halfFrame) {
                gSynMagn[(int) index] += gAnaMagn[k];
                gSynFreq[(int) index] = gAnaFreq[k] * pitchShift;
            }
        }

        /* ***************** SYNTHESIS ******************* */
        for (int k = 0; k <= halfFrame; k++) {
            final double magn = gSynMagn[k];
            double tmp = gSynFreq[k];

            /* frequency deviation -> bin deviation -> phase increment for this hop */
            tmp -= (double) k * freqPerBin;
            tmp /= freqPerBin;
            tmp = 2.0 * Math.PI * tmp / overlap;
            tmp += (double) k * expct;

            // Keep the running phase wrapped to [-pi, pi]; an unbounded float sum loses
            // precision after a few minutes of audio.
            gSumPhase[k] = (float) Math.IEEEremainder(gSumPhase[k] + tmp, 2.0 * Math.PI);
            final double phase = gSumPhase[k];

            gFFTworksp[2 * k] = (float) (magn * Math.cos(phase));
            gFFTworksp[2 * k + 1] = (float) (magn * Math.sin(phase));
        }

        /* zero negative frequencies */
        java.util.Arrays.fill(gFFTworksp, frameSize + 2, 2 * frameSize, 0.0F);

        ShortTimeFourierTransform(gFFTworksp, frameSize, 1);

        /* window again and add into the output accumulator */
        for (int k = 0; k < frameSize; k++) {
            gOutputAccum[k] +=
                    (float) (2.0 * gWindow[k] * gFFTworksp[2 * k] / (halfFrame * overlap));
        }

        /* hand one hop of finished samples to the output FIFO, then slide both buffers */
        System.arraycopy(gOutputAccum, 0, gOutFIFO, 0, stepSize);
        System.arraycopy(gOutputAccum, stepSize, gOutputAccum, 0, frameSize);
        System.arraycopy(gInFIFO, stepSize, gInFIFO, 0, inFifoLatency);
    }

    /**
     * In-place radix-2 FFT over interleaved complex data ({@code re, im, re, im, ...}).
     * No normalisation is applied in either direction.
     *
     * @param fftBuffer    at least {@code 2 * fftFrameSize} floats of interleaved complex data
     * @param fftFrameSize number of complex points; must be a positive power of two
     * @param sign         -1 for the forward transform, +1 for the inverse transform
     * @throws IllegalArgumentException if the size is not a power of two or the buffer is too
     *                                  short
     */
    public static void ShortTimeFourierTransform(float[] fftBuffer, long fftFrameSize, long sign) {
        if (fftFrameSize < 1 || (fftFrameSize & (fftFrameSize - 1)) != 0) {
            throw new IllegalArgumentException(
                    "fftFrameSize must be a positive power of two: " + fftFrameSize);
        }
        if (fftFrameSize > fftBuffer.length / 2) {
            throw new IllegalArgumentException("fftBuffer holds " + fftBuffer.length
                    + " floats but " + fftFrameSize + " complex points were requested");
        }
        final int length = (int) (2 * fftFrameSize);

        /* bit-reversal permutation */
        for (int i = 2; i < length - 2; i += 2) {
            int j = 0;
            for (int bitm = 2; bitm < length; bitm <<= 1) {
                if ((i & bitm) != 0) {
                    j++;
                }
                j <<= 1;
            }
            if (i < j) {
                float swap = fftBuffer[i];
                fftBuffer[i] = fftBuffer[j];
                fftBuffer[j] = swap;
                swap = fftBuffer[i + 1];
                fftBuffer[i + 1] = fftBuffer[j + 1];
                fftBuffer[j + 1] = swap;
            }
        }

        /* butterfly stages; log2(fftFrameSize) of them */
        final int stages = Long.numberOfTrailingZeros(fftFrameSize);
        int le = 2;
        for (int stage = 0; stage < stages; stage++) {
            le <<= 1;
            final int le2 = le >> 1;
            float ur = 1.0F;
            float ui = 0.0F;
            final float arg = (float) Math.PI / (le2 >> 1);
            final float wr = (float) Math.cos(arg);
            final float wi = (float) (sign * Math.sin(arg));
            for (int j = 0; j < le2; j += 2) {
                for (int i = j; i < length; i += le) {
                    final int p = i + le2;
                    final float tr = fftBuffer[p] * ur - fftBuffer[p + 1] * ui;
                    final float ti = fftBuffer[p] * ui + fftBuffer[p + 1] * ur;
                    fftBuffer[p] = fftBuffer[i] - tr;
                    fftBuffer[p + 1] = fftBuffer[i + 1] - ti;
                    fftBuffer[i] += tr;
                    fftBuffer[i + 1] += ti;
                }
                /* rotate the twiddle factor for the next butterfly group */
                final float nextUr = ur * wr - ui * wi;
                ui = ur * wi + ui * wr;
                ur = nextUr;
            }
        }
    }

}

这段代码可以工作,但耗费CPU资源较多。

pitchShift值介于0.5至2.0之间。

按照以下方式调用该类:

int maxValueOFShort = 32768;             
short [] buffer = new short[800];               
float[] inData = new float[buffer.length];
while (audiorackIsRun) 
{                               
 int m =  recorder.read(buffer, 0, buffer.length);                  
 for(int n=0; n<buffer.length;n++)
      inData[n] =  buffer[n]/(float)maxValueOFShort;    

 AudioPitch.PitchShift(1, buffer.length, 4096, 4, 44100, inData);

 for(int n=0; n<buffer.length;n++)
      buffer[n] = (short)(inData[n]*maxValueOFShort);  

  player.write(buffer, 0, buffer.length); 
}

你能分享一下时间拉伸的代码吗? 谢谢。 - Rohit gupta

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接