Changing the Pitch of a Sound

I got a tweet earlier today from some one asking me how to change the pitch of a wave file. The person asking was aware that SoundEffectInstance has a setting to alter pitch but it wasn’t sufficient for his needs. He needed to be able to save the modified WAV to a file. It’s something that is easy to do. So I made a quick example

Video Example

I used a technique that comes close to matching linear interpolation. It get’s the job done but isn’t the best technique because of the opportunity for certain types of distortion to introduced. Methods with less distortion are available at the cost of potentially more CPU cycles. For the example I made no matter what the original sample rate was I am playing back at 44KHz and adjusting my interpolation accordingly so that no unintentional changes in pitch are introduced.

To do the work I’ve created a class named AdjustedSoundEffect. It has a Play() method that takes as it’s argument the factor by which the pitch should be adjusted where 1 plays the sound at the original pitch, 2 plays it at twice its pitch, and 0.5 plays it at half its pitch.

If you are interested the code I used is below.

using System;
using System.IO;
using System.Net;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Documents;
using System.Windows.Ink;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Animation;
using System.Windows.Shapes;
using Microsoft.Xna.Framework.Audio;

namespace J2i.Net.VoiceRecorder.Utility
{
    public class AdjustedSoundEffect
    {
        //I will always playback at 44KHz regardless of the original sample rate. 
        //I'm making appropriate adjustments to prevent this from resulting in the
        //pitch being shifted. 
        private const int PlaybackSampleRate = 16000;
        private const int BufferSize = PlaybackSampleRate*2;

        private int _channelCount = 1;
        private int _sampleRate;
        private int _bytesPerSample = 16;
        private int _byteCount = 0;
        private float _baseStepRate = 1;
        private float _adjustedStepRate;
        private float _index = 0;
        private int playbackBufferIndex = 0;
        private int _sampleStep = 2;

        private bool _timeToStop = false;

        private byte[][] _playbackBuffers;

        public bool IsPlaying { get; set;  }

        public object SyncRoot = new object();


        private DynamicSoundEffectInstance _dse;

        public static AdjustedSoundEffect FromStream(Stream source)
        {
            var retVal = new AdjustedSoundEffect(source);
            return retVal;
        }

        public AdjustedSoundEffect()
        {
            _playbackBuffers = new byte[3][];
            for (var i = 0; i < _playbackBuffers.Length;++i )
            {
                _playbackBuffers[i] = new byte[BufferSize];
            }
                _dse = new DynamicSoundEffectInstance(PlaybackSampleRate, AudioChannels.Stereo);
            _dse.BufferNeeded += new EventHandler<EventArgs>(_dse_BufferNeeded);
        }

        void SubmitNextBuffer()
        {
            if(_timeToStop)
            {
                Stop();
            }
            lock (SyncRoot)
            {
                byte[] nextBuffer = _playbackBuffers[playbackBufferIndex];
                playbackBufferIndex = (playbackBufferIndex + 1)%_playbackBuffers.Length;
                int i_step = 0;
                int i = 0;

                int endOfBufferMargin = 2*_channelCount;
                for (;
                    i < (nextBuffer.Length / 4) && (_index < (_sourceBuffer.Length - endOfBufferMargin));
                    ++i, i_step += 4)
                {

                    int k = _sampleStep*(int) _index;
                    if (k > _sourceBuffer.Length - endOfBufferMargin)
                        k = _sourceBuffer.Length -endOfBufferMargin ;
                    nextBuffer[i_step + 0] = _sourceBuffer[k + 0];
                    nextBuffer[i_step + 1] = _sourceBuffer[k + 1];
                    if (_channelCount == 2)
                    {
                        nextBuffer[i_step + 2] = _sourceBuffer[k + 2];
                        nextBuffer[i_step + 3] = _sourceBuffer[k + 3];
                    }
                    else
                    {
                        nextBuffer[i_step + 2] = _sourceBuffer[k + 0];
                        nextBuffer[i_step + 3] = _sourceBuffer[k + 1];

                    }
                    _index += _adjustedStepRate;
                }

                if ((_index >= _sourceBuffer.Length - endOfBufferMargin))
                    _timeToStop = true;
                for (; i < (nextBuffer.Length/4); ++i, i_step += 4)
                {
                    nextBuffer[i_step + 0] = 0;
                    nextBuffer[i_step + 1] = 0;
                    if (_channelCount == 2)
                    {
                        nextBuffer[i_step + 2] = 0;
                        nextBuffer[i_step + 3] = 0;
                    }
                }
                _dse.SubmitBuffer(nextBuffer);
            }
        }

        void _dse_BufferNeeded(object sender, EventArgs e)
        {
            SubmitNextBuffer();
        }

        private byte[] _sourceBuffer;
        

        public AdjustedSoundEffect(Stream source): this()
        {
            byte[] header = new byte[44];
            source.Read(header, 0, 44);

            // I'm assuming you passed a proper wave file so I won't bother 
            // verifying  that  the  header  is properly formatted and will 
            // accept it on faith :-)

            _channelCount = header[22] + (header[23] << 8);
            _sampleRate = header[24] | (header[25] << 8) | (header[26] << 16) | (header[27] << 24);
            _bytesPerSample = header[34]/8;
            _byteCount = header[40] | (header[41] << 8) | (header[42] << 16) | (header[43] << 24);
            _sampleStep = _bytesPerSample*_channelCount;
            _sourceBuffer = new byte[_byteCount];
            source.Read(_sourceBuffer, 0, _sourceBuffer.Length);


            _baseStepRate = ((float)_sampleRate) / PlaybackSampleRate;
        }

        /// <summary>
        /// 
        /// </summary>
        /// <param name="pitchFactor">Factor by which pitch will be adjusted. 2 doubles the frequency,
        /// // 1 is normal speed, 0.5 halfs the frequency</param>
        public void Play(float pitchFactor)
        {
            _timeToStop = false;

            _index = 0;
            lock (SyncRoot)
            {
                _adjustedStepRate = _baseStepRate * pitchFactor;
                _index = 0;
                playbackBufferIndex = 0;
            }
            if(!IsPlaying)
            {
                SubmitNextBuffer();
                SubmitNextBuffer();
                SubmitNextBuffer();
                _dse.Play();
                IsPlaying = true;
            }
        }

        public void Stop()
        {
            if(IsPlaying)
            {
                _dse.Stop();
            }
        }
    }
}

Advertisements
Post a comment or leave a trackback: Trackback URL.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: