diff options
author | Miller Puckette <millerpuckette@users.sourceforge.net> | 2004-09-06 20:44:42 +0000 |
---|---|---|
committer | Miller Puckette <millerpuckette@users.sourceforge.net> | 2004-09-06 20:44:42 +0000 |
commit | cfc2f7d280ae57ef563dd69bad27c61a148d6ded (patch) | |
tree | 92c4c645399fcad23f4a6ba15dff91c0e6fccb29 /pd/portaudio/pa_win | |
parent | ed932acb5860bf8b9296169676499562a55d139e (diff) |
... more changes to try to upload 0.38 test 5 to CVS
svn path=/trunk/; revision=2011
Diffstat (limited to 'pd/portaudio/pa_win')
-rw-r--r-- | pd/portaudio/pa_win/pa_win_hostapis.c | 79 | ||||
-rw-r--r-- | pd/portaudio/pa_win/pa_win_util.c | 134 | ||||
-rw-r--r-- | pd/portaudio/pa_win/pa_x86_plain_converters.c | 1167 | ||||
-rw-r--r-- | pd/portaudio/pa_win/pa_x86_plain_converters.h | 19 |
4 files changed, 1399 insertions, 0 deletions
diff --git a/pd/portaudio/pa_win/pa_win_hostapis.c b/pd/portaudio/pa_win/pa_win_hostapis.c new file mode 100644 index 00000000..fa6048e4 --- /dev/null +++ b/pd/portaudio/pa_win/pa_win_hostapis.c @@ -0,0 +1,79 @@ +/* + * $Id: pa_win_hostapis.c,v 1.1.2.9 2003/09/15 18:30:26 rossbencina Exp $ + * Portable Audio I/O Library Windows initialization table + * + * Based on the Open Source API proposed by Ross Bencina + * Copyright (c) 1999-2002 Ross Bencina, Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** @file + Win32 host API initialization function table. + + @todo Consider using PA_USE_WMME etc instead of PA_NO_WMME. This is what + the Unix version does, we should consider being consistent. 
+*/ + + +#include "pa_hostapi.h" + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +PaError PaSkeleton_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiIndex index ); +PaError PaWinMme_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiIndex index ); +PaError PaWinDs_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiIndex index ); +PaError PaAsio_Initialize( PaUtilHostApiRepresentation **hostApi, PaHostApiIndex index ); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +/* Table of host API initializer functions built into this binary. The WMME, DirectSound and ASIO entries are each compiled in unless the matching PA_NO_WMME / PA_NO_DS / PA_NO_ASIO macro is defined; the skeleton entry is always present. The list ends with a 0 sentinel. */ +PaUtilHostApiInitializer *paHostApiInitializers[] = + { + +#ifndef PA_NO_WMME + PaWinMme_Initialize, +#endif + +#ifndef PA_NO_DS + PaWinDs_Initialize, +#endif + +#ifndef PA_NO_ASIO + PaAsio_Initialize, +#endif + + PaSkeleton_Initialize, /* just for testing */ + + 0 /* NULL terminated array */ + }; + +/* NOTE(review): presumably an index into paHostApiInitializers selecting the default host API (0 = first compiled-in entry) - confirm against the code that consumes it. */ +int paDefaultHostApiIndex = 0; + diff --git a/pd/portaudio/pa_win/pa_win_util.c b/pd/portaudio/pa_win/pa_win_util.c new file mode 100644 index 00000000..0395e5c8 --- /dev/null +++ b/pd/portaudio/pa_win/pa_win_util.c @@ -0,0 +1,134 @@ +/* + * $Id: pa_win_util.c,v 1.1.2.7 2003/09/15 18:30:26 rossbencina Exp $ + * Portable Audio I/O Library + * Win32 platform-specific support functions + * + * Based on the Open Source API proposed by Ross Bencina + * Copyright (c) 1999-2000 Ross Bencina + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** @file + Win32 platform-specific support functions. + + @todo Implement workaround for QueryPerformanceCounter() skipping forward + bug. (see msdn kb Q274323). +*/ + +#include <windows.h> +#include <mmsystem.h> /* for timeGetTime() */ + +#include "pa_util.h" + + +/* + Track memory allocations to avoid leaks. 
+ */ + +#if PA_TRACK_MEMORY +static int numAllocations_ = 0; +#endif + +/* Allocate size bytes with GlobalAlloc( GPTR, size ) and return the result (NULL when the allocation failed). With PA_TRACK_MEMORY enabled, every successful allocation increments numAllocations_. */ +void *PaUtil_AllocateMemory( long size ) +{ + void *result = GlobalAlloc( GPTR, size ); + +#if PA_TRACK_MEMORY + if( result != NULL ) numAllocations_ += 1; +#endif + return result; +} + +/* Release a block with GlobalFree and, with PA_TRACK_MEMORY enabled, decrement numAllocations_. A NULL block is ignored. */ +void PaUtil_FreeMemory( void *block ) +{ + if( block != NULL ) + { + GlobalFree( block ); +#if PA_TRACK_MEMORY + numAllocations_ -= 1; +#endif + + } +} + +/* Return the number of blocks allocated and not yet freed, or 0 when PA_TRACK_MEMORY is disabled. */ +int PaUtil_CountCurrentlyAllocatedBlocks( void ) +{ +#if PA_TRACK_MEMORY + return numAllocations_; +#else + return 0; +#endif +} + +/* Suspend the calling thread by forwarding msec to the Win32 Sleep() call. */ +void Pa_Sleep( long msec ) +{ + Sleep( msec ); +} + +static int usePerformanceCounter_; +static double secondsPerTick_; +/* Record whether QueryPerformanceFrequency() succeeded and, if it did, cache the length of one counter tick in seconds for PaUtil_GetTime(). */ +void PaUtil_InitializeClock( void ) +{ + LARGE_INTEGER ticksPerSecond; + + if( QueryPerformanceFrequency( &ticksPerSecond ) != 0 ) + { + usePerformanceCounter_ = 1; + secondsPerTick_ = 1.0 / (double)ticksPerSecond.QuadPart; + } + else + { + usePerformanceCounter_ = 0; + } +} + +/* Return a time stamp in seconds: the performance counter scaled by secondsPerTick_ when usePerformanceCounter_ is set, otherwise timeGetTime() milliseconds scaled by .001. */ +double PaUtil_GetTime( void ) +{ + LARGE_INTEGER time; + + if( usePerformanceCounter_ ) + { + /* FIXME: + according to this knowledge-base article, QueryPerformanceCounter + can skip forward by seconds! + http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q274323& + + it may be better to use the rdtsc instruction using inline asm, + however then a method is needed to calculate a ticks/seconds ratio. + */ + QueryPerformanceCounter( &time ); + return time.QuadPart * secondsPerTick_; + } + else + { + return timeGetTime() * .001; + } +} diff --git a/pd/portaudio/pa_win/pa_x86_plain_converters.c b/pd/portaudio/pa_win/pa_x86_plain_converters.c new file mode 100644 index 00000000..98442a8c --- /dev/null +++ b/pd/portaudio/pa_win/pa_x86_plain_converters.c @@ -0,0 +1,1167 @@ +#include "pa_x86_plain_converters.h" + +#include "pa_converters.h" +#include "pa_dither.h" + +/* + plain intel assembly versions of standard pa converter functions. 
+ + the main reason these versions are faster than the equivalent C versions + is that float -> int casting is expensive in C on x86 because the rounding + mode needs to be changed for every cast. these versions only set + the rounding mode once outside the loop. + + small additional speed gains are made by the way that clamping is + implemented. + +TODO: + o- inline dither code + o- implement Dither only (no-clip) versions + o- implement int8 and uint8 versions + o- test thoroughly + + o- the packed 24 bit functions could benefit from unrolling and avoiding + byte and word sized register access. +*/ + +/* -------------------------------------------------------------------------- */ + +/* +#define PA_CLIP_( val, min, max )\ + { val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); } +*/ + +/* + the following notes were used to determine whether a floating point + value should be saturated (ie >1 or <-1) by loading it into an integer + register. these should be rewritten so that they make sense. + + an ieee floating point value + + 1.xxxxxxxxxxxxxxxxxxxx? 
+ + + is less than or equal to 1 and greater than or equal to -1 either: + + if the mantissa is 0 and the unbiased exponent is 0 + + OR + + if the unbiased exponent < 0 + + this translates to: + + if the mantissa is 0 and the biased exponent is 7F + + or + + if the biased exponent is less than 7F + + + therefore the value is greater than 1 or less than -1 if + + the mantissa is not 0 and the biased exponent is 7F + + or + + if the biased exponent is greater than 7F + + + in other words, if we mask out the sign bit, the value is + greater than 1 or less than -1 if its integer representation is greater than: + + 0 01111111 0000 0000 0000 0000 0000 000 + + 0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000 +*/ + +/* -------------------------------------------------------------------------- */ + +static const short fpuControlWord_ = 0x033F; /*round to nearest, 64 bit precision, all exceptions masked*/ +static const double int32Scaler_ = 0x7FFFFFFF; +static const double ditheredInt32Scaler_ = 0x7FFFFFFE; +static const double int24Scaler_ = 0x7FFFFF; +static const double ditheredInt24Scaler_ = 0x7FFFFE; +static const double int16Scaler_ = 0x7FFF; +static const double ditheredInt16Scaler_ = 0x7FFE; + +#define PA_DITHER_BITS_ (15) +/* Multiply by PA_FLOAT_DITHER_SCALE_ to get a float between -2.0 and +1.99999 */ +#define PA_FLOAT_DITHER_SCALE_ (1.0 / ((1<<PA_DITHER_BITS_)-1)) +static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_; +#define PA_DITHER_SHIFT_ ((32 - PA_DITHER_BITS_) + 1) + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int32( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ +/* + float *src = (float*)sourceBuffer; + signed long *dest = (signed long*)destinationBuffer; + (void)ditherGenerator; // unused parameter + + while( count-- ) + { + // REVIEW + 
double scaled = *src * 0x7FFFFFFF; + *dest = (signed long) scaled; + + src += sourceStride; + dest += destinationStride; + } +*/ + + short savedFpuControlWord; + + (void) ditherGenerator; /* unused parameter */ + + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 and int32 + mov eax, sourceStride + imul eax, edx + + mov ecx, count + imul ecx, eax + add ecx, esi + + mov edi, destinationBuffer + + mov ebx, destinationStride + imul ebx, edx + + fwait + fstcw savedFpuControlWord + fldcw fpuControlWord_ + + fld int32Scaler_ // stack: (int)0x7FFFFFFF + /* NOTE(review): the loop below converts one sample before its first cmp esi, ecx and leaves only on equality (jne), so count == 0 (ecx == esi on entry) would run past the end pointer; confirm callers never pass count == 0. */ + Float32_To_Int32_loop: + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF + /* + note: we could store to a temporary qword here which would cause + wraparound distortion instead of int indefinite 0x10. that would + be more work, and given that not enabling clipping is only advisable + when you know that your signal isn't going to clip it isn't worth it. + */ + fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF + + add edi, ebx // increment destination ptr + //lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr reached end? 
+ jne Float32_To_Int32_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } +} + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int32_Clip( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ +/* + float *src = (float*)sourceBuffer; + signed long *dest = (signed long*)destinationBuffer; + (void) ditherGenerator; // unused parameter + + while( count-- ) + { + // REVIEW + double scaled = *src * 0x7FFFFFFF; + PA_CLIP_( scaled, -2147483648., 2147483647. ); + *dest = (signed long) scaled; + + src += sourceStride; + dest += destinationStride; + } +*/ + + short savedFpuControlWord; + + (void) ditherGenerator; /* unused parameter */ + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 and int32 + mov eax, sourceStride + imul eax, edx + + mov ecx, count + imul ecx, eax + add ecx, esi + + mov edi, destinationBuffer + + mov ebx, destinationStride + imul ebx, edx + + fwait + fstcw savedFpuControlWord + fldcw fpuControlWord_ + + fld int32Scaler_ // stack: (int)0x7FFFFFFF + + Float32_To_Int32_Clip_loop: + + mov edx, dword ptr [esi] // load floating point value into integer register + + and edx, 0x7FFFFFFF // mask off sign + cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 + + jg Float32_To_Int32_Clip_clamp + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF + fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF + jmp Float32_To_Int32_Clip_stored + + Float32_To_Int32_Clip_clamp: + mov 
edx, dword ptr [esi] // load floating point value into integer register + shr edx, 31 // move sign bit into bit 0 + add esi, eax // increment source ptr + //lea esi, [esi+eax] + add edx, 0x7FFFFFFF // convert to maximum range integers + mov dword ptr [edi], edx + + Float32_To_Int32_Clip_stored: + + //add edi, ebx // increment destination ptr + lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr reached end? + jne Float32_To_Int32_Clip_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } +} + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int32_DitherClip( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ + /* + float *src = (float*)sourceBuffer; + signed long *dest = (signed long*)destinationBuffer; + + while( count-- ) + { + // REVIEW + double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); + // use smaller scaler to prevent overflow when we add the dither + double dithered = ((double)*src * (2147483646.0)) + dither; + PA_CLIP_( dithered, -2147483648., 2147483647. 
); + *dest = (signed long) dithered; + + + src += sourceStride; + dest += destinationStride; + } + */ + + short savedFpuControlWord; + + // spill storage: + signed long sourceByteStride; + signed long highpassedDither; + + // dither state: + unsigned long ditherPrevious = ditherGenerator->previous; + unsigned long ditherRandSeed1 = ditherGenerator->randSeed1; + unsigned long ditherRandSeed2 = ditherGenerator->randSeed2; + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 and int32 + mov eax, sourceStride + imul eax, edx + + mov ecx, count + imul ecx, eax + add ecx, esi + + mov edi, destinationBuffer + + mov ebx, destinationStride + imul ebx, edx + + fwait + fstcw savedFpuControlWord + fldcw fpuControlWord_ + + fld ditheredInt32Scaler_ // stack: int scaler + + Float32_To_Int32_DitherClip_loop: + + mov edx, dword ptr [esi] // load floating point value into integer register + + and edx, 0x7FFFFFFF // mask off sign + cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 + + jg Float32_To_Int32_DitherClip_clamp + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, int scaler + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler + + /* + // call PaUtil_GenerateFloatTriangularDither with C calling convention + mov sourceByteStride, eax // save eax + mov sourceEnd, ecx // save ecx + push ditherGenerator // pass ditherGenerator parameter on stack + call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler + pop edx // clear parameter off stack + mov ecx, sourceEnd // restore ecx + mov eax, sourceByteStride // restore eax + */ + + // generate dither + mov sourceByteStride, eax // save eax + mov edx, 196314165 + mov eax, ditherRandSeed1 + mul edx // eax:edx 
= eax * 196314165 + //add eax, 907633515 + lea eax, [eax+907633515] + mov ditherRandSeed1, eax + mov edx, 196314165 + mov eax, ditherRandSeed2 + mul edx // eax:edx = eax * 196314165 + //add eax, 907633515 + lea eax, [eax+907633515] + mov edx, ditherRandSeed1 + shr edx, PA_DITHER_SHIFT_ + mov ditherRandSeed2, eax + shr eax, PA_DITHER_SHIFT_ + //add eax, edx // eax -> current + lea eax, [eax+edx] + mov edx, ditherPrevious + neg edx + lea edx, [eax+edx] // highpass = current - previous + mov highpassedDither, edx + mov ditherPrevious, eax // previous = current + mov eax, sourceByteStride // restore eax + fild highpassedDither + fmul const_float_dither_scale_ + // end generate dither, dither signal in st(0) + + faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler + fistp dword ptr [edi] // pop st(0) into dest, stack: int scaler + jmp Float32_To_Int32_DitherClip_stored + + Float32_To_Int32_DitherClip_clamp: + mov edx, dword ptr [esi] // load floating point value into integer register + shr edx, 31 // move sign bit into bit 0 + add esi, eax // increment source ptr + //lea esi, [esi+eax] + add edx, 0x7FFFFFFF // convert to maximum range integers + mov dword ptr [edi], edx + + Float32_To_Int32_DitherClip_stored: + + //add edi, ebx // increment destination ptr + lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr reached end? 
+ jne Float32_To_Int32_DitherClip_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } + + ditherGenerator->previous = ditherPrevious; + ditherGenerator->randSeed1 = ditherRandSeed1; + ditherGenerator->randSeed2 = ditherRandSeed2; +} + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int24( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ +/* + float *src = (float*)sourceBuffer; + unsigned char *dest = (unsigned char*)destinationBuffer; + signed long temp; + + (void) ditherGenerator; // unused parameter + + while( count-- ) + { + // convert to 32 bit and drop the low 8 bits + double scaled = *src * 0x7FFFFFFF; + temp = (signed long) scaled; + + dest[0] = (unsigned char)(temp >> 8); + dest[1] = (unsigned char)(temp >> 16); + dest[2] = (unsigned char)(temp >> 24); + + src += sourceStride; + dest += destinationStride * 3; + } +*/ + + short savedFpuControlWord; + + signed long tempInt32; + + (void) ditherGenerator; /* unused parameter */ + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 + mov eax, sourceStride + imul eax, edx + + mov ecx, count + imul ecx, eax + add ecx, esi + + mov edi, destinationBuffer + + mov edx, 3 // sizeof int24 + mov ebx, destinationStride + imul ebx, edx + + fwait + fstcw savedFpuControlWord + fldcw fpuControlWord_ + + fld int24Scaler_ // stack: (int)0x7FFFFF + + Float32_To_Int24_loop: + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, (int)0x7FFFFF + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF + fistp tempInt32 // pop st(0) into 
tempInt32, stack: (int)0x7FFFFF + mov edx, tempInt32 + + mov byte ptr [edi], DL + shr edx, 8 + //mov byte ptr [edi+1], DL + //mov byte ptr [edi+2], DH + mov word ptr [edi+1], DX + + //add edi, ebx // increment destination ptr + lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr reached end? + jne Float32_To_Int24_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } +} + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int24_Clip( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ +/* + float *src = (float*)sourceBuffer; + unsigned char *dest = (unsigned char*)destinationBuffer; + signed long temp; + + (void) ditherGenerator; // unused parameter + + while( count-- ) + { + // convert to 32 bit and drop the low 8 bits + double scaled = *src * 0x7FFFFFFF; + PA_CLIP_( scaled, -2147483648., 2147483647. 
); + temp = (signed long) scaled; + + dest[0] = (unsigned char)(temp >> 8); + dest[1] = (unsigned char)(temp >> 16); + dest[2] = (unsigned char)(temp >> 24); + + src += sourceStride; + dest += destinationStride * 3; + } +*/ + + short savedFpuControlWord; + + signed long tempInt32; + + (void) ditherGenerator; /* unused parameter */ + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 + mov eax, sourceStride + imul eax, edx + + mov ecx, count + imul ecx, eax + add ecx, esi + + mov edi, destinationBuffer + + mov edx, 3 // sizeof int24 + mov ebx, destinationStride + imul ebx, edx + + fwait + fstcw savedFpuControlWord + fldcw fpuControlWord_ + + fld int24Scaler_ // stack: (int)0x7FFFFF + + Float32_To_Int24_Clip_loop: + + mov edx, dword ptr [esi] // load floating point value into integer register + + and edx, 0x7FFFFFFF // mask off sign + cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 + + jg Float32_To_Int24_Clip_clamp + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, (int)0x7FFFFF + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF + fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF + mov edx, tempInt32 + jmp Float32_To_Int24_Clip_store + + Float32_To_Int24_Clip_clamp: + mov edx, dword ptr [esi] // load floating point value into integer register + shr edx, 31 // move sign bit into bit 0 + add esi, eax // increment source ptr + //lea esi, [esi+eax] + add edx, 0x7FFFFF // convert to maximum range integers + + Float32_To_Int24_Clip_store: + + mov byte ptr [edi], DL + shr edx, 8 + //mov byte ptr [edi+1], DL + //mov byte ptr [edi+2], DH + mov word ptr [edi+1], DX + + //add edi, ebx // increment destination ptr + lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr 
reached end? + jne Float32_To_Int24_Clip_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } +} + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int24_DitherClip( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ +/* + float *src = (float*)sourceBuffer; + unsigned char *dest = (unsigned char*)destinationBuffer; + signed long temp; + + while( count-- ) + { + // convert to 32 bit and drop the low 8 bits + + // FIXME: the dither amplitude here appears to be too small by 8 bits + double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); + // use smaller scaler to prevent overflow when we add the dither + double dithered = ((double)*src * (2147483646.0)) + dither; + PA_CLIP_( dithered, -2147483648., 2147483647. ); + + temp = (signed long) dithered; + + dest[0] = (unsigned char)(temp >> 8); + dest[1] = (unsigned char)(temp >> 16); + dest[2] = (unsigned char)(temp >> 24); + + src += sourceStride; + dest += destinationStride * 3; + } +*/ + + short savedFpuControlWord; + + // spill storage: + signed long sourceByteStride; + signed long highpassedDither; + + // dither state: + unsigned long ditherPrevious = ditherGenerator->previous; + unsigned long ditherRandSeed1 = ditherGenerator->randSeed1; + unsigned long ditherRandSeed2 = ditherGenerator->randSeed2; + + signed long tempInt32; + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 + mov eax, sourceStride + imul eax, edx + + mov ecx, count + imul ecx, eax + add ecx, esi + + mov edi, destinationBuffer + + mov edx, 3 // sizeof int24 + mov ebx, destinationStride + imul ebx, edx + + fwait + fstcw savedFpuControlWord + 
fldcw fpuControlWord_ + + fld ditheredInt24Scaler_ // stack: int scaler + + Float32_To_Int24_DitherClip_loop: + + mov edx, dword ptr [esi] // load floating point value into integer register + + and edx, 0x7FFFFFFF // mask off sign + cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 + + jg Float32_To_Int24_DitherClip_clamp + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, int scaler + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler + + /* + // call PaUtil_GenerateFloatTriangularDither with C calling convention + mov sourceByteStride, eax // save eax + mov sourceEnd, ecx // save ecx + push ditherGenerator // pass ditherGenerator parameter on stack + call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler + pop edx // clear parameter off stack + mov ecx, sourceEnd // restore ecx + mov eax, sourceByteStride // restore eax + */ + + // generate dither + mov sourceByteStride, eax // save eax + mov edx, 196314165 + mov eax, ditherRandSeed1 + mul edx // edx:eax = eax * 196314165 (only the low dword in eax is used) + //add eax, 907633515 + lea eax, [eax+907633515] + mov ditherRandSeed1, eax + mov edx, 196314165 + mov eax, ditherRandSeed2 + mul edx // edx:eax = eax * 196314165 (only the low dword in eax is used) + //add eax, 907633515 + lea eax, [eax+907633515] + mov edx, ditherRandSeed1 + shr edx, PA_DITHER_SHIFT_ + mov ditherRandSeed2, eax + shr eax, PA_DITHER_SHIFT_ + //add eax, edx // eax -> current + lea eax, [eax+edx] // current = randSeed1>>x + randSeed2>>x + mov edx, ditherPrevious + neg edx + lea edx, [eax+edx] // highpass = current - previous + mov highpassedDither, edx + mov ditherPrevious, eax // previous = current + mov eax, sourceByteStride // restore eax + fild highpassedDither + fmul const_float_dither_scale_ + // end generate dither, dither signal in st(0) + + faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler + fistp tempInt32 // pop st(0) into tempInt32, stack: int scaler + mov edx, tempInt32 + 
jmp Float32_To_Int24_DitherClip_store + + Float32_To_Int24_DitherClip_clamp: + mov edx, dword ptr [esi] // load floating point value into integer register + shr edx, 31 // move sign bit into bit 0 + add esi, eax // increment source ptr + //lea esi, [esi+eax] + add edx, 0x7FFFFF // convert to maximum range integers + + Float32_To_Int24_DitherClip_store: + + mov byte ptr [edi], DL + shr edx, 8 + //mov byte ptr [edi+1], DL + //mov byte ptr [edi+2], DH + mov word ptr [edi+1], DX + + //add edi, ebx // increment destination ptr + lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr reached end? + jne Float32_To_Int24_DitherClip_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } + + ditherGenerator->previous = ditherPrevious; + ditherGenerator->randSeed1 = ditherRandSeed1; + ditherGenerator->randSeed2 = ditherRandSeed2; +} + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int16( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ +/* + float *src = (float*)sourceBuffer; + signed short *dest = (signed short*)destinationBuffer; + (void)ditherGenerator; // unused parameter + + while( count-- ) + { + + short samp = (short) (*src * (32767.0f)); + *dest = samp; + + src += sourceStride; + dest += destinationStride; + } +*/ + + short savedFpuControlWord; + + (void) ditherGenerator; /* unused parameter */ + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 + mov eax, sourceStride + imul eax, edx // source byte stride + + mov ecx, count + imul ecx, eax + add ecx, esi // source end ptr = count * source byte stride + source ptr + + mov edi, destinationBuffer + + mov edx, 2 // sizeof int16 + mov 
ebx, destinationStride + imul ebx, edx // destination byte stride + + fwait + fstcw savedFpuControlWord + fldcw fpuControlWord_ + + fld int16Scaler_ // stack: (int)0x7FFF + + Float32_To_Int16_loop: + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, (int)0x7FFF + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF + fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF + + add edi, ebx // increment destination ptr + //lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr reached end? + jne Float32_To_Int16_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } +} + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int16_Clip( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ +/* + float *src = (float*)sourceBuffer; + signed short *dest = (signed short*)destinationBuffer; + (void)ditherGenerator; // unused parameter + + while( count-- ) + { + long samp = (signed long) (*src * (32767.0f)); + PA_CLIP_( samp, -0x8000, 0x7FFF ); + *dest = (signed short) samp; + + src += sourceStride; + dest += destinationStride; + } +*/ + + short savedFpuControlWord; + + (void) ditherGenerator; /* unused parameter */ + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 + mov eax, sourceStride + imul eax, edx // source byte stride + + mov ecx, count + imul ecx, eax + add ecx, esi // source end ptr = count * source byte stride + source ptr + + mov edi, destinationBuffer + + mov edx, 2 // sizeof int16 + mov ebx, destinationStride + imul ebx, edx // destination byte stride + + fwait + 
fstcw savedFpuControlWord + fldcw fpuControlWord_ + + fld int16Scaler_ // stack: (int)0x7FFF + + Float32_To_Int16_Clip_loop: + + mov edx, dword ptr [esi] // load floating point value into integer register + + and edx, 0x7FFFFFFF // mask off sign + cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 + + jg Float32_To_Int16_Clip_clamp + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, (int)0x7FFF + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF + fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF + jmp Float32_To_Int16_Clip_stored + + Float32_To_Int16_Clip_clamp: + mov edx, dword ptr [esi] // load floating point value into integer register + shr edx, 31 // move sign bit into bit 0 + add esi, eax // increment source ptr + //lea esi, [esi+eax] + add dx, 0x7FFF // convert to maximum range integers: 0x7FFF for positive input, 0x8000 for negative input + mov word ptr [edi], dx // store clamped int into dest + + Float32_To_Int16_Clip_stored: + + add edi, ebx // increment destination ptr + //lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr reached end? 
+ jne Float32_To_Int16_Clip_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } +} + +/* -------------------------------------------------------------------------- */ + +static void Float32_To_Int16_DitherClip( + void *destinationBuffer, signed int destinationStride, + void *sourceBuffer, signed int sourceStride, + unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator ) +{ +/* + float *src = (float*)sourceBuffer; + signed short *dest = (signed short*)destinationBuffer; + (void)ditherGenerator; // unused parameter + + while( count-- ) + { + + float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); + // use smaller scaler to prevent overflow when we add the dither + float dithered = (*src * (32766.0f)) + dither; + signed long samp = (signed long) dithered; + PA_CLIP_( samp, -0x8000, 0x7FFF ); + *dest = (signed short) samp; + + src += sourceStride; + dest += destinationStride; + } +*/ + + short savedFpuControlWord; + + // spill storage: + signed long sourceByteStride; + signed long highpassedDither; + + // dither state: + unsigned long ditherPrevious = ditherGenerator->previous; + unsigned long ditherRandSeed1 = ditherGenerator->randSeed1; + unsigned long ditherRandSeed2 = ditherGenerator->randSeed2; + + __asm{ + // esi -> source ptr + // eax -> source byte stride + // edi -> destination ptr + // ebx -> destination byte stride + // ecx -> source end ptr + // edx -> temp + + mov esi, sourceBuffer + + mov edx, 4 // sizeof float32 + mov eax, sourceStride + imul eax, edx // source byte stride + + mov ecx, count + imul ecx, eax + add ecx, esi // source end ptr = count * source byte stride + source ptr + + mov edi, destinationBuffer + + mov edx, 2 // sizeof int16 + mov ebx, destinationStride + imul ebx, edx // destination byte stride + + fwait + fstcw savedFpuControlWord + fldcw fpuControlWord_ + + fld ditheredInt16Scaler_ // stack: int scaler + + Float32_To_Int16_DitherClip_loop: + + mov edx, dword ptr [esi] // 
load floating point value into integer register + + and edx, 0x7FFFFFFF // mask off sign + cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 + + jg Float32_To_Int16_DitherClip_clamp + + // load unscaled value into st(0) + fld dword ptr [esi] // stack: value, int scaler + add esi, eax // increment source ptr + //lea esi, [esi+eax] + fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler + + /* + // call PaUtil_GenerateFloatTriangularDither with C calling convention + mov sourceByteStride, eax // save eax + mov sourceEnd, ecx // save ecx + push ditherGenerator // pass ditherGenerator parameter on stack + call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler + pop edx // clear parameter off stack + mov ecx, sourceEnd // restore ecx + mov eax, sourceByteStride // restore eax + */ + + // generate dither + mov sourceByteStride, eax // save eax + mov edx, 196314165 + mov eax, ditherRandSeed1 + mul edx // eax:edx = eax * 196314165 + //add eax, 907633515 + lea eax, [eax+907633515] + mov ditherRandSeed1, eax + mov edx, 196314165 + mov eax, ditherRandSeed2 + mul edx // eax:edx = eax * 196314165 + //add eax, 907633515 + lea eax, [eax+907633515] + mov edx, ditherRandSeed1 + shr edx, PA_DITHER_SHIFT_ + mov ditherRandSeed2, eax + shr eax, PA_DITHER_SHIFT_ + //add eax, edx // eax -> current + lea eax, [eax+edx] // current = randSeed1>>x + randSeed2>>x + mov edx, ditherPrevious + neg edx + lea edx, [eax+edx] // highpass = current - previous + mov highpassedDither, edx + mov ditherPrevious, eax // previous = current + mov eax, sourceByteStride // restore eax + fild highpassedDither + fmul const_float_dither_scale_ + // end generate dither, dither signal in st(0) + + faddp st(1), st(0) // stack: dither * value*(int scaler), int scaler + fistp word ptr [edi] // store scaled int into dest, stack: int scaler + jmp Float32_To_Int16_DitherClip_stored + + Float32_To_Int16_DitherClip_clamp: + mov edx, dword ptr [esi] // load 
floating point value into integer register + shr edx, 31 // move sign bit into bit 0 + add esi, eax // increment source ptr + //lea esi, [esi+eax] + add dx, 0x7FFF // convert to maximum range integers + mov word ptr [edi], dx // store clamped into into dest + + Float32_To_Int16_DitherClip_stored: + + add edi, ebx // increment destination ptr + //lea edi, [edi+ebx] + + cmp esi, ecx // has src ptr reached end? + jne Float32_To_Int16_DitherClip_loop + + ffree st(0) + fincstp + + fwait + fnclex + fldcw savedFpuControlWord + } + + ditherGenerator->previous = ditherPrevious; + ditherGenerator->randSeed1 = ditherRandSeed1; + ditherGenerator->randSeed2 = ditherRandSeed2; +} + +/* -------------------------------------------------------------------------- */ + +void PaUtil_InitializeX86PlainConverters( void ) +{ + paConverters.Float32_To_Int32 = Float32_To_Int32; + paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip; + paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip; + + paConverters.Float32_To_Int24 = Float32_To_Int24; + paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip; + paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip; + + paConverters.Float32_To_Int16 = Float32_To_Int16; + paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip; + paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip; +} + +/* -------------------------------------------------------------------------- */ diff --git a/pd/portaudio/pa_win/pa_x86_plain_converters.h b/pd/portaudio/pa_win/pa_x86_plain_converters.h new file mode 100644 index 00000000..f56c710f --- /dev/null +++ b/pd/portaudio/pa_win/pa_x86_plain_converters.h @@ -0,0 +1,19 @@ +#ifndef PA_X86_PLAIN_CONVERTERS_H +#define PA_X86_PLAIN_CONVERTERS_H + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + +/** + @brief Install optimised converter functions suitable for all IA32 processors +*/ +void PaUtil_InitializeX86PlainConverters( void ); + + +#ifdef 
__cplusplus +} +#endif /* __cplusplus */ +#endif /* PA_X86_PLAIN_CONVERTERS_H */ |