296 lines
8.9 KiB
C
296 lines
8.9 KiB
C
|
/* ***** BEGIN LICENSE BLOCK *****
|
||
|
* Version: RCSL 1.0/RPSL 1.0
|
||
|
*
|
||
|
* Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
|
||
|
*
|
||
|
* The contents of this file, and the files included with this file, are
|
||
|
* subject to the current version of the RealNetworks Public Source License
|
||
|
* Version 1.0 (the "RPSL") available at
|
||
|
* http://www.helixcommunity.org/content/rpsl unless you have licensed
|
||
|
* the file under the RealNetworks Community Source License Version 1.0
|
||
|
* (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
|
||
|
* in which case the RCSL will apply. You may also obtain the license terms
|
||
|
* directly from RealNetworks. You may not use this file except in
|
||
|
* compliance with the RPSL or, if you have a valid RCSL with RealNetworks
|
||
|
* applicable to this file, the RCSL. Please see the applicable RPSL or
|
||
|
* RCSL for the rights, obligations and limitations governing use of the
|
||
|
* contents of the file.
|
||
|
*
|
||
|
* This file is part of the Helix DNA Technology. RealNetworks is the
|
||
|
* developer of the Original Code and owns the copyrights in the portions
|
||
|
* it created.
|
||
|
*
|
||
|
* This file, and the files included with this file, is distributed and made
|
||
|
* available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
||
|
* EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
|
||
|
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
|
||
|
* FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
|
||
|
*
|
||
|
* Technology Compatibility Kit Test Suite(s) Location:
|
||
|
* http://www.helixcommunity.org/content/tck
|
||
|
*
|
||
|
* Contributor(s):
|
||
|
*
|
||
|
* ***** END LICENSE BLOCK ***** */
|
||
|
|
||
|
/**************************************************************************************
|
||
|
* Fixed-point MP3 decoder
|
||
|
* Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
|
||
|
* June 2003
|
||
|
*
|
||
|
* polyphase.c - final stage of subband transform (polyphase synthesis filter)
|
||
|
*
|
||
|
* This is the C reference version using __int64
|
||
|
* Look in the appropriate subdirectories for optimized asm implementations
|
||
|
* (e.g. arm/asmpoly.s)
|
||
|
**************************************************************************************/
|
||
|
|
||
|
#include "coder.h"
|
||
|
#include "assembly.h"
|
||
|
|
||
|
/* input to Polyphase = Q(DQ_FRACBITS_OUT-2), gain 2 bits in convolution
|
||
|
* we also have the implicit bias of 2^15 to add back, so net fraction bits =
|
||
|
* DQ_FRACBITS_OUT - 2 - 2 - 15
|
||
|
* (see comment on Dequantize() for more info)
|
||
|
*/
|
||
|
#define DEF_NFRACBITS (DQ_FRACBITS_OUT - 2 - 2 - 15)
|
||
|
#define CSHIFT 12 /* coefficients have 12 leading sign bits for early-terminating mulitplies */
|
||
|
|
||
|
static __inline short ClipToShort(int x, int fracBits)
|
||
|
{
|
||
|
int sign;
|
||
|
|
||
|
/* assumes you've already rounded (x += (1 << (fracBits-1))) */
|
||
|
x >>= fracBits;
|
||
|
|
||
|
/* Ken's trick: clips to [-32768, 32767] */
|
||
|
sign = x >> 31;
|
||
|
if (sign != (x >> 15))
|
||
|
x = sign ^ ((1 << 15) - 1);
|
||
|
|
||
|
return (short)x;
|
||
|
}
|
||
|
|
||
|
#define MC0M(x) { \
|
||
|
c1 = *coef; coef++; c2 = *coef; coef++; \
|
||
|
vLo = *(vb1+(x)); vHi = *(vb1+(23-(x))); \
|
||
|
sum1L = MADD64(sum1L, vLo, c1); sum1L = MADD64(sum1L, vHi, -c2); \
|
||
|
}
|
||
|
|
||
|
#define MC1M(x) { \
|
||
|
c1 = *coef; coef++; \
|
||
|
vLo = *(vb1+(x)); \
|
||
|
sum1L = MADD64(sum1L, vLo, c1); \
|
||
|
}
|
||
|
|
||
|
#define MC2M(x) { \
|
||
|
c1 = *coef; coef++; c2 = *coef; coef++; \
|
||
|
vLo = *(vb1+(x)); vHi = *(vb1+(23-(x))); \
|
||
|
sum1L = MADD64(sum1L, vLo, c1); sum2L = MADD64(sum2L, vLo, c2); \
|
||
|
sum1L = MADD64(sum1L, vHi, -c2); sum2L = MADD64(sum2L, vHi, c1); \
|
||
|
}
|
||
|
|
||
|
/**************************************************************************************
|
||
|
* Function: PolyphaseMono
|
||
|
*
|
||
|
* Description: filter one subband and produce 32 output PCM samples for one channel
|
||
|
*
|
||
|
* Inputs: pointer to PCM output buffer
|
||
|
* number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
|
||
|
* pointer to start of vbuf (preserved from last call)
|
||
|
* start of filter coefficient table (in proper, shuffled order)
|
||
|
* no minimum number of guard bits is required for input vbuf
|
||
|
* (see additional scaling comments below)
|
||
|
*
|
||
|
* Outputs: 32 samples of one channel of decoded PCM data, (i.e. Q16.0)
|
||
|
*
|
||
|
* Return: none
|
||
|
*
|
||
|
* TODO: add 32-bit version for platforms where 64-bit mul-acc is not supported
|
||
|
* (note max filter gain - see polyCoef[] comments)
|
||
|
**************************************************************************************/
|
||
|
void PolyphaseMono(short *pcm, int *vbuf, const int *coefBase)
|
||
|
{
|
||
|
int i;
|
||
|
const int *coef;
|
||
|
int *vb1;
|
||
|
int vLo, vHi, c1, c2;
|
||
|
Word64 sum1L, sum2L, rndVal;
|
||
|
|
||
|
rndVal = (Word64)( 1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) );
|
||
|
|
||
|
/* special case, output sample 0 */
|
||
|
coef = coefBase;
|
||
|
vb1 = vbuf;
|
||
|
sum1L = rndVal;
|
||
|
|
||
|
MC0M(0)
|
||
|
MC0M(1)
|
||
|
MC0M(2)
|
||
|
MC0M(3)
|
||
|
MC0M(4)
|
||
|
MC0M(5)
|
||
|
MC0M(6)
|
||
|
MC0M(7)
|
||
|
|
||
|
*(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
|
||
|
/* special case, output sample 16 */
|
||
|
coef = coefBase + 256;
|
||
|
vb1 = vbuf + 64*16;
|
||
|
sum1L = rndVal;
|
||
|
|
||
|
MC1M(0)
|
||
|
MC1M(1)
|
||
|
MC1M(2)
|
||
|
MC1M(3)
|
||
|
MC1M(4)
|
||
|
MC1M(5)
|
||
|
MC1M(6)
|
||
|
MC1M(7)
|
||
|
|
||
|
*(pcm + 16) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
|
||
|
/* main convolution loop: sum1L = samples 1, 2, 3, ... 15 sum2L = samples 31, 30, ... 17 */
|
||
|
coef = coefBase + 16;
|
||
|
vb1 = vbuf + 64;
|
||
|
pcm++;
|
||
|
|
||
|
/* right now, the compiler creates bad asm from this... */
|
||
|
for (i = 15; i > 0; i--) {
|
||
|
sum1L = sum2L = rndVal;
|
||
|
|
||
|
MC2M(0)
|
||
|
MC2M(1)
|
||
|
MC2M(2)
|
||
|
MC2M(3)
|
||
|
MC2M(4)
|
||
|
MC2M(5)
|
||
|
MC2M(6)
|
||
|
MC2M(7)
|
||
|
|
||
|
vb1 += 64;
|
||
|
*(pcm) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
*(pcm + 2*i) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
pcm++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#define MC0S(x) { \
|
||
|
c1 = *coef; coef++; c2 = *coef; coef++; \
|
||
|
vLo = *(vb1+(x)); vHi = *(vb1+(23-(x))); \
|
||
|
sum1L = MADD64(sum1L, vLo, c1); sum1L = MADD64(sum1L, vHi, -c2); \
|
||
|
vLo = *(vb1+32+(x)); vHi = *(vb1+32+(23-(x))); \
|
||
|
sum1R = MADD64(sum1R, vLo, c1); sum1R = MADD64(sum1R, vHi, -c2); \
|
||
|
}
|
||
|
|
||
|
#define MC1S(x) { \
|
||
|
c1 = *coef; coef++; \
|
||
|
vLo = *(vb1+(x)); \
|
||
|
sum1L = MADD64(sum1L, vLo, c1); \
|
||
|
vLo = *(vb1+32+(x)); \
|
||
|
sum1R = MADD64(sum1R, vLo, c1); \
|
||
|
}
|
||
|
|
||
|
#define MC2S(x) { \
|
||
|
c1 = *coef; coef++; c2 = *coef; coef++; \
|
||
|
vLo = *(vb1+(x)); vHi = *(vb1+(23-(x))); \
|
||
|
sum1L = MADD64(sum1L, vLo, c1); sum2L = MADD64(sum2L, vLo, c2); \
|
||
|
sum1L = MADD64(sum1L, vHi, -c2); sum2L = MADD64(sum2L, vHi, c1); \
|
||
|
vLo = *(vb1+32+(x)); vHi = *(vb1+32+(23-(x))); \
|
||
|
sum1R = MADD64(sum1R, vLo, c1); sum2R = MADD64(sum2R, vLo, c2); \
|
||
|
sum1R = MADD64(sum1R, vHi, -c2); sum2R = MADD64(sum2R, vHi, c1); \
|
||
|
}
|
||
|
|
||
|
/**************************************************************************************
|
||
|
* Function: PolyphaseStereo
|
||
|
*
|
||
|
* Description: filter one subband and produce 32 output PCM samples for each channel
|
||
|
*
|
||
|
* Inputs: pointer to PCM output buffer
|
||
|
* number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
|
||
|
* pointer to start of vbuf (preserved from last call)
|
||
|
* start of filter coefficient table (in proper, shuffled order)
|
||
|
* no minimum number of guard bits is required for input vbuf
|
||
|
* (see additional scaling comments below)
|
||
|
*
|
||
|
* Outputs: 32 samples of two channels of decoded PCM data, (i.e. Q16.0)
|
||
|
*
|
||
|
* Return: none
|
||
|
*
|
||
|
* Notes: interleaves PCM samples LRLRLR...
|
||
|
*
|
||
|
* TODO: add 32-bit version for platforms where 64-bit mul-acc is not supported
|
||
|
**************************************************************************************/
|
||
|
void PolyphaseStereo(short *pcm, int *vbuf, const int *coefBase)
|
||
|
{
|
||
|
int i;
|
||
|
const int *coef;
|
||
|
int *vb1;
|
||
|
int vLo, vHi, c1, c2;
|
||
|
Word64 sum1L, sum2L, sum1R, sum2R, rndVal;
|
||
|
|
||
|
rndVal = (Word64)( 1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) );
|
||
|
|
||
|
/* special case, output sample 0 */
|
||
|
coef = coefBase;
|
||
|
vb1 = vbuf;
|
||
|
sum1L = sum1R = rndVal;
|
||
|
|
||
|
MC0S(0)
|
||
|
MC0S(1)
|
||
|
MC0S(2)
|
||
|
MC0S(3)
|
||
|
MC0S(4)
|
||
|
MC0S(5)
|
||
|
MC0S(6)
|
||
|
MC0S(7)
|
||
|
|
||
|
*(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
*(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
|
||
|
/* special case, output sample 16 */
|
||
|
coef = coefBase + 256;
|
||
|
vb1 = vbuf + 64*16;
|
||
|
sum1L = sum1R = rndVal;
|
||
|
|
||
|
MC1S(0)
|
||
|
MC1S(1)
|
||
|
MC1S(2)
|
||
|
MC1S(3)
|
||
|
MC1S(4)
|
||
|
MC1S(5)
|
||
|
MC1S(6)
|
||
|
MC1S(7)
|
||
|
|
||
|
*(pcm + 2*16 + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
*(pcm + 2*16 + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
|
||
|
/* main convolution loop: sum1L = samples 1, 2, 3, ... 15 sum2L = samples 31, 30, ... 17 */
|
||
|
coef = coefBase + 16;
|
||
|
vb1 = vbuf + 64;
|
||
|
pcm += 2;
|
||
|
|
||
|
/* right now, the compiler creates bad asm from this... */
|
||
|
for (i = 15; i > 0; i--) {
|
||
|
sum1L = sum2L = rndVal;
|
||
|
sum1R = sum2R = rndVal;
|
||
|
|
||
|
MC2S(0)
|
||
|
MC2S(1)
|
||
|
MC2S(2)
|
||
|
MC2S(3)
|
||
|
MC2S(4)
|
||
|
MC2S(5)
|
||
|
MC2S(6)
|
||
|
MC2S(7)
|
||
|
|
||
|
vb1 += 64;
|
||
|
*(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
*(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
*(pcm + 2*2*i + 0) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
*(pcm + 2*2*i + 1) = ClipToShort((int)SAR64(sum2R, (32-CSHIFT)), DEF_NFRACBITS);
|
||
|
pcm += 2;
|
||
|
}
|
||
|
}
|