428 lines
10 KiB
ArmAsm
428 lines
10 KiB
ArmAsm
# *
|
|
# * Version: RCSL 1.0/RPSL 1.0
|
|
# *
|
|
# * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
|
|
# *
|
|
# * The contents of this file, and the files included with this file, are
|
|
# * subject to the current version of the RealNetworks Public Source License
|
|
# * Version 1.0 (the "RPSL") available at
|
|
# * http://www.helixcommunity.org/content/rpsl unless you have licensed
|
|
# * the file under the RealNetworks Community Source License Version 1.0
|
|
# * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
|
|
# * in which case the RCSL will apply. You may also obtain the license terms
|
|
# * directly from RealNetworks. You may not use this file except in
|
|
# * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
|
|
# * applicable to this file, the RCSL. Please see the applicable RPSL or
|
|
# * RCSL for the rights, obligations and limitations governing use of the
|
|
# * contents of the file.
|
|
# *
|
|
# * This file is part of the Helix DNA Technology. RealNetworks is the
|
|
# * developer of the Original Code and owns the copyrights in the portions
|
|
# * it created.
|
|
# *
|
|
# * This file, and the files included with this file, is distributed and made
|
|
# * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
|
# * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
|
|
# * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
# * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
|
|
# *
|
|
# * Technology Compatibility Kit Test Suite(s) Location:
|
|
# * http://www.helixcommunity.org/content/tck
|
|
# *
|
|
# * Contributor(s):
|
|
# *
|
|
# * ***** END LICENSE BLOCK ***** */
|
|
|
|
@ ---------------------------------------------------------------------------
@ File-level setup (GNU as syntax): exported entry point, code section,
@ register aliases and the rounding constant shared by the macros and
@ functions in this file.
@ ---------------------------------------------------------------------------
        .global xmp3_PolyphaseStereo

@ armasm original: AREA |.text|, CODE, READONLY
        .text

@ Function arguments, named after the C prototype
@ (short *pcm, int *vbuf, const int *coefBase).
PCM     .req    r0
VB1     .req    r1
COEF    .req    r2

@ Temporaries that hold the two vbuf words of the current tap pair.
@ VLO shares r0 with PCM, so the pcm pointer lives on the stack while
@ the multiply-accumulate macros run.
VLO     .req    r0      @ must push PCM ptr to stack during inner loop
VHI     .req    r3      @ temp variable

@ 64-bit accumulators as lo/hi register pairs:
@ SUM<1|2><L|R><L|H> = sum 1 or 2, left or right channel, low or high word.
SUM1LL  .req    r4
SUM1LH  .req    r5
SUM2LL  .req    r6
SUM2LH  .req    r7
SUM1RL  .req    r8
SUM1RH  .req    r9
SUM2RL  .req    r10
SUM2RH  .req    r11

@ Filter coefficients for the current tap pair.
CF1     .req    r12
CF2     .req    r14

@ r12 and r14 are reused once the coefficients are no longer needed.
SIGN    .req    r12     @ used for clipping - after discarding CF1
MAXPOS  .req    r14     @ used for clipping - after discarding CF2

I       .req    r12     @ overlay loop counter with CF1, SIGN

@ Rounding constant: 1 << 25, i.e. half of the 1 << 26 step that C64TOS
@ shifts out of each accumulator. It is preloaded into the low word of every
@ accumulator.
@ armasm original: GBLA RNDVAL / RNDVAL SETA (1 << ((32 - 12) + (6 - 1)))
        .set    RNDVAL, (1 << ((32 - 12) + (6 - 1)))
|
|
|
|
@ C64TOS - clip 64-bit accumulator to short (no rounding)
@   xl, xh  = value (lo 32, hi 32); the clipped result is left in xl
@   sign    = temp register, receives the sign mask (0 or -1) of the
@             shifted value
@   maxPos  = register holding 0x00007fff (takes 2 instr. to generate -
@             calculating once and using repeatedly saves if you do several
@             C64TOS in a row)
@ The 64-bit input is shifted right by 20+6 = 26 bits (it is assumed to have
@ 6 fraction bits). If the 32-bit result does not fit in a signed 16-bit
@ value it is replaced by sign ^ maxPos, i.e. 0x00007fff for positive and
@ 0xffff8000 for negative overflow.
@ Clobbers: xl, sign, condition flags. xh and maxPos are only read.
        .macro  C64TOS xl, xh, sign, maxPos

        mov     \xl, \xl, lsr #(20+6)           @ low word: keep bits 26..31
        orr     \xl, \xl, \xh, lsl #(12-6)      @ merge bits 0..25 of the high word above them
        mov     \sign, \xl, ASR #31             @ sign = 0 or 0xffffffff
        cmp     \sign, \xl, ASR #15             @ equal only if xl fits in 16 signed bits
        eorne   \xl, \sign, \maxPos             @ saturate on overflow

        .endm                                   @ C64TOS
|
|
|
|
@ MC0S - process 2 taps, 1 sample per channel (sample 0)
@   x = vb1 offset (in words)
@ Loads two coefficients c1, c2 from COEF (post-incrementing COEF by
@ 8 bytes) and accumulates
@   SUM1L += vb1[x]      * c1 - vb1[23 - x]      * c2
@   SUM1R += vb1[32 + x] * c1 - vb1[32 + 23 - x] * c2
@ Clobbers: CF1, CF2 (left negated), VLO (r0, so PCM is lost), VHI.
        .macro  MC0S x

        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(23 - \x))]

        smlal   SUM1LL, SUM1LH, VLO, CF1
        ldr     VLO, [VB1, #(4*(32 + \x))]      @ right-channel load issued between multiplies
        rsb     CF2, CF2, #0                    @ CF2 = -c2
        smlal   SUM1LL, SUM1LH, VHI, CF2
        ldr     VHI, [VB1, #(4*(32 + 23 - \x))]

        smlal   SUM1RL, SUM1RH, VLO, CF1
        smlal   SUM1RL, SUM1RH, VHI, CF2

        .endm                                   @ MC0S
|
|
|
|
@ MC1S - process 1 tap, 1 sample per channel (sample 16)
@   x = vb1 offset (in words)
@ Loads one coefficient c1 from COEF (post-incrementing COEF by 4 bytes)
@ and accumulates
@   SUM1L += vb1[x]      * c1
@   SUM1R += vb1[32 + x] * c1
@ Clobbers: CF1, VLO (r0, so PCM is lost), VHI.
        .macro  MC1S x

        ldr     CF1, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(32 + \x))]
        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM1RL, SUM1RH, VHI, CF1

        .endm                                   @ MC1S
|
|
|
|
@ MC2S - process 2 taps, 2 samples per channel
@   x = vb1 offset (in words)
@ Loads two coefficients c1, c2 from COEF (post-incrementing COEF by
@ 8 bytes) and accumulates, for the left channel,
@   SUM1L += vb1[x] * c1 - vb1[23 - x] * c2
@   SUM2L += vb1[x] * c2 + vb1[23 - x] * c1
@ and the same for SUM1R / SUM2R using vb1[32 + x] and vb1[32 + 23 - x].
@ CF2 is negated twice, so it holds the original c2 again on exit.
@ Clobbers: CF1, CF2, VLO (r0, so PCM is lost), VHI.
        .macro  MC2S x

        @ load data as far as possible in advance of using it
        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(23 - \x))]

        @ left channel
        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM2LL, SUM2LH, VLO, CF2
        rsb     CF2, CF2, #0                    @ CF2 = -c2
        smlal   SUM2LL, SUM2LH, VHI, CF1
        smlal   SUM1LL, SUM1LH, VHI, CF2

        ldr     VHI, [VB1, #(4*(32 + 23 - \x))]
        ldr     VLO, [VB1, #(4*(32 + \x))]

        @ right channel (CF2 is still negated for the first two multiplies)
        smlal   SUM1RL, SUM1RH, VHI, CF2
        smlal   SUM2RL, SUM2RH, VHI, CF1
        rsb     CF2, CF2, #0                    @ CF2 = +c2 again
        smlal   SUM1RL, SUM1RH, VLO, CF1
        smlal   SUM2RL, SUM2RH, VLO, CF2

        .endm                                   @ MC2S
|
|
|
|
@ void PolyphaseStereo(short *pcm, int *vbuf, const int *coefBase)
@
@ In:    PCM  (r0) = pcm       output buffer; 64 shorts are written,
@                              interleaved as left, right for samples 0..31
@        VB1  (r1) = vbuf      input words, read at offsets up to
@                              vbuf + 64*16 + 32 + 7
@        COEF (r2) = coefBase  coefficient words, read up to coefBase + 263
@ Out:   nothing is returned in registers
@ Saves: r4-r11 and the return address are pushed on entry and restored on
@        exit; r0-r3 and r12 are overwritten.
@ Stack: 9 saved registers plus 8 bytes of locals:
@          sp[0] = loop counter, sp[1] = pcm pointer
@        (both are spilled because their registers are reused as VLO / CF1
@        inside the multiply-accumulate macros).
        .global xmp3_PolyphaseStereo
xmp3_PolyphaseStereo:

        stmfd   sp!, {r4-r11, r14}              @ push to stack

        @ reserve stack space for 2 local variables (4 bytes each)
        sub     sp, sp, #8
        str     PCM, [sp, #4]                   @ sp[1] = pcm pointer

        @ special case, output sample 0
        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM1RH, #0

        MC0S    0
        MC0S    1
        MC0S    2
        MC0S    3
        MC0S    4
        MC0S    5
        MC0S    6
        MC0S    7

        ldr     PCM, [sp, #4]                   @ load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS

        strh    SUM1LL, [PCM, #(2*0)]
        strh    SUM1RL, [PCM, #(2*1)]

        @ special case, output sample 16
        add     COEF, COEF, #(4*(256-16))       @ coef = coefBase + 256 (was coefBase + 16 after MC0S block)
        add     VB1, VB1, #(4*1024)             @ vb1 = vbuf + 64*16

        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM1RH, #0

        MC1S    0
        MC1S    1
        MC1S    2
        MC1S    3
        MC1S    4
        MC1S    5
        MC1S    6
        MC1S    7

        ldr     PCM, [sp, #4]                   @ load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS

        strh    SUM1LL, [PCM, #(2*(2*16+0))]
        strh    SUM1RL, [PCM, #(2*(2*16+1))]

        @ main convolution loop: sum1L = samples 1, 2, 3, ... 15   sum2L = samples 31, 30, ... 17
        sub     COEF, COEF, #(4*(264-16))       @ coef = coefBase + 16 (was coefBase + 264 after MC1S block)
        sub     VB1, VB1, #(4*(1024-64))        @ vb1 = vbuf + 64 (was vbuf + 64*16 after MC1S block)
        mov     I, #15                          @ loop counter, count down
        add     PCM, PCM, #(2*2)                @ pcm += 2

LoopPS:
        str     I, [sp, #0]                     @ sp[0] = i (loop counter)
        str     PCM, [sp, #4]                   @ sp[1] = pcm (pointer to pcm buffer)

        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM2LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM2RL, #RNDVAL                 @ load rndVal (low 32)

        mov     SUM1LH, #0
        mov     SUM1RH, #0
        mov     SUM2LH, #0
        mov     SUM2RH, #0

        MC2S    0
        MC2S    1
        MC2S    2
        MC2S    3
        MC2S    4
        MC2S    5
        MC2S    6
        MC2S    7

        add     VB1, VB1, #(4*64)               @ vb1 += 64

        ldr     PCM, [sp, #4]                   @ load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS
        C64TOS  SUM2LL, SUM2LH, SIGN, MAXPOS
        C64TOS  SUM2RL, SUM2RH, SIGN, MAXPOS

        ldr     I, [sp, #0]                     @ load loop counter
        add     CF2, PCM, I, lsl #3             @ CF2 = PCM + 4*i (short offset)
        strh    SUM2LL, [CF2], #2               @ *(pcm + 2*2*i + 0)
        strh    SUM2RL, [CF2], #2               @ *(pcm + 2*2*i + 1)

        strh    SUM1LL, [PCM], #2               @ *(pcm + 0)
        strh    SUM1RL, [PCM], #2               @ *(pcm + 1)

        subs    I, I, #1
        bne     LoopPS

        @ restore stack pointer
        add     sp, sp, #8

        ldmfd   sp!, {r4-r11, pc}               @ restore registers and return
|
|
|
|
## MONO PROCESSING
|
|
|
|
@ MC0M - process 2 taps, 1 sample (sample 0)
@   x = vb1 offset (in words)
@ Loads two coefficients c1, c2 from COEF (post-incrementing COEF by
@ 8 bytes) and accumulates
@   SUM1L += vb1[x] * c1 - vb1[23 - x] * c2
@ Clobbers: CF1, CF2 (left negated), VLO (r0, so PCM is lost), VHI.
        .macro  MC0M x

        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(23 - \x))]

        rsb     CF2, CF2, #0                    @ CF2 = -c2
        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM1LL, SUM1LH, VHI, CF2

        .endm                                   @ MC0M
|
|
|
|
@ MC1M - process 1 tap, 1 sample (sample 16)
@   x = vb1 offset (in words)
@ Loads one coefficient c1 from COEF (post-incrementing COEF by 4 bytes)
@ and accumulates
@   SUM1L += vb1[x] * c1
@ Clobbers: CF1, VLO (r0, so PCM is lost).
        .macro  MC1M x

        ldr     CF1, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        smlal   SUM1LL, SUM1LH, VLO, CF1

        .endm                                   @ MC1M
|
|
|
|
@ MC2M - process 2 taps, 2 samples
@   x = vb1 offset (in words)
@ Loads two coefficients c1, c2 from COEF (post-incrementing COEF by
@ 8 bytes) and accumulates
@   SUM1L += vb1[x] * c1 - vb1[23 - x] * c2
@   SUM2L += vb1[x] * c2 + vb1[23 - x] * c1
@ Clobbers: CF1, CF2 (left negated), VLO (r0, so PCM is lost), VHI.
        .macro  MC2M x

        @ load data as far as possible in advance of using it
        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(23 - \x))]

        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM2LL, SUM2LH, VLO, CF2
        rsb     CF2, CF2, #0                    @ CF2 = -c2
        smlal   SUM1LL, SUM1LH, VHI, CF2
        smlal   SUM2LL, SUM2LH, VHI, CF1

        .endm                                   @ MC2M
|
|
|
|
@ void PolyphaseMono(short *pcm, int *vbuf, const int *coefBase)
@
@ In:    PCM  (r0) = pcm       output buffer; 32 shorts are written
@                              (samples 0..31)
@        VB1  (r1) = vbuf      input words, read at offsets up to
@                              vbuf + 64*16 + 7
@        COEF (r2) = coefBase  coefficient words, read up to coefBase + 263
@ Out:   nothing is returned in registers
@ Saves: r4-r11 and the return address are pushed on entry and restored on
@        exit; r0-r3 and r12 are overwritten.
@ Stack: 9 saved registers plus 8 bytes of locals:
@          sp[0] = loop counter, sp[1] = pcm pointer
@        (both are spilled because their registers are reused as VLO / CF1
@        inside the multiply-accumulate macros).
        .global xmp3_PolyphaseMono
xmp3_PolyphaseMono:

        stmfd   sp!, {r4-r11, r14}              @ push to stack

        @ reserve stack space for 2 local variables (4 bytes each)
        sub     sp, sp, #8
        str     PCM, [sp, #4]                   @ sp[1] = pcm pointer

        @ special case, output sample 0
        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0

        MC0M    0
        MC0M    1
        MC0M    2
        MC0M    3
        MC0M    4
        MC0M    5
        MC0M    6
        MC0M    7

        ldr     PCM, [sp, #4]                   @ load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        strh    SUM1LL, [PCM, #(2*0)]

        @ special case, output sample 16
        add     COEF, COEF, #(4*(256-16))       @ coef = coefBase + 256 (was coefBase + 16 after MC0M block)
        add     VB1, VB1, #(4*1024)             @ vb1 = vbuf + 64*16

        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0

        MC1M    0
        MC1M    1
        MC1M    2
        MC1M    3
        MC1M    4
        MC1M    5
        MC1M    6
        MC1M    7

        ldr     PCM, [sp, #4]                   @ load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS

        strh    SUM1LL, [PCM, #(2*16)]

        @ main convolution loop: sum1L = samples 1, 2, 3, ... 15   sum2L = samples 31, 30, ... 17
        sub     COEF, COEF, #(4*(264-16))       @ coef = coefBase + 16 (was coefBase + 264 after MC1M block)
        sub     VB1, VB1, #(4*(1024-64))        @ vb1 = vbuf + 64 (was vbuf + 64*16 after MC1M block)
        mov     I, #15                          @ loop counter, count down
        add     PCM, PCM, #(2)                  @ pcm++

LoopPM:
        str     I, [sp, #0]                     @ sp[0] = i (loop counter)
        str     PCM, [sp, #4]                   @ sp[1] = pcm (pointer to pcm buffer)

        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM2LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM2LH, #0

        MC2M    0
        MC2M    1
        MC2M    2
        MC2M    3
        MC2M    4
        MC2M    5
        MC2M    6
        MC2M    7

        add     VB1, VB1, #(4*64)               @ vb1 += 64

        ldr     PCM, [sp, #4]                   @ load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM2LL, SUM2LH, SIGN, MAXPOS

        ldr     I, [sp, #0]                     @ load loop counter
        add     CF2, PCM, I, lsl #2             @ CF2 = PCM + 2*i (short offset)
        strh    SUM2LL, [CF2], #2               @ *(pcm + 2*i + 0)
        strh    SUM1LL, [PCM], #2               @ *(pcm + 0), then pcm++

        subs    I, I, #1
        bne     LoopPM

        @ restore stack pointer
        add     sp, sp, #8

        ldmfd   sp!, {r4-r11, pc}               @ restore registers and return
|
|
|
|
@ End of assembly source.
.end