rt-thread-official/bsp/stm32_radio/mp3/real/arm/asmpoly.s

428 lines
10 KiB
ArmAsm

# *
# * Version: RCSL 1.0/RPSL 1.0
# *
# * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
# *
# * The contents of this file, and the files included with this file, are
# * subject to the current version of the RealNetworks Public Source License
# * Version 1.0 (the "RPSL") available at
# * http://www.helixcommunity.org/content/rpsl unless you have licensed
# * the file under the RealNetworks Community Source License Version 1.0
# * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
# * in which case the RCSL will apply. You may also obtain the license terms
# * directly from RealNetworks. You may not use this file except in
# * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
# * applicable to this file, the RCSL. Please see the applicable RPSL or
# * RCSL for the rights, obligations and limitations governing use of the
# * contents of the file.
# *
# * This file is part of the Helix DNA Technology. RealNetworks is the
# * developer of the Original Code and owns the copyrights in the portions
# * it created.
# *
# * This file, and the files included with this file, is distributed and made
# * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
# * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
# * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
# * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
# *
# * Technology Compatibility Kit Test Suite(s) Location:
# * http://www.helixcommunity.org/content/tck
# *
# * Contributor(s):
# *
# * ***** END LICENSE BLOCK ***** */
@ ---------------------------------------------------------------------------
@ File-level declarations (GNU as syntax).
@
@ Both entry points are exported here.  The directive is written in lower
@ case because GNU as for ARM may treat pseudo-op names case-sensitively.
@ ---------------------------------------------------------------------------
        .global xmp3_PolyphaseStereo
        .global xmp3_PolyphaseMono

@ armasm original: AREA |.text|, CODE, READONLY
        .text

@ ---------------------------------------------------------------------------
@ Register aliases.  Several names deliberately share one physical register;
@ the code must never need both meanings at the same time.
@ ---------------------------------------------------------------------------
PCM     .req    r0      @ arg 0: output pointer (short *pcm)
VB1     .req    r1      @ arg 1: pointer into vbuf
COEF    .req    r2      @ arg 2: pointer into the coefficient table
VLO     .req    r0      @ overlays PCM: the PCM pointer must be saved on the
                        @ stack during the inner loop
VHI     .req    r3      @ temp variable

@ 64-bit accumulators, each held as a lo/hi register pair for smlal
SUM1LL  .req    r4
SUM1LH  .req    r5
SUM2LL  .req    r6
SUM2LH  .req    r7
SUM1RL  .req    r8
SUM1RH  .req    r9
SUM2RL  .req    r10
SUM2RH  .req    r11

CF1     .req    r12     @ first coefficient of a pair
CF2     .req    r14     @ second coefficient of a pair (r14 is saved on entry)
SIGN    .req    r12     @ used for clipping - after discarding CF1
MAXPOS  .req    r14     @ used for clipping - after discarding CF2
I       .req    r12     @ loop counter, overlays CF1 and SIGN

@ Rounding constant preloaded into the low word of every accumulator.
@ It equals 1 << 25, i.e. half of the 1 << (20 + 6) weight that C64TOS
@ shifts out, so the truncating shift in C64TOS rounds to nearest.
@ armasm original:  GBLA RNDVAL
@                   RNDVAL SETA (1 << ((32 - 12) + (6 - 1)))
        .set    RNDVAL, (1 << ((32 - 12) + (6 - 1)))
@ C64TOS - clip 64-bit accumulator to short (no rounding)
@   xl, xh = value (lo 32, hi 32); the result is left in xl, xh is not written
@            input assumed to have 6 fraction bits
@   sign   = temp register, overwritten with the sign mask (0 or -1)
@   maxPos = register holding 0x00007fff (takes 2 instr. to generate -
@            calculating once and using repeatedly saves if you do several
@            C64TOS in a row)
@
@ Steps:
@   1. xl = (xh:xl >> 26) truncated to 32 bits
@   2. sign = xl >> 31 (arithmetic)
@   3. if (xl >> 15) != sign the value does not fit in a signed 16-bit
@      short, so xl = sign ^ maxPos, which is 0x00007fff for a positive
@      value and 0xffff8000 for a negative one.
@
@ Clobbers the condition flags.
@ NOTE(review): eorne is a bare conditional instruction; if this file is ever
@ assembled as Thumb-2 it needs an IT block or -mimplicit-it - confirm the
@ build settings.
        .macro  C64TOS xl, xh, sign, maxPos
        mov     \xl, \xl, lsr #(20+6)
        orr     \xl, \xl, \xh, lsl #(12-6)
        mov     \sign, \xl, asr #31
        cmp     \sign, \xl, asr #15
        eorne   \xl, \sign, \maxPos
        .endm
@ MC0S - process 2 taps, 1 sample per channel (sample 0)
@   x = vb1 offset (in words)
@
@ Reads two coefficients from [COEF] (post-incrementing COEF by 8 bytes) and
@ accumulates, with c2 negated:
@   SUM1L += VB1[x]      * c1 - VB1[23 - x]      * c2
@   SUM1R += VB1[32 + x] * c1 - VB1[32 + 23 - x] * c2
@ Clobbers CF1, CF2, VLO (the same register as PCM) and VHI.
        .macro  MC0S x
        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(23 - \x))]
        smlal   SUM1LL, SUM1LH, VLO, CF1
        ldr     VLO, [VB1, #(4*(32 + \x))]
        rsb     CF2, CF2, #0                    @ CF2 = -c2
        smlal   SUM1LL, SUM1LH, VHI, CF2
        ldr     VHI, [VB1, #(4*(32 + 23 - \x))]
        smlal   SUM1RL, SUM1RH, VLO, CF1
        smlal   SUM1RL, SUM1RH, VHI, CF2
        .endm
@ MC1S - process 2 taps, 1 sample per channel (sample 16)
@   x = vb1 offset (in words)
@
@ Reads one coefficient from [COEF] (post-incrementing COEF by 4 bytes) and
@ accumulates:
@   SUM1L += VB1[x]      * c1
@   SUM1R += VB1[32 + x] * c1
@ Clobbers CF1, VLO (the same register as PCM) and VHI.
        .macro  MC1S x
        ldr     CF1, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(32 + \x))]
        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM1RL, SUM1RH, VHI, CF1
        .endm
@ MC2S - process 2 taps, 2 samples per channel
@   x = vb1 offset (in words)
@
@ Reads two coefficients from [COEF] (post-incrementing COEF by 8 bytes) and
@ accumulates:
@   SUM1L += VB1[x]      * c1 - VB1[23 - x]      * c2
@   SUM2L += VB1[x]      * c2 + VB1[23 - x]      * c1
@   SUM1R += VB1[32 + x] * c1 - VB1[32 + 23 - x] * c2
@   SUM2R += VB1[32 + x] * c2 + VB1[32 + 23 - x] * c1
@ CF2 is negated in place and then negated back, so it alternates between
@ +c2 and -c2 through the sequence.
@ Clobbers CF1, CF2, VLO (the same register as PCM) and VHI.
        .macro  MC2S x
        @ load data as far as possible in advance of using it
        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(23 - \x))]
        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM2LL, SUM2LH, VLO, CF2
        rsb     CF2, CF2, #0                    @ CF2 = -c2
        smlal   SUM2LL, SUM2LH, VHI, CF1
        smlal   SUM1LL, SUM1LH, VHI, CF2
        ldr     VHI, [VB1, #(4*(32 + 23 - \x))]
        ldr     VLO, [VB1, #(4*(32 + \x))]
        smlal   SUM1RL, SUM1RH, VHI, CF2
        smlal   SUM2RL, SUM2RH, VHI, CF1
        rsb     CF2, CF2, #0                    @ CF2 = +c2 again
        smlal   SUM1RL, SUM1RH, VLO, CF1
        smlal   SUM2RL, SUM2RH, VLO, CF2
        .endm
@ void xmp3_PolyphaseStereo(short *pcm, int *vbuf, const int *coefBase)
@
@ In:   r0 = pcm       output buffer; halfwords at indices 0..63 are written
@       r1 = vbuf      input words
@       r2 = coefBase  coefficient table (words)
@ Out:  nothing in registers; 64 halfwords stored through pcm, as 32 pairs
@       (the register names suggest left then right channel)
@ Saves and restores r4-r11 and the return address; r0-r3 and r12 are
@ used as scratch.
@ Stack frame (8 bytes below the saved registers):
@       sp[0] = loop counter i
@       sp[1] = current pcm pointer (r0 is reused as VLO inside the macros)
@ Leaf function: it makes no calls.
        .global xmp3_PolyphaseStereo
        .type   xmp3_PolyphaseStereo, %function
xmp3_PolyphaseStereo:
        stmfd   sp!, {r4-r11, r14}              @ push callee-saved regs + lr

        @ reserve stack space for 2 local variables (4 bytes each)
        sub     sp, sp, #8
        str     PCM, [sp, #4]                   @ sp[1] = pcm pointer

        @ ---- special case, output sample 0 ----
        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM1RH, #0

        MC0S    0
        MC0S    1
        MC0S    2
        MC0S    3
        MC0S    4
        MC0S    5
        MC0S    6
        MC0S    7

        ldr     PCM, [sp, #4]                   @ reload pcm (r0 was used as VLO)
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS

        strh    SUM1LL, [PCM, #(2*0)]
        strh    SUM1RL, [PCM, #(2*1)]

        @ ---- special case, output sample 16 ----
        add     COEF, COEF, #(4*(256-16))       @ coef = coefBase + 256 (was coefBase + 16 after MC0S block)
        add     VB1, VB1, #(4*1024)             @ vb1 = vbuf + 64*16
        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM1RH, #0

        MC1S    0
        MC1S    1
        MC1S    2
        MC1S    3
        MC1S    4
        MC1S    5
        MC1S    6
        MC1S    7

        ldr     PCM, [sp, #4]                   @ reload pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS

        strh    SUM1LL, [PCM, #(2*(2*16+0))]
        strh    SUM1RL, [PCM, #(2*(2*16+1))]

        @ ---- main convolution loop ----
        @ sum1 = samples 1, 2, 3, ... 15    sum2 = samples 31, 30, ... 17
        sub     COEF, COEF, #(4*(264-16))       @ coef = coefBase + 16 (was coefBase + 264 after MC1S block)
        sub     VB1, VB1, #(4*(1024-64))        @ vb1 = vbuf + 64 (was vbuf + 64*16 after MC1S block)
        mov     I, #15                          @ loop counter, count down
        add     PCM, PCM, #(2*2)                @ pcm += 2 (skip the sample-0 pair)

LoopPS:
        @ I and PCM share registers with CF1 and VLO, so spill both
        str     I, [sp, #0]                     @ sp[0] = i (loop counter)
        str     PCM, [sp, #4]                   @ sp[1] = pcm (pointer to pcm buffer)

        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM2LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM2RL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM1RH, #0
        mov     SUM2LH, #0
        mov     SUM2RH, #0

        MC2S    0
        MC2S    1
        MC2S    2
        MC2S    3
        MC2S    4
        MC2S    5
        MC2S    6
        MC2S    7

        add     VB1, VB1, #(4*64)               @ vb1 += 64

        ldr     PCM, [sp, #4]                   @ reload pcm pointer
        @ MAXPOS shares r14 with CF2, so it is rebuilt on every iteration
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS
        C64TOS  SUM2LL, SUM2LH, SIGN, MAXPOS
        C64TOS  SUM2RL, SUM2RH, SIGN, MAXPOS

        ldr     I, [sp, #0]                     @ reload loop counter
        add     CF2, PCM, I, lsl #3             @ CF2 = pcm + 4*i shorts (8*i bytes)
        strh    SUM2LL, [CF2], #2               @ *(pcm + 2*2*i + 0)
        strh    SUM2RL, [CF2], #2               @ *(pcm + 2*2*i + 1)
        strh    SUM1LL, [PCM], #2               @ *(pcm + 0)
        strh    SUM1RL, [PCM], #2               @ *(pcm + 1); pcm has advanced by 2 shorts

        subs    I, I, #1
        bne     LoopPS

        @ release the locals and return (saved lr is popped straight into pc)
        add     sp, sp, #8
        ldmfd   sp!, {r4-r11, pc}
        .size   xmp3_PolyphaseStereo, . - xmp3_PolyphaseStereo
@ ---------------------------------------------------------------------------
@ MONO PROCESSING
@ ---------------------------------------------------------------------------

@ MC0M - process 2 taps, 1 sample (sample 0)
@   x = vb1 offset (in words)
@
@ Reads two coefficients from [COEF] (post-incrementing COEF by 8 bytes) and
@ accumulates, with c2 negated:
@   SUM1L += VB1[x] * c1 - VB1[23 - x] * c2
@ Clobbers CF1, CF2, VLO (the same register as PCM) and VHI.
        .macro  MC0M x
        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(23 - \x))]
        rsb     CF2, CF2, #0                    @ CF2 = -c2
        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM1LL, SUM1LH, VHI, CF2
        .endm
@ MC1M - process 2 taps, 1 sample (sample 16)
@   x = vb1 offset (in words)
@
@ Reads one coefficient from [COEF] (post-incrementing COEF by 4 bytes) and
@ accumulates:
@   SUM1L += VB1[x] * c1
@ Clobbers CF1 and VLO (the same register as PCM).
        .macro  MC1M x
        ldr     CF1, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        smlal   SUM1LL, SUM1LH, VLO, CF1
        .endm
@ MC2M - process 2 taps, 2 samples
@   x = vb1 offset (in words)
@
@ Reads two coefficients from [COEF] (post-incrementing COEF by 8 bytes) and
@ accumulates:
@   SUM1L += VB1[x] * c1 - VB1[23 - x] * c2
@   SUM2L += VB1[x] * c2 + VB1[23 - x] * c1
@ On exit CF2 holds -c2.
@ Clobbers CF1, CF2, VLO (the same register as PCM) and VHI.
        .macro  MC2M x
        @ load data as far as possible in advance of using it
        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*(\x))]
        ldr     VHI, [VB1, #(4*(23 - \x))]
        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM2LL, SUM2LH, VLO, CF2
        rsb     CF2, CF2, #0                    @ CF2 = -c2
        smlal   SUM1LL, SUM1LH, VHI, CF2
        smlal   SUM2LL, SUM2LH, VHI, CF1
        .endm
@ void xmp3_PolyphaseMono(short *pcm, int *vbuf, const int *coefBase)
@
@ In:   r0 = pcm       output buffer; halfwords at indices 0..31 are written
@       r1 = vbuf      input words
@       r2 = coefBase  coefficient table (words)
@ Out:  nothing in registers; 32 halfwords stored through pcm
@ Saves and restores r4-r11 and the return address; r0-r3 and r12 are
@ used as scratch.
@ Stack frame (8 bytes below the saved registers):
@       sp[0] = loop counter i
@       sp[1] = current pcm pointer (r0 is reused as VLO inside the macros)
@ Leaf function: it makes no calls.
        .global xmp3_PolyphaseMono
        .type   xmp3_PolyphaseMono, %function
xmp3_PolyphaseMono:
        stmfd   sp!, {r4-r11, r14}              @ push callee-saved regs + lr

        @ reserve stack space for 2 local variables (4 bytes each)
        sub     sp, sp, #8
        str     PCM, [sp, #4]                   @ sp[1] = pcm pointer

        @ ---- special case, output sample 0 ----
        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0

        MC0M    0
        MC0M    1
        MC0M    2
        MC0M    3
        MC0M    4
        MC0M    5
        MC0M    6
        MC0M    7

        ldr     PCM, [sp, #4]                   @ reload pcm (r0 was used as VLO)
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        strh    SUM1LL, [PCM, #(2*0)]

        @ ---- special case, output sample 16 ----
        add     COEF, COEF, #(4*(256-16))       @ coef = coefBase + 256 (was coefBase + 16 after MC0M block)
        add     VB1, VB1, #(4*1024)             @ vb1 = vbuf + 64*16
        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0

        MC1M    0
        MC1M    1
        MC1M    2
        MC1M    3
        MC1M    4
        MC1M    5
        MC1M    6
        MC1M    7

        ldr     PCM, [sp, #4]                   @ reload pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        strh    SUM1LL, [PCM, #(2*16)]

        @ ---- main convolution loop ----
        @ sum1L = samples 1, 2, 3, ... 15    sum2L = samples 31, 30, ... 17
        sub     COEF, COEF, #(4*(264-16))       @ coef = coefBase + 16 (was coefBase + 264 after MC1M block)
        sub     VB1, VB1, #(4*(1024-64))        @ vb1 = vbuf + 64 (was vbuf + 64*16 after MC1M block)
        mov     I, #15                          @ loop counter, count down
        add     PCM, PCM, #(2)                  @ pcm++ (skip sample 0)

LoopPM:
        @ I and PCM share registers with CF1 and VLO, so spill both
        str     I, [sp, #0]                     @ sp[0] = i (loop counter)
        str     PCM, [sp, #4]                   @ sp[1] = pcm (pointer to pcm buffer)

        mov     SUM1LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM2LL, #RNDVAL                 @ load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM2LH, #0

        MC2M    0
        MC2M    1
        MC2M    2
        MC2M    3
        MC2M    4
        MC2M    5
        MC2M    6
        MC2M    7

        add     VB1, VB1, #(4*64)               @ vb1 += 64

        ldr     PCM, [sp, #4]                   @ reload pcm pointer
        @ MAXPOS shares r14 with CF2, so it is rebuilt on every iteration
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           @ MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM2LL, SUM2LH, SIGN, MAXPOS

        ldr     I, [sp, #0]                     @ reload loop counter
        add     CF2, PCM, I, lsl #2             @ CF2 = pcm + 2*i shorts (4*i bytes)
        strh    SUM2LL, [CF2], #2               @ *(pcm + 2*i + 0)
        strh    SUM1LL, [PCM], #2               @ *(pcm + 0); pcm++

        subs    I, I, #1
        bne     LoopPM

        @ release the locals and return (saved lr is popped straight into pc)
        add     sp, sp, #8
        ldmfd   sp!, {r4-r11, pc}
        .size   xmp3_PolyphaseMono, . - xmp3_PolyphaseMono

        .end