; ArmAsm source (file-viewer header: 416 lines, 11 KiB)
; Rotation constants used by the butterfly routines below. Each value is
; approximately cos(k*pi/8) scaled by 2^31 (for example
; 0x5a82799a / 2^31 ~= 0.70711). They are consumed by SMULL/SMLAL followed
; by a left shift of the high word by one bit.
cPI1_8  EQU     0x7641af3d              ; ~cos(1*pi/8) * 2^31
cPI2_8  EQU     0x5a82799a              ; ~cos(2*pi/8) * 2^31
cPI3_8  EQU     0x30fbc54d              ; ~cos(3*pi/8) * 2^31

        AREA    |.text|, CODE, READONLY, ALIGN=2    ; read-only code section
        THUMB                                       ; assemble as Thumb code
        REQUIRE8                                    ; declares 8-byte stack alignment is required
        PRESERVE8                                   ; declares 8-byte stack alignment is preserved
|
|
|
|
; mdct_butterfly_8 -- 8-point butterfly on values already held in registers.
;
; Internal, register-based calling convention (not callable from C):
;   In:   r0               = address that receives the eight results
;         r1, r2, r3, r4   = x0, x1, x2, x3
;         r5, r6, r10, r11 = x4, x5, x6, x7
;   Out:  [r0 + 0 .. r0 + 28] = y0 .. y7   (r0 itself is left unchanged)
;   Clobbers: r1-r12 (r7, r8, r9, r12 are used for the pairwise sums)
;   Stack: not used; returns with "bx lr".
mdct_butterfly_8 PROC
        ; Pairwise sums go to scratch registers first, so the inputs are
        ; still intact when the differences overwrite r5/r6/r10/r11.
        add     r9,  r5,  r1                ; s0 = x4 + x0
        add     r7,  r6,  r2                ; s1 = x5 + x1
        add     r8,  r10, r3                ; s2 = x6 + x2
        add     r12, r11, r4                ; s3 = x7 + x3

        sub     r5,  r5,  r1                ; d0 = x4 - x0
        sub     r6,  r6,  r2                ; d1 = x5 - x1
        sub     r10, r10, r3                ; d2 = x6 - x2
        sub     r11, r11, r4                ; d3 = x7 - x3

        ; y0..y3 are built from the differences; each must be formed before
        ; the difference registers are reused for y4..y7 below.
        add     r1,  r10, r6                ; y0 = d2 + d1
        sub     r3,  r10, r6                ; y2 = d2 - d1
        sub     r2,  r11, r5                ; y1 = d3 - d0
        add     r4,  r11, r5                ; y3 = d3 + d0

        ; y4..y7 are built from the sums.
        sub     r5,  r8,  r9                ; y4 = s2 - s0
        add     r10, r8,  r9                ; y6 = s2 + s0
        sub     r6,  r12, r7                ; y5 = s3 - s1
        add     r11, r12, r7                ; y7 = s3 + s1

        stmia   r0, {r1-r6, r10, r11}       ; store y0..y7, no write-back

        bx      lr

        ENDP
|
|
|
|
; mdct_butterfly_16 -- 16-point butterfly over the 16 words starting at r0.
;
; Internal routine (only LR is saved, so it does not follow the C ABI):
;   In:   r0 = address of x[0]; x[0..15] are read and rewritten in place
;   Out:  x[0..15] updated; r0 is left pointing at x[8]
;   Clobbers: r1-r12
;   Calls: mdct_butterfly_8 twice (on x[0..7] and on x[8..15])
;
; "Scaling by cPI2_8" below means: take the high 32 bits of the signed
; 64-bit product with cPI2_8 and shift them left by one bit.
mdct_butterfly_16 PROC
        push    {lr}

        add     r1, r0, #8*4                ; r1 -> x[8]

        ; ---- first quarter: x[0..3] against x[8..11] --------------------
        ldmia   r0, {r2-r5}                 ; r2..r5 = x0..x3
        ldmia   r1, {r6-r9}                 ; r6..r9 = x8..x11

        add     r6, r6, r2                  ; y8  = x8  + x0
        add     r7, r7, r3                  ; y9  = x9  + x1
        add     r8, r8, r4                  ; y10 = x10 + x2
        add     r9, r9, r5                  ; y11 = x11 + x3

        ; Differences are recovered from the sums: 2*x0 - (x8 + x0) = x0 - x8.
        rsb     r2,  r6, r2, lsl #1         ; x0  - x8
        rsb     r3,  r7, r3, lsl #1         ; x1  - x9
        sub     r11, r8, r4, lsl #1         ; x10 - x2
        rsb     r10, r9, r5, lsl #1         ; x3  - x11

        stmia   r1!, {r6-r9}                ; write y8..y11; r1 -> x[12]

        add     r2, r2, r3                  ; (x0 - x8) + (x1 - x9)
        rsb     r3, r2, r3, lsl #1          ; (x1 - x9) - (x0 - x8)

        ldr     r12, =cPI2_8
        smull   r8, r5, r12, r2             ; r5 = high word, r8 = discarded low word
        smull   r8, r6, r12, r3             ; r6 = high word
        lsl     r5, r5, #1
        lsl     r6, r6, #1

        stmia   r0!, {r5, r6, r10, r11}     ; write y0..y3; r0 -> x[4]

        ; ---- second quarter: x[4..7] against x[12..15] ------------------
        ldmia   r0, {r2-r5}                 ; r2..r5 = x4..x7
        ldmia   r1, {r6-r9}                 ; r6..r9 = x12..x15

        add     r6, r6, r2                  ; y12 = x12 + x4
        add     r7, r7, r3                  ; y13 = x13 + x5
        add     r8, r8, r4                  ; y14 = x14 + x6
        add     r9, r9, r5                  ; y15 = x15 + x7

        sub     r2,  r6, r2, lsl #1         ; x12 - x4
        sub     r3,  r7, r3, lsl #1         ; x13 - x5
        sub     r10, r8, r4, lsl #1         ; x14 - x6
        sub     r11, r9, r5, lsl #1         ; x15 - x7

        stmia   r1, {r6-r9}                 ; write y12..y15

        sub     r2, r2, r3                  ; (x12 - x4) - (x13 - x5)
        add     r3, r2, r3, lsl #1          ; (x12 - x4) + (x13 - x5)

        smull   r8, r5, r12, r2             ; r12 still holds cPI2_8
        smull   r8, r6, r12, r3
        lsl     r5, r5, #1
        lsl     r6, r6, #1

        ; The second-quarter results are deliberately not stored here:
        ; r5, r6, r10, r11 are exactly the x4..x7 inputs of mdct_butterfly_8.
        sub     r0, r0, #4*4                ; r0 -> x[0]
        ldmia   r0, {r1-r4}                 ; x0..x3 for the first 8-point pass
        bl      mdct_butterfly_8

        add     r0, r0, #8*4                ; r0 -> x[8]
        ldmia   r0, {r1-r6, r10, r11}       ; x8..x15 for the second 8-point pass
        bl      mdct_butterfly_8

        pop     {pc}

        ENDP
|
|
|
|
; mdct_butterfly_32 -- 32-point butterfly over the 32 words starting at r0.
;
; Exported entry point.
;   In:   r0 = address of x[0]; x[0..31] are read and rewritten in place
;   Saves/restores: r4-r11 and the return address
;   Clobbers: r0-r3, r12
;   Calls: mdct_butterfly_16 on x[0..15] and on x[16..31]
;
; Throughout, r12 = cPI1_8, lr = cPI3_8 and r11 = cPI2_8. Every product is
; formed as a signed 64-bit value (SMULL, optionally accumulated with
; SMLAL); the high word is kept and shifted left by one bit. r10 only ever
; receives the unused low word.
mdct_butterfly_32 PROC
        EXPORT  mdct_butterfly_32

        push    {r4-r11, lr}

        add     r1, r0, #16*4               ; r1 -> x[16]

        ; ---- x[0..3] against x[16..19] ----------------------------------
        ldmia   r0, {r2-r5}                 ; r2..r5 = x0..x3
        ldmia   r1, {r6-r9}                 ; r6..r9 = x16..x19

        add     r6, r6, r2                  ; y16 = x16 + x0
        add     r7, r7, r3                  ; y17 = x17 + x1
        add     r8, r8, r4                  ; y18 = x18 + x2
        add     r9, r9, r5                  ; y19 = x19 + x3

        rsb     r2, r6, r2, lsl #1          ; x0 - x16
        rsb     r3, r7, r3, lsl #1          ; x1 - x17
        rsb     r4, r8, r4, lsl #1          ; x2 - x18
        rsb     r5, r9, r5, lsl #1          ; x3 - x19

        stmia   r1!, {r6-r9}                ; write y16..y19; r1 -> x[20]

        ldr     r12, =cPI1_8
        ldr     lr, =cPI3_8
        smull   r10, r6, r12, r2            ;  cPI1_8 * (x0 - x16)
        rsb     r2, r2, #0                  ; r2 = -(x0 - x16)
        smlal   r10, r6, lr, r3             ; + cPI3_8 * (x1 - x17)
        smull   r10, r7, r12, r3            ;  cPI1_8 * (x1 - x17)
        smlal   r10, r7, lr, r2             ; - cPI3_8 * (x0 - x16)
        lsl     r6, r6, #1
        lsl     r7, r7, #1

        add     r4, r4, r5                  ; (x3 - x19) + (x2 - x18)
        rsb     r5, r4, r5, lsl #1          ; (x3 - x19) - (x2 - x18)

        ldr     r11, =cPI2_8
        smull   r10, r8, r4, r11
        smull   r10, r9, r5, r11
        lsl     r8, r8, #1
        lsl     r9, r9, #1

        stmia   r0!, {r6-r9}                ; write y0..y3; r0 -> x[4]

        ; ---- x[4..7] against x[20..23] ----------------------------------
        ldmia   r0, {r2-r5}                 ; r2..r5 = x4..x7
        ldmia   r1, {r6-r9}                 ; r6..r9 = x20..x23

        add     r6, r6, r2                  ; y20 = x20 + x4
        add     r7, r7, r3                  ; y21 = x21 + x5
        add     r8, r8, r4                  ; y22 = x22 + x6
        add     r9, r9, r5                  ; y23 = x23 + x7

        rsb     r2, r6, r2, lsl #1          ; x4  - x20
        rsb     r3, r7, r3, lsl #1          ; x5  - x21
        sub     r4, r8, r4, lsl #1          ; x22 - x6
        rsb     r5, r9, r5, lsl #1          ; x7  - x23

        stmia   r1!, {r6-r9}                ; write y20..y23; r1 -> x[24]

        smull   r10, r6, lr, r2             ;  cPI3_8 * (x4 - x20)
        rsb     r2, r2, #0                  ; r2 = -(x4 - x20)
        smlal   r10, r6, r12, r3            ; + cPI1_8 * (x5 - x21)
        smull   r10, r7, lr, r3             ;  cPI3_8 * (x5 - x21)
        smlal   r10, r7, r12, r2            ; - cPI1_8 * (x4 - x20)
        lsl     r6, r6, #1
        lsl     r7, r7, #1

        mov     r8, r5                      ; y6 = x7  - x23
        mov     r9, r4                      ; y7 = x22 - x6
        stmia   r0!, {r6-r9}                ; write y4..y7; r0 -> x[8]

        ; ---- x[8..11] against x[24..27] ---------------------------------
        ldmia   r0, {r2-r5}                 ; r2..r5 = x8..x11
        ldmia   r1, {r6-r9}                 ; r6..r9 = x24..x27

        add     r6, r6, r2                  ; y24 = x24 + x8
        add     r7, r7, r3                  ; y25 = x25 + x9
        add     r8, r8, r4                  ; y26 = x26 + x10
        add     r9, r9, r5                  ; y27 = x27 + x11

        sub     r2, r6, r2, lsl #1          ; x24 - x8
        sub     r3, r7, r3, lsl #1          ; x25 - x9
        sub     r4, r8, r4, lsl #1          ; x26 - x10
        sub     r5, r9, r5, lsl #1          ; x27 - x11

        stmia   r1!, {r6-r9}                ; write y24..y27; r1 -> x[28]

        smull   r10, r7, lr, r3             ;  cPI3_8 * (x25 - x9)
        rsb     r3, r3, #0                  ; r3 = -(x25 - x9)
        smlal   r10, r7, r12, r2            ; + cPI1_8 * (x24 - x8)
        smull   r10, r6, r12, r3            ; -cPI1_8 * (x25 - x9)
        smlal   r10, r6, lr, r2             ; + cPI3_8 * (x24 - x8)
        lsl     r6, r6, #1
        lsl     r7, r7, #1

        sub     r4, r4, r5                  ; (x26 - x10) - (x27 - x11)
        add     r5, r4, r5, lsl #1          ; (x26 - x10) + (x27 - x11)

        ldr     r11, =cPI2_8
        smull   r10, r8, r11, r4
        smull   r10, r9, r11, r5
        lsl     r8, r8, #1
        lsl     r9, r9, #1

        stmia   r0!, {r6-r9}                ; write y8..y11; r0 -> x[12]

        ; ---- x[12..15] against x[28..31] --------------------------------
        ldmia   r0, {r2-r5}                 ; r2..r5 = x12..x15
        ldmia   r1, {r6-r9}                 ; r6..r9 = x28..x31

        add     r6, r6, r2                  ; y28 = x28 + x12
        add     r7, r7, r3                  ; y29 = x29 + x13
        add     r8, r8, r4                  ; y30 = x30 + x14
        add     r9, r9, r5                  ; y31 = x31 + x15

        sub     r2, r6, r2, lsl #1          ; x28 - x12
        sub     r3, r7, r3, lsl #1          ; x29 - x13
        sub     r4, r8, r4, lsl #1          ; x30 - x14
        sub     r5, r9, r5, lsl #1          ; x31 - x15

        stmia   r1, {r6-r9}                 ; write y28..y31

        smull   r10, r7, r12, r3            ;  cPI1_8 * (x29 - x13)
        rsb     r3, r3, #0                  ; r3 = -(x29 - x13)
        smlal   r10, r7, lr, r2             ; + cPI3_8 * (x28 - x12)
        smull   r10, r6, lr, r3             ; -cPI3_8 * (x29 - x13)
        smlal   r10, r6, r12, r2            ; + cPI1_8 * (x28 - x12)
        lsl     r6, r6, #1
        lsl     r7, r7, #1

        mov     r8, r4                      ; y14 = x30 - x14
        mov     r9, r5                      ; y15 = x31 - x15
        stmia   r0, {r6-r9}                 ; write y12..y15

        ; ---- two 16-point passes ----------------------------------------
        sub     r0, r0, #12*4               ; r0 -> x[0]
        push    {r0}                        ; mdct_butterfly_16 changes r0
        bl      mdct_butterfly_16

        pop     {r0}
        add     r0, r0, #16*4               ; r0 -> x[16]
        bl      mdct_butterfly_16

        pop     {r4-r11, pc}

        ENDP
|
|
|
|
; mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
;
; Runs four consecutive do/while passes. Every iteration of every pass:
;   * loads the four words just below x1 (a0..a3) and just below x2 (b0..b3),
;   * writes a_k + b_k back to the x1 block and moves x1 down by four words,
;   * rotates the four differences, two at a time, by the coefficient pair
;     (t0, t1) read from the table pointer T, and writes the results to the
;     x2 block, moving x2 down by four words,
;   * moves T by "step" words after each coefficient pair.
; Passes 1 and 3 walk T upwards until it is no longer below Ttop; passes 2
; and 4 walk it downwards until it is no longer above T0. The passes differ
; only in the signs of the differences and in which rotation is applied.
;
; Arguments (registers as used by the code; this matches the standard ARM
; procedure-call layout for five word-sized arguments):
;   r0       = x1    pointer one word past the first data block
;   r1       = x2    pointer one word past the second data block
;   r2       = T0    lowest coefficient-table address
;   r3       = step  table stride in 32-bit words (scaled with lsl #2)
;   [sp, #0] = Ttop  highest coefficient-table address (pair at Ttop is read)
;
; Saves/restores r4-r11; clobbers r0-r3, r12 and flags. r5 only ever
; receives the unused low word of the 64-bit products.
;
; Stack frame after the prologue (sp is 40 bytes below its entry value):
;   [sp, #GL_T0_SLOT]  spilled T0
;   [sp, #4 .. #39]    saved r4-r11, lr
;   [sp, #GL_TTOP_ARG] caller's fifth argument (Ttop)
; T0 is kept in a properly allocated slot: memory below sp may be
; overwritten at any time (for example by an interrupt or exception frame),
; so it must not be used as scratch storage.
GL_T0_SLOT      EQU     0
GL_TTOP_ARG     EQU     40

mdct_butterfly_generic_loop PROC
        EXPORT  mdct_butterfly_generic_loop

        push    {r4-r11, lr}                ; 9 words = 36 bytes
        push    {r2}                        ; allocate and fill the T0 slot
        ldr     r4, [sp, #GL_TTOP_ARG]      ; r4 = Ttop

        ; ---- pass 1: T ascends while T < Ttop ---------------------------
label_1
        ldmdb   r0, {r6, r7, r8, r9}        ; a0..a3
        ldmdb   r1, {r10, r11, r12, r14}    ; b0..b3

        add     r6, r6, r10                 ; a0 + b0
        sub     r10, r6, r10, lsl #1        ; d0 = a0 - b0
        add     r7, r7, r11                 ; a1 + b1
        rsb     r11, r7, r11, lsl #1        ; d1 = b1 - a1
        add     r8, r8, r12                 ; a2 + b2
        sub     r12, r8, r12, lsl #1        ; d2 = a2 - b2
        add     r9, r9, r14                 ; a3 + b3
        rsb     r14, r9, r14, lsl #1        ; d3 = b3 - a3

        stmdb   r0!, {r6, r7, r8, r9}       ; store sums; x1 -= 4 words

        ldmia   r2, {r6, r7}                ; t0, t1
        smull   r5, r8, r6, r14             ; r8 = t0*d3 ...
        rsb     r14, r14, #0
        smlal   r5, r8, r7, r12             ;      ... + t1*d2
        smull   r5, r9, r6, r12             ; r9 = t0*d2 ...
        smlal   r5, r9, r7, r14             ;      ... - t1*d3

        mov     r8, r8, lsl #1
        mov     r9, r9, lsl #1
        stmdb   r1!, {r8, r9}               ; replaces b2, b3
        add     r2, r2, r3, lsl #2          ; T += step

        ldmia   r2, {r6, r7}                ; t0, t1
        smull   r5, r8, r6, r11             ; r8 = t0*d1 ...
        rsb     r11, r11, #0
        smlal   r5, r8, r7, r10             ;      ... + t1*d0
        smull   r5, r9, r6, r10             ; r9 = t0*d0 ...
        smlal   r5, r9, r7, r11             ;      ... - t1*d1

        mov     r8, r8, lsl #1
        mov     r9, r9, lsl #1
        stmdb   r1!, {r8, r9}               ; replaces b0, b1
        add     r2, r2, r3, lsl #2          ; T += step

        cmp     r2, r4
        blo     label_1                     ; unsigned: continue while T < Ttop

        ldr     r4, [sp, #GL_T0_SLOT]       ; r4 = T0

        ; ---- pass 2: T descends while T > T0 ----------------------------
label_2
        ldmdb   r0, {r6, r7, r8, r9}        ; a0..a3
        ldmdb   r1, {r10, r11, r12, r14}    ; b0..b3

        add     r6, r6, r10                 ; a0 + b0
        sub     r10, r6, r10, lsl #1        ; d0 = a0 - b0
        add     r7, r7, r11                 ; a1 + b1
        sub     r11, r7, r11, lsl #1        ; d1 = a1 - b1
        add     r8, r8, r12                 ; a2 + b2
        sub     r12, r8, r12, lsl #1        ; d2 = a2 - b2
        add     r9, r9, r14                 ; a3 + b3
        sub     r14, r9, r14, lsl #1        ; d3 = a3 - b3

        stmdb   r0!, {r6, r7, r8, r9}       ; store sums; x1 -= 4 words

        ldmia   r2, {r6, r7}                ; t0, t1
        smull   r5, r9, r6, r14             ; r9 = t0*d3 ...
        rsb     r14, r14, #0
        smlal   r5, r9, r7, r12             ;      ... + t1*d2
        smull   r5, r8, r6, r12             ; r8 = t0*d2 ...
        smlal   r5, r8, r7, r14             ;      ... - t1*d3

        mov     r8, r8, lsl #1
        mov     r9, r9, lsl #1
        stmdb   r1!, {r8, r9}               ; replaces b2, b3
        sub     r2, r2, r3, lsl #2          ; T -= step

        ldmia   r2, {r6, r7}                ; t0, t1
        smull   r5, r9, r6, r11             ; r9 = t0*d1 ...
        rsb     r11, r11, #0
        smlal   r5, r9, r7, r10             ;      ... + t1*d0
        smull   r5, r8, r6, r10             ; r8 = t0*d0 ...
        smlal   r5, r8, r7, r11             ;      ... - t1*d1

        mov     r8, r8, lsl #1
        mov     r9, r9, lsl #1
        stmdb   r1!, {r8, r9}               ; replaces b0, b1
        sub     r2, r2, r3, lsl #2          ; T -= step

        cmp     r2, r4
        bhi     label_2                     ; unsigned: continue while T > T0

        ldr     r4, [sp, #GL_TTOP_ARG]      ; r4 = Ttop

        ; ---- pass 3: T ascends while T < Ttop ---------------------------
label_3
        ldmdb   r0, {r6, r7, r8, r9}        ; a0..a3
        ldmdb   r1, {r10, r11, r12, r14}    ; b0..b3

        add     r6, r6, r10                 ; a0 + b0
        rsb     r10, r6, r10, lsl #1        ; d0 = b0 - a0
        add     r7, r7, r11                 ; a1 + b1
        rsb     r11, r7, r11, lsl #1        ; d1 = b1 - a1
        add     r8, r8, r12                 ; a2 + b2
        rsb     r12, r8, r12, lsl #1        ; d2 = b2 - a2
        add     r9, r9, r14                 ; a3 + b3
        rsb     r14, r9, r14, lsl #1        ; d3 = b3 - a3

        stmdb   r0!, {r6, r7, r8, r9}       ; store sums; x1 -= 4 words

        ldmia   r2, {r6, r7}                ; t0, t1
        smull   r5, r8, r6, r12             ; r8 = t0*d2 ...
        rsb     r12, r12, #0
        smlal   r5, r8, r7, r14             ;      ... + t1*d3
        smull   r5, r9, r6, r14             ; r9 = t0*d3 ...
        smlal   r5, r9, r7, r12             ;      ... - t1*d2

        mov     r8, r8, lsl #1
        mov     r9, r9, lsl #1
        stmdb   r1!, {r8, r9}               ; replaces b2, b3
        add     r2, r2, r3, lsl #2          ; T += step

        ldmia   r2, {r6, r7}                ; t0, t1
        smull   r5, r8, r6, r10             ; r8 = t0*d0 ...
        rsb     r10, r10, #0
        smlal   r5, r8, r7, r11             ;      ... + t1*d1
        smull   r5, r9, r6, r11             ; r9 = t0*d1 ...
        smlal   r5, r9, r7, r10             ;      ... - t1*d0

        mov     r8, r8, lsl #1
        mov     r9, r9, lsl #1
        stmdb   r1!, {r8, r9}               ; replaces b0, b1
        add     r2, r2, r3, lsl #2          ; T += step

        cmp     r2, r4
        blo     label_3                     ; unsigned: continue while T < Ttop

        ldr     r4, [sp, #GL_T0_SLOT]       ; r4 = T0

        ; ---- pass 4: T descends while T > T0 ----------------------------
label_4
        ldmdb   r0, {r6, r7, r8, r9}        ; a0..a3
        ldmdb   r1, {r10, r11, r12, r14}    ; b0..b3

        add     r6, r6, r10                 ; a0 + b0
        sub     r10, r6, r10, lsl #1        ; d0 = a0 - b0
        add     r7, r7, r11                 ; a1 + b1
        rsb     r11, r7, r11, lsl #1        ; d1 = b1 - a1
        add     r8, r8, r12                 ; a2 + b2
        sub     r12, r8, r12, lsl #1        ; d2 = a2 - b2
        add     r9, r9, r14                 ; a3 + b3
        rsb     r14, r9, r14, lsl #1        ; d3 = b3 - a3

        stmdb   r0!, {r6, r7, r8, r9}       ; store sums; x1 -= 4 words

        ldmia   r2, {r6, r7}                ; t0, t1
        smull   r5, r9, r6, r12             ; r9 = t0*d2 ...
        smlal   r5, r9, r7, r14             ;      ... + t1*d3
        rsb     r12, r12, #0
        smull   r5, r8, r6, r14             ; r8 = t0*d3 ...
        smlal   r5, r8, r7, r12             ;      ... - t1*d2

        mov     r8, r8, lsl #1
        mov     r9, r9, lsl #1
        stmdb   r1!, {r8, r9}               ; replaces b2, b3
        sub     r2, r2, r3, lsl #2          ; T -= step

        ldmia   r2, {r6, r7}                ; t0, t1
        smull   r5, r9, r6, r10             ; r9 = t0*d0 ...
        rsb     r10, r10, #0
        smlal   r5, r9, r7, r11             ;      ... + t1*d1
        smull   r5, r8, r6, r11             ; r8 = t0*d1 ...
        smlal   r5, r8, r7, r10             ;      ... - t1*d0

        mov     r8, r8, lsl #1
        mov     r9, r9, lsl #1
        stmdb   r1!, {r8, r9}               ; replaces b0, b1
        sub     r2, r2, r3, lsl #2          ; T -= step

        cmp     r2, r4
        bhi     label_4                     ; unsigned: continue while T > T0

        add     sp, sp, #4                  ; release the T0 slot
        pop     {r4-r11, pc}

        ENDP
|
|
|
|
; Marks the end of this assembly source file for the assembler.
END
|