mirror of
https://github.com/betaflight/betaflight.git
synced 2025-07-20 06:45:16 +03:00
Adding RP2350 SDK and target framework (#13988)
* Adding RP2350 SDK and target framework * Spacing * Removing board definitions
This commit is contained in:
parent
462cb05930
commit
2dd6f95aad
576 changed files with 435012 additions and 0 deletions
266
lib/main/pico-sdk/rp2_common/pico_double/double_aeabi_dcp.S
Normal file
266
lib/main/pico-sdk/rp2_common/pico_double/double_aeabi_dcp.S
Normal file
|
@ -0,0 +1,266 @@
|
|||
/*
|
||||
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
|
||||
#if !HAS_DOUBLE_COPROCESSOR
|
||||
#error attempt to compile double_aeabi_rp2350 when there is no DCP
|
||||
#else
|
||||
|
||||
#include "hardware/dcp_instr.inc.S"
|
||||
#include "hardware/dcp_canned.inc.S"
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
.macro double_section name
|
||||
#if PICO_DOUBLE_IN_RAM
|
||||
.section RAM_SECTION_NAME(\name), "ax"
|
||||
#else
|
||||
.section SECTION_NAME(\name), "ax"
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro double_wrapper_section func
|
||||
double_section WRAPPER_FUNC_NAME(\func)
|
||||
.endm
|
||||
|
||||
// ============== STATE SAVE AND RESTORE ===============
|
||||
|
||||
// saving_func type func
// Emits the entry sequence for a function that uses the DCP. "type" selects the
// declaring macro (type_func; this file uses wrapper and regular) and "func" is
// the symbol to declare. The function body follows the macro invocation and is
// closed with saving_func_return.
// Layout: a 3-instruction save stub is placed *before* the entry point; the
// entry point tests the flags produced by PCMP and branches back to the stub
// only when the N flag is set (the "engaged" case described below).
.macro saving_func type func
|
||||
// Note we are usually 32-bit aligned already at this point, as most of the
|
||||
// function bodies contain exactly two 16-bit instructions: bmi and bx lr.
|
||||
// We want the PCMP word-aligned.
|
||||
.p2align 2
|
||||
// When the engaged flag is set, branch back here to invoke save routine and
|
||||
// hook lr with the restore routine, then fall back through to the entry
|
||||
// point. The engaged flag will be clear when checked a second time.
|
||||
1:
|
||||
push {lr} // 16-bit instruction
|
||||
bl generic_save_state // 32-bit instruction
|
||||
b 1f // 16-bit instruction
|
||||
// This is the actual entry point:
|
||||
\type\()_func \func
|
||||
// PCMP writes the flags; bmi (N set) takes the save path at the backward label 1
|
||||
PCMP apsr_nzcv
|
||||
bmi 1b
|
||||
// forward label 1: start of the function body supplied by the macro user
|
||||
1:
|
||||
.endm
|
||||
|
||||
.macro saving_func_return
|
||||
bx lr
|
||||
.endm
|
||||
|
||||
// generic_save_state / generic_restore_state
// Reached by "bl" from the stub emitted by saving_func, after that stub has
// pushed the caller's lr. Reserves 24 bytes of stack, fills them with three
// register pairs obtained through the PXMD, PYMD and REFD macros, then resumes
// the wrapped function with lr pointing at generic_restore_state. When the
// wrapped function returns, the three pairs are written back with WXMD, WYMD and
// WEFD and the lr pushed by the stub is popped into pc.
// r0-r3 are preserved across the save; the restore path only uses r12 and r14.
// NOTE(review): PXMD/PYMD/REFD/WXMD/WYMD/WEFD come from the DCP include files;
// presumably they read/write the coprocessor X, Y and EF state - confirm there.
double_section __rp2350_dcp_engaged_state_save_restore
|
||||
.thumb_func
|
||||
generic_save_state:
|
||||
// room for three 8-byte register pairs
sub sp, #24
|
||||
// free r0/r1 for use as scratch; they are restored by the pop below
push {r0, r1}
|
||||
// do save here
|
||||
PXMD r0, r1
|
||||
// the "#8 +" skips the r0/r1 pair that was just pushed
strd r0, r1, [sp, #8 + 0]
|
||||
PYMD r0, r1
|
||||
strd r0, r1, [sp, #8 + 8]
|
||||
REFD r0, r1
|
||||
strd r0, r1, [sp, #8 + 16]
|
||||
pop {r0, r1}
|
||||
// lr still holds the return address of the stub's "bl"; blx branches there and
// sets lr to the next instruction, i.e. generic_restore_state
blx lr
|
||||
// <- wrapped function returns here
|
||||
// fall through into restore:
|
||||
.thumb_func
|
||||
generic_restore_state:
|
||||
// do restore here
|
||||
// pairs are popped in the order they were stored: [sp+0], [sp+8], [sp+16]
pop {r12, r14}
|
||||
WXMD r12, r14
|
||||
pop {r12, r14}
|
||||
WYMD r12, r14
|
||||
pop {r12, r14}
|
||||
WEFD r12, r14
|
||||
// pops the lr pushed by the saving_func stub: return to the original caller
pop {pc}
|
||||
|
||||
// ============== ARITHMETIC FUNCTIONS ===============
|
||||
|
||||
double_wrapper_section __aeabi_dadd
|
||||
saving_func wrapper __aeabi_dadd
|
||||
dcp_dadd_m r0,r1,r0,r1,r2,r3
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_dsub
|
||||
saving_func wrapper __aeabi_dsub
|
||||
dcp_dsub_m r0,r1,r0,r1,r2,r3
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_drsub
|
||||
saving_func wrapper __aeabi_drsub
|
||||
dcp_dsub_m r0,r1,r2,r3,r0,r1
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_dmul
|
||||
saving_func wrapper __aeabi_dmul
|
||||
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
push {r4,r14}
|
||||
dcp_dmul_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r4,r12,r14
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4,lr}
|
||||
saving_func_return
|
||||
|
||||
double_section ddiv_fast
|
||||
saving_func regular ddiv_fast
|
||||
dcp_ddiv_fast_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r12
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_ddiv
|
||||
saving_func wrapper __aeabi_ddiv
|
||||
@ with correct rounding
|
||||
dcp_ddiv_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r12
|
||||
saving_func_return
|
||||
|
||||
double_section sqrt_fast
|
||||
saving_func regular sqrt_fast
|
||||
dcp_dsqrt_fast_m r0,r1,r0,r1,r0,r1,r2,r3,r12
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section sqrt
|
||||
saving_func wrapper sqrt
|
||||
@ with correct rounding
|
||||
dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
double_wrapper_section __aeabi_dclassify
|
||||
saving_func wrapper __aeabi_dclassify
|
||||
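@ NOTE(review): the destination operand is apsr_nzcv, so the classification is presumably returned in the flags - confirm against dcp_canned.inc.S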
|
||||
dcp_dclassify_m apsr_nzcv,r0,r1
|
||||
saving_func_return
|
||||
|
||||
// ============== CONVERSION FUNCTIONS ===============
|
||||
|
||||
double_wrapper_section __aeabi_d2f
|
||||
saving_func wrapper __aeabi_d2f
|
||||
@ with rounding
|
||||
dcp_double2float_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_i2d
|
||||
saving_func wrapper __aeabi_i2d
|
||||
dcp_int2double_m r0,r1,r0
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_ui2d
|
||||
saving_func wrapper __aeabi_ui2d
|
||||
dcp_uint2double_m r0,r1,r0
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_d2iz
|
||||
saving_func wrapper __aeabi_d2iz
|
||||
@ with truncation towards 0
|
||||
dcp_double2int_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_d2uiz
|
||||
saving_func wrapper __aeabi_d2uiz
|
||||
@ with truncation towards 0
|
||||
dcp_double2uint_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
double_wrapper_section __aeabi_d2i_r
|
||||
saving_func wrapper __aeabi_d2i_r
|
||||
@ with rounding
|
||||
dcp_double2int_r_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
double_wrapper_section __aeabi_d2ui_r
|
||||
saving_func wrapper __aeabi_d2ui_r
|
||||
@ with rounding
|
||||
dcp_double2uint_r_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
// ============== COMPARISON FUNCTIONS ===============
|
||||
|
||||
double_wrapper_section __aeabi_dcmpun
|
||||
saving_func wrapper __aeabi_dcmpun
|
||||
dcp_dcmp_m r0,r0,r1,r2,r3
|
||||
// extract unordered bit
|
||||
ubfx r0, r0, #28, #1
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_dcmp
|
||||
|
||||
saving_func wrapper __aeabi_cdrcmple
|
||||
dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
|
||||
bvs cmp_nan
|
||||
saving_func_return
|
||||
|
||||
// these next two can be the same function in the absence of exceptions
|
||||
saving_func wrapper __aeabi_cdcmple
|
||||
//wrapper_func __aeabi_dcmp
|
||||
dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
|
||||
bvs cmp_nan
|
||||
saving_func_return
|
||||
|
||||
// It is not clear from the ABI documentation whether cdcmpeq must set the C flag
|
||||
// in the same way as cdcmple. If not, we could save the "bvs" below; but we
|
||||
// err on the side of caution.
|
||||
saving_func wrapper __aeabi_cdcmpeq
|
||||
//wrapper_func __aeabi_dcmp
|
||||
dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
|
||||
bvs cmp_nan
|
||||
saving_func_return
|
||||
|
||||
// If the result of a flag-setting comparison is "unordered" then we need to set C and clear Z.
|
||||
// We could conceivably just do lsrs r12,r14,#1, or even cmp r14,r14,lsr#1 as (a) r14 here is a
|
||||
// return address and r14b0=1 for Thumb mode; (b) we are unlikely to be returning to address 0.
|
||||
cmp_nan:
|
||||
movs r12, #3 // r12 does not need to be preserved by the flag-setting comparisons
|
||||
lsrs r12, #1 // set C, clear Z
|
||||
saving_func_return
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmpeq)(double, double) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
|
||||
double_wrapper_section __aeabi_dcmpeq
|
||||
saving_func wrapper __aeabi_dcmpeq
|
||||
dcp_dcmp_m r0,r0,r1,r2,r3
|
||||
// extract Z
|
||||
ubfx r0, r0, #30, #1
|
||||
saving_func_return
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmplt)(double, double) result (1, 0) denotes (<, ?>=) [2], use for C <
|
||||
double_wrapper_section __aeabi_dcmplt
|
||||
saving_func wrapper __aeabi_dcmplt
|
||||
dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1
|
||||
ite hi
|
||||
movhi r0,#1
|
||||
movls r0,#0
|
||||
saving_func_return
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmple)(double, double) result (1, 0) denotes (<=, ?>) [2], use for C <=
|
||||
double_wrapper_section __aeabi_dcmple
|
||||
saving_func wrapper __aeabi_dcmple
|
||||
dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1
|
||||
ite hs
|
||||
movhs r0,#1
|
||||
movlo r0,#0
|
||||
saving_func_return
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmpge)(double, double) result (1, 0) denotes (>=, ?<) [2], use for C >=
|
||||
double_wrapper_section __aeabi_dcmpge
|
||||
saving_func wrapper __aeabi_dcmpge
|
||||
dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
|
||||
ite hs
|
||||
movhs r0,#1
|
||||
movlo r0,#0
|
||||
saving_func_return
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmpgt)(double, double) result (1, 0) denotes (>, ?<=) [2], use for C >
|
||||
double_wrapper_section __aeabi_dcmpgt
|
||||
saving_func wrapper __aeabi_dcmpgt
|
||||
dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
|
||||
ite hi
|
||||
movhi r0,#1
|
||||
movls r0,#0
|
||||
saving_func_return
|
||||
|
||||
#endif
|
847
lib/main/pico-sdk/rp2_common/pico_double/double_aeabi_rp2040.S
Normal file
847
lib/main/pico-sdk/rp2_common/pico_double/double_aeabi_rp2040.S
Normal file
|
@ -0,0 +1,847 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#if PICO_RP2040
|
||||
#include "pico/asm_helper.S"
|
||||
#include "pico/runtime_init.h"
|
||||
#include "pico/bootrom/sf_table.h"
|
||||
#include "hardware/divider_helper.S"
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
PICO_RUNTIME_INIT_FUNC_RUNTIME(__aeabi_double_init, PICO_RUNTIME_INIT_AEABI_DOUBLE)
|
||||
|
||||
.macro double_section name
|
||||
#if PICO_DOUBLE_IN_RAM
|
||||
.section RAM_SECTION_NAME(\name), "ax"
|
||||
#else
|
||||
.section SECTION_NAME(\name), "ax"
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro _double_wrapper_func x
|
||||
wrapper_func \x
|
||||
.endm
|
||||
|
||||
.macro wrapper_func_d1 x
|
||||
_double_wrapper_func \x
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
mov ip, lr
|
||||
bl __check_nan_d1
|
||||
mov lr, ip
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro wrapper_func_d2 x
|
||||
_double_wrapper_func \x
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
mov ip, lr
|
||||
bl __check_nan_d2
|
||||
mov lr, ip
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.section .text
|
||||
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
// __check_nan_d1: NaN pre-check for one-argument double functions.
// Called by wrapper_func_d1 with the double argument in r0:r1 and the original
// return address in ip (the wrapper copies lr to ip before the bl).
// If the high word r1 has an all-ones exponent and a non-zero value in its 20
// mantissa bits, returns directly to the original caller through ip with r0:r1
// untouched, so the NaN argument becomes the result. Otherwise returns to the
// wrapper through lr. The low word r0 is not examined.
// Clobbers r2, r3 and the flags.
.thumb_func
|
||||
__check_nan_d1:
|
||||
movs r3, #1
|
||||
// r3 = 1 << 21: the lowest exponent bit once the sign has been shifted out
lsls r3, #21
|
||||
// r2 = high word without the sign bit (exponent now in bits 31..21)
lsls r2, r1, #1
|
||||
// carry out <=> exponent all ones; remaining result = high mantissa bits << 1
adds r2, r3
|
||||
// hi = carry set and result non-zero, i.e. the NaN pattern described above
bhi 1f
|
||||
bx lr
|
||||
1:
|
||||
bx ip
|
||||
|
||||
.thumb_func
|
||||
__check_nan_d2:
|
||||
push {r0, r2}
|
||||
movs r2, #1
|
||||
lsls r2, #21
|
||||
lsls r0, r1, #1
|
||||
adds r0, r2
|
||||
bhi 1f
|
||||
lsls r0, r3, #1
|
||||
adds r0, r2
|
||||
bhi 2f
|
||||
pop {r0, r2}
|
||||
bx lr
|
||||
2:
|
||||
pop {r0, r2}
|
||||
mov r0, r2
|
||||
mov r1, r3
|
||||
bx ip
|
||||
1:
|
||||
pop {r0, r2}
|
||||
bx ip
|
||||
#endif
|
||||
|
||||
.macro table_tail_call SF_TABLE_OFFSET
|
||||
push {r3, r4}
|
||||
#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
#ifndef NDEBUG
|
||||
movs r3, #0
|
||||
mov ip, r3
|
||||
#endif
|
||||
#endif
|
||||
ldr r3, =sd_table
|
||||
ldr r3, [r3, #\SF_TABLE_OFFSET]
|
||||
str r3, [sp, #4]
|
||||
pop {r3, pc}
|
||||
.endm
|
||||
|
||||
// shimmable_table_tail_call SF_TABLE_OFFSET shim
// Tail-calls the function whose address is stored at sd_table + SF_TABLE_OFFSET.
// r3 is restored before the jump and lr is not modified, so the callee returns
// straight to the caller of the code that used this macro.
// When PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED, ip is loaded from pc
// just before the jump, and the table offset, a 0xdf byte and the address of
// "shim" are emitted as data right after the jump.
// NOTE(review): presumably a fallback table entry uses ip to find that data and
// redirect to the shim on V1 ROMs - confirm against the sd_table setup code.
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
|
||||
// r4 is pushed only to reserve the stack slot that later receives the target
push {r3, r4}
|
||||
ldr r3, =sd_table
|
||||
ldr r3, [r3, #\SF_TABLE_OFFSET]
|
||||
#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
mov ip, pc
|
||||
#endif
|
||||
// overwrite the reserved slot with the target address
str r3, [sp, #4]
|
||||
// restore r3 and jump: the second popped word goes to pc
pop {r3, pc}
|
||||
#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
.byte \SF_TABLE_OFFSET, 0xdf
|
||||
.word \shim
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro double_wrapper_section func
|
||||
double_section WRAPPER_FUNC_NAME(\func)
|
||||
.endm
|
||||
|
||||
double_section push_r8_r11
|
||||
regular_func push_r8_r11
|
||||
mov r4,r8
|
||||
mov r5,r9
|
||||
mov r6,r10
|
||||
mov r7,r11
|
||||
push {r4-r7}
|
||||
bx r14
|
||||
|
||||
double_section pop_r8_r11
|
||||
regular_func pop_r8_r11
|
||||
pop {r4-r7}
|
||||
mov r8,r4
|
||||
mov r9,r5
|
||||
mov r10,r6
|
||||
mov r11,r7
|
||||
bx r14
|
||||
|
||||
// note generally each function is in a separate section unless there is fall thru or branching between them
|
||||
// note dadd, dsub, dmul, ddiv are so tiny and just defer to rom so are lumped together so they can share constant pool
|
||||
|
||||
// note functions are word aligned except where they are an odd number of linear instructions
|
||||
|
||||
// double FUNC_NAME(__aeabi_dadd)(double, double) double-precision addition
|
||||
double_wrapper_section __aeabi_darithmetic
|
||||
// double FUNC_NAME(__aeabi_drsub)(double x, double y) double-precision reverse subtraction, y - x
|
||||
|
||||
// drsub first because it is the only one that needs alignment
|
||||
.align 2
|
||||
wrapper_func __aeabi_drsub
|
||||
eors r0, r1
|
||||
eors r1, r0
|
||||
eors r0, r1
|
||||
// fall thru
|
||||
|
||||
// double FUNC_NAME(__aeabi_dsub)(double x, double y) double-precision subtraction, x - y
|
||||
wrapper_func_d2 __aeabi_dsub
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
// we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
|
||||
mov ip, r0
|
||||
mov r0, r1
|
||||
eors r0, r3
|
||||
bmi 1f // different signs
|
||||
mov r0, ip
|
||||
push {r0-r3, lr}
|
||||
bl 2f
|
||||
b ddiv_dsub_nan_helper
|
||||
1:
|
||||
mov r0, ip
|
||||
2:
|
||||
#endif
|
||||
shimmable_table_tail_call SF_TABLE_FSUB dsub_shim
|
||||
|
||||
wrapper_func_d2 __aeabi_dadd
|
||||
shimmable_table_tail_call SF_TABLE_FADD dadd_shim
|
||||
|
||||
// double FUNC_NAME(__aeabi_ddiv)(double n, double d) double-precision division, n / d
|
||||
wrapper_func_d2 __aeabi_ddiv
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
push {r0-r3, lr}
|
||||
bl 1f
|
||||
b ddiv_dsub_nan_helper
|
||||
1:
|
||||
#endif
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
|
||||
mov ip, r2
|
||||
ldr r2, =(SIO_BASE)
|
||||
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
|
||||
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
|
||||
bcs ddiv_save_state
|
||||
mov r2, ip
|
||||
#else
|
||||
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
|
||||
push {r4, lr}
|
||||
mrs r4, PRIMASK
|
||||
cpsid i
|
||||
bl ddiv_shim_call
|
||||
msr PRIMASK, r4
|
||||
pop {r4, pc}
|
||||
#endif
|
||||
ddiv_shim_call:
|
||||
shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim
|
||||
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
ddiv_save_state:
|
||||
ldr r2, =(SIO_BASE)
|
||||
save_div_state_and_lr
|
||||
mov r2, ip
|
||||
bl ddiv_shim_call
|
||||
ldr r2, =(SIO_BASE)
|
||||
restore_div_state_and_return
|
||||
#endif
|
||||
|
||||
// ddiv_dsub_nan_helper: post-process the result of the dsub / ddiv call when
// PICO_DOUBLE_PROPAGATE_NANS is enabled.
// In:   r0:r1 = result of the operation. The frame created by the caller's
//       "push {r0-r3, lr}" is still on the stack, so [sp, #4] is the high word
//       of the first operand and [sp, #12] the high word of the second.
// Out:  r0:r1 = result, converted to a NaN when the result and BOTH operands
//       have an all-ones exponent (infinite op infinite).
//       Discards the saved r0-r3 and returns through the saved lr.
// Clobbers r2, r3 and the flags.
ddiv_dsub_nan_helper:
#if PICO_DOUBLE_PROPAGATE_NANS
    // check for infinite op infinite (or rather check for infinite result with both
    // operands being infinite)
    lsls r2, r1, #1             // drop the sign bit of the result
    asrs r2, r2, #21            // sign-extend the 11-bit exponent: -1 iff all ones
    adds r2, #1
    beq 2f                      // exponent all ones: inspect the operands
    add sp, #16                 // discard saved r0-r3
    pop {pc}
2:
    ldr r2, [sp, #4]            // high word of first operand
    ldr r3, [sp, #12]           // high word of second operand
    lsls r2, #1
    asrs r2, r2, #21
    lsls r3, #1
    // A double has an 11-bit exponent, so shift by 21 exactly as for the first
    // operand. Shifting by 24 tests only the top 8 exponent bits, which makes
    // any finite value with exponent >= 0x7f8 look infinite and wrongly turns
    // e.g. inf / 1e307 into a NaN.
    asrs r3, r3, #21
    ands r2, r3                 // -1 only if both exponents are all ones
    adds r2, #1
    bne 3f
    // infinite to nan
    movs r2, #1
    lsls r2, #19
    orrs r1, r2                 // set the top mantissa bit of the result
3:
    add sp, #16                 // discard saved r0-r3
    pop {pc}
#endif
|
||||
|
||||
// double FUNC_NAME(__aeabi_dmul)(double, double) double-precision multiplication
|
||||
wrapper_func_d2 __aeabi_dmul
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
push {r0-r3, lr}
|
||||
bl 1f
|
||||
|
||||
// check for multiplication of infinite by zero (or rather check for infinite result with either
|
||||
// operand 0)
|
||||
lsls r3, r1, #1
|
||||
asrs r3, r3, #21
|
||||
adds r3, #1
|
||||
beq 2f
|
||||
add sp, #16
|
||||
pop {pc}
|
||||
2:
|
||||
ldr r2, [sp, #4]
|
||||
ldr r3, [sp, #12]
|
||||
ands r2, r3
|
||||
bne 3f
|
||||
// infinite to nan
|
||||
movs r2, #1
|
||||
lsls r2, #19
|
||||
orrs r1, r2
|
||||
3:
|
||||
add sp, #16
|
||||
pop {pc}
|
||||
1:
|
||||
#endif
|
||||
shimmable_table_tail_call SF_TABLE_FMUL dmul_shim
|
||||
|
||||
// void FUNC_NAME(__aeabi_cdrcmple)(double, double) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
|
||||
double_wrapper_section __aeabi_cdcmple
|
||||
|
||||
wrapper_func __aeabi_cdrcmple
|
||||
push {r0-r7,r14}
|
||||
eors r0, r2
|
||||
eors r2, r0
|
||||
eors r0, r2
|
||||
eors r1, r3
|
||||
eors r3, r1
|
||||
eors r1, r3
|
||||
b __aeabi_dfcmple_guts
|
||||
|
||||
// NOTE these share an implementation as we have no excepting NaNs.
|
||||
// void FUNC_NAME(__aeabi_cdcmple)(double, double) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
|
||||
// void FUNC_NAME(__aeabi_cdcmpeq)(double, double) non-excepting equality comparison [1], result in PSR ZC flags
|
||||
@ compare r0:r1 against r2:r3; r0-r7 are pushed on entry and popped on return,
|
||||
@ so the result is reported in the PSR flags only
|
||||
.align 2
|
||||
wrapper_func __aeabi_cdcmple
|
||||
wrapper_func __aeabi_cdcmpeq
|
||||
push {r0-r7,r14}
|
||||
__aeabi_dfcmple_guts:
|
||||
ldr r7,=0x7ff @ flush NaNs and denormals
|
||||
lsls r4,r1,#1
|
||||
lsrs r4,#21
|
||||
beq 1f
|
||||
cmp r4,r7
|
||||
bne 2f
|
||||
lsls r4, r1, #12
|
||||
bhi 7f
|
||||
1:
|
||||
movs r0,#0
|
||||
lsrs r1,#20
|
||||
lsls r1,#20
|
||||
2:
|
||||
lsls r4,r3,#1
|
||||
lsrs r4,#21
|
||||
beq 1f
|
||||
cmp r4,r7
|
||||
bne 2f
|
||||
lsls r4, r3, #12
|
||||
bhi 7f
|
||||
1:
|
||||
movs r2,#0
|
||||
lsrs r3,#20
|
||||
lsls r3,#20
|
||||
2:
|
||||
movs r6,#1
|
||||
eors r3,r1
|
||||
bmi 4f @ opposite signs? then can proceed on basis of sign of x
|
||||
eors r3,r1 @ restore r3
|
||||
bpl 2f
|
||||
cmp r3,r1
|
||||
bne 7f
|
||||
1:
|
||||
cmp r2,r0
|
||||
7:
|
||||
pop {r0-r7,r15}
|
||||
2:
|
||||
cmp r1,r3
|
||||
bne 7b
|
||||
1:
|
||||
cmp r0,r2
|
||||
pop {r0-r7,r15}
|
||||
4:
|
||||
orrs r3,r1 @ make -0==+0
|
||||
adds r3,r3
|
||||
orrs r3,r0
|
||||
orrs r3,r2
|
||||
beq 7b
|
||||
mvns r1, r1 @ carry inverse of r1 sign
|
||||
adds r1, r1
|
||||
pop {r0-r7,r15}
|
||||
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmpeq)(double, double) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
|
||||
double_wrapper_section __aeabi_dcmpeq
|
||||
.align 2
|
||||
wrapper_func __aeabi_dcmpeq
|
||||
push {lr}
|
||||
bl __aeabi_cdcmpeq
|
||||
beq 1f
|
||||
movs r0, #0
|
||||
pop {pc}
|
||||
1:
|
||||
movs r0, #1
|
||||
pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmplt)(double, double) result (1, 0) denotes (<, ?>=) [2], use for C <
|
||||
double_wrapper_section __aeabi_dcmplt
|
||||
.align 2
|
||||
wrapper_func __aeabi_dcmplt
|
||||
push {lr}
|
||||
bl __aeabi_cdcmple
|
||||
sbcs r0, r0
|
||||
pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmple)(double, double) result (1, 0) denotes (<=, ?>) [2], use for C <=
|
||||
double_wrapper_section __aeabi_dcmple
|
||||
.align 2
|
||||
wrapper_func __aeabi_dcmple
|
||||
push {lr}
|
||||
bl __aeabi_cdcmple
|
||||
bls 1f
|
||||
movs r0, #0
|
||||
pop {pc}
|
||||
1:
|
||||
movs r0, #1
|
||||
pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmpge)(double, double) result (1, 0) denotes (>=, ?<) [2], use for C >=
|
||||
double_wrapper_section __aeabi_dcmpge
|
||||
.align 2
|
||||
wrapper_func __aeabi_dcmpge
|
||||
push {lr}
|
||||
// because of NaNs it is better to reverse the args than the result
|
||||
bl __aeabi_cdrcmple
|
||||
bls 1f
|
||||
movs r0, #0
|
||||
pop {pc}
|
||||
1:
|
||||
movs r0, #1
|
||||
pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmpgt)(double, double) result (1, 0) denotes (>, ?<=) [2], use for C >
|
||||
double_wrapper_section __aeabi_dcmpgt
|
||||
wrapper_func __aeabi_dcmpgt
|
||||
push {lr}
|
||||
// because of NaNs it is better to reverse the args than the result
|
||||
bl __aeabi_cdrcmple
|
||||
sbcs r0, r0
|
||||
pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_dcmpun)(double, double) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
|
||||
double_wrapper_section __aeabi_dcmpun
|
||||
wrapper_func __aeabi_dcmpun
|
||||
movs r0, #1
|
||||
lsls r0, #21
|
||||
lsls r2, r1, #1
|
||||
adds r2, r0
|
||||
bhi 1f
|
||||
lsls r2, r3, #1
|
||||
adds r2, r0
|
||||
bhi 1f
|
||||
movs r0, #0
|
||||
bx lr
|
||||
1:
|
||||
movs r0, #1
|
||||
bx lr
|
||||
|
||||
// double FUNC_NAME(__aeabi_ui2d)(unsigned) unsigned to double (double precision) conversion
// NOTE(review): no label is defined in this section, and __aeabi_ui2d itself is
// declared in the __aeabi_i2d section that follows, so the tail call below looks
// unreachable - confirm and consider removing it.
|
||||
double_wrapper_section __aeabi_ui2d
|
||||
shimmable_table_tail_call SF_TABLE_UINT2FLOAT uint2double_shim
|
||||
|
||||
double_wrapper_section __aeabi_i2d
|
||||
|
||||
wrapper_func __aeabi_ui2d
|
||||
movs r1, #0
|
||||
cmp r0, #0
|
||||
bne 2f
|
||||
1:
|
||||
bx lr
|
||||
// double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion
|
||||
wrapper_func __aeabi_i2d
|
||||
asrs r1, r0, #31
|
||||
eors r0, r1
|
||||
subs r0, r1
|
||||
beq 1b
|
||||
lsls r1, #31
|
||||
2:
|
||||
push {r0, r1, r4, lr}
|
||||
ldr r3, =sf_clz_func
|
||||
ldr r3, [r3]
|
||||
blx r3
|
||||
pop {r2, r3}
|
||||
adds r4, r0, #1
|
||||
lsls r2, r4
|
||||
lsls r0, r2, #20
|
||||
lsrs r2, #12
|
||||
ldr r1,=1055
|
||||
subs r1, r4
|
||||
lsls r1, #20
|
||||
orrs r1, r3
|
||||
orrs r1, r2
|
||||
pop {r4, pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_d2iz)(double) double (double precision) to integer C-style conversion [3]
|
||||
double_wrapper_section __aeabi_d2iz
|
||||
wrapper_func __aeabi_d2iz
|
||||
regular_func double2int_z
|
||||
push {r4, lr}
|
||||
lsls r4, r1, #1
|
||||
lsrs r2, r4, #21
|
||||
movs r3, #0x80
|
||||
adds r2, r3
|
||||
lsls r3, #3
|
||||
subs r2, r3
|
||||
lsls r3, #21
|
||||
cmp r2, #126
|
||||
ble 1f
|
||||
subs r2, #158
|
||||
bge 2f
|
||||
asrs r4, r1, #31
|
||||
lsls r1, #12
|
||||
lsrs r1, #1
|
||||
orrs r1, r3
|
||||
negs r2, r2
|
||||
lsrs r1, r2
|
||||
lsls r4, #1
|
||||
adds r4, #1
|
||||
adds r2, #21
|
||||
cmp r2, #32
|
||||
bge 3f
|
||||
lsrs r0, r2
|
||||
orrs r0, r1
|
||||
muls r0, r4
|
||||
pop {r4, pc}
|
||||
1:
|
||||
movs r0, #0
|
||||
pop {r4, pc}
|
||||
3:
|
||||
mov r0, r1
|
||||
muls r0, r4
|
||||
pop {r4, pc}
|
||||
2:
|
||||
// overflow
|
||||
lsrs r0, r1, #31
|
||||
adds r0, r3
|
||||
subs r0, #1
|
||||
pop {r4, pc}
|
||||
|
||||
double_section double2int
|
||||
regular_func double2int
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2INT double2int_shim
|
||||
|
||||
// unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3]
|
||||
double_wrapper_section __aeabi_d2uiz
|
||||
wrapper_func __aeabi_d2uiz
|
||||
regular_func double2uint
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim
|
||||
|
||||
double_section fix2double
|
||||
regular_func fix2double
|
||||
shimmable_table_tail_call SF_TABLE_FIX2FLOAT fix2double_shim
|
||||
|
||||
double_section ufix2double
|
||||
regular_func ufix2double
|
||||
shimmable_table_tail_call SF_TABLE_UFIX2FLOAT ufix2double_shim
|
||||
|
||||
double_section fix642double
|
||||
regular_func fix642double
|
||||
shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642double_shim
|
||||
|
||||
// ufix642double: tail-calls the sd_table entry at SF_TABLE_UFIX642FLOAT
// (shim: ufix642double_shim). Arguments and result are passed through untouched.
// The section is named after the function, like the neighbouring conversion
// routines, instead of sharing the ufix2double section name.
double_section ufix642double
regular_func ufix642double
    shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642double_shim
|
||||
|
||||
// double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion
|
||||
double_wrapper_section __aeabi_l2d
|
||||
wrapper_func __aeabi_l2d
|
||||
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim
|
||||
|
||||
// double FUNC_NAME(__aeabi_ul2d)(unsigned long long) unsigned long long to double (double precision) conversion
|
||||
double_wrapper_section __aeabi_ul2d
|
||||
wrapper_func __aeabi_ul2d
|
||||
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim
|
||||
|
||||
// long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3]
|
||||
double_wrapper_section __aeabi_d2lz
|
||||
wrapper_func __aeabi_d2lz
|
||||
regular_func double2int64_z
|
||||
cmn r1, r1
|
||||
bcc double2int64
|
||||
push {lr}
|
||||
lsls r1, #1
|
||||
lsrs r1, #1
|
||||
movs r2, #0
|
||||
bl double2ufix64
|
||||
cmp r1, #0
|
||||
bmi 1f
|
||||
movs r2, #0
|
||||
negs r0, r0
|
||||
sbcs r2, r1
|
||||
mov r1, r2
|
||||
pop {pc}
|
||||
1:
|
||||
movs r1, #128
|
||||
lsls r1, #24
|
||||
movs r0, #0
|
||||
pop {pc}
|
||||
|
||||
double_section double2int64
|
||||
regular_func double2int64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 double2int64_shim
|
||||
|
||||
// unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3]
|
||||
double_wrapper_section __aeabi_d2ulz
|
||||
wrapper_func __aeabi_d2ulz
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim
|
||||
|
||||
double_section double2fix64
|
||||
regular_func double2fix64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim
|
||||
|
||||
double_section double2ufix64
|
||||
regular_func double2ufix64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim
|
||||
|
||||
double_section double2fix
|
||||
regular_func double2fix
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
|
||||
|
||||
double_section double2ufix
|
||||
regular_func double2ufix
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim
|
||||
|
||||
double_wrapper_section __aeabi_d2f
|
||||
1:
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
// copy sign bit and 23 NAN id bits into sign bit and significant id bits, also set high id bit
|
||||
|
||||
lsrs r0, #30
|
||||
lsls r2, r1, #12
|
||||
lsrs r2, #9
|
||||
asrs r1, #22
|
||||
lsls r1, #22
|
||||
orrs r0, r1
|
||||
orrs r0, r2
|
||||
bx lr
|
||||
#endif
|
||||
wrapper_func __aeabi_d2f
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
movs r3, #1
|
||||
lsls r3, #21
|
||||
lsls r2, r1, #1
|
||||
adds r2, r3
|
||||
bhi 1b
|
||||
#endif
|
||||
// note double->float in double table at same index as float->double in double table
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE double2float_shim
|
||||
|
||||
// double sqrt(double)
// With PICO_DOUBLE_PROPAGATE_NANS the wrapper_func_d1 prologue returns a NaN
// argument unchanged; otherwise (or for non-NaN input) this tail-calls the
// sd_table entry at SF_TABLE_FSQRT (shim: dsqrt_shim).
// Section name corrected from the misspelled "srqt".
double_wrapper_section sqrt
wrapper_func_d1 sqrt
    shimmable_table_tail_call SF_TABLE_FSQRT dsqrt_shim
|
||||
|
||||
double_wrapper_section sincostan_remainder
|
||||
regular_func sincostan_remainder
|
||||
ldr r2, =0x54442D18 // 2 * M_PI
|
||||
ldr r3, =0x401921FB
|
||||
push {lr}
|
||||
// note remainder only uses the divider thru integer divider functions
|
||||
// which save and restore themselves
|
||||
bl remainder
|
||||
pop {pc}
|
||||
|
||||
double_wrapper_section cos
|
||||
// don't use _d1 as we're doing a range check anyway and infinities/NaNs are bigger than 1024
|
||||
wrapper_func cos
|
||||
// rom version only works for -1024 < angle < 1024
|
||||
lsls r2, r1, #2
|
||||
bcc 1f
|
||||
lsrs r2, #22
|
||||
cmp r2, #9
|
||||
bge 2f
|
||||
1:
|
||||
shimmable_table_tail_call SF_TABLE_FCOS dcos_shim
|
||||
2:
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
lsls r2, r1, #1
|
||||
asrs r2, #21
|
||||
adds r2, #1
|
||||
bne 3f
|
||||
// infinite to nan
|
||||
movs r2, #1
|
||||
lsls r2, #19
|
||||
orrs r1, r2
|
||||
bx lr
|
||||
3:
|
||||
#endif
|
||||
push {lr}
|
||||
bl sincostan_remainder
|
||||
pop {r2}
|
||||
mov lr, r2
|
||||
b 1b
|
||||
|
||||
double_wrapper_section sin
|
||||
// don't use _d1 as we're doing a range check anyway and infinities/NaNs are bigger than 1024
|
||||
wrapper_func sin
|
||||
// rom version only works for -1024 < angle < 1024
|
||||
lsls r2, r1, #2
|
||||
bcc 1f
|
||||
lsrs r2, #22
|
||||
cmp r2, #9
|
||||
bge 2f
|
||||
1:
|
||||
shimmable_table_tail_call SF_TABLE_FSIN dsin_shim
|
||||
2:
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
lsls r2, r1, #1
|
||||
asrs r2, #21
|
||||
adds r2, #1
|
||||
bne 3f
|
||||
// infinite to nan
|
||||
movs r2, #1
|
||||
lsls r2, #19
|
||||
orrs r1, r2
|
||||
bx lr
|
||||
3:
|
||||
#endif
|
||||
push {lr}
|
||||
bl sincostan_remainder
|
||||
pop {r2}
|
||||
mov lr, r2
|
||||
b 1b
|
||||
|
||||
double_wrapper_section sincos
|
||||
// out of line remainder code for abs(angle)>=1024
|
||||
2:
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
lsls r2, r1, #1
|
||||
asrs r2, #21
|
||||
adds r2, #1
|
||||
bne 3f
|
||||
// infinite to nan
|
||||
movs r2, #1
|
||||
lsls r2, #19
|
||||
orrs r1, r2
|
||||
pop {r4-r5}
|
||||
stmia r4!, {r0, r1}
|
||||
stmia r5!, {r0, r1}
|
||||
pop {r4, r5, pc}
|
||||
3:
|
||||
#endif
|
||||
push {lr}
|
||||
bl sincostan_remainder
|
||||
pop {r2}
|
||||
mov lr, r2
|
||||
b 1f // continue with sincos
|
||||
|
||||
wrapper_func sincos
|
||||
push {r2-r5, lr}
|
||||
// rom version only works for -1024 < angle < 1024
|
||||
lsls r2, r1, #2
|
||||
bcc 1f
|
||||
lsrs r2, #22
|
||||
cmp r2, #9
|
||||
bge 2b
|
||||
1:
|
||||
bl 2f // call the shim
|
||||
pop {r4-r5}
|
||||
stmia r4!, {r0, r1}
|
||||
stmia r5!, {r2, r3}
|
||||
pop {r4, r5, pc}
|
||||
|
||||
2:
|
||||
shimmable_table_tail_call SF_TABLE_V3_FSINCOS sincos_shim_bootstrap
|
||||
|
||||
.thumb_func
|
||||
sincos_shim_bootstrap:
|
||||
push {r2, r3, r4}
|
||||
movs r3, #0x13
|
||||
ldrb r3, [r3]
|
||||
#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
cmp r3, #1
|
||||
bne 1f
|
||||
ldr r3, =dsincos_shim
|
||||
b 2f
|
||||
#endif
|
||||
1:
|
||||
ldr r3, =dsincos_shim_v2
|
||||
2:
|
||||
ldr r2, =sd_table
|
||||
str r3, [r2, #SF_TABLE_V3_FSINCOS]
|
||||
str r3, [sp, #8]
|
||||
pop {r2, r3, pc}
|
||||
.thumb_func
|
||||
dsincos_shim_v2:
|
||||
push {r4-r7,r14}
|
||||
bl push_r8_r11
|
||||
bl v2_rom_dsincos_internal
|
||||
mov r12,r0 @ save ε
|
||||
bl v2_rom_dcos_finish
|
||||
push {r0,r1}
|
||||
mov r0,r12
|
||||
bl v2_rom_dsin_finish
|
||||
pop {r2,r3}
|
||||
bl pop_r8_r11
|
||||
pop {r4-r7,r15}
|
||||
.thumb_func
|
||||
v2_rom_dsincos_internal:
|
||||
push {r0, lr}
|
||||
ldr r0, =0x3855
|
||||
str r0, [sp, #4]
|
||||
pop {r0, pc}
|
||||
.thumb_func
|
||||
v2_rom_dcos_finish:
|
||||
push {r0, r1}
|
||||
ldr r0, =0x389d
|
||||
str r0, [sp, #4]
|
||||
pop {r0, pc}
|
||||
.thumb_func
|
||||
v2_rom_dsin_finish:
|
||||
push {r0, r1}
|
||||
ldr r0, =0x38d9
|
||||
str r0, [sp, #4]
|
||||
pop {r0, pc}
|
||||
|
||||
double_wrapper_section tan
|
||||
// don't use _d1 as we're doing a range check anyway and infinities/NaNs are bigger than 1024
|
||||
// tan wrapper.
// In:  r0:r1 = argument; r1 is the high word (the sign/exponent tests below read r1).
// Flow: range-check the exponent, then tail-call the routine selected by
//       SF_TABLE_FTAN. Arguments with |x| >= 1024 go to dtan_angle_out_of_range,
//       which calls sincostan_remainder (defined elsewhere) and retries.
// The call is protected against interrupts either by saving/restoring the
// divider state when it is dirty, or by masking interrupts, depending on
// PICO_DIVIDER_DISABLE_INTERRUPTS.
wrapper_func tan
|
||||
// rom version only works for -1024 < angle < 1024
|
||||
// shift out the sign bit and the top exponent bit; C = top exponent bit (bit 30 of r1)
lsls r2, r1, #2
|
||||
// top exponent bit clear: exponent field < 0x400, always in range
bcc dtan_in_range
|
||||
// r2 = low 10 bits of the exponent field
lsrs r2, #22
|
||||
// exponent field >= 0x409 means |x| >= 2^10
cmp r2, #9
|
||||
bge dtan_angle_out_of_range
|
||||
dtan_in_range:
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
|
||||
// r2 is parked in ip while r2 is used to read the divider CSR
mov ip, r2
|
||||
ldr r2, =(SIO_BASE)
|
||||
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
|
||||
// move the DIRTY bit into the carry flag
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
|
||||
bcs dtan_save_state
|
||||
mov r2, ip
|
||||
#else
|
||||
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
|
||||
push {r4, lr}
|
||||
// r4 keeps the previous PRIMASK so it can be restored after the call
mrs r4, PRIMASK
|
||||
cpsid i
|
||||
bl dtan_shim_call
|
||||
msr PRIMASK, r4
|
||||
pop {r4, pc}
|
||||
#endif
|
||||
dtan_shim_call:
|
||||
shimmable_table_tail_call SF_TABLE_FTAN dtan_shim
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
// divider state was dirty: save it (and lr), make the call, then restore and return
dtan_save_state:
|
||||
ldr r2, =(SIO_BASE)
|
||||
save_div_state_and_lr
|
||||
mov r2, ip
|
||||
bl dtan_shim_call
|
||||
ldr r2, =(SIO_BASE)
|
||||
restore_div_state_and_return
|
||||
#endif
|
||||
dtan_angle_out_of_range:
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
// drop the sign bit, then sign-extend the 11-bit exponent field:
// r2 == -1 only when the exponent field is all ones (Inf or NaN)
lsls r2, r1, #1
|
||||
asrs r2, #21
|
||||
adds r2, #1
|
||||
bne 3f
|
||||
// infinite to nan
|
||||
// set bit 19 of the high word (top mantissa bit) so the returned value is a NaN
movs r2, #1
|
||||
lsls r2, #19
|
||||
orrs r1, r2
|
||||
bx lr
|
||||
3:
|
||||
#endif
|
||||
// lr is saved across the helper call and restored through r2
// (r2 is a scratch register here; r0:r1 are left as the helper returned them)
push {lr}
|
||||
bl sincostan_remainder
|
||||
pop {r2}
|
||||
mov lr, r2
|
||||
b dtan_in_range
|
||||
|
||||
// atan2 wrapper: the whole body is a tail call to the routine selected by
// SF_TABLE_FATAN2, with datan2_shim passed as the shim name.
// NOTE(review): wrapper_func_d2 and shimmable_table_tail_call are macros defined
// outside this view; the _d2 suffix presumably adds handling for two double arguments.
double_wrapper_section atan2
|
||||
wrapper_func_d2 atan2
|
||||
shimmable_table_tail_call SF_TABLE_FATAN2 datan2_shim
|
||||
|
||||
// exp wrapper: the whole body is a tail call to the routine selected by
// SF_TABLE_FEXP, with dexp_shim passed as the shim name.
// NOTE(review): wrapper_func_d1 and shimmable_table_tail_call are macros defined
// outside this view; the _d1 suffix presumably adds handling for one double argument.
double_wrapper_section exp
|
||||
wrapper_func_d1 exp
|
||||
shimmable_table_tail_call SF_TABLE_FEXP dexp_shim
|
||||
|
||||
// log wrapper: the whole body is a tail call to the routine selected by
// SF_TABLE_FLN, with dln_shim passed as the shim name.
// NOTE(review): wrapper_func_d1 and shimmable_table_tail_call are macros defined
// outside this view; the _d1 suffix presumably adds handling for one double argument.
double_wrapper_section log
|
||||
wrapper_func_d1 log
|
||||
shimmable_table_tail_call SF_TABLE_FLN dln_shim
|
||||
|
||||
#endif
|
387
lib/main/pico-sdk/rp2_common/pico_double/double_conv_m33.S
Normal file
387
lib/main/pico-sdk/rp2_common/pico_double/double_conv_m33.S
Normal file
|
@ -0,0 +1,387 @@
|
|||
/*
|
||||
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
#if HAS_DOUBLE_COPROCESSOR
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
@ double_section name
@ Switch to an allocatable, executable ("ax") section for \name.
@ The section name is built with RAM_SECTION_NAME when PICO_DOUBLE_IN_RAM is
@ non-zero and with SECTION_NAME otherwise (both defined outside this file).
.macro double_section name
|
||||
#if PICO_DOUBLE_IN_RAM
|
||||
.section RAM_SECTION_NAME(\name), "ax"
|
||||
#else
|
||||
.section SECTION_NAME(\name), "ax"
|
||||
#endif
|
||||
.endm
|
||||
|
||||
@ double_wrapper_section func
@ Same as double_section, but the section is named after WRAPPER_FUNC_NAME(\func)
@ (defined outside this file) rather than after \func directly.
.macro double_wrapper_section func
|
||||
double_section WRAPPER_FUNC_NAME(\func)
|
||||
.endm
|
||||
|
||||
double_wrapper_section conv_tod
|
||||
|
||||
@ convert int64 to double, rounding
@ in:  r0:r1 = signed 64-bit integer (r0 low word, r1 high word)
@ out: r0:r1 = packed IEEE double
wrapper_func __aeabi_l2d
regular_func int642double
    movs r2,#0                      @ integer input: no bits after the point; fall through
@ convert signed 64-bit fix to double, rounding (ties to even); number of r0:r1 bits after point in r2
@ Non-negative inputs branch to local label 10, the entry of the unsigned routine
@ that follows. Negative inputs are negated and converted here with the sign bit
@ set; if the negated high word is zero the 32-bit routine fix2double_neg is used.
@ On exponent overflow the result is -Inf and on underflow it is -0, the same
@ results fix2double_neg gives, so both negative paths agree.
regular_func fix642double
    cmp r1,#0
    bge 10f                         @ positive? can use unsigned code
    rsbs r0,#0
    sbc r1,r1,r1,lsl#1              @ make positive
    cbz r1,7f                       @ high word is zero?
    clz r3,r1
    subs r3,#11
    bmi 2f                          @ fewer than 11 leading zeros: shift right and round
    @ 11 or more leading zeros: shift left so the leading 1 lands on bit 20 of r1; exact
    rsbs r12,r3,#32
    lsrs r12,r0,r12
    lsls r0,r3
    lsls r1,r3
    orrs r1,r1,r12
    add r2,r2,r3
    rsbs r2,#0
    add r2,#0x3ff+19+32
    add r1,r1,r2,lsl#20             @ insert exponent
    orr r1,#0x80000000              @ result is negative
    mov r3,0x7fe
    cmp r2,r3
    it lo                           @ over/underflow?
    bxlo r14
    b 3f
7:
    mov r1,r2                       @ fix2double_neg takes the fraction bit count in r1
    b fix2double_neg
2:
    add r3,#33
    lsls r12,r0,r3                  @ rounding bit in carry, sticky bits in Z
    @ nothing from here to the beq/adcs below sets flags, so C and Z survive
    sub r3,#1
    lsl r12,r1,r3
    rsb r3,#32
    lsr r0,r3
    lsr r1,r3
    orr r0,r0,r12
    sub r2,r3,r2
    add r2,#0x3ff+19+32
    beq 4f                          @ potential rounding tie?
    adcs r0,r0,#0
5:
    adc r1,r1,r2,lsl#20             @ insert exponent, add rounding
    orr r1,#0x80000000              @ result is negative
    mov r3,0x7fe
    cmp r2,r3
    it lo
    bxlo r14
@ over/underflow?
@ Only the negative-input code above reaches this label (non-negative inputs
@ left through 10f), so the result carries the sign bit.
@ Flags are still those of "cmp r2,r3": ge means the exponent was too large.
3:
    mov r1,#0x80000000              @ underflow: -0
    it ge
    movtge r1,#0xfff0               @ overflow: -Inf
    mov r0,#0
    bx r14
@ NOTE(review): no branch in this function targets this label; kept as in the original
1:
    movs r1,#0
    bx r14
4:
    bcc 5b                          @ not a rounding tie after all
    adcs r0,r0,#0
    bic r0,r0,#1                    @ force to even
    b 5b
|
||||
|
||||
@ convert uint64 to double, rounding
@ in:  r0:r1 = unsigned 64-bit integer (r0 low word, r1 high word)
@ out: r0:r1 = packed IEEE double
wrapper_func __aeabi_ul2d
|
||||
regular_func uint642double
|
||||
movs r2,#0 @ fall through
|
||||
@ convert unsigned 64-bit fix to double, rounding; number of r0:r1 bits after point in r2
|
||||
@ Local label 10 below is also the target of the "bge 10f" in fix642double, which
@ sends non-negative signed inputs through this code.
@ If the high word is zero the 32-bit routine ufix2double is used instead.
regular_func ufix642double
|
||||
10:
|
||||
cbz r1,7f @ high word zero?
|
||||
clz r3,r1
|
||||
subs r3,#11
|
||||
@ fewer than 11 leading zeros: value must be shifted right and rounded
bmi 2f
|
||||
@ otherwise shift left so the leading 1 lands on bit 20 of r1; no rounding needed
rsbs r12,r3,#32
|
||||
lsrs r12,r0,r12
|
||||
lsls r0,r3
|
||||
lsls r1,r3
|
||||
orrs r1,r1,r12
|
||||
add r2,r2,r3
|
||||
rsbs r2,#0
|
||||
add r2,#0x3ff+19+32
|
||||
add r1,r1,r2,lsl#20 @ insert exponent
|
||||
mov r3,0x7fe
|
||||
cmp r2,r3
|
||||
it lo @ over/underflow?
|
||||
bxlo r14
|
||||
b 3f
|
||||
7:
|
||||
@ ufix2double takes the fraction bit count in r1
mov r1,r2
|
||||
b ufix2double
|
||||
2:
|
||||
add r3,#33
|
||||
lsls r12,r0,r3 @ rounding bit in carry, sticky bits in Z
|
||||
@ nothing from here to the beq/adcs below sets flags, so C and Z survive
sub r3,#1
|
||||
lsl r12,r1,r3
|
||||
rsb r3,#32
|
||||
lsr r0,r3
|
||||
lsr r1,r3
|
||||
orr r0,r0,r12
|
||||
@ push {r14}
|
||||
@ bl dumpreg
|
||||
@ pop {r14}
|
||||
sub r2,r3,r2
|
||||
add r2,#0x3ff+19+32
|
||||
beq 4f @ potential rounding tie?
|
||||
adcs r0,r0,#0
|
||||
5:
|
||||
adc r1,r1,r2,lsl#20 @ insert exponent, add rounding
|
||||
mov r3,0x7fe
|
||||
cmp r2,r3
|
||||
it lo
|
||||
bxlo r14
|
||||
@ over/underflow?
|
||||
@ flags are still those of "cmp r2,r3": ge selects +Inf, otherwise +0 is returned
3:
|
||||
mov r1,#0
|
||||
it ge
|
||||
movtge r1,#0x7ff0 @ overflow
|
||||
mov r0,#0
|
||||
bx r14
|
||||
@ NOTE(review): no branch in this function targets the next label
1:
|
||||
movs r1,#0
|
||||
bx r14
|
||||
4:
|
||||
bcc 5b @ not a rounding tie after all
|
||||
adcs r0,r0,#0
|
||||
bic r0,r0,#1 @ force to even
|
||||
b 5b
|
||||
|
||||
@ convert signed 32-bit fix to double
@ in:  r0 = signed 32-bit value, r1 = number of bits after the point
@ out: r0:r1 = packed IEEE double
@ Non-negative inputs are passed to ufix2double. fix2double_neg is a second entry
@ point that expects r0 to hold an already negated (positive, non-zero) magnitude;
@ fix642double branches to it.
@ No rounding code is present: the value is only shifted into the mantissa.
regular_func fix2double
|
||||
cmp r0,#0
|
||||
bge ufix2double @ positive? can use unsigned code
|
||||
rsbs r0,#0 @ make positive
|
||||
fix2double_neg:
|
||||
clz r3,r0
|
||||
subs r3,#11
|
||||
@ fewer than 11 leading zeros: the value straddles both result words
bmi 2f
|
||||
@ otherwise the whole value fits in the high word: leading 1 goes to bit 20
lsls r0,r3
|
||||
add r2,r1,r3
|
||||
rsbs r2,#0
|
||||
add r2,#0x3ff+19
|
||||
add r1,r0,r2,lsl#20 @ insert exponent
|
||||
orr r1,#0x80000000
|
||||
mov r0,#0
|
||||
mov r3,0x7fe
|
||||
cmp r2,r3
|
||||
it lo @ over/underflow?
|
||||
bxlo r14
|
||||
b 3f
|
||||
2:
|
||||
@ r3 = right shift needed; r12 receives the high word, r0 the bits shifted out below it
rsb r3,#0
|
||||
lsrs r12,r0,r3
|
||||
rsb r2,r3,#32
|
||||
lsls r0,r0,r2
|
||||
@ push {r14}
|
||||
@ bl dumpreg
|
||||
@ pop {r14}
|
||||
sub r2,r3,r1
|
||||
add r2,#0x3ff+19
|
||||
add r1,r12,r2,lsl#20 @ insert exponent
|
||||
orr r1,#0x80000000
|
||||
mov r3,0x7fe
|
||||
cmp r2,r3
|
||||
it lo
|
||||
bxlo r14
|
||||
@ over/underflow?
|
||||
@ flags are still those of "cmp r2,r3": ge selects -Inf, otherwise -0 is returned
3:
|
||||
mov r1,#0x80000000
|
||||
it ge
|
||||
movtge r1,#0xfff0 @ overflow
|
||||
mov r0,#0
|
||||
bx r14
|
||||
@ NOTE(review): no branch in this function targets the next label
1:
|
||||
movs r1,#0
|
||||
bx r14
|
||||
|
||||
@ convert unsigned 32-bit fix to double
@ in:  r0 = unsigned 32-bit value, r1 = number of bits after the point
@ out: r0:r1 = packed IEEE double (+0 when r0 is zero)
@ Also reached from fix2double (non-negative inputs) and from ufix642double
@ (high word zero).
@ No rounding code is present: the value is only shifted into the mantissa.
regular_func ufix2double
|
||||
cbz r0,1f @ zero? return it
|
||||
clz r3,r0
|
||||
subs r3,#11
|
||||
@ fewer than 11 leading zeros: the value straddles both result words
bmi 2f
|
||||
@ otherwise the whole value fits in the high word: leading 1 goes to bit 20
lsls r0,r3
|
||||
add r2,r1,r3
|
||||
rsbs r2,#0
|
||||
add r2,#0x3ff+19
|
||||
add r1,r0,r2,lsl#20 @ insert exponent
|
||||
mov r0,#0
|
||||
mov r3,0x7fe
|
||||
cmp r2,r3
|
||||
it lo @ over/underflow?
|
||||
bxlo r14
|
||||
b 3f
|
||||
2:
|
||||
@ r3 = right shift needed; r12 receives the high word, r0 the bits shifted out below it
rsbs r3,#0
|
||||
lsrs r12,r0,r3
|
||||
rsb r2,r3,#32
|
||||
lsls r0,r0,r2
|
||||
@ push {r14}
|
||||
@ bl dumpreg
|
||||
@ pop {r14}
|
||||
sub r2,r3,r1
|
||||
add r2,#0x3ff+19
|
||||
add r1,r12,r2,lsl#20 @ insert exponent
|
||||
mov r3,0x7fe
|
||||
cmp r2,r3
|
||||
it lo
|
||||
bxlo r14
|
||||
@ over/underflow?
|
||||
@ flags are still those of "cmp r2,r3": ge selects +Inf, otherwise +0 is returned
3:
|
||||
mov r1,#0
|
||||
it ge
|
||||
movtge r1,#0x7ff0 @ overflow
|
||||
mov r0,#0
|
||||
bx r14
|
||||
@ zero input: r0 is already 0, so clearing r1 returns +0
1:
|
||||
movs r1,#0
|
||||
bx r14
|
||||
|
||||
double_wrapper_section conv_dtoi64
|
||||
|
||||
@ convert double to signed int64, rounding towards 0, clamping
@ in:  r0:r1 = packed IEEE double
@ out: r0:r1 = signed 64-bit integer
wrapper_func __aeabi_d2lz
|
||||
regular_func double2int64_z
|
||||
movs r2,#0 @ fall through
|
||||
@ convert double in r0:r1 to signed fixed point in r0:r1, clamping
|
||||
@ r2 = number of bits after the point in the result
@ r12 keeps the sign and exponent word for the whole routine; its sign decides
@ whether the magnitude is negated before returning.
@ Exponent field 0 (zero and denormals) returns 0. +Inf, NaN and positive
@ overflow return 0x7fffffff:ffffffff; -Inf and negative overflow return
@ 0x80000000:00000000.
regular_func double2fix64_z
|
||||
sub r2,#0x3ff+52-1 @ remove exponent bias, compensate for mantissa length
|
||||
asrs r12,r1,#20 @ sign and exponent
|
||||
sub r3,r12,#1
|
||||
sub r1,r1,r3,lsl#20 @ install implied 1, clear exponent
|
||||
@ drop the sign: r3 = (exponent field - 1) in bits 31..21
lsls r3,#21
|
||||
@ push {r14}
|
||||
@ bl dumpreg
|
||||
@ pop {r14}
|
||||
cmp r3,#0xffc00000
|
||||
bhs 1f @ 0, ∞/NaN?
|
||||
adds r2,r2,r3,lsr#21 @ offset exponent by fix precision; r2 is now required left shift
|
||||
bmi 4f @ actually a right shift?
|
||||
cmp r2,#11 @ overflow?
|
||||
bge 5f
|
||||
lsls r1,r2
|
||||
rsbs r3,r2,#32
|
||||
lsrs r3,r0,r3
|
||||
orrs r1,r1,r3
|
||||
lsls r0,r2
|
||||
@ non-negative argument: return the magnitude, otherwise negate it
cmp r12,#0
|
||||
it ge
|
||||
bxge r14
|
||||
rsbs r0,#0
|
||||
sbc r1,r1,r1,lsl#1
|
||||
bx r14
|
||||
4:
|
||||
@ right shift of 1..31 bits: result still spans both words
adds r2,#32
|
||||
ble 6f @ result fits in low word?
|
||||
lsl r3,r1,r2
|
||||
rsbs r2,#32
|
||||
lsrs r1,r2
|
||||
lsrs r0,r2
|
||||
orrs r0,r0,r3
|
||||
cmp r12,#0
|
||||
it ge
|
||||
bxge r14
|
||||
rsbs r0,#0
|
||||
sbc r1,r1,r1,lsl#1
|
||||
bx r14
|
||||
6:
|
||||
@ right shift of 32 bits or more: only the high word contributes
rsbs r2,#0
|
||||
usat r2,#5,r2 @ underflow to 0
|
||||
lsrs r0,r1,r2
|
||||
movs r1,#0
|
||||
cmp r12,#0
|
||||
it ge
|
||||
bxge r14
|
||||
rsbs r0,#0
|
||||
sbc r1,r1,r1,lsl#1
|
||||
bx r14
|
||||
1:
|
||||
@ flags are still those of "cmp r3,#0xffc00000": eq means exponent field all ones
beq 3f @ ±∞/±NaN?
|
||||
2:
|
||||
movs r0,#0 @ ±0: return 0
|
||||
movs r1,#0
|
||||
bx r14
|
||||
3:
|
||||
orrs r1,r0,r1,lsl#12 @ mantissa field
|
||||
it ne @ NaN?
|
||||
movne r12,#0 @ treat NaNs as +∞
|
||||
@ here original argument was ±Inf or we have under/overflow
|
||||
5:
|
||||
mvn r1,#0x80000000
|
||||
add r1,r1,r12,lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff
|
||||
@ low word: all ones for a non-negative argument, zero for a negative one
mvn r0,r12,asr#31
|
||||
bx r14
|
||||
|
||||
double_wrapper_section conv_dtoui64
|
||||
|
||||
@ convert double to unsigned int64, rounding towards -Inf, clamping
@ in:  r0:r1 = packed IEEE double
@ out: r0:r1 = unsigned 64-bit integer
wrapper_func __aeabi_d2ulz
|
||||
regular_func double2uint64
|
||||
regular_func double2uint64_z
|
||||
movs r2,#0 @ fall through
|
||||
@ convert double in r0:r1 to unsigned fixed point in r0:r1, clamping
|
||||
@ r2 = number of bits after the point in the result
@ Exponent field 0 (zero and denormals) and every finite negative argument
@ return 0, as does -Inf. +Inf, NaN of either sign and overflow return all ones.
regular_func double2ufix64
|
||||
regular_func double2ufix64_z
|
||||
subw r2,r2,#0x3ff+52-1 @ remove exponent bias, compensate for mantissa length
|
||||
asrs r3,r1,#20 @ sign and exponent
|
||||
@ the next two instructions do not set flags, so N below is still the argument sign from asrs
sub r3,#1
|
||||
sub r1,r1,r3,lsl#20 @ install implied 1, clear exponent and sign
|
||||
bmi 7f @ argument negative?
|
||||
movw r12,#0x7fe
|
||||
cmp r3,r12
|
||||
bhs 1f @ 0, ∞/NaN?
|
||||
adds r2,r3 @ offset exponent by fix precision; r2 is now required left shift
|
||||
bmi 2f @ actually a right shift?
|
||||
cmp r2,#12 @ overflow?
|
||||
bge 4f
|
||||
lsls r1,r2
|
||||
rsbs r3,r2,#32
|
||||
lsrs r3,r0,r3
|
||||
lsls r0,r2
|
||||
orrs r1,r1,r3
|
||||
bx r14
|
||||
2:
|
||||
@ right shift of 1..31 bits: result still spans both words
adds r2,#32
|
||||
ble 5f @ result fits in low word?
|
||||
lsl r3,r1,r2
|
||||
rsbs r2,#32
|
||||
lsrs r1,r2
|
||||
lsrs r0,r2
|
||||
orrs r0,r0,r3
|
||||
bx r14
|
||||
5:
|
||||
@ right shift of 32 bits or more: only the high word contributes
rsbs r2,#0
|
||||
usat r2,#5,r2 @ underflow to 0
|
||||
lsrs r0,r1,r2
|
||||
movs r1,#0
|
||||
bx r14
|
||||
1:
|
||||
@ flags are still those of "cmp r3,r12": hi means the exponent field was 0 (r3 wrapped to -1)
bhi 3f @ 0? return 0
|
||||
4:
|
||||
@ here overflow has occurred
|
||||
mvn r0,#0
|
||||
mvn r1,#0
|
||||
bx r14
|
||||
7:
|
||||
@ negative argument: r3 equals 0xfffffffe only when the exponent field is all ones
cmp r3,#0xfffffffe
|
||||
bne 3f @ any finite negative value (including -0)? return 0
|
||||
orrs r2,r0,r1,lsl#12 @ mantissa field
|
||||
@ non-zero mantissa: negative NaN, returned as all ones; -Inf falls through to 0
bne 4b
|
||||
3:
|
||||
movs r0,#0
|
||||
movs r1,#0
|
||||
bx r14
|
||||
|
||||
#endif
|
608
lib/main/pico-sdk/rp2_common/pico_double/double_fma_dcp.S
Normal file
608
lib/main/pico-sdk/rp2_common/pico_double/double_fma_dcp.S
Normal file
|
@ -0,0 +1,608 @@
|
|||
/*
|
||||
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
|
||||
#if !HAS_DOUBLE_COPROCESSOR
|
||||
#error attempt to compile double_fma_rp2350 when there is no DCP
|
||||
#else
|
||||
|
||||
#include "hardware/dcp_instr.inc.S"
|
||||
#include "hardware/dcp_canned.inc.S"
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
// factor out save/restore (there is a copy in float code)
|
||||
|
||||
// double_section name
// Switch to an allocatable, executable ("ax") section for \name.
// The section name is built with RAM_SECTION_NAME when PICO_DOUBLE_IN_RAM is
// non-zero and with SECTION_NAME otherwise (both defined outside this file).
.macro double_section name
|
||||
#if PICO_DOUBLE_IN_RAM
|
||||
.section RAM_SECTION_NAME(\name), "ax"
|
||||
#else
|
||||
.section SECTION_NAME(\name), "ax"
|
||||
#endif
|
||||
.endm
|
||||
|
||||
// double_wrapper_section func
// Same as double_section, but the section is named after WRAPPER_FUNC_NAME(\func)
// (defined outside this file) rather than after \func directly.
.macro double_wrapper_section func
|
||||
double_section WRAPPER_FUNC_NAME(\func)
|
||||
.endm
|
||||
|
||||
// ============== STATE SAVE AND RESTORE ===============
|
||||
|
||||
// saving_func_return
// Common return sequence for the functions in this file: a plain "bx lr".
// When the entry stub has called __dcp_save_state, lr points at
// __dcp_restore_state, so this return runs the restore first.
.macro saving_func_return
|
||||
bx lr
|
||||
.endm
|
||||
|
||||
// __dcp_save_state / __dcp_restore_state
// Called by the entry stubs in this file ("push {lr}; bl __dcp_save_state; b 1f")
// when the coprocessor engaged flag is found set.
// On entry: the stub has pushed the original return address, and lr points at
// the stub's "b 1f" instruction.
// Save:    24 bytes are reserved on the stack and filled with the three
//          register pairs read by PXMD, PYMD and REFD; r0/r1 are preserved.
// Then "blx lr" resumes the stub (which branches into the function body) with
// lr now pointing at __dcp_restore_state, so the body's final "bx lr" lands there.
// Restore: the three pairs are popped and written back with WXMD, WYMD and
//          WEFD, and "pop {pc}" returns through the address the stub pushed.
// r0-r3 are not touched by the restore, so the wrapped function's result survives.
// NOTE(review): the coprocessor mnemonics are macros from dcp_instr.inc.S;
// presumably X, Y and the exponent/flag state - confirm against the DCP documentation.
double_section __rp2350_dcp_engaged_state_save_restore_copy
|
||||
.thumb_func
|
||||
__dcp_save_state:
|
||||
sub sp, #24
|
||||
push {r0, r1}
|
||||
// do save here
|
||||
// offsets are +8 because r0/r1 currently sit below the 24-byte save area
PXMD r0, r1
|
||||
strd r0, r1, [sp, #8 + 0]
|
||||
PYMD r0, r1
|
||||
strd r0, r1, [sp, #8 + 8]
|
||||
REFD r0, r1
|
||||
strd r0, r1, [sp, #8 + 16]
|
||||
pop {r0, r1}
|
||||
blx lr
|
||||
// <- wrapped function returns here
|
||||
// fall through into restore:
|
||||
.thumb_func
|
||||
__dcp_restore_state:
|
||||
// do restore here
|
||||
// r12 and r14 are used as scratch so that r0-r3 are left alone
pop {r12, r14}
|
||||
WXMD r12, r14
|
||||
pop {r12, r14}
|
||||
WYMD r12, r14
|
||||
pop {r12, r14}
|
||||
WEFD r12, r14
|
||||
pop {pc}
|
||||
|
||||
double_wrapper_section __dfma
|
||||
@ cf saving_func macro: but here we need to record the SP before the state save possibly changes it
|
||||
@ Entry stub: reached by the "bmi 1b" below when the engaged flag is set. It pushes
@ the return address, calls __dcp_save_state (which resumes at the "b 1f" with lr
@ redirected to the restore routine) and then continues into the function body.
1:
|
||||
push {lr} // 16-bit instruction
|
||||
bl __dcp_save_state // 32-bit instruction
|
||||
b 1f // 16-bit instruction
|
||||
|
|
||||
@ compute mn+a with full intermediate precision
|
||||
@ r0:r1 m
|
||||
@ r2:r3 n
|
||||
@ [r13,#0] a
|
||||
@ Result is returned in r0:r1. r4-r8 and r14 are saved on entry and restored
@ before every return.
@ Note on labels: the numeric local labels 1, 80 and 91-95 are each defined more
@ than once in this function (once in the addition path, once in the subtraction
@ path). Every "f" reference binds to the next definition after the branch and
@ every "b" reference to the nearest one before it, so the two paths do not mix,
@ except where the subtraction path deliberately uses 90b and 82b to share code
@ with the addition path.
wrapper_func fma
|
||||
mov r12,sp @ save the SP
|
||||
PCMP apsr_nzcv @ test the engaged flag
|
||||
bmi 1b
|
||||
1:
|
||||
push {r4-r8,r14}
|
||||
ldrd r4,r5,[r12,#0] @ fetch a using original SP
|
||||
ubfx r7,r1,#20,#11 @ r7=em
|
||||
ubfx r8,r3,#20,#11 @ r8=en
|
||||
add r8,r7,r8 @ em+en
|
||||
|
|
||||
eors r6,r1,r3 @ get sign of mn
|
||||
eors r6,r6,r5 @ set N if mn has opposite sign to a, i.e. if the operation is essentially a subtraction
|
||||
@ the N flag set here is consumed by the "bmi 50f" below; no instruction in
@ between uses a flag-setting form
WXUP r4,r5 @ write a to coprocessor to get its classification
|
||||
PEFD r14,r12 @ r14=fa
|
||||
WXUP r0,r1 @ write m and n to coprocessor to get their classifications
|
||||
WYUP r2,r3
|
||||
PEFD r6,r12 @ r6=fm, r12=fn, r14=fa
|
||||
orr r14,r14,r6
|
||||
orr r14,r14,r12 @ OR of all the classification flags, so we can check if any are zero/Inf/NaN
|
||||
|
|
||||
RXMS r3,r6,0 @ we will almost always need the full product so compute it here (cf dmul macro)
|
||||
RYMS r7,r12,0
|
||||
umull r0,r1,r3,r7
|
||||
mov r2,#0 @ seems to be no 16-bit instruction which zeros a register without affecting the flags
|
||||
umlal r1,r2,r3,r12
|
||||
umlal r1,r2,r6,r7
|
||||
mov r3,#0
|
||||
umlal r2,r3,r6,r12 @ r0:r1:r2:r3: full product mn Q124 1≤mn<4
|
||||
|
|
||||
bmi 50f @ mn has opposite sign to a so operation is essentially a subtraction
|
||||
|
|
||||
@ ======================== ADDITION PATH ========================
|
||||
|
|
||||
tst r14,#0x70000000 @ were any of the arguments zero/inf/NaN?
|
||||
bne 90f @ then use mla path which gives the correct result in all these cases
|
||||
ubfx r14,r5,#20,#11 @ r14=ea
|
||||
@ here all operands are finite and non-zero
|
||||
@ r0:r1:r2:r3: full product mn Q124 1≤mn<4
|
||||
@ r4:r5 a IEEE packed
|
||||
@ r8: em+en [biased +0x3ff*2]
|
||||
@ r14: ea [biased +0x3ff]
|
||||
subw r7,r8,#0x3fd
|
||||
subs r7,r7,r14 @ em+en-ea+2 (debiased)
|
||||
blt 80f @ branch if |a| is big compared to |mn|, more precisely if ea-(em+en)≥3 so e.g. if ea=0 (hence 1≤a<2) then em+en≤-3 and mn<4.2¯³=1/2
|
||||
|
|
||||
@ ======================== ADDITION PATH, RESULT HAS COMPARABLE MAGNITUDE TO mn ========================
|
||||
|
|
||||
@ here |mn| is big compared to |a|; e.g. if em+en=0 (so 1≤mn<4) then ea≤2 and a<8
|
||||
movs r8,#1
|
||||
bfi r5,r8,#20,#12 @ insert implied 1 in a
|
||||
rsbs r7,r7,#74 @ shift up ≤74 (can be negative) that will be required for a (Q52) to align with mn (Q124, ending in 20 zeros)
|
||||
@ now add (shifted) a into mn, preserving flags
|
||||
and r8,r7,#0x1f @ k=shift mod 32
|
||||
mov r12,#1
|
||||
lsl r12,r12,r8 @ 2^k
|
||||
umull r5,r6,r5,r12 @ shift up high word: r4:r5:r6 is now a_lo + 2^k a_hi
|
||||
sub r12,#1 @ 2^k-1
|
||||
umlal r4,r5,r4,r12 @ shift up low word, adding in: r4:r5:r6 is now (a_lo + 2^k a_hi) + (2^k-1) a_lo = 2^k (a_lo + a_hi) = a shifted up by k
|
||||
bmi 91f @ use flags: will a be shifted down?
|
||||
cmp r7,#64 @ shift up by two more words?
|
||||
bge 92f
|
||||
cmp r7,#32 @ shift up by one more word?
|
||||
bge 93f
|
||||
adds r0,r0,r4 @ no more word shifts
|
||||
adcs r1,r1,r5
|
||||
adcs r2,r2,r6
|
||||
adcs r3,r3,#0 @ r0:r1:r2:r3: mn + a (cf dmul macro)
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD @ as dmul macro tail: exponent computed in coprocessor is correct
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
93:
|
||||
adds r1,r1,r4
|
||||
adcs r2,r2,r5
|
||||
adcs r3,r3,r6 @ r0:r1:r2:r3: mn + (a<<32)
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
92:
|
||||
adds r2,r2,r4
|
||||
adcs r3,r3,r5 @ r0:r1:r2:r3: mn + (a<<64); note this cannot overflow as total shift was at most 74 (see above)
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
91: @ case where a (Q52) is shifted down relative to mn (Q124); the mod 32 part of the shift of a has already been done
|
||||
@ r0:r1:r2:r3: mn
|
||||
@ r4:r5:r6: a
|
||||
@ r7: alignment shift required (negative)
|
||||
cmn r7,#32 @ shift down one word?
|
||||
bge 94f
|
||||
cmn r7,#64 @ shift down two words?
|
||||
bge 95f
|
||||
@ here a is shifted entirely below the bottom of m
|
||||
orr r0,r0,#1 @ a is non-zero so ensure we set the sticky bit
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
94:
|
||||
adds r0,r0,r5 @ one word shift down
|
||||
adcs r1,r1,r6
|
||||
adcs r2,r2,#0
|
||||
adcs r3,r3,#0
|
||||
orr r0,r0,r4 @ contribution from a to sticky bits
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
95:
|
||||
adds r0,r0,r6 @ two word shift down
|
||||
adcs r1,r1,#0
|
||||
adcs r2,r2,#0
|
||||
adcs r3,r3,#0
|
||||
orr r0,r0,r4 @ contribution from a to sticky bits
|
||||
orr r0,r0,r5
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
@ ======================== ADDITION PATH, RESULT HAS COMPARABLE MAGNITUDE TO a ========================
|
||||
|
|
||||
80:
|
||||
@ here |mn|<~|a|
|
||||
@ r0:r1:r2:r3: mn Q124
|
||||
@ r4:r5 a IEEE packed
|
||||
@ r7: -(shift down required to align mn with a), guaranteed negative
|
||||
@ r8: em+en [biased +0x3ff*2]
|
||||
@ r14: ea [biased +0x3ff]
|
||||
tst r3,#0x20000000
|
||||
bne 1f @ 2≤mn<4?
|
||||
adds r2,r2,r2 @ normalise so mn is 2..4 Q124; note that the contents of r0 and r1 are always destined for the sticky bit in this path
|
||||
adcs r3,r3,r3
|
||||
subs r7,r7,#1 @ correction to alignment shift
|
||||
1:
|
||||
@ now we construct an IEEE packed value in r2:r3 such that adding it to r4:r5 gives the correct final result
|
||||
@ observe that the exponent of this constructed value will be at least two less than that of a (by the "|a| is big compared to |mn|" test above)
|
||||
@ so the alignment shift in the final addition will be by at least two places; thus we can use bit 0 of the constructed
|
||||
@ value as a sticky bit, and we still have one bit in hand for rounding
|
||||
subs r7,r7,#2 @ now r7 < -2
|
||||
orr r0,r0,r2,lsl#23 @ shift r2:r3 down 9 places, ORing excess into sticky bits
|
||||
lsrs r2,r2,#9
|
||||
orr r2,r2,r3,lsl#23
|
||||
lsrs r3,r3,#9
|
||||
orrs r0,r0,r1
|
||||
it ne
|
||||
orrne r2,r2,#1 @ sticky bit from bottom 64 bits of mn as shifted
|
||||
@ r2:r3 mn 2..4 Q51, i.e. 1..2 Q52
|
||||
@ r2b0 holds sticky bit; note that for alignment with a in r4:r5, r2:r3 will be shifted down at least one place
|
||||
|
|
||||
lsrs r6,r5,#31 @ get sign of a (which in this path is the same as the sign of mn, and of the result)
|
||||
orr r3,r3,r6,lsl#31 @ set sign in mn
|
||||
|
|
||||
adds r14,r7,r14 @ get exponent for mn relative to a; note this can go negative
|
||||
add r3,r3,r14,lsl#20 @ note that "implied" 1 is present in r3, giving an offset of 1 in the exponent
|
||||
bmi 1f @ negative? then we have just constructed a denormal (or less) and the addition will give an incorrect result
|
||||
dcp_dadd_m r0,r1,r2,r3,r4,r5
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
1:
|
||||
@ compare with similar code in subtraction path: here we cannot underflow
|
||||
cmn r7,#64 @ if the alignment shift for mn is very large then the result is just a
|
||||
ble 82f
|
||||
add r3,r3,#0x40000000 @ ea cannot be very large (as adding r7 made it negative), so safe to add 1024 to exponents of both a and mn
|
||||
add r5,r5,#0x40000000
|
||||
dcp_dadd_m r0,r1,r2,r3,r4,r5
|
||||
sub r1,r1,#0x40000000 @ fix exponent
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
90:
|
||||
@ dcp_dmul_m tail then dadd ("mla path")
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
dcp_dadd_m r0,r1,r0,r1,r4,r5
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
82: @ |mn| is very small compared to |a|, so result is a
|
||||
RDDM r0,r1 @ clear the engaged flag
|
||||
movs r0,r4
|
||||
movs r1,r5
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
@ ======================== SUBTRACTION PATH ========================
|
||||
|
|
||||
50:
|
||||
tst r14,#0x70000000 @ were any of the arguments zero/inf/NaN?
|
||||
bne 90b @ then use mla path which gives the correct result in all these cases
|
||||
ubfx r14,r5,#20,#11 @ r14=ea
|
||||
@ now all operands are finite and non-zero
|
||||
@ r0:r1:r2:r3: full product mn Q124 1≤mn<4
|
||||
@ r4:r5 a IEEE packed (including sign bit; sign of mn is opposite as we are in the subtraction path)
|
||||
@ r8: em+en [+0x3ff*2]
|
||||
@ r14: ea [+0x3ff]
|
||||
subw r8,r8,#0x3fc @ em+en+3
|
||||
subs r7,r8,r14 @ em+en-ea+3 (debiased)
|
||||
blt 80f @ branch if |a| is big compared to |mn|, more precisely if ea-(em+en)≥4 so e.g. if ea=0 then em+en≤-4 and mn<4.2^-4=1/4
|
||||
beq 94f @ branch if ea-(em+en)=3 e.g. if ea=0 then em+en=-3 and 1/8=2^-3≤mn<4.2^-3=1/2
|
||||
@ in this branch, if e.g. em+en=0 (so 1≤mn<4) then ea≤2 and a<8
|
||||
rsbs r7,r7,#75 @ 75-(em+en-ea+3) = 72-(em+en-ea), shift up 0..74 that will be required for a (Q52) to align with mn (Q124, ending in 20 zeros)
|
||||
mvn r14,r5,lsr#31 @ save complement of sign of a
|
||||
@ subtract (shifted) a from mn
|
||||
and r6,r7,#0x1f @ k=shift mod 32
|
||||
mov r12,#1
|
||||
bfi r5,r12,#20,#12 @ insert implied 1 in a
|
||||
lsl r12,r12,r6 @ 2^k
|
||||
umull r5,r6,r5,r12
|
||||
sub r12,#1
|
||||
umlal r4,r5,r4,r12 @ shift a up by shift amount mod 32 (see comment in addition path)
|
||||
@ r4:r5:r6: a shifted up by k=shift mod 32
|
||||
bmi 91f @ will a be shifted down?
|
||||
cmp r7,#64 @ shift up by two more words?
|
||||
bge 92f
|
||||
cmp r7,#32 @ shift up by one more word?
|
||||
bge 93f
|
||||
subs r0,r0,r4 @ no more word shifts; this cannot go negative or have bad cancellation
|
||||
sbcs r1,r1,r5
|
||||
sbcs r2,r2,r6
|
||||
sbcs r3,r3,#0 @ r0:r1:r2:r3: mn - a (cf dmul macro)
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD @ as dmul macro tail: exponent and sign computed in coprocessor is correct
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
94:
|
||||
@ here if ea-(em+en)=3 e.g. if ea=0 then em+en=-3 and 1/8=2^-3≤mn<4.2^-3=1/2
|
||||
@ r0:r1:r2:r3: full product mn Q124 1≤mn<4
|
||||
@ r4:r5 a IEEE packed (including sign bit; sign of mn is opposite as we are in the subtraction path)
|
||||
lsls r5,r5,#11 @ convert a to mantissa Q63 in r4:r5
|
||||
orrs r5,r5,r4,lsr#21
|
||||
lsls r4,r4,#11
|
||||
orrs r5,r5,0x80000000 @ implied 1
|
||||
movs r6,#0
|
||||
subs r0,r6,r0 @ compute |a|-|mn|
|
||||
sbcs r6,r6,r1
|
||||
sbcs r4,r4,r2
|
||||
sbcs r5,r5,r3
|
||||
WXMS r0,r6 @ write sticky bits
|
||||
WXMO r4,r5 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
eor r1,r1,0x80000000 @ sign of result is opposite to that of product as yielded by coprocessor
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
93:
|
||||
subs r1,r1,r4 @ shifting a up by one word: this cannot go negative or have bad cancellation
|
||||
sbcs r2,r2,r5
|
||||
sbcs r3,r3,r6
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
92:
|
||||
subs r2,r2,r4 @ shifting a up by two words: this /can/ go negative or have bad cancellation
|
||||
sbcs r3,r3,r5
|
||||
cmp r3,#0x01000000 @ check we have at least 57 bits of product so that dmul tail will round correctly (this test is slightly conservative - 55 needed?)
|
||||
blt 1f @ also trap case where result is negative
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
@ heavy cancellation case
|
||||
@ r0:r1:r2:r3: result Q124, signed
|
||||
@ r8: em+en+3
|
||||
@ r14b0: save complement of sign of a
|
||||
1:
|
||||
sub r8,r8,#1 @ em+en+2
|
||||
RDDM r6,r7 @ clear engaged flag
|
||||
@ flags are still those of "cmp r3,#0x01000000" above: an unsigned-lower r3 is a
@ small non-negative value, so the negation is skipped
blo 2f @ if result is negative...
|
||||
movs r6,#0 @ ... negate it...
|
||||
subs r0,r6,r0
|
||||
sbcs r1,r6,r1
|
||||
sbcs r2,r6,r2
|
||||
sbcs r3,r6,r3
|
||||
eor r14,r14,#1 @ ... and flip saved sign
|
||||
2: @ now normalise result
|
||||
orrs r6,r2,r3 @ shift up by 64 possible?
|
||||
bne 7f
|
||||
movs r3,r1 @ do it
|
||||
movs r2,r0
|
||||
movs r1,#0
|
||||
movs r0,#0
|
||||
sub r8,r8,#64 @ fix exponent
|
||||
7:
|
||||
cmp r3,#0 @ shift up by 32 possible?
|
||||
bne 8f
|
||||
movs r3,r2 @ do it
|
||||
movs r2,r1
|
||||
movs r1,r0
|
||||
movs r0,#0
|
||||
sub r8,r8,#32
|
||||
8:
|
||||
cmp r3,#0 @ is result zero? return it
|
||||
beq 9f
|
||||
clz r6,r3 @ k=amount of final shift
|
||||
subs r8,r8,r6 @ final exponent
|
||||
movs r7,#1
|
||||
lsls r7,r7,r6 @ r7=2^k
|
||||
muls r3,r3,r7
|
||||
subs r7,r7,#1 @ 2^k-1
|
||||
umlal r2,r3,r2,r7
|
||||
umlal r1,r2,r1,r7
|
||||
umlal r0,r1,r0,r7 @ r0:r1:r2:r3: normalised result
|
||||
orrs r0,r0,r1 @ any sticky bits below top 64?
|
||||
it ne
|
||||
orrne r2,r2,#1 @ or into sticky bit
|
||||
lsrs r0,r2,#11 @ align to mantissa position for IEEE format
|
||||
lsrs r1,r3,#11
|
||||
orr r0,r0,r3,lsl#21
|
||||
lsls r2,r2,#22 @ rounding bit in C, sticky bit in ~Z
|
||||
bcc 10f @ no rounding?
|
||||
beq 11f @ rounding tie?
|
||||
adcs r0,r0,#0 @ round up (C is set)
|
||||
adcs r1,r1,#0
|
||||
adds r8,r8,r1,lsr#20 @ candidate for exponent field
|
||||
ble 12f @ underflow? overflow cannot occur here as the result is smaller in magnitude than a
|
||||
bfi r1,r8,#20,#11 @ insert exponent
|
||||
orr r1,r1,r14,lsl#31 @ or in sign
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
11:
|
||||
adcs r0,r0,#0 @ round up as above
|
||||
adcs r1,r1,#0
|
||||
bic r0,r0,#1 @ to even
|
||||
adds r8,r8,r1,lsr#20 @ candidate for exponent field
|
||||
ble 12f @ underflow?
|
||||
bfi r1,r8,#20,#11 @ insert exponent
|
||||
orr r1,r1,r14,lsl#31 @ or in sign
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
10:
|
||||
adds r8,r8,r1,lsr#20 @ candidate for exponent field
|
||||
ble 12f @ underflow?
|
||||
bfi r1,r8,#20,#11 @ insert exponent
|
||||
orr r1,r1,r14,lsl#31 @ or in sign
|
||||
9:
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
12:
|
||||
mov r1,r14,lsl#31 @ underflow: return signed zero
|
||||
movs r0,#0
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
91: @ case where a (Q52) is shifted down relative to mn (Q124); the mod 32 part of the shift of a has already been done
|
||||
@ r0:r1:r2:r3: mn
|
||||
@ r4:r5:r6: a
|
||||
@ r7: alignment shift required (negative)
|
||||
cmn r7,#32 @ shift down one word?
|
||||
bge 94f
|
||||
cmn r7,#64 @ shift down two words?
|
||||
bge 95f
|
||||
@ here a is shifted entirely below the bottom of m
|
||||
subs r0,r0,#1 @ subtract an epsilon (a is non-zero)
|
||||
sbcs r1,r1,#0
|
||||
sbcs r2,r2,#0
|
||||
sbcs r3,r3,#0
|
||||
orr r0,r0,#1 @ ensure the sticky bit is set (a is non-zero)
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
94:
|
||||
rsbs r4,r4,#0 @ one word shift down
|
||||
sbcs r0,r0,r5
|
||||
sbcs r1,r1,r6
|
||||
sbcs r2,r2,#0
|
||||
sbcs r3,r3,#0
|
||||
orr r0,r0,r4 @ sticky bits
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
95:
|
||||
movs r7,#0 @ two words shift down
|
||||
subs r4,r7,r4
|
||||
sbcs r5,r7,r5
|
||||
sbcs r0,r0,r6
|
||||
sbcs r1,r1,r7
|
||||
sbcs r2,r2,r7
|
||||
sbcs r3,r3,r7
|
||||
orrs r0,r0,r4 @ sticky bits
|
||||
orrs r0,r0,r5
|
||||
WXMS r0,r1 @ write sticky bits
|
||||
WXMO r2,r3 @ write sticky+result bits
|
||||
NRDD
|
||||
RDDM r0,r1
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
80:
|
||||
@ here |a| is big compared to |mn|, more precisely ea-(em+en)≥4 so e.g. if ea=0 then em+en≤-4 and mn<4.2^-4=1/4
|
||||
@ r0:r1:r2:r3: mn Q124
|
||||
@ r4:r5: a IEEE packed
|
||||
@ r7<0, em+en-ea+3 (debiased)
|
||||
@ r14: ea [+0x3ff]
|
||||
lsrs r6,r3,#29
|
||||
bne 1f @ 2≤mn<4?
|
||||
adds r2,r2,r2 @ shift up one place
|
||||
adcs r3,r3,r3
|
||||
subs r7,r7,#1 @ fix exponent
|
||||
1: @ now r2:r3 is mn Q61, sticky bits in r0:r1
|
||||
subs r7,r7,#3
|
||||
@ r7=emn-ea <-3
|
||||
orr r0,r0,r2,lsl#23 @ gather sticky bits
|
||||
lsrs r2,r2,#9 @ adjust mn to Q52 ready to create packed IEEE version of mn
|
||||
orr r2,r2,r3,lsl#23
|
||||
lsrs r3,r3,#9
|
||||
orrs r0,r0,r1 @ or of all sticky bits
|
||||
it ne
|
||||
orrne r2,r2,#1 @ sticky bit from bottom 64 bits of mn
|
||||
|
|
||||
mvn r6,r5,lsr#31 @ complement of sign of a
|
||||
orr r3,r3,r6,lsl#31 @ fix sign of mn so we do a subtraction
|
||||
|
|
||||
adds r14,r7,r14 @ this can go negative; r14 is now at most ea[+0x3ff]-4
|
||||
add r3,r3,r14,lsl#20
|
||||
@ the exponent field in r2:r3 (mn) is now at most ea[+0x3ff]-3
|
||||
@ that means that in the dadd operation that follows, mn will be shifted down at least three places to align with a,
|
||||
@ and a post-normalisation shift up of at most one place will be needed
|
||||
@ therefore in the worst case r2b2 affects b0 of the result; r2b1 affects the rounding of the result; and r2b0 can be used as a sticky bit
|
||||
bmi 1f @ did exponent go negative?
|
||||
|
|
||||
dcp_dadd_m r0,r1,r2,r3,r4,r5
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
|
||||
1:
|
||||
cmn r7,#64 @ is mn being shifted well below the bottom of a?
|
||||
ble 82b @ then result is just a
|
||||
add r3,r3,#0x40000000 @ otherwise offset exponents by +1024
|
||||
add r5,r5,#0x40000000
|
||||
dcp_dadd_m r0,r1,r2,r3,r4,r5
|
||||
ubfx r2,r1,#20,#11 @ get exponent
|
||||
cmp r2,#0x400 @ too small?
|
||||
itte ls
|
||||
andls r1,r1,0x80000000 @ flush to signed zero
|
||||
movls r0,#0
|
||||
subhi r1,r1,#0x40000000 @ else fix exponent of result
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4-r8,lr}
|
||||
saving_func_return
|
||||
|
||||
|
||||
@ mla: multiply m by n with dcp_dmul_m, then add a with dcp_dadd_m.
@ The multiply and the add are two separate macro invocations (see below).
double_wrapper_section __dmla
|
||||
@ cf saving_func macro: but here we need to record the SP before the state save possibly changes it
|
||||
@ Slow path: reached from the "bmi 1b" below when the engaged-flag test is negative.
@ It saves lr, calls the state-save routine, then jumps forward to the second "1:" label,
@ i.e. past the flag test, so the body runs with r12 still holding the entry SP
@ (NOTE(review): this relies on __dcp_save_state preserving r12 - confirm).
1:
|
||||
push {lr} // 16-bit instruction
|
||||
bl __dcp_save_state // 32-bit instruction
|
||||
b 1f // 16-bit instruction
|
||||
|
||||
@ r0:r1 m
|
||||
@ r2:r3 n
|
||||
@ [r13,#0] a
|
||||
@ Actual entry point.
regular_func mla
|
||||
mov r12,sp @ save the SP
|
||||
PCMP apsr_nzcv @ test the engaged flag
|
||||
bmi 1b
|
||||
1:
|
||||
push {r4,r5,r14}
|
||||
@ product of r0:r1 and r2:r3 is left in r0:r1 (first two macro operands)
dcp_dmul_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r4,r5,r14
|
||||
@ the push above moved sp, so the stacked argument is read through r12 instead
ldrd r2,r3,[r12,#0] @ fetch a using original SP
|
||||
dcp_dadd_m r0,r1,r0,r1,r2,r3
|
||||
// todo optimize this based on final decision on saving_func_entry
|
||||
pop {r4,r5,r14}
|
||||
saving_func_return
|
||||
|
||||
#endif
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "pico/bootrom.h"
|
||||
#include "pico/bootrom/sf_table.h"
|
||||
|
||||
// NOTE THIS FUNCTION TABLE IS NOT PUBLIC OR NECESSARILY COMPLETE...
|
||||
// IT IS ***NOT*** SAFE TO CALL THESE FUNCTION POINTERS FROM ARBITRARY CODE
|
||||
uint32_t sd_table[SF_TABLE_V2_SIZE / 2];
|
||||
|
||||
#if !(PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED)
|
||||
static __attribute__((noreturn)) void missing_double_func_shim(void) {
|
||||
panic("missing double function");
|
||||
}
|
||||
#endif
|
||||
extern void double_table_shim_on_use_helper(void);
|
||||
|
||||
void __attribute__((weak)) *sf_clz_func;
|
||||
|
||||
void __aeabi_double_init(void) {
|
||||
int rom_version = rp2040_rom_version();
|
||||
#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
if (rom_version == 1) {
|
||||
|
||||
// this is a little tricky.. we only want to pull in a shim if the corresponding function
|
||||
// is called. to that end we include a SVC instruction with the table offset as the call number
|
||||
// followed by the shim function pointer inside the actual wrapper function. that way if the wrapper
|
||||
// function is garbage collected, so is the shim function.
|
||||
//
|
||||
// double_table_shim_on_use_helper expects this SVC instruction in the calling code soon after the address
|
||||
// pointed to by IP and patches the double_table entry with the real shim the first time the function is called.
|
||||
for(uint i=0; i<SF_TABLE_V2_SIZE/4; i++) {
|
||||
sd_table[i] = (uintptr_t)double_table_shim_on_use_helper;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (rom_version == 1) {
|
||||
// opting for soft failure for now - you'll get a panic at runtime if you call any of the missing methods
|
||||
for(uint i=0;i<SF_TABLE_V2_SIZE/4;i++) {
|
||||
sd_table[i] = (uintptr_t)missing_double_func_shim;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (rom_version >= 2) {
|
||||
void *rom_table_double = rom_data_lookup(rom_table_code('S', 'D'));
|
||||
void *rom_table_float = rom_data_lookup(rom_table_code('S', 'F'));
|
||||
assert(*((uint8_t *)(((void *)rom_table_float)-2)) * 4 >= SF_TABLE_V2_SIZE);
|
||||
(void)rom_table_float;
|
||||
memcpy(&sd_table, rom_table_double, SF_TABLE_V2_SIZE);
|
||||
if (rom_version == 2) {
|
||||
#ifndef NDEBUG
|
||||
if (*(uint16_t *)0x3854 != 0xb500 || // this is dsincos(_internal)
|
||||
|
||||
*(uint16_t *)0x38d8 != 0x4649 || // this is dsin_finish
|
||||
*(uint16_t *)0x389c != 0x4659 // this is dcos_finish
|
||||
) {
|
||||
panic(NULL);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (rom_version < 3) {
|
||||
// we use the unused entry for SINCOS
|
||||
sd_table[SF_TABLE_V3_FSINCOS / 4] = (uintptr_t) double_table_shim_on_use_helper;
|
||||
}
|
||||
|
||||
sf_clz_func = rom_func_lookup(ROM_FUNC_CLZ32);
|
||||
}
|
626
lib/main/pico-sdk/rp2_common/pico_double/double_math.c
Normal file
626
lib/main/pico-sdk/rp2_common/pico_double/double_math.c
Normal file
|
@ -0,0 +1,626 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "pico/double.h"
|
||||
|
||||
// opened a separate issue https://github.com/raspberrypi/pico-sdk/issues/166 to deal with these warnings if at all
|
||||
GCC_Pragma("GCC diagnostic push")
|
||||
GCC_Pragma("GCC diagnostic ignored \"-Wconversion\"")
|
||||
GCC_Pragma("GCC diagnostic ignored \"-Wsign-conversion\"")
|
||||
|
||||
typedef uint64_t ui64;
|
||||
typedef uint32_t ui32;
|
||||
typedef int64_t i64;
|
||||
|
||||
#define PINF ( HUGE_VAL)
|
||||
#define MINF (-HUGE_VAL)
|
||||
#define PZERO (+0.0)
|
||||
#define MZERO (-0.0)
|
||||
|
||||
|
||||
#define PI 3.14159265358979323846
|
||||
#define LOG2 0.69314718055994530941
|
||||
// Unfortunately, in double precision ln(10) is very close to half-way between two representable numbers
|
||||
#define LOG10 2.30258509299404568401
|
||||
#define LOG2E 1.44269504088896340737
|
||||
#define LOG10E 0.43429448190325182765
|
||||
#define ONETHIRD 0.33333333333333333333
|
||||
|
||||
#define PIf 3.14159265358979323846f
|
||||
#define LOG2f 0.69314718055994530941f
|
||||
#define LOG2Ef 1.44269504088896340737f
|
||||
#define LOG10Ef 0.43429448190325182765f
|
||||
#define ONETHIRDf 0.33333333333333333333f
|
||||
|
||||
#define DUNPACK(x,e,m) e=((x)>>52)&0x7ff,m=((x)&0x000fffffffffffffULL)|0x0010000000000000ULL
|
||||
#define DUNPACKS(x,s,e,m) s=((x)>>63),DUNPACK((x),(e),(m))
|
||||
|
||||
typedef union {
|
||||
double d;
|
||||
ui64 ix;
|
||||
} double_ui64;
|
||||
|
||||
static inline double ui642double(ui64 ix) {
|
||||
double_ui64 tmp;
|
||||
tmp.ix = ix;
|
||||
return tmp.d;
|
||||
}
|
||||
|
||||
static inline ui64 double2ui64(double d) {
|
||||
double_ui64 tmp;
|
||||
tmp.d = d;
|
||||
return tmp.ix;
|
||||
}
|
||||
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
// Returns true for every NaN encoding: exponent field all ones and a non-zero mantissa.
static inline bool disnan(double x) {
    ui64 ix = double2ui64(x);
    // Shifting left by one discards the sign bit and leaves exponent:mantissa left-aligned.
    // Infinity is then exactly 0xffe0000000000000, so anything strictly greater is a NaN,
    // including NaNs whose payload sits only in the low 32 bits of the mantissa.
    return (ix << 1) > 0xffe0000000000000ULL;
}

// Early-return helpers: return the first NaN argument from the enclosing function.
// Wrapped in do { } while (0) so that each use is a single statement and cannot
// capture a following `else`.
#define check_nan_d1(x) do { if (disnan((x))) return (x); } while (0)
#define check_nan_d2(x,y) do { if (disnan((x))) return (x); if (disnan((y))) return (y); } while (0)
#else
// NaN propagation disabled: the checks compile to nothing.
#define check_nan_d1(x) ((void)0)
#define check_nan_d2(x,y) ((void)0)
#endif
|
||||
|
||||
// Top 12 bits of x: sign bit followed by the 11-bit biased exponent.
static inline int dgetsignexp(double x) {
    return (int)((double2ui64(x) >> 52) & 0xfff);
}

// The 11-bit biased exponent field of x.
static inline int dgetexp(double x) {
    return (int)((double2ui64(x) >> 52) & 0x7ff);
}
|
||||
|
||||
static inline double dldexp(double x,int de) {
|
||||
ui64 ix=double2ui64(x),iy;
|
||||
int e;
|
||||
e=dgetexp(x);
|
||||
if(e==0||e==0x7ff) return x;
|
||||
e+=de;
|
||||
if(e<=0) iy=ix&0x8000000000000000ULL; // signed zero for underflow
|
||||
else if(e>=0x7ff) iy=(ix&0x8000000000000000ULL)|0x7ff0000000000000ULL; // signed infinity on overflow
|
||||
else iy=ix+((ui64)de<<52);
|
||||
return ui642double(iy);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(ldexp)(double x, int de) {
|
||||
check_nan_d1(x);
|
||||
return dldexp(x, de);
|
||||
}
|
||||
|
||||
|
||||
static inline double dcopysign(double x,double y) {
|
||||
ui64 ix=double2ui64(x),iy=double2ui64(y);
|
||||
ix=((ix&0x7fffffffffffffffULL)|(iy&0x8000000000000000ULL));
|
||||
return ui642double(ix);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(copysign)(double x, double y) {
|
||||
check_nan_d2(x,y);
|
||||
return dcopysign(x, y);
|
||||
}
|
||||
static inline int diszero(double x) { return dgetexp (x)==0; }
|
||||
//static inline int dispzero(double x) { return dgetsignexp(x)==0; }
|
||||
//static inline int dismzero(double x) { return dgetsignexp(x)==0x800; }
|
||||
static inline int disinf(double x) { return dgetexp (x)==0x7ff; }
|
||||
static inline int dispinf(double x) { return dgetsignexp(x)==0x7ff; }
|
||||
static inline int disminf(double x) { return dgetsignexp(x)==0xfff; }
|
||||
|
||||
static inline int disint(double x) {
|
||||
ui64 ix=double2ui64(x),m;
|
||||
int e=dgetexp(x);
|
||||
if(e==0) return 1; // 0 is an integer
|
||||
e-=0x3ff; // remove exponent bias
|
||||
if(e<0) return 0; // |x|<1
|
||||
e=52-e; // bit position in mantissa with significance 1
|
||||
if(e<=0) return 1; // |x| large, so must be an integer
|
||||
m=(1ULL<<e)-1; // mask for bits of significance <1
|
||||
if(ix&m) return 0; // not an integer
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int disoddint(double x) {
|
||||
ui64 ix=double2ui64(x),m;
|
||||
int e=dgetexp(x);
|
||||
e-=0x3ff; // remove exponent bias
|
||||
if(e<0) return 0; // |x|<1; 0 is not odd
|
||||
e=52-e; // bit position in mantissa with significance 1
|
||||
if(e<0) return 0; // |x| large, so must be even
|
||||
m=(1ULL<<e)-1; // mask for bits of significance <1 (if any)
|
||||
if(ix&m) return 0; // not an integer
|
||||
if(e==52) return 1; // value is exactly 1
|
||||
return (ix>>e)&1;
|
||||
}
|
||||
|
||||
static inline int disstrictneg(double x) {
|
||||
ui64 ix=double2ui64(x);
|
||||
if(diszero(x)) return 0;
|
||||
return ix>>63;
|
||||
}
|
||||
|
||||
static inline int disneg(double x) {
|
||||
ui64 ix=double2ui64(x);
|
||||
return ix>>63;
|
||||
}
|
||||
|
||||
static inline double dneg(double x) {
|
||||
ui64 ix=double2ui64(x);
|
||||
ix^=0x8000000000000000ULL;
|
||||
return ui642double(ix);
|
||||
}
|
||||
|
||||
// Returns 1 when x is a power of two: exponent field neither 0 nor 0x7ff, and all 52 mantissa bits clear.
static inline int dispo2(double x) {
    if (diszero(x) || disinf(x)) return 0;
    return (double2ui64(x) & 0x000fffffffffffffULL) == 0;
}
|
||||
|
||||
static inline double dnan_or(double x) {
|
||||
#if PICO_DOUBLE_PROPAGATE_NANS
|
||||
return NAN;
|
||||
#else
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(trunc)(double x) {
|
||||
check_nan_d1(x);
|
||||
ui64 ix=double2ui64(x),m;
|
||||
int e=dgetexp(x);
|
||||
e-=0x3ff; // remove exponent bias
|
||||
if(e<0) { // |x|<1
|
||||
ix&=0x8000000000000000ULL;
|
||||
return ui642double(ix);
|
||||
}
|
||||
e=52-e; // bit position in mantissa with significance 1
|
||||
if(e<=0) return x; // |x| large, so must be an integer
|
||||
m=(1ULL<<e)-1; // mask for bits of significance <1
|
||||
ix&=~m;
|
||||
return ui642double(ix);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(round)(double x) {
|
||||
check_nan_d1(x);
|
||||
ui64 ix=double2ui64(x),m;
|
||||
int e=dgetexp(x);
|
||||
e-=0x3ff; // remove exponent bias
|
||||
if(e<-1) { // |x|<0.5
|
||||
ix&=0x8000000000000000ULL;
|
||||
return ui642double(ix);
|
||||
}
|
||||
if(e==-1) { // 0.5<=|x|<1
|
||||
ix&=0x8000000000000000ULL;
|
||||
ix|=0x3ff0000000000000ULL; // ±1
|
||||
return ui642double(ix);
|
||||
}
|
||||
e=52-e; // bit position in mantissa with significance 1, <=52
|
||||
if(e<=0) return x; // |x| large, so must be an integer
|
||||
m=1ULL<<(e-1); // mask for bit of significance 0.5
|
||||
ix+=m;
|
||||
m=m+m-1; // mask for bits of significance <1
|
||||
ix&=~m;
|
||||
return ui642double(ix);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(floor)(double x) {
|
||||
check_nan_d1(x);
|
||||
ui64 ix=double2ui64(x),m;
|
||||
int e=dgetexp(x);
|
||||
if(e==0) { // x==0
|
||||
ix&=0x8000000000000000ULL;
|
||||
return ui642double(ix);
|
||||
}
|
||||
e-=0x3ff; // remove exponent bias
|
||||
if(e<0) { // |x|<1, not zero
|
||||
if(disneg(x)) return -1;
|
||||
return PZERO;
|
||||
}
|
||||
e=52-e; // bit position in mantissa with significance 1
|
||||
if(e<=0) return x; // |x| large, so must be an integer
|
||||
m=(1ULL<<e)-1; // mask for bit of significance <1
|
||||
if(disneg(x)) ix+=m; // add 1-ε to magnitude if negative
|
||||
ix&=~m; // truncate
|
||||
return ui642double(ix);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(ceil)(double x) {
|
||||
check_nan_d1(x);
|
||||
ui64 ix=double2ui64(x),m;
|
||||
int e=dgetexp(x);
|
||||
if(e==0) { // x==0
|
||||
ix&=0x8000000000000000ULL;
|
||||
return ui642double(ix);
|
||||
}
|
||||
e-=0x3ff; // remove exponent bias
|
||||
if(e<0) { // |x|<1, not zero
|
||||
if(disneg(x)) return MZERO;
|
||||
return 1;
|
||||
}
|
||||
e=52-e; // bit position in mantissa with significance 1
|
||||
if(e<=0) return x; // |x| large, so must be an integer
|
||||
m=(1ULL<<e)-1; // mask for bit of significance <1
|
||||
if(!disneg(x)) ix+=m; // add 1-ε to magnitude if positive
|
||||
ix&=~m; // truncate
|
||||
return ui642double(ix);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(asin)(double x) {
|
||||
check_nan_d1(x);
|
||||
double u;
|
||||
u=(1-x)*(1+x);
|
||||
if(disstrictneg(u)) return dnan_or(PINF);
|
||||
return atan2(x,sqrt(u));
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(acos)(double x) {
|
||||
check_nan_d1(x);
|
||||
double u;
|
||||
u=(1-x)*(1+x);
|
||||
if(disstrictneg(u)) return dnan_or(PINF);
|
||||
return atan2(sqrt(u),x);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(atan)(double x) {
|
||||
check_nan_d1(x);
|
||||
if(dispinf(x)) return PI/2;
|
||||
if(disminf(x)) return -PI/2;
|
||||
return atan2(x,1);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(sinh)(double x) {
|
||||
check_nan_d1(x);
|
||||
return dldexp((exp(x)-exp(dneg(x))),-1);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(cosh)(double x) {
|
||||
check_nan_d1(x);
|
||||
return dldexp((exp(x)+exp(dneg(x))),-1);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(tanh)(double x) {
|
||||
check_nan_d1(x);
|
||||
double u;
|
||||
int e;
|
||||
e=dgetexp(x);
|
||||
if(e>=5+0x3ff) { // |x|>=32?
|
||||
if(!disneg(x)) return 1; // 1 << exp 2x; avoid generating infinities later
|
||||
else return -1; // 1 >> exp 2x
|
||||
}
|
||||
u=exp(dldexp(x,1));
|
||||
return (u-1)/(u+1);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(asinh)(double x) {
|
||||
check_nan_d1(x);
|
||||
int e;
|
||||
e=dgetexp(x);
|
||||
if(e>=32+0x3ff) { // |x|>=2^32?
|
||||
if(!disneg(x)) return log( x )+LOG2; // 1/x^2 << 1
|
||||
else return dneg(log(dneg(x))+LOG2); // 1/x^2 << 1
|
||||
}
|
||||
if(x>0) return log(sqrt(x*x+1)+x);
|
||||
else return dneg(log(sqrt(x*x+1)-x));
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(acosh)(double x) {
|
||||
check_nan_d1(x);
|
||||
int e;
|
||||
if(disneg(x)) x=dneg(x);
|
||||
e=dgetexp(x);
|
||||
if(e>=32+0x3ff) return log(x)+LOG2; // |x|>=2^32?
|
||||
return log(sqrt((x-1)*(x+1))+x);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(atanh)(double x) {
|
||||
check_nan_d1(x);
|
||||
return dldexp(log((1+x)/(1-x)),-1);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(exp2)(double x) {
|
||||
check_nan_d1(x);
|
||||
int e;
|
||||
// extra check for disminf as this catches -Nan, and x<=-4096 doesn't.
|
||||
if (disminf(x) || x<=-4096) return 0; // easily underflows
|
||||
else if (x>=4096) return PINF; // easily overflows
|
||||
e=(int)round(x);
|
||||
x-=e;
|
||||
return dldexp(exp(x*LOG2),e);
|
||||
}
|
||||
double WRAPPER_FUNC(log2)(double x) { check_nan_d1(x); return log(x)*LOG2E; }
|
||||
double WRAPPER_FUNC(exp10)(double x) { check_nan_d1(x); return pow(10,x); }
|
||||
double WRAPPER_FUNC(log10)(double x) { check_nan_d1(x); return log(x)*LOG10E; }
|
||||
|
||||
// todo these are marked as lofi
|
||||
double WRAPPER_FUNC(expm1)(double x) { check_nan_d1(x); return exp(x)-1; }
|
||||
double WRAPPER_FUNC(log1p)(double x) { check_nan_d1(x); return log(1+x); }
|
||||
#if !HAS_DOUBLE_COPROCESSOR
|
||||
double WRAPPER_FUNC(fma)(double x,double y,double z) { check_nan_d1(x); return x*y+z; }
|
||||
#endif
|
||||
|
||||
// general power, x>0, finite
|
||||
static double dpow_1(double x,double y) {
|
||||
int a,b,c;
|
||||
double t,rt,u,v,v0,v1,w,ry;
|
||||
a=dgetexp(x)-0x3ff;
|
||||
u=log2(dldexp(x,-a)); // now log_2 x = a+u
|
||||
if(u>0.5) u-=1,a++; // |u|<=~0.5
|
||||
if(a==0) return exp2(u*y);
|
||||
// here |log_2 x| >~0.5
|
||||
if(y>= 4096) { // then easily over/underflows
|
||||
if(a<0) return 0;
|
||||
return PINF;
|
||||
}
|
||||
if(y<=-4096) { // then easily over/underflows
|
||||
if(a<0) return PINF;
|
||||
return 0;
|
||||
}
|
||||
ry=round(y);
|
||||
v=y-ry;
|
||||
v0=dldexp(round(ldexp(v,26)),-26);
|
||||
v1=v-v0;
|
||||
b=(int)ry; // guaranteed to fit in an int; y=b+v0+v1
|
||||
// now the result is exp2( (a+u) * (b+v0+v1) )
|
||||
c=a*b; // integer
|
||||
t=a*v0;
|
||||
rt=round(t);
|
||||
c+=(int)rt;
|
||||
w=t-rt;
|
||||
t=a*v1;
|
||||
w+=t;
|
||||
t=u*b;
|
||||
rt=round(t);
|
||||
c+=(int)rt;
|
||||
w+=t-rt;
|
||||
w+=u*v;
|
||||
return dldexp(exp2(w),c);
|
||||
}
|
||||
|
||||
// Integer power by binary exponentiation, most significant exponent bit first.
// For y>=1 this performs exactly the squarings and multiplications of the former recursive
// form (square the result for y/2, then multiply by x when y is odd). Unlike that form it
// also terminates for y<=0: y==0 yields 1 and a negative y yields 1/x^|y|.
static double dpow_int2(double x,int y) {
    // magnitude as unsigned so that INT_MIN cannot overflow on negation
    const unsigned int n = y < 0 ? 0u - (unsigned int)y : (unsigned int)y;
    if (n == 0) return 1;

    unsigned int bit = 1u;
    while (bit <= n / 2) bit <<= 1;         // highest set bit of n

    double u = x;                           // accounts for the top bit
    for (bit >>= 1; bit != 0; bit >>= 1) {
        u *= u;
        if (n & bit) u *= x;
    }
    return y < 0 ? 1 / u : u;
}
|
||||
|
||||
// for the case where x not zero or infinity, y small and not zero
|
||||
static inline double dpowint_1(double x,int y) {
|
||||
if(y<0) x=1/x,y=-y;
|
||||
return dpow_int2(x,y);
|
||||
}
|
||||
|
||||
// for the case where x not zero or infinity
|
||||
// x^y for integer y, for the case where x not zero or infinity.
// Negative bases are reduced to the positive case, exact powers of two are handled by
// exponent arithmetic, small |y| by repeated multiplication, everything else by dpow_1.
static double dpowint_0(double x,int y) {
    if (disneg(x)) {
        const double magnitude = dpowint_0(dneg(x), y);
        return disoddint(y) ? dneg(magnitude) : magnitude;
    }
    if (dispo2(x)) {
        const int e = dgetexp(x) - 0x3ff;
        if (y >= 2048) y = 2047;    // avoid overflow
        if (y < -2048) y = -2048;
        return dldexp(1, y * e);
    }
    if (y == 0) return 1;
    if (y >= -32 && y <= 32) return dpowint_1(x, y);
    return dpow_1(x, y);
}
|
||||
|
||||
double WRAPPER_FUNC(powint)(double x,int y) {
|
||||
GCC_Like_Pragma("GCC diagnostic push")
|
||||
GCC_Like_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
|
||||
if(x==1.0||y==0) return 1;
|
||||
GCC_Like_Pragma("GCC diagnostic pop")
|
||||
check_nan_d1(x);
|
||||
if(diszero(x)) {
|
||||
if(y>0) {
|
||||
if(y&1) return x;
|
||||
else return 0;
|
||||
}
|
||||
if((y&1)) return dcopysign(PINF,x);
|
||||
return PINF;
|
||||
}
|
||||
if(dispinf(x)) {
|
||||
if(y<0) return 0;
|
||||
else return PINF;
|
||||
}
|
||||
if(disminf(x)) {
|
||||
if(y>0) {
|
||||
if((y&1)) return MINF;
|
||||
else return PINF;
|
||||
}
|
||||
if((y&1)) return MZERO;
|
||||
else return PZERO;
|
||||
}
|
||||
return dpowint_0(x,y);
|
||||
}
|
||||
|
||||
// for the case where y is guaranteed a finite integer, x not zero or infinity
|
||||
// x^y for the case where y is guaranteed a finite integer, x not zero or infinity.
static double dpow_0(double x,double y) {
    int e,p;
    if(disneg(x)) {
        if(disoddint(y)) return dneg(dpow_0(dneg(x),y));
        else             return      dpow_0(dneg(x),y);
    }
    // Clamp in the double domain before converting: y may be an integer far outside the
    // range of int, and converting such a value with (int)y is undefined behaviour.
    // The clamp limits are the ones previously applied to p after conversion; a clamped p
    // lies outside [-32,32], so large exponents still reach dpow_1 with the original y.
    if(y>=2048)       p= 2047;
    else if(y<-2048)  p=-2048;
    else              p=(int)y;
    if(dispo2(x)) {
        // exact power of two: the result exponent is simply p*e, saturated by dldexp
        e=dgetexp(x)-0x3ff;
        return dldexp(1,p*e);
    }
    if(p==0) return 1;
    if(p>=-32&&p<=32) return dpowint_1(x,p);
    return dpow_1(x,y);
}
|
||||
|
||||
double WRAPPER_FUNC(pow)(double x,double y) {
|
||||
GCC_Like_Pragma("GCC diagnostic push")
|
||||
GCC_Like_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
|
||||
|
||||
if(x==1.0||diszero(y)) return 1;
|
||||
check_nan_d2(x, y);
|
||||
if(x==-1.0&&disinf(y)) return 1;
|
||||
GCC_Like_Pragma("GCC diagnostic pop")
|
||||
|
||||
if(diszero(x)) {
|
||||
if(!disneg(y)) {
|
||||
if(disoddint(y)) return x;
|
||||
else return 0;
|
||||
}
|
||||
if(disoddint(y)) return dcopysign(PINF,x);
|
||||
return PINF;
|
||||
}
|
||||
if(dispinf(x)) {
|
||||
if(disneg(y)) return 0;
|
||||
else return PINF;
|
||||
}
|
||||
if(disminf(x)) {
|
||||
if(!disneg(y)) {
|
||||
if(disoddint(y)) return MINF;
|
||||
else return PINF;
|
||||
}
|
||||
if(disoddint(y)) return MZERO;
|
||||
else return PZERO;
|
||||
}
|
||||
if(dispinf(y)) {
|
||||
if(dgetexp(x)<0x3ff) return PZERO;
|
||||
else return PINF;
|
||||
}
|
||||
if(disminf(y)) {
|
||||
if(dgetexp(x)<0x3ff) return PINF;
|
||||
else return PZERO;
|
||||
}
|
||||
if(disint(y)) return dpow_0(x,y);
|
||||
if(disneg(x)) return PINF;
|
||||
return dpow_1(x,y);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(hypot)(double x,double y) {
|
||||
check_nan_d2(x, y);
|
||||
int ex,ey;
|
||||
ex=dgetexp(x); ey=dgetexp(y);
|
||||
if(ex>=0x3ff+400||ey>=0x3ff+400) { // overflow, or nearly so
|
||||
x=dldexp(x,-600),y=dldexp(y,-600);
|
||||
return dldexp(sqrt(x*x+y*y), 600);
|
||||
}
|
||||
else if(ex<=0x3ff-400&&ey<=0x3ff-400) { // underflow, or nearly so
|
||||
x=dldexp(x, 600),y=dldexp(y, 600);
|
||||
return dldexp(sqrt(x*x+y*y),-600);
|
||||
}
|
||||
return sqrt(x*x+y*y);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(cbrt)(double x) {
|
||||
check_nan_d1(x);
|
||||
int e;
|
||||
if(disneg(x)) return dneg(cbrt(dneg(x)));
|
||||
if(diszero(x)) return dcopysign(PZERO,x);
|
||||
e=dgetexp(x)-0x3ff;
|
||||
e=(e*0x5555+0x8000)>>16; // ~e/3, rounded
|
||||
x=dldexp(x,-e*3);
|
||||
x=exp(log(x)*ONETHIRD);
|
||||
return dldexp(x,e);
|
||||
}
|
||||
|
||||
// reduces mx*2^e modulo my, returning bottom bits of quotient at *pquo
|
||||
// 2^52<=|mx|,my<2^53, e>=0; 0<=result<my
|
||||
// reduces mx*2^e modulo my, returning bottom bits of quotient at *pquo (when pquo is non-null)
// 2^52<=|mx|,my<2^53, e>=0; 0<=result<my
static i64 drem_0(i64 mx,i64 my,int e,int*pquo) {
    int quo = 0;

    if (e > 0) {
        const int recip = 0xffffffffU/(ui32)(my>>36);   // reciprocal estimate Q16
        do {
            const int step = e > 12 ? 12 : e;           // gain up to 12 bits on each iteration
            int q = (mx>>38)*recip;                     // Q30
            q = ((q>>(29-step))+1)>>1;                  // Q(step), rounded
            mx = (mx<<step) - my*q;
            quo = (quo<<step) + q;
            e -= step;
        } while (e > 0);
    }

    // final correction: the residue can be off by up to two multiples of my in either direction
    // (when e==0 mx can be nearly as big as 2my)
    for (int pass = 0; pass < 2; pass++) {
        if (mx >= my) { mx -= my; quo++; }
    }
    for (int pass = 0; pass < 2; pass++) {
        if (mx < 0) { mx += my; quo--; }
    }

    if (pquo) *pquo = quo;
    return mx;
}
|
||||
|
||||
double WRAPPER_FUNC(fmod)(double x,double y) {
|
||||
check_nan_d2(x, y);
|
||||
ui64 ix=double2ui64(x),iy=double2ui64(y);
|
||||
int sx,ex,ey;
|
||||
i64 mx,my;
|
||||
DUNPACKS(ix,sx,ex,mx);
|
||||
DUNPACK(iy,ey,my);
|
||||
if(ex==0x7ff) return dnan_or(PINF);
|
||||
if(ey==0) return PINF;
|
||||
if(ex==0) {
|
||||
if(!disneg(x)) return PZERO;
|
||||
return MZERO;
|
||||
}
|
||||
if(ex<ey) return x; // |x|<|y|, including case x=±0
|
||||
mx=drem_0(mx,my,ex-ey,0);
|
||||
if(sx) mx=-mx;
|
||||
return fix642double(mx,0x3ff-ey+52);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(remquo)(double x,double y,int*quo) {
|
||||
check_nan_d2(x, y);
|
||||
ui64 ix=double2ui64(x),iy=double2ui64(y);
|
||||
int sx,sy,ex,ey,q;
|
||||
i64 mx,my;
|
||||
DUNPACKS(ix,sx,ex,mx);
|
||||
DUNPACKS(iy,sy,ey,my);
|
||||
if(quo) *quo=0;
|
||||
if(ex==0x7ff) return PINF;
|
||||
if(ey==0) return PINF;
|
||||
if(ex==0) return PZERO;
|
||||
if(ey==0x7ff) return x;
|
||||
if(ex<ey-1) return x; // |x|<|y|/2
|
||||
if(ex==ey-1) {
|
||||
if(mx<=my) return x; // |x|<=|y|/2, even quotient
|
||||
// here |y|/2<|x|<|y|
|
||||
if(!sx) { // x>|y|/2
|
||||
mx-=my+my;
|
||||
ey--;
|
||||
q=1;
|
||||
} else { // x<-|y|/2
|
||||
mx=my+my-mx;
|
||||
ey--;
|
||||
q=-1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(sx) mx=-mx;
|
||||
mx=drem_0(mx,my,ex-ey,&q);
|
||||
if(mx+mx>my || (mx+mx==my&&(q&1)) ) { // |x|>|y|/2, or equality and an odd quotient?
|
||||
mx-=my;
|
||||
q++;
|
||||
}
|
||||
}
|
||||
if(sy) q=-q;
|
||||
if(quo) *quo=q;
|
||||
return fix642double(mx,0x3ff-ey+52);
|
||||
}
|
||||
|
||||
double WRAPPER_FUNC(drem)(double x,double y) { check_nan_d2(x, y); return remquo(x,y,0); }
|
||||
|
||||
double WRAPPER_FUNC(remainder)(double x,double y) { check_nan_d2(x, y); return remquo(x,y,0); }
|
||||
|
||||
GCC_Pragma("GCC diagnostic pop") // conversion
|
85
lib/main/pico-sdk/rp2_common/pico_double/double_none.S
Normal file
85
lib/main/pico-sdk/rp2_common/pico_double/double_none.S
Normal file
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
#include "pico/bootrom/sf_table.h"
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
// Stub used when double support is disabled.
// Every symbol below is declared with wrapper_func and no instructions are placed between
// the declarations, so (assuming wrapper_func only defines an entry label - TODO confirm
// against pico/asm_helper.S) all of them share the panic stub at the end of this list.
wrapper_func __aeabi_dadd
|
||||
wrapper_func __aeabi_ddiv
|
||||
wrapper_func __aeabi_dmul
|
||||
wrapper_func __aeabi_drsub
|
||||
wrapper_func __aeabi_dsub
|
||||
wrapper_func __aeabi_cdcmpeq
|
||||
wrapper_func __aeabi_cdrcmple
|
||||
wrapper_func __aeabi_cdcmple
|
||||
wrapper_func __aeabi_dcmpeq
|
||||
wrapper_func __aeabi_dcmplt
|
||||
wrapper_func __aeabi_dcmple
|
||||
wrapper_func __aeabi_dcmpge
|
||||
wrapper_func __aeabi_dcmpgt
|
||||
wrapper_func __aeabi_dcmpun
|
||||
wrapper_func __aeabi_i2d
|
||||
wrapper_func __aeabi_l2d
|
||||
wrapper_func __aeabi_ui2d
|
||||
wrapper_func __aeabi_ul2d
|
||||
wrapper_func __aeabi_d2iz
|
||||
wrapper_func __aeabi_d2lz
|
||||
wrapper_func __aeabi_d2uiz
|
||||
wrapper_func __aeabi_d2ulz
|
||||
wrapper_func __aeabi_d2f
|
||||
wrapper_func sqrt
|
||||
wrapper_func cos
|
||||
wrapper_func sin
|
||||
wrapper_func tan
|
||||
wrapper_func atan2
|
||||
wrapper_func exp
|
||||
wrapper_func log
|
||||
|
||||
wrapper_func ldexp
|
||||
wrapper_func copysign
|
||||
wrapper_func trunc
|
||||
wrapper_func floor
|
||||
wrapper_func ceil
|
||||
wrapper_func round
|
||||
wrapper_func sincos
|
||||
wrapper_func asin
|
||||
wrapper_func acos
|
||||
wrapper_func atan
|
||||
wrapper_func sinh
|
||||
wrapper_func cosh
|
||||
wrapper_func tanh
|
||||
wrapper_func asinh
|
||||
wrapper_func acosh
|
||||
wrapper_func atanh
|
||||
wrapper_func exp2
|
||||
wrapper_func log2
|
||||
wrapper_func exp10
|
||||
wrapper_func log10
|
||||
wrapper_func pow
|
||||
wrapper_func powint
|
||||
wrapper_func hypot
|
||||
wrapper_func cbrt
|
||||
wrapper_func fmod
|
||||
wrapper_func drem
|
||||
wrapper_func remainder
|
||||
wrapper_func remquo
|
||||
wrapper_func expm1
|
||||
wrapper_func log1p
|
||||
wrapper_func fma
|
||||
|
||||
// Shared stub body: pass the address of the message string to panic.
// RISC-V jumps to panic; Arm pushes lr first and then calls it.
#ifdef __riscv
|
||||
la a0, str
|
||||
j panic
|
||||
#else
|
||||
push {lr} // keep stack trace sane
|
||||
ldr r0, =str
|
||||
bl panic
|
||||
#endif
|
||||
|
||||
// Message handed to panic by the stub above.
str:
|
||||
.asciz "double support is disabled"
|
1518
lib/main/pico-sdk/rp2_common/pico_double/double_sci_m33.S
Normal file
1518
lib/main/pico-sdk/rp2_common/pico_double/double_sci_m33.S
Normal file
File diff suppressed because it is too large
Load diff
2174
lib/main/pico-sdk/rp2_common/pico_double/double_v1_rom_shim_rp2040.S
Normal file
2174
lib/main/pico-sdk/rp2_common/pico_double/double_v1_rom_shim_rp2040.S
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#ifndef _PICO_DOUBLE_H
|
||||
#define _PICO_DOUBLE_H
|
||||
|
||||
#include <math.h>
|
||||
#include "pico.h"
|
||||
#include "pico/bootrom/sf_table.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file double.h
|
||||
* \defgroup pico_double pico_double
|
||||
*
|
||||
* \brief Optimized double-precision floating point functions
|
||||
*
|
||||
* (Replacement) optimized implementations are provided of the following compiler built-ins
|
||||
* and math library functions:
|
||||
*
|
||||
* - __aeabi_dadd, __aeabi_ddiv, __aeabi_dmul, __aeabi_drsub, __aeabi_dsub, __aeabi_cdcmpeq, __aeabi_cdrcmple, __aeabi_cdcmple, __aeabi_dcmpeq, __aeabi_dcmplt, __aeabi_dcmple, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmpun, __aeabi_i2d, __aeabi_l2d, __aeabi_ui2d, __aeabi_ul2d, __aeabi_d2iz, __aeabi_d2lz, __aeabi_d2uiz, __aeabi_d2ulz, __aeabi_d2f
|
||||
* - sqrt, cos, sin, tan, atan2, exp, log, ldexp, copysign, trunc, floor, ceil, round, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, exp2, log2, exp10, log10, pow, hypot, cbrt, fmod, drem, remainder, remquo, expm1, log1p, fma
|
||||
* - powint, sincos (GNU extensions)
|
||||
*
|
||||
* The following additional optimized functions are also provided:
|
||||
*
|
||||
* - int2double, uint2double, int642double, uint642double, fix2double, ufix2double, fix642double, ufix642double
|
||||
* - double2fix, double2ufix, double2fix64, double2ufix64, double2int, double2uint, double2int64, double2uint64, double2int_z, double2int64_z,
|
||||
* - exp10, sincos, powint
|
||||
*
|
||||
* On RP2350 the following additional functions are available; the _fast methods are faster but do not round correctly:
|
||||
*
|
||||
* - ddiv_fast, sqrt_fast
|
||||
*/
|
||||
|
||||
double int2double(int32_t i);
|
||||
double uint2double(uint32_t u);
|
||||
double int642double(int64_t i);
|
||||
double uint642double(uint64_t u);
|
||||
double fix2double(int32_t m, int e);
|
||||
double ufix2double(uint32_t m, int e);
|
||||
double fix642double(int64_t m, int e);
|
||||
double ufix642double(uint64_t m, int e);
|
||||
|
||||
// These methods round towards -Infinity.
|
||||
int32_t double2fix(double d, int e);
|
||||
uint32_t double2ufix(double d, int e);
|
||||
int64_t double2fix64(double d, int e);
|
||||
uint64_t double2ufix64(double d, int e);
|
||||
int32_t double2int(double d);
|
||||
uint32_t double2uint(double d);
|
||||
int64_t double2int64(double d);
|
||||
uint64_t double2uint64(double d);
|
||||
|
||||
// These methods round towards 0.
|
||||
int32_t double2int_z(double d);
|
||||
int64_t double2int64_z(double d);
|
||||
|
||||
double exp10(double x);
|
||||
void sincos(double x, double *sinx, double *cosx);
|
||||
double powint(double x, int y);
|
||||
|
||||
#if !PICO_RP2040
|
||||
double ddiv_fast(double n, double d);
|
||||
double sqrt_fast(double d);
|
||||
double mla(double x, double y, double z); // note this is not fused
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue