mirror of https://github.com/betaflight/betaflight.git synced 2025-07-15 04:15:44 +03:00

Adding RP2350 SDK and target framework (#13988)

* Adding RP2350 SDK and target framework

* Spacing

* Removing board definitions
J Blackman 2024-10-23 10:02:48 +11:00 committed by GitHub
parent 462cb05930
commit 2dd6f95aad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
576 changed files with 435012 additions and 0 deletions


@@ -0,0 +1,253 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#if HAS_DOUBLE_COPROCESSOR
#include "hardware/dcp_instr.inc.S"
#include "hardware/dcp_canned.inc.S"
pico_default_asm_setup
// todo alignment
//__pre_init __aeabi_float_init, 00020
// factor out save/restore (there is a copy in double code)
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
// ============== STATE SAVE AND RESTORE ===============
.macro saving_func func
// Note we are usually 32-bit aligned already at this point, as most of the
// function bodies contain exactly two 16-bit instructions: bmi and bx lr.
// We want the PCMP word-aligned.
.p2align 2
// When the engaged flag is set, branch back here to invoke save routine and
// hook lr with the restore routine, then fall back through to the entry
// point. The engaged flag will be clear when checked a second time.
1:
push {lr} // 16-bit instruction
bl generic_save_state // 32-bit instruction
b 1f // 16-bit instruction
// This is the actual entry point:
wrapper_func \func
PCMP apsr_nzcv
bmi 1b
1:
.endm
.macro saving_func_return
bx lr
.endm
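// Rough C model of the wrappers that saving_func / saving_func_return build
// (names below are illustrative, not SDK symbols). PCMP sets N when the DCP is
// already engaged by other code; in that case the current PXMD/PYMD/REFD state
// is spilled to the stack and LR is hooked so the wrapped body returns through
// generic_restore_state:
//
//   float dcp_wrapped_op(float a, float b) {
//       struct { uint32_t xmd[2], ymd[2], efd[2]; } saved;
//       int engaged = dcp_engaged();       // PCMP apsr_nzcv; bmi 1b
//       if (engaged) dcp_save(&saved);     // generic_save_state
//       float r = dcp_op(a, b);            // the dcp_*_m macro body
//       if (engaged) dcp_restore(&saved);  // generic_restore_state, via hooked LR
//       return r;
//   }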
float_section __rp2350_dcp_engaged_state_save_restore
.thumb_func
generic_save_state:
sub sp, #24
push {r0, r1}
// do save here
PXMD r0, r1
strd r0, r1, [sp, #8 + 0]
PYMD r0, r1
strd r0, r1, [sp, #8 + 8]
REFD r0, r1
strd r0, r1, [sp, #8 + 16]
pop {r0, r1}
blx lr
// <- wrapped function returns here
// fall through into restore:
.thumb_func
generic_restore_state:
// do restore here
pop {r12, r14}
WXMD r12, r14
pop {r12, r14}
WYMD r12, r14
pop {r12, r14}
WEFD r12, r14
pop {pc}
// ============== ARITHMETIC FUNCTIONS ===============
float_wrapper_section __aeabi_fadd
saving_func __aeabi_fadd
dcp_fadd_m r0,r0,r1
saving_func_return
float_wrapper_section __aeabi_fsub
saving_func __aeabi_fsub
dcp_fsub_m r0,r0,r1
saving_func_return
float_wrapper_section __aeabi_frsub
saving_func __aeabi_frsub
dcp_fsub_m r0,r1,r0
saving_func_return
float_wrapper_section __aeabi_fmul
saving_func __aeabi_fmul
dcp_fmul_m r0,r0,r1,r0,r1
saving_func_return
float_section fdiv_fast
saving_func fdiv_fast
dcp_fdiv_fast_m r0,r0,r1,r0,r1,r2
saving_func_return
float_wrapper_section __aeabi_fdiv
saving_func __aeabi_fdiv
@ with correct rounding
dcp_fdiv_m r0,r0,r1,r0,r1,r2,r3
saving_func_return
float_section sqrtf_fast
saving_func sqrtf_fast
dcp_fsqrt_fast_m r0,r0,r0,r1,r2,r3
saving_func_return
float_wrapper_section sqrtf
saving_func sqrtf
@ with correct rounding
dcp_fsqrt_m r0,r0,r0,r1,r2,r3
saving_func_return
// todo not a real thing
float_wrapper_section __aeabi_fclassify
saving_func __aeabi_fclassify
dcp_fclassify_m apsr_nzcv,r0
saving_func_return
// ============== CONVERSION FUNCTIONS ===============
float_wrapper_section __aeabi_f2d
saving_func __aeabi_f2d
dcp_float2double_m r0,r1,r0
saving_func_return
float_wrapper_section __aeabi_i2f
saving_func __aeabi_i2f
@ with rounding
dcp_int2float_m r0,r0
saving_func_return
float_wrapper_section __aeabi_ui2f
saving_func __aeabi_ui2f
@ with rounding
dcp_uint2float_m r0,r0
saving_func_return
float_wrapper_section __aeabi_f2iz
saving_func __aeabi_f2iz
@ with truncation towards 0
dcp_float2int_m r0,r0
saving_func_return
float_wrapper_section __aeabi_f2uiz
saving_func __aeabi_f2uiz
@ with truncation towards 0
dcp_float2uint_m r0,r0
saving_func_return
// todo not a real thing
float_wrapper_section __aeabi_f2i_r
saving_func __aeabi_f2i_r
@ with rounding
dcp_float2int_r_m r0,r0
saving_func_return
// todo not a real thing
float_wrapper_section __aeabi_f2ui_r
saving_func __aeabi_f2ui_r
@ with rounding
dcp_float2uint_r_m r0,r0
saving_func_return
// ============== COMPARISON FUNCTIONS ===============
float_wrapper_section __aeabi_fcmpun
saving_func __aeabi_fcmpun
dcp_fcmp_m r0,r0,r1
// extract unordered bit
ubfx r0, r0, #28, #1
saving_func_return
float_wrapper_section __aeabi_fcmp
saving_func __aeabi_cfrcmple
dcp_fcmp_m apsr_nzcv,r1,r0 // with arguments reversed
bvs cmp_nan
saving_func_return
// these next two can be the same function in the absence of exceptions
saving_func __aeabi_cfcmple
dcp_fcmp_m apsr_nzcv,r0,r1
bvs cmp_nan
saving_func_return
// It is not clear from the ABI documentation whether cfcmpeq must set the C flag
// in the same way as cfcmple. If not, we could save the "bvs" below; but we
// err on the side of caution.
saving_func __aeabi_cfcmpeq
dcp_fcmp_m apsr_nzcv,r0,r1
bvs cmp_nan
saving_func_return
// If the result of a flag-setting comparison is "unordered" then we need to set C and clear Z.
// We could conceivably just do lsrs r12,r14,#1, or even cmp r14,r14,lsr#1 as (a) r14 here is a
// return address and r14b0=1 for Thumb mode; (b) we are unlikely to be returning to address 0.
cmp_nan:
movs r12, #3 // r12 does not need to be preserved by the flag-setting comparisons
lsrs r12, #1 // set C, clear Z
saving_func_return
float_wrapper_section __aeabi_fcmpeq
saving_func __aeabi_fcmpeq
dcp_fcmp_m r0,r0,r1
// extract Z
ubfx r0, r0, #30, #1
saving_func_return
float_wrapper_section __aeabi_fcmplt
saving_func __aeabi_fcmplt
dcp_fcmp_m apsr_nzcv,r1,r0
ite hi
movhi r0,#1
movls r0,#0
saving_func_return
float_wrapper_section __aeabi_fcmple
saving_func __aeabi_fcmple
dcp_fcmp_m apsr_nzcv,r1,r0
ite hs
movhs r0,#1
movlo r0,#0
saving_func_return
float_wrapper_section __aeabi_fcmpge
saving_func __aeabi_fcmpge
dcp_fcmp_m apsr_nzcv,r0,r1
ite hs
movhs r0,#1
movlo r0,#0
saving_func_return
float_wrapper_section __aeabi_fcmpgt
saving_func __aeabi_fcmpgt
dcp_fcmp_m apsr_nzcv,r0,r1
ite hi
movhi r0,#1
movls r0,#0
saving_func_return
#endif


@@ -0,0 +1,776 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#include "pico/runtime_init.h"
#include "pico/bootrom/sf_table.h"
#include "hardware/divider_helper.S"
PICO_RUNTIME_INIT_FUNC_RUNTIME(__aeabi_float_init, PICO_RUNTIME_INIT_AEABI_FLOAT)
pico_default_asm_setup
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
.macro _float_wrapper_func x
wrapper_func \x
.endm
.macro wrapper_func_f1 x
_float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
mov ip, lr
bl __check_nan_f1
mov lr, ip
#endif
.endm
.macro wrapper_func_f2 x
_float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
mov ip, lr
bl __check_nan_f2
mov lr, ip
#endif
.endm
.section .text
#if PICO_FLOAT_PROPAGATE_NANS
.thumb_func
__check_nan_f1:
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
bx lr
1:
bx ip
.thumb_func
__check_nan_f2:
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
lsls r2, r1, #1
adds r2, r3
bhi 2f
bx lr
2:
mov r0, r1
1:
bx ip
#endif
.macro table_tail_call SF_TABLE_OFFSET
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
#ifndef NDEBUG
movs r3, #0
mov ip, r3
#endif
#endif
ldr r3, =sf_table
ldr r3, [r3, #\SF_TABLE_OFFSET]
bx r3
.endm
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
ldr r3, =sf_table
ldr r3, [r3, #\SF_TABLE_OFFSET]
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
mov ip, pc
#endif
bx r3
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
.byte \SF_TABLE_OFFSET, 0xdf
.word \shim
#endif
.endm
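// Roughly equivalent C for a table_tail_call wrapper (illustrative only;
// SF_TABLE_FADD is used as an example offset and the cast assumes the
// two-float AEABI signature):
//
//   typedef float (*sf_fn2)(float, float);
//   float fadd_via_table(float a, float b) {
//       sf_fn2 f = (sf_fn2)(uintptr_t)sf_table[SF_TABLE_FADD / 4];
//       return f(a, b);   // bx r3: tail call through the ROM/shim table
//   }
//
// In shimmable_table_tail_call, the trailing ".byte \SF_TABLE_OFFSET, 0xdf"
// assembles to a Thumb "svc #offset" marker followed by the shim address; on a
// V1 ROM, float_table_shim_on_use_helper locates this marker via IP and patches
// the table entry the first time the wrapper is called (see the table
// initialisation code later in this commit).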
// note generally each function is in a separate section unless there is fall thru or branching between them
// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
// note functions are word aligned except where they are an odd number of linear instructions
// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
float_wrapper_section __aeabi_farithmetic
// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
// frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_frsub
eors r0, r1
eors r1, r0
eors r0, r1
// fall thru
// float FUNC_NAME(__aeabi_fsub)(float x, float y) single-precision subtraction, x - y
wrapper_func_f2 __aeabi_fsub
#if PICO_FLOAT_PROPAGATE_NANS
// we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
mov r2, r0
eors r2, r1
bmi 1f // different signs
push {r0, r1, lr}
bl 1f
b fdiv_fsub_nan_helper
1:
#endif
table_tail_call SF_TABLE_FSUB
wrapper_func_f2 __aeabi_fadd
table_tail_call SF_TABLE_FADD
// float FUNC_NAME(__aeabi_fdiv)(float n, float d) single-precision division, n / d
wrapper_func_f2 __aeabi_fdiv
#if PICO_FLOAT_PROPAGATE_NANS
push {r0, r1, lr}
bl 1f
b fdiv_fsub_nan_helper
1:
#endif
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs fdiv_save_state
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl fdiv_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
fdiv_shim_call:
table_tail_call SF_TABLE_FDIV
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
fdiv_save_state:
save_div_state_and_lr
bl fdiv_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS
pop {r1, r2}
// check for infinite op infinite (or rather check for infinite result with both
// operands being infinite)
lsls r3, r0, #1
asrs r3, r3, #24
adds r3, #1
beq 2f
pop {pc}
2:
lsls r1, #1
asrs r1, r1, #24
lsls r2, #1
asrs r2, r2, #24
ands r1, r2
adds r1, #1
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
3:
pop {pc}
#endif
// float FUNC_NAME(__aeabi_fmul)(float, float) single-precision multiplication
wrapper_func_f2 __aeabi_fmul
#if PICO_FLOAT_PROPAGATE_NANS
push {r0, r1, lr}
bl 1f
pop {r1, r2}
// check for multiplication of infinite by zero (or rather check for infinite result with either
// operand 0)
lsls r3, r0, #1
asrs r3, r3, #24
adds r3, #1
beq 2f
pop {pc}
2:
ands r1, r2
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
3:
pop {pc}
1:
#endif
table_tail_call SF_TABLE_FMUL
// void FUNC_NAME(__aeabi_cfrcmple)(float, float) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
float_wrapper_section __aeabi_cfcmple
.align 2
wrapper_func __aeabi_cfrcmple
push {r0-r2, lr}
eors r0, r1
eors r1, r0
eors r0, r1
b __aeabi_cfcmple_guts
// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cfcmple)(float, float) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cfcmpeq)(float, float) non-excepting equality comparison [1], result in PSR ZC flags
.align 2
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_cfcmpeq
push {r0-r2, lr}
__aeabi_cfcmple_guts:
lsls r2,r0,#1
lsrs r2,#24
beq 1f
cmp r2,#0xff
bne 2f
lsls r2, r0, #9
bhi 3f
1:
lsrs r0,#23 @ clear mantissa if denormal or infinite
lsls r0,#23
2:
lsls r2,r1,#1
lsrs r2,#24
beq 1f
cmp r2,#0xff
bne 2f
lsls r2, r1, #9
bhi 3f
1:
lsrs r1,#23 @ clear mantissa if denormal or infinite
lsls r1,#23
2:
movs r2,#1 @ initialise result
eors r1,r0
bmi 2f @ opposite signs? then can proceed on basis of sign of x
eors r1,r0 @ restore y
bpl 1f
cmp r1,r0
pop {r0-r2, pc}
1:
cmp r0,r1
pop {r0-r2, pc}
2:
orrs r1, r0 @ handle 0/-0
adds r1, r1 @ note this always sets C
beq 3f
mvns r0, r0 @ carry inverse of r0 sign
adds r0, r0
3:
pop {r0-r2, pc}
// int FUNC_NAME(__aeabi_fcmpeq)(float, float) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
float_wrapper_section __aeabi_fcmpeq
.align 2
wrapper_func __aeabi_fcmpeq
push {lr}
bl __aeabi_cfcmpeq
beq 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmplt)(float, float) result (1, 0) denotes (<, ?>=) [2], use for C <
float_wrapper_section __aeabi_fcmplt
.align 2
wrapper_func __aeabi_fcmplt
push {lr}
bl __aeabi_cfcmple
sbcs r0, r0
pop {pc}
// int FUNC_NAME(__aeabi_fcmple)(float, float) result (1, 0) denotes (<=, ?>) [2], use for C <=
float_wrapper_section __aeabi_fcmple
.align 2
wrapper_func __aeabi_fcmple
push {lr}
bl __aeabi_cfcmple
bls 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmpge)(float, float) result (1, 0) denotes (>=, ?<) [2], use for C >=
float_wrapper_section __aeabi_fcmpge
.align 2
wrapper_func __aeabi_fcmpge
push {lr}
// because of NaNs it is better to reverse the args than the result
bl __aeabi_cfrcmple
bls 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmpgt)(float, float) result (1, 0) denotes (>, ?<=) [2], use for C >
float_wrapper_section __aeabi_fcmpgt
wrapper_func __aeabi_fcmpgt
push {lr}
// because of NaNs it is better to reverse the args than the result
bl __aeabi_cfrcmple
sbcs r0, r0
pop {pc}
// int FUNC_NAME(__aeabi_fcmpun)(float, float) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
float_wrapper_section __aeabi_fcmpun
wrapper_func __aeabi_fcmpun
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
lsls r2, r1, #1
adds r2, r3
bhi 1f
movs r0, #0
bx lr
1:
movs r0, #1
bx lr
// float FUNC_NAME(__aeabi_ui2f)(unsigned) unsigned to float (single precision) conversion
float_wrapper_section __aeabi_ui2f
wrapper_func __aeabi_ui2f
regular_func uint2float
subs r1, r1
cmp r0, #0
bne __aeabi_i2f_main
mov r0, r1
bx lr
float_wrapper_section __aeabi_i2f
// float FUNC_NAME(__aeabi_i2f)(int) integer to float (single precision) conversion
wrapper_func __aeabi_i2f
regular_func int2float
lsrs r1, r0, #31
lsls r1, #31
bpl 1f
negs r0, r0
1:
cmp r0, #0
beq 7f
__aeabi_i2f_main:
mov ip, lr
push {r0, r1}
ldr r3, =sf_clz_func
ldr r3, [r3]
blx r3
pop {r1, r2}
lsls r1, r0
subs r0, #158
negs r0, r0
adds r1,#0x80 @ rounding
bcs 5f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)
lsls r3,r1,#24 @ check bottom 8 bits of r1
beq 6f @ in rounding-tie case?
lsls r1,#1 @ remove leading 1
3:
lsrs r1,#9 @ align mantissa
lsls r0,#23 @ align exponent
orrs r0,r2 @ assemble exponent and mantissa
4:
orrs r0,r1 @ apply sign
1:
bx ip
5:
adds r0,#1 @ correct exponent offset
b 3b
6:
lsrs r1,#9 @ ensure even result
lsls r1,#10
b 3b
7:
bx lr
// int FUNC_NAME(__aeabi_f2iz)(float) float (single precision) to integer C-style conversion [3]
float_wrapper_section __aeabi_f2iz
wrapper_func __aeabi_f2iz
regular_func float2int_z
lsls r1, r0, #1
lsrs r2, r1, #24
movs r3, #0x80
lsls r3, #24
cmp r2, #126
ble 1f
subs r2, #158
bge 2f
asrs r1, r0, #31
lsls r0, #9
lsrs r0, #1
orrs r0, r3
negs r2, r2
lsrs r0, r2
lsls r1, #1
adds r1, #1
muls r0, r1
bx lr
1:
movs r0, #0
bx lr
2:
lsrs r0, #31
adds r0, r3
subs r0, #1
bx lr
cmn r0, r0
bcc float2int
push {lr}
lsls r0, #1
lsrs r0, #1
movs r1, #0
bl __aeabi_f2uiz
cmp r0, #0
bmi 1f
negs r0, r0
pop {pc}
1:
movs r0, #128
lsls r0, #24
pop {pc}
float_section float2int
regular_func float2int
shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
float_section float2fix
regular_func float2fix
shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
float_section float2ufix
regular_func float2ufix
table_tail_call SF_TABLE_FLOAT2UFIX
// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
float_wrapper_section __aeabi_f2uiz
wrapper_func __aeabi_f2uiz
regular_func float2uint_z
table_tail_call SF_TABLE_FLOAT2UINT
float_section fix2float
regular_func fix2float
table_tail_call SF_TABLE_FIX2FLOAT
float_section ufix2float
regular_func ufix2float
table_tail_call SF_TABLE_UFIX2FLOAT
float_section fix642float
regular_func fix642float
shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim
float_section ufix642float
regular_func ufix642float
shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim
// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion
float_wrapper_section __aeabi_l2f
1:
ldr r2, =__aeabi_i2f
bx r2
wrapper_func __aeabi_l2f
regular_func int642float
asrs r2, r0, #31
cmp r1, r2
beq 1b
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim
// float FUNC_NAME(__aeabi_ul2f)(unsigned long long) unsigned long long to float (single precision) conversion
float_wrapper_section __aeabi_ul2f
1:
ldr r2, =__aeabi_ui2f
bx r2
wrapper_func __aeabi_ul2f
regular_func uint642float
cmp r1, #0
beq 1b
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim
// long long FUNC_NAME(__aeabi_f2lz)(float) float (single precision) to long long C-style conversion [3]
float_wrapper_section __aeabi_f2lz
wrapper_func __aeabi_f2lz
regular_func float2int64_z
cmn r0, r0
bcc float2int64
push {lr}
lsls r0, #1
lsrs r0, #1
movs r1, #0
bl float2ufix64
cmp r1, #0
bmi 1f
movs r2, #0
negs r0, r0
sbcs r2, r1
mov r1, r2
pop {pc}
1:
movs r1, #128
lsls r1, #24
movs r0, #0
pop {pc}
float_section float2int64
regular_func float2int64
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
float_section float2fix64
regular_func float2fix64
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
float_wrapper_section __aeabi_f2ulz
wrapper_func __aeabi_f2ulz
regular_func float2uint64_z
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
float_section float2ufix64
regular_func float2ufix64
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
float_wrapper_section __aeabi_f2d
1:
#if PICO_FLOAT_PROPAGATE_NANS
// copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
asrs r1, r0, #3
movs r2, #0xf
lsls r2, #27
orrs r1, r2
lsls r0, #25
bx lr
#endif
wrapper_func __aeabi_f2d
#if PICO_FLOAT_PROPAGATE_NANS
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1b
#endif
shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim
float_wrapper_section sqrtf
wrapper_func_f1 sqrtf
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
// check for negative
asrs r1, r0, #23
bmi 1f
#endif
table_tail_call SF_TABLE_FSQRT
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
1:
mvns r0, r1
cmp r0, #255
bne 2f
// -0 or -Denormal return -0 (0x80000000)
lsls r0, #31
bx lr
2:
// return -Inf (0xff800000)
asrs r0, r1, #31
lsls r0, #23
bx lr
#endif
float_wrapper_section cosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func cosf
// rom version only works for -128 < angle < 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
table_tail_call SF_TABLE_FCOS
1:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
float_wrapper_section sinf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sinf
// rom version only works for -128 < angle < 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
table_tail_call SF_TABLE_FSIN
1:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
float_wrapper_section sincosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sincosf
push {r1, r2, lr}
// rom version only works for -128 < angle < 128
lsls r3, r0, #1
lsrs r3, #24
cmp r3, #127 + 7
bge 3f
2:
ldr r3, =sf_table
ldr r3, [r3, #SF_TABLE_FSIN]
blx r3
pop {r2, r3}
str r0, [r2]
str r1, [r3]
pop {pc}
#if PICO_FLOAT_PROPAGATE_NANS
.align 2
pop {pc}
#endif
3:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r3, #255
bne 4f
// infinite to nan
movs r3, #1
lsls r3, #22
orrs r0, r3
str r0, [r1]
str r0, [r2]
add sp, #12
bx lr
4:
#endif
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
float_wrapper_section tanf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func tanf
// rom version only works for -128 < angle < 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge ftan_out_of_range
ftan_in_range:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs ftan_save_state
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl ftan_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
ftan_shim_call:
table_tail_call SF_TABLE_FTAN
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ftan_save_state:
save_div_state_and_lr
bl ftan_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
ftan_out_of_range:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b ftan_in_range
float_wrapper_section atan2f
wrapper_func_f2 atan2f
shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim
float_wrapper_section expf
wrapper_func_f1 expf
table_tail_call SF_TABLE_FEXP
float_wrapper_section logf
wrapper_func_f1 logf
table_tail_call SF_TABLE_FLN


@@ -0,0 +1,369 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#if !PICO_RP2040
#include "pico/asm_helper.S"
pico_default_asm_setup
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
float_wrapper_section conv_tof
@ convert int64 to float, rounding
wrapper_func __aeabi_l2f
regular_func int642float
movs r2,#0 @ fall through
@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
regular_func fix642float
cmp r1,#0
bge 10f @ positive? use unsigned code
rsbs r0,#0
sbc r1,r1,r1,lsl#1 @ make positive
cbz r1,7f @ high word is zero?
clz r3,r1
subs r3,#8
bmi 2f
lsls r1,r3
lsls r12,r0,r3 @ bits that will be lost
rsb r3,#32
lsr r0,r3
orr r0,r0,r1
sub r2,r2,r3
rsb r2,#149
adds r12,r12,r12 @ rounding bit into carry
adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
orr r0,r0,#0x80000000
beq 4f @ potential rounding tie?
cmp r2,#0xfe
bhs 3f @ over/underflow?
bx r14
2:
add r3,#33
lsls r12,r1,r3 @ rounding bit in carry, sticky bits in r12
orrs r12,r12,r0 @ all of low word into sticky bits: affects Z but not C
rsb r3,#33
lsr r0,r1,r3
@ push {r14}
@ bl dumpreg
@ pop {r14}
sub r2,r3,r2
add r2,#22+127+32
adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
orr r0,r0,#0x80000000
beq 4f @ potential rounding tie?
cmp r2,#0xfe
it lo
bxlo r14
@ over/underflow?
3:
mov r0,#0x80000000 @ underflow
it ge
movtge r0,#0xff80 @ overflow
1:
bx r14
7:
mov r1,r2
b fix2float_neg
4:
it cs @ rounding tie?
biccs r0,r0,#1 @ force to even if we rounded up
cmp r2,#0xfe
it lo
bxlo r14
b 3b
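@ Worked example (illustrative values): fix2float below takes the value in r0
@ and the number of fraction bits in r1, returning x*2^-n rounded to
@ nearest-even; e.g. fix2float(0x00018000, 16) is 98304/65536 = 1.5 and packs
@ to 0x3fc00000. int2float is the n=0 case.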
@ convert signed 32-bit fix to float, rounding; number of r0 bits after point in r1
.thumb_func
regular_func fix2float
cmp r0,#0
bge ufix2float @ positive? can use unsigned code
rsbs r0,#0 @ make positive
fix2float_neg:
clz r3,r0
subs r3,#8
bmi 2f
lsls r0,r3
add r2,r1,r3
rsb r2,#149
add r0,r0,r2,lsl#23 @ insert exponent
orr r0,#0x80000000
cmp r2,#0xfe
it lo @ over/underflow?
bxlo r14
b 3f
2:
add r3,#33
lsls r12,r0,r3 @ rounding bit in carry, sticky bits in r12
rsb r3,#33
lsr r0,r3
@ push {r14}
@ bl dumpreg
@ pop {r14}
sub r2,r3,r1
add r2,#22+127
adc r0,r0,r2,lsl#23 @ insert exponent
orr r0,#0x80000000
beq 4f @ potential rounding tie?
cmp r2,#0xfe
it lo
bxlo r14
@ over/underflow?
3:
mov r0,#0x80000000 @ underflow
it ge
orrge r0,#0x7f800000 @ overflow
1:
bx r14
4:
it cs @ rounding tie?
biccs r0,r0,#1 @ force to even if we rounded up
cmp r2,#0xfe
it lo
bxlo r14
b 3b
@ convert unsigned 32-bit fix to float, rounding; number of r0 bits after point in r1
regular_func ufix2float
cbz r0,1f @ zero? return it
clz r3,r0
subs r3,#8
bmi 2f
lsls r0,r3
add r2,r1,r3
rsb r2,#149
add r0,r0,r2,lsl#23 @ insert exponent
@ push {r14}
@ bl dumpreg
@ pop {r14}
cmp r2,#0xfe
it lo @ over/underflow?
bxlo r14
b 3f
2:
add r3,#33
lsls r12,r0,r3 @ rounding bit in carry, sticky bits in r12
rsb r3,#33
lsr r0,r3
@ push {r14}
@ bl dumpreg
@ pop {r14}
sub r2,r3,r1
add r2,#22+127
adc r0,r0,r2,lsl#23 @ insert exponent
beq 4f @ potential rounding tie?
cmp r2,#0xfe
it lo
bxlo r14
@ over/underflow?
3:
ite ge
movge r0,#0x7f800000 @ overflow
movlt r0,#0x00000000 @ underflow
1:
bx r14
4:
it cs @ rounding tie?
biccs r0,r0,#1 @ force to even if we rounded up
cmp r2,#0xfe
it lo
bxlo r14
b 3b
@ convert uint64 to float, rounding
wrapper_func __aeabi_ul2f
regular_func uint642float
movs r2,#0 @ fall through
@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
regular_func ufix642float
10:
cbz r1,7f @ high word is zero?
clz r3,r1
subs r3,#8
bmi 2f
lsls r1,r3
lsls r12,r0,r3 @ bits that will be lost
rsb r3,#32
lsr r0,r3
orr r0,r0,r1
sub r2,r2,r3
rsb r2,#149
adds r12,r12,r12 @ rounding bit into carry
adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
beq 4f @ potential rounding tie?
cmp r2,#0xfe
bhs 3f @ over/underflow?
bx r14
2:
add r3,#33
lsls r12,r1,r3 @ rounding bit in carry, sticky bits in r12
orrs r12,r12,r0 @ all of low word into sticky bits: affects Z but not C
rsb r3,#33
lsr r0,r1,r3
@ push {r14}
@ bl dumpreg
@ pop {r14}
sub r2,r3,r2
add r2,#22+127+32
adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
beq 4f @ potential rounding tie?
cmp r2,#0xfe
it lo
bxlo r14
@ over/underflow?
3:
ite ge
movge r0,#0x7f800000 @ overflow
movlt r0,#0x00000000 @ underflow
1:
bx r14
7:
mov r1,r2
b ufix2float
4:
it cs @ rounding tie?
biccs r0,r0,#1 @ force to even if we rounded up
cmp r2,#0xfe
it lo
bxlo r14
b 3b
float_wrapper_section conv_ftoi64
@ convert float to signed int64, rounding towards 0, clamping
wrapper_func __aeabi_f2lz
regular_func float2int64_z
movs r1,#0 @ fall through
@ convert float in r0 to signed fixed point in r0:r1, clamping
regular_func float2fix64_z
subs r1,#0x95 @ remove exponent bias, compensate for mantissa length
asrs r2,r0,#23 @ sign and exponent
sub r3,r2,#1
sub r0,r0,r3,lsl#23 @ install implied 1, clear exponent
uxtb r3,r3
cmp r3,#0xfe
bhs 1f @ 0 or Inf/NaN?
adds r1,r3 @ offset exponent by fix precision; r1 is now required left shift
bmi 4f @ actually a right shift?
subs r3,r1,#32 @ result fits in high 32 bits only?
bge 8f
subs r3,r1,#8 @ result fits in low 32 bits only?
ble 7f
lsls r0,#8
rsbs r1,r3,#32
lsrs r1,r0,r1
lsls r0,r3
cmp r2,#0
it ge
bxge r14
rsbs r0,#0 @ negate if necessary
sbcs r1,r1,r1,lsl#1
bx r14
7:
lsls r0,r0,r1
movs r1,r2,asr#31 @ sign extend
eors r0,r0,r1 @ negate if necessary
subs r0,r0,r1
bx r14
8:
cmp r3,#8 @ overflow?
bge 5f
lsls r0,r0,r3
eor r1,r0,r2,asr#31 @ negate if necessary
add r1,r1,r2,lsr#31
movs r0,#0
bx r14
1:
bhi 3f @ 0?
lsls r1,r0,#9 @ mantissa field
it ne @ NaN?
movne r2,#0 @ treat NaNs as +∞
5:
mvn r1,#0x80000000 @ = 0x7fffffff
add r1,r1,r2,lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff
mvn r0,r2,asr#31
bx r14
3:
movs r0,#0
movs r1,#0
bx r14
4:
rsbs r1,#0
usat r1,#5,r1
lsrs r0,r0,r1
eors r0,r0,r2,asr#31 @ negate if necessary
adds r0,r0,r2,lsr#31
movs r1,r0,asr#31 @ sign extend
bx r14
float_wrapper_section conv_ftoui64
@ convert float to unsigned int64, rounding towards -Inf, clamping
wrapper_func __aeabi_f2ulz
regular_func float2uint64
regular_func float2uint64_z
movs r1,#0 @ fall through
@ convert float in r0 to unsigned fixed point in r0:r1, clamping
regular_func float2ufix64
//regular_func float2ufix64_z
subs r1,#0x96 @ remove exponent bias, compensate for mantissa length
asrs r2,r0,#23 @ sign and exponent
sub r3,r2,#1
cmp r3,#0xfe
bhs 1f @ -ve, 0 or Inf/NaN?
sub r0,r0,r3,lsl#23 @ install implied 1, clear exponent
adds r1,r2 @ offset exponent by fix precision; r1 is now required left shift
bmi 4f @ actually a right shift?
subs r2,r1,#7
ble 7f @ result (easily) fits in lo 32 bits?
subs r3,r1,#32
bge 8f @ results might fit in hi 32 bits?
lsls r0,r0,#7
rsbs r3,r2,#32
lsrs r1,r0,r3
lsls r0,r0,r2
bx r14
7:
lsls r0,r1
movs r1,#0
bx r14
8:
cmp r1,#32+9 @ overflow?
bge 5f
lsls r1,r0,r3
movs r0,#0
bx r14
5:
mvn r0,#0 @ = 0xffffffff
mvn r1,#0 @ = 0xffffffff
bx r14
4:
rsbs r1,#0
usat r1,#5,r1 @ if shift is long return 0
lsrs r0,r0,r1
movs r1,#0
bx r14
1:
cmp r0,#0xff800000
bhi 5b @ -NaN, return 0xffffffff
cmp r0,#0x00800000
bgt 5b @ +Inf or +NaN, return 0xffffffff
2:
movs r0,#0 @ return 0
movs r1,#0
bx r14
#endif


@@ -0,0 +1,70 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <string.h>
#include "pico/bootrom.h"
#include "pico/bootrom/sf_table.h"
// NOTE THIS FUNCTION TABLE IS NOT PUBLIC OR NECESSARILY COMPLETE...
// IT IS ***NOT*** SAFE TO CALL THESE FUNCTION POINTERS FROM ARBITRARY CODE
uint32_t sf_table[SF_TABLE_V2_SIZE / 2];
void __attribute__((weak)) *sf_clz_func;
#if !(PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED)
static __attribute__((noreturn)) void missing_float_func_shim(void) {
panic("");
}
#endif
void __aeabi_float_init(void) {
int rom_version = rp2040_rom_version();
void *rom_table = rom_data_lookup(rom_table_code('S', 'F'));
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
if (rom_version == 1) {
memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE);
extern void float_table_shim_on_use_helper(void);
// todo replace NDEBUG with a more exclusive assertion guard
#ifndef NDEBUG
if (*(uint16_t *)0x29ee != 0x0fc4 || // this is packx
*(uint16_t *)0x29c0 != 0x0dc2 || // this is upackx
*(uint16_t *)0x2b96 != 0xb5c0 || // this is cordic_vec
*(uint16_t *)0x2b18 != 0x2500 || // this is packretns
*(uint16_t *)0x2acc != 0xb510 || // this is float2fix
*(uint32_t *)0x2cfc != 0x6487ed51 // pi_q29
) {
panic("");
}
#endif
// this is a little tricky.. we only want to pull in a shim if the corresponding function
// is called. to that end we include a SVC instruction with the table offset as the call number
// followed by the shim function pointer inside the actual wrapper function. that way if the wrapper
// function is garbage collected, so is the shim function.
//
// float_table_shim_on_use_helper expects this SVC instruction in the calling code soon after the address
// pointed to by IP and patches the float_table entry with the real shim the first time the function is called.
for(uint i=SF_TABLE_V1_SIZE/4; i<SF_TABLE_V2_SIZE/4; i++) {
sf_table[i] = (uintptr_t)float_table_shim_on_use_helper;
}
// we shim these for -0 and -denormal handling
sf_table[SF_TABLE_FLOAT2INT/4] = sf_table[SF_TABLE_FLOAT2FIX/4] = (uintptr_t)float_table_shim_on_use_helper;
}
#else
if (rom_version == 1) {
memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE);
// opting for soft failure for now - you'll get a panic at runtime if you call any of the missing methods
for(uint i=0;i<SF_TABLE_V2_SIZE/4;i++) {
if (!sf_table[i]) sf_table[i] = (uintptr_t)missing_float_func_shim;
}
}
#endif
if (rom_version >= 2) {
assert(*((uint8_t *)rom_table-2) * 4 >= SF_TABLE_V2_SIZE);
memcpy(&sf_table, rom_table, SF_TABLE_V2_SIZE);
}
sf_clz_func = rom_func_lookup(ROM_FUNC_CLZ32);
}


@@ -0,0 +1,582 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/float.h"
// opened a separate issue https://github.com/raspberrypi/pico-sdk/issues/166 to deal with these warnings if at all
GCC_Pragma("GCC diagnostic push")
GCC_Pragma("GCC diagnostic ignored \"-Wconversion\"")
GCC_Pragma("GCC diagnostic ignored \"-Wsign-conversion\"")
typedef uint32_t ui32;
typedef int32_t i32;
#define FPINF ( HUGE_VALF)
#define FMINF (-HUGE_VALF)
#define NANF ((float)NAN)
#define PZERO (+0.0)
#define MZERO (-0.0)
#define PI 3.14159265358979323846
#define LOG2 0.69314718055994530941
// Unfortunately in double precision ln(10) is very close to half-way between two representable numbers
#define LOG10 2.30258509299404568401
#define LOG2E 1.44269504088896340737
#define LOG10E 0.43429448190325182765
#define ONETHIRD 0.33333333333333333333
#define PIf 3.14159265358979323846f
#define LOG2f 0.69314718055994530941f
#define LOG2Ef 1.44269504088896340737f
#define LOG10Ef 0.43429448190325182765f
#define ONETHIRDf 0.33333333333333333333f
#define FUNPACK(x,e,m) e=((x)>>23)&0xff,m=((x)&0x007fffff)|0x00800000
#define FUNPACKS(x,s,e,m) s=((x)>>31),FUNPACK((x),(e),(m))
typedef union {
float f;
ui32 ix;
} float_ui32;
static inline float ui322float(ui32 ix) {
float_ui32 tmp;
tmp.ix = ix;
return tmp.f;
}
static inline ui32 float2ui32(float f) {
float_ui32 tmp;
tmp.f = f;
return tmp.ix;
}
#if PICO_FLOAT_PROPAGATE_NANS
static inline bool fisnan(float x) {
ui32 ix=float2ui32(x);
return ix * 2 > 0xff000000u;
}
#define check_nan_f1(x) if (fisnan((x))) return (x)
#define check_nan_f2(x,y) if (fisnan((x))) return (x); else if (fisnan((y))) return (y);
#else
#define check_nan_f1(x) ((void)0)
#define check_nan_f2(x,y) ((void)0)
#endif
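/*
 * Quick sanity check of the doubled-bits NaN test above (illustrative values,
 * assuming the usual quiet-NaN encoding 0x7fc00000 for NANF): doubling ix
 * discards the sign bit, so only a NaN (exponent all ones, mantissa non-zero)
 * compares strictly greater than 0xff000000u.
 *
 *   fisnan(NANF)   -> true   (0x7fc00000u * 2 == 0xff800000u  >  0xff000000u)
 *   fisnan(FPINF)  -> false  (0x7f800000u * 2 == 0xff000000u, not greater)
 *   fisnan(-1.0f)  -> false  (0xbf800000u * 2 == 0x7f000000u, wraps low)
 */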
static inline int fgetsignexp(float x) {
ui32 ix=float2ui32(x);
return (ix>>23)&0x1ff;
}
static inline int fgetexp(float x) {
ui32 ix=float2ui32(x);
return (ix>>23)&0xff;
}
static inline float fldexp(float x,int de) {
ui32 ix=float2ui32(x),iy;
int e;
e=fgetexp(x);
if(e==0||e==0xff) return x;
e+=de;
if(e<=0) iy=ix&0x80000000; // signed zero for underflow
else if(e>=0xff) iy=(ix&0x80000000)|0x7f800000ULL; // signed infinity on overflow
else iy=ix+((ui32)de<<23);
return ui322float(iy);
}
float WRAPPER_FUNC(ldexpf)(float x, int de) {
check_nan_f1(x);
return fldexp(x, de);
}
static inline float fcopysign(float x,float y) {
ui32 ix=float2ui32(x),iy=float2ui32(y);
ix=((ix&0x7fffffff)|(iy&0x80000000));
return ui322float(ix);
}
float WRAPPER_FUNC(copysignf)(float x, float y) {
check_nan_f2(x,y);
return fcopysign(x, y);
}
static inline int fiszero(float x) { return fgetexp (x)==0; }
//static inline int fispzero(float x) { return fgetsignexp(x)==0; }
//static inline int fismzero(float x) { return fgetsignexp(x)==0x100; }
static inline int fisinf(float x) { return fgetexp (x)==0xff; }
static inline int fispinf(float x) { return fgetsignexp(x)==0xff; }
static inline int fisminf(float x) { return fgetsignexp(x)==0x1ff; }
static inline int fisint(float x) {
ui32 ix=float2ui32(x),m;
int e=fgetexp(x);
if(e==0) return 1; // 0 is an integer
e-=0x7f; // remove exponent bias
if(e<0) return 0; // |x|<1
e=23-e; // bit position in mantissa with significance 1
if(e<=0) return 1; // |x| large, so must be an integer
m=(1<<e)-1; // mask for bits of significance <1
if(ix&m) return 0; // not an integer
return 1;
}
static inline int fisoddint(float x) {
ui32 ix=float2ui32(x),m;
int e=fgetexp(x);
e-=0x7f; // remove exponent bias
if(e<0) return 0; // |x|<1; 0 is not odd
e=23-e; // bit position in mantissa with significance 1
if(e<0) return 0; // |x| large, so must be even
m=(1<<e)-1; // mask for bits of significance <1 (if any)
if(ix&m) return 0; // not an integer
if(e==23) return 1; // value is exactly 1
return (ix>>e)&1;
}
static inline int fisstrictneg(float x) {
ui32 ix=float2ui32(x);
if(fiszero(x)) return 0;
return ix>>31;
}
static inline int fisneg(float x) {
ui32 ix=float2ui32(x);
return ix>>31;
}
static inline float fneg(float x) {
ui32 ix=float2ui32(x);
ix^=0x80000000;
return ui322float(ix);
}
static inline int fispo2(float x) {
ui32 ix=float2ui32(x);
if(fiszero(x)) return 0;
if(fisinf(x)) return 0;
ix&=0x007fffff;
return ix==0;
}
static inline float fnan_or(float x) {
#if PICO_FLOAT_PROPAGATE_NANS
return NANF;
#else
return x;
#endif
}
float WRAPPER_FUNC(truncf)(float x) {
check_nan_f1(x);
ui32 ix=float2ui32(x),m;
int e=fgetexp(x);
e-=0x7f; // remove exponent bias
if(e<0) { // |x|<1
ix&=0x80000000;
return ui322float(ix);
}
e=23-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1<<e)-1; // mask for bits of significance <1
ix&=~m;
return ui322float(ix);
}
float WRAPPER_FUNC(roundf)(float x) {
check_nan_f1(x);
ui32 ix=float2ui32(x),m;
int e=fgetexp(x);
e-=0x7f; // remove exponent bias
if(e<-1) { // |x|<0.5
ix&=0x80000000;
return ui322float(ix);
}
if(e==-1) { // 0.5<=|x|<1
ix&=0x80000000;
ix|=0x3f800000; // ±1
return ui322float(ix);
}
e=23-e; // bit position in mantissa with significance 1, <=23
if(e<=0) return x; // |x| large, so must be an integer
m=1<<(e-1); // mask for bit of significance 0.5
ix+=m;
m=m+m-1; // mask for bits of significance <1
ix&=~m;
return ui322float(ix);
}
float WRAPPER_FUNC(floorf)(float x) {
check_nan_f1(x);
ui32 ix=float2ui32(x),m;
int e=fgetexp(x);
if(e==0) { // x==0
ix&=0x80000000;
return ui322float(ix);
}
e-=0x7f; // remove exponent bias
if(e<0) { // |x|<1, not zero
if(fisneg(x)) return -1;
return PZERO;
}
e=23-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1<<e)-1; // mask for bit of significance <1
if(fisneg(x)) ix+=m; // add 1-ε to magnitude if negative
ix&=~m; // truncate
return ui322float(ix);
}
float WRAPPER_FUNC(ceilf)(float x) {
check_nan_f1(x);
ui32 ix=float2ui32(x),m;
int e=fgetexp(x);
if(e==0) { // x==0
ix&=0x80000000;
return ui322float(ix);
}
e-=0x7f; // remove exponent bias
if(e<0) { // |x|<1, not zero
if(fisneg(x)) return MZERO;
return 1;
}
e=23-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1<<e)-1; // mask for bit of significance <1
if(!fisneg(x)) ix+=m; // add 1-ε to magnitude if positive
ix&=~m; // truncate
return ui322float(ix);
}
float WRAPPER_FUNC(asinf)(float x) {
check_nan_f1(x);
float u;
u=(1.0f-x)*(1.0f+x);
if(fisstrictneg(u)) return fnan_or(FPINF);
return atan2f(x,sqrtf(u));
}
float WRAPPER_FUNC(acosf)(float x) {
check_nan_f1(x);
float u;
u=(1.0f-x)*(1.0f+x);
if(fisstrictneg(u)) return fnan_or(FPINF);
return atan2f(sqrtf(u),x);
}
float WRAPPER_FUNC(atanf)(float x) {
check_nan_f1(x);
if(fispinf(x)) return (float)( PIf/2);
if(fisminf(x)) return (float)(-PIf/2);
return atan2f(x,1.0f);
}
float WRAPPER_FUNC(sinhf)(float x) {
check_nan_f1(x);
return fldexp((expf(x)-expf(fneg(x))),-1);
}
float WRAPPER_FUNC(coshf)(float x) {
check_nan_f1(x);
return fldexp((expf(x)+expf(fneg(x))),-1);
}
float WRAPPER_FUNC(tanhf)(float x) {
check_nan_f1(x);
float u;
int e;
e=fgetexp(x);
if(e>=4+0x7f) { // |x|>=16?
if(!fisneg(x)) return 1; // 1 << exp 2x; avoid generating infinities later
else return -1; // 1 >> exp 2x
}
u=expf(fldexp(x,1));
return (u-1.0f)/(u+1.0f);
}
float WRAPPER_FUNC(asinhf)(float x) {
check_nan_f1(x);
int e;
e=fgetexp(x);
if(e>=16+0x7f) { // |x|>=2^16?
if(!fisneg(x)) return logf( x )+LOG2f; // 1/x^2 << 1
else return fneg(logf(fneg(x))+LOG2f); // 1/x^2 << 1
}
if(x>0) return (float)log(sqrt((double)x*(double)x+1.0)+(double)x);
else return fneg((float)log(sqrt((double)x*(double)x+1.0)-(double)x));
}
float WRAPPER_FUNC(acoshf)(float x) {
check_nan_f1(x);
int e;
if(fisneg(x)) x=fneg(x);
e=fgetexp(x);
if(e>=16+0x7f) return logf(x)+LOG2f; // |x|>=2^16?
return (float)log(sqrt(((double)x+1.0)*((double)x-1.0))+(double)x);
}
float WRAPPER_FUNC(atanhf)(float x) {
check_nan_f1(x);
return fldexp(logf((1.0f+x)/(1.0f-x)),-1);
}
float WRAPPER_FUNC(exp2f)(float x) { check_nan_f1(x); return (float)exp((double)x*LOG2); }
float WRAPPER_FUNC(log2f)(float x) { check_nan_f1(x); return logf(x)*LOG2Ef; }
float WRAPPER_FUNC(exp10f)(float x) { check_nan_f1(x); return (float)exp((double)x*LOG10); }
float WRAPPER_FUNC(log10f)(float x) { check_nan_f1(x); return logf(x)*LOG10Ef; }
float WRAPPER_FUNC(expm1f)(float x) { check_nan_f1(x); return (float)(exp((double)x)-1); }
float WRAPPER_FUNC(log1pf)(float x) { check_nan_f1(x); return (float)(log(1+(double)x)); }
float WRAPPER_FUNC(fmaf)(float x,float y,float z) {
check_nan_f2(x,y);
check_nan_f1(z);
return (float)((double)x*(double)y+(double)z);
} // has double rounding so not exact
// general power, x>0
static inline float fpow_1(float x,float y) {
return (float)exp(log((double)x)*(double)y); // using double-precision intermediates for better accuracy
}
static float fpow_int2(float x,int y) {
float u;
if(y==1) return x;
u=fpow_int2(x,y/2);
u*=u;
if(y&1) u*=x;
return u;
}
// for the case where x not zero or infinity, y small and not zero
static inline float fpowint_1(float x,int y) {
if(y<0) x=1.0f/x,y=-y;
return fpow_int2(x,y);
}
// for the case where x not zero or infinity
static float fpowint_0(float x,int y) {
int e;
if(fisneg(x)) {
if(fisoddint(y)) return fneg(fpowint_0(fneg(x),y));
else return fpowint_0(fneg(x),y);
}
if(fispo2(x)) {
e=fgetexp(x)-0x7f;
if(y>=256) y= 255; // avoid overflow
if(y<-256) y=-256;
y*=e;
return fldexp(1,y);
}
if(y==0) return 1;
if(y>=-32&&y<=32) return fpowint_1(x,y);
return fpow_1(x,y);
}
float WRAPPER_FUNC(powintf)(float x,int y) {
GCC_Pragma("GCC diagnostic push")
GCC_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
if(x==1.0f||y==0) return 1;
if(x==0.0f) {
if(y>0) {
if(y&1) return x;
else return 0;
}
if((y&1)) return fcopysign(FPINF,x);
return FPINF;
}
GCC_Pragma("GCC diagnostic pop")
check_nan_f1(x);
if(fispinf(x)) {
if(y<0) return 0;
else return FPINF;
}
if(fisminf(x)) {
if(y>0) {
if((y&1)) return FMINF;
else return FPINF;
}
if((y&1)) return MZERO;
else return PZERO;
}
return fpowint_0(x,y);
}
// for the case where y is guaranteed a finite integer, x not zero or infinity
static float fpow_0(float x,float y) {
int e,p;
if(fisneg(x)) {
if(fisoddint(y)) return fneg(fpow_0(fneg(x),y));
else return fpow_0(fneg(x),y);
}
p=(int)y;
if(fispo2(x)) {
e=fgetexp(x)-0x7f;
if(p>=256) p= 255; // avoid overflow
if(p<-256) p=-256;
p*=e;
return fldexp(1,p);
}
if(p==0) return 1;
if(p>=-32&&p<=32) return fpowint_1(x,p);
return fpow_1(x,y);
}
float WRAPPER_FUNC(powf)(float x,float y) {
GCC_Like_Pragma("GCC diagnostic push")
GCC_Like_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
if(x==1.0f||fiszero(y)) return 1;
check_nan_f2(x,y);
if(x==-1.0f&&fisinf(y)) return 1;
GCC_Like_Pragma("GCC diagnostic pop")
if(fiszero(x)) {
if(!fisneg(y)) {
if(fisoddint(y)) return x;
else return 0;
}
if(fisoddint(y)) return fcopysign(FPINF,x);
return FPINF;
}
if(fispinf(x)) {
if(fisneg(y)) return 0;
else return FPINF;
}
if(fisminf(x)) {
if(!fisneg(y)) {
if(fisoddint(y)) return FMINF;
else return FPINF;
}
if(fisoddint(y)) return MZERO;
else return PZERO;
}
if(fispinf(y)) {
if(fgetexp(x)<0x7f) return PZERO;
else return FPINF;
}
if(fisminf(y)) {
if(fgetexp(x)<0x7f) return FPINF;
else return PZERO;
}
if(fisint(y)) return fpow_0(x,y);
if(fisneg(x)) return FPINF;
return fpow_1(x,y);
}
float WRAPPER_FUNC(hypotf)(float x,float y) {
check_nan_f2(x,y);
int ex,ey;
ex=fgetexp(x); ey=fgetexp(y);
if(ex>=0x7f+50||ey>=0x7f+50) { // overflow, or nearly so
x=fldexp(x,-70),y=fldexp(y,-70);
return fldexp(sqrtf(x*x+y*y), 70);
}
else if(ex<=0x7f-50&&ey<=0x7f-50) { // underflow, or nearly so
x=fldexp(x, 70),y=fldexp(y, 70);
return fldexp(sqrtf(x*x+y*y),-70);
}
return sqrtf(x*x+y*y);
}
float WRAPPER_FUNC(cbrtf)(float x) {
check_nan_f1(x);
int e;
if(fisneg(x)) return fneg(cbrtf(fneg(x)));
if(fiszero(x)) return fcopysign(PZERO,x);
e=fgetexp(x)-0x7f;
e=(e*0x5555+0x8000)>>16; // ~e/3, rounded
x=fldexp(x,-e*3);
x=expf(logf(x)*ONETHIRDf);
return fldexp(x,e);
}
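/*
 * The "(e*0x5555+0x8000)>>16" step above is fixed-point rounding of e/3:
 * 0x5555/0x10000 = 21845/65536 ~ 1/3, and adding 0x8000 rounds to nearest,
 * e.g. e=7 gives (7*21845+32768)>>16 = 185683>>16 = 2 = round(7/3). The input
 * is pre-scaled by 2^(-3e) so the cube root lands near 1, and fldexp(x,e)
 * restores the exponent at the end.
 */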
// reduces mx*2^e modulo my, returning bottom bits of quotient at *pquo
// 2^23<=|mx|,my<2^24, e>=0; 0<=result<my
static i32 frem_0(i32 mx,i32 my,int e,int*pquo) {
int quo=0,q,r=0,s;
if(e>0) {
r=0xffffffffU/(ui32)(my>>7); // reciprocal estimate Q16
}
while(e>0) {
s=e; if(s>12) s=12; // gain up to 12 bits on each iteration
q=(mx>>9)*r; // Q30
q=((q>>(29-s))+1)>>1; // Q(s), rounded
mx=(mx<<s)-my*q;
quo=(quo<<s)+q;
e-=s;
}
if(mx>=my) mx-=my,quo++; // when e==0 mx can be nearly as big as 2my
if(mx>=my) mx-=my,quo++;
if(mx<0) mx+=my,quo--;
if(mx<0) mx+=my,quo--;
if(pquo) *pquo=quo;
return mx;
}
float WRAPPER_FUNC(fmodf)(float x,float y) {
check_nan_f2(x,y);
ui32 ix=float2ui32(x),iy=float2ui32(y);
int sx,ex,ey;
i32 mx,my;
FUNPACKS(ix,sx,ex,mx);
FUNPACK(iy,ey,my);
if(ex==0xff) {
return fnan_or(FPINF);
}
if(ey==0) return FPINF;
if(ex==0) {
if(!fisneg(x)) return PZERO;
return MZERO;
}
if(ex<ey) return x; // |x|<|y|, including case x=±0
mx=frem_0(mx,my,ex-ey,0);
if(sx) mx=-mx;
return fix2float(mx,0x7f-ey+23);
}
float WRAPPER_FUNC(remquof)(float x,float y,int*quo) {
check_nan_f2(x,y);
ui32 ix=float2ui32(x),iy=float2ui32(y);
int sx,sy,ex,ey,q;
i32 mx,my;
FUNPACKS(ix,sx,ex,mx);
FUNPACKS(iy,sy,ey,my);
if(quo) *quo=0;
if(ex==0xff) return FPINF;
if(ey==0) return FPINF;
if(ex==0) return PZERO;
if(ey==0xff) return x;
if(ex<ey-1) return x; // |x|<|y|/2
if(ex==ey-1) {
if(mx<=my) return x; // |x|<=|y|/2, even quotient
// here |y|/2<|x|<|y|
if(!sx) { // x>|y|/2
mx-=my+my;
ey--;
q=1;
} else { // x<-|y|/2
mx=my+my-mx;
ey--;
q=-1;
}
}
else {
if(sx) mx=-mx;
mx=frem_0(mx,my,ex-ey,&q);
if(mx+mx>my || (mx+mx==my&&(q&1)) ) { // |x|>|y|/2, or equality and an odd quotient?
mx-=my;
q++;
}
}
if(sy) q=-q;
if(quo) *quo=q;
return fix2float(mx,0x7f-ey+23);
}
float WRAPPER_FUNC(dremf)(float x,float y) { check_nan_f2(x,y); return remquof(x,y,0); }
float WRAPPER_FUNC(remainderf)(float x,float y) { check_nan_f2(x,y); return remquof(x,y,0); }
GCC_Pragma("GCC diagnostic pop") // conversion

View file

@ -0,0 +1,82 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
pico_default_asm_setup
wrapper_func __aeabi_fadd
wrapper_func __aeabi_fdiv
wrapper_func __aeabi_fmul
wrapper_func __aeabi_frsub
wrapper_func __aeabi_fsub
wrapper_func __aeabi_cfcmpeq
wrapper_func __aeabi_cfrcmple
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_fcmpeq
wrapper_func __aeabi_fcmplt
wrapper_func __aeabi_fcmple
wrapper_func __aeabi_fcmpge
wrapper_func __aeabi_fcmpgt
wrapper_func __aeabi_fcmpun
wrapper_func __aeabi_i2f
wrapper_func __aeabi_l2f
wrapper_func __aeabi_ui2f
wrapper_func __aeabi_ul2f
wrapper_func __aeabi_f2iz
wrapper_func __aeabi_f2lz
wrapper_func __aeabi_f2uiz
wrapper_func __aeabi_f2ulz
wrapper_func __aeabi_f2d
wrapper_func sqrtf
wrapper_func cosf
wrapper_func sinf
wrapper_func tanf
wrapper_func atan2f
wrapper_func expf
wrapper_func logf
wrapper_func ldexpf
wrapper_func copysignf
wrapper_func truncf
wrapper_func floorf
wrapper_func ceilf
wrapper_func roundf
wrapper_func sincosf
wrapper_func asinf
wrapper_func acosf
wrapper_func atanf
wrapper_func sinhf
wrapper_func coshf
wrapper_func tanhf
wrapper_func asinhf
wrapper_func acoshf
wrapper_func atanhf
wrapper_func exp2f
wrapper_func log2f
wrapper_func exp10f
wrapper_func log10f
wrapper_func powf
wrapper_func powintf
wrapper_func hypotf
wrapper_func cbrtf
wrapper_func fmodf
wrapper_func dremf
wrapper_func remainderf
wrapper_func remquof
wrapper_func expm1f
wrapper_func log1pf
wrapper_func fmaf
#ifdef __riscv
la a0, str
j panic
#else
push {lr} // keep stack trace sane
ldr r0, =str
bl panic
#endif
str:
.asciz "float support is disabled"


@@ -0,0 +1,801 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#if !PICO_RP2040
#include "pico/asm_helper.S"
pico_default_asm_setup
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
.extern rtwopi
.extern logtab0
.extern exptab0
.extern exptab1
.extern exptab2
.extern trigtab
@ load a 32-bit constant n into register rx
.macro movlong rx,n
movw \rx,#(\n)&0xffff
movt \rx,#((\n)>>16)&0xffff
.endm
float_section frrcore
@ input:
@ r0 mantissa m Q23
@ r1 exponent e>=-32, typically offset by +9
@ output:
@ r0..r1 preserved
@ r6 range reduced result
@ r2,r3,r4,r5 trashed
frr_core:
ldr r2,=rtwopi
asrs r3,r1,#5 @ k=e/32, k<=5 for e offsets up to 9+32
add r2,r2,r3,lsl#2 @ p
and r3,r1,#31 @ s=e%32
mov r4,#1
lsls r4,r4,r3 @ 1<<s
umull r3,r4,r4,r0
@ r2 p
@ r3:r4 u0:u1 = m<<(e%32); u1 is never more than 2<<23
ldr r5,[r2,#12] @ a0=p[3]
umull r5,r6,r5,r4 @ r0=a0*u1 hi, discard lo
@ r6 r0
ldr r5,[r2,#8] @ a1=p[2]
mla r6,r5,r4,r6 @ a1*u1 lo, discard hi
umlal r5,r6,r5,r3 @ a1*u0 hi, discard lo
@ r6 r0
ldr r5,[r2,#4] @ a2=p[1]
mla r6,r5,r3,r6 @ r0+=a2*u0
bx r14
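@ In effect (illustrative summary): r6 comes back as ~frac(x/(2*pi)) in Q32 for
@ the original argument x, so its top two bits give the quadrant; the multiply
@ chain uses a window of three consecutive 32-bit fraction words of 1/(2*pi)
@ from rtwopi, selected by e/32.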
float_wrapper_section expf
2:
@ we could use fadd macro to calculate x+1 here
@ need to add quadratic term
rsb r3,#1 @ 1-(e+9)
lsrs r0,r3 @ Q31
smmulr r1,r0,r0 @ Q30
cmp r12,#0
bne 1f
add r0,r0,r1
lsrs r0,#8
adc r0,r0,#0x3f800000 @ result is just over 1
bx r14
1:
sub r0,r1,r0
asrs r0,#7
adc r0,r0,#0x3f800000 @ result is just under 1
bx r14
7:
movs r1,#1
bfi r0,r1,#23,#9 @ implied 1, clear sign bit
adds r3,#9
bmi 2b @ <~2^-9? return 1+x+x²/2
movlong r1,0xb17217f8 @ ln2 Q32
lsls r0,r3 @ Q32
cmp r12,#0
bne 8f
cmp r0,r1
blo 6f
sub r0,r0,r1
mov r12,#1
b 6f
8:
mvn r12,#0
subs r0,r1,r0
bhs 6f
add r0,r0,r1
mvn r12,#1
b 6f
wrapper_func expf
ands r12,r0,#1<<31 @ save sign
ubfx r3,r0,#23,#8 @ get exponent
subs r3,r3,#0x7f
bmi 7b @ e<0?
cmp r3,#7
bge 20f @ overflow, Inf or NaN?
movs r1,#1
bfi r0,r1,#23,#9 @ implied 1, clear sign bit
lsls r0,r3 @ x Q23
eors r0,r0,r12,asr#31
adds r0,r0,r12,lsr#31 @ negate if sign bit set
mov r1,#0xB8AA @ 1/ln2 Q15, rounded down
add r1,r1,r12,lsr#31 @ rounded up if we have a negative argument
smull r1,r2,r1,r0 @ Q38
movlong r1,0xb17217f8 @ ln2 Q32
asr r12,r2,#6 @ Q0 exponent candidate
lsls r0,#9 @ Q32
mls r0,r12,r1,r0 @ Q32-Q0*Q32=Q32
6:
lsrs r1,r0,#28
ldr r2,=exptab0
add r2,r2,r1,lsl#3
ldmia r2,{r2-r3}
and r2,r2,#63
orr r2,#64 @ y=(t&0x3f)+0x40; Q6
sub r0,r3
lsrs r1,r0,#24
ldr r3,=exptab1+4
ldr r3,[r3,r1,lsl#3]
add r1,#256
muls r2,r2,r1 @ y Q14
sub r0,r3
lsrs r1,r0,#21
ldr r3,=exptab2+4
ldr r3,[r3,r1,lsl#3]
add r1,r1,r1
add r1,#4096
mla r2,r2,r1,r2 @ y Q26
sub r0,r3 @ ε Q32
smmlar r2,r2,r0,r2 @ y(1+ε) Q26
cmp r2,#1<<27
bhs 1f
2:
cmp r12,#0x7f
bgt 23f
cmn r12,#0x7e
blt 23f
lsls r0,r12,#23
adds r0,r0,r2,lsr#3
adc r0,r0,#0x3f000000
bx r14
@ mantissa calculation has overflowed
1:
lsrs r2,#1
adds r12,#1
b 2b
20:
@ process Inf/NaN for fexp
cmp r3,#0x80
bne 22f
lsls r2,r0,#9
ite eq
biceq r0,r0,r0,asr#31 @ +Inf -> + Inf; -Inf -> +0
orrne r0,r0,#0x00400000
bx r14
23:
ands r12,r12,#1<<31
22:
eors r0,r12,#1<<31
subs r0,r0,r0,lsr#8 @ overflow: convert to +0 or +Inf
bx r14
float_wrapper_section logf
1:
movlong r0,0xff800000 @ return -Inf
bx r14
4:
lsls r1,r0,#9
it ne
orrne r0,r0,#0x00400000
bx r14
@ here result may be close to zero
5:
sub r1,r0,#0x3f800000
bmi 7f
cmp r1,#0x8000
bge 6f @ |ε|>~2^-8?
@ here ε positive
clz r12,r1
sub r3,r12,#1
b 8f
7:
rsbs r2,r1,#0
cmp r2,#0x8000
bge 6f @ |ε|>~2^-8?
@ here ε negative
@ r1: x Q24
clz r12,r2
sub r3,r12,#2
8:
sub r12,#10
lsls r0,r1,r3 @ ε Q24+r3
beq 10f @ ε=0?
smmulr r1,r0,r0
asrs r1,r1,r12
sub r0,r0,r1,asr#1 @ - ε²/2
smmulr r1,r1,r0
asrs r2,r1,r12
movs r3,#0x55
mul r2,r2,r3
adds r0,r0,r2,asr#8 @ + ~ ε³/3
rsb r1,r12,#117
b 9f
wrapper_func logf
lsls r3,r0,#1
bcs 1b @ x<0?
lsrs r3,#24
beq 1b @ x==0/denormal?
sub r12,r3,#0x7e
cmp r12,#0x81 @ +Inf/NaN?
beq 4b
push {r14}
cmp r12,#1 @ result will be near zero?
bls 5b
6:
movs r2,#1
bfi r0,r2,#23,#9 @ set implied 1, clear exponent Q52
lsls r0,#8
lsrs r1,r0,#27
ldr r2,=logtab0-16*8
add r2,r2,r1,lsl#3
ldmia r2,{r2-r3}
lsls r2,#26
umlal r2,r0,r2,r0
mvn r1,r0,asr#24
ldr r2,=exptab1+4
ldr r2,[r2,r1,lsl#3]
add r3,r3,r2
lsls r1,#24
umlal r1,r0,r1,r0
ldr r2,=exptab2+4
mvn r1,r0,asr#21
ldr r2,[r2,r1,lsl#3]
lsls r1,#21
orr r1,#0x00100000
umlal r1,r0,r1,r0
add r3,r3,r2
@ r0: ε Q32
@ r3: log y
smmulr r1,r0,r0 @ ε² Q32
sub r3,r0
add r3,r3,r1,lsr#1 @ log u - ε + ε²/2 Q32
movlong r2,0xb17218 @ ln2 Q24, but accurate to 2^-29 or so
cmp r12,#1 @ result will be near zero?
bls 2f
mul r2,r2,r12
mov r1,#125
add r3,#255 @ reduce systematic error
subs r0,r2,r3,lsr#8
9:
bmi 1f
bl fpack_q
10:
pop {r15}
2:
mov r1,#117
bne 3f @ r12=0?
@ here r12=1
movlong r2,0xb17217f8
sub r0,r2,r3
bl fpack_q
pop {r15}
3:
mov r0,r3
bl fpack_q
orrs r0,r0,#1<<31
pop {r15}
1:
rsbs r0,#0
bl fpack_q
orrs r0,r0,#1<<31
pop {r15}
float_wrapper_section atan2f
@ fatan small reduced angle case
@ r0 y/x in IEEE format, 0..2^-8
@ r2 e+6 <0
@ r3 #1
@ r6 flags
20:
rsbs r4,r2,#0 @ -e-6 = shift down of mantissa required to get to Q29 >0
cmp r4,#7 @ -e-6 ≥ 7 [ e ≤ -13 ]
bge 1f @ very small reduced angle? atn θ ~ θ
@ do a power series
bfi r0,r3,#23,#9 @ fix up mantissa Q23-e
lsls r0,#7 @ Q30-e
smmulr r1,r0,r0 @ θ² Q60-2e-Q32=Q28-2e
lsrs r1,r4 @ Q28-2e + (e+6) = Q34-e
smmulr r1,r1,r0 @ θ³ Q34-e+Q30-e-Q32=Q32-2e
mov r3,#0x55555555 @ 1/3 Q32
lsrs r1,r4 @ Q32-2e + e+6 = Q38-e
smmulr r1,r1,r3 @ θ³/3 Q38-e
sub r0,r0,r1,lsr#8 @ Q30-e
cmp r6,#4 @ at least one quadrant to add?
bhs 2f
add r1,r2,#113
bl fpack_q
eors r0,r0,r6,lsl#31 @ fix sign
pop {r4-r6,r15}
@ here reduced angle is < 2^-12
1:
cmp r6,#4
bhs 3f @ at least one quadrant to add?
eors r0,r0,r6,lsl#31 @ no: return y/x with the correct sign
pop {r4-r6,r15}
3:
bfi r0,r3,#23,#9 @ fix up mantissa
lsrs r0,r4 @ Q29
lsls r0,#1 @ Q30
b 40f
2:
lsrs r0,r4 @ Q30-e + e+6 = Q36
lsrs r0,#6 @ Q30
b 40f
@ case where reduced (x',y') has x' infinite
71:
sbfx r4,r0,#23,#8
movs r0,#0
cmn r4,#1 @ y' also infinite?
bne 80f
mov r0,#0x3f800000 @ both infinite: pretend ∞/∞=1
b 80f
@ case where reduced (x',y') has y' zero
70:
ubfx r4,r1,#23,#8
movs r0,#0
cbnz r4,80f @ x' also zero?
tst r6,#4
beq 80f @ already in quadrants 0/±2? then 0/0 result will be correct
tst r6,#2
ite eq
addeq r6,#6
subne r6,#6 @ fix up when in quadrants ±0
b 80f
90:
movs r0,r1
91:
orrs r0,r0,#0x00400000
pop {r4-r6,r15}
wrapper_func atan2f
push {r4-r6,r14}
lsls r4,r1,#1
cmp r4,#0xff000000
bhi 90b @ y NaN?
lsls r4,r0,#1
cmp r4,#0xff000000
bhi 91b @ x NaN?
lsrs r6,r0,#31 @ b31..2: quadrant count; b1: sign to apply before addition; b0: sign to apply after addition
bic r0,#1<<31
@ y now positive
movs r1,r1
bpl 1f
@ here y positive, x negative
adds r6,#10
bic r1,r1,#1<<31
cmp r1,r0
bhi 4f @ |x| > y: 3rd octant
@ |x| < y: 2nd octant
subs r6,#6
b 3f
1:
@ here x and y positive
cmp r1,r0
bhs 4f
@ x < y: 1st octant
adds r6,#6
3:
movs r2,r1 @ exchange x and y
movs r1,r0
movs r0,r2
4:
@ here
@ r0 y'
@ r1 x'
@ where both x' and y' are positive, y'/x' < 1+δ, and the final result is
@ ± (Q.π/2 ± atn y/x) where 0≤Q≤2 is a quadrant count in r6b3..2, the inner negation
@ is given by r6b1 and the outer negation by r6b0. x' can be infinite, or both x' and
@ y' can be infinite, but not y' alone.
sbfx r2,r1,#23,#8
cmn r2,#1
beq 71b @ x' infinite?
ubfx r2,r0,#23,#8
cmp r2,#0
beq 70b @ y' zero?
bl __aeabi_fdiv
80:
@ r0 y/x in IEEE format, 0..1
lsr r2,r0,#23 @ exponent
movs r3,#1
subs r2,#0x7f-6
bmi 20b
bfi r0,r3,#23,#9 @ fix up mantissa
lsls r0,r2
lsls r0,#2
50:
@ from here atan2(y,1) where 1 implied
@ r0 y Q31 0≤y<1+δ
lsrs r2,r0,#16
mul r3,r2,r2 @ y^2
movw r4,#0x895c
muls r2,r2,r4 @ y*0x895c
movw r5,#0x1227
lsrs r3,#14
mls r2,r3,r5,r2
subs r2,#0x330000 @ Chebyshev approximation to atn(y)
lsrs r2,#25
ldr r3,=trigtab+4
add r3,r3,r2,lsl#4
ldmia r3,{r3-r5}
@ r0 y Q30
@ r3 phi0 Q32
@ r4 sphi0 Q31
@ r5 cphi0 Q31
@ r6 flags
@ x0= cphi0 + sphi0*y
@ y0=-sphi0 + cphi0*y
umull r12,r1,r0,r4 @ sphi0*y
umull r12,r2,r0,r5 @ cphi0*y
add r1,r1,r5,lsr#1 @ x0 Q30
sub r2,r2,r4,lsr#1 @ y0 Q30
11:
@ r1 x0 Q30
@ r2 y0 Q30
@ r3 phi0 Q32
@ r6 flags
mov r0,#0xffffffff
lsrs r4,r1,#15
udiv r12,r0,r4 @ rx0 Q17
lsls r4,r2,#6 @ y0 Q36
smmul r4,r4,r12 @ t2=y0*rx0 Q21
lsls r5,r4,#11 @ t2 Q32
smmlar r0,r5,r2,r1
smmlsr r1,r5,r1,r2
@ r0 x1 Q30
@ r1 y1 Q30
@ r3 phi0
@ r4 r2 Q21
@ r12 rx0 Q17
mul r5,r4,r4 @ t2_2 Q42
smull r2,r5,r4,r5 @ t2_3 Q63
add r3,r3,r4,lsl#11 @ Q32
lsls r5,#1 @ Q32
mov r2,#0x55555555 @ 1/3
smmlsr r3,r2,r5,r3 @ t2_3/3
@ r0 x1 Q30
@ r1 y1 Q30
@ r3 phi0+phi1 Q32
@ r12 rx0 Q17
mul r0,r1,r12 @ y1*rx0 Q30+Q17=Q47
add r3,r3,r0,asr#15
@ r3 phi0+phi1+phi2, result over reduced range Q32
@ r6 flags
lsrs r0,r3,#2 @ Q30
@ adc r0,#0 @ rounding
40:
lsl r5,r6,#30 @ b1 -> b31
eor r0,r0,r5,asr#31 @ negate if required
movlong r1,0x6487ED51 @ π/2 Q30
lsr r5,r6,#2 @ quadrants to add
mla r0,r5,r1,r0
mov r1,#0x80-9 @ for packing Q30
60:
bl fpack_q
eors r0,r0,r6,lsl#31
pop {r4-r6,r15}
@=======================================
float_wrapper_section fpack
@ fnegpack: negate and pack
@ fpack_q31:
@ input
@ r0 Q31 result, must not be zero
@ r1 exponent offset [fpack_q only]
@ output
@ r0 IEEE single
@ trashes (r1,)r2
fnegpack_q31:
rsbs r0,r0,#0
fpack_q31:
mov r1,#0x7f-9 @ exponent
fpack_q:
clz r2,r0
rsbs r2,#8
bmi 1f
lsrs r0,r0,r2 @ save rounding bit in carry
add r2,r2,r1
adc r0,r0,r2,lsl#23 @ insert exponent
bx r14
1:
rsb r2,#0
lsls r0,r0,r2
sub r2,r1,r2
add r0,r0,r2,lsl#23
bx r14
float_wrapper_section tanf
wrapper_func tanf
push {r14}
bl sincosf_raw
bl __aeabi_fdiv
pop {r15}
float_wrapper_section fsin_fcos
10: @ process Inf/NaN for sinf and fcos
lsls r1,r0,#9
it eq
orreq r0,#0x80000000
orr r0,#0x00400000 @ turn Inf into NaN
movs r1,r0 @ copy result to cosine output
bx r14
@ case where angle is very small (<2^-32) before reduction
32:
adds r1,r1,#0x7f
it eq
moveq r0,#0 @ flush denormal to zero
movs r1,0x3f800000 @ return x for sine, 1 for cosine
tst r12,#2
it ne
movne r0,r1 @ calculating cosine? move to result registers
bx r14
40:
@ case where range-reduced angle is very small
@ here
@ r0 mantissa
@ r1 exponent+9
@ r6 abs range-reduced angle / 2π < 2^-7 Q32
@ r12b31: dsincos flag
@ r12b30: original argument sign
@ r12b2..1: quadrant count
@ r12b0: sign of reduced angle
push {r12}
movs r12,#0
2:
add r12,#2
add r1,#2
bl frr_core @ repeat range reduction with extra factor of 2^2 (, 2^4, 2^6, 2^8,...)
eors r4,r6,r6,asr#31 @ we want to keep the sign in r6b31 for later
cmp r4,#1<<26 @ ≥ 2^-6?
bhs 1f @ loop until the result is big enough
cmp r12,#32 @ safety net
bne 2b
1:
@ here r4 is the abs range-reduced angle Q32+r12, 2^-6..2^-4 in Q32 terms
@ 2π=6.487ED511 (0B4611A6...)
movlong r5,0x487ED511 @ 2π Q64 high fractional word
umull r2,r0,r4,r5
movs r5,#6 @ 2π integer part
mla r0,r4,r5,r0
@ here
@ r0 θ, abs range reduced angle θ 0..π/4 Q32+r12, 2π * 2^-6..2^-4 in Q32 terms (so top bit is clear)
@ r6b31: sign of reduced angle
@ r12: excess exponent ≥2, multiple of 2
@ r0 / 2^r12 < ~ 2π * 2^-7 so for sin we need to go to term in x^3
smmulr r1,r0,r0 @ θ² Q32+2*r12
lsrs r1,r1,r12 @ θ² Q32+r12
lsrs r1,r1,r12 @ θ² Q32
movs r4,#0x55555555 @ 1/3 Q32
smmulr r4,r1,r4 @ θ²/3 Q32
smmulr r2,r4,r1 @ θ⁴/3 Q32
rsb r3,r1,r2,lsr#2 @ -θ²+θ⁴/12 Q32
asrs r3,#9 @ -θ²/2+θ⁴/24 Q24
adc r3,r3,#0x3f800000 @ IEEE single with rounding
smmulr r2,r4,r0 @ θ³/3 Q32+r12
sub r0,r0,r2,lsr#1 @ θ-θ³/6 Q32+r12
rsb r1,r12,#117
bl fpack_q
@ here
@ r0 packed sin
@ r3 packed cos
@ r6b31: sign of reduced angle
pop {r12} @ get flags
lsrs r6,#31
bfi r12,r6,#0,#1
asrs r2,r12,#1
bmi 23f @ doing sincos?
asrs r2,#1
bcc 21f @ need sine?
@ need cosine:
ands r12,#4
orrs r0,r3,r12,lsl#29 @ insert sign
pop {r4-r6,r15}
21:
eors r12,r12,r12,lsr#2
orrs r0,r0,r12,lsl#31 @ insert sign
pop {r4-r6,r15}
23:
ands r2,r12,#4
orrs r3,r3,r2,lsl#29 @ insert sign
push {r3}
@ drop into sincosf code below...
b 20f
wrapper_func sincosf
push {r1, r2, lr}
bl sincosf_raw
pop {r2, r3}
str r0, [r2]
str r1, [r3]
pop {pc}
sincosf_raw:
ands r12,r0,#1<<31
lsrs r12,#1 @ save argument sign in r12b30
orrs r12,r12,#1<<31 @ flag we want both results in r12b31
b 1f
wrapper_func sinf
lsrs r12,r0,#29 @ negative argument -> +2 quadrants
ands r12,#4
b 1f
wrapper_func cosf
movs r12,#2 @ cos -> +1 quadrant
1:
ubfx r1,r0,#23,#8 @ get exponent
sub r1,r1,#0x7f
cmp r1,#0x80
beq 10b @ Inf or NaN?
cmn r1,#32
blt 32b @ very small argument?
movs r3,#1
bfi r0,r3,#23,#9 @ fix implied 1 in mantissa
push {r4-r6,r14}
add r1,#9 @ e+9
bl frr_core
@ r6 θ/2π 0..1 Q64
lsrs r4,r6,#30 @ quadrant count
adc r4,r4,#0 @ rounded
sub r6,r6,r4,lsl#30 @ now -0.125≤r6<+0.125 Q32
add r12,r12,r4,lsl#1
orr r12,r12,r6,lsr#31 @ sign into r12b0
@ r12b2..1: quadrant count
@ r12b0: sign of reduced angle
eors r6,r6,r6,asr#31 @ absolute value of reduced angle 0≤r7<0.125 Q32
cmp r6,#1<<25 @ θ / 2π < 2^-7?
blo 40b
@ 2π=6.487ED511 (0B4611A6...)
movlong r5,0x487ED511 @ 2π Q64 high fractional word
umull r2,r0,r6,r5
movs r5,#6 @ 2π integer part
mla r0,r6,r5,r0
@ r0 range reduced angle θ 0..π/4 Q32
lsrs r2,r0,#27
ldr r3,=trigtab+4
add r3,r3,r2,lsl#4
ldmia r3,{r1-r3}
subs r0,r1
@ r0: ε Q32 |ε| < 1.17*2^-6
@ r2: sin φ Q31
@ r3: cos φ Q31
asrs r1,r12,#1
bmi 5f @ doing sincosf?
asrs r1,#1
bcs 3f @ need cosine?
@ here need sine
smmulr r1,r0,r0 @ ε² Q32
mov r4,#0x55555555 @ 1/3 Q32
asrs r1,#1
smmlsr r2,r1,r2,r2 @ sin φ - ε²/2*sin φ ~ sin φ cos ε
smmulr r1,r1,r0 @ ε³/2
smmlsr r1,r1,r4,r0 @ ε - ε³/6
smmlar r0,r1,r3,r2 @ sin φ cos ε + cos φ (ε - ε³/6) ~ sin (φ+ε)
@ the sign of the result is r12b0^r12b2
bl fpack_q31
eors r12,r12,r12,lsr#2
orrs r0,r0,r12,lsl#31 @ insert sign
pop {r4-r6,r15}
3:
@ here need cosine
smmulr r1,r0,r0 @ ε² Q32
mov r4,#0x55555555 @ 1/3 Q32
asrs r1,#1
smmlsr r3,r1,r3,r3 @ cos φ - ε²/2*cos φ ~ cos φ cos ε
smmulr r1,r1,r0 @ ε³/2
smmlsr r1,r1,r4,r0 @ ε - ε³/6
smmlsr r0,r1,r2,r3 @ cos φ cos ε - sin φ (ε - ε³/6) ~ cos (φ+ε)
@ the sign of the result is r12b2
bl fpack_q31
ands r12,#4
orrs r0,r0,r12,lsl#29 @ insert sign
pop {r4-r6,r15}
5:
@ here doing sincosf
smmulr r1,r0,r0 @ ε² Q32
mov r6,#0x55555555 @ 1/3 Q32
asrs r1,#1
smmlsr r4,r1,r2,r2 @ sin φ - ε²/2*sin φ ~ sin φ cos ε
smmlsr r5,r1,r3,r3 @ cos φ - ε²/2*cos φ ~ cos φ cos ε
smmulr r1,r1,r0 @ ε³/2
smmlsr r6,r1,r6,r0 @ ε - ε³/6
@ here
@ r2 sin φ
@ r3 cos φ
@ r4 sin φ cos ε
@ r5 cos φ cos ε
@ r6 ε - ε³/6 ~ sin ε
smmlsr r0,r6,r2,r5 @ cos φ cos ε - sin φ (ε - ε³/6) ~ cos (φ+ε)
bl fpack_q31
ands r5,r12,#4
eors r0,r0,r5,lsl#29 @ negate cosine in quadrants 2 and 3
push {r0}
smmlar r0,r6,r3,r4 @ sin φ cos ε + cos φ (ε - ε³/6) ~ sin (φ+ε)
bl fpack_q31
20:
eors r4,r12,r12,lsr#1
eors r4,r4,r12,lsr#2
ands r5,r12,#1<<30
tst r12,#2 @ exchange sine and cosine in odd quadrants
beq 1f
eors r1,r0,r4,lsl#31 @ negate sine on b0^b1^b2
pop {r0}
eors r0,r0,r5,lsl#1 @ negate sine result if argument was negative
pop {r4-r6,r15}
1:
pop {r1}
eors r0,r0,r4,lsl#31 @ negate sine on b0^b1^b2
eors r0,r0,r5,lsl#1 @ negate sine result if argument was negative
pop {r4-r6,r15}
#endif

View file

@ -0,0 +1,856 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#if !PICO_RP2040
#include "pico/asm_helper.S"
pico_default_asm_setup
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
@ load a 32-bit constant n into register rx
.macro movlong rx,n
movw \rx,#(\n)&0xffff
movt \rx,#((\n)>>16)&0xffff
.endm
float_section frrcore_v
// 1/2π to plenty of accuracy
.long 0 @ this allows values of e down to -32
rtwopi:
.long 0,0
.long 0x28BE60DB, 0x9391054A, 0x7F09D5F4, 0x7D4D3770, 0x36D8A566, 0x4F10E410
@ input:
@ r0 mantissa m Q23
@ r1 exponent e>=-32, typically offset by +9
@ output:
@ r0..r1 preserved
@ r6 range reduced result in revolutions Q32
@ r2,r3,r4,r5 trashed
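@ (in effect a Payne-Hanek style reduction: only the three 32-bit words of
@ 1/2π that can still affect the fractional part of m·2^e/2π are multiplied
@ in; more significant words would only contribute whole revolutions, which
@ are discarded)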
.thumb_func
frr_core:
adr r2,rtwopi
asrs r3,r1,#5 @ k=e/32, k<=5 for e offsets up to 9+32
add r2,r2,r3,lsl#2 @ p
and r3,r1,#31 @ s=e%32
mov r4,#1
lsls r4,r4,r3 @ 1<<s
umull r3,r4,r4,r0
@ r2 p
@ r3:r4 u0:u1 = m<<(e%32); u1 is never more than 2<<23
ldr r5,[r2,#12] @ a0=p[3]
umull r5,r6,r5,r4 @ r0=a0*u1 hi, discard lo
@ r6 r0
ldr r5,[r2,#8] @ a1=p[2]
mla r6,r5,r4,r6 @ a1*u1 lo, discard hi
umlal r5,r6,r5,r3 @ a1*u0 hi, discard lo
@ r6 r0
ldr r5,[r2,#4] @ a2=p[1]
mla r6,r5,r3,r6 @ r0+=a2*u0
bx r14
float_wrapper_section expf
wrapper_func expf
@ soft float version, via 2^x
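@ exp(x) = 2^(x/log 2); x/log 2 is split as n + j/32 + ε with 0 ≤ ε < 2^-5:
@ 2^n goes straight into the exponent, 2^(j/32) is looked up in exptab3, and
@ 2^ε ≈ 1 + ε·log 2 + ε²·log²2/2 + ε³·log³2/6 using the k_exp2 constants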
asrs r1,r0,#23
bmi 1f
cmp r1,#0x85
bge 3f
10:
movs r2,#1
bfi r0,r2,#23,#9
subs r1,#0x7e
bmi 2f
lsl r0,r1 @ x Q24
11:
movlong r3,0x5c551d95 @ 1/log(2) Q30
smull r0,r1,r0,r3 @ Q54
adr r2,k_exp2
vldmia r2,{s8-s10}
lsrs r2,r0,#22
bfi r2,r1,#10,#17 @ ε Q32
vmov s0,r2
vcvt.f32.u32 s0,s0,#32
vmul.f32 s1,s0,s0
ubfx r2,r1,#17,#5
vmul.f32 s4,s0,s8
adr r3,exptab3
vmul.f32 s2,s0,s1
ldr r2,[r3,r2,lsl#2]
vmla.f32 s4,s1,s9
asrs r1,#22
vmla.f32 s4,s2,s10
add r2,r2,r1,lsl#23
vmov s0,r2
vmla.f32 s0,s0,s4
vmov r0,s0
bx r14
2: @ x≤0.5
rsbs r1,#0
lsrs r0,r1
@ adc r0,#0 @ rounding not needed
b 11b
3: @ risk of overflow, Inf,NaN
movlong r2,0x42B17218
cmp r0,r2
blo 10b @ in range after all
cmp r0,#0x7f800000
bls 4f @ not NaN?
orrs r0,#0x00400000
bx r14
4:
movlong r0,0x7f800000 @ return +Inf
bx r14
1: @ x<0, r1=0xffffffXX where XX is biased exponent
cmn r1,#0x7b
bge 5f @ risk of underflow, -Inf, -NaN?
13:
movs r2,#1
bfi r0,r2,#23,#9
adds r1,#0x82
bpl 6f
rsbs r1,#0
lsrs r0,r1
adc r0,r0,#0 @ rounding
12:
rsbs r0,#0
b 11b
6:
lsls r0,r1
b 12b
5:
movlong r2,0xC2AEAC4F
cmp r0,r2
bls 13b
cmp r0,#0xff800000
bls 14f
orrs r0,#0x00400000
bx r14
14:
mov r0,#0
bx r14
.align 3
k_exp2:
.float 0.693147181 @ log2
.float 0.240226507 @ log²2/2
.float 0.055504109 @ log³2/6
exptab3: @ pow(2,[0..31]/32)
.word 0x3f800000
.word 0x3f82cd87
.word 0x3f85aac3
.word 0x3f88980f
.word 0x3f8b95c2
.word 0x3f8ea43a
.word 0x3f91c3d3
.word 0x3f94f4f0
.word 0x3f9837f0
.word 0x3f9b8d3a
.word 0x3f9ef532
.word 0x3fa27043
.word 0x3fa5fed7
.word 0x3fa9a15b
.word 0x3fad583f
.word 0x3fb123f6
.word 0x3fb504f3
.word 0x3fb8fbaf
.word 0x3fbd08a4
.word 0x3fc12c4d
.word 0x3fc5672a
.word 0x3fc9b9be
.word 0x3fce248c
.word 0x3fd2a81e
.word 0x3fd744fd
.word 0x3fdbfbb8
.word 0x3fe0ccdf
.word 0x3fe5b907
.word 0x3feac0c7
.word 0x3fefe4ba
.word 0x3ff5257d
.word 0x3ffa83b3
float_wrapper_section logf
wrapper_func logf
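@ log x = e·log 2 + log m with the mantissa m taken relative to the nearer
@ power of two; logtab3 supplies u ≈ 1/m (Q8) and -log u (float), so with
@ ε = m·u - 1 we have log m = log(1+ε) - log u, and log(1+ε) is evaluated
@ as ε - ε²/2 + ε³/3 - ε⁴/4 using the k_log3 constants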
cmp r0,#0x7f800000 @ catch Inf, NaN, -ve
bhs 1f
asrs r1,r0,#23 @ get exponent; C from here is preserved...
beq 2f @ ±0?
mov r2,#1
bfi r0,r2,#23,#9 @ fix implied 1
it cc @ 50% ... to here...
lslcc r0,#1 @ this plus sbc below means we work relative to nearby power of 2
adr r3,#k_log3
vldmia r3,{s8-s10}
@ 0x00c00000 ≤ r0 < 0x017fffff
adr r3,logtab3-24*8+4
add r3,r3,r0,lsr#16 @ look up r0>>19 rounded, preserving flags
bic r3,#7
ldrd r2,r3,[r3]
mul r0,r0,r2 @ ε
vmov s0,s1,r3,r0 @ s0=-log u, s1=ε
vcvt.f32.s32 s1,s1,#32
vmul.f32 s2,s1,s1 @ power series in ε
sbc r1,r1,#0x7e @ ... and here
vmul.f32 s3,s1,s2
lsls r1,#23 @ e Q23
vmul.f32 s4,s2,s2 @ to ε⁴
@ movlong r2,0x58b90bfc @ log 2 Q31, more accurate than we deserve
movw r2,0x0bfc
vmul.f32 s2,s2,s8
movt r2,0x58b9
vmul.f32 s3,s3,s9
smmulr r1,r1,r2 @ Q22
vmul.f32 s4,s4,s10
vmov s7,r1
vsub.f32 s3,s3,s4
vcvt.f32.s32 s7,s7,#22
vsub.f32 s2,s2,s3
vsub.f32 s1,s1,s2
vadd.f32 s0,s0,s1 @ log(1+ε) - log u
vadd.f32 s0,s0,s7 @ e log 2 + log ε - log u
vmov r0,s0
bx r14
1:
bgt 3f @ +NaN?
beq 10f @ +Inf?
2:
cmp r0,#0x80800000 @ -0?
blo 11f
cmp r0,#0xff800000 @ -NaN/-Inf?
3:
orr r0,#0x00400000
bhi 10f
movlong r0,0xffc00000
10:
bx r14
11:
movlong r0,0xff800000
bx r14
.align 3
k_log3:
.float 0.5
.float 0.333333333333333
.float 0.25
.float 0 @ alignment
logtab3:
@ u=64/[48:2:96]; u Q8, -log u F32
.word 0x0155,0xbe92cb01 @ 00003e9b..00004145
.word 0x0148,0xbe7dc8c3 @ 00003ec8..00004158
.word 0x013b,0xbe545f68 @ 00003ec1..00004137
.word 0x012f,0xbe2c99c7 @ 00003ebb..00004119
.word 0x0125,0xbe0a3c2c @ 00003ef3..0000413d
.word 0x011a,0xbdc61a2f @ 00003eca..000040fe
.word 0x0111,0xbd83acc2 @ 00003eeb..0000410d
.word 0x0108,0xbcfc14d8 @ 00003ee8..000040f8
.word 0x0100,0x00000000 @ 00003f00..00004100
.word 0x00f8,0x3d020aec @ 00003ef8..000040e8
.word 0x00f1,0x3d77518e @ 00003f13..000040f5
.word 0x00ea,0x3db80698 @ 00003f12..000040e6
.word 0x00e4,0x3ded393b @ 00003f3c..00004104
.word 0x00dd,0x3e168b08 @ 00003f05..000040bf
.word 0x00d8,0x3e2dfa03 @ 00003f48..000040f8
.word 0x00d2,0x3e4ad2d7 @ 00003f2a..000040ce
.word 0x00cd,0x3e637fde @ 00003f43..000040dd
.word 0x00c8,0x3e7cc8e3 @ 00003f48..000040d8
.word 0x00c3,0x3e8b5ae6 @ 00003f39..000040bf
.word 0x00bf,0x3e95f784 @ 00003f6b..000040e9
.word 0x00ba,0x3ea38c6e @ 00003f36..000040aa
.word 0x00b6,0x3eaeadef @ 00003f46..000040b2
.word 0x00b2,0x3eba0ec4 @ 00003f46..000040aa
.word 0x00ae,0x3ec5b1cd @ 00003f36..00004092
.word 0x00ab,0x3ece995f @ 00003f75..000040cb
float_wrapper_section fsin_fcos
30:
lsls r1,r0,#9
bne 1f @ NaN? return it
orrs r0,r0,#0x80000000 @ Inf: make a NaN
1:
orrs r0,r0,#0x00400000 @ set top mantissa bit of NaN
bx r14
@ heavy-duty range reduction
@ here x≥256, -e in r1
40:
push {r4-r7,r14}
movs r3,#1
bfi r0,r3,#23,#9 @ insert implied 1 in mantissa, clear sign
rsb r1,#9 @ e+9
mov r7,#0x7e @ this will be the exponent of the reduced angle - 1
42:
bl frr_core
@ here r6 is revolutions Q32
lsrs r3,r6,#30 @ quadrant count
adcs r3,r3,#0 @ rounded
add r12,r12,r3
subs r6,r6,r3,lsl#30 @ reduced angle/2π Q32 -.125≤x<+.125
@ comment out from here...
lsls r2,r6,#2 @ Q34
it cs
rsbcs r2,r2,#0 @ absolute value
cmp r2,#1<<28 @ big enough for accuracy?
bhs 41f
@ ... to here for slightly better accuracy
43:
adds r1,r1,#2 @ try again with increased exponent
bl frr_core
eors r2,r6,r6,asr#32 @ absolute value
adc r2,r2,#0
cmp r2,#1<<28 @ big enough yet?
bhs 44f
subs r7,r7,#2
bpl 43b @ safety net
44:
41:
ldr r4,=0xC90FDAA2 @ 2π Q29
umull r2,r4,r2,r4 @ r4 has reduced angle Q34+Q29-Q32=Q31
@ add r4,r4,r2,lsr#31
clz r2,r4 @ normalise
lsls r4,r4,r2
lsrs r4,r4,#8
sub r2,r7,r2
adc r0,r4,r2,lsl#23 @ with rounding
lsrs r1,r0,#23 @ re-extract exponent as there may have been a carry into it
rsbs r1,r1,#0x7f @ prepare exponent for re-entry
lsrs r6,r6,#31
add r3,r0,r6,lsl#31 @ apply sign of reduced angle
pop {r4-r7,r14}
b 5f @ re-enter with no risk of looping
.ltorg
@ light-duty range reduction
@ here argument ≥1
@ r0: argument
@ r1: -e
@ r12: quadrant count
@ required result is sin(r0+r12*π/2)
10:
cmn r1,#0x80
beq 30b @ Inf/NaN
bics r2,r0,r12,lsl#31 @ negative argument, doing sin -> +2 quadrants
it mi
addmi r12,r12,#2
bic r0,r0,#0x80000000 @ make positive: original sign is now captured in quadrant count in r12
@ this may not actually be faster than doing it in integer registers
vmov s0,r0
adr r2,k_sc4
vldmia r2!,{s5-s7}
@ vmul.f32 s4,s4,s0 @ this accurate calculation of the quadrant count does not seem necessary
@ vfma.f32 s4,s5,s0
vmul.f32 s4,s5,s0 @ this is BALGE
cmn r1,#8 @ ≥256?
vrintn.f32.f32 s4,s4 @ round to quadrant count: x<256 so count≤163
ble 40b @ then do heavy-duty range reduction
vfms.f32 s0,s4,s7
vfms.f32 s0,s4,s6
vmov r3,s0 @ reduced angle
vcvt.s32.f32 s3,s4
ubfx r2,r3,#23,#8 @ get exponent
cmp r2,#0x78
blo 40b @ very small result? use heavy-duty reduction to get a more accurate answer
rsbs r1,r2,#0x7f @ ready for re-entry
vmov r2,s3 @ integer quadrant count
add r12,r12,r2
@ prepare to re-enter with no risk of looping
b 5f
k_sc4:
@ 2/π=0.A2F9836E4E441529FC...
.word 0x3f22f983 @ 2/π
@ π/2=1.921FB54442D1846989...
.word 0xb695777a,0x3fc91000 @ these two add up to π/2 with error ~1.6e-13
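@ (two-constant Cody-Waite reduction: subtracting q*hi and then q*lo keeps the
@ reduced angle far more accurate than a single float π/2 constant would allow)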
wrapper_func sincosf
push {r0-r2,r14}
ubfx r1,r0,#23,#8
cmp r1,#0xff @ Inf/NaN?
beq 2f
bl cosf_entry @ this will exit via 1f or 2f...
pop {r1-r2,r14}
str r0,[r14]
@ here C is still set from lsrs r12,r12,#1
bcs 1f
mvns r1,r1
eor r12,r12,r1,lsr#31
@ this is fsc_costail:
@ here calculate cos φ+ε = cosθ
vmul.f32 s5,s7,s1 @ sinφ sinε
vfma.f32 s5,s2,s6 @ sinφ sinε + cosφ(1-cosε)
vsub.f32 s5,s6,s5 @ cosφ - (sinφ sinε + cosφ(1-cosε)) = cosφ cosε - sinφ sinε
vmov.f32 r0,s5
eor r0,r0,r12,lsl#31
str r0,[r2]
pop {r15}
1:
eor r12,r12,r1,lsr#31
@ this is fsc_sintail:
@ here calculate sin φ+ε = sinθ
vmul.f32 s4,s2,s7 @ sinφ(1-cosε)
vfms.f32 s4,s6,s1 @ sinφ(1-cosε) - cosφ sinε
eor r1,r12,r3,lsr#31 @ flip sign if (reduced) argument was negative
vsub.f32 s4,s7,s4 @ cosφ sinε + sinφ cosε
vmov.f32 r0,s4
eor r0,r0,r1,lsl#31
str r0,[r2] @ save sin result
pop {r15}
@ sincos of Inf or NaN
2:
lsls r1,r0,#9
pop {r1-r3,r14}
bne 1f @ NaN? return it
orrs r0,r0,#0x80000000 @ Inf: make a NaN
1:
orrs r0,r0,#0x00400000 @ set top mantissa bit of NaN
str r0,[r2] @ both sin and cos results
str r0,[r3]
bx r14
wrapper_func sinf
@ r12b1..0: quadrant count
movs r12,#0
b 1f
wrapper_func cosf
.thumb_func
cosf_entry:
movs r12,#1 @ cos -> +1 quadrant
1:
ubfx r1,r0,#23,#8 @ get exponent
cbz r1,20f @ 0/denormal?
22:
rsbs r1,r1,#0x7f
bls 10b @ argument ≥1? needs reduction; also Inf/NaN handling
bic r3,r0,r12,lsl#31 @ this would mess up NaNs so do it here
5:
@ here we have a quadrant count in r12 and a signed offset r0 from r12*π/2
bic r0,r3,#0x80000000 @ this would mess up NaNs so do it here
vmov s0,r0
ubfx r0,r0,#18,#5 @ extract top of mantissa
adds r0,r0,#32 @ insert implied 1
lsrs r1,r0,r1 @ to fixed point Q5
ldr r2,=k_sc3
adcs r1,r1,#0 @ rounding
vldmia r2!,{s8-s9}
add r2,r2,r1,lsl#2 @ 12 bytes per entry
add r2,r2,r1,lsl#3
vldmia r2,{s5-s7} @ φ, cosφ, sinφ
vsub.f32 s1,s0,s5 @ ε
vmul.f32 s2,s1,s1 @ ε²
lsrs r12,r12,#1 @ computing cosine?
vmul.f32 s3,s2,s1 @ ε³
bcs 2f
vmul.f32 s2,s2,s8 @ ε²/2! ~ 1-cosε
vmul.f32 s3,s3,s9 @ ε³/3!
vsub.f32 s1,s1,s3 @ ε-ε³/3! ~ sinε
@ here:
@ s1: sinε
@ s2: 1-cosε
@ s6: cosφ
@ s7: sinφ
@ r12: quadrant count
fsc_sintail:
@ here calculate sin φ+ε = sinθ
vmul.f32 s4,s2,s7 @ sinφ(1-cosε)
vfms.f32 s4,s6,s1 @ sinφ(1-cosε) - cosφ sinε
eor r1,r12,r3,lsr#31 @ flip sign if (reduced) argument was negative
vsub.f32 s4,s7,s4 @ cosφ sinε + sinφ cosε
vmov.f32 r0,s4
eor r0,r0,r1,lsl#31
bx r14
20:
and r0,r0,#0x80000000 @ make signed zero
b 22b
.align 2
2:
vmul.f32 s3,s3,s9 @ ε³/3!
vsub.f32 s1,s1,s3 @ ε-ε³/3! ~ sinε
vmul.f32 s2,s2,s8 @ ε²/2! ~ 1-cosε
fsc_costail:
@ here calculate cos φ+ε = cosθ
vmul.f32 s5,s7,s1 @ sinφ sinε
vfma.f32 s5,s2,s6 @ sinφ sinε + cosφ(1-cosε)
vsub.f32 s5,s6,s5 @ cosφ - (sinφ sinε + cosφ(1-cosε)) = cosφ cosε - sinφ sinε
vmov.f32 r0,s5
eor r0,r0,r12,lsl#31
bx r14
.align 3
k_sc3:
.word 0x3EFFFEC1 @ ~ 1/2! with PMC
.word 0x3e2aaa25 @ ~ 1/3! with PMC
trigtab2:
// φ cos φ sin φ
.word 0x00000000,0x3f800000,0x00000000
.word 0x3cfcc961,0x3f7fe0cd,0x3cfcbf1c @ φ=0.03085774 : cos φ=3feffc199ff28ef4 33.3b; sin φ=3f9f97e38006c678 39.2b
.word 0x3d810576,0x3f7f7dfe,0x3d80ef9e @ φ=0.06299870 : cos φ=3fefefbfc00d6b6d 33.3b; sin φ=3fb01df3c000dfd5 40.2b
.word 0x3dbf0c09,0x3f7ee30f,0x3dbec522 @ φ=0.09328467 : cos φ=3fefdc61dff4f58e 33.5b; sin φ=3fb7d8a43ffdf9ac 39.0b
.word 0x3dff24b6,0x3f7e0414,0x3dfe7be2 @ φ=0.12458174 : cos φ=3fefc0827fdaf90f 31.8b; sin φ=3fbfcf7c3ff9dd0c 37.4b
.word 0x3e1f0713,0x3f7ceb48,0x3e1e63a0 @ φ=0.15530042 : cos φ=3fef9d68ffe680a0 32.3b; sin φ=3fc3cc73fffa6d09 36.5b
.word 0x3e40306d,0x3f7b811d,0x3e3f1015 @ φ=0.18768473 : cos φ=3fef70239fe32301 32.1b; sin φ=3fc7e2029ffdbc2c 37.8b
.word 0x3e60ada2,0x3f79dccf,0x3e5ee13e @ φ=0.21941236 : cos φ=3fef3b99e023f5aa 31.8b; sin φ=3fcbdc27bffe216d 38.1b
.word 0x3e800d7b,0x3f7808fa,0x3e7d7196 @ φ=0.25010285 : cos φ=3fef011f401572a6 32.6b; sin φ=3fcfae32c00328bb 37.3b
.word 0x3e8f986e,0x3f75ff65,0x3e8db868 @ φ=0.28045982 : cos φ=3feebfeca0aaaf99 29.6b; sin φ=3fd1b70cfffc1468 36.0b
.word 0x3e9fe1f4,0x3f739e93,0x3e9d4bfd @ φ=0.31227076 : cos φ=3fee73d25fbf733b 31.0b; sin φ=3fd3a97fa0002ced 40.5b
.word 0x3eb054c6,0x3f70f7ae,0x3eacddb3 @ φ=0.34439677 : cos φ=3fee1ef5bfcf70cb 31.4b; sin φ=3fd59bb65fff5c30 38.6b
.word 0x3ebf89c5,0x3f6e4b60,0x3ebb1a0a @ φ=0.37409797 : cos φ=3fedc96bffdebb8a 31.9b; sin φ=3fd763414003344b 36.3b
.word 0x3ecfc426,0x3f6b35ca,0x3eca1c63 @ φ=0.40579337 : cos φ=3fed66b93fe27dc6 32.1b; sin φ=3fd9438c5ffe5d45 37.3b
.word 0x3ee054f2,0x3f67d166,0x3ed93907 @ φ=0.43814808 : cos φ=3fecfa2cbffc16e9 35.0b; sin φ=3fdb2720dffef5b6 37.9b
.word 0x3eeff0dd,0x3f64664b,0x3ee74116 @ φ=0.46863452 : cos φ=3fec8cc95f714272 29.8b; sin φ=3fdce822c00479ad 35.8b
.word 0x3f002b31,0x3f609488,0x3ef5c30f @ φ=0.50065905 : cos φ=3fec1290ffc99208 31.2b; sin φ=3fdeb861dfff3932 38.4b
.word 0x3f07e407,0x3f5cc5a2,0x3f01992b @ φ=0.53082317 : cos φ=3feb98b44034cd46 31.3b; sin φ=3fe033255ffff628 41.7b
.word 0x3f101fc5,0x3f587d8f,0x3f08a165 @ φ=0.56298476 : cos φ=3feb0fb1e0ceda6f 29.3b; sin φ=3fe1142c9ffd5ae4 35.6b
.word 0x3f17f68a,0x3f5434b5,0x3f0f31ca @ φ=0.59360564 : cos φ=3fea8696a038a06f 31.2b; sin φ=3fe1e639400269fb 35.7b
.word 0x3f1fffe2,0x3f4f9b59,0x3f15c8d7 @ φ=0.62499821 : cos φ=3fe9f36b1f428363 29.4b; sin φ=3fe2b91ae001d55d 36.1b
.word 0x3f280646,0x3f4acf6b,0x3f1c37c4 @ φ=0.65634573 : cos φ=3fe959ed61449f08 28.7b; sin φ=3fe386f87ffd9617 35.7b
.word 0x3f303041,0x3f45b9e0,0x3f229ae4 @ φ=0.68823630 : cos φ=3fe8b73c0047ae7a 30.8b; sin φ=3fe4535c7ffdf1ac 36.0b
.word 0x3f381da7,0x3f4098ca,0x3f28a620 @ φ=0.71920246 : cos φ=3fe81319402ae6e1 31.6b; sin φ=3fe514c3ffff423c 37.4b
.word 0x3f3fc72f,0x3f3b76ac,0x3f2e564a @ φ=0.74913305 : cos φ=3fe76ed5809d419f 29.7b; sin φ=3fe5cac93fffaf1d 38.7b
.word 0x3f4813db,0x3f35b6cc,0x3f34526b @ φ=0.78155297 : cos φ=3fe6b6d9800e8b52 33.1b; sin φ=3fe68a4d5ffe89fc 36.5b
.word 0x3f4fc779,0x3f30352f,0x3f39b4d0 @ φ=0.81163746 : cos φ=3fe606a5dfdc2b5b 31.8b; sin φ=3fe73699fffd7fc8 35.7b
.word 0x3f57dd52,0x3f2a4170,0x3f3f2d91 @ φ=0.84322083 : cos φ=3fe5482e011ba752 28.9b; sin φ=3fe7e5b21ffcb223 35.3b
.word 0x3f5fce26,0x3f243e9f,0x3f445dc3 @ φ=0.87423933 : cos φ=3fe487d3e0b9864b 29.5b; sin φ=3fe88bb85ffde6d5 35.9b
.word 0x3f6825f1,0x3f1dc250,0x3f499d1c @ φ=0.90682894 : cos φ=3fe3b849ffea9b8f 32.6b; sin φ=3fe933a38002730d 35.7b
.word 0x3f703be1,0x3f175041,0x3f4e7ebf @ φ=0.93841368 : cos φ=3fe2ea0820791b4e 30.1b; sin φ=3fe9cfd7e0053e65 34.6b
.word 0x3f781078,0x3f10ed71,0x3f5306af @ φ=0.96900129 : cos φ=3fe21dae1fdea23e 31.9b; sin φ=3fea60d5e001b90b 36.2b
.word 0x3f7ff4d4,0x3f0a5aa7,0x3f57649b @ φ=0.99982953 : cos φ=3fe14b54deeaa407 28.9b; sin φ=3feaec9360012825 36.8b
float_wrapper_section tanf
wrapper_func tanf
push {r0,r14}
ubfx r1,r0,#23,#8
cmp r1,#0xff @ Inf/NaN?
beq 2f
bl cosf_entry @ this will exit via sintail or costail...
ldr r1,[sp,#0]
@ here C is still set from lsrs r12,r12,#1
bcs 1f
@ we exited via sintail
@ this is fsc_costail:
@ here calculate cos φ+ε = cosθ
vmul.f32 s5,s7,s1 @ sinφ sinε
vfma.f32 s5,s2,s6 @ sinφ sinε + cosφ(1-cosε)
eors r1,r1,r3
vsub.f32 s5,s6,s5 @ cosφ - (sinφ sinε + cosφ(1-cosε)) = cosφ cosε - sinφ sinε
vdiv.f32 s0,s5,s4
vmov.f32 r0,s0
it pl
eorpl r0,r0,#0x80000000
pop {r1,r15}
1:
@ we exited via costail
@ this is fsc_sintail:
@ here calculate sin φ+ε = sinθ
vmul.f32 s4,s2,s7 @ sinφ(1-cosε)
vfms.f32 s4,s6,s1 @ sinφ(1-cosε) - cosφ sinε
eors r1,r1,r3
vsub.f32 s4,s7,s4 @ cosφ sinε + sinφ cosε
vdiv.f32 s0,s4,s5
vmov.f32 r0,s0
it mi
eormi r0,r0,#0x80000000
pop {r1,r15}
@ tan of Inf or NaN
2:
lsls r1,r0,#9
bne 1f @ NaN? return it
orrs r0,r0,#0x80000000 @ Inf: make a NaN
1:
orrs r0,r0,#0x00400000 @ set top mantissa bit of NaN
pop {r3,r15}
float_wrapper_section atan2f
50:
60:
orrs r0,r1,#0x00400000
bx r14
51:
bne 52f @ NaN?
cmp r3,#0x7f800000 @ y an infinity; x an infinity too?
bne 55f @ no: carry on
@ here x and y are both infinities
b 66f
52:
62:
orrs r0,r0,#0x00400000
bx r14
61:
bne 62b @ NaN?
cmp r3,#0x7f800000 @ y an infinity; x an infinity too?
bne 65f @ no: carry on
66:
@ here x and y are both infinities
subs r0,r0,#1 @ make both finite (and equal) with same sign and retry
subs r1,r1,#1
b 86f
70:
and r3,#0x80000000
cmp r2,#0x00800000
bhs 72f @ y 0 or denormal?
@ here both x and y are zeros
b 85f
71:
and r2,#0x80000000
72:
vmov s0,s1,r2,r3
vdiv.f32 s2,s0,s1 @ restart the division
b 73f @ and go back and check for NaNs
80:
and r3,#0x80000000
cmp r2,#0x00800000
bhs 82f @ y 0 or denormal?
85:
@ here both x and y are zeros
orr r1,r1,0x3f800000 @ retry with x replaced by ~1 with appropriate sign
b 86f
81:
and r2,#0x80000000
82:
vmov s0,s1,r2,r3
vdiv.f32 s2,s1,s0 @ restart the division
b 83f @ and go back and check for NaNs
wrapper_func atan2f
86:
bic r2,r0,#0x80000000
bic r3,r1,#0x80000000
vmov s0,s1,r2,r3
cmp r2,r3 @ |y| vs. |x|
bhi 1f
@ here |x|≥|y| so we need |y|/|x|; octant/xs/ys: 0++,3-+,4--,7+-
vdiv.f32 s2,s0,s1 @ get this division started; result ≤1
cmp r3,#0x00800000
blo 70b @ x 0 or denormal?
cmp r2,#0x00800000
blo 71b @ y 0 or denormal?
73:
cmp r3,#0x7f800000
bhi 50b @ x NaN?
cmp r2,#0x7f800000
bhs 51b @ y Inf or NaN?
55:
cmp r1,#0
ite mi
ldrmi r12,pi @ if x<0, need two extra quadrants
movpl r12,#0
@ inner negation is the sign of x
b 2f
1:
@ here |x|<|y| so we need |x|/|y|; octant/xs/ys: 1++,2-+,5--,6+-
vdiv.f32 s2,s1,s0 @ result <1
cmp r3,#0x00800000
blo 80b @ x 0 or denormal?
cmp r2,#0x00800000
blo 81b @ y 0 or denormal?
83:
cmp r3,#0x7f800000
bhi 60b @ x NaN?
cmp r2,#0x7f800000
bhs 61b @ y Inf or NaN?
65:
ldr r12,piover2 @ always one extra quadrant in this path
eors r1,r1,#0x80000000 @ inner negation is the complement of the sign of x
2:
@ here
@ r0 y
@ r1 ±x
@ r2 |y|
@ r3 |x|
@ s0,s1 = |x|,|y|
@ s2=s0/s1 or s1/s0, 0≤s2≤1
@ r12=quadrant count * π/2
@ where the final result is
@ ± (r12 ± atn s2) where the inner negation is given by r1b31 and the outer negation by r0b31
adr r2,trigtab3
vmov.f32 s3,s2
vcvt.u32.f32 s3,s3,#6
vmov.f32 r3,s3
lsrs r3,r3,#1
adcs r3,r3,#0 @ rounding; set Z if in φ==0 case
add r2,r2,r3,lsl#3
vldr s5,[r2,#4] @ t=tanφ
vmul.f32 s0,s5,s2 @ ty
vsub.f32 s1,s2,s5 @ y-t
vmov.f32 s5,#1.0
vadd.f32 s0,s5,s0 @ 1+ty
beq 9f @ did we look up zeroth table entry?
@ now (s0,s1) = (x,y)
vdiv.f32 s0,s1,s0 @ ε
ldr r2,[r2] @ φ Q29
@ result is now ±(r12±(r2+atn(s0))
cmp r1,#0 @ inner negation
it mi
rsbmi r2,r2,#0
add r2,r12,r2 @ Q29
cmp r0,#0 @ outer negation
it mi
rsbmi r2,r2,#0
cmp r2,#0
bpl 1f
rsbs r2,r2,#0
clz r3,r2
lsls r2,r2,r3
beq 3f
rsb r3,#0x180
b 2f
1:
clz r3,r2
lsls r2,r2,r3
beq 3f
rsb r3,#0x80
2:
lsrs r2,r2,#8 @ rounding bit to carry
adc r2,r2,r3,lsl#23 @ with rounding
3:
vmul.f32 s2,s0,s0 @ ε²
vldr.f32 s3,onethird
vmul.f32 s2,s2,s0 @ ε³
teq r0,r1
vmul.f32 s2,s2,s3 @ ε³/3
vmov.f32 s4,r2
vsub.f32 s0,s0,s2 @ ~atn(ε)
ite pl
vaddpl.f32 s0,s4,s0
vsubmi.f32 s0,s4,s0
vmov.f32 r0,s0
bx r14
9: @ we looked up the zeroth table entry; we could generate slightly more accurate results here
@ now (s0,s1) = (x,y)
vdiv.f32 s0,s1,s0 @ ε
@ result is now ±(r12±(0+atn(s0))
mov r2,r12 @ Q29; in fact r12 is only ±π/2 or ±π so can probably simplify this
cmp r0,#0 @ outer negation
it mi
rsbmi r2,r2,#0
cmp r2,#0
bpl 1f
rsbs r2,r2,#0
clz r3,r2
lsls r2,r2,r3
beq 3f
rsb r3,#0x180
b 2f
1:
clz r3,r2
lsls r2,r2,r3
beq 3f
rsb r3,#0x80
2:
lsrs r2,r2,#8 @ rounding bit to carry
adc r2,r2,r3,lsl#23 @ with rounding
3:
vmul.f32 s2,s0,s0 @ ε²
vldr.f32 s3,onethird
vmul.f32 s2,s2,s0 @ ε³
teq r0,r1
vmul.f32 s2,s2,s3 @ ε³/3
vmov.f32 s4,r2
vsub.f32 s0,s0,s2 @ ~atn(ε)
ite pl
vaddpl.f32 s0,s4,s0
vsubmi.f32 s0,s4,s0
vmov.f32 r0,s0
tst r0,#0x7f800000 @ about to return a denormal?
it ne
bxne r14
and r0,r0,#0x80000000 @ make it zero
bx r14
piover2: .word 0x3243f6a9 @ Q29
pi: .word 0x6487ed51 @ Q29
onethird: .float 0.33333333
trigtab3:
// φ Q29 tan φ SP
.word 0x00000000,0x00000000
.word 0x00ffee23,0x3d0001bb @ φ=0.03124148 : tan φ=3fa000375fffff9d 50.4b
.word 0x01fe88dc,0x3d7f992a @ φ=0.06232112 : tan φ=3faff3253fffea1f 44.5b
.word 0x02fe0a70,0x3dc01203 @ φ=0.09351084 : tan φ=3fb8024060002522 42.8b
.word 0x03fad228,0x3e000368 @ φ=0.12436779 : tan φ=3fc0006cfffffc90 45.2b
.word 0x04f5ab70,0x3e1ffdea @ φ=0.15498897 : tan φ=3fc3ffbd400014d5 42.6b
.word 0x05ed56f8,0x3e3fdddc @ φ=0.18522213 : tan φ=3fc7fbbb80000beb 43.4b
.word 0x06e4cfa0,0x3e601425 @ φ=0.21543103 : tan φ=3fcc02849fffe817 42.4b
.word 0x07d8d3e0,0x3e80215d @ φ=0.24521822 : tan φ=3fd0042b9ffff89f 43.1b
.word 0x08c60460,0x3e9000a5 @ φ=0.27417201 : tan φ=3fd20014a000182b 41.4b
.word 0x09b26770,0x3ea01492 @ φ=0.30302784 : tan φ=3fd402923ffff932 43.2b
.word 0x0a996d50,0x3eb01377 @ φ=0.33122888 : tan φ=3fd6026ee0001062 42.0b
.word 0x0b7a6d10,0x3ebff4a0 @ φ=0.35869458 : tan φ=3fd7fe93ffff8c38 39.1b
.word 0x0c593ce0,0x3ed0019f @ φ=0.38589329 : tan φ=3fda0033e0001354 41.7b
.word 0x0d33ebd0,0x3ee01bbc @ φ=0.41258803 : tan φ=3fdc0377800162a1 37.5b
.word 0x0e087ab0,0x3ef01fbd @ φ=0.43853506 : tan φ=3fde03f79fffddf2 40.9b
.word 0x0ed56180,0x3effef98 @ φ=0.46354747 : tan φ=3fdffdf30000767d 39.1b
.word 0x0fa1de80,0x3f080ebf @ φ=0.48850942 : tan φ=3fe101d7dfffb9fc 38.9b
.word 0x10639d00,0x3f0fec31 @ φ=0.51215982 : tan φ=3fe1fd862000aad5 37.6b
.word 0x112690e0,0x3f180cfd @ φ=0.53595775 : tan φ=3fe3019fa00069ea 38.3b
.word 0x11e014c0,0x3f200065 @ φ=0.55860364 : tan φ=3fe4000ca00022e5 39.9b
.word 0x129651e0,0x3f2808be @ φ=0.58084959 : tan φ=3fe50117c00015a7 40.6b
.word 0x1346d400,0x3f300a7d @ φ=0.60239601 : tan φ=3fe6014f9fffa020 38.4b
.word 0x13efc7c0,0x3f37ee2f @ φ=0.62302005 : tan φ=3fe6fdc5dfff98d7 38.3b
.word 0x14988960,0x3f400c32 @ φ=0.64362019 : tan φ=3fe801863fffff81 46.0b
.word 0x1537a8c0,0x3f47ef42 @ φ=0.66304433 : tan φ=3fe8fde8400062a4 38.4b
.word 0x15d4cc60,0x3f4ff630 @ φ=0.68222636 : tan φ=3fe9fec5ffff76e2 37.9b
.word 0x166ef280,0x3f581534 @ φ=0.70104337 : tan φ=3feb02a680004e91 38.7b
.word 0x16ff75c0,0x3f5fef1e @ φ=0.71868408 : tan φ=3febfde3c0001404 40.7b
.word 0x179116a0,0x3f68184d @ φ=0.73646098 : tan φ=3fed03099ffed6e5 36.8b
.word 0x181b5aa0,0x3f701722 @ φ=0.75333911 : tan φ=3fee02e43fffd351 39.5b
.word 0x18a10560,0x3f781071 @ φ=0.76965588 : tan φ=3fef020e20005c05 38.5b
.word 0x19214060,0x3f7ff451 @ φ=0.78530902 : tan φ=3feffe8a1fffe11b 40.1b
#endif

View file

@ -0,0 +1,318 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#include "hardware/hazard3.h"
// This file reimplements some common single-precision soft float routines
// from libgcc. It targets the RV32IMBZbkb dialect (plus optionally Xh3bextm)
// and is tuned for Hazard3 execution timings.
// Subnormal values are always flushed to zero on both input and output.
// Rounding is always to nearest (even on tie).
pico_default_asm_setup
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
float_section __addsf3
.global __subsf3
.p2align 2
__subsf3:
binvi a1, a1, 31
.global __addsf3
__addsf3:
// Unpack exponent:
h3.bextmi a2, a0, 23, 8
h3.bextmi a3, a1, 23, 8
// Flush-to-zero => 0 + y = y applies, including nan, with the sole
// exception of y being subnormal (which also needs to be flushed)
beqz a2, __addsf_return_y_flushed
// Don't have to handle this case for x + 0 = 0 because we already know x
// is nonzero
beqz a3, __addsf_return_x
// Unpack significand, plus 3 extra zeroes for working space:
slli a4, a0, 9
slli a5, a1, 9
// check nan/inf on input
li t0, 255
beq a2, t0, __addsf_x_nan_inf
beq a3, t0, __addsf_y_nan_inf
// (finish unpacking significand)
srli a4, a4, 6
srli a5, a5, 6
// If we're still on the straight path then we are adding two normal
// values. Add implicit one (1.xx...xx000)
bseti a4, a4, 23 + 3
bseti a5, a5, 23 + 3
// Negate if sign bit is set
bgez a0, 1f
neg a4, a4
1:
// (tuck this 16-bit instruction here to avoid an alignment penalty)
li t1, 25
bgez a1, 1f
neg a5, a5
1:
bltu a2, a3, __addsf_ye_gt_xe
// The main body is repeated twice with different register assignments.
// lhs is the more-significant addend:
.macro addsf_core packed_lhs, packed_rhs, sig_lhs, sig_rhs, exp_lhs, exp_rhs, rhs_is_x
sub \packed_rhs, \exp_lhs, \exp_rhs
// If there is a large exponent difference then there is no effect on lhs
.if \rhs_is_x
bgeu \packed_rhs, t1, __addsf_return_y
.else
bgeu \packed_rhs, t1, __addsf_return_x
.endif
// Shift rhs down to correct relative significance
sra \packed_lhs, \sig_rhs, \packed_rhs
// Set sticky bit if ones were shifted out
sll \packed_rhs, \packed_lhs, \packed_rhs
sltu \packed_rhs, \packed_rhs, \sig_rhs
or \packed_lhs, \packed_lhs, \packed_rhs
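// (shifting the truncated addend back up and comparing it with the
// original detects whether any one-bits were discarded; the 0/1 result
// is OR'd in as a sticky bit so rounding still sees them)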
// Add significands
add \sig_lhs, \sig_lhs, \packed_lhs
// Detect exact cancellation (may be beyond max normalisation shift; also
// IEEE 754 requires +0 for exact cancellation, no matter input signs)
beqz \sig_lhs, __addsf_return_0
// Convert two's complement back to sign + magnitude
srai \exp_rhs, \sig_lhs, 31
xor \sig_lhs, \sig_lhs, \exp_rhs
sub \sig_lhs, \sig_lhs, \exp_rhs
// Renormalise significand: bit 31 is now implicit one
clz \packed_lhs, \sig_lhs
sll \sig_lhs, \sig_lhs, \packed_lhs
// Adjust exponent
addi \packed_lhs, \packed_lhs, -5
sub \exp_lhs, \exp_lhs, \packed_lhs
// Round to nearest, even on tie (bias upward if above odd number)
bexti \packed_lhs, \sig_lhs, 8
addi \sig_lhs, \sig_lhs, 127
add \sig_lhs, \sig_lhs, \packed_lhs
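// (bits 7:0 sit below the result lsb at bit 8, so adding 0x7f plus the
// current lsb rounds to nearest: a fraction above 0x80 always carries,
// exactly 0x80 carries only when the lsb is already 1, i.e. ties go to even)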
// Exponent may increase by one due to rounding up from all-ones; this is
// detected by clearing of implicit one (there is a carry-out too)
bgez \sig_lhs, 3f
4:
// Detect underflow/overflow
bgeu \exp_lhs, t0, 1f
// Pack and return
packh \exp_lhs, \exp_lhs, \exp_rhs
slli \exp_lhs, \exp_lhs, 23
slli \sig_lhs, \sig_lhs, 1
srli \sig_lhs, \sig_lhs, 9
add a0, \sig_lhs, \exp_lhs
ret
1:
bgez \exp_lhs, 2f
// Signed zero on underflow
slli a0, \exp_rhs, 31
ret
2:
// Signed infinity on overflow
packh a0, t0, \exp_rhs
slli a0, a0, 23
ret
3:
// Exponent increase due to rounding (uncommon)
srli \sig_lhs, \sig_lhs, 1
addi \exp_lhs, \exp_lhs, 1
j 4b
.endm
__addsf_xe_gte_ye:
addsf_core a0, a1, a4, a5, a2, a3, 0
.p2align 2
__addsf_ye_gt_xe:
addsf_core a1, a0, a5, a4, a3, a2, 1
__addsf_x_nan_inf:
// When at least one operand is nan, we must propagate at least one of
// those nan payloads (sign of nan result is unspecified, which we take
// advantage of by implementing x - y as x + -y). Check x nan vs inf:
bnez a4, __addsf_return_x
__addsf_x_inf:
// If x is +-inf, need to distinguish the following cases:
bne a3, t0, __addsf_return_x // y is neither inf nor nan -> return x (propagate inf)
bnez a5, __addsf_return_y // y is nan: -> return y (propagate nan)
xor a5, a0, a1
srli a5, a5, 31
beqz a5, __addsf_return_x // y is inf of same sign -> return either x or y (x is faster)
li a0, -1 // y is inf of different sign -> return nan
ret
__addsf_y_nan_inf:
// Mirror of __addsf_x_nan_inf
bnez a5, __addsf_return_y
__addsf_y_inf:
bne a2, t0, __addsf_return_y
bnez a4, __addsf_return_x
xor a4, a0, a1
srli a4, a4, 31
beqz a4, __addsf_return_x
li a0, -1
ret
__addsf_return_y_flushed:
bnez a3, 1f
srli a1, a1, 23
slli a1, a1, 23
1:
__addsf_return_y:
mv a0, a1
__addsf_return_x:
ret
__addsf_return_0:
li a0, 0
ret
float_section __mulsf3
.global __mulsf3
.p2align 2
__mulsf3:
// Force y to be positive (by possibly negating x) *before* unpacking.
// This allows many special cases to be handled without repacking.
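// (sign(x*y) = sign(x) ^ sign(y), so folding y's sign into x up front means
// y's sign bit can simply be ignored from here on)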
bgez a1, 1f
binvi a0, a0, 31
1:
// Unpack exponent:
h3.bextmi a2, a0, 23, 8
h3.bextmi a3, a1, 23, 8
// Check special cases
li t0, 255
beqz a2, __mulsf_x_0
beqz a3, __mulsf_y_0
beq a2, t0, __mulsf_x_nan_inf
beq a3, t0, __mulsf_y_nan_inf
// Finish unpacking sign
srai a6, a0, 31
// Unpack significand (with implicit one in MSB)
slli a4, a0, 8
slli a5, a1, 8
bseti a4, a4, 31
bseti a5, a5, 31
// Get full 64-bit multiply result in a4:a1 (one cycle each half)
// Going from Q1.23 to Q2.46 (both left-justified)
mul a1, a4, a5
mulhu a4, a4, a5
// Normalise (shift left by either 0 or 1) -- bit 8 is the LSB of the
// final significand (ignoring rounding)
clz a0, a4
sll a4, a4, a0
sub a2, a2, a0
// After normalising we can calculate the final exponent, since rounding
// cannot increase the exponent for multiplication (unlike addition)
add a2, a2, a3
// Subtract redundant bias term (127), add 1 for normalisation correction
addi a2, a2, -126
blez a2, __mulsf_underflow
bge a2, t0, __mulsf_overflow
// Gather sticky bits from low fraction:
snez a1, a1
or a4, a4, a1
// Round to nearest, even on tie (aka bias upward if odd)
bexti a1, a4, 8
add a4, a4, a1
addi a4, a4, 127
// Pack it and ship it
packh a2, a2, a6
slli a2, a2, 23
slli a4, a4, 1
srli a4, a4, 9
add a0, a4, a2
ret
__mulsf_underflow:
// Signed zero
slli a0, a6, 31
ret
__mulsf_overflow:
// Signed inf
packh a0, t0, a6
slli a0, a0, 23
ret
__mulsf_x_0:
// 0 times nan -> propagate nan
// 0 times inf -> generate nan
// 0 times others -> 0 (need to flush significand too as we are FTZ)
bne a3, t0, __mulsf_return_flushed_x
slli a5, a1, 9
beqz a5, 1f
// Propagate nan from y
__mulsf_return_y:
mv a0, a1
ret
1:
// Generate new nan
li a0, -1
ret
__mulsf_y_0:
// Mirror image of x_0 except we still return x for signed 0, since the
// signs were already resolved.
bne a2, t0, __mulsf_return_flushed_x
slli a1, a0, 9
bnez a1, 1f
li a0, -1
1:
ret
__mulsf_return_flushed_x:
// If we don't support subnormals we at least need to flush to a canonical
// zero. This is just a sign bit in bit 31.
srli a0, a0, 31
slli a0, a0, 31
__mulsf_return_x:
ret
__mulsf_x_nan_inf:
// We know that y is not zero and is positive. So...
// x is nan -> return x
// else y is nan -> return y
// else y is inf -> return x
// else y is normal -> return x
// (the order of the first two clauses is actually our free choice)
slli a4, a0, 9
bnez a4, __mulsf_return_x
bne a3, t0, __mulsf_return_x
slli a5, a1, 9
bnez a5, __mulsf_return_y
ret // return x
__mulsf_y_nan_inf:
// We know that x is not zero, nan, nor inf. That just leaves normals.
// y is nan -> return y
// y is inf -> return inf * sgn(x) (since we already merged the signs)
slli a5, a1, 9
bnez a5, __mulsf_return_y
srai a0, a0, 31
packh a0, t0, a0
slli a0, a0, 23
ret
// This is a hack to improve soft float performance for the routines we don't
// implement (e.g. libm) in libraries built against a non-Zbb ISA dialect:
float_section __clzsi2
.global __clzsi2
__clzsi2:
clz a0, a0
ret

View file

@ -0,0 +1,346 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
#ifndef PICO_FLOAT_IN_RAM
#define PICO_FLOAT_IN_RAM 0
#endif
pico_default_asm_setup
.macro float_section name
// todo separate flag for shims?
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
float_section float_table_shim_on_use_helper
regular_func float_table_shim_on_use_helper
push {r0-r2, lr}
mov r0, ip
#ifndef NDEBUG
// sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro
cmp r0, #0
bne 1f
bkpt #0
#endif
1:
ldrh r1, [r0]
lsrs r2, r1, #8
adds r0, #2
cmp r2, #0xdf
bne 1b
uxtb r1, r1 // r1 holds table offset
lsrs r2, r0, #2
bcc 1f
// unaligned
ldrh r2, [r0, #0]
ldrh r0, [r0, #2]
lsls r0, #16
orrs r0, r2
b 2f
1:
ldr r0, [r0]
2:
ldr r2, =sf_table
str r0, [r2, r1]
str r0, [sp, #12]
pop {r0-r2, pc}
float_section 642float_shims
@ convert uint64 to float, rounding
regular_func uint642float_shim
movs r2,#0 @ fall through
@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
regular_func ufix642float_shim
push {r4,r5,r14}
cmp r1,#0
bpl 3f @ positive? we can use signed code
lsls r5,r1,#31 @ contribution to sticky bits
orrs r5,r0
lsrs r0,r1,#1
subs r2,#1
b 4f
@ convert int64 to float, rounding
regular_func int642float_shim
movs r2,#0 @ fall through
@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
regular_func fix642float_shim
push {r4,r5,r14}
3:
movs r5,r0
orrs r5,r1
beq ret_pop45 @ zero? return +0
asrs r5,r1,#31 @ sign bits
2:
asrs r4,r1,#24 @ try shifting 7 bits at a time
cmp r4,r5
bne 1f @ next shift will overflow?
lsls r1,#7
lsrs r4,r0,#25
orrs r1,r4
lsls r0,#7
adds r2,#7
b 2b
1:
movs r5,r0
movs r0,r1
4:
negs r2,r2
adds r2,#32+29
// bl packx
ldr r1, =0x29ef // packx
blx r1
ret_pop45:
pop {r4,r5,r15}
float_section fatan2_shim
regular_func fatan2_shim
push {r4,r5,r14}
ldr r4, =0x29c1 // unpackx
mov ip, r4
@ unpack arguments and shift one down to have common exponent
blx ip
mov r4,r0
mov r0,r1
mov r1,r4
mov r4,r2
mov r2,r3
mov r3,r4
blx ip
lsls r0,r0,#5 @ Q28
lsls r1,r1,#5 @ Q28
adds r4,r2,r3 @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise
asrs r4,#9
adds r4,#1
bmi 2f @ force y to 0 proper, so result will be zero
subs r4,r2,r3 @ calculate shift
bge 1f @ ex>=ey?
negs r4,r4 @ make shift positive
asrs r0,r4
cmp r4,#28
blo 3f
asrs r0,#31
b 3f
1:
asrs r1,r4
cmp r4,#28
blo 3f
2:
@ here |x|>>|y| or both x and y are ±0
cmp r0,#0
bge 4f @ x positive, return signed 0
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
ldr r0,[r3] @ x negative, return +/- pi
asrs r1,#31
eors r0,r1
b 7f
4:
asrs r0,r1,#31
b 7f
3:
movs r2,#0 @ initial angle
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
cmp r0,#0 @ x negative
bge 5f
negs r0,r0 @ rotate to 1st/4th quadrants
negs r1,r1
ldr r2,[r3] @ pi Q29
5:
movs r4,#1 @ m=1
ldr r5, =0x2b97 @ cordic_vec
blx r5 @ also produces magnitude (with scaling factor 1.646760119), which is discarded
mov r0,r2 @ result here is -pi/2..3pi/2 Q29
@ asrs r2,#29
@ subs r0,r2
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
ldr r2,[r3] @ pi Q29
adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case
bcs 6f @ -pi/2..0? leave result as is
subs r4,r0,r2 @ <pi? leave as is
bmi 6f
subs r0,r4,r2 @ >pi: take off 2pi
6:
subs r0,#1 @ fiddle factor so atan2(0,1)==0
7:
movs r2,#0 @ exponent for pack
ldr r3, =0x2b19
bx r3
float_section float232_shims
regular_func float2int_shim
movs r1,#0 @ fall through
regular_func float2fix_shim
// check for -0 or -denormal upfront
asrs r2, r0, #23
adds r2, #128
adds r2, #128
beq 1f
// call original
ldr r2, =0x2acd
bx r2
1:
movs r0, #0
bx lr
float_section float264_shims
regular_func float2int64_shim
movs r1,#0 @ and fall through
regular_func float2fix64_shim
push {r14}
bl f2fix
b d2f64_a
regular_func float2uint64_shim
movs r1,#0 @ and fall through
regular_func float2ufix64_shim
asrs r3,r0,#23 @ negative? return 0
bmi ret_dzero
@ and fall through
@ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf
@ result clamped so that r3 can only be 0 or -1
@ trashes r12
.thumb_func
f2fix:
push {r4,r14}
mov r12,r1
asrs r3,r0,#31
lsls r0,#1
lsrs r2,r0,#24
beq 1f @ zero?
cmp r2,#0xff @ Inf?
beq 2f
subs r1,r2,#1
subs r2,#0x7f @ remove exponent bias
lsls r1,#24
subs r0,r1 @ insert implied 1
eors r0,r3
subs r0,r3 @ top two's complement
asrs r1,r0,#4 @ convert to double format
lsls r0,#28
ldr r4, =d2fix_a
bx r4
1:
movs r0,#0
movs r1,r0
movs r3,r0
pop {r4,r15}
2:
mvns r0,r3 @ return max/min value
mvns r1,r3
pop {r4,r15}
ret_dzero:
movs r0,#0
movs r1,#0
bx r14
float_section d2fix_a_float
.weak d2fix_a // weak because it exists in float shims too
.thumb_func
d2fix_a:
@ here
@ r0:r1 two's complement mantissa
@ r2 unbaised exponent
@ r3 mantissa sign extension bits
add r2,r12 @ exponent plus offset for required binary point position
subs r2,#52 @ required shift
bmi 1f @ shift down?
@ here a shift up by r2 places
cmp r2,#12 @ will clamp?
bge 2f
movs r4,r0
lsls r1,r2
lsls r0,r2
negs r2,r2
adds r2,#32 @ complementary shift
lsrs r4,r2
orrs r1,r4
pop {r4,r15}
2:
mvns r0,r3
mvns r1,r3 @ overflow: clamp to extreme fixed-point values
pop {r4,r15}
1:
@ here a shift down by -r2 places
adds r2,#32
bmi 1f @ long shift?
mov r4,r1
lsls r4,r2
negs r2,r2
adds r2,#32 @ complementary shift
asrs r1,r2
lsrs r0,r2
orrs r0,r4
pop {r4,r15}
1:
@ here a long shift down
movs r0,r1
asrs r1,#31 @ shift down 32 places
adds r2,#32
bmi 1f @ very long shift?
negs r2,r2
adds r2,#32
asrs r0,r2
pop {r4,r15}
1:
movs r0,r3 @ result very near zero: use sign extension bits
movs r1,r3
pop {r4,r15}
d2f64_a:
asrs r2,r1,#31
cmp r2,r3
bne 1f @ sign extension bits fail to match sign of result?
pop {r15}
1:
mvns r0,r3
movs r1,#1
lsls r1,#31
eors r1,r1,r0 @ generate extreme fixed-point values
pop {r15}
float_section float2double_shim
regular_func float2double_shim
lsrs r3,r0,#31 @ sign bit
lsls r3,#31
lsls r1,r0,#1
lsrs r2,r1,#24 @ exponent
beq 1f @ zero?
cmp r2,#0xff @ Inf?
beq 2f
lsrs r1,#4 @ exponent and top 20 bits of mantissa
ldr r2,=(0x3ff-0x7f)<<20 @ difference in exponent offsets
adds r1,r2
orrs r1,r3
lsls r0,#29 @ bottom 3 bits of mantissa
bx r14
1:
movs r1,r3 @ return signed zero
3:
movs r0,#0
bx r14
2:
ldr r1,=0x7ff00000 @ return signed infinity
adds r1,r3
b 3b
#endif

View file

@ -0,0 +1,90 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#ifndef _PICO_FLOAT_H
#define _PICO_FLOAT_H
#include <math.h>
#include <float.h>
#include "pico.h"
#include "pico/bootrom/sf_table.h"
#ifdef __cplusplus
extern "C" {
#endif
/** \file float.h
* \defgroup pico_float pico_float
*
* \brief Optimized single-precision floating point functions
*
* (Replacement) optimized implementations are provided for the following compiler built-ins
* and math library functions on Arm:
*
* - __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub, __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun, __aeabi_i2f, __aeabi_l2f, __aeabi_ui2f, __aeabi_ul2f, __aeabi_f2iz, __aeabi_f2lz, __aeabi_f2uiz, __aeabi_f2ulz, __aeabi_f2d, sqrtf, cosf, sinf, tanf, atan2f, expf, logf
* - ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf
* - powintf, sincosf (GNU extensions)
*
* The following additional optimized functions are also provided:
*
* - int2float, uint2float, int642float, uint642float, fix2float, ufix2float, fix642float, ufix642float
* - float2fix, float2ufix, float2fix64, float2ufix64, float2int, float2uint, float2int64, float2uint64, float2int_z, float2int64_z, float2uint_z, float2uint64_z
* - exp10f, sincosf, powintf
*
* On RP2350 (Arm) the following additional functions are available; the _fast methods are faster but do not round correctly
*
* - float2fix64_z, fdiv_fast, fsqrt_fast,
*
* On RP2350 RISC-V, only a small number of compiler runtime functions are overridden with faster implementations:
*
* - __addsf3, __subsf3, __mulsf3
*/
// None of these functions are available on RISC-V:
#if !defined(__riscv) || PICO_COMBINED_DOCS
float int2float(int32_t f);
float uint2float(uint32_t f);
float int642float(int64_t f);
float uint642float(uint64_t f);
float fix2float(int32_t m, int e);
float ufix2float(uint32_t m, int e);
float fix642float(int64_t m, int e);
float ufix642float(uint64_t m, int e);
// These methods round towards -Infinity.
int32_t float2fix(float f, int e);
uint32_t float2ufix(float f, int e);
int64_t float2fix64(float f, int e);
uint64_t float2ufix64(float f, int e);
int32_t float2int(float f);
uint32_t float2uint(float f);
int64_t float2int64(float f);
uint64_t float2uint64(float f);
// These methods round towards 0.
int32_t float2int_z(float f);
int64_t float2int64_z(float f);
int32_t float2uint_z(float f);
int64_t float2uint64_z(float f);
float exp10f(float x);
void sincosf(float x, float *sinx, float *cosx);
float powintf(float x, int y);
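// Illustrative usage (variable names here are examples only):
//
//   int32_t q16 = 0x00012000;        // 1.125 in Q16.16 fixed point
//   float x = fix2float(q16, 16);    // -> 1.125f
//   int32_t back = float2fix(x, 16); // rounds towards -Infinity
//   float s, c;
//   sincosf(x, &s, &c);              // sine and cosine in one call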
#if !PICO_RP2040 || PICO_COMBINED_DOCS
int64_t float2fix64_z(float f, int e);
float fdiv_fast(float n, float d);
float fsqrt_fast(float f);
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif