optimize math (#5287)

* optimize math Results in considerable flash saving * log_approx, exp_approx, pow_approx Taken from https://github.com/jhjourdan/SIMD-math-prims/blob/master/simd_math_prims.h * Fix pow in rangefinder * Use approximate function in baro calculation Maximum error is < 20cm * fixup! Fix pow in rangefinder
2025-07-23 16:25:31 +03:00 · 2018-03-03 13:26:33 +01:00 · 2018-03-03 13:26:33 +01:00 · c11d016bc7
commit c11d016bc7
parent b64802d931
7 changed files with 117 additions and 7 deletions
--- a/make/source.mk
+++ b/make/source.mk
@ -11,6 +11,7 @@ COMMON_SRC = \
            common/huffman.c \
            common/huffman_table.c \
            common/maths.c \
+            common/explog_approx.c \
            common/printf.c \
            common/streambuf.c \
            common/string_light.c \
--- a/src/main/common/explog_approx.c
+++ b/src/main/common/explog_approx.c
@ -0,0 +1,99 @@
+/*
+
+The MIT License (MIT)
+
+Copyright (c) 2015 Jacques-Henri Jourdan <jourgun@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+Taken from https://github.com/jhjourdan/SIMD-math-prims/blob/master/simd_math_prims.h
+Stripped down for BF use
+
+*/
+
+#include <math.h>
+#include <stdint.h>
+
+/* Workaround a lack of optimization in gcc */
+float exp_cst1 = 2139095040.f;
+float exp_cst2 = 0.f;
+
+/* Relative error bounded by 1e-5 for normalized outputs
+   Returns invalid outputs for nan inputs
+   Continuous error */
+float exp_approx(float val) {
+  union { int32_t i; float f; } xu, xu2;
+  float val2, val3, val4, b;
+  int32_t val4i;
+  val2 = 12102203.1615614f*val+1065353216.f;
+  val3 = val2 < exp_cst1 ? val2 : exp_cst1;
+  val4 = val3 > exp_cst2 ? val3 : exp_cst2;
+  val4i = (int32_t) val4;
+  xu.i = val4i & 0x7F800000;                   // mask exponent  / round down to neareset 2^n (implicit mantisa bit)
+  xu2.i = (val4i & 0x7FFFFF) | 0x3F800000;     // force exponent to 0
+  b = xu2.f;
+
+  /* Generated in Sollya with:
+     > f=remez(1-x*exp(-(x-1)*log(2)),
+               [|(x-1)*(x-2), (x-1)*(x-2)*x, (x-1)*(x-2)*x*x|],
+               [1.000001,1.999999], exp(-(x-1)*log(2)));
+     > plot(exp((x-1)*log(2))/(f+x)-1, [1,2]);
+     > f+x;
+  */
+  return
+    xu.f * (0.509871020343597804469416f + b *
+            (0.312146713032169896138863f + b *
+             (0.166617139319965966118107f + b *
+              (-2.19061993049215080032874e-3f + b *
+               1.3555747234758484073940937e-2f))));
+}
+
+/* Absolute error bounded by 1e-6 for normalized inputs
+   Returns a finite number for +inf input
+   Returns -inf for nan and <= 0 inputs.
+   Continuous error. */
+float log_approx(float val) {
+  union { float f; int32_t i; } valu;
+  float exp, addcst, x;
+  valu.f = val;
+  exp = valu.i >> 23;
+  /* 89.970756366f = 127 * log(2) - constant term of polynomial */
+  addcst = val > 0 ? -89.970756366f : -(float)INFINITY;
+  valu.i = (valu.i & 0x7FFFFF) | 0x3F800000;
+  x = valu.f;
+
+
+  /* Generated in Sollya using:
+    > f = remez(log(x)-(x-1)*log(2),
+            [|1,(x-1)*(x-2), (x-1)*(x-2)*x, (x-1)*(x-2)*x*x,
+              (x-1)*(x-2)*x*x*x|], [1,2], 1, 1e-8);
+    > plot(f+(x-1)*log(2)-log(x), [1,2]);
+    > f+(x-1)*log(2)
+ */
+  return
+    x * (3.529304993f + x * (-2.461222105f +
+      x * (1.130626167f + x * (-0.288739945f +
+        x * 3.110401639e-2f))))
+    + (addcst + 0.69314718055995f*exp);
+}
+
+float pow_approx(float a, float b)
+{
+    return exp_approx(b * log_approx(a));
+}
--- a/src/main/common/filter.c
+++ b/src/main/common/filter.c
@ -79,10 +79,13 @@ FAST_CODE float slewFilterApply(slewFilter_t *filter, float input)
    return filter->state;
 }

-
+// get notch filter Q given center frequency (f0) and lower cutoff frequency (f1)
+// Q = f0 / (f2 - f1) ; f2 = f0^2 / f1
 float filterGetNotchQ(uint16_t centerFreq, uint16_t cutoff) {
-    float octaves = log2f((float) centerFreq  / (float) cutoff) * 2;
-    return sqrtf(powf(2, octaves)) / (powf(2, octaves) - 1);
+    const float f0sq = (float)centerFreq * centerFreq;
+    const float f1 = cutoff;
+
+    return (f1 * f0sq) / (f0sq - f1 * f1);
 }

 /* sets up a biquad Filter */
--- a/src/main/common/maths.h
+++ b/src/main/common/maths.h
@ -117,12 +117,18 @@ float cos_approx(float x);
 float atan2_approx(float y, float x);
 float acos_approx(float x);
 #define tan_approx(x)       (sin_approx(x) / cos_approx(x))
+float exp_approx(float val);
+float log_approx(float val);
+float pow_approx(float a, float b);
 #else
 #define sin_approx(x)   sinf(x)
 #define cos_approx(x)   cosf(x)
 #define atan2_approx(y,x)   atan2f(y,x)
 #define acos_approx(x)      acosf(x)
 #define tan_approx(x)       tanf(x)
+#define exp_approx(x)       expf(x)
+#define log_approx(x)       logf(x)
+#define pow_approx(a, b)    powf(b, a)
 #endif

 void arraySubInt32(int32_t *dest, int32_t *array1, int32_t *array2, int count);
--- a/src/main/io/osd.c
+++ b/src/main/io/osd.c
@ -728,7 +728,7 @@ static bool osdDrawSingleElement(uint8_t item)
            const float value = constrain(batteryConfig()->batteryCapacity - getMAhDrawn(), 0, batteryConfig()->batteryCapacity);

            // Calculate mAh used progress
-            const uint8_t mAhUsedProgress = ceil((value / (batteryConfig()->batteryCapacity / MAIN_BATT_USAGE_STEPS)));
+            const uint8_t mAhUsedProgress = ceilf((value / (batteryConfig()->batteryCapacity / MAIN_BATT_USAGE_STEPS)));

            // Create empty battery indicator bar
            buff[0] = SYM_PB_START;
--- a/src/main/sensors/barometer.c
+++ b/src/main/sensors/barometer.c
@ -332,7 +332,7 @@ int32_t baroCalculateAltitude(void)
    // calculates height from ground via baro readings
    // see: https://github.com/diydrones/ardupilot/blob/master/libraries/AP_Baro/AP_Baro.cpp#L140
    if (isBaroCalibrationComplete()) {
-        BaroAlt_tmp = lrintf((1.0f - powf((float)(baroPressureSum / PRESSURE_SAMPLE_COUNT) / 101325.0f, 0.190295f)) * 4433000.0f); // in cm
+        BaroAlt_tmp = lrintf((1.0f - pow_approx((float)(baroPressureSum / PRESSURE_SAMPLE_COUNT) / 101325.0f, 0.190295f)) * 4433000.0f); // in cm
        BaroAlt_tmp -= baroGroundAltitude;
        baro.BaroAlt = lrintf((float)baro.BaroAlt * CONVERT_PARAMETER_TO_FLOAT(barometerConfig()->baro_noise_lpf) + (float)BaroAlt_tmp * (1.0f - CONVERT_PARAMETER_TO_FLOAT(barometerConfig()->baro_noise_lpf))); // additional LPF to reduce baro noise
    }
@ -348,7 +348,7 @@ void performBaroCalibrationCycle(void)

    baroGroundPressure -= baroGroundPressure / 8;
    baroGroundPressure += baroPressureSum / PRESSURE_SAMPLE_COUNT;
-    baroGroundAltitude = (1.0f - powf((baroGroundPressure / 8) / 101325.0f, 0.190295f)) * 4433000.0f;
+    baroGroundAltitude = (1.0f - pow_approx((baroGroundPressure / 8) / 101325.0f, 0.190295f)) * 4433000.0f;

    if (baroGroundPressure == savedGroundPressure)
      calibratingB = 0;
--- a/src/main/sensors/rangefinder.c
+++ b/src/main/sensors/rangefinder.c
@ -233,7 +233,8 @@ static int16_t computePseudoSnr(int32_t newReading) {
    static bool snrReady = false;
    int16_t pseudoSnr = 0;

-    snrSamples[snrSampleIndex] = constrain((int)(pow(newReading - previousReading, 2) / 10), 0, 6400);
+    const int delta = newReading - previousReading;
+    snrSamples[snrSampleIndex] = constrain(delta * delta / 10, 0, 6400);
    ++snrSampleIndex;
    if (snrSampleIndex == SNR_SAMPLES) {
        snrSampleIndex = 0;