1
0
Fork 0
mirror of https://github.com/betaflight/betaflight.git synced 2025-07-20 06:45:16 +03:00

AT32F435/7 Libraries (#12158) (#12263)

Source: https://github.com/ArteryTek/AT32F435_437_Firmware_Library
Version: 2.1.1
This commit is contained in:
J Blackman 2023-01-31 08:05:32 +11:00 committed by GitHub
parent 5e16ddb01b
commit 8900a831e5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
559 changed files with 289319 additions and 0 deletions

View file

@ -0,0 +1,19 @@
# Builds the CMSISDSPSupport static library from the C sources in this directory.
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPSupport)
# Helper modules providing the configLib() and configDsp() commands called
# below. They are not defined in this file and must be discoverable by
# include() (presumably through CMAKE_MODULE_PATH -- TODO confirm).
include(configLib)
include(configDsp)
# Collect every C file in this directory whose name contains an underscore.
# NOTE(review): the pattern looks chosen to pick up the individual arm_*_*.c
# sources while leaving out an aggregate file such as SupportFunctions.c (no
# underscore in its name) -- confirm. The glob is evaluated at configure
# time, so newly added files are only seen after CMake is re-run.
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPSupport STATIC ${SRC})
# ROOT is not set in this file; it is expected to be provided by the
# including project before this script is processed.
configLib(CMSISDSPSupport ${ROOT})
configDsp(CMSISDSPSupport ${ROOT})
### Includes
# DSP is likewise defined outside this file. PUBLIC makes the include
# directory available to this target and to targets that link against it.
target_include_directories(CMSISDSPSupport PUBLIC "${DSP}/Include")

View file

@ -0,0 +1,63 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: SupportFunctions.c
* Description: Combination of all support function source files.
*
* $Date: 16. March 2020
* $Revision: V1.1.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_barycenter_f32.c"
#include "arm_bitonic_sort_f32.c"
#include "arm_bubble_sort_f32.c"
#include "arm_copy_f32.c"
#include "arm_copy_q15.c"
#include "arm_copy_q31.c"
#include "arm_copy_q7.c"
#include "arm_fill_f32.c"
#include "arm_fill_q15.c"
#include "arm_fill_q31.c"
#include "arm_fill_q7.c"
#include "arm_heap_sort_f32.c"
#include "arm_insertion_sort_f32.c"
#include "arm_merge_sort_f32.c"
#include "arm_merge_sort_init_f32.c"
#include "arm_quick_sort_f32.c"
#include "arm_selection_sort_f32.c"
#include "arm_sort_f32.c"
#include "arm_sort_init_f32.c"
#include "arm_spline_interp_f32.c"
#include "arm_spline_interp_init_f32.c"
#include "arm_weighted_sum_f32.c"
#include "arm_float_to_q15.c"
#include "arm_float_to_q31.c"
#include "arm_float_to_q7.c"
#include "arm_q15_to_float.c"
#include "arm_q15_to_q31.c"
#include "arm_q15_to_q7.c"
#include "arm_q31_to_float.c"
#include "arm_q31_to_q15.c"
#include "arm_q31_to_q7.c"
#include "arm_q7_to_float.c"
#include "arm_q7_to_q15.c"
#include "arm_q7_to_q31.c"

View file

@ -0,0 +1,412 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_barycenter_f32.c
* Description: Barycenter
*
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include <limits.h>
#include <math.h>
/**
@ingroup groupSupport
*/
/**
* @brief Barycenter
*
*
* @param[in] *in List of vectors
* @param[in] *weights Weights of the vectors
* @param[out] *out Barycenter
* @param[in] nbVectors Number of vectors
* @param[in] vecDim Dimension of space (vector dimension)
* @return None
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) build of the barycenter: out = sum_k(weights[k] * in_k) / sum_k(weights[k]).
 * The input vectors are read back to back from `in`, each vecDim elements long.
 * Vectors are consumed four at a time, then one at a time for the remainder;
 * inside a vector, elements are handled four per step plus a scalar tail.
 */
void arm_barycenter_f32(const float32_t *in,
const float32_t *weights,
float32_t *out,
uint32_t nbVectors,
uint32_t vecDim)
{
const float32_t *pIn, *pW;
const float32_t *pIn1, *pIn2, *pIn3, *pIn4;
float32_t *pOut;
uint32_t blkCntVector, blkCntSample;
float32_t accum, w;
/* These two counter values are overwritten before they are ever read. */
blkCntVector = nbVectors;
blkCntSample = vecDim;
/* accum collects the sum of all weights for the final normalisation. */
accum = 0.0f;
pW = weights;
pIn = in;
/* Start from an all-zero output so the loops below can accumulate into it. */
arm_fill_f32(0.0f, out, vecDim);
/* Sum */
/* Four read pointers, one per input vector, spaced vecDim elements apart. */
pIn1 = pIn;
pIn2 = pIn1 + vecDim;
pIn3 = pIn2 + vecDim;
pIn4 = pIn3 + vecDim;
/* Number of complete groups of four vectors. */
blkCntVector = nbVectors >> 2;
while (blkCntVector > 0)
{
f32x4_t outV, inV1, inV2, inV3, inV4;
float32_t w1, w2, w3, w4;
/* Every group accumulates into the same output vector, so rewind. */
pOut = out;
w1 = *pW++;
w2 = *pW++;
w3 = *pW++;
w4 = *pW++;
accum += w1 + w2 + w3 + w4;
/* Vector part: four output elements per iteration. */
blkCntSample = vecDim >> 2;
while (blkCntSample > 0) {
outV = vld1q((const float32_t *) pOut);
inV1 = vld1q(pIn1);
inV2 = vld1q(pIn2);
inV3 = vld1q(pIn3);
inV4 = vld1q(pIn4);
/* Multiply-accumulate each input vector, scaled by its weight, into outV. */
outV = vfmaq(outV, inV1, w1);
outV = vfmaq(outV, inV2, w2);
outV = vfmaq(outV, inV3, w3);
outV = vfmaq(outV, inV4, w4);
vst1q(pOut, outV);
pOut += 4;
pIn1 += 4;
pIn2 += 4;
pIn3 += 4;
pIn4 += 4;
blkCntSample--;
}
/* Scalar tail: the last vecDim % 4 elements of the four vectors. */
blkCntSample = vecDim & 3;
while (blkCntSample > 0) {
*pOut = *pOut + *pIn1++ * w1;
*pOut = *pOut + *pIn2++ * w2;
*pOut = *pOut + *pIn3++ * w3;
*pOut = *pOut + *pIn4++ * w4;
pOut++;
blkCntSample--;
}
/* Each pointer has advanced by vecDim in the two loops above; skipping */
/* three more vectors moves it to its slot in the next group of four. */
pIn1 += 3 * vecDim;
pIn2 += 3 * vecDim;
pIn3 += 3 * vecDim;
pIn4 += 3 * vecDim;
blkCntVector--;
}
/* pIn1 now addresses the first vector that was not part of a full group. */
pIn = pIn1;
/* Remaining nbVectors % 4 vectors are processed one at a time. */
blkCntVector = nbVectors & 3;
while (blkCntVector > 0)
{
f32x4_t inV, outV;
pOut = out;
w = *pW++;
accum += w;
blkCntSample = vecDim >> 2;
while (blkCntSample > 0)
{
outV = vld1q_f32(pOut);
inV = vld1q_f32(pIn);
outV = vfmaq(outV, inV, w);
vst1q_f32(pOut, outV);
pOut += 4;
pIn += 4;
blkCntSample--;
}
/* Scalar tail; pIn keeps advancing, so it ends at the next vector. */
blkCntSample = vecDim & 3;
while (blkCntSample > 0)
{
*pOut = *pOut + *pIn++ * w;
pOut++;
blkCntSample--;
}
blkCntVector--;
}
/* Normalize */
pOut = out;
/* Scale by the reciprocal of the weight sum instead of dividing per element. */
/* NOTE(review): nothing guards against the weights summing to zero. */
accum = 1.0f / accum;
blkCntSample = vecDim >> 2;
while (blkCntSample > 0)
{
f32x4_t tmp;
tmp = vld1q((const float32_t *) pOut);
tmp = vmulq(tmp, accum);
vst1q(pOut, tmp);
pOut += 4;
blkCntSample--;
}
blkCntSample = vecDim & 3;
while (blkCntSample > 0)
{
*pOut = *pOut * accum;
pOut++;
blkCntSample--;
}
}
#else
#if defined(ARM_MATH_NEON)
#include "NEMath.h"
/*
 * NEON build of the barycenter: out = sum_k(weights[k] * in_k) / sum_k(weights[k]).
 * The input vectors are read back to back from `in`, each vecDim elements long.
 * Vectors are consumed four at a time, then one at a time for the remainder;
 * inside a vector, elements are handled four per step plus a scalar tail.
 */
void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim)
{
const float32_t *pIn,*pW, *pIn1, *pIn2, *pIn3, *pIn4;
float32_t *pOut;
uint32_t blkCntVector,blkCntSample;
float32_t accum, w,w1,w2,w3,w4;
float32x4_t tmp, inV,outV, inV1, inV2, inV3, inV4;
/* These two counter values are overwritten before they are ever read. */
blkCntVector = nbVectors;
blkCntSample = vecDim;
/* accum collects the sum of all weights for the final normalisation. */
accum = 0.0f;
pW = weights;
pIn = in;
/* Set counters to 0 */
/* Zero the whole output vector: four elements per store, then a scalar tail. */
tmp = vdupq_n_f32(0.0f);
pOut = out;
blkCntSample = vecDim >> 2;
while(blkCntSample > 0)
{
vst1q_f32(pOut, tmp);
pOut += 4;
blkCntSample--;
}
blkCntSample = vecDim & 3;
while(blkCntSample > 0)
{
*pOut = 0.0f;
pOut++;
blkCntSample--;
}
/* Sum */
/* Four read pointers, one per input vector, spaced vecDim elements apart. */
pIn1 = pIn;
pIn2 = pIn1 + vecDim;
pIn3 = pIn2 + vecDim;
pIn4 = pIn3 + vecDim;
/* Number of complete groups of four vectors. */
blkCntVector = nbVectors >> 2;
while(blkCntVector > 0)
{
/* Every group accumulates into the same output vector, so rewind. */
pOut = out;
w1 = *pW++;
w2 = *pW++;
w3 = *pW++;
w4 = *pW++;
accum += w1 + w2 + w3 + w4;
/* Vector part: four output elements per iteration. */
blkCntSample = vecDim >> 2;
while(blkCntSample > 0)
{
outV = vld1q_f32(pOut);
inV1 = vld1q_f32(pIn1);
inV2 = vld1q_f32(pIn2);
inV3 = vld1q_f32(pIn3);
inV4 = vld1q_f32(pIn4);
/* Multiply-accumulate each input vector, scaled by its weight, into outV. */
outV = vmlaq_n_f32(outV,inV1,w1);
outV = vmlaq_n_f32(outV,inV2,w2);
outV = vmlaq_n_f32(outV,inV3,w3);
outV = vmlaq_n_f32(outV,inV4,w4);
vst1q_f32(pOut, outV);
pOut += 4;
pIn1 += 4;
pIn2 += 4;
pIn3 += 4;
pIn4 += 4;
blkCntSample--;
}
/* Scalar tail: the last vecDim % 4 elements of the four vectors. */
blkCntSample = vecDim & 3;
while(blkCntSample > 0)
{
*pOut = *pOut + *pIn1++ * w1;
*pOut = *pOut + *pIn2++ * w2;
*pOut = *pOut + *pIn3++ * w3;
*pOut = *pOut + *pIn4++ * w4;
pOut++;
blkCntSample--;
}
/* Each pointer has advanced by vecDim in the two loops above; skipping */
/* three more vectors moves it to its slot in the next group of four. */
pIn1 += 3*vecDim;
pIn2 += 3*vecDim;
pIn3 += 3*vecDim;
pIn4 += 3*vecDim;
blkCntVector--;
}
/* pIn1 now addresses the first vector that was not part of a full group. */
pIn = pIn1;
/* Remaining nbVectors % 4 vectors are processed one at a time. */
blkCntVector = nbVectors & 3;
while(blkCntVector > 0)
{
pOut = out;
w = *pW++;
accum += w;
blkCntSample = vecDim >> 2;
while(blkCntSample > 0)
{
outV = vld1q_f32(pOut);
inV = vld1q_f32(pIn);
outV = vmlaq_n_f32(outV,inV,w);
vst1q_f32(pOut, outV);
pOut += 4;
pIn += 4;
blkCntSample--;
}
/* Scalar tail; pIn keeps advancing, so it ends at the next vector. */
blkCntSample = vecDim & 3;
while(blkCntSample > 0)
{
*pOut = *pOut + *pIn++ * w;
pOut++;
blkCntSample--;
}
blkCntVector--;
}
/* Normalize */
pOut = out;
/* Scale by the reciprocal of the weight sum instead of dividing per element. */
/* NOTE(review): nothing guards against the weights summing to zero. */
accum = 1.0f / accum;
blkCntSample = vecDim >> 2;
while(blkCntSample > 0)
{
tmp = vld1q_f32(pOut);
tmp = vmulq_n_f32(tmp,accum);
vst1q_f32(pOut, tmp);
pOut += 4;
blkCntSample--;
}
blkCntSample = vecDim & 3;
while(blkCntSample > 0)
{
*pOut = *pOut * accum;
pOut++;
blkCntSample--;
}
}
#else
/*
 * Portable scalar barycenter:
 *   out = sum_k(weights[k] * in_k) / sum_k(weights[k])
 * where in_k is the k-th vecDim-long vector, read back to back from `in`.
 * The result is divided element by element by the weight sum; a weight sum
 * of zero is not treated specially.
 */
void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim)
{
    const float32_t *pVec = in;     /* start of the vector being accumulated */
    float32_t weightSum = 0.0f;     /* running sum of the weights */
    uint32_t vecIdx;
    uint32_t dimIdx;

    /* Clear the accumulator */
    for (dimIdx = 0U; dimIdx < vecDim; dimIdx++)
    {
        out[dimIdx] = 0.0f;
    }

    /* Weighted sum of all input vectors */
    for (vecIdx = 0U; vecIdx < nbVectors; vecIdx++)
    {
        const float32_t weight = weights[vecIdx];

        weightSum += weight;

        for (dimIdx = 0U; dimIdx < vecDim; dimIdx++)
        {
            out[dimIdx] = out[dimIdx] + pVec[dimIdx] * weight;
        }

        /* Move on to the next vector */
        pVec += vecDim;
    }

    /* Divide by the total weight */
    for (dimIdx = 0U; dimIdx < vecDim; dimIdx++)
    {
        out[dimIdx] = out[dimIdx] / weightSum;
    }
}
#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSupport group
*/

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,104 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_bubble_sort_f32.c
* Description: Floating point bubble sort
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_sorting.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
* @private
* @param[in] S points to an instance of the sorting structure.
* @param[in] pSrc points to the block of input data.
* @param[out] pDst points to the block of output data
* @param[in] blockSize number of samples to process.
*
* @par Algorithm
* The bubble sort algorithm is a simple comparison algorithm that
* reads the elements of a vector from the beginning to the end,
* compares the adjacent ones and swaps them if they are in the
* wrong order. The procedure is repeated until there is nothing
* left to swap. Bubble sort is fast for input vectors that are
* nearly sorted.
*
* @par It's an in-place algorithm. In order to obtain an out-of-place
* function, a memcpy of the source vector is performed
*/
void arm_bubble_sort_f32(
  const arm_sort_instance_f32 * S,
        float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint8_t dir;
    uint32_t i;
    uint8_t swapped = 1;
    float32_t * pA;
    float32_t temp;

    /* An empty block has nothing to copy or sort. Returning here also keeps
     * the unsigned pass bound below from wrapping around, which would make
     * the inner loop walk far outside the buffer. */
    if (blockSize == 0U)
    {
        return;
    }

    /* A pair is swapped when (left > right) equals dir. */
    dir = S->dir;

    if (pSrc != pDst) // out-of-place
    {
        /* Sort a copy so the source block is left untouched */
        memcpy(pDst, pSrc, blockSize * sizeof(float32_t));
        pA = pDst;
    }
    else
    {
        pA = pSrc;
    }

    while (swapped == 1) // If nothing has been swapped after one loop stop
    {
        swapped = 0;

        /* `i + 1 < blockSize` is the overflow-safe form of `i < blockSize - 1` */
        for (i = 0; i + 1U < blockSize; i++)
        {
            if (dir == (pA[i] > pA[i + 1]))
            {
                // Swap
                temp = pA[i];
                pA[i] = pA[i + 1];
                pA[i + 1] = temp;

                // Update flag
                swapped = 1;
            }
        }

        /* One element fewer is compared on the next pass */
        blockSize--;
    }
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,192 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_copy_f32.c
* Description: Copies the elements of a floating-point vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@defgroup copy Vector Copy
Copies sample by sample from source vector to destination vector.
<pre>
pDst[n] = pSrc[n]; 0 <= n < blockSize.
</pre>
There are separate functions for floating point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup copy
@{
*/
/**
@brief Copies the elements of a floating-point vector.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Helium (MVE) variant: copies four samples per vector load/store, then the tail. */
void arm_copy_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;

    /* Whole groups of four samples */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        vstrwq_f32(pDst, vldrwq_f32(pSrc));

        /* Step both pointers past the group just copied */
        pSrc += 4;
        pDst += 4;
    }

    /* Leftover blockSize % 4 samples, one at a time (C = A) */
    for (remaining = blockSize & 3; remaining > 0U; remaining--)
    {
        *pDst++ = *pSrc++;
    }
}
#else
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
/* NEON variant: copies four samples per vector load/store, then the tail. */
void arm_copy_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;     /* loop counter */

    /* Whole groups of four samples (C = A) */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        vst1q_f32(pDst, vld1q_f32(pSrc));

        pSrc += 4;
        pDst += 4;
    }

    /* When blockSize is not a multiple of 4, copy the remaining
    ** samples one by one. */
    for (remaining = blockSize & 3; remaining > 0U; remaining--)
    {
        *pDst++ = *pSrc++;
    }
}
#else
/* Portable variant: plain element copy, optionally unrolled by four. */
void arm_copy_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;     /* samples still to copy in the current loop */

#if defined (ARM_MATH_LOOPUNROLL)

    /* Unrolled part: four samples per iteration (C = A) */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        pDst[0] = pSrc[0];
        pDst[1] = pSrc[1];
        pDst[2] = pSrc[2];
        pDst[3] = pSrc[3];

        pSrc += 4;
        pDst += 4;
    }

    /* Samples left over after the unrolled part */
    remaining = blockSize % 0x4U;

#else

    /* No unrolling: the loop below handles every sample */
    remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C = A */
        *pDst++ = *pSrc++;
    }
}
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of copy group
*/

View file

@ -0,0 +1,130 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_copy_q15.c
* Description: Copies the elements of a Q15 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup copy
@{
*/
/**
@brief Copies the elements of a Q15 vector.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEI)
/* Helium (MVE) variant: copies eight Q15 samples per vector load/store, then the tail. */
void arm_copy_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;

    /* Whole groups of eight samples */
    for (remaining = blockSize >> 3; remaining > 0U; remaining--)
    {
        vstrhq_s16(pDst,vldrhq_s16(pSrc));

        /* Step both pointers past the group just copied */
        pSrc += 8;
        pDst += 8;
    }

    /* Leftover blockSize % 8 samples, one at a time (C = A) */
    for (remaining = blockSize & 7; remaining > 0U; remaining--)
    {
        *pDst++ = *pSrc++;
    }
}
#else
/* Portable variant: element copy, optionally unrolled to four samples per pass. */
void arm_copy_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;     /* samples still to copy in the current loop */

#if defined (ARM_MATH_LOOPUNROLL)

    /* Unrolled part: four samples per iteration, moved as two pairs (C = A) */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        write_q15x2_ia (&pDst, read_q15x2_ia ((q15_t **) &pSrc));
        write_q15x2_ia (&pDst, read_q15x2_ia ((q15_t **) &pSrc));
    }

    /* Samples left over after the unrolled part */
    remaining = blockSize % 0x4U;

#else

    /* No unrolling: the loop below handles every sample */
    remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C = A */
        *pDst++ = *pSrc++;
    }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of copy group
*/

View file

@ -0,0 +1,135 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_copy_q31.c
* Description: Copies the elements of a Q31 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup copy
@{
*/
/**
@brief Copies the elements of a Q31 vector.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEI)
/* Helium (MVE) variant: copies four Q31 samples per vector load/store, then the tail. */
void arm_copy_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;

    /* Whole groups of four samples */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        vstrwq_s32(pDst,vldrwq_s32(pSrc));

        /* Step both pointers past the group just copied */
        pSrc += 4;
        pDst += 4;
    }

    /* Leftover blockSize % 4 samples, one at a time (C = A) */
    for (remaining = blockSize & 3; remaining > 0U; remaining--)
    {
        *pDst++ = *pSrc++;
    }
}
#else
/* Portable variant: plain element copy, optionally unrolled by four. */
void arm_copy_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;     /* samples still to copy in the current loop */

#if defined (ARM_MATH_LOOPUNROLL)

    /* Unrolled part: four samples per iteration (C = A) */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        pDst[0] = pSrc[0];
        pDst[1] = pSrc[1];
        pDst[2] = pSrc[2];
        pDst[3] = pSrc[3];

        pSrc += 4;
        pDst += 4;
    }

    /* Samples left over after the unrolled part */
    remaining = blockSize % 0x4U;

#else

    /* No unrolling: the loop below handles every sample */
    remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C = A */
        *pDst++ = *pSrc++;
    }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of copy group
*/

View file

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_copy_q7.c
* Description: Copies the elements of a Q7 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup copy
@{
*/
/**
@brief Copies the elements of a Q7 vector.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEI)
/* Helium (MVE) variant: copies sixteen Q7 samples per vector load/store, then the tail. */
void arm_copy_q7(
  const q7_t * pSrc,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;

    /* Whole groups of sixteen samples */
    for (remaining = blockSize >> 4; remaining > 0U; remaining--)
    {
        vstrbq_s8(pDst,vldrbq_s8(pSrc));

        /* Step both pointers past the group just copied */
        pSrc += 16;
        pDst += 16;
    }

    /* Leftover blockSize % 16 samples, one at a time (C = A) */
    for (remaining = blockSize & 0xF; remaining > 0U; remaining--)
    {
        *pDst++ = *pSrc++;
    }
}
#else
/* Portable variant: element copy, optionally unrolled to four samples per pass. */
void arm_copy_q7(
  const q7_t * pSrc,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t remaining;     /* samples still to copy in the current loop */

#if defined (ARM_MATH_LOOPUNROLL)

    /* Unrolled part: four samples moved per iteration as one packed access (C = A) */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        write_q7x4_ia (&pDst, read_q7x4_ia ((q7_t **) &pSrc));
    }

    /* Samples left over after the unrolled part */
    remaining = blockSize % 0x4U;

#else

    /* No unrolling: the loop below handles every sample */
    remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C = A */
        *pDst++ = *pSrc++;
    }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of copy group
*/

View file

@ -0,0 +1,189 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_fill_f32.c
* Description: Fills a constant value into a floating-point vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@defgroup Fill Vector Fill
Fills the destination vector with a constant value.
<pre>
pDst[n] = value; 0 <= n < blockSize.
</pre>
There are separate functions for floating point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup Fill
@{
*/
/**
@brief Fills a constant value into a floating-point vector.
@param[in] value input value to be filled
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Helium (MVE) variant: stores four copies of value per vector store, then the tail. */
void arm_fill_f32(
  float32_t value,
  float32_t * pDst,
  uint32_t blockSize)
{
    uint32_t remaining;

    /* Whole groups of four samples */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        vstrwq_f32(pDst,vdupq_n_f32(value));

        /* Step the destination past the group just written */
        pDst += 4;
    }

    /* Leftover blockSize % 4 samples, one at a time (C = value) */
    for (remaining = blockSize & 3; remaining > 0U; remaining--)
    {
        *pDst++ = value;
    }
}
#else
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
/* NEON variant: stores four copies of value per vector store, then the tail. */
void arm_fill_f32(
  float32_t value,
  float32_t * pDst,
  uint32_t blockSize)
{
    uint32_t remaining;                             /* loop counter */
    float32x4_t splat = vdupq_n_f32(value);         /* value replicated in every lane */

    /* Whole groups of four samples (C = value) */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        vst1q_f32(pDst, splat);
        pDst += 4;
    }

    /* When blockSize is not a multiple of 4, write the remaining
    ** samples one by one. */
    for (remaining = blockSize & 3; remaining > 0U; remaining--)
    {
        *pDst++ = value;
    }
}
#else
/* Portable variant: plain element fill, optionally unrolled by four. */
void arm_fill_f32(
  float32_t value,
  float32_t * pDst,
  uint32_t blockSize)
{
    uint32_t remaining;     /* samples still to write in the current loop */

#if defined (ARM_MATH_LOOPUNROLL)

    /* Unrolled part: four samples per iteration (C = value) */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        pDst[0] = value;
        pDst[1] = value;
        pDst[2] = value;
        pDst[3] = value;

        pDst += 4;
    }

    /* Samples left over after the unrolled part */
    remaining = blockSize % 0x4U;

#else

    /* No unrolling: the loop below handles every sample */
    remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C = value */
        *pDst++ = value;
    }
}
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Fill group
*/

View file

@ -0,0 +1,134 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_fill_q15.c
* Description: Fills a constant value into a Q15 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Fill
@{
*/
/**
@brief Fills a constant value into a Q15 vector.
@param[in] value input value to be filled
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEI)
/* Helium (MVE) variant: stores eight copies of value per vector store, then the tail. */
void arm_fill_q15(
  q15_t value,
  q15_t * pDst,
  uint32_t blockSize)
{
    uint32_t remaining;

    /* Whole groups of eight samples */
    for (remaining = blockSize >> 3; remaining > 0U; remaining--)
    {
        vstrhq_s16(pDst,vdupq_n_s16(value));

        /* Step the destination past the group just written */
        pDst += 8;
    }

    /* Leftover blockSize % 8 samples, one at a time (C = value) */
    for (remaining = blockSize & 7; remaining > 0U; remaining--)
    {
        *pDst++ = value;
    }
}
#else
/* Portable variant: element fill, optionally unrolled to four samples per pass. */
void arm_fill_q15(
  q15_t value,
  q15_t * pDst,
  uint32_t blockSize)
{
    uint32_t remaining;     /* samples still to write in the current loop */

#if defined (ARM_MATH_LOOPUNROLL)
    q31_t packedValue;      /* value packed to 32 bits */

    /* Two copies of the 16-bit value side by side in one 32-bit word,
       so each store below writes a pair of samples */
    packedValue = __PKHBT(value, value, 16U);

    /* Unrolled part: four samples per iteration, written as two pairs (C = value) */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        write_q15x2_ia (&pDst, packedValue);
        write_q15x2_ia (&pDst, packedValue);
    }

    /* Samples left over after the unrolled part */
    remaining = blockSize % 0x4U;

#else

    /* No unrolling: the loop below handles every sample */
    remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C = value */
        *pDst++ = value;
    }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Fill group
*/

View file

@ -0,0 +1,135 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_fill_q31.c
* Description: Fills a constant value into a Q31 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Fill
@{
*/
/**
@brief Fills a constant value into a Q31 vector.
@param[in] value input value to be filled
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEI)
/* Helium (MVE) variant: whole vectors first, then a scalar tail. */
void arm_fill_q31(
  q31_t value,
  q31_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;   /* Iterations left in the loop currently running */

  /* Vector part: one store of four 32-bit copies of value per iteration */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    vstrwq_s32(pDst, vdupq_n_s32(value));

    /* Step over the four samples just written */
    pDst += 4;
  }

  /* Scalar tail: the (blockSize & 3) samples that do not fill a whole vector */
  for (remaining = blockSize & 3; remaining > 0U; remaining--)
  {
    /* C = value */
    *pDst++ = value;
  }
}
#else
/* Portable variant, optionally unrolled by four via ARM_MATH_LOOPUNROLL. */
void arm_fill_q31(
  q31_t value,
  q31_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;   /* Samples (or groups of four samples) still to write */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled part: four stores per iteration, blockSize / 4 iterations */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    /* C = value */
    pDst[0] = value;
    pDst[1] = value;
    pDst[2] = value;
    pDst[3] = value;
    pDst += 4;
  }

  /* Whatever is left when blockSize is not a multiple of four */
  remaining = blockSize % 0x4U;

#else

  /* No unrolling: the single loop below writes every sample */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C = value */
    *pDst++ = value;
  }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Fill group
*/

View file

@ -0,0 +1,133 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_fill_q7.c
* Description: Fills a constant value into a Q7 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Fill
@{
*/
/**
@brief Fills a constant value into a Q7 vector.
@param[in] value input value to be filled
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEI)
/* Helium (MVE) variant: sixteen samples per vector store, then a scalar tail. */
void arm_fill_q7(
  q7_t value,
  q7_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;   /* Iterations left in the loop currently running */

  /* Vector part: blockSize / 16 stores of sixteen 8-bit copies of value */
  for (remaining = blockSize >> 4; remaining > 0U; remaining--)
  {
    vstrbq_s8(pDst, vdupq_n_s8(value));

    /* Step over the sixteen samples just written */
    pDst += 16;
  }

  /* Scalar tail: the (blockSize & 0xF) samples that do not fill a whole vector */
  for (remaining = blockSize & 0xF; remaining > 0U; remaining--)
  {
    /* C = value */
    *pDst++ = value;
  }
}
#else
/* Portable variant; with ARM_MATH_LOOPUNROLL four samples are written per store. */
void arm_fill_q7(
  q7_t value,
  q7_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;   /* Samples (or groups of four samples) still to write */

#if defined (ARM_MATH_LOOPUNROLL)
  q31_t quad;           /* Four copies of value packed into one 32-bit word */

  /* Pack once up front so that each loop iteration is a single 32-bit write */
  quad = __PACKq7(value, value, value, value);

  /* Unrolled part: blockSize / 4 packed writes */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    /* C = value, four samples at a time; the helper also advances pDst */
    write_q7x4_ia (&pDst, quad);
  }

  /* Whatever is left when blockSize is not a multiple of four */
  remaining = blockSize % 0x4U;

#else

  /* No unrolling: the single loop below writes every sample */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C = value */
    *pDst++ = value;
  }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Fill group
*/

View file

@ -0,0 +1,308 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_float_to_q15.c
* Description: Converts the elements of the floating-point vector to Q15 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup float_to_x
@{
*/
/**
@brief Converts the elements of the floating-point vector to Q15 vector.
@param[in] pSrc points to the floating-point input vector
@param[out] pDst points to the Q15 output vector
@param[in] blockSize number of samples in each vector
@return none
@par Details
The equation used for the conversion process is:
<pre>
pDst[n] = (q15_t)(pSrc[n] * 32768); 0 <= n < blockSize.
</pre>
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
@note
In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
defined in the preprocessor section of project options.
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) variant of the float -> Q15 conversion.
 * Eight samples are converted per vector iteration; the (blockSize & 7)
 * leftover samples are converted one by one with saturation to 16 bits.
 */
void arm_float_to_q15(
  const float32_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize)
{
  uint32_t blkCnt;                        /* Loop counter */
  /* NOTE(review): the vector loop scales by Q15_MAX whereas the scalar tail
     below scales by 32768.0f - confirm which factor is intended. */
  float32_t maxQ = (float32_t) Q15_MAX;
  f32x4x2_t tmp;                          /* Two vectors of four floats each */
  /* vqmovnbq / vqmovntq take the current vecDst as an input operand, so give it
     a defined value instead of reading an uninitialised variable on the first
     iteration. Both halves are overwritten before every store. */
  q15x8_t vecDst = { 0 };
#ifdef ARM_MATH_ROUNDING
  float32_t in;                           /* Scaled sample used by the scalar tail */
#endif

  blkCnt = blockSize >> 3;
  while (blkCnt > 0U)
  {
    /* C = A * maxQ */
    /* Load eight floats, scale, convert to 32-bit integers, then narrow with
       saturation into the two halves of the 16-bit result vector */
    tmp = vld2q(pSrc);
    tmp.val[0] = vmulq(tmp.val[0], maxQ);
    tmp.val[1] = vmulq(tmp.val[1], maxQ);
    vecDst = vqmovnbq(vecDst, vcvtaq_s32_f32(tmp.val[0]));
    vecDst = vqmovntq(vecDst, vcvtaq_s32_f32(tmp.val[1]));
    vst1q(pDst, vecDst);

    /* Decrement the loop counter and advance past the eight samples handled */
    blkCnt--;
    pDst += 8;
    pSrc += 8;
  }

  blkCnt = blockSize & 7;
  while (blkCnt > 0U)
  {
    /* C = A * 32768 */
    /* Convert from float to Q15 and store the result in the destination buffer */
#ifdef ARM_MATH_ROUNDING
    in = (*pSrc++ * 32768.0f);
    /* Add half an LSB away from zero so the truncating cast rounds to nearest */
    in += in > 0.0f ? 0.5f : -0.5f;
    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
#else
    *pDst++ = (q15_t) __SSAT((q31_t) (*pSrc++ * 32768.0f), 16);
#endif /* #ifdef ARM_MATH_ROUNDING */

    /* Decrement loop counter */
    blkCnt--;
  }
}
#else
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
/*
 * NEON variant of the float -> Q15 conversion.
 * Four samples are converted per vector iteration; the (blockSize & 3)
 * leftover samples are converted one by one.
 */
void arm_float_to_q15(
  const float32_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize)
{
  const float32_t *pIn = pSrc;   /* Read cursor */
  uint32_t remaining;            /* Iterations left in the loop currently running */
  float32x4_t inV;               /* Four input samples */
#ifdef ARM_MATH_ROUNDING
  float32x4_t zeroV = vdupq_n_f32(0.0f);
  /* Half an output LSB expressed in input units, positive and negative */
  float32x4_t pHalf = vdupq_n_f32(0.5f / 32768.0f);
  float32x4_t mHalf = vdupq_n_f32(-0.5f / 32768.0f);
  float32_t in;                  /* Scaled sample used by the scalar tail */
#endif

  /* Vector part: blockSize / 4 iterations of four samples each */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    inV = vld1q_f32(pIn);
    pIn += 4;

#ifdef ARM_MATH_ROUNDING
    /* Lanes greater than zero get +half, all other lanes get -half */
    inV = vaddq_f32(inV, vbslq_f32(vcgtq_f32(inV, zeroV), pHalf, mHalf));
#endif /* #ifdef ARM_MATH_ROUNDING */

    /* C = A * 32768 */
    /* Fixed-point convert with 15 fractional bits, narrow to 16 bits with saturation, store */
    vst1_s16(pDst, vqmovn_s32(vcvtq_n_s32_f32(inV, 15)));
    pDst += 4;
  }

  /* Scalar tail for block sizes that are not a multiple of four */
  for (remaining = blockSize & 3; remaining > 0U; remaining--)
  {
    /* C = A * 32768 */
#ifdef ARM_MATH_ROUNDING
    in = *pIn++ * 32768.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = (q15_t) __SSAT((q31_t) in, 16);
#else
    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
#endif /* #ifdef ARM_MATH_ROUNDING */
  }
}
#else
/*
 * Portable variant of the float -> Q15 conversion.
 * Each sample is scaled by 32768, optionally rounded (ARM_MATH_ROUNDING),
 * truncated to an integer and saturated to 16 bits.
 */
void arm_float_to_q15(
  const float32_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;             /* Samples (or groups of four samples) still to convert */
  const float32_t *pIn = pSrc;    /* Read cursor */
#ifdef ARM_MATH_ROUNDING
  float32_t in;                   /* Scaled sample before the integer cast */
#endif /* #ifdef ARM_MATH_ROUNDING */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled part: four conversions per iteration */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    /* C = A * 32768 */
#ifdef ARM_MATH_ROUNDING
    in = pIn[0] * 32768.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    pDst[0] = (q15_t) __SSAT((q31_t) in, 16);

    in = pIn[1] * 32768.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    pDst[1] = (q15_t) __SSAT((q31_t) in, 16);

    in = pIn[2] * 32768.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    pDst[2] = (q15_t) __SSAT((q31_t) in, 16);

    in = pIn[3] * 32768.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    pDst[3] = (q15_t) __SSAT((q31_t) in, 16);
#else
    pDst[0] = (q15_t) __SSAT((q31_t) (pIn[0] * 32768.0f), 16);
    pDst[1] = (q15_t) __SSAT((q31_t) (pIn[1] * 32768.0f), 16);
    pDst[2] = (q15_t) __SSAT((q31_t) (pIn[2] * 32768.0f), 16);
    pDst[3] = (q15_t) __SSAT((q31_t) (pIn[3] * 32768.0f), 16);
#endif /* #ifdef ARM_MATH_ROUNDING */

    pIn += 4;
    pDst += 4;
  }

  /* Whatever is left when blockSize is not a multiple of four */
  remaining = blockSize % 0x4U;

#else

  /* No unrolling: the single loop below converts every sample */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C = A * 32768 */
#ifdef ARM_MATH_ROUNDING
    in = *pIn++ * 32768.0f;
    /* Half an LSB away from zero, so the truncating cast rounds to nearest */
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = (q15_t) __SSAT((q31_t) in, 16);
#else
    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
#endif /* #ifdef ARM_MATH_ROUNDING */
  }
}
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of float_to_x group
*/

View file

@ -0,0 +1,314 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_float_to_q31.c
* Description: Converts the elements of the floating-point vector to Q31 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
* @defgroup float_to_x Convert 32-bit floating point value
*/
/**
@addtogroup float_to_x
@{
*/
/**
@brief Converts the elements of the floating-point vector to Q31 vector.
@param[in] pSrc points to the floating-point input vector
@param[out] pDst points to the Q31 output vector
@param[in] blockSize number of samples in each vector
@return none
@par Details
The equation used for the conversion process is:
<pre>
pDst[n] = (q31_t)(pSrc[n] * 2147483648); 0 <= n < blockSize.
</pre>
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated.
@note
In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
defined in the preprocessor section of project options.
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) variant of the float -> Q31 conversion.
 * Four samples are converted per vector iteration; the (blockSize & 3)
 * leftover samples are converted one by one through clip_q63_to_q31.
 */
void arm_float_to_q31(
  const float32_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;                     /* Iterations left in the loop currently running */
  float32_t maxQ = (float32_t) Q31_MAX;   /* Scale factor applied in the vector loop */
  f32x4_t scaled;                         /* Four samples, loaded and then scaled */
#ifdef ARM_MATH_ROUNDING
  float32_t in;                           /* Scaled sample used by the scalar tail */
#endif

  /* Vector part: blockSize / 4 iterations of four samples each */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    scaled = vldrwq_f32(pSrc);

    /* C = A * maxQ, then convert to 32-bit integers and store */
    scaled = vmulq(scaled, maxQ);
    vstrwq_s32(pDst, vcvtaq_s32_f32(scaled));

    /* Advance past the four samples handled */
    pSrc += 4;
    pDst += 4;
  }

  /* Scalar tail for block sizes that are not a multiple of four */
  for (remaining = blockSize & 3; remaining > 0U; remaining--)
  {
    /* C = A * 2147483648 */
#ifdef ARM_MATH_ROUNDING
    in = *pSrc++ * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = clip_q63_to_q31((q63_t) in);
#else
    *pDst++ = clip_q63_to_q31((q63_t) (*pSrc++ * 2147483648.0f));
#endif /* #ifdef ARM_MATH_ROUNDING */
  }
}
#else
#if defined(ARM_MATH_NEON)
/*
 * NEON variant of the float -> Q31 conversion.
 * Four samples are converted per vector iteration; the (blockSize & 3)
 * leftover samples are converted one by one through clip_q63_to_q31.
 */
void arm_float_to_q31(
  const float32_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize)
{
  const float32_t *pIn = pSrc;   /* Read cursor */
  uint32_t remaining;            /* Iterations left in the loop currently running */
  float32x4_t inV;               /* Four input samples */
#ifdef ARM_MATH_ROUNDING
  float32_t in;                  /* Scaled sample used by the scalar tail */
  float32x4_t zeroV = vdupq_n_f32(0.0f);
  /* Half an output LSB expressed in input units, positive and negative */
  float32x4_t pHalf = vdupq_n_f32(0.5f / 2147483648.0f);
  float32x4_t mHalf = vdupq_n_f32(-0.5f / 2147483648.0f);
#endif

  /* Vector part: blockSize / 4 iterations of four samples each */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    inV = vld1q_f32(pIn);
    pIn += 4;

#ifdef ARM_MATH_ROUNDING
    /* Lanes greater than zero get +half, all other lanes get -half */
    inV = vaddq_f32(inV, vbslq_f32(vcgtq_f32(inV, zeroV), pHalf, mHalf));
#endif /* #ifdef ARM_MATH_ROUNDING */

    /* C = A * 2147483648 */
    /* Fixed-point convert with 31 fractional bits and store */
    vst1q_s32(pDst, vcvtq_n_s32_f32(inV, 31));
    pDst += 4;
  }

  /* Scalar tail for block sizes that are not a multiple of four */
  for (remaining = blockSize & 3; remaining > 0U; remaining--)
  {
    /* C = A * 2147483648 */
#ifdef ARM_MATH_ROUNDING
    in = *pIn++ * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = clip_q63_to_q31((q63_t) in);
#else
    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
#endif /* #ifdef ARM_MATH_ROUNDING */
  }
}
#else
/*
 * Portable variant of the float -> Q31 conversion.
 * Each sample is scaled by 2147483648, optionally rounded (ARM_MATH_ROUNDING),
 * truncated to a 64-bit integer and clipped to 32 bits by clip_q63_to_q31.
 */
void arm_float_to_q31(
  const float32_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;             /* Samples (or groups of four samples) still to convert */
  const float32_t *pIn = pSrc;    /* Read cursor */
#ifdef ARM_MATH_ROUNDING
  float32_t in;                   /* Scaled sample before the integer cast */
#endif /* #ifdef ARM_MATH_ROUNDING */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled part: four conversions per iteration */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    /* C = A * 2147483648 */
#ifdef ARM_MATH_ROUNDING
    in = pIn[0] * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    pDst[0] = clip_q63_to_q31((q63_t) in);

    in = pIn[1] * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    pDst[1] = clip_q63_to_q31((q63_t) in);

    in = pIn[2] * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    pDst[2] = clip_q63_to_q31((q63_t) in);

    in = pIn[3] * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    pDst[3] = clip_q63_to_q31((q63_t) in);
#else
    pDst[0] = clip_q63_to_q31((q63_t) (pIn[0] * 2147483648.0f));
    pDst[1] = clip_q63_to_q31((q63_t) (pIn[1] * 2147483648.0f));
    pDst[2] = clip_q63_to_q31((q63_t) (pIn[2] * 2147483648.0f));
    pDst[3] = clip_q63_to_q31((q63_t) (pIn[3] * 2147483648.0f));
#endif /* #ifdef ARM_MATH_ROUNDING */

    pIn += 4;
    pDst += 4;
  }

  /* Whatever is left when blockSize is not a multiple of four */
  remaining = blockSize % 0x4U;

#else

  /* No unrolling: the single loop below converts every sample */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C = A * 2147483648 */
#ifdef ARM_MATH_ROUNDING
    in = *pIn++ * 2147483648.0f;
    /* Half an LSB away from zero, so the truncating cast rounds to nearest */
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = clip_q63_to_q31((q63_t) in);
#else
    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
#endif /* #ifdef ARM_MATH_ROUNDING */
  }
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of float_to_x group
*/

View file

@ -0,0 +1,330 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_float_to_q7.c
* Description: Converts the elements of the floating-point vector to Q7 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup float_to_x
@{
*/
/**
* @brief Converts the elements of the floating-point vector to Q7 vector.
* @param[in] *pSrc points to the floating-point input vector
* @param[out] *pDst points to the Q7 output vector
* @param[in] blockSize length of the input vector
* @return none.
*
*\par Description:
* \par
* The equation used for the conversion process is:
* <pre>
* pDst[n] = (q7_t)(pSrc[n] * 128); 0 <= n < blockSize.
* </pre>
* \par Scaling and Overflow Behavior:
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
* \note
* In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
* defined in the preprocessor section of project options.
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) variant of the float -> Q7 conversion.
 * Sixteen samples are converted per vector iteration; the (blockSize & 0xF)
 * leftover samples are converted one by one with saturation to 8 bits.
 */
void arm_float_to_q7(
  const float32_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize)
{
  uint32_t blkCnt;               /* Loop counter */
  /* 128 = 2^7, written as a constant instead of a run-time powf(2.0, 7) call */
  float32_t maxQ = 128.0f;
  f32x4x4_t tmp;                 /* Four vectors of four floats each */
  /* The narrowing intrinsics below take their destination vector as an input
     operand, so start from defined values instead of reading uninitialised
     variables on the first iteration. Both halves of each vector are
     overwritten before it is consumed. */
  q15x8_t evVec = { 0 };
  q15x8_t oddVec = { 0 };
  q7x16_t vecDst = { 0 };
  float32_t const *pSrcVec;      /* Read cursor */
#ifdef ARM_MATH_ROUNDING
  float32_t in;                  /* Scaled sample used by the scalar tail */
#endif

  pSrcVec = (float32_t const *) pSrc;
  blkCnt = blockSize >> 4;
  while (blkCnt > 0U) {
    tmp = vld4q(pSrcVec);
    pSrcVec += 16;

    /*
     * C = A * 128.0
     * convert from float to q7 and then store the results in the destination buffer
     */
    tmp.val[0] = vmulq(tmp.val[0], maxQ);
    tmp.val[1] = vmulq(tmp.val[1], maxQ);
    tmp.val[2] = vmulq(tmp.val[2], maxQ);
    tmp.val[3] = vmulq(tmp.val[3], maxQ);

    /*
     * convert and pack evens
     */
    evVec = vqmovnbq(evVec, vcvtaq_s32_f32(tmp.val[0]));
    evVec = vqmovntq(evVec, vcvtaq_s32_f32(tmp.val[2]));

    /*
     * convert and pack odds
     */
    oddVec = vqmovnbq(oddVec, vcvtaq_s32_f32(tmp.val[1]));
    oddVec = vqmovntq(oddVec, vcvtaq_s32_f32(tmp.val[3]));

    /*
     * merge the two 16-bit vectors into one 8-bit vector (saturating) and store
     */
    vecDst = vqmovnbq(vecDst, evVec);
    vecDst = vqmovntq(vecDst, oddVec);
    vst1q(pDst, vecDst);
    pDst += 16;

    /*
     * Decrement the loop counter
     */
    blkCnt--;
  }

  blkCnt = blockSize & 0xF;
  while (blkCnt > 0U)
  {
    /* C = A * 128 */
    /* Convert from float to q7 and store result in destination buffer */
#ifdef ARM_MATH_ROUNDING
    in = (*pSrcVec++ * 128.0f);
    in += in > 0.0f ? 0.5f : -0.5f;
    /* Cast to 32 bits, not 16: a 16-bit cast cannot represent inputs with
       |x| >= 256, so __SSAT would not see the true value to saturate. */
    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
#else
    *pDst++ = (q7_t) __SSAT((q31_t) (*pSrcVec++ * 128.0f), 8);
#endif /* #ifdef ARM_MATH_ROUNDING */

    /* Decrement loop counter */
    blkCnt--;
  }
}
#else
#if defined(ARM_MATH_NEON)
/*
 * NEON variant of the float -> Q7 conversion.
 * Eight samples are converted per vector iteration; the (blockSize & 7)
 * leftover samples are converted one by one with saturation to 8 bits.
 */
void arm_float_to_q7(
  const float32_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize)
{
  const float32_t *pIn = pSrc;   /* Read cursor */
  uint32_t blkCnt;               /* Loop counter */
  float32x4_t inV;               /* Four input samples */
#ifdef ARM_MATH_ROUNDING
  float32_t in;                  /* Scaled sample used by the scalar tail */
  float32x4_t zeroV = vdupq_n_f32(0.0f);
  /* Half an output LSB expressed in input units, positive and negative */
  float32x4_t pHalf = vdupq_n_f32(0.5f / 128.0f);
  float32x4_t mHalf = vdupq_n_f32(-0.5f / 128.0f);
#endif
  int16x4_t cvt1,cvt2;           /* First and second group of four, narrowed to 16 bits */
  int8x8_t outV;                 /* Eight results narrowed to 8 bits */

  blkCnt = blockSize >> 3U;

  /* Compute 8 outputs at a time.
   ** a second loop below computes the remaining 1 to 7 samples. */
  while (blkCnt > 0U)
  {
    /* C = A * 128 */
    /* First group of four: load, optionally round, convert with 7 fractional bits, narrow */
    inV = vld1q_f32(pIn);
    pIn += 4;
#ifdef ARM_MATH_ROUNDING
    /* Lanes greater than zero get +half, all other lanes get -half */
    inV = vaddq_f32(inV, vbslq_f32(vcgtq_f32(inV, zeroV), pHalf, mHalf));
#endif /* #ifdef ARM_MATH_ROUNDING */
    cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));

    /* Second group of four, same steps */
    inV = vld1q_f32(pIn);
    pIn += 4;
#ifdef ARM_MATH_ROUNDING
    inV = vaddq_f32(inV, vbslq_f32(vcgtq_f32(inV, zeroV), pHalf, mHalf));
#endif /* #ifdef ARM_MATH_ROUNDING */
    cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));

    /* Join both groups, narrow to 8 bits with saturation and store */
    outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
    vst1_s8(pDst, outV);
    pDst += 8;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 8, compute any remaining output samples here.
   ** No loop unrolling is used. */
  blkCnt = blockSize & 7;
  while (blkCnt > 0U)
  {
    /* C = A * 128 */
    /* Convert from float to q7 and then store the results in the destination buffer */
#ifdef ARM_MATH_ROUNDING
    in = *pIn++;
    in = (in * 128.0f);
    in += in > 0.0f ? 0.5f : -0.5f;
    /* Cast to 32 bits, not 16: a 16-bit cast cannot represent inputs with
       |x| >= 256, so __SSAT would not see the true value to saturate. */
    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
#else
    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
#endif /* #ifdef ARM_MATH_ROUNDING */

    /* Decrement the loop counter */
    blkCnt--;
  }
}
#else
/*
 * Portable variant of the float -> Q7 conversion.
 * Each sample is scaled by 128, optionally rounded (ARM_MATH_ROUNDING),
 * truncated to a 32-bit integer and saturated to 8 bits.
 *
 * The rounding path casts to q31_t, like the non-rounding path. A q15_t cast
 * cannot represent inputs with |x| >= 256, so the value handed to __SSAT
 * would no longer be the true one and the saturation could not work.
 */
void arm_float_to_q7(
  const float32_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize)
{
  uint32_t blkCnt;                /* Loop counter */
  const float32_t *pIn = pSrc;    /* Source pointer */
#ifdef ARM_MATH_ROUNDING
  float32_t in;                   /* Scaled sample before the integer cast */
#endif /* #ifdef ARM_MATH_ROUNDING */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = blockSize >> 2U;
  while (blkCnt > 0U)
  {
    /* C = A * 128 */
    /* Convert from float to q7 and store result in destination buffer */
#ifdef ARM_MATH_ROUNDING
    in = (*pIn++ * 128.0f);
    in += in > 0.0f ? 0.5f : -0.5f;
    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));

    in = (*pIn++ * 128.0f);
    in += in > 0.0f ? 0.5f : -0.5f;
    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));

    in = (*pIn++ * 128.0f);
    in += in > 0.0f ? 0.5f : -0.5f;
    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));

    in = (*pIn++ * 128.0f);
    in += in > 0.0f ? 0.5f : -0.5f;
    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
#else
    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
#endif /* #ifdef ARM_MATH_ROUNDING */

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = blockSize % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* C = A * 128 */
    /* Convert from float to q7 and store result in destination buffer */
#ifdef ARM_MATH_ROUNDING
    in = (*pIn++ * 128.0f);
    /* Half an LSB away from zero, so the truncating cast rounds to nearest */
    in += in > 0.0f ? 0.5f : -0.5f;
    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
#else
    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
#endif /* #ifdef ARM_MATH_ROUNDING */

    /* Decrement loop counter */
    blkCnt--;
  }
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of float_to_x group
*/

View file

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_heap_sort_f32.c
* Description: Floating point heap sort
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_sorting.h"
/*
 * Sift the element at index i down through the first n elements of pSrc until
 * neither of its children is selected over it.
 *
 * A child is selected when the result of (child > current pick) equals dir;
 * the right child is compared against whichever element the left comparison
 * picked. With dir == 1 the larger element therefore moves towards the root.
 */
static void arm_heapify(float32_t * pSrc, uint32_t n, uint32_t i, uint8_t dir)
{
  uint32_t node = i;   /* Position of the element being sifted down */

  for (;;)
  {
    uint32_t left  = 2*node + 1;
    uint32_t right = 2*node + 2;
    uint32_t pick  = node;   /* Element that should sit at this position */
    float32_t held;

    if (left < n && dir==(pSrc[left] > pSrc[pick]) )
    {
      pick = left;
    }

    if (right < n && dir==(pSrc[right] > pSrc[pick]) )
    {
      pick = right;
    }

    /* Nothing to move: heap order holds from here downwards */
    if (pick == node)
    {
      break;
    }

    /* Exchange with the selected child and continue from its position */
    held = pSrc[node];
    pSrc[node] = pSrc[pick];
    pSrc[pick] = held;

    node = pick;
  }
}
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
* @private
* @param[in] S points to an instance of the sorting structure.
* @param[in] pSrc points to the block of input data.
* @param[out] pDst points to the block of output data
* @param[in] blockSize number of samples to process.
*
* @par Algorithm
* The heap sort algorithm is a comparison algorithm that
* divides the input array into a sorted and an unsorted region,
* and shrinks the unsorted region by extracting the largest
* element and moving it to the sorted region. A heap data
* structure is used to find the maximum.
*
* @par It's an in-place algorithm. In order to obtain an out-of-place
* function, a memcpy of the source vector is performed.
*/
void arm_heap_sort_f32(
  const arm_sort_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize)
{
  /* The sort always runs on pDst: either it is the caller's in-place buffer
     (pSrc == pDst) or it receives a copy of the input first */
  float32_t * pA = pDst;
  int32_t idx;
  float32_t held;

  if (pSrc != pDst)
  {
    /* Out-of-place: work on a copy so the source block stays untouched */
    memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
  }

  /* Build the heap: sift down every node from index blockSize/2 - 1 back to the root */
  idx = blockSize/2 - 1;
  while (idx >= 0)
  {
    arm_heapify(pA, blockSize, idx, S->dir);
    idx--;
  }

  /* Repeatedly move the root behind the shrinking heap, then restore heap
     order on the idx elements that remain */
  idx = blockSize - 1;
  while (idx >= 0)
  {
    held = pA[idx];
    pA[idx] = pA[0];
    pA[0] = held;

    arm_heapify(pA, idx, 0, S->dir);
    idx--;
  }
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,93 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_insertion_sort_f32.c
* Description: Floating point insertion sort
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_sorting.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
* @private
* @param[in] S points to an instance of the sorting structure.
* @param[in] pSrc points to the block of input data.
* @param[out] pDst points to the block of output data
* @param[in] blockSize number of samples to process.
*
* @par Algorithm
* The insertion sort is a simple sorting algorithm that
* reads all the element of the input array and removes one element
* at a time, finds the location it belongs in the final sorted list,
* and inserts it there.
*
* @par It's an in-place algorithm. In order to obtain an out-of-place
* function, a memcpy of the source vector is performed.
*/
void arm_insertion_sort_f32(
  const arm_sort_instance_f32 * S,
  float32_t *pSrc,
  float32_t* pDst,
  uint32_t blockSize)
{
  /* The sort always runs on pDst: either it is the caller's in-place buffer
     (pSrc == pDst) or it receives a copy of the input first */
  float32_t * pA = pDst;
  uint8_t dir = S->dir;
  uint32_t pos, slot;
  float32_t key;

  if (pSrc != pDst)
  {
    /* Out-of-place: work on a copy so the source block stays untouched */
    memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
  }

  /* Read all the elements of the input array */
  for (pos = 0; pos < blockSize; pos++)
  {
    /* Hold the element and open a gap by shifting its left neighbours one
       place to the right for as long as (key < neighbour) equals dir */
    key = pA[pos];

    for (slot = pos; slot > 0 && dir==(key < pA[slot-1]); slot--)
    {
      pA[slot] = pA[slot-1];
    }

    /* Drop the held element into the gap */
    pA[slot] = key;
  }
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,127 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_merge_sort_f32.c
* Description: Floating point merge sort
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_sorting.h"
/*
 * Merge two adjacent runs of pA into pB.
 *
 *   left run  : pA[begin .. middle-1]
 *   right run : pA[middle .. end-1]
 *   output    : pB[begin .. end-1]
 *
 * An element is taken from the left run when the right run is exhausted or
 * when dir equals the result of (left <= right); otherwise it is taken from
 * the right run.
 */
static void topDownMerge(float32_t * pA, uint32_t begin, uint32_t middle, uint32_t end, float32_t * pB, uint8_t dir)
{
    uint32_t left  = begin;     /* Read index in the left run  */
    uint32_t right = middle;    /* Read index in the right run */
    uint32_t out   = begin;     /* Write index in pB           */

    /* Produce one output element per iteration */
    while (out < end)
    {
        uint32_t takeLeft = 0U;

        if (left < middle)
        {
            if ((right >= end) || (dir == (pA[left] <= pA[right])))
            {
                takeLeft = 1U;
            }
        }

        if (takeLeft != 0U)
        {
            pB[out] = pA[left];
            left++;
        }
        else
        {
            pB[out] = pA[right];
            right++;
        }

        out++;
    }
}
/*
 * Recursive top-down merge sort of the run [begin, end).
 *
 * The two buffers swap roles at every recursion level: both halves are
 * sorted with pA and pB exchanged, so their results land in pB, and the
 * halves are then merged from pB into pA. The sorted run therefore ends up
 * in pA[begin .. end-1].
 *
 * Runs of one element are not copied, so both buffers are expected to hold
 * the same data in [begin, end) on entry.
 *
 * The run length and the midpoint are computed with unsigned arithmetic
 * that cannot wrap, instead of casting the indices to int32_t and adding
 * them together.
 */
static void arm_merge_sort_core_f32(float32_t * pB, uint32_t begin, uint32_t end, float32_t * pA, uint8_t dir)
{
    /* Runs of zero or one element are already sorted */
    if ((end > begin) && ((end - begin) >= 2U))
    {
        /* Midpoint without forming begin + end, which could overflow */
        uint32_t middle = begin + ((end - begin) / 2U);

        arm_merge_sort_core_f32(pA, begin, middle, pB, dir);   /* Sort the left part  */
        arm_merge_sort_core_f32(pA, middle, end, pB, dir);     /* Sort the right part */

        /* Merge the two sorted halves held in pB into pA */
        topDownMerge(pB, begin, middle, end, pA, dir);
    }
}
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
 * @param[in]  S          points to an instance of the sorting structure.
 * @param[in]  pSrc       points to the block of input data.
 * @param[out] pDst       points to the block of output data.
 * @param[in]  blockSize  number of samples to process.
 *
 * @par        Algorithm
 *               Merge sort is a comparison sort that divides the input
 *               array into sublists and merges them into progressively
 *               longer sorted sublists until only one list remains.
 *
 * @par        Working buffer
 *               A work array is always needed. It is taken from S->buffer,
 *               receives a copy of blockSize samples, and must be allocated
 *               by the user and linked to the instance at initialization time.
 *
 * @par        Out-of-place operation
 *               When pSrc and pDst differ, the source vector is first copied
 *               to pDst with memcpy and the sort is carried out on pDst.
 */
void arm_merge_sort_f32(
  const arm_merge_sort_instance_f32 * S,
        float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    const size_t nBytes = blockSize * sizeof(float32_t);

    /* Out-of-place: seed the destination with the unsorted data.
       When pSrc == pDst the data is already where it will be sorted. */
    if (pDst != pSrc)
    {
        memcpy(pDst, pSrc, nBytes);
    }

    /* The working buffer needs its own copy of the data */
    memcpy(S->buffer, pSrc, nBytes);

    /* pDst is the sorted output in both cases (it equals pSrc when in-place) */
    arm_merge_sort_core_f32(S->buffer, 0, blockSize, pDst, S->dir);
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,53 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_merge_sort_init_f32.c
* Description: Floating point merge sort initialization function
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
 * @param[in,out] S       points to the merge sort instance to set up.
 * @param[in]     dir     sorting order, stored in the instance.
 * @param[in]     buffer  working buffer, stored in the instance.
 */
void arm_merge_sort_init_f32(arm_merge_sort_instance_f32 * S, arm_sort_dir dir, float32_t * buffer)
{
    /* Only records the two settings; nothing is allocated or copied here */
    S->buffer = buffer;
    S->dir    = dir;
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,207 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q15_to_float.c
* Description: Converts the elements of the Q15 vector to floating-point vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
* @defgroup q15_to_x Convert 16-bit Integer value
*/
/**
@addtogroup q15_to_x
@{
*/
/**
  @brief         Converts the elements of the Q15 vector to floating-point vector.
  @param[in]     pSrc       points to the Q15 input vector
  @param[out]    pDst       points to the floating-point output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   The equation used for the conversion process is:
  <pre>
      pDst[n] = (float32_t) pSrc[n] / 32768;   0 <= n < blockSize.
  </pre>

  @par           Variants
                   One of three implementations is selected at compile time:
                   MVE (ARM_MATH_MVEF without ARM_MATH_AUTOVECTORIZE),
                   Neon (ARM_MATH_NEON_EXPERIMENTAL) or plain C with optional
                   loop unrolling (ARM_MATH_LOOPUNROLL).
 */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_q15_to_float(
  const q15_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    q31x4_t vecIn;                              /* Four Q15 samples widened to 32-bit lanes */
    q15_t const *pSrcVec;                       /* Source pointer */

    pSrcVec = (q15_t const *) pSrc;

    /* Vector loop: 4 samples per iteration */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        /* C = (float32_t) A / 32768 */
        /* The load widens each q15 sample to 32 bits, so the value is kept
           in a 32-bit-lane vector, which is what vcvtq_n_f32_s32 takes */
        vecIn = vldrhq_s32(pSrcVec);
        pSrcVec += 4;

        /* Convert from q15 to float and store the results in the destination buffer */
        vstrwq(pDst, vcvtq_n_f32_s32(vecIn, 15));
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: remaining 0 to 3 samples */
    blkCnt = blockSize & 3;
    while (blkCnt > 0U)
    {
        /* C = (float32_t) A / 32768 */
        /* Convert from q15 to float and store result in destination buffer */
        *pDst++ = ((float32_t) *pSrcVec++ / 32768.0f);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
void arm_q15_to_float(
  const q15_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    const q15_t *pIn = pSrc;                    /* Source pointer */
    uint32_t blkCnt;                            /* Loop counter */
    int16x8_t inV;                              /* Eight q15 samples */
    int32x4_t inV0, inV1;                       /* Low / high halves widened to 32 bits */
    float32x4_t outV;                           /* Four converted samples */

    blkCnt = blockSize >> 3U;

    /* Compute 8 outputs at a time.
     ** a second loop below computes the remaining 1 to 7 samples. */
    while (blkCnt > 0U)
    {
        /* C = (float32_t) A / 32768 */
        /* Convert from q15 to float and then store the results in the destination buffer */
        inV = vld1q_s16(pIn);
        pIn += 8;

        inV0 = vmovl_s16(vget_low_s16(inV));
        inV1 = vmovl_s16(vget_high_s16(inV));

        outV = vcvtq_n_f32_s32(inV0, 15);
        vst1q_f32(pDst, outV);
        pDst += 4;

        outV = vcvtq_n_f32_s32(inV1, 15);
        vst1q_f32(pDst, outV);
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* If the blockSize is not a multiple of 8, compute any remaining output samples here.
     ** No loop unrolling is used. */
    blkCnt = blockSize & 7;
    while (blkCnt > 0U)
    {
        /* C = (float32_t) A / 32768 */
        /* Convert from q15 to float and then store the results in the destination buffer */
        *pDst++ = ((float32_t) *pIn++ / 32768.0f);

        /* Decrement the loop counter */
        blkCnt--;
    }
}
#else
void arm_q15_to_float(
  const q15_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    const q15_t *pIn = pSrc;                    /* Source pointer */

#if defined (ARM_MATH_LOOPUNROLL)
    /* Loop unrolling: Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        /* C = (float32_t) A / 32768 */
        /* Convert from q15 to float and store result in destination buffer */
        *pDst++ = ((float32_t) *pIn++ / 32768.0f);
        *pDst++ = ((float32_t) *pIn++ / 32768.0f);
        *pDst++ = ((float32_t) *pIn++ / 32768.0f);
        *pDst++ = ((float32_t) *pIn++ / 32768.0f);

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Loop unrolling: Compute remaining outputs */
    blkCnt = blockSize % 0x4U;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    while (blkCnt > 0U)
    {
        /* C = (float32_t) A / 32768 */
        /* Convert from q15 to float and store result in destination buffer */
        *pDst++ = ((float32_t) *pIn++ / 32768.0f);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of q15_to_x group
*/

View file

@ -0,0 +1,182 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q15_to_q31.c
* Description: Converts the elements of the Q15 vector to Q31 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup q15_to_x
@{
*/
/**
  @brief         Converts the elements of the Q15 vector to Q31 vector.
  @param[in]     pSrc       points to the Q15 input vector
  @param[out]    pDst       points to the Q31 output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   The equation used for the conversion process is:
  <pre>
      pDst[n] = (q31_t) pSrc[n] << 16;   0 <= n < blockSize.
  </pre>

  @par           Implementation note
                   The scalar shifts are carried out on uint32_t and the
                   result is converted back to q31_t, because left-shifting
                   a negative signed value is undefined behaviour in C. The
                   bit pattern produced is the same.
 */
#if defined(ARM_MATH_MVEI)
void arm_q15_to_q31(
  const q15_t * pSrc,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    q31x4_t vecDst;                             /* Four samples in 32-bit lanes */

    /* Vector loop: 4 samples per iteration */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        /* C = (q31_t)A << 16 */
        /* Convert from q15 to q31 and then store the results in the destination buffer */

        /* load q15 + 32-bit widening */
        vecDst = vldrhq_s32((q15_t const *) pSrc);
        vecDst = vshlq_n(vecDst, 16);
        vstrwq_s32(pDst, vecDst);

        /* Advance vector source and destination pointers */
        pDst += 4;
        pSrc += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: remaining 0 to 3 samples */
    blkCnt = blockSize & 3;
    while (blkCnt > 0U)
    {
        /* C = (q31_t) A << 16 */
        /* Convert from q15 to q31 and store result in destination buffer */
        *pDst++ = (q31_t) ((uint32_t) *pSrc++ << 16);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
void arm_q15_to_q31(
  const q15_t * pSrc,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    const q15_t *pIn = pSrc;                    /* Source pointer */

#if defined (ARM_MATH_LOOPUNROLL)
    q31_t in1, in2;                             /* Two packed q15 samples each */
    q31_t out1, out2, out3, out4;
#endif

#if defined (ARM_MATH_LOOPUNROLL)
    /* Loop unrolling: Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        /* C = (q31_t)A << 16 */
        /* Convert from q15 to q31 and store result in destination buffer */
        in1 = read_q15x2_ia ((q15_t **) &pIn);
        in2 = read_q15x2_ia ((q15_t **) &pIn);

#ifndef ARM_MATH_BIG_ENDIAN
        /* extract lower 16 bits to 32 bit result */
        out1 = (q31_t) ((uint32_t) in1 << 16U);
        /* extract upper 16 bits to 32 bit result */
        out2 = (q31_t) ((uint32_t) in1 & 0xFFFF0000U);
        /* extract lower 16 bits to 32 bit result */
        out3 = (q31_t) ((uint32_t) in2 << 16U);
        /* extract upper 16 bits to 32 bit result */
        out4 = (q31_t) ((uint32_t) in2 & 0xFFFF0000U);
#else
        /* extract upper 16 bits to 32 bit result */
        out1 = (q31_t) ((uint32_t) in1 & 0xFFFF0000U);
        /* extract lower 16 bits to 32 bit result */
        out2 = (q31_t) ((uint32_t) in1 << 16U);
        /* extract upper 16 bits to 32 bit result */
        out3 = (q31_t) ((uint32_t) in2 & 0xFFFF0000U);
        /* extract lower 16 bits to 32 bit result */
        out4 = (q31_t) ((uint32_t) in2 << 16U);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        *pDst++ = out1;
        *pDst++ = out2;
        *pDst++ = out3;
        *pDst++ = out4;

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Loop unrolling: Compute remaining outputs */
    blkCnt = blockSize % 0x4U;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    while (blkCnt > 0U)
    {
        /* C = (q31_t) A << 16 */
        /* Convert from q15 to q31 and store result in destination buffer */
        *pDst++ = (q31_t) ((uint32_t) *pIn++ << 16);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of q15_to_x group
*/

View file

@ -0,0 +1,190 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q15_to_q7.c
* Description: Converts the elements of the Q15 vector to Q7 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup q15_to_x
@{
*/
/**
  @brief         Converts the elements of the Q15 vector to Q7 vector.
  @param[in]     pSrc       points to the Q15 input vector
  @param[out]    pDst       points to the Q7 output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   The equation used for the conversion process is:
  <pre>
      pDst[n] = (q7_t) (pSrc[n] >> 8);   0 <= n < blockSize.
  </pre>
 */
#if defined(ARM_MATH_MVEI)
void arm_q15_to_q7(
  const q15_t * pSrc,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    q15x8x2_t tmp;                              /* 16 input samples loaded by vld2q */
    q15_t const *pSrcVec;                       /* Source pointer */
    /* The narrowing intrinsics take vecDst as an input operand, so it is
       given a defined value instead of being read uninitialised on the
       first iteration. */
    q7x16_t vecDst = vdupq_n_s8(0);

    pSrcVec = (q15_t const *) pSrc;

    /* Vector loop: 16 samples per iteration */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        /* C = (q7_t) (A >> 8) */
        /* Convert from q15 to q7 and then store the results in the destination buffer */
        tmp = vld2q(pSrcVec);
        pSrcVec += 16;

        vecDst = vqshrnbq_n_s16(vecDst, tmp.val[0], 8);
        vecDst = vqshrntq_n_s16(vecDst, tmp.val[1], 8);
        vst1q(pDst, vecDst);
        pDst += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: remaining 0 to 15 samples */
    blkCnt = blockSize & 0xF;
    while (blkCnt > 0U)
    {
        /* C = (q7_t) (A >> 8) */
        /* Convert from q15 to q7 and store result in destination buffer */
        *pDst++ = (q7_t) (*pSrcVec++ >> 8);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
void arm_q15_to_q7(
  const q15_t * pSrc,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    const q15_t *pIn = pSrc;                    /* Source pointer */

#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
    q31_t in1, in2;                             /* Two packed q15 samples each */
    q31_t out1, out2;
#endif

#if defined (ARM_MATH_LOOPUNROLL)
    /* Loop unrolling: Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        /* C = (q7_t) (A >> 8) */
        /* Convert from q15 to q7 and store result in destination buffer */
#if defined (ARM_MATH_DSP)
        in1 = read_q15x2_ia ((q15_t **) &pIn);
        in2 = read_q15x2_ia ((q15_t **) &pIn);

#ifndef ARM_MATH_BIG_ENDIAN
        out1 = __PKHTB(in2, in1, 16);
        out2 = __PKHBT(in2, in1, 16);
#else
        out1 = __PKHTB(in1, in2, 16);
        out2 = __PKHBT(in1, in2, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* rotate packed value left by 8 (equivalently right by 24) */
        out2 = ((uint32_t) out2 << 8) | ((uint32_t) out2 >> 24);

        /* anding with 0xff00ff00 to get two 8 bit values */
        out1 = out1 & 0xFF00FF00;
        /* anding with 0x00ff00ff to get two 8 bit values */
        out2 = out2 & 0x00FF00FF;

        /* oring two values(contains two 8 bit values) to get four packed 8 bit values */
        out1 = out1 | out2;

        /* store 4 samples at a time to destination buffer */
        write_q7x4_ia (&pDst, out1);
#else
        *pDst++ = (q7_t) (*pIn++ >> 8);
        *pDst++ = (q7_t) (*pIn++ >> 8);
        *pDst++ = (q7_t) (*pIn++ >> 8);
        *pDst++ = (q7_t) (*pIn++ >> 8);
#endif /* #if defined (ARM_MATH_DSP) */

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Loop unrolling: Compute remaining outputs */
    blkCnt = blockSize % 0x4U;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    while (blkCnt > 0U)
    {
        /* C = (q7_t) (A >> 8) */
        /* Convert from q15 to q7 and store result in destination buffer */
        *pDst++ = (q7_t) (*pIn++ >> 8);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of q15_to_x group
*/

View file

@ -0,0 +1,202 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q31_to_float.c
* Description: Converts the elements of the Q31 vector to floating-point vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
* @defgroup q31_to_x Convert 32-bit Integer value
*/
/**
@addtogroup q31_to_x
@{
*/
/**
  @brief         Converts the elements of the Q31 vector to floating-point vector.
  @param[in]     pSrc       points to the Q31 input vector
  @param[out]    pDst       points to the floating-point output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   Every output sample is the input sample divided by 2^31:
  <pre>
      pDst[n] = (float32_t) pSrc[n] / 2147483648;   0 <= n < blockSize.
  </pre>
 */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_q31_to_float(
  const q31_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    q31_t const *pRead = (q31_t const *) pSrc;  /* Read cursor */
    q31x4_t lanes;                              /* Four q31 samples */
    uint32_t remaining;                         /* Iterations left */

    /* Vector part: four samples per pass */
    for (remaining = blockSize >> 2; remaining > 0U; remaining--)
    {
        /* C = (float32_t) A / 2147483648 */
        lanes = vld1q(pRead);
        pRead += 4;

        vstrwq(pDst, vcvtq_n_f32_s32(lanes, 31));
        pDst += 4;
    }

    /* Scalar part: the 0 to 3 samples that are left */
    for (remaining = blockSize & 3; remaining > 0U; remaining--)
    {
        /* C = (float32_t) A / 2147483648 */
        *pDst++ = ((float32_t) *pRead++ / 2147483648.0f);
    }
}
#else
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
void arm_q31_to_float(
  const q31_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    const q31_t *pRead = pSrc;                  /* Read cursor */
    uint32_t remaining;                         /* Iterations left */
    int32x4_t rawV;                             /* Four q31 samples */
    float32x4_t cvtV;                           /* Four converted samples */

    /* Vector part: four samples per pass */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        /* C = (float32_t) A / 2147483648 */
        rawV = vld1q_s32(pRead);
        pRead += 4;

        cvtV = vcvtq_n_f32_s32(rawV, 31);
        vst1q_f32(pDst, cvtV);
        pDst += 4;
    }

    /* Scalar part: the samples left when blockSize is not a multiple of 4 */
    for (remaining = blockSize & 3; remaining > 0U; remaining--)
    {
        /* C = (float32_t) A / 2147483648 */
        *pDst++ = ((float32_t) *pRead++ / 2147483648.0f);
    }
}
#else
void arm_q31_to_float(
  const q31_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    const q31_t *pRead = pSrc;                  /* Read cursor */
    uint32_t remaining;                         /* Iterations left */

#if defined (ARM_MATH_LOOPUNROLL)
    /* Unrolled part: four samples per pass, read and written in order */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        /* C = (float32_t) A / 2147483648 */
        pDst[0] = ((float32_t) pRead[0] / 2147483648.0f);
        pDst[1] = ((float32_t) pRead[1] / 2147483648.0f);
        pDst[2] = ((float32_t) pRead[2] / 2147483648.0f);
        pDst[3] = ((float32_t) pRead[3] / 2147483648.0f);

        pRead += 4;
        pDst  += 4;
    }

    /* Samples left over after the unrolled part */
    remaining = blockSize % 0x4U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C = (float32_t) A / 2147483648 */
        *pDst++ = ((float32_t) *pRead++ / 2147483648.0f);
    }
}
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of q31_to_x group
*/

View file

@ -0,0 +1,181 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q31_to_q15.c
* Description: Converts the elements of the Q31 vector to Q15 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup q31_to_x
@{
*/
/**
  @brief         Converts the elements of the Q31 vector to Q15 vector.
  @param[in]     pSrc       points to the Q31 input vector
  @param[out]    pDst       points to the Q15 output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   The equation used for the conversion process is:
  <pre>
      pDst[n] = (q15_t) (pSrc[n] >> 16);   0 <= n < blockSize.
  </pre>
 */
#if defined(ARM_MATH_MVEI)
void arm_q31_to_q15(
  const q31_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    q31x4x2_t tmp;                              /* 8 input samples loaded by vld2q */
    /* The narrowing intrinsics take vecDst as an input operand, so it is
       given a defined value instead of being read uninitialised on the
       first iteration. */
    q15x8_t vecDst = vdupq_n_s16(0);
    q31_t const *pSrcVec;                       /* Source pointer */

    pSrcVec = (q31_t const *) pSrc;

    /* Vector loop: 8 samples per iteration */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        /* C = (q15_t) (A >> 16) */
        /* Convert from q31 to q15 and then store the results in the destination buffer */
        tmp = vld2q(pSrcVec);
        pSrcVec += 8;

        vecDst = vshrnbq_n_s32(vecDst, tmp.val[0], 16);
        vecDst = vshrntq_n_s32(vecDst, tmp.val[1], 16);
        vst1q(pDst, vecDst);
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: remaining 0 to 7 samples */
    blkCnt = blockSize & 7;
    while (blkCnt > 0U)
    {
        /* C = (q15_t) (A >> 16) */
        /* Convert from q31 to q15 and store result in destination buffer */
        *pDst++ = (q15_t) (*pSrcVec++ >> 16);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
void arm_q31_to_q15(
  const q31_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    const q31_t *pIn = pSrc;                    /* Source pointer */

#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
    q31_t in1, in2, in3, in4;
    q31_t out1, out2;                           /* Two packed q15 results each */
#endif

#if defined (ARM_MATH_LOOPUNROLL)
    /* Loop unrolling: Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        /* C = (q15_t) (A >> 16) */
        /* Convert from q31 to q15 and store result in destination buffer */
#if defined (ARM_MATH_DSP)
        in1 = *pIn++;
        in2 = *pIn++;
        in3 = *pIn++;
        in4 = *pIn++;

        /* pack two higher 16-bit values from two 32-bit values */
#ifndef ARM_MATH_BIG_ENDIAN
        out1 = __PKHTB(in2, in1, 16);
        out2 = __PKHTB(in4, in3, 16);
#else
        out1 = __PKHTB(in1, in2, 16);
        out2 = __PKHTB(in3, in4, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        write_q15x2_ia (&pDst, out1);
        write_q15x2_ia (&pDst, out2);
#else
        *pDst++ = (q15_t) (*pIn++ >> 16);
        *pDst++ = (q15_t) (*pIn++ >> 16);
        *pDst++ = (q15_t) (*pIn++ >> 16);
        *pDst++ = (q15_t) (*pIn++ >> 16);
#endif /* #if defined (ARM_MATH_DSP) */

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Loop unrolling: Compute remaining outputs */
    blkCnt = blockSize % 0x4U;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    while (blkCnt > 0U)
    {
        /* C = (q15_t) (A >> 16) */
        /* Convert from q31 to q15 and store result in destination buffer */
        *pDst++ = (q15_t) (*pIn++ >> 16);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of q31_to_x group
*/

View file

@ -0,0 +1,169 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q31_to_q7.c
* Description: Converts the elements of the Q31 vector to Q7 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup q31_to_x
@{
*/
/**
  @brief         Converts the elements of the Q31 vector to Q7 vector.
  @param[in]     pSrc       points to the Q31 input vector
  @param[out]    pDst       points to the Q7 output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   The equation used for the conversion process is:
  <pre>
      pDst[n] = (q7_t) (pSrc[n] >> 24);   0 <= n < blockSize.
  </pre>
 */
#if defined(ARM_MATH_MVEI)
void arm_q31_to_q7(
  const q31_t * pSrc,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    q31x4x4_t tmp;                              /* 16 input samples loaded by vld4q */
    /* The narrowing intrinsics take their destination vector as an input
       operand, so the three vectors are given a defined value instead of
       being read uninitialised on the first iteration. */
    q15x8_t evVec  = vdupq_n_s16(0);
    q15x8_t oddVec = vdupq_n_s16(0);
    q7x16_t vecDst = vdupq_n_s8(0);
    q31_t const *pSrcVec;                       /* Source pointer */

    pSrcVec = (q31_t const *) pSrc;

    /* Vector loop: 16 samples per iteration */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        tmp = vld4q(pSrcVec);
        pSrcVec += 16;

        /* C = (q7_t) (A >> 24) */
        /* Convert from q31 to q7 in two narrowing steps (>> 16, then >> 8)
           and store the results in the destination buffer */

        /* narrow and pack evens */
        evVec = vshrnbq_n_s32(evVec, tmp.val[0], 16);
        evVec = vshrntq_n_s32(evVec, tmp.val[2], 16);

        /* narrow and pack odds */
        oddVec = vshrnbq_n_s32(oddVec, tmp.val[1], 16);
        oddVec = vshrntq_n_s32(oddVec, tmp.val[3], 16);

        /* narrow & merge */
        vecDst = vshrnbq_n_s16(vecDst, evVec, 8);
        vecDst = vshrntq_n_s16(vecDst, oddVec, 8);

        vst1q(pDst, vecDst);
        pDst += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: remaining 0 to 15 samples */
    blkCnt = blockSize & 0xF;
    while (blkCnt > 0U)
    {
        /* C = (q7_t) (A >> 24) */
        /* Convert from q31 to q7 and store result in destination buffer */
        *pDst++ = (q7_t) (*pSrcVec++ >> 24);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
void arm_q31_to_q7(
  const q31_t * pSrc,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                            /* Loop counter */
    const q31_t *pIn = pSrc;                    /* Source pointer */

#if defined (ARM_MATH_LOOPUNROLL)
    q7_t out1, out2, out3, out4;

    /* Loop unrolling: Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        /* C = (q7_t) (A >> 24) */
        /* Convert from q31 to q7 and store result in destination buffer */
        out1 = (q7_t) (*pIn++ >> 24);
        out2 = (q7_t) (*pIn++ >> 24);
        out3 = (q7_t) (*pIn++ >> 24);
        out4 = (q7_t) (*pIn++ >> 24);

        /* Pack the four results and store them with a single write */
        write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Loop unrolling: Compute remaining outputs */
    blkCnt = blockSize % 0x4U;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    while (blkCnt > 0U)
    {
        /* C = (q7_t) (A >> 24) */
        /* Convert from q31 to q7 and store result in destination buffer */
        *pDst++ = (q7_t) (*pIn++ >> 24);

        /* Decrement loop counter */
        blkCnt--;
    }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of q31_to_x group
*/

View file

@ -0,0 +1,218 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q7_to_float.c
* Description: Converts the elements of the Q7 vector to floating-point vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
* @defgroup q7_to_x Convert 8-bit Integer value
*/
/**
@addtogroup q7_to_x
@{
*/
/**
@brief Converts the elements of the Q7 vector to floating-point vector.
@param[in] pSrc points to the Q7 input vector
@param[out] pDst points to the floating-point output vector
@param[in] blockSize number of samples in each vector
@return none
@par Details
The equation used for the conversion process is:
<pre>
pDst[n] = (float32_t) pSrc[n] / 128; 0 <= n < blockSize.
</pre>
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_q7_to_float(
const q7_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counters */
q7x16_t vecDst;
q7_t const *pSrcVec;
pSrcVec = (q7_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0U)
{
/* C = (float32_t) A / 32768 */
/* convert from q7 to float and then store the results in the destination buffer */
vecDst = vldrbq_s32(pSrcVec);
pSrcVec += 4;
vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 7));
pDst += 4;
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
blkCnt = blockSize & 3;
while (blkCnt > 0U)
{
/* C = (float32_t) A / 128 */
/* Convert from q7 to float and store result in destination buffer */
*pDst++ = ((float32_t) * pSrcVec++ / 128.0f);
/* Decrement loop counter */
blkCnt--;
}
}
#else
#if defined(ARM_MATH_NEON)
void arm_q7_to_float(
const q7_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
const q7_t *pIn = pSrc; /* Src pointer */
uint32_t blkCnt; /* loop counter */
int8x16_t inV;
int16x8_t inVLO, inVHI;
int32x4_t inVLL, inVLH, inVHL, inVHH;
float32x4_t outV;
blkCnt = blockSize >> 4U;
/* Compute 16 outputs at a time.
** a second loop below computes the remaining 1 to 15 samples. */
while (blkCnt > 0U)
{
/* C = (float32_t) A / 128 */
/* Convert from q7 to float and then store the results in the destination buffer */
inV = vld1q_s8(pIn);
pIn += 16;
inVLO = vmovl_s8(vget_low_s8(inV));
inVHI = vmovl_s8(vget_high_s8(inV));
inVLL = vmovl_s16(vget_low_s16(inVLO));
inVLH = vmovl_s16(vget_high_s16(inVLO));
inVHL = vmovl_s16(vget_low_s16(inVHI));
inVHH = vmovl_s16(vget_high_s16(inVHI));
outV = vcvtq_n_f32_s32(inVLL,7);
vst1q_f32(pDst, outV);
pDst += 4;
outV = vcvtq_n_f32_s32(inVLH,7);
vst1q_f32(pDst, outV);
pDst += 4;
outV = vcvtq_n_f32_s32(inVHL,7);
vst1q_f32(pDst, outV);
pDst += 4;
outV = vcvtq_n_f32_s32(inVHH,7);
vst1q_f32(pDst, outV);
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 16, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize & 0xF;
while (blkCnt > 0U)
{
/* C = (float32_t) A / 128 */
/* Convert from q7 to float and then store the results in the destination buffer */
*pDst++ = ((float32_t) * pIn++ / 128.0f);
/* Decrement the loop counter */
blkCnt--;
}
}
#else
void arm_q7_to_float(
  const q7_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
  const q7_t *pRead = pSrc;         /* Read cursor over the Q7 input */
  uint32_t remaining = blockSize;   /* Samples left for the one-at-a-time loop */

#if defined (ARM_MATH_LOOPUNROLL)
  uint32_t quads;                   /* Groups of four samples left */

  /* Unrolled part: four conversions per pass, C = (float32_t) A / 128 */
  for (quads = blockSize >> 2U; quads > 0U; quads--)
  {
    *pDst++ = ((float32_t) *pRead++ / 128.0f);
    *pDst++ = ((float32_t) *pRead++ / 128.0f);
    *pDst++ = ((float32_t) *pRead++ / 128.0f);
    *pDst++ = ((float32_t) *pRead++ / 128.0f);
  }

  /* Only the 0..3 leftover samples remain for the loop below */
  remaining = blockSize % 0x4U;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C = (float32_t) A / 128 */
    *pDst++ = ((float32_t) *pRead++ / 128.0f);
  }
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of q7_to_x group
*/

View file

@ -0,0 +1,188 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q7_to_q15.c
* Description: Converts the elements of the Q7 vector to Q15 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup q7_to_x
@{
*/
/**
@brief Converts the elements of the Q7 vector to Q15 vector.
@param[in] pSrc points to the Q7 input vector
@param[out] pDst points to the Q15 output vector
@param[in] blockSize number of samples in each vector
@return none
@par Details
The equation used for the conversion process is:
<pre>
pDst[n] = (q15_t) pSrc[n] << 8; 0 <= n < blockSize.
</pre>
*/
#if defined(ARM_MATH_MVEI)
void arm_q7_to_q15(
  const q7_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
  q7_t const *pRead = (q7_t const *) pSrc;   /* Read cursor over the Q7 input */
  uint32_t remaining;                        /* Iterations left in the current loop */

  /* Vector part: eight samples per pass */
  for (remaining = blockSize >> 3; remaining > 0U; remaining--)
  {
    /* Load eight q7 values, each widened into a 16-bit lane */
    q15x8_t widened = vldrbq_s16(pRead);
    pRead += 8;

    /* C = (q15_t) A << 8 */
    widened = widened << 8;

    vstrhq(pDst, widened);
    pDst += 8;
  }

  /* Scalar tail: the 0..7 samples that did not fill a whole vector */
  for (remaining = blockSize & 7; remaining > 0U; remaining--)
  {
    /* C = (q15_t) A << 8 */
    *pDst++ = (q15_t) *pRead++ << 8;
  }
}
#else
void arm_q7_to_q15(
  const q7_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
  const q7_t *pRead = pSrc;         /* Read cursor over the Q7 input */
  uint32_t remaining = blockSize;   /* Samples left for the one-at-a-time loop */

#if defined (ARM_MATH_LOOPUNROLL)
  uint32_t quads;                   /* Groups of four samples left */

  /* Unrolled part: four conversions per pass, C = (q15_t) A << 8 */
  for (quads = blockSize >> 2U; quads > 0U; quads--)
  {
#if defined (ARM_MATH_DSP)
    q31_t packed;             /* Four q7 samples read as one word */
    q31_t rotPair, rawPair;   /* Two sign-extended samples each */
    q31_t word0, word1;       /* Packed q15 pairs, in store order */

    packed = read_q7x4_ia ((q7_t **) &pRead);

    /* Rotate by 8 so the other two bytes sit in the positions __SXTB16
     * sign-extends, then extend the two bytes left in place */
    rotPair = __SXTB16(__ROR(packed, 8));
    rawPair = __SXTB16(packed);

    /* Move each extended value into the upper byte of its halfword and
     * clear the lower byte */
    rotPair = rotPair << 8U;
    rawPair = rawPair << 8U;
    rotPair = rotPair & 0xFF00FF00;
    rawPair = rawPair & 0xFF00FF00;

    /* Re-interleave the halfwords; which packed word is stored first
     * depends on the byte order */
#ifndef ARM_MATH_BIG_ENDIAN
    word1 = __PKHTB(rotPair, rawPair, 16);
    word0 = __PKHBT(rawPair, rotPair, 16);
#else
    word0 = __PKHTB(rotPair, rawPair, 16);
    word1 = __PKHBT(rawPair, rotPair, 16);
#endif

    write_q15x2_ia (&pDst, word0);
    write_q15x2_ia (&pDst, word1);
#else
    *pDst++ = (q15_t) *pRead++ << 8;
    *pDst++ = (q15_t) *pRead++ << 8;
    *pDst++ = (q15_t) *pRead++ << 8;
    *pDst++ = (q15_t) *pRead++ << 8;
#endif /* #if defined (ARM_MATH_DSP) */
  }

  /* Only the 0..3 leftover samples remain for the loop below */
  remaining = blockSize % 0x4U;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C = (q15_t) A << 8 */
    *pDst++ = (q15_t) *pRead++ << 8;
  }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of q7_to_x group
*/

View file

@ -0,0 +1,164 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_q7_to_q31.c
* Description: Converts the elements of the Q7 vector to Q31 vector
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup q7_to_x
@{
*/
/**
@brief Converts the elements of the Q7 vector to Q31 vector.
@param[in] pSrc points to the Q7 input vector
@param[out] pDst points to the Q31 output vector
@param[in] blockSize number of samples in each vector
@return none
@par Details
The equation used for the conversion process is:
<pre>
pDst[n] = (q31_t) pSrc[n] << 24; 0 <= n < blockSize.
</pre>
*/
#if defined(ARM_MATH_MVEI)
void arm_q7_to_q31(
  const q7_t * pSrc,
        q31_t * pDst,
        uint32_t blockSize)
{
  uint32_t remaining;   /* Iterations left in the current loop */

  /* Vector part: four samples per pass */
  for (remaining = blockSize >> 2; remaining > 0U; remaining--)
  {
    /* Load four q7 values, each widened into a 32-bit lane */
    q31x4_t widened = vldrbq_s32((q7_t const *) pSrc);

    /* C = (q31_t) A << 24 */
    widened = vshlq_n(widened, 24);
    vstrwq_s32(pDst, widened);

    /* Advance both cursors past the four samples just converted */
    pSrc += 4;
    pDst += 4;
  }

  /* Scalar tail: the 0..3 samples that did not fill a whole vector */
  for (remaining = blockSize & 3; remaining > 0U; remaining--)
  {
    /* C = (q31_t) A << 24 */
    *pDst++ = (q31_t) *pSrc++ << 24;
  }
}
#else
void arm_q7_to_q31(
  const q7_t * pSrc,
        q31_t * pDst,
        uint32_t blockSize)
{
  const q7_t *pRead = pSrc;         /* Read cursor over the Q7 input */
  uint32_t remaining = blockSize;   /* Samples left for the one-at-a-time loop */

#if defined (ARM_MATH_LOOPUNROLL)
  uint32_t quads;                   /* Groups of four samples left */

  /* Unrolled part: four conversions per pass, C = (q31_t) A << 24 */
  for (quads = blockSize >> 2U; quads > 0U; quads--)
  {
    /* Four q7 samples fetched as a single word; each output is produced by
     * rotating the wanted byte into the top 8 bits and masking the rest */
    const q31_t packed = read_q7x4_ia ((q7_t **) &pRead);

#ifndef ARM_MATH_BIG_ENDIAN
    *pDst++ = (__ROR(packed, 8)) & 0xFF000000;
    *pDst++ = (__ROR(packed, 16)) & 0xFF000000;
    *pDst++ = (__ROR(packed, 24)) & 0xFF000000;
    *pDst++ = (packed & 0xFF000000);
#else
    *pDst++ = (packed & 0xFF000000);
    *pDst++ = (__ROR(packed, 24)) & 0xFF000000;
    *pDst++ = (__ROR(packed, 16)) & 0xFF000000;
    *pDst++ = (__ROR(packed, 8)) & 0xFF000000;
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  }

  /* Only the 0..3 leftover samples remain for the loop below */
  remaining = blockSize % 0x4U;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C = (q31_t) A << 24 */
    *pDst++ = (q31_t) *pRead++ << 24;
  }
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of q7_to_x group
*/

View file

@ -0,0 +1,182 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_quick_sort_f32.c
* Description: Floating point quick sort
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_sorting.h"
/*
 * Partition step of the quick sort over pSrc[first .. last].
 *
 * The value of the first element is used as pivot. Two cursors start just
 * outside the range and walk towards each other; every time both have stopped
 * and have not yet crossed, the two elements they point at are exchanged.
 * A non-zero dir uses '<' for the left scan and '>' for the right scan; a zero
 * dir swaps the two comparisons.
 *
 * Returns the final position of the right-hand cursor.
 */
static uint32_t arm_quick_sort_partition_f32(float32_t *pSrc, int32_t first, int32_t last, uint8_t dir)
{
  const float32_t pivotVal = pSrc[first];   /* Pivot = first element of the range */
  int32_t lo = first - 1;                   /* Left cursor, pre-incremented before use */
  int32_t hi = last + 1;                    /* Right cursor, pre-decremented before use */

  while (lo < hi)
  {
    /* Each scan moves its cursor at least one step, so the cursors are
     * bound to meet or cross eventually. */
    if (dir)
    {
      /* Left scan: skip elements below the pivot, never going past 'last' */
      do
      {
        lo++;
      } while (pSrc[lo] < pivotVal && lo < last);

      /* Right scan: skip elements above the pivot */
      do
      {
        hi--;
      } while (pSrc[hi] > pivotVal);
    }
    else
    {
      /* Left scan: skip elements above the pivot, never going past 'last' */
      do
      {
        lo++;
      } while (pSrc[lo] > pivotVal && lo < last);

      /* Right scan: skip elements below the pivot */
      do
      {
        hi--;
      } while (pSrc[hi] < pivotVal);
    }

    /* Both cursors rest on misplaced elements: exchange them */
    if (lo < hi)
    {
      const float32_t held = pSrc[lo];

      pSrc[lo] = pSrc[hi];
      pSrc[hi] = held;
    }
  }

  return hi;
}
/*
 * Recursive quick sort of pSrc[first .. last] (both bounds inclusive).
 * Ranges holding fewer than two elements are left untouched.
 */
static void arm_quick_sort_core_f32(float32_t *pSrc, int32_t first, int32_t last, uint8_t dir)
{
  int32_t split;   /* Boundary index returned by the partition step */

  /* Nothing to do unless the range holds more than one element */
  if (first >= last)
  {
    return;
  }

  split = arm_quick_sort_partition_f32(pSrc, first, last, dir);

  /* Recurse on the two sub-ranges [first .. split] and [split+1 .. last] */
  arm_quick_sort_core_f32(pSrc, first, split, dir);
  arm_quick_sort_core_f32(pSrc, split + 1, last, dir);
}
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
* @private
* @param[in] S points to an instance of the sorting structure.
* @param[in,out] pSrc points to the block of input data.
* @param[out] pDst points to the block of output data.
* @param[in] blockSize number of samples to process.
*
* @par Algorithm
* The quick sort algorithm is a comparison algorithm that
* divides the input array into two smaller sub-arrays and
* recursively sort them. An element of the array (the pivot)
* is chosen, all the elements with values smaller than the
* pivot are moved before the pivot, while all elements with
* values greater than the pivot are moved after it (partition).
*
* @par
* In this implementation the Hoare partition scheme has been
* used [Hoare, C. A. R. (1 January 1962). "Quicksort". The Computer
* Journal. 5 (1): 10-16.] The first element has always been chosen
* as the pivot. The partition algorithm guarantees that the returned
* pivot is never placed outside the vector, since it is returned only
* when the pointers crossed each other. In this way it isn't
* possible to obtain empty partitions and infinite recursion is avoided.
*
* @par
* It's an in-place algorithm. In order to obtain an out-of-place
* function, a memcpy of the source vector is performed.
*/
void arm_quick_sort_f32(
  const arm_sort_instance_f32 * S,
        float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
  /* Out-of-place request: duplicate the input into the output buffer first.
   * When pSrc == pDst the data is already where it has to be sorted. */
  if (pSrc != pDst)
  {
    memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
  }

  /* In both cases the buffer to sort is now the one pDst points to.
   * The sort covers indices 0 .. blockSize-1 and recurses, so stack usage
   * grows with the recursion depth. */
  arm_quick_sort_core_f32(pDst, 0, blockSize-1, S->dir);
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,108 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_selection_sort_f32.c
* Description: Floating point selection sort
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_sorting.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
* @private
* @param[in] S points to an instance of the sorting structure.
* @param[in] pSrc points to the block of input data.
* @param[out] pDst points to the block of output data
* @param[in] blockSize number of samples to process.
*
* @par Algorithm
* The Selection sort algorithm is a comparison algorithm that
* divides the input array into a sorted and an unsorted sublist
* (initially the sorted sublist is empty and the unsorted sublist
* is the input array), looks for the smallest (or biggest)
* element in the unsorted sublist, swapping it with the leftmost
* one, and moving the sublists boundary one element to the right.
*
* @par It's an in-place algorithm. In order to obtain an out-of-place
* function, a memcpy of the source vector is performed.
*/
void arm_selection_sort_f32(
  const arm_sort_instance_f32 * S,
        float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
  uint32_t i, j, k;        /* i: sorted/unsorted boundary, j: scan index, k: best candidate */
  uint8_t dir = S->dir;    /* Sorting order taken from the instance */
  float32_t temp;          /* Swap storage */
  float32_t * pA;          /* Buffer that is sorted in place */

  if(pSrc != pDst) /* out-of-place: sort a copy held in the output buffer */
  {
    memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
    pA = pDst;
  }
  else
    pA = pSrc;

  /* Move the boundary one element to the right on every pass.
   * The bound is written as (i + 1 < blockSize) rather than
   * (i < blockSize - 1): blockSize is unsigned, so with blockSize == 0 the
   * subtraction would wrap around and the loop would spin ~2^32 times. */
  for (i=0U; i + 1U < blockSize; i++)
  {
    /* Start with the leftmost unsorted element as the candidate */
    k = i;

    /* Look in the unsorted list to find the minimum/maximum value.
     * dir == 1 selects elements that compare '<' to the candidate,
     * dir == 0 selects elements that do not. */
    for (j=i+1U; j<blockSize; j++)
    {
      if (dir==(pA[j] < pA[k]) )
      {
        /* Better candidate found */
        k = j;
      }
    }

    if (k != i)
    {
      /* Swap the selected element with the leftmost unsorted one */
      temp=pA[i];
      pA[i]=pA[k];
      pA[k]=temp;
    }
  }
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,87 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sort_f32.c
* Description: Floating point sort
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_sorting.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
* @brief Generic sorting function
*
* @param[in] S points to an instance of the sorting structure.
* @param[in] pSrc points to the block of input data.
* @param[out] pDst points to the block of output data.
* @param[in] blockSize number of samples to process.
*/
void arm_sort_f32(
  const arm_sort_instance_f32 * S,
        float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
  /* Forward the call, arguments unchanged, to the implementation selected by
   * the instance. A value of S->alg that is not listed here results in no
   * work being done. */
  switch(S->alg)
  {
    case ARM_SORT_BITONIC:
      arm_bitonic_sort_f32(S, pSrc, pDst, blockSize);
      return;

    case ARM_SORT_BUBBLE:
      arm_bubble_sort_f32(S, pSrc, pDst, blockSize);
      return;

    case ARM_SORT_HEAP:
      arm_heap_sort_f32(S, pSrc, pDst, blockSize);
      return;

    case ARM_SORT_INSERTION:
      arm_insertion_sort_f32(S, pSrc, pDst, blockSize);
      return;

    case ARM_SORT_QUICK:
      arm_quick_sort_f32(S, pSrc, pDst, blockSize);
      return;

    case ARM_SORT_SELECTION:
      arm_selection_sort_f32(S, pSrc, pDst, blockSize);
      return;
  }
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,55 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sort_init_f32.c
* Description: Floating point sort initialization function
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_sorting.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup Sorting
@{
*/
/**
* @param[in,out] S points to an instance of the sorting structure.
* @param[in] alg Selected algorithm.
* @param[in] dir Sorting order.
*/
void arm_sort_init_f32(arm_sort_instance_f32 * S, arm_sort_alg alg, arm_sort_dir dir)
{
  /* Record the requested sorting order and algorithm in the instance */
  S->dir = dir;
  S->alg = alg;
}
/**
@} end of Sorting group
*/

View file

@ -0,0 +1,283 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_spline_interp_f32.c
* Description: Floating-point cubic spline interpolation
*
* $Date: 13 November 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@defgroup SplineInterpolate Cubic Spline Interpolation
Spline interpolation is a method of interpolation where the interpolant
is a piecewise-defined polynomial called "spline".
@par Introduction
Given a function f defined on the interval [a,b], a set of n nodes x(i)
where a=x(1)<x(2)<...<x(n)=b and a set of n values y(i) = f(x(i)),
a cubic spline interpolant S(x) is defined as:
<pre>
S1(x) x(1) < x < x(2)
S(x) = ...
Sn-1(x) x(n-1) < x < x(n)
</pre>
where
<pre>
Si(x) = a_i+b_i(x-xi)+c_i(x-xi)^2+d_i(x-xi)^3 i=1, ..., n-1
</pre>
@par Algorithm
Having defined h(i) = x(i+1) - x(i)
<pre>
h(i-1)c(i-1)+2[h(i-1)+h(i)]c(i)+h(i)c(i+1) = 3/h(i)*[a(i+1)-a(i)]-3/h(i-1)*[a(i)-a(i-1)] i=2, ..., n-1
</pre>
It is possible to write the previous conditions in matrix form (Ax=B).
In order to solve the system, two boundary conditions are needed.
- Natural spline: S1''(x1)=2*c(1)=0 ; Sn''(xn)=2*c(n)=0
In matrix form:
<pre>
| 1 0 0 ... 0 0 0 || c(1) | | 0 |
| h(0) 2[h(0)+h(1)] h(1) ... 0 0 0 || c(2) | | 3/h(2)*[a(3)-a(2)]-3/h(1)*[a(2)-a(1)] |
| ... ... ... ... ... ... ... || ... |=| ... |
| 0 0 0 ... h(n-2) 2[h(n-2)+h(n-1)] h(n-1) || c(n-1) | | 3/h(n-1)*[a(n)-a(n-1)]-3/h(n-2)*[a(n-1)-a(n-2)] |
| 0 0 0 ... 0 0 1 || c(n) | | 0 |
</pre>
- Parabolic runout spline: S1''(x1)=2*c(1)=S2''(x2)=2*c(2) ; Sn-1''(xn-1)=2*c(n-1)=Sn''(xn)=2*c(n)
In matrix form:
<pre>
| 1 -1 0 ... 0 0 0 || c(1) | | 0 |
| h(0) 2[h(0)+h(1)] h(1) ... 0 0 0 || c(2) | | 3/h(2)*[a(3)-a(2)]-3/h(1)*[a(2)-a(1)] |
| ... ... ... ... ... ... ... || ... |=| ... |
| 0 0 0 ... h(n-2) 2[h(n-2)+h(n-1)] h(n-1) || c(n-1) | | 3/h(n-1)*[a(n)-a(n-1)]-3/h(n-2)*[a(n-1)-a(n-2)] |
| 0 0 0 ... 0 -1 1 || c(n) | | 0 |
</pre>
A is a tridiagonal matrix (a band matrix of bandwidth 3) of size N=n+1. The factorization
algorithms (A=LU) can be simplified considerably because a large number of zeros appear
in regular patterns. The Crout method has been used:
1) Solve LZ=B
<pre>
u(1,2) = A(1,2)/A(1,1)
z(1) = B(1)/l(11)
FOR i=2, ..., N-1
l(i,i) = A(i,i)-A(i,i-1)u(i-1,i)
u(i,i+1) = a(i,i+1)/l(i,i)
z(i) = [B(i)-A(i,i-1)z(i-1)]/l(i,i)
l(N,N) = A(N,N)-A(N,N-1)u(N-1,N)
z(N) = [B(N)-A(N,N-1)z(N-1)]/l(N,N)
</pre>
2) Solve UX=Z
<pre>
c(N)=z(N)
FOR i=N-1, ..., 1
c(i)=z(i)-u(i,i+1)c(i+1)
</pre>
c(i) for i=1, ..., n-1 are needed to compute the n-1 polynomials.
b(i) and d(i) are computed as:
- b(i) = [y(i+1)-y(i)]/h(i)-h(i)*[c(i+1)+2*c(i)]/3
- d(i) = [c(i+1)-c(i)]/[3*h(i)]
Moreover, a(i)=y(i).
@par Behaviour outside the given intervals
It is possible to compute the interpolated vector for x values outside the
input range (xq<x(1); xq>x(n)). The coefficients used to compute the y values for
xq<x(1) are going to be the ones used for the first interval, while for xq>x(n) the
coefficients used for the last interval.
*/
/**
@addtogroup SplineInterpolate
@{
*/
/**
* @brief Processing function for the floating-point cubic spline interpolation.
* @param[in] S points to an instance of the floating-point spline structure.
* @param[in] xq points to the x values of the interpolated data points.
* @param[out] pDst points to the block of output data.
* @param[in] blockSize number of samples of output data.
*/
void arm_spline_f32(
        arm_spline_instance_f32 * S,
  const float32_t * xq,
        float32_t * pDst,
        uint32_t blockSize)
{
  const float32_t * x = S->x;   /* Known x values (interval boundaries) */
  const float32_t * y = S->y;   /* Known y values; also the a(i) coefficients */
  int32_t n = S->n_x;           /* Number of known points */

  /* Coefficients (a==y for i<=n-1): three consecutive (n-1)-long arrays */
  float32_t * b = (S->coeffs);
  float32_t * c = (S->coeffs)+(n-1);
  float32_t * d = (S->coeffs)+(2*(n-1));

  const float32_t * pXq = xq;            /* Read cursor over the query points */
  int32_t blkCnt = (int32_t)blockSize;   /* Query points not yet processed */
  int32_t blkCnt2;
  int32_t i;
  float32_t x_sc;

#ifdef ARM_MATH_NEON
  float32x4_t xiv;
  float32x4_t aiv;
  float32x4_t biv;
  float32x4_t civ;
  float32x4_t div;

  float32x4_t xqv;

  float32x4_t temp;
  float32x4_t diff;
  float32x4_t yv;
#endif

  /* NOTE: the query cursor only ever moves forward through the intervals, so
   * xq is expected in ascending order. The code after the loop indexes
   * interval i-1, which presumably requires n >= 2 - confirm with callers. */

  /* Create output for x(i)<x<x(i+1) */
  for (i=0; i<n-1; i++)
  {
#ifdef ARM_MATH_NEON
    xiv = vdupq_n_f32(x[i]);

    aiv = vdupq_n_f32(y[i]);
    biv = vdupq_n_f32(b[i]);
    civ = vdupq_n_f32(c[i]);
    div = vdupq_n_f32(d[i]);

    /* The remaining-count test comes first: *(pXq+4) is only inside the
     * query buffer while more than four points are left. */
    while( blkCnt > 4 && *(pXq+4) <= x[i+1] )
    {
      /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */
      xqv = vld1q_f32(pXq);
      pXq+=4;

      /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */
      diff = vsubq_f32(xqv, xiv);
      temp = diff;

      /* y(i) = a(i) + ... */
      yv = aiv;
      /* ... + b(i)*(x-x(i)) + ... */
      yv = vmlaq_f32(yv, biv, temp);
      /* ... + c(i)*(x-x(i))^2 + ... */
      temp = vmulq_f32(temp, diff);
      yv = vmlaq_f32(yv, civ, temp);
      /* ... + d(i)*(x-x(i))^3 */
      temp = vmulq_f32(temp, diff);
      yv = vmlaq_f32(yv, div, temp);

      /* Store [y(k) y(k+1) y(k+2) y(k+3)] */
      vst1q_f32(pDst, yv);
      pDst+=4;

      blkCnt-=4;
    }
#endif
    /* Check the remaining count before dereferencing pXq, so that the
     * element one past the end of xq is never read once every query point
     * has been consumed. */
    while( blkCnt > 0 && *pXq <= x[i+1] )
    {
      x_sc = *pXq++;

      *pDst = y[i]+b[i]*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+d[i]*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]);

      pDst++;
      blkCnt--;
    }
  }

  /* Create output for remaining samples (x>=x(n)), using the coefficients
   * of the last interval */
#ifdef ARM_MATH_NEON
  /* Compute 4 outputs at a time. xiv/aiv/biv/civ/div still hold the values
   * loaded during the last pass of the loop above. */
  blkCnt2 = blkCnt >> 2;

  while(blkCnt2 > 0)
  {
    /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */
    xqv = vld1q_f32(pXq);
    pXq+=4;

    /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */
    diff = vsubq_f32(xqv, xiv);
    temp = diff;

    /* y(i) = a(i) + ... */
    yv = aiv;
    /* ... + b(i)*(x-x(i)) + ... */
    yv = vmlaq_f32(yv, biv, temp);
    /* ... + c(i)*(x-x(i))^2 + ... */
    temp = vmulq_f32(temp, diff);
    yv = vmlaq_f32(yv, civ, temp);
    /* ... + d(i)*(x-x(i))^3 */
    temp = vmulq_f32(temp, diff);
    yv = vmlaq_f32(yv, div, temp);

    /* Store [y(k) y(k+1) y(k+2) y(k+3)] */
    vst1q_f32(pDst, yv);
    pDst+=4;

    blkCnt2--;
  }

  /* Tail */
  blkCnt2 = blkCnt & 3;
#else
  blkCnt2 = blkCnt;
#endif

  /* After the interval loop i equals n-1, so i-1 indexes the last interval */
  while(blkCnt2 > 0)
  {
    x_sc = *pXq++;

    *pDst = y[i-1]+b[i-1]*(x_sc-x[i-1])+c[i-1]*(x_sc-x[i-1])*(x_sc-x[i-1])+d[i-1]*(x_sc-x[i-1])*(x_sc-x[i-1])*(x_sc-x[i-1]);

    pDst++;
    blkCnt2--;
  }
}
/**
@} end of SplineInterpolate group
*/

View file

@ -0,0 +1,175 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_spline_interp_init_f32.c
* Description: Floating-point cubic spline initialization function
*
* $Date: 13 November 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupSupport
*/
/**
@addtogroup SplineInterpolate
@{
@par Initialization function
The initialization function takes as input two arrays that the user has to allocate:
<code>coeffs</code> will contain the b, c, and d coefficients for the (n-1) intervals
(n is the number of known points), hence its size must be 3*(n-1); <code>tempBuffer</code>
is used temporarily for internal computations and its size must be n+n-1.
@par
The x input array must be strictly sorted in ascending order and it must
not contain twice the same value (x(i)<x(i+1)).
*/
/**
* @brief Initialization function for the floating-point cubic spline interpolation.
* @param[in,out] S points to an instance of the floating-point spline structure.
* @param[in] type type of cubic spline interpolation (boundary conditions)
* @param[in] x points to the x values of the known data points.
* @param[in] y points to the y values of the known data points.
* @param[in] n number of known data points.
* @param[in] coeffs coefficients array for b, c, and d
* @param[in] tempBuffer buffer array for internal computations
*
*/
void arm_spline_init_f32(
        arm_spline_instance_f32 * S,
        arm_spline_type type,
  const float32_t * x,
  const float32_t * y,
        uint32_t n,
        float32_t * coeffs,
        float32_t * tempBuffer)
{
  /*** COEFFICIENTS COMPUTATION ***/

  /* Boundary conditions selected by 'type':
     - Natural spline          ( S1''(x1) = 0 ; Sn''(xn) = 0 )
     - Parabolic runout spline ( S1''(x1) = S2''(x2) ; Sn-1''(xn-1) = Sn''(xn) ) */

  /* Output layout: three consecutive (n-1)-long arrays inside coeffs */
  float32_t * b = coeffs;
  float32_t * c = coeffs+(n-1);
  float32_t * d = coeffs+(2*(n-1));

  /* Scratch layout: (n-1) u elements followed by n z elements */
  float32_t * u = tempBuffer;
  float32_t * z = tempBuffer+(n-1);

  float32_t hCur, hPrev;   /* h(i) and h(i-1) */
  float32_t rhs;           /* B(i), i-th element of the right-hand side */
  float32_t diag;          /* l(i), i-th diagonal element of L */
  float32_t cNext;         /* c(i+1) carried through the back substitution */

  int32_t i;               /* Loop counter */

  /* Keep references to the caller's data in the instance */
  S->x = x;
  S->y = y;
  S->n_x = n;

  /* == Forward sweep: solve LZ=B to obtain z(i) and u(i) == */

  /* -- Row 1 -- */
  /* B(1) = 0, so only u(1,2) = a(1,2)/a(1,1) = a(1,2) depends on the type */
  switch (type)
  {
    case ARM_SPLINE_NATURAL:
      u[0] = 0;   /* a(1,2) = 0 */
      break;

    case ARM_SPLINE_PARABOLIC_RUNOUT:
      u[0] = -1;  /* a(1,2) = -1 */
      break;

    default:
      /* Any other type leaves u[0] unset */
      break;
  }

  z[0] = 0;  /* z(1) = B(1)/a(1,1) = 0 always */

  /* -- Interior rows -- */
  hPrev = x[1] - x[0];  /* First interval width */

  for (i=1; i<(int32_t)n-1; i++)
  {
    /* Right-hand side B(i) */
    hCur = x[i+1]-x[i];
    rhs = 3*(y[i+1]-y[i])/hCur - 3*(y[i]-y[i-1])/hPrev;

    /* l(i) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */
    diag = 2*(hCur+hPrev) - hPrev*u[i-1];

    /* u(i) = h(i)/l(i) */
    u[i] = hCur/diag;

    /* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */
    z[i] = (rhs-hPrev*z[i-1])/diag;

    /* The current width becomes the previous one for the next row */
    hPrev = hCur;
  }

  /* -- Last row -- */
  /* l(N) = a(N,N)-a(N,N-1)u(N-1) */
  /* z(N) = [-a(N,N-1)z(N-1)]/l(N) */
  switch (type)
  {
    case ARM_SPLINE_NATURAL:
      /* a(N,N) = 1; a(N,N-1) = 0, hence z(N) = 0 */
      z[n-1] = 0;
      break;

    case ARM_SPLINE_PARABOLIC_RUNOUT:
      /* a(N,N) = 1; a(N,N-1) = -1 */
      diag = 1+u[n-2];
      z[n-1] = z[n-2]/diag;
      break;

    default:
      /* Any other type leaves z[n-1] unset */
      break;
  }

  /* == Back substitution: solve UX = Z to obtain c(i), then derive
        b(i) and d(i) from c(i) == */
  cNext = z[n-1];  /* c(N) = z(N) */

  for (i=n-2; i>=0; i--)
  {
    /* c(i) = z(i)-u(i)*c(i+1) */
    c[i] = z[i]-u[i]*cNext;

    hCur = x[i+1]-x[i];

    /* b(i) = [y(i+1)-y(i)]/h(i)-h(i)*[c(i+1)+2*c(i)]/3 */
    b[i] = (y[i+1]-y[i])/hCur-hCur*(cNext+2*c[i])/3;

    /* d(i) = [c(i+1)-c(i)]/[3*h(i)] */
    d[i] = (cNext-c[i])/(3*hCur);

    /* c(i) becomes c(i+1) for the next (lower) row */
    cNext = c[i];
  }

  /* == Finally, publish the coefficient array through the instance == */
  S->coeffs = coeffs;
}
/**
@} end of SplineInterpolate group
*/

View file

@ -0,0 +1,185 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_weighted_sum_f32.c
* Description: Weighted Sum
*
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup groupSupport
* @{
*/
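/**
* @brief Weighted sum
*
*
* @param[in] *in Array of input values.
* @param[in] *weigths Array of weights, one per input value.
* @param[in] blockSize Number of samples in the input array.
* @return Weighted sum of the inputs normalized by the sum of the weights,
*         i.e. sum(in[i] * weigths[i]) / sum(weigths[i]).
*
*/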
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
float32_t arm_weighted_sum_f32(const float32_t *in,const float32_t *weigths, uint32_t blockSize)
{
    /*
     * Helium (MVE) variant.
     * Returns sum(in[i] * weigths[i]) / sum(weigths[i]) over blockSize samples.
     * There is no guard on the divisor: the final division is performed even
     * when the accumulated weight is zero (e.g. blockSize == 0).
     */
    const float32_t *pSample = in;       /* read cursor over the input values */
    const float32_t *pWeight = weigths;  /* read cursor over the weights      */
    f32x4_t prodAccV   = vdupq_n_f32(0.0f); /* per-lane running sum of in*w   */
    f32x4_t weightAccV = vdupq_n_f32(0.0f); /* per-lane running sum of w      */
    float32_t prodAcc, weightAcc;
    uint32_t remaining;

    /* Vector part: consume four samples and four weights per iteration. */
    for (remaining = blockSize >> 2; remaining > 0; remaining--)
    {
        f32x4_t sampleV = vld1q(pSample);
        f32x4_t weightV = vld1q(pWeight);

        pSample += 4;
        pWeight += 4;

        prodAccV   = vfmaq(prodAccV, sampleV, weightV);
        weightAccV = vaddq(weightAccV, weightV);
    }

    /* Collapse each vector accumulator to a scalar.
       NOTE(review): vecAddAcrossF32Mve comes from arm_helium_utils.h and is
       presumed to add the four lanes together - confirm against that header. */
    prodAcc   = vecAddAcrossF32Mve(prodAccV);
    weightAcc = vecAddAcrossF32Mve(weightAccV);

    /* Scalar tail: the 0..3 samples left over after the vector part. */
    for (remaining = blockSize & 3; remaining > 0; remaining--)
    {
        const float32_t w = *pWeight++;

        prodAcc   += *pSample++ * w;
        weightAcc += w;
    }

    return (prodAcc / weightAcc);
}
#else
#if defined(ARM_MATH_NEON)
#include "NEMath.h"
float32_t arm_weighted_sum_f32(const float32_t *in,const float32_t *weigths, uint32_t blockSize)
{
    /*
     * NEON variant.
     * Returns sum(in[i] * weigths[i]) / sum(weigths[i]) over blockSize samples.
     * There is no guard on the divisor: the final division is performed even
     * when the accumulated weight is zero (e.g. blockSize == 0).
     */
    const float32_t *pSample = in;       /* read cursor over the input values */
    const float32_t *pWeight = weigths;  /* read cursor over the weights      */
    float32x4_t prodAccV   = vdupq_n_f32(0.0f); /* per-lane running sum of in*w */
    float32x4_t weightAccV = vdupq_n_f32(0.0f); /* per-lane running sum of w    */
    float32x2_t pairSum;                 /* scratch for the horizontal reduction */
    float32_t prodAcc, weightAcc;
    uint32_t remaining;

    /* Vector part: consume four samples and four weights per iteration. */
    for (remaining = blockSize >> 2; remaining > 0; remaining--)
    {
        const float32x4_t sampleV = vld1q_f32(pSample);
        const float32x4_t weightV = vld1q_f32(pWeight);

        pSample += 4;
        pWeight += 4;

        prodAccV   = vmlaq_f32(prodAccV, sampleV, weightV);
        weightAccV = vaddq_f32(weightAccV, weightV);
    }

    /* Horizontal reduction: pairwise-add the low and high halves, then add
       the two resulting lanes to obtain one scalar per accumulator. */
    pairSum = vpadd_f32(vget_low_f32(prodAccV), vget_high_f32(prodAccV));
    prodAcc = vget_lane_f32(pairSum, 0) + vget_lane_f32(pairSum, 1);

    pairSum   = vpadd_f32(vget_low_f32(weightAccV), vget_high_f32(weightAccV));
    weightAcc = vget_lane_f32(pairSum, 0) + vget_lane_f32(pairSum, 1);

    /* Scalar tail: the 0..3 samples left over after the vector part. */
    for (remaining = blockSize & 3; remaining > 0; remaining--)
    {
        const float32_t w = *pWeight++;

        prodAcc   += *pSample++ * w;
        weightAcc += w;
    }

    return(prodAcc / weightAcc);
}
#else
float32_t arm_weighted_sum_f32(const float32_t *in, const float32_t *weigths, uint32_t blockSize)
{
    /*
     * Portable scalar variant.
     * Returns sum(in[i] * weigths[i]) / sum(weigths[i]) over blockSize samples.
     * There is no guard on the divisor: the final division is performed even
     * when the accumulated weight is zero (e.g. blockSize == 0).
     */
    const float32_t *pSample = in;       /* read cursor over the input values */
    const float32_t *pWeight = weigths;  /* read cursor over the weights      */
    float32_t prodAcc   = 0.0f;          /* running sum of in[i] * weigths[i] */
    float32_t weightAcc = 0.0f;          /* running sum of weigths[i]         */
    uint32_t remaining;

    /* Walk both arrays in lock-step, one sample per iteration. */
    for (remaining = blockSize; remaining > 0; remaining--)
    {
        const float32_t w = *pWeight++;

        prodAcc   += *pSample++ * w;
        weightAcc += w;
    }

    return(prodAcc / weightAcc);
}
#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSupport group
*/