1
0
Fork 0
mirror of https://github.com/betaflight/betaflight.git synced 2025-07-16 04:45:24 +03:00

AT32F435/7 Libraries (#12158) (#12263)

Source: https://github.com/ArteryTek/AT32F435_437_Firmware_Library
Version: 2.1.1
This commit is contained in:
J Blackman 2023-01-31 08:05:32 +11:00 committed by GitHub
parent 5e16ddb01b
commit 8900a831e5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
559 changed files with 289319 additions and 0 deletions

View file

@ -0,0 +1,144 @@
# Build description for the CMSIS-DSP transform (FFT / DCT) static library.
#
# The library target is created without sources and the sources are attached
# afterwards with target_sources(). add_library() only accepts an empty source
# list from CMake 3.11 onwards, so that is the real minimum version.
cmake_minimum_required(VERSION 3.11)

project(CMSISDSPTransform)

# Project-local helper modules; presumably they provide the configLib() and
# configDsp() commands invoked below.
include(configLib)
include(configDsp)

add_library(CMSISDSPTransform STATIC)

configLib(CMSISDSPTransform ${ROOT})
configDsp(CMSISDSPTransform ${ROOT})

# fft() is expected to come from the project's fft module.
include(fft)
fft(CMSISDSPTransform)

# When table selection is enabled and every FFT is requested, expose the
# ARM_ALL_FFT_TABLES definition to this target and its consumers.
if(CONFIGTABLE AND ALLFFT)
  target_compile_definitions(CMSISDSPTransform PUBLIC ARM_ALL_FFT_TABLES)
endif()

# Bit reversal helpers are always part of the library.
target_sources(CMSISDSPTransform PRIVATE arm_bitreversal.c)
target_sources(CMSISDSPTransform PRIVATE arm_bitreversal2.c)

# Single-precision complex FFT: built when tables are not configurable, when
# all FFTs are requested, or when any individual CFFT_F32_<len> is selected.
if(NOT CONFIGTABLE OR ALLFFT
   OR CFFT_F32_16 OR CFFT_F32_32 OR CFFT_F32_64 OR CFFT_F32_128
   OR CFFT_F32_256 OR CFFT_F32_512 OR CFFT_F32_1024 OR CFFT_F32_2048
   OR CFFT_F32_4096)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_f32.c)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f32.c)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_f32.c)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_f32.c)
endif()
# Double-precision complex FFT: built when tables are not configurable, when
# all FFTs are requested, or when any individual CFFT_F64_<len> is selected.
# The init source is listed next to the processing source, matching the
# f32 / q15 / q31 groups which each add their arm_cfft_init_*.c file.
if(NOT CONFIGTABLE OR ALLFFT
   OR CFFT_F64_16 OR CFFT_F64_32 OR CFFT_F64_64 OR CFFT_F64_128
   OR CFFT_F64_256 OR CFFT_F64_512 OR CFFT_F64_1024 OR CFFT_F64_2048
   OR CFFT_F64_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_cfft_f64.c
    arm_cfft_init_f64.c)
endif()
# Q15 complex FFT sources.
if(NOT CONFIGTABLE OR ALLFFT
   OR CFFT_Q15_16 OR CFFT_Q15_32 OR CFFT_Q15_64 OR CFFT_Q15_128
   OR CFFT_Q15_256 OR CFFT_Q15_512 OR CFFT_Q15_1024 OR CFFT_Q15_2048
   OR CFFT_Q15_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_cfft_radix2_q15.c
    arm_cfft_radix4_q15.c
    arm_cfft_q15.c
    arm_cfft_init_q15.c)
endif()

# Q31 complex FFT sources.
if(NOT CONFIGTABLE OR ALLFFT
   OR CFFT_Q31_16 OR CFFT_Q31_32 OR CFFT_Q31_64 OR CFFT_Q31_128
   OR CFFT_Q31_256 OR CFFT_Q31_512 OR CFFT_Q31_1024 OR CFFT_Q31_2048
   OR CFFT_Q31_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_cfft_radix2_q31.c
    arm_cfft_radix4_q31.c
    arm_cfft_q31.c
    arm_cfft_init_q31.c)
endif()

# Fixed-point radix-2 init functions are only added for the "everything" builds.
if(NOT CONFIGTABLE OR ALLFFT)
  target_sources(CMSISDSPTransform PRIVATE
    arm_cfft_radix2_init_q15.c
    arm_cfft_radix2_init_q31.c)
endif()

# Single-precision DCT4 together with the RFFT / radix-4 CFFT sources listed
# alongside it.
if(NOT CONFIGTABLE OR ALLFFT
   OR DCT4_F32_128 OR DCT4_F32_512 OR DCT4_F32_2048 OR DCT4_F32_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_dct4_f32.c
    arm_dct4_init_f32.c
    arm_rfft_init_f32.c
    arm_rfft_f32.c
    arm_cfft_radix4_init_f32.c
    arm_cfft_radix4_f32.c)
endif()

# Q31 DCT4 together with the RFFT / CFFT sources listed alongside it.
if(NOT CONFIGTABLE OR ALLFFT
   OR DCT4_Q31_128 OR DCT4_Q31_512 OR DCT4_Q31_2048 OR DCT4_Q31_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_dct4_q31.c
    arm_dct4_init_q31.c
    arm_rfft_init_q31.c
    arm_rfft_q31.c
    arm_cfft_q31.c
    arm_cfft_init_q31.c
    arm_cfft_radix4_init_q31.c
    arm_cfft_radix4_q31.c)
endif()
# Q15 DCT4 together with the RFFT / CFFT sources listed alongside it.
# (The condition previously tested ALLFFT twice; the duplicate term is dropped,
# which does not change the result.)
if(NOT CONFIGTABLE OR ALLFFT
   OR DCT4_Q15_128 OR DCT4_Q15_512 OR DCT4_Q15_2048 OR DCT4_Q15_8192)
  target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_q15.c)
  target_sources(CMSISDSPTransform PRIVATE arm_dct4_q15.c)
  target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c)
  target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q15.c)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
endif()
# Single-precision fast real FFT plus the complex FFT sources listed with it.
if(NOT CONFIGTABLE OR ALLFFT
   OR RFFT_FAST_F32_32 OR RFFT_FAST_F32_64 OR RFFT_FAST_F32_128
   OR RFFT_FAST_F32_256 OR RFFT_FAST_F32_512 OR RFFT_FAST_F32_1024
   OR RFFT_FAST_F32_2048 OR RFFT_FAST_F32_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_fast_f32.c
    arm_rfft_fast_init_f32.c
    arm_cfft_f32.c
    arm_cfft_init_f32.c
    arm_cfft_radix8_f32.c)
endif()
# Double-precision fast real FFT.
# arm_cfft_f64.c is listed as well, mirroring the f32 group which pulls in
# arm_cfft_f32.c: with CONFIGTABLE set and only an RFFT_FAST_F64_<len> option
# selected, the complex FFT source would otherwise not be compiled.
# NOTE(review): assumes arm_rfft_fast_f64.c calls arm_cfft_f64() - confirm.
if(NOT CONFIGTABLE OR ALLFFT
   OR RFFT_FAST_F64_32 OR RFFT_FAST_F64_64 OR RFFT_FAST_F64_128
   OR RFFT_FAST_F64_256 OR RFFT_FAST_F64_512 OR RFFT_FAST_F64_1024
   OR RFFT_FAST_F64_2048 OR RFFT_FAST_F64_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_fast_f64.c
    arm_rfft_fast_init_f64.c
    arm_cfft_f64.c)
endif()
# Single-precision real FFT (radix-4 based).
if(NOT CONFIGTABLE OR ALLFFT
   OR RFFT_F32_128 OR RFFT_F32_512 OR RFFT_F32_2048 OR RFFT_F32_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_init_f32.c
    arm_rfft_f32.c
    arm_cfft_radix4_init_f32.c
    arm_cfft_radix4_f32.c)
endif()

# Q15 real FFT.
if(NOT CONFIGTABLE OR ALLFFT
   OR RFFT_Q15_32 OR RFFT_Q15_64 OR RFFT_Q15_128 OR RFFT_Q15_256
   OR RFFT_Q15_512 OR RFFT_Q15_1024 OR RFFT_Q15_2048 OR RFFT_Q15_4096
   OR RFFT_Q15_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_init_q15.c
    arm_rfft_q15.c
    arm_cfft_q15.c
    arm_cfft_init_q15.c
    arm_cfft_radix4_q15.c)
endif()

# Q31 real FFT.
if(NOT CONFIGTABLE OR ALLFFT
   OR RFFT_Q31_32 OR RFFT_Q31_64 OR RFFT_Q31_128 OR RFFT_Q31_256
   OR RFFT_Q31_512 OR RFFT_Q31_1024 OR RFFT_Q31_2048 OR RFFT_Q31_4096
   OR RFFT_Q31_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_init_q31.c
    arm_rfft_q31.c
    arm_cfft_q31.c
    arm_cfft_init_q31.c
    arm_cfft_radix4_q31.c)
endif()

# For scipy or wrappers or benchmarks
if(WRAPPER)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_f32.c)
endif()

### Includes
# Public so that targets linking this library inherit the DSP header path.
target_include_directories(CMSISDSPTransform PUBLIC "${DSP}/Include")

View file

@ -0,0 +1,67 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: TransformFunctions.c
* Description: Combination of all transform function source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_bitreversal.c"
#include "arm_bitreversal2.c"
#include "arm_cfft_f32.c"
#include "arm_cfft_f64.c"
#include "arm_cfft_q15.c"
#include "arm_cfft_q31.c"
#include "arm_cfft_init_f32.c"
#include "arm_cfft_init_f64.c"
#include "arm_cfft_init_q15.c"
#include "arm_cfft_init_q31.c"
#include "arm_cfft_radix2_f32.c"
#include "arm_cfft_radix2_init_f32.c"
#include "arm_cfft_radix2_init_q15.c"
#include "arm_cfft_radix2_init_q31.c"
#include "arm_cfft_radix2_q15.c"
#include "arm_cfft_radix2_q31.c"
#include "arm_cfft_radix4_f32.c"
#include "arm_cfft_radix4_init_f32.c"
#include "arm_cfft_radix4_init_q15.c"
#include "arm_cfft_radix4_init_q31.c"
#include "arm_cfft_radix4_q15.c"
#include "arm_cfft_radix4_q31.c"
#include "arm_cfft_radix8_f32.c"
#include "arm_dct4_f32.c"
#include "arm_dct4_init_f32.c"
#include "arm_dct4_init_q15.c"
#include "arm_dct4_init_q31.c"
#include "arm_dct4_q15.c"
#include "arm_dct4_q31.c"
#include "arm_rfft_f32.c"
#include "arm_rfft_fast_f32.c"
#include "arm_rfft_fast_f64.c"
#include "arm_rfft_fast_init_f32.c"
#include "arm_rfft_fast_init_f64.c"
#include "arm_rfft_init_f32.c"
#include "arm_rfft_init_q15.c"
#include "arm_rfft_init_q31.c"
#include "arm_rfft_q15.c"
#include "arm_rfft_q31.c"

View file

@ -0,0 +1,229 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_bitreversal.c
* Description: Bitreversal functions
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
  @brief         In-place floating-point bit reversal function.
  @param[in,out] pSrc          points to in-place floating-point data buffer; each logical
                               element k occupies the two values pSrc[2*k] and pSrc[2*k+1]
  @param[in]     fftSize       length of FFT
  @param[in]     bitRevFactor  bit reversal modifier that supports different size FFTs with the same bit reversal table
  @param[in]     pBitRevTab    points to bit reversal table
  @return        none

  @par           Small sizes
                   For fftSize below 4 the function returns without touching the
                   buffer. Without this guard the loop bound (fftLenBy2 - 2U)
                   wraps around in unsigned arithmetic and the loop never ends.
 */
void arm_bitreversal_f32(
        float32_t * pSrc,
        uint16_t fftSize,
        uint16_t bitRevFactor,
  const uint16_t * pBitRevTab)
{
   uint16_t fftLenBy2, fftLenBy2p1;
   uint16_t i, j;
   float32_t in;

   /* Initializations */
   j = 0U;
   fftLenBy2 = fftSize >> 1U;
   fftLenBy2p1 = (fftSize >> 1U) + 1U;

   /* Nothing to reorder; also keeps (fftLenBy2 - 2U) from wrapping below */
   if (fftLenBy2 < 2U)
   {
      return;
   }

   /* Bit Reversal Implementation */
   for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
   {
      if (i < j)
      {
         /* element i <-> element j (first value of the pair) */
         in = pSrc[2U * i];
         pSrc[2U * i] = pSrc[2U * j];
         pSrc[2U * j] = in;

         /* element i <-> element j (second value of the pair) */
         in = pSrc[(2U * i) + 1U];
         pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
         pSrc[(2U * j) + 1U] = in;

         /* element i+fftLenBy2p1 <-> element j+fftLenBy2p1 (first value) */
         in = pSrc[2U * (i + fftLenBy2p1)];
         pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
         pSrc[2U * (j + fftLenBy2p1)] = in;

         /* element i+fftLenBy2p1 <-> element j+fftLenBy2p1 (second value) */
         in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
         pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
         pSrc[(2U * (j + fftLenBy2p1)) + 1U];
         pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;
      }

      /* element i+1 <-> element j+fftLenBy2 (first value); done unconditionally */
      in = pSrc[2U * (i + 1U)];
      pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
      pSrc[2U * (j + fftLenBy2)] = in;

      /* element i+1 <-> element j+fftLenBy2 (second value) */
      in = pSrc[(2U * (i + 1U)) + 1U];
      pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
      pSrc[(2U * (j + fftLenBy2)) + 1U] = in;

      /* Reading the index for the bit reversal */
      j = *pBitRevTab;

      /* Updating the bit reversal index depending on the fft length */
      pBitRevTab += bitRevFactor;
   }
}
/**
  @brief         In-place Q31 bit reversal function.
  @param[in,out] pSrc          points to in-place Q31 data buffer; each logical element k
                               occupies the two values pSrc[2*k] and pSrc[2*k+1]
  @param[in]     fftLen        length of FFT.
  @param[in]     bitRevFactor  bit reversal modifier that supports different size FFTs with the same bit reversal table
  @param[in]     pBitRevTab    points to bit reversal table
  @return        none

  @par           Small sizes
                   For fftLen below 4 the function returns without touching the
                   buffer. Without this guard the loop bound (fftLenBy2 - 2U)
                   wraps around in unsigned arithmetic and the loop never ends.
*/
void arm_bitreversal_q31(
        q31_t * pSrc,
        uint32_t fftLen,
        uint16_t bitRevFactor,
  const uint16_t * pBitRevTab)
{
   uint32_t fftLenBy2, fftLenBy2p1, i, j;
   q31_t in;

   /* Initializations */
   j = 0U;
   fftLenBy2 = fftLen / 2U;
   fftLenBy2p1 = (fftLen / 2U) + 1U;

   /* Nothing to reorder; also keeps (fftLenBy2 - 2U) from wrapping below */
   if (fftLenBy2 < 2U)
   {
      return;
   }

   /* Bit Reversal Implementation */
   for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
   {
      if (i < j)
      {
         /* element i <-> element j (first value of the pair) */
         in = pSrc[2U * i];
         pSrc[2U * i] = pSrc[2U * j];
         pSrc[2U * j] = in;

         /* element i <-> element j (second value of the pair) */
         in = pSrc[(2U * i) + 1U];
         pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
         pSrc[(2U * j) + 1U] = in;

         /* element i+fftLenBy2p1 <-> element j+fftLenBy2p1 (first value) */
         in = pSrc[2U * (i + fftLenBy2p1)];
         pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
         pSrc[2U * (j + fftLenBy2p1)] = in;

         /* element i+fftLenBy2p1 <-> element j+fftLenBy2p1 (second value) */
         in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
         pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
         pSrc[(2U * (j + fftLenBy2p1)) + 1U];
         pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;
      }

      /* element i+1 <-> element j+fftLenBy2 (first value); done unconditionally */
      in = pSrc[2U * (i + 1U)];
      pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
      pSrc[2U * (j + fftLenBy2)] = in;

      /* element i+1 <-> element j+fftLenBy2 (second value) */
      in = pSrc[(2U * (i + 1U)) + 1U];
      pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
      pSrc[(2U * (j + fftLenBy2)) + 1U] = in;

      /* Reading the index for the bit reversal */
      j = *pBitRevTab;

      /* Updating the bit reversal index depending on the fft length */
      pBitRevTab += bitRevFactor;
   }
}
/**
  @brief         In-place Q15 bit reversal function.
  @param[in,out] pSrc16        points to in-place Q15 data buffer
  @param[in]     fftLen        length of FFT
  @param[in]     bitRevFactor  bit reversal modifier that supports different size FFTs with the same bit reversal table
  @param[in]     pBitRevTab    points to bit reversal table
  @return        none

  @par           Implementation note
                   The buffer is accessed through a q31_t pointer, so every swap
                   moves two adjacent Q15 values as one 32-bit word.

  @par           Small sizes
                   For fftLen below 4 the function returns without touching the
                   buffer. Without this guard the loop bound (fftLenBy2 - 2U)
                   wraps around in unsigned arithmetic and the loop never ends.
*/
void arm_bitreversal_q15(
        q15_t * pSrc16,
        uint32_t fftLen,
        uint16_t bitRevFactor,
  const uint16_t * pBitRevTab)
{
   q31_t *pSrc = (q31_t *) pSrc16;
   q31_t in;
   uint32_t fftLenBy2, fftLenBy2p1;
   uint32_t i, j;

   /* Initializations */
   j = 0U;
   fftLenBy2 = fftLen / 2U;
   fftLenBy2p1 = (fftLen / 2U) + 1U;

   /* Nothing to reorder; also keeps (fftLenBy2 - 2U) from wrapping below */
   if (fftLenBy2 < 2U)
   {
      return;
   }

   /* Bit Reversal Implementation */
   for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
   {
      if (i < j)
      {
         /* word i <-> word j (both Q15 values of the pair at once) */
         in = pSrc[i];
         pSrc[i] = pSrc[j];
         pSrc[j] = in;

         /* word i+fftLenBy2p1 <-> word j+fftLenBy2p1 */
         in = pSrc[i + fftLenBy2p1];
         pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
         pSrc[j + fftLenBy2p1] = in;
      }

      /* word i+1 <-> word j+fftLenBy2; done unconditionally */
      in = pSrc[i + 1U];
      pSrc[i + 1U] = pSrc[j + fftLenBy2];
      pSrc[j + fftLenBy2] = in;

      /* Reading the index for the bit reversal */
      j = *pBitRevTab;

      /* Updating the bit reversal index depending on the fft length */
      pBitRevTab += bitRevFactor;
   }
}

View file

@ -0,0 +1,216 @@
;/* ----------------------------------------------------------------------
; * Project: CMSIS DSP Library
; * Title: arm_bitreversal2.S
; * Description: arm_bitreversal_32 function done in assembly for maximum speed.
; * Called after doing an fft to reorder the output.
; * The function is loop unrolled by 2. arm_bitreversal_16 as well.
; *
; * $Date: 18. March 2019
; * $Revision: V1.5.2
; *
; * Target Processor: Cortex-M cores
; * -------------------------------------------------------------------- */
;/*
; * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
; *
; * SPDX-License-Identifier: Apache-2.0
; *
; * Licensed under the Apache License, Version 2.0 (the License); you may
; * not use this file except in compliance with the License.
; * You may obtain a copy of the License at
; *
; * www.apache.org/licenses/LICENSE-2.0
; *
; * Unless required by applicable law or agreed to in writing, software
; * distributed under the License is distributed on an AS IS BASIS, WITHOUT
; * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; * See the License for the specific language governing permissions and
; * limitations under the License.
; */
#if defined ( __CC_ARM ) /* Keil */
#define CODESECT AREA ||.text||, CODE, READONLY, ALIGN=2
#define LABEL
#elif defined ( __IASMARM__ ) /* IAR */
#define CODESECT SECTION `.text`:CODE
#define PROC
#define LABEL
#define ENDP
#define EXPORT PUBLIC
#elif defined ( __CSMC__ ) /* Cosmic */
#define CODESECT switch .text
#define THUMB
#define EXPORT xdef
#define PROC :
#define LABEL :
#define ENDP
#define arm_bitreversal_32 _arm_bitreversal_32
#elif defined ( __TI_ARM__ ) /* TI ARM */
#define THUMB .thumb
#define CODESECT .text
#define EXPORT .global
#define PROC : .asmfunc
#define LABEL :
#define ENDP .endasmfunc
#define END
#elif defined ( __GNUC__ ) /* GCC */
#define THUMB .thumb
#define CODESECT .section .text
#define EXPORT .global
#define PROC :
#define LABEL :
#define ENDP
#define END
.syntax unified
#endif
CODESECT
THUMB
;/**
; @brief In-place bit reversal function.
; @param[in,out] pSrc points to the in-place buffer of unknown 32-bit data type
; @param[in] bitRevLen bit reversal table length
; @param[in] pBitRevTab points to bit reversal table
; @return none
; */
EXPORT arm_bitreversal_32
EXPORT arm_bitreversal_16
#if defined ( __CC_ARM ) /* Keil */
#elif defined ( __IASMARM__ ) /* IAR */
#elif defined ( __CSMC__ ) /* Cosmic */
#elif defined ( __TI_ARM__ ) /* TI ARM */
#elif defined ( __GNUC__ ) /* GCC */
.type arm_bitreversal_16, %function
.type arm_bitreversal_32, %function
#endif
#if defined (ARM_MATH_CM0_FAMILY)
;/* arm_bitreversal_32, variant selected when ARM_MATH_CM0_FAMILY is defined. */
;/* Register use as read from the code: r0 is the buffer base that table      */
;/* values are added to, r1 arrives holding the table length and r2 the table */
;/* pointer (presumably r0-r2 = pSrc, bitRevLen, pBitRevTab per the calling   */
;/* convention - confirm). One table pair is processed per loop iteration.    */
arm_bitreversal_32 PROC
;/* r3 = (bitRevLen + 1) >> 1 : number of table pairs; r1 = table pointer */
ADDS r3,r1,#1
PUSH {r4-r6}
ADDS r1,r2,#0
LSRS r3,r3,#1
;/* NOTE(review): there is no zero-length check here; if r3 is 0 the SUBS below wraps before BNE - confirm callers never pass 0 */
arm_bitreversal_32_0 LABEL
;/* load the two halfword table entries and turn them into addresses (base + entry) */
LDRH r2,[r1,#2]
LDRH r6,[r1,#0]
ADD r2,r0,r2
ADD r6,r0,r6
;/* swap the 32-bit words at offset 0 of both addresses */
LDR r5,[r2,#0]
LDR r4,[r6,#0]
STR r5,[r6,#0]
STR r4,[r2,#0]
;/* swap the 32-bit words at offset 4 of both addresses */
LDR r5,[r2,#4]
LDR r4,[r6,#4]
STR r5,[r6,#4]
STR r4,[r2,#4]
;/* advance to the next table pair (2 halfwords) and loop until the count reaches 0 */
ADDS r1,r1,#4
SUBS r3,r3,#1
BNE arm_bitreversal_32_0
POP {r4-r6}
BX lr
ENDP
;/* arm_bitreversal_16, variant selected when ARM_MATH_CM0_FAMILY is defined. */
;/* Same structure as arm_bitreversal_32 above, except that each table entry  */
;/* is halved before being added to the base in r0 and only one 32-bit word   */
;/* is swapped per table pair.                                                */
arm_bitreversal_16 PROC
;/* r3 = (bitRevLen + 1) >> 1 : number of table pairs; r1 = table pointer */
ADDS r3,r1,#1
PUSH {r4-r6}
ADDS r1,r2,#0
LSRS r3,r3,#1
;/* NOTE(review): there is no zero-length check here; if r3 is 0 the SUBS below wraps before BNE - confirm callers never pass 0 */
arm_bitreversal_16_0 LABEL
;/* load the two halfword table entries, halve them, and add the base address */
LDRH r2,[r1,#2]
LDRH r6,[r1,#0]
LSRS r2,r2,#1
LSRS r6,r6,#1
ADD r2,r0,r2
ADD r6,r0,r6
;/* swap one 32-bit word between the two addresses */
LDR r5,[r2,#0]
LDR r4,[r6,#0]
STR r5,[r6,#0]
STR r4,[r2,#0]
;/* advance to the next table pair (2 halfwords) and loop until the count reaches 0 */
ADDS r1,r1,#4
SUBS r3,r3,#1
BNE arm_bitreversal_16_0
POP {r4-r6}
BX lr
ENDP
#else
;/* arm_bitreversal_32, variant used when ARM_MATH_CM0_FAMILY is not defined. */
;/* Register use as read from the code: r0 is the buffer base that table      */
;/* values are added to, r1 arrives holding the table length and r2 the table */
;/* pointer (presumably r0-r2 = pSrc, bitRevLen, pBitRevTab per the calling   */
;/* convention - confirm). Two table pairs are processed per loop iteration.  */
arm_bitreversal_32 PROC
;/* return immediately when bitRevLen + 1 <= 1 (unsigned compare) */
ADDS r3,r1,#1
CMP r3,#1
IT LS
BXLS lr
PUSH {r4-r9}
;/* r1 = table pointer + 2, so offsets -2, 0, 2, 4 address four consecutive halfword entries */
ADDS r1,r2,#2
;/* r3 = (bitRevLen + 1) >> 2 : loop count */
;/* NOTE(review): if this shift yields 0 the SUBS below wraps before BNE - confirm callers never pass such a length */
LSRS r3,r3,#2
arm_bitreversal_32_0 LABEL ;/* loop unrolled by 2 */
;/* load four table entries: r12/r2 form the first pair, r9/r8 the second */
LDRH r8,[r1,#4]
LDRH r9,[r1,#2]
LDRH r2,[r1,#0]
LDRH r12,[r1,#-2]
;/* convert the entries into addresses (base + entry) */
ADD r8,r0,r8
ADD r9,r0,r9
ADD r2,r0,r2
ADD r12,r0,r12
;/* swap the words at offset 0: [r9] <-> [r8] and [r12] <-> [r2] */
LDR r7,[r9,#0]
LDR r6,[r8,#0]
LDR r5,[r2,#0]
LDR r4,[r12,#0]
STR r6,[r9,#0]
STR r7,[r8,#0]
STR r5,[r12,#0]
STR r4,[r2,#0]
;/* swap the words at offset 4 of the same address pairs */
LDR r7,[r9,#4]
LDR r6,[r8,#4]
LDR r5,[r2,#4]
LDR r4,[r12,#4]
STR r6,[r9,#4]
STR r7,[r8,#4]
STR r5,[r12,#4]
STR r4,[r2,#4]
;/* advance by four halfword entries and loop until the count reaches 0 */
ADDS r1,r1,#8
SUBS r3,r3,#1
BNE arm_bitreversal_32_0
POP {r4-r9}
BX lr
ENDP
;/* arm_bitreversal_16, variant used when ARM_MATH_CM0_FAMILY is not defined. */
;/* Same structure as arm_bitreversal_32 above, except that each table entry  */
;/* is shifted right by one while being added to the base in r0 and only one  */
;/* 32-bit word is swapped per table pair.                                    */
arm_bitreversal_16 PROC
;/* return immediately when bitRevLen + 1 <= 1 (unsigned compare) */
ADDS r3,r1,#1
CMP r3,#1
IT LS
BXLS lr
PUSH {r4-r9}
;/* r1 = table pointer + 2, so offsets -2, 0, 2, 4 address four consecutive halfword entries */
ADDS r1,r2,#2
;/* r3 = (bitRevLen + 1) >> 2 : loop count */
;/* NOTE(review): if this shift yields 0 the SUBS below wraps before BNE - confirm callers never pass such a length */
LSRS r3,r3,#2
arm_bitreversal_16_0 LABEL ;/* loop unrolled by 2 */
;/* load four table entries: r12/r2 form the first pair, r9/r8 the second */
LDRH r8,[r1,#4]
LDRH r9,[r1,#2]
LDRH r2,[r1,#0]
LDRH r12,[r1,#-2]
;/* address = base + (entry >> 1) */
ADD r8,r0,r8,LSR #1
ADD r9,r0,r9,LSR #1
ADD r2,r0,r2,LSR #1
ADD r12,r0,r12,LSR #1
;/* swap one 32-bit word per pair: [r9] <-> [r8] and [r12] <-> [r2] */
LDR r7,[r9,#0]
LDR r6,[r8,#0]
LDR r5,[r2,#0]
LDR r4,[r12,#0]
STR r6,[r9,#0]
STR r7,[r8,#0]
STR r5,[r12,#0]
STR r4,[r2,#0]
;/* advance by four halfword entries and loop until the count reaches 0 */
ADDS r1,r1,#8
SUBS r3,r3,#1
BNE arm_bitreversal_16_0
POP {r4-r9}
BX lr
ENDP
#endif
END

View file

@ -0,0 +1,134 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_bitreversal2.c
* Description: Bitreversal functions
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
  @brief         In-place 64 bit reversal function.
  @param[in,out] pSrc        points to in-place buffer of unknown 64-bit data type
  @param[in]     bitRevLen   bit reversal table length
  @param[in]     pBitRevTab  points to bit reversal table
  @return        none

  @par           Details
                   The table is consumed two entries at a time. Each entry is
                   divided by 4 to obtain a buffer index, and the two adjacent
                   values starting at each index are exchanged.
*/
void arm_bitreversal_64(
        uint64_t *pSrc,
  const uint16_t bitRevLen,
  const uint16_t *pBitRevTab)
{
  uint64_t pos;

  for (pos = 0; pos < bitRevLen; pos += 2)
  {
    const uint64_t first  = pBitRevTab[pos] >> 2;
    const uint64_t second = pBitRevTab[pos + 1] >> 2;
    uint64_t saved;

    /* first value of the pair (real part) */
    saved        = pSrc[first];
    pSrc[first]  = pSrc[second];
    pSrc[second] = saved;

    /* second value of the pair (imaginary part) */
    saved            = pSrc[first + 1];
    pSrc[first + 1]  = pSrc[second + 1];
    pSrc[second + 1] = saved;
  }
}
/**
  @brief         In-place 32 bit reversal function.
  @param[in,out] pSrc        points to in-place buffer of unknown 32-bit data type
  @param[in]     bitRevLen   bit reversal table length
  @param[in]     pBitRevTab  points to bit reversal table
  @return        none

  @par           Details
                   The table is consumed two entries at a time. Each entry is
                   divided by 4 to obtain a buffer index, and the two adjacent
                   values starting at each index are exchanged.
*/
void arm_bitreversal_32(
        uint32_t *pSrc,
  const uint16_t bitRevLen,
  const uint16_t *pBitRevTab)
{
  uint32_t pos;

  for (pos = 0; pos < bitRevLen; pos += 2)
  {
    const uint32_t first  = pBitRevTab[pos] >> 2;
    const uint32_t second = pBitRevTab[pos + 1] >> 2;
    uint32_t saved;

    /* first value of the pair (real part) */
    saved        = pSrc[first];
    pSrc[first]  = pSrc[second];
    pSrc[second] = saved;

    /* second value of the pair (imaginary part) */
    saved            = pSrc[first + 1];
    pSrc[first + 1]  = pSrc[second + 1];
    pSrc[second + 1] = saved;
  }
}
/**
  @brief         In-place 16 bit reversal function.
  @param[in,out] pSrc        points to in-place buffer of unknown 16-bit data type
  @param[in]     bitRevLen   bit reversal table length
  @param[in]     pBitRevTab  points to bit reversal table
  @return        none

  @par           Details
                   The table is consumed two entries at a time. Each entry is
                   divided by 4 to obtain a buffer index, and the two adjacent
                   values starting at each index are exchanged.
*/
void arm_bitreversal_16(
        uint16_t *pSrc,
  const uint16_t bitRevLen,
  const uint16_t *pBitRevTab)
{
  uint16_t pos;

  for (pos = 0; pos < bitRevLen; pos += 2)
  {
    const uint16_t first  = pBitRevTab[pos] >> 2;
    const uint16_t second = pBitRevTab[pos + 1] >> 2;
    uint16_t saved;

    /* first value of the pair (real part) */
    saved        = pSrc[first];
    pSrc[first]  = pSrc[second];
    pSrc[second] = saved;

    /* second value of the pair (imaginary part) */
    saved            = pSrc[first + 1];
    pSrc[first + 1]  = pSrc[second + 1];
    pSrc[second + 1] = saved;
  }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,318 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_f64.c
* Description: Combined Radix Decimation in Frequency CFFT Double Precision Floating point processing function
*
* $Date: 29. November 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
extern void arm_radix4_butterfly_f64(
float64_t * pSrc,
uint16_t fftLen,
const float64_t * pCoef,
uint16_t twidCoefModifier);
extern void arm_bitreversal_64(
uint64_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
/**
* @} end of ComplexFFT group
*/
/* ----------------------------------------------------------------------
* Internal helper function used by the FFTs
* ---------------------------------------------------------------------- */
/*
* @brief Core function for the Double Precision floating-point CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of F64 data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*
* The buffer holds value pairs: element k is pSrc[2*k] and pSrc[2*k + 1].
* Each pass of the outer loop quarters the butterfly span (n2) and multiplies
* twidCoefModifier by 4, so later stages step through the twiddle table faster.
*
* NOTE(review): the stage loop shifts by 2 bits per pass and the inner loop
* bound is (n2 - 1U); this looks correct only when fftLen is a power of 4
* (the callers visible in this file pass such lengths) - confirm before
* calling with other sizes.
*/
void arm_radix4_butterfly_f64(
float64_t * pSrc,
uint16_t fftLen,
const float64_t * pCoef,
uint16_t twidCoefModifier)
{
float64_t co1, co2, co3, si1, si2, si3;
uint32_t ia1, ia2, ia3;
uint32_t i0, i1, i2, i3;
uint32_t n1, n2, j, k;
float64_t t1, t2, r1, r2, s1, s2;
/* Initializations for the fft calculation */
n2 = fftLen;
n1 = n2;
/* One pass per radix-4 stage: k is divided by 4 until it reaches 1 */
for (k = fftLen; k > 1U; k >>= 2U)
{
/* n1 = group stride for this stage, n2 = distance between the four butterfly inputs */
n1 = n2;
n2 >>= 2U;
ia1 = 0U;
/* FFT Calculation */
j = 0;
do
{
/* index calculation for the coefficients */
ia2 = ia1 + ia1;
ia3 = ia2 + ia1;
/* twiddles are stored as pairs: pCoef[2*ia] is used as the cosine term, pCoef[2*ia + 1] as the sine term */
co1 = pCoef[ia1 * 2U];
si1 = pCoef[(ia1 * 2U) + 1U];
co2 = pCoef[ia2 * 2U];
si2 = pCoef[(ia2 * 2U) + 1U];
co3 = pCoef[ia3 * 2U];
si3 = pCoef[(ia3 * 2U) + 1U];
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
/* the same twiddles are applied to every group, starting at offset j and stepping by n1 */
i0 = j;
do
{
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* xa + xc */
r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
/* xa - xc */
r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
/* ya + yc */
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
/* ya - yc */
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
/* xb + xd */
t1 = pSrc[2U * i1] + pSrc[2U * i3];
/* xa' = xa + xb + xc + xd */
pSrc[2U * i0] = r1 + t1;
/* xa + xc -(xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
/* ya' = ya + yb + yc + yd */
pSrc[(2U * i0) + 1U] = s1 + t2;
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* (yb - yd) */
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
/* (xb - xd) */
t2 = pSrc[2U * i1] - pSrc[2U * i3];
/* the result labelled xc'/yc' is stored at index i1 */
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
/* (xa - xc) + (yb - yd) */
r1 = r2 + t1;
/* (xa - xc) - (yb - yd) */
r2 = r2 - t1;
/* (ya - yc) - (xb - xd) */
s1 = s2 - t2;
/* (ya - yc) + (xb - xd) */
s2 = s2 + t2;
/* the result labelled xb'/yb' is stored at index i2 */
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
i0 += n1;
} while ( i0 < fftLen);
j++;
} while (j <= (n2 - 1U));
/* the next stage uses every 4th twiddle of the current stage */
twidCoefModifier <<= 2U;
}
}
/*
* @brief Radix-2 pre-stage followed by two half-length radix-4 butterflies
*        for the Double Precision floating-point CFFT.
* @param[in, out] *pSrc points to the in-place buffer of F64 data type
*                 (value pairs: element k is pSrc[2*k] and pSrc[2*k + 1]).
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @return none.
*
* The first loop combines element i with element i + fftLen/2 using twiddle
* pair i of pCoef. The two halves of the buffer are then each processed by
* arm_radix4_butterfly_f64() with length fftLen/2 and a twiddle modifier of 2.
*/
void arm_cfft_radix4by2_f64(
    float64_t * pSrc,
    uint32_t fftLen,
    const float64_t * pCoef)
{
    uint32_t i, l;
    uint32_t n2, ia;
    float64_t xt, yt, cosVal, sinVal;
    float64_t p0, p1, p2, p3, a0, a1;

    n2 = fftLen >> 1;
    ia = 0;

    for (i = 0; i < n2; i++)
    {
        /* twiddle pair for this butterfly */
        cosVal = pCoef[2 * ia];
        sinVal = pCoef[2 * ia + 1];
        ia++;

        /* partner element in the upper half of the buffer */
        l = i + n2;

        /* Butterfly implementation */
        a0 = pSrc[2 * i] + pSrc[2 * l];
        xt = pSrc[2 * i] - pSrc[2 * l];

        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
        a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];

        p0 = xt * cosVal;
        p1 = yt * sinVal;
        p2 = yt * cosVal;
        p3 = xt * sinVal;

        /* sums stay in the lower half, twiddled differences go to the upper half */
        pSrc[2 * i]     = a0;
        pSrc[2 * i + 1] = a1;

        pSrc[2 * l]     = p0 + p1;
        pSrc[2 * l + 1] = p2 - p3;
    }

    /* pCoef is already const-qualified as the callee expects, so it is passed
       through without a cast. */

    /* first half of the buffer */
    arm_radix4_butterfly_f64(pSrc, n2, pCoef, 2U);
    /* second half of the buffer (fftLen values further on) */
    arm_radix4_butterfly_f64(pSrc + fftLen, n2, pCoef, 2U);
}
/**
@addtogroup ComplexFFT
@{
*/
/**
  @brief         Processing function for the Double Precision floating-point complex FFT.
  @param[in]     S              points to an instance of the Double Precision floating-point CFFT structure
  @param[in,out] p1             points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  @param[in]     ifftFlag       flag that selects transform direction
                   - value = 0: forward transform
                   - value = 1: inverse transform
  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
                   - value = 0: disables bit reversal of output
                   - value = 1: enables bit reversal of output
  @return        none

  @par           Inverse transform
                   When ifftFlag is 1 the second value of every pair is negated
                   before the butterflies, and afterwards every pair is scaled by
                   1/fftLen with its second value negated again.
  @par           Supported lengths
                   Only the lengths listed in the switch below trigger a
                   butterfly pass; for any other length no butterfly is run.
 */
void arm_cfft_f64(
  const arm_cfft_instance_f64 * S,
        float64_t * p1,
        uint8_t ifftFlag,
        uint8_t bitReverseFlag)
{
  const uint32_t fftLen = S->fftLen;
  uint32_t n;

  if (ifftFlag == 1U)
  {
    /* Conjugate input data */
    for (n = 0; n < fftLen; n++)
    {
      p1[(2U * n) + 1U] = -p1[(2U * n) + 1U];
    }
  }

  switch (fftLen)
  {
    /* lengths handled by radix-4 stages alone */
    case 16:
    case 64:
    case 256:
    case 1024:
    case 4096:
      arm_radix4_butterfly_f64 (p1, fftLen, (float64_t*)S->pTwiddle, 1U);
      break;

    /* lengths that need the radix-2 pre-stage */
    case 32:
    case 128:
    case 512:
    case 2048:
      arm_cfft_radix4by2_f64 ( p1, fftLen, (float64_t*)S->pTwiddle);
      break;
  }

  if ( bitReverseFlag )
  {
    arm_bitreversal_64((uint64_t*)p1, S->bitRevLength,S->pBitRevTable);
  }

  if (ifftFlag == 1U)
  {
    const float64_t scale = 1.0 / (float64_t)fftLen;

    /* Conjugate and scale output data */
    for (n = 0; n < fftLen; n++)
    {
      p1[2U * n] *= scale;
      p1[(2U * n) + 1U] = -(p1[(2U * n) + 1U]) * scale;
    }
  }
}
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,353 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_init_f32.c
* Description: Initialization function for cfft f32 instance
*
* $Date: 07. January 2020
* $Revision: V1.7.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * FFTINIT(EXT, SIZE): copies bitRevLength, pBitRevTable and pTwiddle from the
 * constant instance arm_cfft_sR_<EXT>_len<SIZE> into the structure pointed to
 * by a variable named S that must be in scope at the point of use.
 *
 * The expansion is three separate statements and is not wrapped in a
 * do { } while (0) block, so it must not be used as the sole body of an
 * unbraced if / else / loop.
 */
#define FFTINIT(EXT,SIZE) \
S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength; \
S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable; \
S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the cfft f32 function
@param[in,out] S points to an instance of the floating-point CFFT structure
@param[in] fftLen fft length (number of complex samples)
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
@par Use of this function is mandatory only for the MVE version of the FFT.
Other versions can still initialize the data structure directly using
variables declared in arm_const_structs.h
*/
#include "arm_math.h"
#include "arm_common_tables.h"
#include "arm_const_structs.h"
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_fft.h"
#include "arm_mve_tables.h"
/*
 * Point the six rearranged-twiddle fields of S at the f32 tables generated
 * for a LEN-point transform. Local to the function below; undefined after it.
 */
#define ARM_CFFT_REARRANGED_TWIDDLES_F32(LEN)                                                     \
  do {                                                                                            \
    S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_##LEN##_f32;       \
    S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_##LEN##_f32;                       \
    S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_##LEN##_f32;       \
    S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_##LEN##_f32;                       \
    S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_##LEN##_f32;       \
    S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_##LEN##_f32;                       \
  } while (0)

/*
 * Select the rearranged twiddle tables for an MVE f32 CFFT instance.
 *
 * S                 instance to update; S->fftLen must already hold the FFT length.
 * twidCoefModifier  the tables are chosen for the length
 *                   S->fftLen >> (twidCoefModifier - 1); must be at least 1.
 *
 * Returns ARM_MATH_SUCCESS when the six rearranged-twiddle fields were set,
 * or ARM_MATH_ARGUMENT_ERROR when twidCoefModifier is below 1 or no table is
 * compiled in for the resulting length (S is left unchanged in that case).
 */
arm_status arm_cfft_radix4by2_rearrange_twiddles_f32(arm_cfft_instance_f32 *S, int twidCoefModifier)
{
  /* A modifier below 1 would make the shift count negative, which is
     undefined behaviour in C; reject it instead of shifting. */
  if (twidCoefModifier < 1)
  {
    return (ARM_MATH_ARGUMENT_ERROR);
  }

  switch (S->fftLen >> (twidCoefModifier - 1)) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
    case 4096U:
      ARM_CFFT_REARRANGED_TWIDDLES_F32(4096);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
    case 1024U:
      ARM_CFFT_REARRANGED_TWIDDLES_F32(1024);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) || defined(ARM_TABLE_BITREVIDX_FXT_512)
    case 256U:
      ARM_CFFT_REARRANGED_TWIDDLES_F32(256);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) || defined(ARM_TABLE_BITREVIDX_FXT_128)
    case 64U:
      ARM_CFFT_REARRANGED_TWIDDLES_F32(64);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) || defined(ARM_TABLE_BITREVIDX_FXT_32)
    case 16U:
      ARM_CFFT_REARRANGED_TWIDDLES_F32(16);
      break;
#endif
    default:
      /* No rearranged table is compiled in for this length */
      return (ARM_MATH_ARGUMENT_ERROR);
  }

  return (ARM_MATH_SUCCESS);
}
#undef ARM_CFFT_REARRANGED_TWIDDLES_F32
/*
 * Per-length setup used by arm_cfft_init_f32 (MVE build): store the fixed
 * bit-reversal table and its length, store the f32 twiddle table, then let
 * arm_cfft_radix4by2_rearrange_twiddles_f32 pick the rearranged tables and
 * record its result in the local variable `status`.
 * Local to the function below; undefined after it.
 */
#define ARM_CFFT_F32_MVE_SETUP(LEN, MODIFIER)                                \
  do {                                                                       \
    S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_##LEN##_TABLE_LENGTH;        \
    S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_##LEN;           \
    S->pTwiddle = (float32_t *)twiddleCoef_##LEN;                            \
    status = arm_cfft_radix4by2_rearrange_twiddles_f32(S, MODIFIER);         \
  } while (0)

/*
 * MVE variant of the f32 CFFT initialisation.
 *
 * Stores fftLen in S, clears the twiddle pointer, and then fills the table
 * fields for the requested length. The returned status is the result of the
 * rearranged-twiddle selection, or ARM_MATH_ARGUMENT_ERROR when no case for
 * fftLen is compiled in.
 */
arm_status arm_cfft_init_f32(
  arm_cfft_instance_f32 * S,
  uint16_t fftLen)
{
  arm_status status = ARM_MATH_SUCCESS;

  /* Record the length first: the rearranged-twiddle helper reads S->fftLen */
  S->fftLen = fftLen;

  /* Stays NULL when the length turns out to be unsupported */
  S->pTwiddle = NULL;

  switch (S->fftLen) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
    case 4096U:
      ARM_CFFT_F32_MVE_SETUP(4096, 1);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
    case 2048U:
      ARM_CFFT_F32_MVE_SETUP(2048, 2);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024)
    case 1024U:
      ARM_CFFT_F32_MVE_SETUP(1024, 1);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512)
    case 512U:
      ARM_CFFT_F32_MVE_SETUP(512, 2);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256)
    case 256U:
      ARM_CFFT_F32_MVE_SETUP(256, 1);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128)
    case 128U:
      ARM_CFFT_F32_MVE_SETUP(128, 2);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64)
    case 64U:
      ARM_CFFT_F32_MVE_SETUP(64, 1);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32)
    case 32U:
      ARM_CFFT_F32_MVE_SETUP(32, 2);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16)
    case 16U:
      ARM_CFFT_F32_MVE_SETUP(16, 1);
      break;
#endif
    default:
      /* fftLen is not one of the lengths compiled into this build */
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  return (status);
}
#undef ARM_CFFT_F32_MVE_SETUP
#else
/*
 * Scalar (non-MVE) variant of the f32 CFFT initialisation.
 *
 * Stores fftLen in S, clears the twiddle pointer, and copies the table fields
 * from the matching constant instance via FFTINIT. Returns ARM_MATH_SUCCESS
 * when a case for fftLen is compiled in, ARM_MATH_ARGUMENT_ERROR otherwise.
 */
arm_status arm_cfft_init_f32(
  arm_cfft_instance_f32 * S,
  uint16_t fftLen)
{
  /* Assume the length is unsupported until a case below accepts it */
  arm_status result = ARM_MATH_ARGUMENT_ERROR;

  S->fftLen = fftLen;

  /* Stays NULL when no case matches */
  S->pTwiddle = NULL;

  switch (S->fftLen) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
    case 4096U:
      FFTINIT(f32,4096);
      result = ARM_MATH_SUCCESS;
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
    case 2048U:
      FFTINIT(f32,2048);
      result = ARM_MATH_SUCCESS;
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
    case 1024U:
      FFTINIT(f32,1024);
      result = ARM_MATH_SUCCESS;
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
    case 512U:
      FFTINIT(f32,512);
      result = ARM_MATH_SUCCESS;
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
    case 256U:
      FFTINIT(f32,256);
      result = ARM_MATH_SUCCESS;
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
    case 128U:
      FFTINIT(f32,128);
      result = ARM_MATH_SUCCESS;
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
    case 64U:
      FFTINIT(f32,64);
      result = ARM_MATH_SUCCESS;
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
    case 32U:
      FFTINIT(f32,32);
      result = ARM_MATH_SUCCESS;
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
    case 16U:
      FFTINIT(f32,16);
      result = ARM_MATH_SUCCESS;
      break;
#endif
    default:
      /* Unsupported length: result keeps its ARM_MATH_ARGUMENT_ERROR value */
      break;
  }

  return (result);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,150 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_init_f64.c
* Description: Initialization function for cfft f64 instance
*
* $Date: 23. January 2020
* $Revision: V1.7.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Copy the bit-reversal table length, the bit-reversal table pointer and the
 * twiddle table pointer from the constant instance arm_cfft_sR_<EXT>_len<SIZE>
 * into the instance pointed to by S. S is not a macro parameter: a pointer
 * named S must be in scope where the macro is expanded.
 *
 * The three assignments are wrapped in a do/while(0) so the macro expands to
 * exactly one statement and stays correct when used as the body of an
 * unbraced if/else. Invoke it with a trailing semicolon: FFTINIT(f64,16);
 */
#define FFTINIT(EXT,SIZE)                                           \
  do {                                                              \
    S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength;   \
    S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable;   \
    S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;           \
  } while (0)
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the cfft f64 function
@param[in,out] S points to an instance of the floating-point CFFT structure
@param[in] fftLen fft length (number of complex samples)
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
@par Use of this function is mandatory only for the MVE version of the FFT.
Other versions can still initialize directly the data structure using
variables declared in arm_const_structs.h
*/
#include "arm_math.h"
#include "arm_common_tables.h"
#include "arm_const_structs.h"
/*
 * Initialise a double-precision CFFT instance for the given length.
 *
 * Stores fftLen in S, clears the twiddle pointer, and copies the table fields
 * from the matching constant instance arm_cfft_sR_f64_len<N> via FFTINIT.
 * Returns ARM_MATH_SUCCESS when a case for fftLen is compiled in and
 * ARM_MATH_ARGUMENT_ERROR otherwise; on error only S->fftLen and S->pTwiddle
 * (NULL) have been written.
 *
 * Each case is guarded by the table-selection macros for that length. The
 * guards use the upper-case F64 twiddle macro and the FLT64 bit-reversal
 * macro so that a build with ARM_DSP_CONFIG_TABLES and individually selected
 * f64 tables keeps the corresponding case instead of compiling it out.
 * NOTE(review): macro names follow the CMSIS-DSP arm_const_structs.h guards
 * for arm_cfft_sR_f64_len<N>; confirm against the header shipped in this tree.
 */
arm_status arm_cfft_init_f64(
  arm_cfft_instance_f64 * S,
  uint16_t fftLen)
{
  /* Initialise the default arm status */
  arm_status status = ARM_MATH_SUCCESS;

  /* Initialise the FFT length */
  S->fftLen = fftLen;

  /* Initialise the Twiddle coefficient pointer */
  S->pTwiddle = NULL;

  /* Initializations of Instance structure depending on the FFT length */
  switch (S->fftLen) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_4096) && defined(ARM_TABLE_BITREVIDX_FLT64_4096))
    /* 4096 point FFT */
    case 4096U:
      FFTINIT(f64,4096);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_BITREVIDX_FLT64_2048))
    /* 2048 point FFT */
    case 2048U:
      FFTINIT(f64,2048);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_BITREVIDX_FLT64_1024))
    /* 1024 point FFT */
    case 1024U:
      FFTINIT(f64,1024);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_BITREVIDX_FLT64_512))
    /* 512 point FFT */
    case 512U:
      FFTINIT(f64,512);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_BITREVIDX_FLT64_256))
    /* 256 point FFT */
    case 256U:
      FFTINIT(f64,256);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_BITREVIDX_FLT64_128))
    /* 128 point FFT */
    case 128U:
      FFTINIT(f64,128);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_BITREVIDX_FLT64_64))
    /* 64 point FFT */
    case 64U:
      FFTINIT(f64,64);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_BITREVIDX_FLT64_32))
    /* 32 point FFT */
    case 32U:
      FFTINIT(f64,32);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_16) && defined(ARM_TABLE_BITREVIDX_FLT64_16))
    /* 16 point FFT */
    case 16U:
      FFTINIT(f64,16);
      break;
#endif
    default:
      /* Reporting argument error if fftSize is not valid value */
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  return (status);
}
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,356 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_init_q15.c
* Description: Initialization function for cfft q15 instance
*
* $Date: 07. January 2020
* $Revision: V1.7.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Copy the bit-reversal table length, the bit-reversal table pointer and the
 * twiddle table pointer from the constant instance arm_cfft_sR_<EXT>_len<SIZE>
 * into the instance pointed to by S. S is not a macro parameter: a pointer
 * named S must be in scope where the macro is expanded.
 *
 * The three assignments are wrapped in a do/while(0) so the macro expands to
 * exactly one statement and stays correct when used as the body of an
 * unbraced if/else. Invoke it with a trailing semicolon: FFTINIT(q15,16);
 */
#define FFTINIT(EXT,SIZE)                                           \
  do {                                                              \
    S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength;   \
    S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable;   \
    S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;           \
  } while (0)
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the cfft q15 function
@param[in,out] S points to an instance of the Q15 CFFT structure
@param[in] fftLen fft length (number of complex samples)
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
@par Use of this function is mandatory only for the MVE version of the FFT.
Other versions can still initialize directly the data structure using
variables declared in arm_const_structs.h
*/
#include "arm_math.h"
#include "arm_common_tables.h"
#include "arm_const_structs.h"
#if defined(ARM_MATH_MVEI)
#include "arm_vec_fft.h"
#include "arm_mve_tables.h"
/*
 * Point the six rearranged-twiddle fields of S at the q15 tables generated
 * for a LEN-point transform. Local to the function below; undefined after it.
 */
#define ARM_CFFT_REARRANGED_TWIDDLES_Q15(LEN)                                                     \
  do {                                                                                            \
    S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_##LEN##_q15;       \
    S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_##LEN##_q15;                       \
    S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_##LEN##_q15;       \
    S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_##LEN##_q15;                       \
    S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_##LEN##_q15;       \
    S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_##LEN##_q15;                       \
  } while (0)

/*
 * Select the rearranged twiddle tables for an MVE q15 CFFT instance.
 *
 * S                 instance to update; S->fftLen must already hold the FFT length.
 * twidCoefModifier  the tables are chosen for the length
 *                   S->fftLen >> (twidCoefModifier - 1); must be at least 1.
 *
 * Returns ARM_MATH_SUCCESS when the six rearranged-twiddle fields were set,
 * or ARM_MATH_ARGUMENT_ERROR when twidCoefModifier is below 1 or no table is
 * compiled in for the resulting length (S is left unchanged in that case).
 */
arm_status arm_cfft_radix4by2_rearrange_twiddles_q15(arm_cfft_instance_q15 *S, int twidCoefModifier)
{
  /* A modifier below 1 would make the shift count negative, which is
     undefined behaviour in C; reject it instead of shifting. */
  if (twidCoefModifier < 1)
  {
    return (ARM_MATH_ARGUMENT_ERROR);
  }

  switch (S->fftLen >> (twidCoefModifier - 1)) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
    case 4096U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q15(4096);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
    case 1024U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q15(1024);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) || defined(ARM_TABLE_BITREVIDX_FXT_512)
    case 256U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q15(256);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) || defined(ARM_TABLE_BITREVIDX_FXT_128)
    case 64U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q15(64);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) || defined(ARM_TABLE_BITREVIDX_FXT_32)
    case 16U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q15(16);
      break;
#endif
    default:
      /* No rearranged table is compiled in for this length */
      return (ARM_MATH_ARGUMENT_ERROR);
  }

  return (ARM_MATH_SUCCESS);
}
#undef ARM_CFFT_REARRANGED_TWIDDLES_Q15
/*
 * Per-length setup used by arm_cfft_init_q15 (MVE build): store the fixed
 * bit-reversal table and its length, store the q15 twiddle table, then let
 * arm_cfft_radix4by2_rearrange_twiddles_q15 pick the rearranged tables and
 * record its result in the local variable `status`.
 * Local to the function below; undefined after it.
 */
#define ARM_CFFT_Q15_MVE_SETUP(LEN, MODIFIER)                                \
  do {                                                                       \
    S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_##LEN##_TABLE_LENGTH;        \
    S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_##LEN;           \
    S->pTwiddle = (q15_t *)twiddleCoef_##LEN##_q15;                          \
    status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, MODIFIER);         \
  } while (0)

/*
 * MVE variant of the q15 CFFT initialisation.
 *
 * Stores fftLen in S, clears the twiddle pointer, and then fills the table
 * fields for the requested length. The returned status is the result of the
 * rearranged-twiddle selection, or ARM_MATH_ARGUMENT_ERROR when no case for
 * fftLen is compiled in.
 */
arm_status arm_cfft_init_q15(
  arm_cfft_instance_q15 * S,
  uint16_t fftLen)
{
  arm_status status = ARM_MATH_SUCCESS;

  /* Record the length first: the rearranged-twiddle helper reads S->fftLen */
  S->fftLen = fftLen;

  /* Stays NULL when the length turns out to be unsupported */
  S->pTwiddle = NULL;

  switch (S->fftLen) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
    case 4096U:
      ARM_CFFT_Q15_MVE_SETUP(4096, 1);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
    case 2048U:
      ARM_CFFT_Q15_MVE_SETUP(2048, 2);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024)
    case 1024U:
      ARM_CFFT_Q15_MVE_SETUP(1024, 1);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512)
    case 512U:
      ARM_CFFT_Q15_MVE_SETUP(512, 2);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256)
    case 256U:
      ARM_CFFT_Q15_MVE_SETUP(256, 1);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128)
    case 128U:
      ARM_CFFT_Q15_MVE_SETUP(128, 2);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64)
    case 64U:
      ARM_CFFT_Q15_MVE_SETUP(64, 1);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32)
    case 32U:
      ARM_CFFT_Q15_MVE_SETUP(32, 2);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16)
    case 16U:
      ARM_CFFT_Q15_MVE_SETUP(16, 1);
      break;
#endif
    default:
      /* fftLen is not one of the lengths compiled into this build */
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  return (status);
}
#undef ARM_CFFT_Q15_MVE_SETUP
#else
/*
 * Scalar (non-MVE) variant of the q15 CFFT initialisation.
 *
 * Stores fftLen in S, clears the twiddle pointer, and copies the table fields
 * from the matching constant instance arm_cfft_sR_q15_len<N> via FFTINIT.
 * Returns ARM_MATH_SUCCESS when a case for fftLen is compiled in and
 * ARM_MATH_ARGUMENT_ERROR otherwise; on error only S->fftLen and S->pTwiddle
 * (NULL) have been written.
 *
 * Each case is guarded by the q15 twiddle macro and the fixed-point (FXT)
 * bit-reversal macro for that length, the same bit-reversal family the MVE
 * variant of this function tests. Guarding on the Q31 twiddle and FLT
 * bit-reversal macros instead would drop the case in a build that selects
 * only the q15 tables.
 * NOTE(review): macro names follow the CMSIS-DSP arm_const_structs.h guards
 * for arm_cfft_sR_q15_len<N>; confirm against the header shipped in this tree.
 */
arm_status arm_cfft_init_q15(
  arm_cfft_instance_q15 * S,
  uint16_t fftLen)
{
  /* Initialise the default arm status */
  arm_status status = ARM_MATH_SUCCESS;

  /* Initialise the FFT length */
  S->fftLen = fftLen;

  /* Initialise the Twiddle coefficient pointer */
  S->pTwiddle = NULL;

  /* Initializations of Instance structure depending on the FFT length */
  switch (S->fftLen) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
    /* 4096 point FFT */
    case 4096U:
      FFTINIT(q15,4096);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
    /* 2048 point FFT */
    case 2048U:
      FFTINIT(q15,2048);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
    /* 1024 point FFT */
    case 1024U:
      FFTINIT(q15,1024);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
    /* 512 point FFT */
    case 512U:
      FFTINIT(q15,512);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
    /* 256 point FFT */
    case 256U:
      FFTINIT(q15,256);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
    /* 128 point FFT */
    case 128U:
      FFTINIT(q15,128);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
    /* 64 point FFT */
    case 64U:
      FFTINIT(q15,64);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
    /* 32 point FFT */
    case 32U:
      FFTINIT(q15,32);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
    /* 16 point FFT */
    case 16U:
      FFTINIT(q15,16);
      break;
#endif
    default:
      /* Reporting argument error if fftSize is not valid value */
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  return (status);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,356 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_init_q31.c
* Description: Initialization function for cfft q31 instance
*
* $Date: 07. January 2020
* $Revision: V1.7.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Copy the bit-reversal table length, the bit-reversal table pointer and the
 * twiddle table pointer from the constant instance arm_cfft_sR_<EXT>_len<SIZE>
 * into the instance pointed to by S. S is not a macro parameter: a pointer
 * named S must be in scope where the macro is expanded.
 *
 * The three assignments are wrapped in a do/while(0) so the macro expands to
 * exactly one statement and stays correct when used as the body of an
 * unbraced if/else. Invoke it with a trailing semicolon: FFTINIT(q31,16);
 */
#define FFTINIT(EXT,SIZE)                                           \
  do {                                                              \
    S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength;   \
    S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable;   \
    S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;           \
  } while (0)
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the cfft q31 function
@param[in,out] S points to an instance of the Q31 CFFT structure
@param[in] fftLen fft length (number of complex samples)
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
@par Use of this function is mandatory only for the MVE version of the FFT.
Other versions can still initialize directly the data structure using
variables declared in arm_const_structs.h
*/
#include "arm_math.h"
#include "arm_common_tables.h"
#include "arm_const_structs.h"
#if defined(ARM_MATH_MVEI)
#include "arm_vec_fft.h"
#include "arm_mve_tables.h"
/*
 * Point the six rearranged-twiddle fields of S at the q31 tables generated
 * for a LEN-point transform. Local to the function below; undefined after it.
 */
#define ARM_CFFT_REARRANGED_TWIDDLES_Q31(LEN)                                                     \
  do {                                                                                            \
    S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_##LEN##_q31;       \
    S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_##LEN##_q31;                       \
    S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_##LEN##_q31;       \
    S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_##LEN##_q31;                       \
    S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_##LEN##_q31;       \
    S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_##LEN##_q31;                       \
  } while (0)

/*
 * Select the rearranged twiddle tables for an MVE q31 CFFT instance.
 *
 * S                 instance to update; S->fftLen must already hold the FFT length.
 * twidCoefModifier  the tables are chosen for the length
 *                   S->fftLen >> (twidCoefModifier - 1); must be at least 1.
 *
 * Returns ARM_MATH_SUCCESS when the six rearranged-twiddle fields were set,
 * or ARM_MATH_ARGUMENT_ERROR when twidCoefModifier is below 1 or no table is
 * compiled in for the resulting length (S is left unchanged in that case).
 */
arm_status arm_cfft_radix4by2_rearrange_twiddles_q31(arm_cfft_instance_q31 *S, int twidCoefModifier)
{
  /* A modifier below 1 would make the shift count negative, which is
     undefined behaviour in C; reject it instead of shifting. */
  if (twidCoefModifier < 1)
  {
    return (ARM_MATH_ARGUMENT_ERROR);
  }

  switch (S->fftLen >> (twidCoefModifier - 1)) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
    case 4096U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q31(4096);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
    case 1024U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q31(1024);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) || defined(ARM_TABLE_BITREVIDX_FXT_512)
    case 256U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q31(256);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) || defined(ARM_TABLE_BITREVIDX_FXT_128)
    case 64U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q31(64);
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) || defined(ARM_TABLE_BITREVIDX_FXT_32)
    case 16U:
      ARM_CFFT_REARRANGED_TWIDDLES_Q31(16);
      break;
#endif
    default:
      /* No rearranged table is compiled in for this length */
      return (ARM_MATH_ARGUMENT_ERROR);
  }

  return (ARM_MATH_SUCCESS);
}
#undef ARM_CFFT_REARRANGED_TWIDDLES_Q31
/*
 * Set up a Q31 complex-FFT instance for the requested transform length.
 *
 * S       instance to fill in. fftLen, pTwiddle, bitRevLength and
 *         pBitRevTable are written here; the instance is then handed to
 *         arm_cfft_radix4by2_rearrange_twiddles_q31(), which is expected to
 *         select the rearranged twiddle tables.
 * fftLen  transform length. 16, 32, 64, ..., 4096 are recognised, each one
 *         subject to the table-selection macros tested below.
 *
 * The second argument passed to the rearrange helper is 1 for the lengths
 * 16/64/256/1024/4096 and 2 for 32/128/512/2048.
 *
 * Returns the status reported by the rearrange helper for a recognised
 * length, or ARM_MATH_ARGUMENT_ERROR otherwise (pTwiddle then stays NULL).
 */
arm_status arm_cfft_init_q31(
    arm_cfft_instance_q31 * S,
    uint16_t fftLen)
{
    arm_status result = ARM_MATH_SUCCESS;

    /* Record the length and start from "no twiddle table selected". */
    S->fftLen = fftLen;
    S->pTwiddle = NULL;

    switch (S->fftLen)
    {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
    case 4096U:
        /* 4096-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_4096_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
    case 2048U:
        /* 2048-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_2048_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 2);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024)
    case 1024U:
        /* 1024-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_1024_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512)
    case 512U:
        /* 512-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_512_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 2);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256)
    case 256U:
        /* 256-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_256_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128)
    case 128U:
        /* 128-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_128_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 2);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64)
    case 64U:
        /* 64-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_64_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32)
    case 32U:
        /* 32-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_32_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 2);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16)
    case 16U:
        /* 16-point transform */
        S->pTwiddle = (q31_t *)twiddleCoef_16_q31;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH;
        result = arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
        break;
#endif

    default:
        /* Length not supported (or its tables are not compiled in). */
        result = ARM_MATH_ARGUMENT_ERROR;
        break;
    }

    return (result);
}
#else
/*
 * Set up a Q31 complex-FFT instance for the requested transform length.
 *
 * S       instance to fill in. fftLen and pTwiddle are written directly;
 *         the remaining per-length setup is delegated to the FFTINIT macro.
 * fftLen  transform length. 16, 32, 64, ..., 4096 are recognised, each one
 *         subject to the table-selection macros tested below.
 *
 * Returns ARM_MATH_SUCCESS for a recognised length and
 * ARM_MATH_ARGUMENT_ERROR otherwise.
 */
arm_status arm_cfft_init_q31(
    arm_cfft_instance_q31 * S,
    uint16_t fftLen)
{
    arm_status result = ARM_MATH_SUCCESS;

    /* Record the length and start from "no twiddle table selected". */
    S->fftLen = fftLen;
    S->pTwiddle = NULL;

    switch (S->fftLen)
    {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
    case 4096U:
        /* 4096-point transform */
        FFTINIT(q31,4096);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
    case 2048U:
        /* 2048-point transform */
        FFTINIT(q31,2048);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
    case 1024U:
        /* 1024-point transform */
        FFTINIT(q31,1024);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
    case 512U:
        /* 512-point transform */
        FFTINIT(q31,512);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
    case 256U:
        /* 256-point transform */
        FFTINIT(q31,256);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
    case 128U:
        /* 128-point transform */
        FFTINIT(q31,128);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
    case 64U:
        /* 64-point transform */
        FFTINIT(q31,64);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
    case 32U:
        /* 32-point transform */
        FFTINIT(q31,32);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
    case 16U:
        /* 16-point transform */
        FFTINIT(q31,16);
        break;
#endif

    default:
        /* Length not supported (or its tables are not compiled in). */
        result = ARM_MATH_ARGUMENT_ERROR;
        break;
    }

    return (result);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,949 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_q15.c
* Description: Combined Radix Decimation in Q15 Frequency CFFT processing function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#if defined(ARM_MATH_MVEI)
#include "arm_vec_fft.h"
/*
 * In-place element swap of the buffer at pSrc, driven by an index table.
 *
 * pSrc        buffer to reorder; it is accessed through a uint32_t pointer,
 *             so every swapped element is one 32-bit word (presumably one
 *             complex Q15 sample, re + im -- confirm against callers).
 * bitRevLen   number of 16-bit entries in pBitRevTab.
 * pBitRevTab  table read 8 entries at a time; even-indexed and odd-indexed
 *             entries of each group form the two sides of a swap.
 */
static void arm_bitreversal_16_inpl_mve(
uint16_t *pSrc,
const uint16_t bitRevLen,
const uint16_t *pBitRevTab)
{
uint32_t *src = (uint32_t *)pSrc;
uint32_t blkCnt; /* loop counters */
/* NOTE(review): declared uint32x4_t but assigned from vldrhq_u16 and passed to the u16 multiplies below -- presumably relies on lax vector conversions; confirm. */
uint32x4_t bitRevTabOff;
/* multiplying by 1 is only used to widen the 16-bit table entries to 32 bits */
uint16x8_t one = vdupq_n_u16(1);
/* 8 table entries (4 swap pairs) are consumed per iteration */
blkCnt = (bitRevLen / 2) / 4;
while (blkCnt > 0U) {
bitRevTabOff = vldrhq_u16(pBitRevTab);
pBitRevTab += 8;
/* split the 8 entries into two vectors of 4 widened offsets (bottom / top lanes) */
uint32x4_t bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one);
uint32x4_t bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one);
/* table values are scaled down by 8 before being used as word indices */
bitRevOff1 = bitRevOff1 >> 3;
bitRevOff2 = bitRevOff2 >> 3;
/* read both sides of each pair, then write them back crossed over */
uint32x4_t in1 = vldrwq_gather_shifted_offset_u32(src, bitRevOff1);
uint32x4_t in2 = vldrwq_gather_shifted_offset_u32(src, bitRevOff2);
vstrwq_scatter_shifted_offset_u32(src, bitRevOff1, in2);
vstrwq_scatter_shifted_offset_u32(src, bitRevOff2, in1);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
/* remaining table entries when bitRevLen is not a multiple of 8 */
blkCnt = bitRevLen & 7;
if (blkCnt > 0U) {
/* predicate built from the residual entry count; reused for the load, gathers and scatters */
mve_pred16_t p0 = vctp16q(blkCnt);
bitRevTabOff = vldrhq_z_u16(pBitRevTab, p0);
uint32x4_t bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one);
uint32x4_t bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one);
bitRevOff1 = bitRevOff1 >> 3;
bitRevOff2 = bitRevOff2 >> 3;
uint32x4_t in1 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff1, p0);
uint32x4_t in2 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff2, p0);
vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff1, in2, p0);
vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff2, in1, p0);
}
}
/*
 * In-place forward radix-4 butterfly passes over a Q15 complex buffer,
 * written with MVE intrinsics.
 *
 * S       instance providing the rearranged twiddle tables
 *         (rearranged_twiddle_stride{1,2,3} and their per-stage offset
 *         arrays rearranged_twiddle_tab_stride{1,2,3}_arr).
 * pSrc    data buffer, processed in place.
 * fftLen  length handled by this call; the callers in this file pass either
 *         the full FFT length or half of it.
 *
 * All stages except the last run in the nested loops below with contiguous
 * vector loads/stores; the last stage uses gather/scatter accesses and
 * applies no twiddle multiplication.
 */
static void _arm_radix4_butterfly_q15_mve(
const arm_cfft_instance_q15 * S,
q15_t *pSrc,
uint32_t fftLen)
{
q15x8_t vecTmp0, vecTmp1;
q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1;
q15x8_t vecA, vecB, vecC, vecD;
q15x8_t vecW;
uint32_t blkCnt;
uint32_t n1, n2;
uint32_t stage = 0;
int32_t iter = 1;
/*
* Per-lane byte offsets for the last-stage gather/scatter. They are biased
* by -16 pointer sizes; with 4-byte pointers (assumed for the target --
* confirm) that is -64 bytes, which the first write-back load (+64) cancels.
*/
static const uint32_t strides[4] = {
(0 - 16) * sizeof(q15_t *), (4 - 16) * sizeof(q15_t *),
(8 - 16) * sizeof(q15_t *), (12 - 16) * sizeof(q15_t *)
};
/*
* Process first stages
* Each stage in middle stages provides two down scaling of the input
*/
/* n1 = size of the current group, n2 = size of one quarter of it */
n2 = fftLen;
n1 = n2;
n2 >>= 2u;
/* one pass of this loop per stage; the final stage is handled after the loop */
for (int k = fftLen / 4u; k > 1; k >>= 2u)
{
/* iter = number of independent groups in this stage (x4 per stage) */
for (int i = 0; i < iter; i++)
{
/* start of this stage's twiddles inside each rearranged table */
q15_t const *p_rearranged_twiddle_tab_stride2 =
&S->rearranged_twiddle_stride2[
S->rearranged_twiddle_tab_stride2_arr[stage]];
q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
S->rearranged_twiddle_tab_stride3_arr[stage]];
q15_t const *p_rearranged_twiddle_tab_stride1 =
&S->rearranged_twiddle_stride1[
S->rearranged_twiddle_tab_stride1_arr[stage]];
q15_t const *pW1, *pW2, *pW3;
/* the four quarters A, B, C, D of group i */
q15_t *inA = pSrc + CMPLX_DIM * i * n1;
q15_t *inB = inA + n2 * CMPLX_DIM;
q15_t *inC = inB + n2 * CMPLX_DIM;
q15_t *inD = inC + n2 * CMPLX_DIM;
pW1 = p_rearranged_twiddle_tab_stride1;
pW2 = p_rearranged_twiddle_tab_stride2;
pW3 = p_rearranged_twiddle_tab_stride3;
/* each vector holds 8 q15 values, advanced 8 at a time below */
blkCnt = n2 / 4;
/*
* load 4 x q15 complex pair
*/
/* A and C are loaded ahead of the loop and re-loaded at the end of each iteration */
vecA = vldrhq_s16(inA);
vecC = vldrhq_s16(inC);
while (blkCnt > 0U)
{
vecB = vldrhq_s16(inB);
vecD = vldrhq_s16(inD);
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
*/
vecTmp0 = vhaddq(vecSum0, vecSum1);
vst1q(inA, vecTmp0);
inA += 8;
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'
*/
vecTmp0 = vhsubq(vecSum0, vecSum1);
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
*/
vecW = vld1q(pW2);
pW2 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inB, vecTmp1);
inB += 8;
/*
* [ 1 -i -1 +i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 -i -1 +i ] * [ A B C D ]'.* W1
*/
vecW = vld1q(pW1);
pW1 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inC, vecTmp1);
inC += 8;
/*
* [ 1 +i -1 -i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 +i -1 -i ] * [ A B C D ]'.* W3
*/
vecW = vld1q(pW3);
pW3 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inD, vecTmp1);
inD += 8;
/* pre-load A and C for the next iteration. NOTE(review): on the last iteration this reads just past the quarter being processed; the values are not used. */
vecA = vldrhq_s16(inA);
vecC = vldrhq_s16(inC);
blkCnt--;
}
}
/* next stage: groups shrink by 4, their count grows by 4 */
n1 = n2;
n2 >>= 2u;
iter = iter << 2;
stage++;
}
/*
* start of Last stage process
*/
/* turn the biased per-lane offsets into absolute addresses inside pSrc */
uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*
* load scheduling
*/
/* the _wb load advances the address vector by 64 bytes; B, C, D are then read at +4, +8, +12 from it */
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
blkCnt = (fftLen >> 4);
while (blkCnt > 0U)
{
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 4);
vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* pre-load for next iteration
*/
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
/* the address vector already points at the next block, hence the -64 on every store */
vecTmp0 = vhaddq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0);
vecTmp0 = vhsubq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0);
blkCnt--;
}
}
/*
 * Forward Q15 transform for the lengths dispatched here by arm_cfft_q15
 * (32, 128, 512, 2048): one radix-2 pass over the whole buffer, then a
 * radix-4 butterfly on each half, then a final left shift by one of every
 * value in the buffer.
 *
 * S       instance supplying pTwiddle and the tables used by the radix-4 helper
 * pSrc    buffer of 2 * fftLen q15 values, processed in place
 * fftLen  transform length
 */
static void arm_cfft_radix4by2_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen)
{
    const uint32_t halfLen = fftLen >> 1;
    const q15_t *pTw = S->pTwiddle;
    q15_t *pLower = pSrc;
    q15_t *pUpper = pSrc + fftLen;
    q15x8_t lower, upper, sum, diff, product, tw;
    uint32_t count;

    /* Radix-2 pass: 8 q15 values from each half per iteration. */
    for (count = halfLen / 4; count > 0U; count--)
    {
        lower = *(q15x8_t *) pLower;
        upper = *(q15x8_t *) pUpper;

        /* scale both inputs down by one bit before combining */
        lower = lower >> 1;
        upper = upper >> 1;

        sum = vhaddq(lower, upper);
        vst1q(pLower, sum);
        pLower += 8;

        tw = vld1q(pTw);
        pTw += 8;

        diff = vhsubq(lower, upper);
        product = MVE_CMPLX_MULT_FX_AxConjB(diff, tw);
        vst1q(pUpper, product);
        pUpper += 8;
    }

    /* Radix-4 butterflies on the first and on the second half. */
    _arm_radix4_butterfly_q15_mve(S, pSrc, halfLen);
    _arm_radix4_butterfly_q15_mve(S, pSrc + fftLen, halfLen);

    /* Shift every value of the buffer left by one, full vectors first. */
    pLower = pSrc;
    for (count = (fftLen << 1) >> 3; count > 0U; count--)
    {
        lower = *(q15x8_t *) pLower;
        lower = lower << 1;
        vst1q(pLower, lower);
        pLower += 8;
    }

    /*
     * tail
     * (will be merged thru tail predication)
     */
    count = (fftLen << 1) & 7;
    if (count > 0U)
    {
        mve_pred16_t tailPred = vctp16q(count);

        lower = *(q15x8_t *) pLower;
        lower = lower << 1;
        vstrhq_p(pLower, lower, tailPred);
    }
}
/*
 * In-place inverse radix-4 butterfly passes over a Q15 complex buffer,
 * written with MVE intrinsics.
 *
 * Same structure as _arm_radix4_butterfly_q15_mve; it differs in that the
 * twiddle products use MVE_CMPLX_MULT_FX_AxConjB and that the
 * MVE_CMPLX_ADD_FX_A_ixB / MVE_CMPLX_SUB_FX_A_ixB combinations are swapped
 * between the third and fourth outputs.
 *
 * S       instance providing the rearranged twiddle tables and their
 *         per-stage offset arrays.
 * pSrc    data buffer, processed in place.
 * fftLen  length handled by this call; the callers in this file pass either
 *         the full FFT length or half of it.
 */
static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S,q15_t *pSrc, uint32_t fftLen)
{
q15x8_t vecTmp0, vecTmp1;
q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1;
q15x8_t vecA, vecB, vecC, vecD;
q15x8_t vecW;
uint32_t blkCnt;
uint32_t n1, n2;
uint32_t stage = 0;
int32_t iter = 1;
/*
* Per-lane byte offsets for the last-stage gather/scatter. They are biased
* by -16 pointer sizes; with 4-byte pointers (assumed for the target --
* confirm) that is -64 bytes, which the first write-back load (+64) cancels.
*/
static const uint32_t strides[4] = {
(0 - 16) * sizeof(q15_t *), (4 - 16) * sizeof(q15_t *),
(8 - 16) * sizeof(q15_t *), (12 - 16) * sizeof(q15_t *)
};
/*
* Process first stages
* Each stage in middle stages provides two down scaling of the input
*/
/* n1 = size of the current group, n2 = size of one quarter of it */
n2 = fftLen;
n1 = n2;
n2 >>= 2u;
/* one pass of this loop per stage; the final stage is handled after the loop */
for (int k = fftLen / 4u; k > 1; k >>= 2u)
{
/* iter = number of independent groups in this stage (x4 per stage) */
for (int i = 0; i < iter; i++)
{
/* start of this stage's twiddles inside each rearranged table */
q15_t const *p_rearranged_twiddle_tab_stride2 =
&S->rearranged_twiddle_stride2[
S->rearranged_twiddle_tab_stride2_arr[stage]];
q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
S->rearranged_twiddle_tab_stride3_arr[stage]];
q15_t const *p_rearranged_twiddle_tab_stride1 =
&S->rearranged_twiddle_stride1[
S->rearranged_twiddle_tab_stride1_arr[stage]];
q15_t const *pW1, *pW2, *pW3;
/* the four quarters A, B, C, D of group i */
q15_t *inA = pSrc + CMPLX_DIM * i * n1;
q15_t *inB = inA + n2 * CMPLX_DIM;
q15_t *inC = inB + n2 * CMPLX_DIM;
q15_t *inD = inC + n2 * CMPLX_DIM;
pW1 = p_rearranged_twiddle_tab_stride1;
pW2 = p_rearranged_twiddle_tab_stride2;
pW3 = p_rearranged_twiddle_tab_stride3;
/* each vector holds 8 q15 values, advanced 8 at a time below */
blkCnt = n2 / 4;
/*
* load 4 x q15 complex pair
*/
/* A and C are loaded ahead of the loop and re-loaded at the end of each iteration */
vecA = vldrhq_s16(inA);
vecC = vldrhq_s16(inC);
while (blkCnt > 0U)
{
vecB = vldrhq_s16(inB);
vecD = vldrhq_s16(inD);
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
*/
vecTmp0 = vhaddq(vecSum0, vecSum1);
vst1q(inA, vecTmp0);
inA += 8;
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'
*/
vecTmp0 = vhsubq(vecSum0, vecSum1);
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
*/
vecW = vld1q(pW2);
pW2 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inB, vecTmp1);
inB += 8;
/*
* [ 1 -i -1 +i ] * [ A B C D ]'
*/
/* NOTE(review): the matrix rows in these comments are copied from the forward routine; here the ADD/SUB macros are swapped relative to it. */
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 -i -1 +i ] * [ A B C D ]'.* W1
*/
vecW = vld1q(pW1);
pW1 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inC, vecTmp1);
inC += 8;
/*
* [ 1 +i -1 -i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 +i -1 -i ] * [ A B C D ]'.* W3
*/
vecW = vld1q(pW3);
pW3 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inD, vecTmp1);
inD += 8;
/* pre-load A and C for the next iteration. NOTE(review): on the last iteration this reads just past the quarter being processed; the values are not used. */
vecA = vldrhq_s16(inA);
vecC = vldrhq_s16(inC);
blkCnt--;
}
}
/* next stage: groups shrink by 4, their count grows by 4 */
n1 = n2;
n2 >>= 2u;
iter = iter << 2;
stage++;
}
/*
* start of Last stage process
*/
/* turn the biased per-lane offsets into absolute addresses inside pSrc */
uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*
* load scheduling
*/
/* the _wb load advances the address vector by 64 bytes; B, C, D are then read at +4, +8, +12 from it */
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
blkCnt = (fftLen >> 4);
while (blkCnt > 0U)
{
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 4);
vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* pre-load for next iteration
*/
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
/* the address vector already points at the next block, hence the -64 on every store */
vecTmp0 = vhaddq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0);
vecTmp0 = vhsubq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0);
blkCnt--;
}
}
/*
 * Inverse Q15 transform for the lengths dispatched here by arm_cfft_q15
 * (32, 128, 512, 2048): one radix-2 pass over the whole buffer, then an
 * inverse radix-4 butterfly on each half, then a final left shift by one of
 * every value in the buffer.
 *
 * S       instance supplying pTwiddle and the tables used by the radix-4 helper
 * pSrc    buffer of 2 * fftLen q15 values, processed in place
 * fftLen  transform length
 */
static void arm_cfft_radix4by2_inverse_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen)
{
    uint32_t n2;
    q15_t *pIn0;
    q15_t *pIn1;
    const q15_t *pCoef = S->pTwiddle;
    uint32_t blkCnt;
    q15x8_t vecIn0, vecIn1, vecSum, vecDiff;
    q15x8_t vecCmplxTmp, vecTw;
    q15_t const *pCoefVec;

    n2 = fftLen >> 1;
    pIn0 = pSrc;
    pIn1 = pSrc + fftLen;
    pCoefVec = pCoef;

    /* Radix-2 pass: 8 q15 values from each half per iteration. */
    blkCnt = n2 / 4;
    while (blkCnt > 0U)
    {
        vecIn0 = *(q15x8_t *) pIn0;
        vecIn1 = *(q15x8_t *) pIn1;

        /* scale both inputs down by one bit before combining */
        vecIn0 = vecIn0 >> 1;
        vecIn1 = vecIn1 >> 1;

        vecSum = vhaddq(vecIn0, vecIn1);
        vst1q(pIn0, vecSum);
        pIn0 += 8;

        vecTw = vld1q(pCoefVec);
        pCoefVec += 8;

        /* difference multiplied by the twiddle, built from two accumulate steps */
        vecDiff = vhsubq(vecIn0, vecIn1);
        vecCmplxTmp = vqrdmlsdhq(vuninitializedq_s16() , vecDiff, vecTw);
        vecCmplxTmp = vqrdmladhxq(vecCmplxTmp, vecDiff, vecTw);
        vst1q(pIn1, vecCmplxTmp);
        pIn1 += 8;

        blkCnt--;
    }

    /* Inverse radix-4 butterflies on the first and on the second half. */
    _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, n2);
    _arm_radix4_butterfly_inverse_q15_mve(S, pSrc + fftLen, n2);

    /* Shift every value of the buffer left by one, full vectors first. */
    pIn0 = pSrc;
    blkCnt = (fftLen << 1) >> 3;
    while (blkCnt > 0U)
    {
        vecIn0 = *(q15x8_t *) pIn0;
        vecIn0 = vecIn0 << 1;
        vst1q(pIn0, vecIn0);
        pIn0 += 8;
        blkCnt--;
    }

    /*
     * tail
     * (will be merged thru tail predication)
     *
     * This is a single predicated store, so it must be an `if`: the body
     * never changes blkCnt, and a `while` here would spin forever whenever
     * a residual exists. The forward routine uses the same `if` form.
     */
    blkCnt = (fftLen << 1) & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        vecIn0 = *(q15x8_t *) pIn0;
        vecIn0 = vecIn0 << 1;
        vstrhq_p(pIn0, vecIn0, p0);
    }
}
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
  @brief         Processing function for Q15 complex FFT.
  @param[in]     S               points to an instance of Q15 CFFT structure
  @param[in,out] pSrc            points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  @param[in]     ifftFlag        flag that selects transform direction
                   - value = 1: inverse transform
                   - any other value: forward transform
  @param[in]     bitReverseFlag  flag that enables / disables bit reversal of output
                   - value = 0: disables bit reversal of output
                   - non-zero value: enables bit reversal of output
  @return        none

  Lengths other than 16, 32, 64, ..., 4096 skip the butterfly step entirely.
 */
void arm_cfft_q15(
  const arm_cfft_instance_q15 * S,
        q15_t * pSrc,
        uint8_t ifftFlag,
        uint8_t bitReverseFlag)
{
    const uint32_t len = S->fftLen;
    const int inverse = (ifftFlag == 1U);

    switch (len) {
    /* lengths handled directly by the radix-4 kernels */
    case 16:
    case 64:
    case 256:
    case 1024:
    case 4096:
        if (inverse) {
            _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, len);
        } else {
            _arm_radix4_butterfly_q15_mve(S, pSrc, len);
        }
        break;

    /* lengths that need the extra radix-2 pass */
    case 32:
    case 128:
    case 512:
    case 2048:
        if (inverse) {
            arm_cfft_radix4by2_inverse_q15_mve(S, pSrc, len);
        } else {
            arm_cfft_radix4by2_q15_mve(S, pSrc, len);
        }
        break;

    default:
        /* unsupported length: no butterfly is run */
        break;
    }

    if (bitReverseFlag)
    {
        arm_bitreversal_16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable);
    }
}
#else
/*
 * Routines used by the scalar arm_cfft_q15() path below. The three extern
 * ones are not defined in this part of the file; the two radix4by2 routines
 * are defined further down.
 */
/* Forward radix-4 butterfly; arm_cfft_q15 passes twidCoefModifier = 1, the radix4by2 wrapper passes 2. */
extern void arm_radix4_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef,
uint32_t twidCoefModifier);
/* Inverse counterpart of arm_radix4_butterfly_q15. */
extern void arm_radix4_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef,
uint32_t twidCoefModifier);
/* Reordering step applied when bitReverseFlag is set. */
extern void arm_bitreversal_16(
uint16_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
/* Forward transform for the lengths 32, 128, 512, 2048. */
void arm_cfft_radix4by2_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef);
/* Inverse transform for the lengths 32, 128, 512, 2048. */
void arm_cfft_radix4by2_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef);
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
  @brief         Processing function for Q15 complex FFT.
  @param[in]     S               points to an instance of Q15 CFFT structure
  @param[in,out] p1              points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  @param[in]     ifftFlag        flag that selects transform direction
                   - value = 1: inverse transform
                   - any other value: forward transform
  @param[in]     bitReverseFlag  flag that enables / disables bit reversal of output
                   - value = 0: disables bit reversal of output
                   - non-zero value: enables bit reversal of output
  @return        none

  Lengths other than 16, 32, 64, ..., 4096 skip the butterfly step entirely.
 */
void arm_cfft_q15(
  const arm_cfft_instance_q15 * S,
        q15_t * p1,
        uint8_t ifftFlag,
        uint8_t bitReverseFlag)
{
    const uint32_t len = S->fftLen;
    const int inverse = (ifftFlag == 1U);

    switch (len)
    {
    /* lengths handled directly by the radix-4 kernels */
    case 16:
    case 64:
    case 256:
    case 1024:
    case 4096:
        if (inverse)
        {
            arm_radix4_butterfly_inverse_q15 ( p1, len, (q15_t*)S->pTwiddle, 1 );
        }
        else
        {
            arm_radix4_butterfly_q15 ( p1, len, (q15_t*)S->pTwiddle, 1 );
        }
        break;

    /* lengths that need the extra radix-2 pass */
    case 32:
    case 128:
    case 512:
    case 2048:
        if (inverse)
        {
            arm_cfft_radix4by2_inverse_q15 ( p1, len, S->pTwiddle );
        }
        else
        {
            arm_cfft_radix4by2_q15 ( p1, len, S->pTwiddle );
        }
        break;

    default:
        /* unsupported length: no butterfly is run */
        break;
    }

    if ( bitReverseFlag )
    {
        arm_bitreversal_16 ((uint16_t*) p1, S->bitRevLength, S->pBitRevTable);
    }
}
/**
@} end of ComplexFFT group
*/
/*
 * Forward Q15 transform for the lengths arm_cfft_q15 routes here (32, 128,
 * 512, 2048): a radix-2 pass combining the two halves of the buffer, a
 * radix-4 butterfly (twiddle modifier 2) on each half, and a final left
 * shift by one of every value.
 *
 * pSrc    buffer of 2 * fftLen q15 values (interleaved pairs), in place
 * fftLen  transform length
 * pCoef   twiddle table, read as consecutive (cos, sin) pairs
 */
void arm_cfft_radix4by2_q15(
        q15_t * pSrc,
        uint32_t fftLen,
  const q15_t * pCoef)
{
    const uint32_t half = fftLen >> 1U;
    uint32_t idx;

#if defined (ARM_MATH_DSP)
    const q15_t *pTw = pCoef;
    q15_t *pTop = pSrc;
    q15_t *pBot = pSrc + fftLen;
    q31_t tw, top, bot, diff;
    q31_t lo, hi, packed;

    /* Each 32-bit word read here holds one pair of q15 values. */
    for (idx = half; idx > 0; idx--)
    {
        tw = read_q15x2_ia ((q15_t **) &pTw);

        top = read_q15x2 (pTop);
        top = __SHADD16(top, 0); /* this is just a SIMD arithmetic shift right by 1 */

        bot = read_q15x2 (pBot);
        bot = __SHADD16(bot, 0); /* this is just a SIMD arithmetic shift right by 1 */

        diff = __QSUB16(top, bot);

        write_q15x2_ia (&pTop, __SHADD16(top, bot));

#ifndef ARM_MATH_BIG_ENDIAN
        lo = __SMUAD(tw, diff) >> 16U;
        hi = __SMUSDX(tw, diff);
#else
        lo = __SMUSDX(diff, tw) >> 16U;
        hi = __SMUAD(tw, diff);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* low half-word from lo, high half-word from hi */
        packed = (q31_t) ((hi) & 0xFFFF0000) | (lo & 0x0000FFFF);
        write_q15x2_ia (&pBot, packed);
    }
#else /* #if defined (ARM_MATH_DSP) */
    for (idx = 0; idx < half; idx++)
    {
        const q15_t cosVal = pCoef[2 * idx];
        const q15_t sinVal = pCoef[2 * idx + 1];
        q15_t * const pTop = &pSrc[2 * idx];
        q15_t * const pBot = &pSrc[2 * (idx + half)];
        /* halved differences, taken before the top pair is overwritten */
        const q15_t xt = (pTop[0] >> 1U) - (pBot[0] >> 1U);
        const q15_t yt = (pTop[1] >> 1U) - (pBot[1] >> 1U);

        pTop[0] = ((pTop[0] >> 1U) + (pBot[0] >> 1U)) >> 1U;
        pTop[1] = ((pBot[1] >> 1U) + (pTop[1] >> 1U)) >> 1U;

        pBot[0] = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) +
                   ((int16_t) (((q31_t) yt * sinVal) >> 16U)) );
        pBot[1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) -
                   ((int16_t) (((q31_t) xt * sinVal) >> 16U)) );
    }
#endif /* #if defined (ARM_MATH_DSP) */

    /* first col */
    arm_radix4_butterfly_q15( pSrc, half, (q15_t*)pCoef, 2U);

    /* second col */
    arm_radix4_butterfly_q15( pSrc + fftLen, half, (q15_t*)pCoef, 2U);

    /* Shift every value left by one, four values per iteration. */
    for (idx = 0; idx < half; idx++)
    {
        q15_t * const pQuad = &pSrc[4 * idx];

        pQuad[0] <<= 1U;
        pQuad[1] <<= 1U;
        pQuad[2] <<= 1U;
        pQuad[3] <<= 1U;
    }
}
/*
 * Inverse Q15 transform for the lengths arm_cfft_q15 routes here (32, 128,
 * 512, 2048): a radix-2 pass combining the two halves of the buffer, an
 * inverse radix-4 butterfly (twiddle modifier 2) on each half, and a final
 * left shift by one of every value.
 *
 * pSrc    buffer of 2 * fftLen q15 values (interleaved pairs), in place
 * fftLen  transform length
 * pCoef   twiddle table, read as consecutive (cos, sin) pairs
 */
void arm_cfft_radix4by2_inverse_q15(
        q15_t * pSrc,
        uint32_t fftLen,
  const q15_t * pCoef)
{
    const uint32_t half = fftLen >> 1U;
    uint32_t idx;

#if defined (ARM_MATH_DSP)
    const q15_t *pTw = pCoef;
    q15_t *pTop = pSrc;
    q15_t *pBot = pSrc + fftLen;
    q31_t tw, top, bot, diff;
    q31_t lo, hi, packed;

    /* Each 32-bit word read here holds one pair of q15 values. */
    for (idx = half; idx > 0; idx--)
    {
        tw = read_q15x2_ia ((q15_t **) &pTw);

        top = read_q15x2 (pTop);
        top = __SHADD16(top, 0); /* this is just a SIMD arithmetic shift right by 1 */

        bot = read_q15x2 (pBot);
        bot = __SHADD16(bot, 0); /* this is just a SIMD arithmetic shift right by 1 */

        diff = __QSUB16(top, bot);

        write_q15x2_ia (&pTop, __SHADD16(top, bot));

#ifndef ARM_MATH_BIG_ENDIAN
        lo = __SMUSD(tw, diff) >> 16U;
        hi = __SMUADX(tw, diff);
#else
        lo = __SMUADX(diff, tw) >> 16U;
        hi = __SMUSD(__QSUB(0, tw), diff);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* low half-word from lo, high half-word from hi */
        packed = (q31_t) ((hi) & 0xFFFF0000) | (lo & 0x0000FFFF);
        write_q15x2_ia (&pBot, packed);
    }
#else /* #if defined (ARM_MATH_DSP) */
    for (idx = 0; idx < half; idx++)
    {
        const q15_t cosVal = pCoef[2 * idx];
        const q15_t sinVal = pCoef[2 * idx + 1];
        q15_t * const pTop = &pSrc[2 * idx];
        q15_t * const pBot = &pSrc[2 * (idx + half)];
        /* halved differences, taken before the top pair is overwritten */
        const q15_t xt = (pTop[0] >> 1U) - (pBot[0] >> 1U);
        const q15_t yt = (pTop[1] >> 1U) - (pBot[1] >> 1U);

        pTop[0] = ((pTop[0] >> 1U) + (pBot[0] >> 1U)) >> 1U;
        pTop[1] = ((pBot[1] >> 1U) + (pTop[1] >> 1U)) >> 1U;

        pBot[0] = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) -
                   ((int16_t) (((q31_t) yt * sinVal) >> 16U)) );
        pBot[1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) +
                   ((int16_t) (((q31_t) xt * sinVal) >> 16U)) );
    }
#endif /* #if defined (ARM_MATH_DSP) */

    /* first col */
    arm_radix4_butterfly_inverse_q15( pSrc, half, (q15_t*)pCoef, 2U);

    /* second col */
    arm_radix4_butterfly_inverse_q15( pSrc + fftLen, half, (q15_t*)pCoef, 2U);

    /* Shift every value left by one, four values per iteration. */
    for (idx = 0; idx < half; idx++)
    {
        q15_t * const pQuad = &pSrc[4 * idx];

        pQuad[0] <<= 1U;
        pQuad[1] <<= 1U;
        pQuad[2] <<= 1U;
        pQuad[3] <<= 1U;
    }
}
#endif /* defined(ARM_MATH_MVEI) */

View file

@ -0,0 +1,876 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_q31.c
* Description: Combined Radix Decimation in Frequency CFFT fixed point processing function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#if defined(ARM_MATH_MVEI)
#include "arm_vec_fft.h"
/*
 * In-place element swap of the buffer at pSrc, driven by an index table.
 *
 * pSrc        buffer to reorder; it is accessed through a uint64_t pointer,
 *             so every swapped element is 64 bits wide (presumably one
 *             complex Q31 sample, re + im -- confirm against callers).
 * bitRevLen   number of 16-bit entries in pBitRevTab.
 * pBitRevTab  table read 4 entries at a time; each group yields two swap
 *             pairs. Entries are used directly as gather/scatter offsets.
 *
 * There is no tail handling: only (bitRevLen / 2) / 2 whole groups of four
 * entries are processed, so any remaining entries are ignored.
 */
static void arm_bitreversal_32_inpl_mve(
uint32_t *pSrc,
const uint16_t bitRevLen,
const uint16_t *pBitRevTab)
{
uint64_t *src = (uint64_t *) pSrc;
uint32_t blkCnt; /* loop counters */
uint32x4_t bitRevTabOff;
/* multiplying by 1 is only used to widen the 32-bit offsets to 64 bits */
uint32x4_t one = vdupq_n_u32(1);
/* 4 table entries (2 swap pairs) are consumed per iteration */
blkCnt = (bitRevLen / 2) / 2;
while (blkCnt > 0U) {
/* load 4 half-word table entries into 32-bit lanes */
bitRevTabOff = vldrhq_u32(pBitRevTab);
pBitRevTab += 4;
/* split into two vectors of 2 widened offsets (bottom / top lanes) */
uint64x2_t bitRevOff1 = vmullbq_int_u32(bitRevTabOff, one);
uint64x2_t bitRevOff2 = vmulltq_int_u32(bitRevTabOff, one);
/* read both sides of each pair, then write them back crossed over */
uint64x2_t in1 = vldrdq_gather_offset_u64(src, bitRevOff1);
uint64x2_t in2 = vldrdq_gather_offset_u64(src, bitRevOff2);
vstrdq_scatter_offset_u64(src, bitRevOff1, in2);
vstrdq_scatter_offset_u64(src, bitRevOff2, in1);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
}
/*
 * In-place forward radix-4 butterfly passes over a Q31 complex buffer,
 * written with MVE intrinsics.
 *
 * S       instance providing the rearranged twiddle tables
 *         (rearranged_twiddle_stride{1,2,3} and their per-stage offset
 *         arrays rearranged_twiddle_tab_stride{1,2,3}_arr).
 * pSrc    data buffer, processed in place.
 * fftLen  length handled by this call; the caller in this file passes half
 *         of the FFT length.
 *
 * All stages except the last run in the nested loops below with contiguous
 * vector loads/stores; the last stage uses gather/scatter accesses and
 * applies no twiddle multiplication.
 */
static void _arm_radix4_butterfly_q31_mve(
const arm_cfft_instance_q31 * S,
q31_t *pSrc,
uint32_t fftLen)
{
q31x4_t vecTmp0, vecTmp1;
q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
q31x4_t vecA, vecB, vecC, vecD;
q31x4_t vecW;
uint32_t blkCnt;
uint32_t n1, n2;
uint32_t stage = 0;
int32_t iter = 1;
/*
* Per-lane byte offsets for the last-stage gather/scatter: lanes 0/1 address
* two adjacent words, lanes 2/3 the same two words 8 pointer sizes further on.
* They are biased by -16 pointer sizes; with 4-byte pointers (assumed for the
* target -- confirm) that is -64 bytes, which the first write-back load (+64)
* cancels.
*/
static const uint32_t strides[4] = {
(0 - 16) * sizeof(q31_t *), (1 - 16) * sizeof(q31_t *),
(8 - 16) * sizeof(q31_t *), (9 - 16) * sizeof(q31_t *)
};
/*
* Process first stages
* Each stage in middle stages provides two down scaling of the input
*/
/* n1 = size of the current group, n2 = size of one quarter of it */
n2 = fftLen;
n1 = n2;
n2 >>= 2u;
/* one pass of this loop per stage; the final stage is handled after the loop */
for (int k = fftLen / 4u; k > 1; k >>= 2u)
{
/* iter = number of independent groups in this stage (x4 per stage) */
for (int i = 0; i < iter; i++)
{
/* start of this stage's twiddles inside each rearranged table */
q31_t const *p_rearranged_twiddle_tab_stride2 =
&S->rearranged_twiddle_stride2[
S->rearranged_twiddle_tab_stride2_arr[stage]];
q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
S->rearranged_twiddle_tab_stride3_arr[stage]];
q31_t const *p_rearranged_twiddle_tab_stride1 =
&S->rearranged_twiddle_stride1[
S->rearranged_twiddle_tab_stride1_arr[stage]];
q31_t const *pW1, *pW2, *pW3;
/* the four quarters A, B, C, D of group i */
q31_t *inA = pSrc + CMPLX_DIM * i * n1;
q31_t *inB = inA + n2 * CMPLX_DIM;
q31_t *inC = inB + n2 * CMPLX_DIM;
q31_t *inD = inC + n2 * CMPLX_DIM;
pW1 = p_rearranged_twiddle_tab_stride1;
pW2 = p_rearranged_twiddle_tab_stride2;
pW3 = p_rearranged_twiddle_tab_stride3;
/* each vector holds 4 q31 values, advanced 4 at a time below */
blkCnt = n2 / 2;
/*
* load 2 x q31 complex pair
*/
/* A and C are loaded ahead of the loop and re-loaded at the end of each iteration */
vecA = vldrwq_s32(inA);
vecC = vldrwq_s32(inC);
while (blkCnt > 0U)
{
vecB = vldrwq_s32(inB);
vecD = vldrwq_s32(inD);
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
*/
vecTmp0 = vhaddq(vecSum0, vecSum1);
vst1q(inA, vecTmp0);
inA += 4;
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'
*/
vecTmp0 = vhsubq(vecSum0, vecSum1);
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
*/
vecW = vld1q(pW2);
pW2 += 4;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inB, vecTmp1);
inB += 4;
/*
* [ 1 -i -1 +i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 -i -1 +i ] * [ A B C D ]'.* W1
*/
vecW = vld1q(pW1);
pW1 += 4;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inC, vecTmp1);
inC += 4;
/*
* [ 1 +i -1 -i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 +i -1 -i ] * [ A B C D ]'.* W3
*/
vecW = vld1q(pW3);
pW3 += 4;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inD, vecTmp1);
inD += 4;
/* pre-load A and C for the next iteration. NOTE(review): on the last iteration this reads just past the quarter being processed; the values are not used. */
vecA = vldrwq_s32(inA);
vecC = vldrwq_s32(inC);
blkCnt--;
}
}
/* next stage: groups shrink by 4, their count grows by 4 */
n1 = n2;
n2 >>= 2u;
iter = iter << 2;
stage++;
}
/*
* End of 1st stages process
* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages
* data is in 9.23(q23) format for the 256 point as there are 2 middle stages
* data is in 7.25(q25) format for the 64 point as there are 1 middle stage
* data is in 5.27(q27) format for the 16 point as there are no middle stages
*/
/*
* start of Last stage process
*/
/* turn the biased per-lane offsets into absolute addresses inside pSrc */
uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*
* load scheduling
*/
/* the _wb load advances the address vector by 64 bytes; B, C, D are then read at +8, +16, +24 from it */
vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = vldrwq_gather_base_s32(vecScGathAddr, 16);
blkCnt = (fftLen >> 3);
while (blkCnt > 0U)
{
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecB = vldrwq_gather_base_s32(vecScGathAddr, 8);
vecD = vldrwq_gather_base_s32(vecScGathAddr, 24);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* pre-load for next iteration
*/
vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = vldrwq_gather_base_s32(vecScGathAddr, 16);
/* the address vector already points at the next block, hence the -64 on every store */
vecTmp0 = vhaddq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64, vecTmp0);
vecTmp0 = vhsubq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 16, vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 24, vecTmp0);
blkCnt--;
}
/*
* output is in 11.21(q21) format for the 1024 point
* output is in 9.23(q23) format for the 256 point
* output is in 7.25(q25) format for the 64 point
* output is in 5.27(q27) format for the 16 point
*/
}
/*
 * Forward q31 CFFT for lengths handled as one radix-2 split followed by two
 * radix-4 transforms of half the length (MVE build).
 *
 * S       CFFT instance; S->pTwiddle supplies the twiddles of the split stage
 * pSrc    interleaved complex buffer (2 * fftLen q31 words), updated in place
 * fftLen  number of complex points
 */
static void arm_cfft_radix4by2_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pSrc, uint32_t fftLen)
{
    const uint32_t halfLen = fftLen >> 1;
    const q31_t   *pTw     = S->pTwiddle;
    q31_t         *pLo     = pSrc;            /* walks the first half of the buffer  */
    q31_t         *pHi     = pSrc + fftLen;   /* walks the second half of the buffer */
    q31_t         *pOut;
    uint32_t       cnt;
    uint32_t       rem;
    q31x4_t        lo, hi, tw;
    q31x4_t        sum, diff, rotated;
    q31x4_t        vec;

    /*
     * Radix-2 split stage. One vector holds two complex samples, hence
     * halfLen / 2 iterations. Inputs are shifted right by one before the
     * halving add / subtract.
     */
    for (cnt = halfLen / 2; cnt > 0U; cnt--)
    {
        lo = vld1q_s32(pLo);
        hi = vld1q_s32(pHi);
        lo = lo >> 1;
        hi = hi >> 1;

        /* first half <- halved sum */
        sum = vhaddq(lo, hi);
        vst1q(pLo, sum);
        pLo += 4;

        /* second half <- halved difference combined with the twiddle */
        tw = vld1q_s32(pTw);
        pTw += 4;
        diff = vhsubq(lo, hi);
        rotated = MVE_CMPLX_MULT_FX_AxConjB(diff, tw);
        vst1q(pHi, rotated);
        pHi += 4;
    }

    /* Two independent radix-4 transforms, one per half of the buffer */
    _arm_radix4_butterfly_q31_mve(S, pSrc, halfLen);
    _arm_radix4_butterfly_q31_mve(S, pSrc + fftLen, halfLen);

    /* Shift every word of the buffer left by one, four words per pass */
    pOut = pSrc;
    for (cnt = (fftLen << 1) >> 2; cnt > 0U; cnt--)
    {
        vec = vld1q_s32(pOut);
        vec = vec << 1;
        vst1q(pOut, vec);
        pOut += 4;
    }

    /*
     * Tail: words left over when 2 * fftLen is not a multiple of 4 are
     * written through a predicate so that only `rem` lanes are stored.
     */
    rem = (fftLen << 1) & 3;
    if (rem > 0U)
    {
        mve_pred16_t p0 = vctp32q(rem);

        vec = vld1q_s32(pOut);
        vec = vec << 1;
        vstrwq_p(pOut, vec, p0);
    }
}
/*
 * In-place inverse radix-4 CFFT butterfly for q31 data, written with MVE intrinsics.
 *
 * S       instance supplying the three rearranged twiddle tables
 *         (stride1/stride2/stride3) and the per-stage start offsets into them
 * pSrc    interleaved complex buffer, processed in place
 * fftLen  transform length; callers in this file pass either S->fftLen or
 *         half of it (radix-4-by-2 path)
 *
 * The stage loop multiplies three of the four butterfly outputs by twiddles
 * through MVE_CMPLX_MULT_FX_AxConjB (defined elsewhere). The last stage has no
 * twiddle multiply and accesses memory through gather/scatter on a vector of
 * base addresses.
 */
static void _arm_radix4_butterfly_inverse_q31_mve(
const arm_cfft_instance_q31 *S,
q31_t *pSrc,
uint32_t fftLen)
{
q31x4_t vecTmp0, vecTmp1;
q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
q31x4_t vecA, vecB, vecC, vecD;
q31x4_t vecW;
uint32_t blkCnt;
uint32_t n1, n2;
uint32_t stage = 0;
int32_t iter = 1;
/*
 * Per-lane byte offsets for the last-stage gather: elements 0, 1, 8 and 9,
 * each biased by -16 elements.
 * NOTE(review): the scale factor is sizeof(q31_t *) rather than sizeof(q31_t);
 * the two only agree where pointers are 4 bytes wide - confirm intent.
 */
static const uint32_t strides[4] = {
(0 - 16) * sizeof(q31_t *), (1 - 16) * sizeof(q31_t *),
(8 - 16) * sizeof(q31_t *), (9 - 16) * sizeof(q31_t *)
};
/*
* Process first stages
* Each stage in middle stages provides two down scaling of the input
*/
n2 = fftLen;
n1 = n2;
n2 >>= 2u;
/* One pass per twiddled stage; k shrinks by 4 per stage while iter (groups per stage) grows by 4 */
for (int k = fftLen / 4u; k > 1; k >>= 2u)
{
for (int i = 0; i < iter; i++)
{
/* Start of this stage's twiddles inside each rearranged table */
q31_t const *p_rearranged_twiddle_tab_stride2 =
&S->rearranged_twiddle_stride2[
S->rearranged_twiddle_tab_stride2_arr[stage]];
q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
S->rearranged_twiddle_tab_stride3_arr[stage]];
q31_t const *p_rearranged_twiddle_tab_stride1 =
&S->rearranged_twiddle_stride1[
S->rearranged_twiddle_tab_stride1_arr[stage]];
q31_t const *pW1, *pW2, *pW3;
/* Four input legs of group i, each n2 complex samples apart */
q31_t *inA = pSrc + CMPLX_DIM * i * n1;
q31_t *inB = inA + n2 * CMPLX_DIM;
q31_t *inC = inB + n2 * CMPLX_DIM;
q31_t *inD = inC + n2 * CMPLX_DIM;
pW1 = p_rearranged_twiddle_tab_stride1;
pW2 = p_rearranged_twiddle_tab_stride2;
pW3 = p_rearranged_twiddle_tab_stride3;
/* Each vector carries two complex samples, so n2 / 2 iterations cover one leg */
blkCnt = n2 / 2;
/*
* load 2 x q31 complex pair
*/
vecA = vldrwq_s32(inA);
vecC = vldrwq_s32(inC);
while (blkCnt > 0U)
{
vecB = vldrwq_s32(inB);
vecD = vldrwq_s32(inD);
/* Halving adds/subtracts: every level of the butterfly divides by two */
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
*/
vecTmp0 = vhaddq(vecSum0, vecSum1);
vst1q(inA, vecTmp0);
inA += 4;
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'
*/
vecTmp0 = vhsubq(vecSum0, vecSum1);
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
*/
vecW = vld1q(pW2);
pW2 += 4;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inB, vecTmp1);
inB += 4;
/*
* [ 1 -i -1 +i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 -i -1 +i ] * [ A B C D ]'.* W1
*/
vecW = vld1q(pW1);
pW1 += 4;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inC, vecTmp1);
inC += 4;
/*
* [ 1 +i -1 -i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 +i -1 -i ] * [ A B C D ]'.* W3
*/
vecW = vld1q(pW3);
pW3 += 4;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inD, vecTmp1);
inD += 4;
/*
 * Pre-load A and C for the next iteration. This also executes on the final
 * iteration, reading from the already advanced inA / inC.
 * NOTE(review): confirm that this trailing read is always inside valid memory.
 */
vecA = vldrwq_s32(inA);
vecC = vldrwq_s32(inC);
blkCnt--;
}
}
n1 = n2;
n2 >>= 2u;
iter = iter << 2;
stage++;
}
/*
* End of 1st stages process
* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages
* data is in 9.23(q23) format for the 256 point as there are 2 middle stages
* data is in 7.25(q25) format for the 64 point as there are 1 middle stage
* data is in 5.27(q27) format for the 16 point as there are no middle stages
*/
/*
* start of Last stage process
*/
/* Build the vector of lane base addresses: pSrc (as a 32-bit address) plus the biased strides */
uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*
* load scheduling
*/
/*
 * The write-back gather is used with offset 64 on every call; with 4-byte
 * elements that equals the 16-element bias built into strides.
 * NOTE(review): presumably the write-back form advances the stored bases by
 * the offset - confirm against the MVE intrinsic reference.
 */
vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = vldrwq_gather_base_s32(vecScGathAddr, 16);
blkCnt = (fftLen >> 3);
while (blkCnt > 0U)
{
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecB = vldrwq_gather_base_s32(vecScGathAddr, 8);
vecD = vldrwq_gather_base_s32(vecScGathAddr, 24);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* pre-load for next iteration
*/
vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = vldrwq_gather_base_s32(vecScGathAddr, 16);
/* The bases were advanced by the pre-load above, hence the -64 in every store offset below */
vecTmp0 = vhaddq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64, vecTmp0);
vecTmp0 = vhsubq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 16, vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 24, vecTmp0);
blkCnt--;
}
/*
* output is in 11.21(q21) format for the 1024 point
* output is in 9.23(q23) format for the 256 point
* output is in 7.25(q25) format for the 64 point
* output is in 5.27(q27) format for the 16 point
*/
}
/*
 * Inverse q31 CFFT for lengths handled as one radix-2 split followed by two
 * inverse radix-4 transforms of half the length (MVE build).
 *
 * S       CFFT instance; S->pTwiddle supplies the twiddles of the split stage
 * pSrc    interleaved complex buffer (2 * fftLen q31 words), updated in place
 * fftLen  number of complex points
 */
static void arm_cfft_radix4by2_inverse_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pSrc, uint32_t fftLen)
{
    const uint32_t halfLen = fftLen >> 1;
    const q31_t   *pTw     = S->pTwiddle;
    q31_t         *pLo     = pSrc;            /* walks the first half of the buffer  */
    q31_t         *pHi     = pSrc + fftLen;   /* walks the second half of the buffer */
    q31_t         *pOut;
    uint32_t       cnt;
    uint32_t       rem;
    q31x4_t        lo, hi, tw;
    q31x4_t        sum, diff, rotated;
    q31x4_t        vec;

    /*
     * Radix-2 split stage. One vector holds two complex samples, hence
     * halfLen / 2 iterations. Inputs are shifted right by one before the
     * halving add / subtract.
     */
    for (cnt = halfLen / 2; cnt > 0U; cnt--)
    {
        lo = vld1q_s32(pLo);
        hi = vld1q_s32(pHi);
        lo = lo >> 1;
        hi = hi >> 1;

        /* first half <- halved sum */
        sum = vhaddq(lo, hi);
        vst1q(pLo, sum);
        pLo += 4;

        /* second half <- halved difference combined with the twiddle */
        tw = vld1q_s32(pTw);
        pTw += 4;
        diff = vhsubq(lo, hi);
        rotated = MVE_CMPLX_MULT_FX_AxB(diff, tw);
        vst1q(pHi, rotated);
        pHi += 4;
    }

    /* Two independent inverse radix-4 transforms, one per half of the buffer */
    _arm_radix4_butterfly_inverse_q31_mve(S, pSrc, halfLen);
    _arm_radix4_butterfly_inverse_q31_mve(S, pSrc + fftLen, halfLen);

    /* Shift every word of the buffer left by one, four words per pass */
    pOut = pSrc;
    for (cnt = (fftLen << 1) >> 2; cnt > 0U; cnt--)
    {
        vec = vld1q_s32(pOut);
        vec = vec << 1;
        vst1q(pOut, vec);
        pOut += 4;
    }

    /*
     * Tail: words left over when 2 * fftLen is not a multiple of 4 are
     * written through a predicate so that only `rem` lanes are stored.
     */
    rem = (fftLen << 1) & 3;
    if (rem > 0U)
    {
        mve_pred16_t p0 = vctp32q(rem);

        vec = vld1q_s32(pOut);
        vec = vec << 1;
        vstrwq_p(pOut, vec, p0);
    }
}
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Processing function for the Q31 complex FFT.
@param[in] S points to an instance of the fixed-point CFFT structure
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
@param[in] ifftFlag flag that selects transform direction
- value = 0: forward transform
- value = 1: inverse transform
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
- value = 0: disables bit reversal of output
- value = 1: enables bit reversal of output
@return none
*/
void arm_cfft_q31(
  const arm_cfft_instance_q31 * S,
        q31_t * pSrc,
        uint8_t ifftFlag,
        uint8_t bitReverseFlag)
{
    const uint32_t fftLen  = S->fftLen;
    const int      inverse = (ifftFlag == 1U);  /* only the value 1 selects the inverse transform */

    /* Dispatch on the length class first, then on the direction */
    switch (fftLen)
    {
    /* Powers of four: pure radix-4 decomposition */
    case 16:
    case 64:
    case 256:
    case 1024:
    case 4096:
        if (inverse)
        {
            _arm_radix4_butterfly_inverse_q31_mve(S, pSrc, fftLen);
        }
        else
        {
            _arm_radix4_butterfly_q31_mve(S, pSrc, fftLen);
        }
        break;

    /* Remaining powers of two: one radix-2 split plus two radix-4 halves */
    case 32:
    case 128:
    case 512:
    case 2048:
        if (inverse)
        {
            arm_cfft_radix4by2_inverse_q31_mve(S, pSrc, fftLen);
        }
        else
        {
            arm_cfft_radix4by2_q31_mve(S, pSrc, fftLen);
        }
        break;

    /* Any other length: no transform is performed */
    default:
        break;
    }

    /* Optional reordering of the output, applied whatever the length was */
    if (bitReverseFlag)
    {
        arm_bitreversal_32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable);
    }
}
#else
/* Forward radix-4 butterfly kernel; declared extern, its definition is not in this part of the file. */
extern void arm_radix4_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef,
uint32_t twidCoefModifier);
/* Inverse radix-4 butterfly kernel; declared extern, its definition is not in this part of the file. */
extern void arm_radix4_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef,
uint32_t twidCoefModifier);
/* Bit-reversal reordering of a buffer of 32-bit words, driven by a table of 16-bit entries. */
extern void arm_bitreversal_32(
uint32_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
/* Forward declaration: radix-2 split followed by two radix-4 transforms (defined later in this file). */
void arm_cfft_radix4by2_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef);
/* Forward declaration: inverse counterpart of arm_cfft_radix4by2_q31 (defined later in this file). */
void arm_cfft_radix4by2_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef);
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Processing function for the Q31 complex FFT.
@param[in] S points to an instance of the fixed-point CFFT structure
@param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
@param[in] ifftFlag flag that selects transform direction
- value = 0: forward transform
- value = 1: inverse transform
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
- value = 0: disables bit reversal of output
- value = 1: enables bit reversal of output
@return none
*/
void arm_cfft_q31(
  const arm_cfft_instance_q31 * S,
        q31_t * p1,
        uint8_t ifftFlag,
        uint8_t bitReverseFlag)
{
    const uint32_t fftLen  = S->fftLen;
    const int      inverse = (ifftFlag == 1U);  /* only the value 1 selects the inverse transform */

    /* Dispatch on the length class first, then on the direction */
    switch (fftLen)
    {
    /* Powers of four: pure radix-4 decomposition, twiddle modifier 1 */
    case 16:
    case 64:
    case 256:
    case 1024:
    case 4096:
        if (inverse)
        {
            arm_radix4_butterfly_inverse_q31(p1, fftLen, S->pTwiddle, 1);
        }
        else
        {
            arm_radix4_butterfly_q31(p1, fftLen, S->pTwiddle, 1);
        }
        break;

    /* Remaining powers of two: one radix-2 split plus two radix-4 halves */
    case 32:
    case 128:
    case 512:
    case 2048:
        if (inverse)
        {
            arm_cfft_radix4by2_inverse_q31(p1, fftLen, S->pTwiddle);
        }
        else
        {
            arm_cfft_radix4by2_q31(p1, fftLen, S->pTwiddle);
        }
        break;

    /* Any other length: no transform is performed */
    default:
        break;
    }

    /* Optional reordering of the output, applied whatever the length was */
    if (bitReverseFlag)
    {
        arm_bitreversal_32((uint32_t*) p1, S->bitRevLength, S->pBitRevTable);
    }
}
/**
@} end of ComplexFFT group
*/
/*
 * Forward q31 CFFT for lengths that are twice a power of four: one radix-2
 * split stage, then a radix-4 transform on each half of the buffer.
 *
 * pSrc    interleaved complex buffer (re, im, re, im, ...), updated in place
 * fftLen  number of complex points
 * pCoef   interleaved twiddle table; entry 2*i is read as cosVal and entry
 *         2*i + 1 as sinVal
 *
 * Left shifts are carried out on uint32_t and converted back to q31_t:
 * shifting a negative signed value left is undefined behaviour in C, while
 * the unsigned shift yields the same bit pattern with defined semantics.
 */
void arm_cfft_radix4by2_q31(
        q31_t * pSrc,
        uint32_t fftLen,
  const q31_t * pCoef)
{
    uint32_t i, l;
    uint32_t n2;
    uint32_t total;
    q31_t xt, yt, cosVal, sinVal;
    q31_t p0, p1;

    n2 = fftLen >> 1U;

    /* Radix-2 split: sample i is paired with sample i + n2 */
    for (i = 0; i < n2; i++)
    {
        cosVal = pCoef[2 * i];
        sinVal = pCoef[2 * i + 1];

        l = i + n2;

        /* Every input is shifted right by two before being combined */
        xt = (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U);
        pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U);

        yt = (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U);
        pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U);

        /*
         * Combine the difference (xt, yt) with the twiddle. The helper macros
         * are defined elsewhere; presumably p0 = xt*cos + yt*sin and
         * p1 = yt*cos - xt*sin, keeping the high 32 bits - TODO confirm.
         */
        mult_32x32_keep32_R(p0, xt, cosVal);
        mult_32x32_keep32_R(p1, yt, cosVal);
        multAcc_32x32_keep32_R(p0, yt, sinVal);
        multSub_32x32_keep32_R(p1, xt, sinVal);

        pSrc[2 * l]     = (q31_t) ((uint32_t) p0 << 1U);
        pSrc[2 * l + 1] = (q31_t) ((uint32_t) p1 << 1U);
    }

    /* first col */
    arm_radix4_butterfly_q31 (pSrc, n2, pCoef, 2U);

    /* second col */
    arm_radix4_butterfly_q31 (pSrc + fftLen, n2, pCoef, 2U);

    /* Shift every word of both halves left by one (4 * n2 words in total) */
    total = 4U * n2;
    for (i = 0; i < total; i++)
    {
        pSrc[i] = (q31_t) ((uint32_t) pSrc[i] << 1U);
    }
}
/*
 * Inverse q31 CFFT for lengths that are twice a power of four: one radix-2
 * split stage, then an inverse radix-4 transform on each half of the buffer.
 *
 * pSrc    interleaved complex buffer (re, im, re, im, ...), updated in place
 * fftLen  number of complex points
 * pCoef   interleaved twiddle table; entry 2*i is read as cosVal and entry
 *         2*i + 1 as sinVal
 *
 * Left shifts are carried out on uint32_t and converted back to q31_t:
 * shifting a negative signed value left is undefined behaviour in C, while
 * the unsigned shift yields the same bit pattern with defined semantics.
 */
void arm_cfft_radix4by2_inverse_q31(
        q31_t * pSrc,
        uint32_t fftLen,
  const q31_t * pCoef)
{
    uint32_t i, l;
    uint32_t n2;
    uint32_t total;
    q31_t xt, yt, cosVal, sinVal;
    q31_t p0, p1;

    n2 = fftLen >> 1U;

    /* Radix-2 split: sample i is paired with sample i + n2 */
    for (i = 0; i < n2; i++)
    {
        cosVal = pCoef[2 * i];
        sinVal = pCoef[2 * i + 1];

        l = i + n2;

        /* Every input is shifted right by two before being combined */
        xt = (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U);
        pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U);

        yt = (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U);
        pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U);

        /*
         * Combine the difference (xt, yt) with the twiddle; the sine terms
         * have the opposite sign to the forward transform. The helper macros
         * are defined elsewhere; presumably p0 = xt*cos - yt*sin and
         * p1 = yt*cos + xt*sin, keeping the high 32 bits - TODO confirm.
         */
        mult_32x32_keep32_R(p0, xt, cosVal);
        mult_32x32_keep32_R(p1, yt, cosVal);
        multSub_32x32_keep32_R(p0, yt, sinVal);
        multAcc_32x32_keep32_R(p1, xt, sinVal);

        pSrc[2 * l]     = (q31_t) ((uint32_t) p0 << 1U);
        pSrc[2 * l + 1] = (q31_t) ((uint32_t) p1 << 1U);
    }

    /* first col */
    arm_radix4_butterfly_inverse_q31 (pSrc, n2, pCoef, 2U);

    /* second col */
    arm_radix4_butterfly_inverse_q31 (pSrc + fftLen, n2, pCoef, 2U);

    /* Shift every word of both halves left by one (4 * n2 words in total) */
    total = 4U * n2;
    for (i = 0; i < total; i++)
    {
        pSrc[i] = (q31_t) ((uint32_t) pSrc[i] << 1U);
    }
}
#endif /* defined(ARM_MATH_MVEI) */

View file

@ -0,0 +1,470 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_f32.c
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* Forward radix-2 butterfly core (defined later in this file). */
void arm_radix2_butterfly_f32(
float32_t * pSrc,
uint32_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier);
/* Inverse radix-2 butterfly core; onebyfftLen is multiplied into the outputs of its last stage (defined later in this file). */
void arm_radix2_butterfly_inverse_f32(
float32_t * pSrc,
uint32_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen);
/* Bit-reversal reordering for floating-point buffers; declared extern, its definition is not in this part of the file. */
extern void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
const uint16_t * pBitRevTab);
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Radix-2 CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future
@param[in] S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
@return none
*/
void arm_cfft_radix2_f32(
  const arm_cfft_radix2_instance_f32 * S,
        float32_t * pSrc)
{
    const int inverse    = (S->ifftFlag == 1U);
    const int bitReverse = (S->bitReverseFlag == 1U);

    if (!inverse)
    {
        /* Forward transform: radix-2 butterflies only */
        arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
                                 S->twidCoefModifier);
    }
    else
    {
        /* Inverse transform: the butterfly core also receives 1/fftLen */
        arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
                                         S->twidCoefModifier, S->onebyfftLen);
    }

    /* Reorder the output only when the instance asks for it */
    if (bitReverse)
    {
        arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
    }
}
/**
@} end of ComplexFFT group
*/
/* ----------------------------------------------------------------------
** Internal helper function used by the FFTs
** ------------------------------------------------------------------- */
/**
brief Core function for the floating-point CFFT butterfly process.
param[in,out] pSrc points to in-place buffer of floating-point data type
param[in] fftLen length of the FFT
param[in] pCoef points to twiddle coefficient buffer
param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
return none
*/
void arm_radix2_butterfly_f32(
        float32_t * pSrc,
        uint32_t fftLen,
  const float32_t * pCoef,
        uint16_t twidCoefModifier)
{
    uint32_t top, bot;              /* complex indices of the two butterfly legs      */
    uint32_t grp, stg;              /* group counter and stage counter                */
    uint32_t span, half, twIdx;     /* leg spacing of a stage, half of it, twiddle index */
    float32_t reDif, imDif, twCos, twSin;
    float32_t reCos, imSin, imCos, reSin;
    float32_t reSum, imSum;
    float32_t *pTop, *pBot;         /* the two legs as (re, im) pairs */

#if defined (ARM_MATH_DSP)

    /* ---- First stage: every butterfly has its own twiddle ---- */
    half  = fftLen >> 1;
    twIdx = 0;

    for (top = 0; top < half; top++)
    {
        twCos = pCoef[twIdx * 2];
        twSin = pCoef[(twIdx * 2) + 1];

        /* step through the shared twiddle table */
        twIdx += twidCoefModifier;

        /* legs are half the transform apart */
        bot  = top + half;
        pTop = &pSrc[2 * top];
        pBot = &pSrc[2 * bot];

        reSum = pTop[0] + pBot[0];
        reDif = pTop[0] - pBot[0];
        imDif = pTop[1] - pBot[1];
        imSum = pBot[1] + pTop[1];

        reCos = reDif * twCos;
        imSin = imDif * twSin;
        imCos = imDif * twCos;
        reSin = reDif * twSin;

        pTop[0] = reSum;
        pTop[1] = imSum;
        pBot[0] = reCos + imSin;
        pBot[1] = imCos - reSin;
    }

    twidCoefModifier <<= 1U;

    /* ---- Middle stages: one twiddle per group, reused across butterflies ---- */
    for (stg = half; stg > 2; stg = stg >> 1)
    {
        span  = half;
        half  = half >> 1;
        twIdx = 0;

        grp = 0;
        do
        {
            twCos = pCoef[twIdx * 2];
            twSin = pCoef[(twIdx * 2) + 1];
            twIdx += twidCoefModifier;

            /* all butterflies sharing this twiddle, `span` samples apart */
            top = grp;
            do
            {
                bot  = top + half;
                pTop = &pSrc[2 * top];
                pBot = &pSrc[2 * bot];

                reSum = pTop[0] + pBot[0];
                reDif = pTop[0] - pBot[0];
                imDif = pTop[1] - pBot[1];
                imSum = pBot[1] + pTop[1];

                reCos = reDif * twCos;
                imSin = imDif * twSin;
                imCos = imDif * twCos;
                reSin = reDif * twSin;

                pTop[0] = reSum;
                pTop[1] = imSum;
                pBot[0] = reCos + imSin;
                pBot[1] = imCos - reSin;

                top += span;
            } while (top < fftLen);

            grp++;
        } while (grp < half);

        twidCoefModifier <<= 1U;
    }

    /* ---- Last stage: adjacent samples, no twiddle multiplication ---- */
    for (top = 0; top < fftLen; top += 2)
    {
        pTop = &pSrc[2 * top];

        reSum = pTop[0] + pTop[2];
        reDif = pTop[0] - pTop[2];
        imDif = pTop[1] - pTop[3];
        imSum = pTop[3] + pTop[1];

        pTop[0] = reSum;
        pTop[1] = imSum;
        pTop[2] = reDif;
        pTop[3] = imDif;
    }

#else /* #if defined (ARM_MATH_DSP) */

    half = fftLen;

    /* ---- All stages handled by the same group / butterfly loops ---- */
    for (stg = fftLen; stg > 1; stg = stg >> 1)
    {
        span  = half;
        half  = half >> 1;
        twIdx = 0;

        grp = 0;
        do
        {
            twCos = pCoef[twIdx * 2];
            twSin = pCoef[(twIdx * 2) + 1];
            twIdx += twidCoefModifier;

            /* all butterflies sharing this twiddle, `span` samples apart */
            top = grp;
            do
            {
                bot  = top + half;
                pTop = &pSrc[2 * top];
                pBot = &pSrc[2 * bot];

                reSum = pTop[0] + pBot[0];
                reDif = pTop[0] - pBot[0];
                imDif = pTop[1] - pBot[1];
                imSum = pBot[1] + pTop[1];

                reCos = reDif * twCos;
                imSin = imDif * twSin;
                imCos = imDif * twCos;
                reSin = reDif * twSin;

                pTop[0] = reSum;
                pTop[1] = imSum;
                pBot[0] = reCos + imSin;
                pBot[1] = imCos - reSin;

                top += span;
            } while (top < fftLen);

            grp++;
        } while (grp < half);

        twidCoefModifier <<= 1U;
    }

#endif /* #if defined (ARM_MATH_DSP) */
}
/*
 * Core of the floating-point radix-2 inverse CFFT butterfly process.
 *
 * pSrc              in-place buffer of interleaved (re, im) samples
 * fftLen            length of the FFT
 * pCoef             twiddle coefficient table, interleaved (cos, sin)
 * twidCoefModifier  step through the twiddle table; doubled after each stage
 * onebyfftLen       factor multiplied into every output of the last stage
 */
void arm_radix2_butterfly_inverse_f32(
        float32_t * pSrc,
        uint32_t fftLen,
  const float32_t * pCoef,
        uint16_t twidCoefModifier,
        float32_t onebyfftLen)
{
    uint32_t top, bot;              /* complex indices of the two butterfly legs      */
    uint32_t grp, stg;              /* group counter and stage counter                */
    uint32_t span, half, twIdx;     /* leg spacing of a stage, half of it, twiddle index */
    float32_t reDif, imDif, twCos, twSin;
    float32_t reCos, imSin, imCos, reSin;
    float32_t reSum, imSum;
    float32_t *pTop, *pBot;         /* the two legs as (re, im) pairs */

#if defined (ARM_MATH_DSP)

    /* ---- First stage: every butterfly has its own twiddle ---- */
    half  = fftLen >> 1;
    twIdx = 0;

    for (top = 0; top < half; top++)
    {
        twCos = pCoef[twIdx * 2];
        twSin = pCoef[(twIdx * 2) + 1];
        twIdx += twidCoefModifier;

        bot  = top + half;
        pTop = &pSrc[2 * top];
        pBot = &pSrc[2 * bot];

        reSum = pTop[0] + pBot[0];
        reDif = pTop[0] - pBot[0];
        imDif = pTop[1] - pBot[1];
        imSum = pBot[1] + pTop[1];

        reCos = reDif * twCos;
        imSin = imDif * twSin;
        imCos = imDif * twCos;
        reSin = reDif * twSin;

        /* sine terms carry the opposite sign to the forward butterfly */
        pTop[0] = reSum;
        pTop[1] = imSum;
        pBot[0] = reCos - imSin;
        pBot[1] = imCos + reSin;
    }

    twidCoefModifier <<= 1U;

    /* ---- Middle stages: one twiddle per group, reused across butterflies ---- */
    for (stg = fftLen / 2; stg > 2; stg = stg >> 1)
    {
        span  = half;
        half  = half >> 1;
        twIdx = 0;

        grp = 0;
        do
        {
            twCos = pCoef[twIdx * 2];
            twSin = pCoef[(twIdx * 2) + 1];
            twIdx += twidCoefModifier;

            /* all butterflies sharing this twiddle, `span` samples apart */
            top = grp;
            do
            {
                bot  = top + half;
                pTop = &pSrc[2 * top];
                pBot = &pSrc[2 * bot];

                reSum = pTop[0] + pBot[0];
                reDif = pTop[0] - pBot[0];
                imDif = pTop[1] - pBot[1];
                imSum = pBot[1] + pTop[1];

                reCos = reDif * twCos;
                imSin = imDif * twSin;
                imCos = imDif * twCos;
                reSin = reDif * twSin;

                pTop[0] = reSum;
                pTop[1] = imSum;
                pBot[0] = reCos - imSin;
                pBot[1] = imCos + reSin;

                top += span;
            } while (top < fftLen);

            grp++;
        } while (grp < half);

        twidCoefModifier <<= 1U;
    }

    /* ---- Last stage: adjacent samples, no twiddle, outputs scaled ---- */
    for (top = 0; top < fftLen; top += 2)
    {
        pTop = &pSrc[2 * top];

        reSum = pTop[0] + pTop[2];
        reDif = pTop[0] - pTop[2];
        imSum = pTop[3] + pTop[1];
        imDif = pTop[1] - pTop[3];

        /* reuse the product temporaries for the scaled outputs */
        reCos = reSum * onebyfftLen;
        imCos = reDif * onebyfftLen;
        imSin = imSum * onebyfftLen;
        reSin = imDif * onebyfftLen;

        pTop[0] = reCos;
        pTop[1] = imSin;
        pTop[2] = imCos;
        pTop[3] = reSin;
    }

#else /* #if defined (ARM_MATH_DSP) */

    half = fftLen;

    /* ---- Every stage except the last ---- */
    for (stg = fftLen; stg > 2; stg = stg >> 1)
    {
        span  = half;
        half  = half >> 1;
        twIdx = 0;

        grp = 0;
        do
        {
            twCos = pCoef[twIdx * 2];
            twSin = pCoef[(twIdx * 2) + 1];
            twIdx = twIdx + twidCoefModifier;

            /* all butterflies sharing this twiddle, `span` samples apart */
            top = grp;
            do
            {
                bot  = top + half;
                pTop = &pSrc[2 * top];
                pBot = &pSrc[2 * bot];

                reSum = pTop[0] + pBot[0];
                reDif = pTop[0] - pBot[0];
                imDif = pTop[1] - pBot[1];
                imSum = pBot[1] + pTop[1];

                reCos = reDif * twCos;
                imSin = imDif * twSin;
                imCos = imDif * twCos;
                reSin = reDif * twSin;

                pTop[0] = reSum;
                pTop[1] = imSum;
                pBot[0] = reCos - imSin;
                pBot[1] = imCos + reSin;

                top += span;
            } while (top < fftLen);

            grp++;
        } while (grp < half);

        twidCoefModifier = twidCoefModifier << 1U;
    }

    /* ---- Last stage: no twiddle, outputs scaled by onebyfftLen ---- */
    span = half;
    half = half >> 1;

    for (top = 0; top < fftLen; top += span)
    {
        bot  = top + half;
        pTop = &pSrc[2 * top];
        pBot = &pSrc[2 * bot];

        reSum = pTop[0] + pBot[0];
        reDif = pTop[0] - pBot[0];
        imSum = pBot[1] + pTop[1];
        imDif = pTop[1] - pBot[1];

        /* reuse the product temporaries for the scaled outputs */
        reCos = reSum * onebyfftLen;
        imCos = reDif * onebyfftLen;
        imSin = imSum * onebyfftLen;
        reSin = imDif * onebyfftLen;

        pTop[0] = reCos;
        pBot[0] = imCos;
        pTop[1] = imSin;
        pBot[1] = reSin;
    }

#endif /* #if defined (ARM_MATH_DSP) */
}

View file

@ -0,0 +1,197 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_f32.c
* Description: Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the floating-point CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
@param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure
@param[in] fftLen length of the FFT
@param[in] ifftFlag flag that selects transform direction
- value = 0: forward transform
- value = 1: inverse transform
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
- value = 0: disables bit reversal of output
- value = 1: enables bit reversal of output
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
@par Details
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
@par
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
@par
The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 32, 64, 128, 256, 512, 1024, 2048 and 4096.
@par
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_f32(
  arm_cfft_radix2_instance_f32 * S,
  uint16_t fftLen,
  uint8_t ifftFlag,
  uint8_t bitReverseFlag)
{
  /* Initialise the default arm status */
  arm_status status = ARM_MATH_SUCCESS;

  /* Initialise the FFT length */
  S->fftLen = fftLen;

  /* Initialise the Twiddle coefficient pointer */
  S->pTwiddle = (float32_t *) twiddleCoef;

  /* Initialise the Flag for selection of CFFT or CIFFT */
  S->ifftFlag = ifftFlag;

  /* Initialise the Flag for calculation Bit reversal or not */
  S->bitReverseFlag = bitReverseFlag;

  /*
   * Length-dependent parameters. For every supported length the twiddle
   * modifier and the bit-reversal factor are both 4096 / fftLen, the
   * bit-reversal table starts at index (4096 / fftLen) - 1, and onebyfftLen
   * is 1 / fftLen written as a single-precision constant.
   * For an unsupported length these four fields are left untouched.
   */
  switch (S->fftLen)
  {
  case 4096U:
    /* Initializations of structure parameters for 4096 point FFT */

    /* Initialise the twiddle coef modifier value */
    S->twidCoefModifier = 1U;
    /* Initialise the bit reversal table modifier */
    S->bitRevFactor = 1U;
    /* Initialise the bit reversal table pointer */
    S->pBitRevTable = (uint16_t *) armBitRevTable;
    /* Initialise the 1/fftLen Value */
    S->onebyfftLen = 0.000244140625f;
    break;

  case 2048U:
    /* Initializations of structure parameters for 2048 point FFT */

    /* Initialise the twiddle coef modifier value */
    S->twidCoefModifier = 2U;
    /* Initialise the bit reversal table modifier */
    S->bitRevFactor = 2U;
    /* Initialise the bit reversal table pointer */
    S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
    /* Initialise the 1/fftLen Value */
    S->onebyfftLen = 0.00048828125f;
    break;

  case 1024U:
    /* Initializations of structure parameters for 1024 point FFT */

    /* Initialise the twiddle coef modifier value */
    S->twidCoefModifier = 4U;
    /* Initialise the bit reversal table modifier */
    S->bitRevFactor = 4U;
    /* Initialise the bit reversal table pointer */
    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
    /* Initialise the 1/fftLen Value */
    S->onebyfftLen = 0.0009765625f;
    break;

  case 512U:
    /* Initializations of structure parameters for 512 point FFT */

    /* Initialise the twiddle coef modifier value */
    S->twidCoefModifier = 8U;
    /* Initialise the bit reversal table modifier */
    S->bitRevFactor = 8U;
    /* Initialise the bit reversal table pointer */
    S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
    /* Initialise the 1/fftLen Value */
    S->onebyfftLen = 0.001953125f;
    break;

  case 256U:
    /* Initializations of structure parameters for 256 point FFT */
    S->twidCoefModifier = 16U;
    S->bitRevFactor = 16U;
    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
    S->onebyfftLen = 0.00390625f;
    break;

  case 128U:
    /* Initializations of structure parameters for 128 point FFT */
    S->twidCoefModifier = 32U;
    S->bitRevFactor = 32U;
    S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
    S->onebyfftLen = 0.0078125f;
    break;

  case 64U:
    /* Initializations of structure parameters for 64 point FFT */
    S->twidCoefModifier = 64U;
    S->bitRevFactor = 64U;
    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
    S->onebyfftLen = 0.015625f;
    break;

  case 32U:
    /* Initializations of structure parameters for 32 point FFT */
    S->twidCoefModifier = 128U;
    S->bitRevFactor = 128U;
    S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
    S->onebyfftLen = 0.03125f;
    break;

  case 16U:
    /* Initializations of structure parameters for 16 point FFT */
    S->twidCoefModifier = 256U;
    S->bitRevFactor = 256U;
    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
    S->onebyfftLen = 0.0625f;
    break;

  default:
    /* Reporting argument error if fftSize is not valid value */
    status = ARM_MATH_ARGUMENT_ERROR;
    break;
  }

  return (status);
}
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,182 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_q15.c
* Description: Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the Q15 CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed
@param[in,out] S points to an instance of the Q15 CFFT/CIFFT structure.
@param[in] fftLen length of the FFT.
@param[in] ifftFlag flag that selects transform direction
- value = 0: forward transform
- value = 1: inverse transform
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
- value = 0: disables bit reversal of output
- value = 1: enables bit reversal of output
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
@par Details
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
@par
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
@par
The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 32, 64, 128, 256, 512, 1024, 2048 and 4096.
@par
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_q15(
  arm_cfft_radix2_instance_q15 * S,
  uint16_t fftLen,
  uint8_t ifftFlag,
  uint8_t bitReverseFlag)
{
  arm_status status = ARM_MATH_SUCCESS;
  uint16_t tableStep;   /* 4096 / fftLen: stride through the shared 4096-point tables */

  /* Fields that do not depend on the length being valid */
  S->fftLen = fftLen;
  S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
  S->ifftFlag = ifftFlag;
  S->bitReverseFlag = bitReverseFlag;

  switch (S->fftLen)
  {
  /* Every supported length derives its parameters from the same ratio */
  case 4096U:
  case 2048U:
  case 1024U:
  case 512U:
  case 256U:
  case 128U:
  case 64U:
  case 32U:
  case 16U:
    tableStep = (uint16_t) (4096U / S->fftLen);

    /* Twiddle coef modifier and bit reversal table modifier share one value */
    S->twidCoefModifier = tableStep;
    S->bitRevFactor = tableStep;

    /* The bit reversal table is entered at index tableStep - 1 */
    S->pBitRevTable = (uint16_t *) & armBitRevTable[tableStep - 1U];
    break;

  default:
    /* Unsupported length: report it and leave the remaining fields untouched */
    status = ARM_MATH_ARGUMENT_ERROR;
    break;
  }

  return (status);
}
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,179 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_q31.c
* Description: Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the Q31 CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
@param[in,out] S points to an instance of the Q31 CFFT/CIFFT structure
@param[in] fftLen length of the FFT
@param[in] ifftFlag flag that selects transform direction
- value = 0: forward transform
- value = 1: inverse transform
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
- value = 0: disables bit reversal of output
- value = 1: enables bit reversal of output
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
@par Details
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
@par
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
@par
The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 32, 64, 128, 256, 512, 1024, 2048, 4096.
@par
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_q31(
  arm_cfft_radix2_instance_q31 * S,
  uint16_t fftLen,
  uint8_t ifftFlag,
  uint8_t bitReverseFlag)
{
  /* Ratio between the 4096-point reference tables and the requested length */
  uint16_t tableStride;

  /* Length-independent fields: stored unconditionally, i.e. also when the
     requested length is rejected further down. */
  S->fftLen = fftLen;
  S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
  S->ifftFlag = ifftFlag;
  S->bitReverseFlag = bitReverseFlag;

  /* Accepted lengths are exactly the powers of two from 16 up to 4096.
     On rejection the stride / bit reversal fields are left untouched. */
  if ((fftLen < 16U) || (fftLen > 4096U) || ((fftLen & (fftLen - 1U)) != 0U))
  {
    return (ARM_MATH_ARGUMENT_ERROR);
  }

  /* The twiddle and bit reversal tables are laid out for 4096 points; a
     shorter transform walks them with a stride of 4096 / fftLen
     (4096 -> 1, 2048 -> 2, ..., 16 -> 256). */
  tableStride = (uint16_t) (4096U / fftLen);
  S->twidCoefModifier = tableStride;
  S->bitRevFactor = tableStride;

  /* The bit reversal table is entered at offset (stride - 1):
     0 for 4096 points, 1 for 2048, 3 for 1024, ..., 255 for 16. */
  S->pBitRevTable = (uint16_t *) &armBitRevTable[tableStride - 1U];

  return (ARM_MATH_SUCCESS);
}
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,689 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_q15.c
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
void arm_radix2_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef,
uint16_t twidCoefModifier);
void arm_bitreversal_q15(
q15_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
const uint16_t * pBitRevTab);
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Processing function for the fixed-point CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
@param[in] S points to an instance of the fixed-point CFFT/CIFFT structure
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
@return none
*/
void arm_cfft_radix2_q15(
  const arm_cfft_radix2_instance_q15 * S,
  q15_t * pSrc)
{
  /* Run the in-place radix-2 butterflies in the direction chosen at init
     time: ifftFlag == 1 selects the inverse transform, anything else the
     forward transform. */
  if (S->ifftFlag == 1U)
  {
    arm_radix2_butterfly_inverse_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }
  else
  {
    arm_radix2_butterfly_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }

  /* Reorder the output only when the instance asks for it. The init
     function documents bitReverseFlag as enabling (1) / disabling (0) the
     output bit reversal; previously the flag was stored but never read here
     and the reordering was always applied. */
  if (S->bitReverseFlag == 1U)
  {
    arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  }
}
/**
@} end of ComplexFFT group
*/
/*
 * Forward radix-2 decimation-in-frequency butterfly core for Q15 data.
 *
 * pSrc             complex buffer processed in place; samples are stored as
 *                  interleaved (real, imag) q15 pairs, indexed as
 *                  pSrc[2*n] / pSrc[2*n + 1]
 * fftLen           number of complex points
 * pCoef            twiddle table stored as interleaved (cos, sin) q15 pairs
 * twidCoefModifier stride (in complex entries) used to walk pCoef in the
 *                  first stage; it is doubled after every stage
 *
 * Structure (both build variants): a first stage that pre-scales the inputs
 * by >> 1, a loop over the intermediate stages that halves each sum, and a
 * last stage that only adds/subtracts. No bit reversal is done here; the
 * caller arm_cfft_radix2_q15() invokes arm_bitreversal_q15() afterwards.
 */
void arm_radix2_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef,
uint16_t twidCoefModifier)
{
#if defined (ARM_MATH_DSP)
/* DSP-extension variant: each complex sample is handled as one packed
   32-bit word holding two q15 lanes. */
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
q15_t in;
q31_t T, S, R;
q31_t coeff, out1, out2;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* First stage: the butterfly distance is fftLen / 2. The loop body is
   unrolled by two (note the extra i++ / l++ in the middle), so two
   butterflies with consecutive twiddles are processed per iteration. */
// loop for groups
for (i = 0; i < n2; i++)
{
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
l = i + n2;
/* Halve both 16-bit lanes of the upper input: the low lane is shifted as a
   signed 16-bit value, the high lane via the 32-bit shift plus mask. */
T = read_q15x2 (pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
/* Same per-lane halving for the lower input */
S = read_q15x2 (pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
/* R = T - S per lane (saturating); upper output = (T + S) / 2 per lane */
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
/* Rotate the difference by the twiddle. This corresponds to the
   xt*cos + yt*sin / yt*cos - xt*sin arithmetic spelled out in the
   non-DSP branch below. */
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Repack: high half of out2 and low half of out1 form the lower output */
write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
/* Second (unrolled) butterfly of this iteration */
i++;
l++;
T = read_q15x2 (pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = read_q15x2 (pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
} /* groups loop end */
/* Every later stage steps through the twiddle table twice as fast */
twidCoefModifier = twidCoefModifier << 1U;
/* Intermediate stages: the butterfly distance n2 halves on every pass. The
   inputs are no longer pre-scaled; only the sum is halved (__SHADD16). */
/* loop for stage */
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* loop for groups */
for (j = 0; j < n2; j++)
{
/* One twiddle per group, shared by all butterflies of that group */
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
/* Unrolled by two: the explicit i += n1 in the body plus the loop
   increment advance i by 2 * n1 per iteration. */
/* loop for butterfly */
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = read_q15x2 (pSrc + (2 * i));
S = read_q15x2 (pSrc + (2 * l));
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
i += n1;
l = i + n2;
T = read_q15x2 (pSrc + (2 * i));
S = read_q15x2 (pSrc + (2 * l));
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
} /* butterfly loop end */
} /* groups loop end */
twidCoefModifier = twidCoefModifier << 1U;
} /* stages loop end */
/* Last stage: plain saturating add / subtract of neighbouring samples, no
   twiddle multiplication and no scaling. The coeff / ia values loaded here
   are not used by the loop that follows. */
n1 = n2;
n2 = n2 >> 1;
ia = 0;
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
/* loop for butterfly */
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
T = read_q15x2 (pSrc + (2 * i));
S = read_q15x2 (pSrc + (2 * l));
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
write_q15x2 (pSrc + (2 * l), R);
/* Second (unrolled) butterfly of this iteration */
i += n1;
l = i + n2;
T = read_q15x2 (pSrc + (2 * i));
S = read_q15x2 (pSrc + (2 * l));
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
write_q15x2 (pSrc + (2 * l), R);
} /* butterfly loop end */
#else /* #if defined (ARM_MATH_DSP) */
/* Portable variant: real and imaginary parts are handled as separate q15
   scalars. Unlike the DSP variant, the additions / subtractions here are
   plain C arithmetic stored back into q15_t, without saturating intrinsics. */
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
q15_t xt, yt, cosVal, sinVal;
// N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* First stage: inputs are pre-scaled by >> 1 and the sum is halved again */
/* loop for groups */
for (j = 0; j < n2; j++)
{
cosVal = pCoef[(ia * 2)];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
/* loop for butterfly */
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
/* xt / yt: real / imaginary difference of the two (halved) inputs */
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) +
(pSrc[2 * i + 1] >> 1U) ) >> 1U;
/* Lower output: real = xt*cos + yt*sin, imag = yt*cos - xt*sin; each
   product is reduced with >> 16 before being combined */
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} /* butterfly loop end */
} /* groups loop end */
twidCoefModifier = twidCoefModifier << 1U;
/* Intermediate stages: only the sum is halved */
/* loop for stage */
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* loop for groups */
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
/* loop for butterfly */
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} /* butterfly loop end */
} /* groups loop end */
twidCoefModifier = twidCoefModifier << 1U;
} /* stages loop end */
/* Last stage: add / subtract only. cosVal and sinVal are still loaded per
   group but are not used by the butterfly body. */
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* loop for groups */
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
/* loop for butterfly */
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2 * l] = xt;
pSrc[2 * l + 1] = yt;
} /* butterfly loop end */
} /* groups loop end */
/* twidCoefModifier is a by-value parameter; this final update has no effect
   outside the function. */
twidCoefModifier = twidCoefModifier << 1U;
#endif /* #if defined (ARM_MATH_DSP) */
}
/*
 * Inverse radix-2 decimation-in-frequency butterfly core for Q15 data.
 *
 * Same structure and parameters as arm_radix2_butterfly_q15(); the only
 * arithmetic difference is the sign of the sine terms in the twiddle
 * rotation (real = xt*cos - yt*sin, imag = yt*cos + xt*sin in the portable
 * branch below).
 *
 * pSrc             complex buffer processed in place, interleaved
 *                  (real, imag) q15 pairs
 * fftLen           number of complex points
 * pCoef            twiddle table stored as interleaved (cos, sin) q15 pairs
 * twidCoefModifier stride (in complex entries) used to walk pCoef in the
 *                  first stage; doubled after every stage
 */
void arm_radix2_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef,
uint16_t twidCoefModifier)
{
#if defined (ARM_MATH_DSP)
/* DSP-extension variant: each complex sample is one packed 32-bit word
   holding two q15 lanes. */
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
q15_t in;
q31_t T, S, R;
q31_t coeff, out1, out2;
// N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* First stage: butterfly distance fftLen / 2, loop body unrolled by two
   (see the extra i++ / l++), inputs pre-scaled by >> 1 per lane. */
/* loop for groups */
for (i = 0; i < n2; i++)
{
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
l = i + n2;
/* Halve both 16-bit lanes of the upper input */
T = read_q15x2 (pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
/* Halve both 16-bit lanes of the lower input */
S = read_q15x2 (pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
/* R = T - S per lane (saturating); upper output = (T + S) / 2 per lane */
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
/* Rotate the difference by the twiddle with the sine sign used for the
   inverse transform; the big-endian build negates coeff with __QSUB(0, coeff)
   before the dual multiply. */
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Repack: high half of out2 and low half of out1 form the lower output */
write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
/* Second (unrolled) butterfly of this iteration */
i++;
l++;
T = read_q15x2 (pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = read_q15x2 (pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
} /* groups loop end */
/* Every later stage steps through the twiddle table twice as fast */
twidCoefModifier = twidCoefModifier << 1U;
/* Intermediate stages: the butterfly distance n2 halves on every pass; only
   the sum is halved (__SHADD16), the inputs are not pre-scaled. */
/* loop for stage */
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* loop for groups */
for (j = 0; j < n2; j++)
{
/* One twiddle per group, shared by all butterflies of that group */
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
/* Unrolled by two: i advances by 2 * n1 per iteration (explicit i += n1 in
   the body plus the loop increment). */
/* loop for butterfly */
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = read_q15x2 (pSrc + (2 * i));
S = read_q15x2 (pSrc + (2 * l));
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
i += n1;
l = i + n2;
T = read_q15x2 (pSrc + (2 * i));
S = read_q15x2 (pSrc + (2 * l));
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
} /* butterfly loop end */
} /* groups loop end */
twidCoefModifier = twidCoefModifier << 1U;
} /* stages loop end */
/* Last stage: saturating add / subtract only, no scaling. coeff is still
   loaded per group but is not used by the butterfly body. Unlike the
   forward DSP variant, this loop is not unrolled. */
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* loop for groups */
for (j = 0; j < n2; j++)
{
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
/* loop for butterfly */
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = read_q15x2 (pSrc + (2 * i));
S = read_q15x2 (pSrc + (2 * l));
R = __QSUB16(T, S);
write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
write_q15x2 (pSrc + (2 * l), R);
} /* butterfly loop end */
} /* groups loop end */
/* twidCoefModifier is a by-value parameter; this final update has no effect
   outside the function. */
twidCoefModifier = twidCoefModifier << 1U;
#else /* #if defined (ARM_MATH_DSP) */
/* Portable variant: real and imaginary parts are handled as separate q15
   scalars, using plain C arithmetic instead of saturating intrinsics. */
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
q15_t xt, yt, cosVal, sinVal;
// N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* First stage: inputs are pre-scaled by >> 1 and the sum is halved again */
/* loop for groups */
for (j = 0; j < n2; j++)
{
cosVal = pCoef[(ia * 2)];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
/* loop for butterfly */
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
/* xt / yt: real / imaginary difference of the two (halved) inputs */
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) +
(pSrc[2 * i + 1] >> 1U) ) >> 1U;
/* Lower output: real = xt*cos - yt*sin, imag = yt*cos + xt*sin; each
   product is reduced with >> 16 before being combined */
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} /* butterfly loop end */
} /* groups loop end */
twidCoefModifier = twidCoefModifier << 1U;
/* Intermediate stages: only the sum is halved */
/* loop for stage */
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
/* loop for groups */
for (j = 0; j < n2; j++)
{
cosVal = pCoef[(ia * 2)];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
/* loop for butterfly */
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)) );
pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)) );
} /* butterfly loop end */
} /* groups loop end */
twidCoefModifier = twidCoefModifier << 1U;
} /* stages loop end */
/* Last stage: add / subtract only. The cosVal / sinVal / ia values loaded
   here are not used by the loop that follows. */
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[(ia * 2)];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
/* loop for butterfly */
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2 * l] = xt;
pSrc[2 * l + 1] = yt;
} /* butterfly loop end */
#endif /* #if defined (ARM_MATH_DSP) */
}

View file

@ -0,0 +1,337 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_q31.c
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
void arm_radix2_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef,
uint16_t twidCoefModifier);
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
const uint16_t * pBitRevTab);
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Processing function for the fixed-point CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
@param[in] S points to an instance of the fixed-point CFFT/CIFFT structure
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
@return none
*/
void arm_cfft_radix2_q31(
  const arm_cfft_radix2_instance_q31 * S,
  q31_t * pSrc)
{
  /* Run the in-place radix-2 butterflies in the direction chosen at init
     time: ifftFlag == 1 selects the inverse transform, anything else the
     forward transform. */
  if (S->ifftFlag == 1U)
  {
    arm_radix2_butterfly_inverse_q31(pSrc, S->fftLen,
                                     S->pTwiddle, S->twidCoefModifier);
  }
  else
  {
    arm_radix2_butterfly_q31(pSrc, S->fftLen,
                             S->pTwiddle, S->twidCoefModifier);
  }

  /* Reorder the output only when the instance asks for it.
     arm_cfft_radix2_init_q31() documents bitReverseFlag as enabling (1) /
     disabling (0) the output bit reversal; previously the flag was stored
     but never read here and the reordering was always applied. */
  if (S->bitReverseFlag == 1U)
  {
    arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  }
}
/**
@} end of ComplexFFT group
*/
/*
 * Forward radix-2 decimation-in-frequency butterfly core for Q31 data.
 * pSrc holds fftLen complex samples as interleaved (real, imag) words and is
 * processed in place; pCoef holds interleaved (cos, sin) twiddles that are
 * walked with a stride of twidCoefModifier, doubled after every stage.
 */
void arm_radix2_butterfly_q31(
  q31_t * pSrc,
  uint32_t fftLen,
  const q31_t * pCoef,
  uint16_t twidCoefModifier)
{
  unsigned upper, lower;            /* complex indices of the two butterfly legs */
  unsigned grp, stage, count, leg;
  unsigned span, half, tw;
  q31_t *pUp, *pLo;
  q31_t upRe, upIm, loRe, loIm;
  q31_t diffRe, diffIm, wCos, wSin;
  q31_t rotRe, rotIm;

  half = fftLen;
  half >>= 1;
  tw = 0;

  /* First stage: one twiddle per butterfly; inputs are pre-scaled by >> 1
     and the sum is halved once more. */
  for (upper = 0; upper < half; upper++)
  {
    wCos = pCoef[tw * 2];
    wSin = pCoef[(tw * 2) + 1];
    tw += twidCoefModifier;

    lower = upper + half;
    pUp = &pSrc[2 * upper];
    pLo = &pSrc[2 * lower];

    upRe = pUp[0] >> 1U;
    upIm = pUp[1] >> 1U;
    loRe = pLo[0] >> 1U;
    loIm = pLo[1] >> 1U;

    diffRe = upRe - loRe;
    diffIm = upIm - loIm;
    pUp[0] = (upRe + loRe) >> 1U;
    pUp[1] = (loIm + upIm) >> 1U;

    /* Lower output = difference rotated by the twiddle:
       real = diffRe*cos + diffIm*sin, imag = diffIm*cos - diffRe*sin */
    mult_32x32_keep32_R(rotRe, diffRe, wCos);
    multAcc_32x32_keep32_R(rotRe, diffIm, wSin);
    mult_32x32_keep32_R(rotIm, diffIm, wCos);
    multSub_32x32_keep32_R(rotIm, diffRe, wSin);

    pLo[0] = rotRe;
    pLo[1] = rotIm;
  }
  twidCoefModifier = twidCoefModifier << 1U;

  /* Intermediate stages: the butterfly distance halves on every pass and
     only the sum is halved. */
  for (stage = fftLen / 2; stage > 2; stage >>= 1)
  {
    span = half;
    half >>= 1;
    tw = 0;

    for (grp = 0; grp < half; grp++)
    {
      /* One twiddle per group, shared by all butterflies of that group */
      wCos = pCoef[tw * 2];
      wSin = pCoef[(tw * 2) + 1];
      tw += twidCoefModifier;

      /* Exactly fftLen / span butterflies per group (at least one) */
      upper = grp;
      count = fftLen / span;
      do
      {
        lower = upper + half;
        pUp = &pSrc[2 * upper];
        pLo = &pSrc[2 * lower];

        upRe = pUp[0];
        upIm = pUp[1];
        loRe = pLo[0];
        loIm = pLo[1];

        diffRe = upRe - loRe;
        diffIm = upIm - loIm;
        pUp[0] = (upRe + loRe) >> 1U;
        pUp[1] = (loIm + upIm) >> 1U;

        mult_32x32_keep32_R(rotRe, diffRe, wCos);
        multAcc_32x32_keep32_R(rotRe, diffIm, wSin);
        mult_32x32_keep32_R(rotIm, diffIm, wCos);
        multSub_32x32_keep32_R(rotIm, diffRe, wSin);

        pLo[0] = rotRe;
        pLo[1] = rotIm;

        upper += span;
      } while (--count > 0U);
    }
    twidCoefModifier = twidCoefModifier << 1U;
  }

  /* Last stage: add / subtract only, no twiddle multiply and no scaling.
     Two butterflies are always processed per outer pass. */
  span = half;
  half >>= 1;
  for (upper = 0; upper < fftLen; upper += 2U * span)
  {
    for (leg = 0; leg < 2U; leg++)
    {
      lower = upper + (leg * span);
      pUp = &pSrc[2 * lower];
      pLo = &pSrc[2 * (lower + half)];

      upRe = pUp[0];
      upIm = pUp[1];
      loRe = pLo[0];
      loIm = pLo[1];

      pUp[0] = upRe + loRe;
      pUp[1] = loIm + upIm;
      pLo[0] = upRe - loRe;
      pLo[1] = upIm - loIm;
    }
  }
}
/*
 * Inverse radix-2 decimation-in-frequency butterfly core for Q31 data.
 * Identical in structure to the forward core; only the sign of the sine
 * terms in the twiddle rotation differs. pSrc (interleaved real/imag) is
 * processed in place; pCoef holds interleaved (cos, sin) twiddles walked
 * with a stride of twidCoefModifier, doubled after every stage.
 */
void arm_radix2_butterfly_inverse_q31(
  q31_t * pSrc,
  uint32_t fftLen,
  const q31_t * pCoef,
  uint16_t twidCoefModifier)
{
  unsigned upper, lower;            /* complex indices of the two butterfly legs */
  unsigned grp, stage, leg;
  unsigned span, half, tw;
  q31_t *pUp, *pLo;
  q31_t upRe, upIm, loRe, loIm;
  q31_t diffRe, diffIm, wCos, wSin;
  q31_t rotRe, rotIm;

  half = fftLen;
  half >>= 1;
  tw = 0;

  /* First stage: one twiddle per butterfly; inputs are pre-scaled by >> 1
     and the sum is halved once more. */
  for (upper = 0; upper < half; upper++)
  {
    wCos = pCoef[tw * 2];
    wSin = pCoef[(tw * 2) + 1];
    tw += twidCoefModifier;

    lower = upper + half;
    pUp = &pSrc[2 * upper];
    pLo = &pSrc[2 * lower];

    upRe = pUp[0] >> 1U;
    upIm = pUp[1] >> 1U;
    loRe = pLo[0] >> 1U;
    loIm = pLo[1] >> 1U;

    diffRe = upRe - loRe;
    diffIm = upIm - loIm;
    pUp[0] = (upRe + loRe) >> 1U;
    pUp[1] = (loIm + upIm) >> 1U;

    /* Lower output = difference rotated by the conjugate twiddle:
       real = diffRe*cos - diffIm*sin, imag = diffIm*cos + diffRe*sin */
    mult_32x32_keep32_R(rotRe, diffRe, wCos);
    multSub_32x32_keep32_R(rotRe, diffIm, wSin);
    mult_32x32_keep32_R(rotIm, diffIm, wCos);
    multAcc_32x32_keep32_R(rotIm, diffRe, wSin);

    pLo[0] = rotRe;
    pLo[1] = rotIm;
  }
  twidCoefModifier <<= 1U;

  /* Intermediate stages: the butterfly distance halves on every pass and
     only the sum is halved. */
  for (stage = fftLen / 2; stage > 2; stage >>= 1)
  {
    span = half;
    half >>= 1;
    tw = 0;

    for (grp = 0; grp < half; grp++)
    {
      /* One twiddle per group, shared by all butterflies of that group */
      wCos = pCoef[tw * 2];
      wSin = pCoef[(tw * 2) + 1];
      tw += twidCoefModifier;

      for (upper = grp; upper < fftLen; upper += span)
      {
        lower = upper + half;
        pUp = &pSrc[2 * upper];
        pLo = &pSrc[2 * lower];

        upRe = pUp[0];
        upIm = pUp[1];
        loRe = pLo[0];
        loIm = pLo[1];

        diffRe = upRe - loRe;
        diffIm = upIm - loIm;
        pUp[0] = (upRe + loRe) >> 1U;
        pUp[1] = (loIm + upIm) >> 1U;

        mult_32x32_keep32_R(rotRe, diffRe, wCos);
        multSub_32x32_keep32_R(rotRe, diffIm, wSin);
        mult_32x32_keep32_R(rotIm, diffIm, wCos);
        multAcc_32x32_keep32_R(rotIm, diffRe, wSin);

        pLo[0] = rotRe;
        pLo[1] = rotIm;
      }
    }
    twidCoefModifier <<= 1U;
  }

  /* Last stage: add / subtract only, no twiddle multiply and no scaling.
     Two butterflies are always processed per outer pass. */
  span = half;
  half >>= 1;
  for (upper = 0; upper < fftLen; upper += 2U * span)
  {
    for (leg = 0; leg < 2U; leg++)
    {
      lower = upper + (leg * span);
      pUp = &pSrc[2 * lower];
      pLo = &pSrc[2 * (lower + half)];

      upRe = pUp[0];
      upIm = pUp[1];
      loRe = pLo[0];
      loIm = pLo[1];

      pUp[0] = upRe + loRe;
      pUp[1] = loIm + upIm;
      pLo[0] = upRe - loRe;
      pLo[1] = upIm - loIm;
    }
  }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,156 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_f32.c
* Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the floating-point CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
@param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure
@param[in] fftLen length of the FFT
@param[in] ifftFlag flag that selects transform direction
- value = 0: forward transform
- value = 1: inverse transform
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
- value = 0: disables bit reversal of output
- value = 1: enables bit reversal of output
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
@par Details
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
@par
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
@par
The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024, 4096.
@par
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_f32(
  arm_cfft_radix4_instance_f32 * S,
  uint16_t fftLen,
  uint8_t ifftFlag,
  uint8_t bitReverseFlag)
{
  /* Ratio between the 4096-point reference tables and the requested length */
  uint16_t tableStride;

  /* Length-independent fields: stored unconditionally, i.e. also when the
     requested length is rejected further down. */
  S->fftLen = fftLen;
  S->pTwiddle = (float32_t *) twiddleCoef;
  S->ifftFlag = ifftFlag;
  S->bitReverseFlag = bitReverseFlag;

  /* Radix-4 accepts only the powers of four from 16 up to 4096. On rejection
     the stride, bit reversal and 1/fftLen fields are left untouched. */
  if ((fftLen != 16U) && (fftLen != 64U) && (fftLen != 256U) &&
      (fftLen != 1024U) && (fftLen != 4096U))
  {
    return (ARM_MATH_ARGUMENT_ERROR);
  }

  /* Table stride: 4096 -> 1, 1024 -> 4, 256 -> 16, 64 -> 64, 16 -> 256 */
  tableStride = (uint16_t) (4096U / fftLen);
  S->twidCoefModifier = tableStride;
  S->bitRevFactor = tableStride;

  /* The bit reversal table is entered at offset (stride - 1):
     0, 3, 15, 63 and 255 for the five supported lengths. */
  S->pBitRevTable = (uint16_t *) &armBitRevTable[tableStride - 1U];

  /* 1 / fftLen; exact in single precision because fftLen is a power of two */
  S->onebyfftLen = 1.0f / (float32_t) fftLen;

  return (ARM_MATH_SUCCESS);
}
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_q15.c
* Description: Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the Q15 CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
@param[in,out] S points to an instance of the Q15 CFFT/CIFFT structure
@param[in] fftLen length of the FFT
@param[in] ifftFlag flag that selects transform direction
- value = 0: forward transform
- value = 1: inverse transform
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
- value = 0: disables bit reversal of output
- value = 1: enables bit reversal of output
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
@par Details
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
@par
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
@par
The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256, 1024 and 4096.
@par
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_q15(
  arm_cfft_radix4_instance_q15 * S,
  uint16_t fftLen,
  uint8_t ifftFlag,
  uint8_t bitReverseFlag)
{
  /*
   * Every supported length reuses the 4096 point twiddle table and the
   * shared bit reversal table; a shorter transform walks them with a
   * stride of 4096 / fftLen. That stride is the only length dependent value.
   */
  uint16_t stride;

  /* Fields that do not depend on the length are stored unconditionally */
  S->fftLen = fftLen;
  S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
  S->ifftFlag = ifftFlag;
  S->bitReverseFlag = bitReverseFlag;

  /* Map the requested length onto its table stride */
  switch (S->fftLen)
  {
    case 4096U:
      stride = 1U;
      break;

    case 1024U:
      stride = 4U;
      break;

    case 256U:
      stride = 16U;
      break;

    case 64U:
      stride = 64U;
      break;

    case 16U:
      stride = 256U;
      break;

    default:
      /* Not a supported length: report it, the stride based fields stay untouched */
      return (ARM_MATH_ARGUMENT_ERROR);
  }

  /* Twiddle coefficient modifier and bit reversal table modifier are both the stride */
  S->twidCoefModifier = stride;
  S->bitRevFactor = stride;

  /* The bit reversal table is entered at offset (stride - 1): 0, 3, 15, 63 or 255 */
  S->pBitRevTable = (uint16_t *) & armBitRevTable[stride - 1U];

  return (ARM_MATH_SUCCESS);
}
/**
@} end of ComplexFFT group
*/

View file

@ -0,0 +1,141 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_q31.c
* Description: Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the Q31 CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
@param[in,out] S points to an instance of the Q31 CFFT/CIFFT structure.
@param[in] fftLen length of the FFT.
@param[in] ifftFlag flag that selects transform direction
- value = 0: forward transform
- value = 1: inverse transform
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
- value = 0: disables bit reversal of output
- value = 1: enables bit reversal of output
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
@par Details
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
@par
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
@par
The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256, 1024 and 4096.
@par
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_q31(
  arm_cfft_radix4_instance_q31 * S,
  uint16_t fftLen,
  uint8_t ifftFlag,
  uint8_t bitReverseFlag)
{
  /*
   * Step used to walk the 4096 point twiddle table and the shared bit
   * reversal table for the requested length (equals 4096 / fftLen).
   */
  uint16_t tableStep;

  /* Length independent part of the instance */
  S->fftLen = fftLen;
  S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
  S->ifftFlag = ifftFlag;
  S->bitReverseFlag = bitReverseFlag;

  /* Select the table step for the requested length */
  switch (S->fftLen)
  {
    case 4096U:
      tableStep = 1U;
      break;

    case 1024U:
      tableStep = 4U;
      break;

    case 256U:
      tableStep = 16U;
      break;

    case 64U:
      tableStep = 64U;
      break;

    case 16U:
      tableStep = 256U;
      break;

    default:
      /* Length is not supported: flag the argument error and leave the remaining fields as they are */
      return (ARM_MATH_ARGUMENT_ERROR);
  }

  /* The same step serves as twiddle coefficient modifier and as bit reversal factor */
  S->twidCoefModifier = tableStep;
  S->bitRevFactor = tableStep;

  /* Entry point into the bit reversal table: offset 0, 3, 15, 63 or 255 */
  S->pBitRevTable = (uint16_t *) & armBitRevTable[tableStep - 1U];

  return (ARM_MATH_SUCCESS);
}
/**
@} end of ComplexFFT group
*/

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,827 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_q31.c
* Description: This file has function definition of Radix-4 FFT & IFFT function and
* In-place bit reversal using bit reversal table
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
void arm_radix4_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef,
uint32_t twidCoefModifier);
void arm_radix4_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef,
uint32_t twidCoefModifier);
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
const uint16_t * pBitRevTab);
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Processing function for the Q31 CFFT/CIFFT.
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
@param[in] S points to an instance of the Q31 CFFT/CIFFT structure
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
@return none
@par Input and output formats:
Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
Hence the output format is different for different FFT sizes.
The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
@par
\image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
\image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
*/
void arm_cfft_radix4_q31(
  const arm_cfft_radix4_instance_q31 * S,
  q31_t * pSrc)
{
  /* Run the in-place radix-4 butterflies in the direction selected by S->ifftFlag */
  if (S->ifftFlag != 1U)
  {
    /* Forward transform (CFFT) */
    arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }
  else
  {
    /* Inverse transform (CIFFT) */
    arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }

  /* Without the bit reversal request the buffer is left as the butterflies produced it */
  if (S->bitReverseFlag != 1U)
  {
    return;
  }

  /* Reorder the buffer through the bit reversal table */
  arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
/**
@} end of ComplexFFT group
*/
/*
* Radix-4 FFT algorithm used is :
*
* Input real and imaginary data:
* x(n) = xa + j * ya
* x(n+N/4 ) = xb + j * yb
* x(n+N/2 ) = xc + j * yc
* x(n+3N/4) = xd + j * yd
*
*
* Output real and imaginary data:
* x(4r) = xa'+ j * ya'
* x(4r+1) = xb'+ j * yb'
* x(4r+2) = xc'+ j * yc'
* x(4r+3) = xd'+ j * yd'
*
*
* Twiddle factors for radix-4 FFT:
* Wn = co1 + j * (- si1)
* W2n = co2 + j * (- si2)
* W3n = co3 + j * (- si3)
*
* Butterfly implementation:
* xa' = xa + xb + xc + xd
* ya' = ya + yb + yc + yd
* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
*
*/
/**
@brief Core function for the Q31 CFFT butterfly process.
@param[in,out] pSrc points to the in-place buffer of Q31 data type.
@param[in] fftLen length of the FFT.
@param[in] pCoef points to twiddle coefficient buffer.
@param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
@return none
*/
void arm_radix4_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef,
uint32_t twidCoefModifier)
{
/*
 * In-place radix-4 forward butterflies on interleaved complex data:
 * pSrc[2 * i] holds the real part (x) and pSrc[2 * i + 1] the imaginary
 * part (y) of sample i. Twiddles are read the same way: pCoef[2 * ia] is the
 * "co" value and pCoef[2 * ia + 1] the "si" value of coefficient ia.
 *
 * The work is split into a first stage (scales the input and applies
 * twiddles), zero or more middle stages, and a last stage that needs no
 * twiddle multiplication.
 *
 * NOTE(review): fftLen is not validated here. Both do/while loops start from
 * j = fftLen / 4 and decrement before testing, so fftLen must be at least 4.
 */
uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
q31_t xa, xb, xc, xd;
q31_t ya, yb, yc, yd;
q31_t xa_out, xb_out, xc_out, xd_out;
q31_t ya_out, yb_out, yc_out, yd_out;
q31_t *ptr1;
/* Total process is divided into three stages */
/* process first stage, middle stages, & last stage */
/* start of first stage process */
/* Initializations for the first stage */
n2 = fftLen;
n1 = n2;
/* n2 = fftLen/4: distance (in complex samples) between the four butterfly inputs */
n2 >>= 2U;
i0 = 0U;
ia1 = 0U;
/* one butterfly per iteration, fftLen/4 butterflies in total */
j = n2;
/* Calculation of first stage */
do
{
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* input is in 1.31(q31) format; every operand is shifted right by 4 to provide 4 guard bits */
/* Butterfly implementation */
/* xa + xc */
r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U);
/* xa - xc */
r2 = (pSrc[(2U * i0)] >> 4U) - (pSrc[(2U * i2)] >> 4U);
/* xb + xd */
t1 = (pSrc[(2U * i1)] >> 4U) + (pSrc[(2U * i3)] >> 4U);
/* ya + yc */
s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
/* ya - yc */
s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
/* xa' = xa + xb + xc + xd (overwrites xa, which has already been consumed above) */
pSrc[2U * i0] = (r1 + t1);
/* (xa + xc) - (xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
/* ya' = ya + yb + yc + yd */
pSrc[(2U * i0) + 1U] = (s1 + t2);
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* yb - yd */
t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
/* xb - xd */
t2 = (pSrc[(2U * i1)] >> 4U) - (pSrc[(2U * i3)] >> 4U);
/* index calculation for the coefficients */
ia2 = 2U * ia1;
co2 = pCoef[(ia2 * 2U)];
si2 = pCoef[(ia2 * 2U) + 1U];
/* Each product keeps only the high 32 bits of the 64-bit result; the sum is then shifted left by one */
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2), stored in the slot that held xb */
pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
/* (xa - xc) + (yb - yd) */
r1 = r2 + t1;
/* (xa - xc) - (yb - yd) */
r2 = r2 - t1;
/* (ya - yc) - (xb - xd) */
s1 = s2 - t2;
/* (ya - yc) + (xb - xd) */
s2 = s2 + t2;
co1 = pCoef[(ia1 * 2U)];
si1 = pCoef[(ia1 * 2U) + 1U];
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1), stored in the slot that held xc */
pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
/* index calculation for the coefficients */
ia3 = 3U * ia1;
co3 = pCoef[(ia3 * 2U)];
si3 = pCoef[(ia3 * 2U) + 1U];
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
/* Updating input index */
i0 = i0 + 1U;
} while (--j);
/* end of first stage process */
/* data is in 5.27(q27) format */
/* start of Middle stages process */
/* each stage in middle stages provides two down scaling of the input */
/* the twiddle step grows by a factor of 4 for every following stage */
twidCoefModifier <<= 2U;
/* k is only a stage counter (fftLen/4, fftLen/16, ... while greater than 4); it is not used in the loop body */
for (k = fftLen / 4U; k > 4U; k >>= 2U)
{
/* Initializations for this middle stage: n1 = span of one butterfly group, n2 = a quarter of it */
n1 = n2;
n2 >>= 2U;
ia1 = 0U;
/* Calculation of this middle stage: j selects the twiddle set shared by all groups */
for (j = 0U; j <= (n2 - 1U); j++)
{
/* index calculation for the coefficients */
ia2 = ia1 + ia1;
ia3 = ia2 + ia1;
co1 = pCoef[(ia1 * 2U)];
si1 = pCoef[(ia1 * 2U) + 1U];
co2 = pCoef[(ia2 * 2U)];
si2 = pCoef[(ia2 * 2U) + 1U];
co3 = pCoef[(ia3 * 2U)];
si3 = pCoef[(ia3 * 2U) + 1U];
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
/* apply the same twiddles to butterfly j of every group (groups are n1 samples apart) */
for (i0 = j; i0 < fftLen; i0 += n1)
{
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2 * n2], pSrc[i0 + 3 * n2] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* Butterfly implementation */
/* xa + xc */
r1 = pSrc[2U * i0] + pSrc[2U * i2];
/* xa - xc */
r2 = pSrc[2U * i0] - pSrc[2U * i2];
/* ya + yc */
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
/* ya - yc */
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
/* xb + xd */
t1 = pSrc[2U * i1] + pSrc[2U * i3];
/* xa' = xa + xb + xc + xd, scaled down by 4 */
pSrc[2U * i0] = (r1 + t1) >> 2U;
/* xa + xc -(xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
/* ya' = ya + yb + yc + yd, scaled down by 4 */
pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* (yb - yd) */
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
/* (xb - xd) */
t2 = pSrc[2U * i1] - pSrc[2U * i3];
/* Unlike the first stage, the twiddled sums below are shifted right by one */
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U;
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U;
/* (xa - xc) + (yb - yd) */
r1 = r2 + t1;
/* (xa - xc) - (yb - yd) */
r2 = r2 - t1;
/* (ya - yc) - (xb - xd) */
s1 = s2 - t2;
/* (ya - yc) + (xb - xd) */
s2 = s2 + t2;
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
}
}
/* next stage steps through the twiddle table 4 times faster */
twidCoefModifier <<= 2U;
}
/* End of Middle stages process */
/* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
/* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
/* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
/* data is in 5.27(q27) format for the 16 point as there are no middle stages */
/* start of Last stage process */
/* Initializations for the last stage: fftLen/4 butterflies over 4 consecutive complex samples each */
j = fftLen >> 2;
ptr1 = &pSrc[0];
/* Calculations of last stage (no twiddle multiplication is performed here) */
do
{
/* Read xa (real), ya(imag) input */
xa = *ptr1++;
ya = *ptr1++;
/* Read xb (real), yb(imag) input */
xb = *ptr1++;
yb = *ptr1++;
/* Read xc (real), yc(imag) input */
xc = *ptr1++;
yc = *ptr1++;
/* Read xd (real), yd(imag) input */
xd = *ptr1++;
yd = *ptr1++;
/* xa' = xa + xb + xc + xd */
xa_out = xa + xb + xc + xd;
/* ya' = ya + yb + yc + yd */
ya_out = ya + yb + yc + yd;
/* rewind the pointer by the 8 values just read so the results overwrite them */
ptr1 = ptr1 - 8U;
/* writing xa' and ya' */
*ptr1++ = xa_out;
*ptr1++ = ya_out;
xc_out = (xa - xb + xc - xd);
yc_out = (ya - yb + yc - yd);
/* writing xc' and yc' (second slot of the group) */
*ptr1++ = xc_out;
*ptr1++ = yc_out;
xb_out = (xa + yb - xc - yd);
yb_out = (ya - xb - yc + xd);
/* writing xb' and yb' (third slot of the group) */
*ptr1++ = xb_out;
*ptr1++ = yb_out;
xd_out = (xa - yb - xc + yd);
yd_out = (ya + xb - yc - xd);
/* writing xd' and yd' */
*ptr1++ = xd_out;
*ptr1++ = yd_out;
} while (--j);
/* output is in 11.21(q21) format for the 1024 point */
/* output is in 9.23(q23) format for the 256 point */
/* output is in 7.25(q25) format for the 64 point */
/* output is in 5.27(q27) format for the 16 point */
/* End of last stage process */
}
/**
@brief Core function for the Q31 CIFFT butterfly process.
@param[in,out] pSrc points to the in-place buffer of Q31 data type.
@param[in] fftLen length of the FFT.
@param[in] pCoef points to twiddle coefficient buffer.
@param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
@return none
*/
/*
* Radix-4 IFFT algorithm used is :
*
* CIFFT uses same twiddle coefficients as CFFT Function
* x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n + fftLen/2] + (-j)^k * x[n + 3*fftLen/4]
*
*
* IFFT is implemented with following changes in equations from FFT
*
* Input real and imaginary data:
* x(n) = xa + j * ya
* x(n+N/4 ) = xb + j * yb
* x(n+N/2 ) = xc + j * yc
* x(n+3N/4) = xd + j * yd
*
*
* Output real and imaginary data:
* x(4r) = xa'+ j * ya'
* x(4r+1) = xb'+ j * yb'
* x(4r+2) = xc'+ j * yc'
* x(4r+3) = xd'+ j * yd'
*
*
* Twiddle factors for radix-4 IFFT:
* Wn = co1 + j * (si1)
* W2n = co2 + j * (si2)
* W3n = co3 + j * (si3)
* The real and imaginary output values for the radix-4 butterfly are
* xa' = xa + xb + xc + xd
* ya' = ya + yb + yc + yd
* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
*
*/
void arm_radix4_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef,
uint32_t twidCoefModifier)
{
/*
 * In-place radix-4 inverse butterflies on interleaved complex data:
 * pSrc[2 * i] holds the real part (x) and pSrc[2 * i + 1] the imaginary
 * part (y) of sample i. Twiddles are read the same way: pCoef[2 * ia] is the
 * "co" value and pCoef[2 * ia + 1] the "si" value of coefficient ia.
 *
 * Compared with the forward butterfly, the signs of the si products and of
 * the (yb - yd) / (xb - xd) cross terms are flipped.
 *
 * NOTE(review): fftLen is not validated here. Both do/while loops start from
 * j = fftLen / 4 and decrement before testing, so fftLen must be at least 4.
 */
uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
q31_t xa, xb, xc, xd;
q31_t ya, yb, yc, yd;
q31_t xa_out, xb_out, xc_out, xd_out;
q31_t ya_out, yb_out, yc_out, yd_out;
q31_t *ptr1;
/* input is in 1.31(q31) format for all FFT sizes */
/* Total process is divided into three stages */
/* process first stage, middle stages, & last stage */
/* Start of first stage process */
/* Initializations for the first stage */
n2 = fftLen;
n1 = n2;
/* n2 = fftLen/4: distance (in complex samples) between the four butterfly inputs */
n2 >>= 2U;
i0 = 0U;
ia1 = 0U;
/* one butterfly per iteration, fftLen/4 butterflies in total */
j = n2;
do
{
/* input is in 1.31(q31) format; every operand is shifted right by 4 to provide 4 guard bits */
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* Butterfly implementation */
/* xa + xc */
r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U);
/* xa - xc */
r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U);
/* xb + xd */
t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U);
/* ya + yc */
s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
/* ya - yc */
s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
/* xa' = xa + xb + xc + xd (overwrites xa, which has already been consumed above) */
pSrc[2U * i0] = (r1 + t1);
/* (xa + xc) - (xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
/* ya' = ya + yb + yc + yd */
pSrc[(2U * i0) + 1U] = (s1 + t2);
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* yb - yd */
t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
/* xb - xd */
t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U);
/* index calculation for the coefficients */
ia2 = 2U * ia1;
co2 = pCoef[ia2 * 2U];
si2 = pCoef[(ia2 * 2U) + 1U];
/* Each product keeps only the high 32 bits of the 64-bit result; the sum is then shifted left by one */
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2), stored in the slot that held xb */
pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
/* (xa - xc) - (yb - yd) */
r1 = r2 - t1;
/* (xa - xc) + (yb - yd) */
r2 = r2 + t1;
/* (ya - yc) + (xb - xd) */
s1 = s2 + t2;
/* (ya - yc) - (xb - xd) */
s2 = s2 - t2;
co1 = pCoef[ia1 * 2U];
si1 = pCoef[(ia1 * 2U) + 1U];
/* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1), stored in the slot that held xc */
pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
/* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
/* index calculation for the coefficients */
ia3 = 3U * ia1;
co3 = pCoef[ia3 * 2U];
si3 = pCoef[(ia3 * 2U) + 1U];
/* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
/* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
/* Updating input index */
i0 = i0 + 1U;
} while (--j);
/* data is in 5.27(q27) format */
/* each stage provides two down scaling of the input */
/* Start of Middle stages process */
/* the twiddle step grows by a factor of 4 for every following stage */
twidCoefModifier <<= 2U;
/* Calculation of the second stage up to, but excluding, the last stage */
/* k is only a stage counter (fftLen/4, fftLen/16, ... while greater than 4); it is not used in the loop body */
for (k = fftLen / 4U; k > 4U; k >>= 2U)
{
/* Initializations for this middle stage: n1 = span of one butterfly group, n2 = a quarter of it */
n1 = n2;
n2 >>= 2U;
ia1 = 0U;
/* j selects the twiddle set shared by all groups of this stage */
for (j = 0; j <= (n2 - 1U); j++)
{
/* index calculation for the coefficients */
ia2 = ia1 + ia1;
ia3 = ia2 + ia1;
co1 = pCoef[(ia1 * 2U)];
si1 = pCoef[(ia1 * 2U) + 1U];
co2 = pCoef[(ia2 * 2U)];
si2 = pCoef[(ia2 * 2U) + 1U];
co3 = pCoef[(ia3 * 2U)];
si3 = pCoef[(ia3 * 2U) + 1U];
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
/* apply the same twiddles to butterfly j of every group (groups are n1 samples apart) */
for (i0 = j; i0 < fftLen; i0 += n1)
{
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2 * n2], pSrc[i0 + 3 * n2] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* Butterfly implementation */
/* xa + xc */
r1 = pSrc[2U * i0] + pSrc[2U * i2];
/* xa - xc */
r2 = pSrc[2U * i0] - pSrc[2U * i2];
/* ya + yc */
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
/* ya - yc */
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
/* xb + xd */
t1 = pSrc[2U * i1] + pSrc[2U * i3];
/* xa' = xa + xb + xc + xd, scaled down by 4 */
pSrc[2U * i0] = (r1 + t1) >> 2U;
/* xa + xc -(xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
/* ya' = ya + yb + yc + yd, scaled down by 4 */
pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* (yb - yd) */
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
/* (xb - xd) */
t2 = pSrc[2U * i1] - pSrc[2U * i3];
/* Unlike the first stage, the twiddled sums below are shifted right by one */
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) -
((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U;
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32U)) +
((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U;
/* (xa - xc) - (yb - yd) */
r1 = r2 - t1;
/* (xa - xc) + (yb - yd) */
r2 = r2 + t1;
/* (ya - yc) + (xb - xd) */
s1 = s2 + t2;
/* (ya - yc) - (xb - xd) */
s2 = s2 - t2;
/* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
/* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
/* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
/* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
}
}
/* next stage steps through the twiddle table 4 times faster */
twidCoefModifier <<= 2U;
}
/* End of Middle stages process */
/* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
/* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
/* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
/* data is in 5.27(q27) format for the 16 point as there are no middle stages */
/* Start of last stage process */
/* Initializations for the last stage: fftLen/4 butterflies over 4 consecutive complex samples each */
j = fftLen >> 2;
ptr1 = &pSrc[0];
/* Calculations of last stage (no twiddle multiplication is performed here) */
do
{
/* Read xa (real), ya(imag) input */
xa = *ptr1++;
ya = *ptr1++;
/* Read xb (real), yb(imag) input */
xb = *ptr1++;
yb = *ptr1++;
/* Read xc (real), yc(imag) input */
xc = *ptr1++;
yc = *ptr1++;
/* Read xd (real), yd(imag) input */
xd = *ptr1++;
yd = *ptr1++;
/* xa' = xa + xb + xc + xd */
xa_out = xa + xb + xc + xd;
/* ya' = ya + yb + yc + yd */
ya_out = ya + yb + yc + yd;
/* rewind the pointer by the 8 values just read so the results overwrite them */
ptr1 = ptr1 - 8U;
/* writing xa' and ya' */
*ptr1++ = xa_out;
*ptr1++ = ya_out;
xc_out = (xa - xb + xc - xd);
yc_out = (ya - yb + yc - yd);
/* writing xc' and yc' (second slot of the group) */
*ptr1++ = xc_out;
*ptr1++ = yc_out;
xb_out = (xa - yb - xc + yd);
yb_out = (ya + xb - yc - xd);
/* writing xb' and yb' (third slot of the group) */
*ptr1++ = xb_out;
*ptr1++ = yb_out;
xd_out = (xa + yb - xc - yd);
yd_out = (ya - xb - yc + xd);
/* writing xd' and yd' */
*ptr1++ = xd_out;
*ptr1++ = yd_out;
} while (--j);
/* output is in 11.21(q21) format for the 1024 point */
/* output is in 9.23(q23) format for the 256 point */
/* output is in 7.25(q25) format for the 64 point */
/* output is in 5.27(q27) format for the 16 point */
/* End of last stage process */
}

View file

@ -0,0 +1,285 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix8_f32.c
* Description: Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* ----------------------------------------------------------------------
* Internal helper function used by the FFTs
* -------------------------------------------------------------------- */
/**
brief Core function for the floating-point CFFT butterfly process.
param[in,out] pSrc points to the in-place buffer of floating-point data type.
param[in] fftLen length of the FFT.
param[in] pCoef points to the twiddle coefficient buffer.
param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
return none
*/
void arm_radix8_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier)
{
/*
 * In-place radix-8 butterfly passes over pSrc.
 * pSrc is accessed as pairs: pSrc[2*k] and pSrc[2*k + 1] hold the two
 * components (real, imaginary) of complex sample k.
 * pCoef is accessed the same way: pCoef[2*k] is used as the "co" factor and
 * pCoef[2*k + 1] as the "si" factor of twiddle k.
 */
/* Twiddle (coefficient) indices: ia1..ia7 are 1x..7x the per-column base index */
uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
/* Sample indices of the eight butterfly inputs/outputs, spaced n2 apart */
uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
uint32_t id;
/* n1: span of one butterfly group in the current pass, n2 = n1 / 8; j: column inside a group */
uint32_t n1, n2, j;
/* r*: temporaries for the even (pSrc[2*k]) components */
float32_t r1, r2, r3, r4, r5, r6, r7, r8;
float32_t t1, t2;
/* s*: temporaries for the odd (pSrc[2*k + 1]) components */
float32_t s1, s2, s3, s4, s5, s6, s7, s8;
/* p*: partial products of the twiddle multiplication */
float32_t p1, p2, p3, p4;
/* coN / siN: twiddle factors applied to the output written at index iN */
float32_t co2, co3, co4, co5, co6, co7, co8;
float32_t si2, si3, si4, si5, si6, si7, si8;
/* Approximately 1/sqrt(2) */
const float32_t C81 = 0.70710678118f;
n2 = fftLen;
/* One iteration of this loop is one radix-8 pass; the group span shrinks by 8 each time */
do
{
n1 = n2;
n2 = n2 >> 3;
i1 = 0;
/* Column 0 of every group: butterfly results are stored without any twiddle multiplication */
do
{
i2 = i1 + n2;
i3 = i2 + n2;
i4 = i3 + n2;
i5 = i4 + n2;
i6 = i5 + n2;
i7 = i6 + n2;
i8 = i7 + n2;
/* Sums and differences of the even components of inputs n2*4 apart */
r1 = pSrc[2 * i1] + pSrc[2 * i5];
r5 = pSrc[2 * i1] - pSrc[2 * i5];
r2 = pSrc[2 * i2] + pSrc[2 * i6];
r6 = pSrc[2 * i2] - pSrc[2 * i6];
r3 = pSrc[2 * i3] + pSrc[2 * i7];
r7 = pSrc[2 * i3] - pSrc[2 * i7];
r4 = pSrc[2 * i4] + pSrc[2 * i8];
r8 = pSrc[2 * i4] - pSrc[2 * i8];
t1 = r1 - r3;
r1 = r1 + r3;
r3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1] = r1 + r2;
pSrc[2 * i5] = r1 - r2;
/* Same sums and differences for the odd components; r1, r2 and r4 are reused as scratch here */
r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
t2 = r1 - s3;
r1 = r1 + s3;
s3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1 + 1] = r1 + r2;
pSrc[2 * i5 + 1] = r1 - r2;
pSrc[2 * i3] = t1 + s3;
pSrc[2 * i7] = t1 - s3;
pSrc[2 * i3 + 1] = t2 - r3;
pSrc[2 * i7 + 1] = t2 + r3;
/* Difference terms scaled by C81 before being combined into outputs i2, i4, i6 and i8 */
r1 = (r6 - r8) * C81;
r6 = (r6 + r8) * C81;
r2 = (s6 - s8) * C81;
s6 = (s6 + s8) * C81;
t1 = r5 - r1;
r5 = r5 + r1;
r8 = r7 - r6;
r7 = r7 + r6;
t2 = s5 - r2;
s5 = s5 + r2;
s8 = s7 - s6;
s7 = s7 + s6;
pSrc[2 * i2] = r5 + s7;
pSrc[2 * i8] = r5 - s7;
pSrc[2 * i6] = t1 + s8;
pSrc[2 * i4] = t1 - s8;
pSrc[2 * i2 + 1] = s5 - r7;
pSrc[2 * i8 + 1] = s5 + r7;
pSrc[2 * i6 + 1] = t2 - r8;
pSrc[2 * i4 + 1] = t2 + r8;
/* Advance to column 0 of the next group */
i1 += n1;
} while (i1 < fftLen);
/* When n2 is below 8 the function stops here: the twiddled columns are not processed for this pass */
if (n2 < 8)
break;
ia1 = 0;
j = 1;
/* Columns 1 .. n2-1 of every group: same butterfly, but outputs are multiplied by twiddle factors */
do
{
/* index calculation for the coefficients */
/* ia1 accumulates twidCoefModifier once per column, so id == j * twidCoefModifier */
id = ia1 + twidCoefModifier;
ia1 = id;
ia2 = ia1 + id;
ia3 = ia2 + id;
ia4 = ia3 + id;
ia5 = ia4 + id;
ia6 = ia5 + id;
ia7 = ia6 + id;
/* Load the seven twiddles for this column; they are reused for every group in the pass */
co2 = pCoef[2 * ia1];
co3 = pCoef[2 * ia2];
co4 = pCoef[2 * ia3];
co5 = pCoef[2 * ia4];
co6 = pCoef[2 * ia5];
co7 = pCoef[2 * ia6];
co8 = pCoef[2 * ia7];
si2 = pCoef[2 * ia1 + 1];
si3 = pCoef[2 * ia2 + 1];
si4 = pCoef[2 * ia3 + 1];
si5 = pCoef[2 * ia4 + 1];
si6 = pCoef[2 * ia5 + 1];
si7 = pCoef[2 * ia6 + 1];
si8 = pCoef[2 * ia7 + 1];
i1 = j;
do
{
/* index calculation for the input */
i2 = i1 + n2;
i3 = i2 + n2;
i4 = i3 + n2;
i5 = i4 + n2;
i6 = i5 + n2;
i7 = i6 + n2;
i8 = i7 + n2;
r1 = pSrc[2 * i1] + pSrc[2 * i5];
r5 = pSrc[2 * i1] - pSrc[2 * i5];
r2 = pSrc[2 * i2] + pSrc[2 * i6];
r6 = pSrc[2 * i2] - pSrc[2 * i6];
r3 = pSrc[2 * i3] + pSrc[2 * i7];
r7 = pSrc[2 * i3] - pSrc[2 * i7];
r4 = pSrc[2 * i4] + pSrc[2 * i8];
r8 = pSrc[2 * i4] - pSrc[2 * i8];
t1 = r1 - r3;
r1 = r1 + r3;
r3 = r2 - r4;
r2 = r2 + r4;
/* Output i1 is stored directly (no twiddle); the remaining outputs are kept in temporaries */
pSrc[2 * i1] = r1 + r2;
r2 = r1 - r2;
s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
t2 = s1 - s3;
s1 = s1 + s3;
s3 = s2 - s4;
s2 = s2 + s4;
r1 = t1 + s3;
t1 = t1 - s3;
pSrc[2 * i1 + 1] = s1 + s2;
s2 = s1 - s2;
s1 = t2 - r3;
t2 = t2 + r3;
/* Twiddle multiply for output i5: even = co*r + si*s, odd = co*s - si*r */
p1 = co5 * r2;
p2 = si5 * s2;
p3 = co5 * s2;
p4 = si5 * r2;
pSrc[2 * i5] = p1 + p2;
pSrc[2 * i5 + 1] = p3 - p4;
/* Output i3 */
p1 = co3 * r1;
p2 = si3 * s1;
p3 = co3 * s1;
p4 = si3 * r1;
pSrc[2 * i3] = p1 + p2;
pSrc[2 * i3 + 1] = p3 - p4;
/* Output i7 */
p1 = co7 * t1;
p2 = si7 * t2;
p3 = co7 * t2;
p4 = si7 * t1;
pSrc[2 * i7] = p1 + p2;
pSrc[2 * i7 + 1] = p3 - p4;
/* Difference terms scaled by C81, then combined for outputs i2, i8, i6 and i4 */
r1 = (r6 - r8) * C81;
r6 = (r6 + r8) * C81;
s1 = (s6 - s8) * C81;
s6 = (s6 + s8) * C81;
t1 = r5 - r1;
r5 = r5 + r1;
r8 = r7 - r6;
r7 = r7 + r6;
t2 = s5 - s1;
s5 = s5 + s1;
s8 = s7 - s6;
s7 = s7 + s6;
r1 = r5 + s7;
r5 = r5 - s7;
r6 = t1 + s8;
t1 = t1 - s8;
s1 = s5 - r7;
s5 = s5 + r7;
s6 = t2 - r8;
t2 = t2 + r8;
/* Output i2 */
p1 = co2 * r1;
p2 = si2 * s1;
p3 = co2 * s1;
p4 = si2 * r1;
pSrc[2 * i2] = p1 + p2;
pSrc[2 * i2 + 1] = p3 - p4;
/* Output i8 */
p1 = co8 * r5;
p2 = si8 * s5;
p3 = co8 * s5;
p4 = si8 * r5;
pSrc[2 * i8] = p1 + p2;
pSrc[2 * i8 + 1] = p3 - p4;
/* Output i6 */
p1 = co6 * r6;
p2 = si6 * s6;
p3 = co6 * s6;
p4 = si6 * r6;
pSrc[2 * i6] = p1 + p2;
pSrc[2 * i6 + 1] = p3 - p4;
/* Output i4 */
p1 = co4 * t1;
p2 = si4 * t2;
p3 = co4 * t2;
p4 = si4 * t1;
pSrc[2 * i4] = p1 + p2;
pSrc[2 * i4 + 1] = p3 - p4;
/* Same column in the next group */
i1 += n1;
} while (i1 < fftLen);
j++;
} while (j < n2);
/* The next pass steps through the twiddle table with an 8x larger stride */
twidCoefModifier <<= 3;
} while (n2 > 7);
}

View file

@ -0,0 +1,448 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_f32.c
* Description: Processing function of DCT4 & IDCT4 F32
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupTransforms
*/
/**
@defgroup DCT4_IDCT4 DCT Type IV Functions
Representation of signals by minimum number of values is important for storage and transmission.
The possibility of large discontinuity between the beginning and end of a period of a signal
in DFT can be avoided by extending the signal so that it is even-symmetric.
Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the
spectrum and is very widely used in signal and image coding applications.
The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.
DCT has an excellent energy-packing capability, hence has many applications and in data compression in particular.
DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.
Reordering of the input data makes the computation of DCT just a problem of
computing the DFT of a real signal with a few additional operations.
This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.
DCT type-II can be implemented using the Fast Fourier Transform (FFT) internally; because the transform is applied to real values, the Real FFT can be used.
DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.
DCT2 implementation can be described in the following steps:
- Re-ordering input
- Calculating Real FFT
- Multiplication of weights and Real FFT output and getting real part from the product.
This process is explained by the block diagram below:
\image html DCT4.gif "Discrete Cosine Transform - type-IV"
@par Algorithm
The N-point type-IV DCT is defined as a real, linear transformation by the formula:
\image html DCT4Equation.gif
where <code>k = 0, 1, 2, ..., N-1</code>
@par
Its inverse is defined as follows:
\image html IDCT4Equation.gif
where <code>n = 0, 1, 2, ..., N-1</code>
@par
The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
The symmetry of the transform matrix indicates that the fast algorithms for the forward
and inverse transform computation are identical.
Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both.
@par Lengths supported by the transform:
As DCT4 internally uses Real FFT, it supports all the lengths 128, 512, 2048 and 8192.
The library provides separate functions for Q15, Q31, and floating-point data types.
@par Instance Structure
The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.
A separate instance structure must be defined for each transform.
There are separate instance structure declarations for each of the 3 supported data types.
@par Initialization Functions
There is also an associated initialization function for each data type.
The initialization function performs the following operations:
- Sets the values of the internal structure fields.
- Initializes Real FFT as its process function is used internally in DCT4, by calling \ref arm_rfft_init_f32().
@par
Use of the initialization function is optional.
However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
To place an instance structure into a const data section, the instance structure must be manually initialized.
Manually initialize the instance structure as follows:
<pre>
arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
</pre>
where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;
\c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;
\c pTwiddle points to the twiddle factor table;
\c pCosFactor points to the cosFactor table;
\c pRfft points to the real FFT instance;
\c pCfft points to the complex FFT instance;
The CFFT and RFFT structures also need to be initialized; refer to arm_cfft_radix4_f32()
and arm_rfft_f32() respectively for details regarding static initialization.
@par Fixed-Point Behavior
Care must be taken when using the fixed-point versions of the DCT4 transform functions.
In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
Refer to the function specific documentation below for usage guidelines.
*/
/**
@addtogroup DCT4_IDCT4
@{
*/
/**
@brief Processing function for the floating-point DCT4/IDCT4.
@param[in] S points to an instance of the floating-point DCT4/IDCT4 structure
@param[in] pState points to state buffer
@param[in,out] pInlineBuffer points to the in-place input and output buffer
@return none
*/
void arm_dct4_f32(
  const arm_dct4_instance_f32 * S,
        float32_t * pState,
        float32_t * pInlineBuffer)
{
  const float32_t *pWeights = S->pTwiddle;     /* Weights applied to the RFFT output */
  const float32_t *pCosTab  = S->pCosFactor;   /* Cos factors applied to the input */
        float32_t *pFwd;                       /* Cursor walking pState upwards */
        float32_t *pRev;                       /* Cursor walking pState downwards */
        float32_t *pData;                      /* Cursor walking pInlineBuffer */
        float32_t acc;                         /* Running value of the DCT2 -> DCT4 recurrence */
        uint32_t count;                        /* Countdown loop counter */

  /*
   * Overview:
   *   (a) Pre-processing : scale the input by 2 and multiply it by the cos-factor table.
   *   (b) DCT2 via RFFT  : Step 1 re-orders even/odd samples, Step 2 runs the RFFT,
   *                        Step 3 multiplies the RFFT output by the weights.
   *   (c) Post-processing: Y4(0) = Y2(0) / 2, then Y4(k) = Y2(k) - Y4(k-1).
   *   (d) Normalization  : multiply every output by S->normalize.
   * When ARM_MATH_LOOPUNROLL is defined the element loops handle four items per iteration.
   */

  /* ---- (a) Pre-processing ---- */
  arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N);
  arm_mult_f32(pInlineBuffer, pCosTab, pInlineBuffer, S->N);

  /* ---- (b) Step 1: pState[i] = in[2*i], pState[N-1-i] = in[2*i+1] ---- */
  pFwd  = pState;
  pRev  = pState + (S->N - 1U);
  pData = pInlineBuffer;

#if defined (ARM_MATH_LOOPUNROLL)

  /* Four even/odd pairs per iteration */
  count = S->Nby2 >> 2U;
  do
  {
    pFwd[0]  = pData[0];
    pRev[0]  = pData[1];
    pFwd[1]  = pData[2];
    pRev[-1] = pData[3];
    pFwd[2]  = pData[4];
    pRev[-2] = pData[5];
    pFwd[3]  = pData[6];
    pRev[-3] = pData[7];

    pFwd  += 4;
    pRev  -= 4;
    pData += 8;
  } while (--count > 0U);

  /* Copy the re-ordered samples back into the in-place buffer, four at a time */
  pData = pInlineBuffer;
  pFwd  = pState;
  count = S->N >> 2U;
  do
  {
    pData[0] = pFwd[0];
    pData[1] = pFwd[1];
    pData[2] = pFwd[2];
    pData[3] = pFwd[3];

    pData += 4;
    pFwd  += 4;
  } while (--count > 0U);

#else

  /* One even/odd pair per iteration */
  count = S->Nby2;
  do
  {
    pFwd[0] = pData[0];
    pRev[0] = pData[1];

    pFwd  += 1;
    pRev  -= 1;
    pData += 2;
  } while (--count > 0U);

  /* Copy the re-ordered samples back into the in-place buffer */
  pData = pInlineBuffer;
  pFwd  = pState;
  count = S->N;
  do
  {
    pData[0] = pFwd[0];

    pData += 1;
    pFwd  += 1;
  } while (--count > 0U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* ---- (b) Step 2: RFFT of the real in-place buffer; complex result goes to pState ---- */
  arm_rfft_f32 (S->pRfft, pInlineBuffer, pState);

  /* ---- (b) Step 3: multiply the RFFT output by the weights ---- */
  arm_cmplx_mult_cmplx_f32 (pState, pWeights, pState, S->N);

  /* ---- (c) Post-processing: keep the real parts and convert DCT2 -> DCT4 ---- */
  pData = pInlineBuffer;
  pFwd  = pState;

  /* Y4(0) = Y2(0) / 2 */
  acc = pFwd[0] * (float32_t) 0.5;
  pData[0] = acc;

  /* Real parts sit at every second slot of pState */
  pFwd  += 2;
  pData += 1;

#if defined (ARM_MATH_LOOPUNROLL)

  /* Y4(k) = Y2(k) - Y4(k-1), four outputs per iteration */
  count = (S->N - 1U) >> 2U;
  do
  {
    acc = pFwd[0] - acc;
    pData[0] = acc;
    acc = pFwd[2] - acc;
    pData[1] = acc;
    acc = pFwd[4] - acc;
    pData[2] = acc;
    acc = pFwd[6] - acc;
    pData[3] = acc;

    pFwd  += 8;
    pData += 4;
  } while (--count > 0U);

  /* Outputs left over after the blocks of four */
  for (count = (S->N - 1U) % 0x4U; count > 0U; count--)
  {
    acc = pFwd[0] - acc;
    pData[0] = acc;

    pFwd  += 2;
    pData += 1;
  }

  /* ---- (d) Normalization, four outputs per iteration ---- */
  pData = pInlineBuffer;
  count = S->N >> 2U;
  do
  {
    pData[0] = pData[0] * S->normalize;
    pData[1] = pData[1] * S->normalize;
    pData[2] = pData[2] * S->normalize;
    pData[3] = pData[3] * S->normalize;

    pData += 4;
  } while (--count > 0U);

#else

  /* Y4(k) = Y2(k) - Y4(k-1) for k = 1 .. N-1 */
  count = (S->N - 1U);
  do
  {
    acc = pFwd[0] - acc;
    pData[0] = acc;

    pFwd  += 2;
    pData += 1;
  } while (--count > 0U);

  /* ---- (d) Normalization ---- */
  pData = pInlineBuffer;
  count = S->N;
  do
  {
    pData[0] = pData[0] * S->normalize;

    pData += 1;
  } while (--count > 0U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
}
/**
@} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,130 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_init_f32.c
* Description: Initialization function of DCT-4 & IDCT4 F32
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup DCT4_IDCT4
@{
*/
/**
  @brief         Initialization function for the floating-point DCT4/IDCT4.
  @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure
  @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure
  @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure
  @param[in]     N          length of the DCT4
  @param[in]     Nby2       half of the length of the DCT4
  @param[in]     normalize  normalizing factor.
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
                   - otherwise the non-success status reported by arm_rfft_init_f32()
  @par           Normalizing factor
                   The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
                   Floating-point normalizing factors are mentioned in the table below for different DCT sizes:
                   \image html dct4NormalizingF32Table.gif
  @par           Error handling
                   The internal RFFT instance is always initialized, even when <code>N</code> is rejected.
                   When <code>N</code> is rejected, the twiddle and cos-factor pointers of <code>S</code> are not written.
 */
arm_status arm_dct4_init_f32(
  arm_dct4_instance_f32 * S,
  arm_rfft_instance_f32 * S_RFFT,
  arm_cfft_radix4_instance_f32 * S_CFFT,
  uint16_t N,
  uint16_t Nby2,
  float32_t normalize)
{
  /* Status of the DCT4 table selection */
  arm_status status = ARM_MATH_SUCCESS;
  /* Status reported by the RFFT initialization */
  arm_status rfftStatus;

  /* Initialize the DCT4 length */
  S->N = N;

  /* Initialize the half of DCT4 length */
  S->Nby2 = Nby2;

  /* Initialize the DCT4 Normalizing factor */
  S->normalize = normalize;

  /* Initialize Real FFT Instance */
  S->pRfft = S_RFFT;

  /* Initialize Complex FFT Instance */
  S->pCfft = S_CFFT;

  /* Select the weight and cos-factor tables; only lengths whose tables are compiled in are accepted */
  switch (N)
  {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_8192)
    case 8192U:
      S->pTwiddle = Weights_8192;
      S->pCosFactor = cos_factors_8192;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_2048)
    case 2048U:
      S->pTwiddle = Weights_2048;
      S->pCosFactor = cos_factors_2048;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_512)
    case 512U:
      S->pTwiddle = Weights_512;
      S->pCosFactor = cos_factors_512;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_128)
    case 128U:
      S->pTwiddle = Weights_128;
      S->pCosFactor = cos_factors_128;
      break;
#endif

    default:
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  /* Initialize the RFFT/RIFFT Function (always performed, as before) */
  rfftStatus = arm_rfft_init_f32(S->pRfft, S->pCfft, S->N, 0U, 1U);

  /* A failed RFFT initialization must not be reported as success;
     an earlier table-selection error takes precedence. */
  if (status == ARM_MATH_SUCCESS)
  {
    status = rfftStatus;
  }

  /* return the status of DCT4 Init function */
  return (status);
}
/**
@} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,130 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_init_q15.c
* Description: Initialization function of DCT-4 & IDCT4 Q15
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup DCT4_IDCT4
@{
*/
/**
  @brief         Initialization function for the Q15 DCT4/IDCT4.
  @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure
  @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure
  @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure
  @param[in]     N          length of the DCT4
  @param[in]     Nby2       half of the length of the DCT4
  @param[in]     normalize  normalizing factor
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
                   - otherwise the non-success status reported by arm_rfft_init_q15()
  @par           Normalizing factor
                   The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
                   Normalizing factors in 1.15 format are mentioned in the table below for different DCT sizes:
                   \image html dct4NormalizingQ15Table.gif
  @par           Error handling
                   The internal RFFT instance is always initialized, even when <code>N</code> is rejected.
                   When <code>N</code> is rejected, the twiddle and cos-factor pointers of <code>S</code> are not written.
 */
arm_status arm_dct4_init_q15(
  arm_dct4_instance_q15 * S,
  arm_rfft_instance_q15 * S_RFFT,
  arm_cfft_radix4_instance_q15 * S_CFFT,
  uint16_t N,
  uint16_t Nby2,
  q15_t normalize)
{
  /* Status of the DCT4 table selection */
  arm_status status = ARM_MATH_SUCCESS;
  /* Status reported by the RFFT initialization */
  arm_status rfftStatus;

  /* Initialize the DCT4 length */
  S->N = N;

  /* Initialize the half of DCT4 length */
  S->Nby2 = Nby2;

  /* Initialize the DCT4 Normalizing factor */
  S->normalize = normalize;

  /* Initialize Real FFT Instance */
  S->pRfft = S_RFFT;

  /* Initialize Complex FFT Instance */
  S->pCfft = S_CFFT;

  /* Select the weight and cos-factor tables; only lengths whose tables are compiled in are accepted */
  switch (N)
  {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_8192)
    case 8192U:
      S->pTwiddle = WeightsQ15_8192;
      S->pCosFactor = cos_factorsQ15_8192;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_2048)
    case 2048U:
      S->pTwiddle = WeightsQ15_2048;
      S->pCosFactor = cos_factorsQ15_2048;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_512)
    case 512U:
      S->pTwiddle = WeightsQ15_512;
      S->pCosFactor = cos_factorsQ15_512;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_128)
    case 128U:
      S->pTwiddle = WeightsQ15_128;
      S->pCosFactor = cos_factorsQ15_128;
      break;
#endif

    default:
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  /* Initialize the RFFT/RIFFT (always performed, as before) */
  rfftStatus = arm_rfft_init_q15(S->pRfft, S->N, 0U, 1U);

  /* A failed RFFT initialization must not be reported as success;
     an earlier table-selection error takes precedence. */
  if (status == ARM_MATH_SUCCESS)
  {
    status = rfftStatus;
  }

  /* return the status of DCT4 Init function */
  return (status);
}
/**
@} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,129 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_init_q31.c
* Description: Initialization function of DCT-4 & IDCT4 Q31
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup DCT4_IDCT4
@{
*/
/**
  @brief         Initialization function for the Q31 DCT4/IDCT4.
  @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
  @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure
  @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure
  @param[in]     N          length of the DCT4.
  @param[in]     Nby2       half of the length of the DCT4.
  @param[in]     normalize  normalizing factor.
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
                   - otherwise the non-success status reported by arm_rfft_init_q31()
  @par           Normalizing factor:
                   The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
                   Normalizing factors in 1.31 format are mentioned in the table below for different DCT sizes:
                   \image html dct4NormalizingQ31Table.gif
  @par           Error handling
                   The internal RFFT instance is always initialized, even when <code>N</code> is rejected.
                   When <code>N</code> is rejected, the twiddle and cos-factor pointers of <code>S</code> are not written.
 */
arm_status arm_dct4_init_q31(
  arm_dct4_instance_q31 * S,
  arm_rfft_instance_q31 * S_RFFT,
  arm_cfft_radix4_instance_q31 * S_CFFT,
  uint16_t N,
  uint16_t Nby2,
  q31_t normalize)
{
  /* Status of the DCT4 table selection */
  arm_status status = ARM_MATH_SUCCESS;
  /* Status reported by the RFFT initialization */
  arm_status rfftStatus;

  /* Initialize the DCT4 length */
  S->N = N;

  /* Initialize the half of DCT4 length */
  S->Nby2 = Nby2;

  /* Initialize the DCT4 Normalizing factor */
  S->normalize = normalize;

  /* Initialize Real FFT Instance */
  S->pRfft = S_RFFT;

  /* Initialize Complex FFT Instance */
  S->pCfft = S_CFFT;

  /* Select the weight and cos-factor tables; only lengths whose tables are compiled in are accepted */
  switch (N)
  {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_8192)
    case 8192U:
      S->pTwiddle = WeightsQ31_8192;
      S->pCosFactor = cos_factorsQ31_8192;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_2048)
    case 2048U:
      S->pTwiddle = WeightsQ31_2048;
      S->pCosFactor = cos_factorsQ31_2048;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_512)
    case 512U:
      S->pTwiddle = WeightsQ31_512;
      S->pCosFactor = cos_factorsQ31_512;
      break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_128)
    case 128U:
      S->pTwiddle = WeightsQ31_128;
      S->pCosFactor = cos_factorsQ31_128;
      break;
#endif

    default:
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  /* Initialize the RFFT/RIFFT Function (always performed, as before) */
  rfftStatus = arm_rfft_init_q31(S->pRfft, S->N, 0U, 1U);

  /* A failed RFFT initialization must not be reported as success;
     an earlier table-selection error takes precedence. */
  if (status == ARM_MATH_SUCCESS)
  {
    status = rfftStatus;
  }

  /* return the status of DCT4 Init function */
  return (status);
}
/**
@} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,381 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_q15.c
* Description: Processing function of DCT4 & IDCT4 Q15
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@addtogroup DCT4_IDCT4
@{
*/
/**
@brief Processing function for the Q15 DCT4/IDCT4.
@param[in] S points to an instance of the Q15 DCT4 structure.
@param[in] pState points to state buffer.
@param[in,out] pInlineBuffer points to the in-place input and output buffer.
@return none
@par Input and output formats
Internally inputs are downscaled in the RFFT process function to avoid overflows.
Number of bits downscaled, depends on the size of the transform. The input and output
formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
\image html dct4FormatsQ15Table.gif
*/
void arm_dct4_q15(
  const arm_dct4_instance_q15 * S,
        q15_t * pState,
        q15_t * pInlineBuffer)
{
  const q15_t *pWeights = S->pTwiddle;    /* weights applied to the RFFT output */
  const q15_t *pCosTab  = S->pCosFactor;  /* cosine factors used by the pre-processing */
  q15_t *pHead;                           /* cursor moving upwards through pState */
  q15_t *pTail;                           /* cursor moving downwards through pState */
  q15_t *pData;                           /* cursor moving through pInlineBuffer */
  q15_t acc;                              /* previous DCT4 output, Y4(k-1) */
  uint32_t cnt;                           /* loop counter */

  /* Outline of the computation (DCT4 is derived from a DCT2, which in turn
   * is obtained from a real FFT):
   *   (a) pre-processing:  r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*N))
   *   (b) DCT2 through the RFFT:
   *         1. re-order the even / odd input samples
   *         2. run the RFFT on the re-ordered data
   *         3. multiply the RFFT output by the weights, keep the real parts
   *   (c) post-processing: Y4(k) = Y2(k) - Y4(k-1), with Y4(0) = Y2(0)/2
   *   (d) scale every output by S->normalize.
   *
   * Every counted loop below is a do/while, so the loop counts derived from
   * S->N and S->Nby2 must be non-zero. */

  /* ---- (a) pre-processing: multiply by the cosine table, then double ---- */
  arm_mult_q15 (pInlineBuffer, pCosTab, pInlineBuffer, S->N);
  arm_shift_q15 (pInlineBuffer, 1, pInlineBuffer, S->N);

  /* ---- (b.1) re-ordering:
   *        pState[i]       = pInlineBuffer[2*i]
   *        pState[N-1-i]   = pInlineBuffer[2*i+1]
   * followed by a copy of the re-ordered samples back into pInlineBuffer. */
  pHead = pState;
  pTail = pState + (S->N - 1U);
  pData = pInlineBuffer;

#if defined (ARM_MATH_LOOPUNROLL)

  /* Four even/odd pairs (eight input samples) per iteration */
  cnt = S->Nby2 >> 2U;
  do
  {
    pHead[0]  = pData[0];
    pTail[0]  = pData[1];
    pHead[1]  = pData[2];
    pTail[-1] = pData[3];
    pHead[2]  = pData[4];
    pTail[-2] = pData[5];
    pHead[3]  = pData[6];
    pTail[-3] = pData[7];

    pHead += 4;
    pTail -= 4;
    pData += 8;
    cnt--;
  } while (cnt > 0U);

  /* Copy the N re-ordered samples back, four per iteration */
  pData = pInlineBuffer;
  pHead = pState;
  cnt = S->N >> 2U;
  do
  {
    pData[0] = pHead[0];
    pData[1] = pHead[1];
    pData[2] = pHead[2];
    pData[3] = pHead[3];

    pData += 4;
    pHead += 4;
    cnt--;
  } while (cnt > 0U);

#else

  /* One even/odd pair per iteration */
  cnt = S->Nby2;
  do
  {
    *pHead++ = pData[0];
    *pTail-- = pData[1];

    pData += 2;
    cnt--;
  } while (cnt > 0U);

  /* Copy the N re-ordered samples back */
  pData = pInlineBuffer;
  pHead = pState;
  cnt = S->N;
  do
  {
    *pData++ = *pHead++;
    cnt--;
  } while (cnt > 0U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* ---- (b.2) RFFT: real input in pInlineBuffer, complex output in pState ---- */
  arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);

  /* ---- (b.3) multiply the RFFT output by the weights ---- */
  arm_cmplx_mult_cmplx_q15 (pState, pWeights, pState, S->N);

  /* The complex product is in 3.13 format; shifting the 2*N values left by
   * two bits brings them back to 1.15. */
  arm_shift_q15 (pState, 2, pState, S->N * 2);

  /* ---- (c) post-processing: DCT2 -> DCT4, written over pInlineBuffer.
   * Only the real parts are used; they sit at every second position of
   * pState, hence the stride of two on pHead. */
  pData = pInlineBuffer;
  pHead = pState;

  /* Y4(0) = Y2(0) / 2 */
  acc = pHead[0] >> 1U;
  *pData++ = acc;
  pHead += 2;

#if defined (ARM_MATH_LOOPUNROLL)

  /* Y4(k) = Y2(k) - Y4(k-1), four outputs per iteration */
  cnt = (S->N - 1U) >> 2U;
  do
  {
    acc = pHead[0] - acc;
    pData[0] = acc;

    acc = pHead[2] - acc;
    pData[1] = acc;

    acc = pHead[4] - acc;
    pData[2] = acc;

    acc = pHead[6] - acc;
    pData[3] = acc;

    pHead += 8;
    pData += 4;
    cnt--;
  } while (cnt > 0U);

  /* Remaining (N-1) % 4 outputs, one at a time */
  cnt = (S->N - 1U) % 0x4U;
  while (cnt > 0U)
  {
    acc = *pHead - acc;
    *pData++ = acc;

    pHead += 2;
    cnt--;
  }

  /* ---- (d) normalization, four outputs per iteration ---- */
  cnt = S->N >> 2U;
  pData = pInlineBuffer;
  do
  {
    pData[0] = (q15_t) (((q31_t) pData[0] * S->normalize) >> 15);
    pData[1] = (q15_t) (((q31_t) pData[1] * S->normalize) >> 15);
    pData[2] = (q15_t) (((q31_t) pData[2] * S->normalize) >> 15);
    pData[3] = (q15_t) (((q31_t) pData[3] * S->normalize) >> 15);

    pData += 4;
    cnt--;
  } while (cnt > 0U);

#else

  /* Y4(k) = Y2(k) - Y4(k-1) for k = 1 .. N-1 */
  cnt = (S->N - 1U);
  do
  {
    acc = *pHead - acc;
    *pData++ = acc;

    pHead += 2;
    cnt--;
  } while (cnt > 0U);

  /* ---- (d) normalization ---- */
  cnt = S->N;
  pData = pInlineBuffer;
  do
  {
    *pData = (q15_t) (((q31_t) *pData * S->normalize) >> 15);

    pData++;
    cnt--;
  } while (cnt > 0U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
}
/**
@} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,383 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_q31.c
* Description: Processing function of DCT4 & IDCT4 Q31
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@addtogroup DCT4_IDCT4
@{
*/
/**
@brief Processing function for the Q31 DCT4/IDCT4.
@param[in] S points to an instance of the Q31 DCT4 structure.
@param[in] pState points to state buffer.
@param[in,out] pInlineBuffer points to the in-place input and output buffer.
@return none
@par Input and output formats
Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,
as the conversion from DCT2 to DCT4 involves one subtraction.
Internally inputs are downscaled in the RFFT process function to avoid overflows.
Number of bits downscaled, depends on the size of the transform.
The input and output formats for different DCT sizes and number of bits to upscale are
mentioned in the table below:
\image html dct4FormatsQ31Table.gif
*/
void arm_dct4_q31(
  const arm_dct4_instance_q31 * S,
        q31_t * pState,
        q31_t * pInlineBuffer)
{
  const q31_t *pWeights = S->pTwiddle;    /* weights applied to the RFFT output */
  const q31_t *pCosTab  = S->pCosFactor;  /* cosine factors used by the pre-processing */
  q31_t *pHead;                           /* cursor moving upwards through pState */
  q31_t *pTail;                           /* cursor moving downwards through pState */
  q31_t *pData;                           /* cursor moving through pInlineBuffer */
  q31_t acc;                              /* previous DCT4 output, Y4(k-1) */
  uint32_t cnt;                           /* loop counter */

  /* Outline of the computation (DCT4 is derived from a DCT2, which in turn
   * is obtained from a real FFT):
   *   (a) pre-processing:  r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*N))
   *   (b) DCT2 through the RFFT:
   *         1. re-order the even / odd input samples
   *         2. run the RFFT on the re-ordered data
   *         3. multiply the RFFT output by the weights, keep the real parts
   *   (c) post-processing: Y4(k) = Y2(k) - Y4(k-1), with Y4(0) = Y2(0)/2
   *   (d) scale every output by S->normalize.
   *
   * Apart from the recurrence loop of the non-unrolled build, the counted
   * loops below are do/while loops, so the loop counts derived from S->N and
   * S->Nby2 must be non-zero. */

  /* ---- (a) pre-processing: multiply by the cosine table, then double ---- */
  arm_mult_q31 (pInlineBuffer, pCosTab, pInlineBuffer, S->N);
  arm_shift_q31 (pInlineBuffer, 1, pInlineBuffer, S->N);

  /* ---- (b.1) re-ordering:
   *        pState[i]       = pInlineBuffer[2*i]
   *        pState[N-1-i]   = pInlineBuffer[2*i+1]
   * followed by a copy of the re-ordered samples back into pInlineBuffer. */
  pHead = pState;
  pTail = pState + (S->N - 1U);
  pData = pInlineBuffer;

#if defined (ARM_MATH_LOOPUNROLL)

  /* Four even/odd pairs (eight input samples) per iteration */
  cnt = S->Nby2 >> 2U;
  do
  {
    pHead[0]  = pData[0];
    pTail[0]  = pData[1];
    pHead[1]  = pData[2];
    pTail[-1] = pData[3];
    pHead[2]  = pData[4];
    pTail[-2] = pData[5];
    pHead[3]  = pData[6];
    pTail[-3] = pData[7];

    pHead += 4;
    pTail -= 4;
    pData += 8;
    cnt--;
  } while (cnt > 0U);

  /* Copy the N re-ordered samples back, four per iteration */
  pData = pInlineBuffer;
  pHead = pState;
  cnt = S->N >> 2U;
  do
  {
    pData[0] = pHead[0];
    pData[1] = pHead[1];
    pData[2] = pHead[2];
    pData[3] = pHead[3];

    pData += 4;
    pHead += 4;
    cnt--;
  } while (cnt > 0U);

#else

  /* One even/odd pair per iteration */
  cnt = S->Nby2;
  do
  {
    *pHead++ = pData[0];
    *pTail-- = pData[1];

    pData += 2;
    cnt--;
  } while (cnt > 0U);

  /* Copy the N re-ordered samples back */
  pData = pInlineBuffer;
  pHead = pState;
  cnt = S->N;
  do
  {
    *pData++ = *pHead++;
    cnt--;
  } while (cnt > 0U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* ---- (b.2) RFFT: real input in pInlineBuffer, complex output in pState ---- */
  arm_rfft_q31 (S->pRfft, pInlineBuffer, pState);

  /* ---- (b.3) multiply the RFFT output by the weights ---- */
  arm_cmplx_mult_cmplx_q31 (pState, pWeights, pState, S->N);

  /* The complex product is in 3.29 format; shifting the 2*N values left by
   * two bits brings them back to 1.31. */
  arm_shift_q31 (pState, 2, pState, S->N * 2);

  /* ---- (c) post-processing: DCT2 -> DCT4, written over pInlineBuffer.
   * Only the real parts are used; they sit at every second position of
   * pState, hence the stride of two on pHead. */
  pData = pInlineBuffer;
  pHead = pState;

  /* Y4(0) = Y2(0) / 2 */
  acc = pHead[0] >> 1U;
  *pData++ = acc;
  pHead += 2;

#if defined (ARM_MATH_LOOPUNROLL)

  /* Y4(k) = Y2(k) - Y4(k-1), four outputs per iteration */
  cnt = (S->N - 1U) >> 2U;
  do
  {
    acc = pHead[0] - acc;
    pData[0] = acc;

    acc = pHead[2] - acc;
    pData[1] = acc;

    acc = pHead[4] - acc;
    pData[2] = acc;

    acc = pHead[6] - acc;
    pData[3] = acc;

    pHead += 8;
    pData += 4;
    cnt--;
  } while (cnt > 0U);

  /* Remaining (N-1) % 4 outputs, one at a time */
  cnt = (S->N - 1U) % 0x4U;
  while (cnt > 0U)
  {
    acc = *pHead - acc;
    *pData++ = acc;

    pHead += 2;
    cnt--;
  }

  /* ---- (d) normalization, four outputs per iteration ---- */
  cnt = S->N >> 2U;
  pData = pInlineBuffer;
  do
  {
    pData[0] = (q31_t) (((q63_t) pData[0] * S->normalize) >> 31);
    pData[1] = (q31_t) (((q63_t) pData[1] * S->normalize) >> 31);
    pData[2] = (q31_t) (((q63_t) pData[2] * S->normalize) >> 31);
    pData[3] = (q31_t) (((q63_t) pData[3] * S->normalize) >> 31);

    pData += 4;
    cnt--;
  } while (cnt > 0U);

#else

  /* Y4(k) = Y2(k) - Y4(k-1) for k = 1 .. N-1 */
  cnt = (S->N - 1U);
  while (cnt > 0U)
  {
    acc = *pHead - acc;
    *pData++ = acc;

    pHead += 2;
    cnt--;
  }

  /* ---- (d) normalization ---- */
  cnt = S->N;
  pData = pInlineBuffer;
  do
  {
    *pData = (q31_t) (((q63_t) *pData * S->normalize) >> 31);

    pData++;
    cnt--;
  } while (cnt > 0U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
}
/**
@} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,311 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_f32.c
* Description: RFFT & RIFFT Floating point process function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* ----------------------------------------------------------------------
* Internal functions prototypes
* -------------------------------------------------------------------- */
/* The three extern routines below are not defined in this file; arm_rfft_f32()
 * calls them for the complex radix-4 stage and for the optional bit reversal. */

/* Complex radix-4 forward butterfly, operating in place on pSrc. */
extern void arm_radix4_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier);
/* Complex radix-4 inverse butterfly; takes an additional onebyfftLen factor. */
extern void arm_radix4_butterfly_inverse_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen);
/* Bit-reversal re-ordering of pSrc driven by the table pBitRevTab. */
extern void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
const uint16_t * pBitRevTab);
/* Split step of the forward real FFT; defined further down in this file. */
void arm_split_rfft_f32(
float32_t * pSrc,
uint32_t fftLen,
const float32_t * pATable,
const float32_t * pBTable,
float32_t * pDst,
uint32_t modifier);
/* Split step of the inverse real FFT; defined further down in this file. */
void arm_split_rifft_f32(
float32_t * pSrc,
uint32_t fftLen,
const float32_t * pATable,
const float32_t * pBTable,
float32_t * pDst,
uint32_t modifier);
/**
@ingroup groupTransforms
*/
/**
@addtogroup RealFFT
@{
*/
/**
@brief Processing function for the floating-point RFFT/RIFFT.
Source buffer is modified by this function.
@deprecated Do not use this function. It has been superseded by \ref arm_rfft_fast_f32 and will be removed in the future.
@param[in] S points to an instance of the floating-point RFFT/RIFFT structure
@param[in] pSrc points to the input buffer
@param[out] pDst points to the output buffer
@return none
*/
void arm_rfft_f32(
  const arm_rfft_instance_f32 * S,
        float32_t * pSrc,
        float32_t * pDst)
{
  /* Complex radix-4 instance embedded in the real-FFT instance */
  const arm_cfft_radix4_instance_f32 *pCfft = S->pCfft;

  if (S->ifftFlagR != 1U)
  {
    /* Forward transform: complex radix-4 FFT in place on pSrc, optional
     * bit reversal, then the real split step writing into pDst. */
    arm_radix4_butterfly_f32 (pSrc, pCfft->fftLen, pCfft->pTwiddle, pCfft->twidCoefModifier);

    if (S->bitReverseFlagR == 1U)
    {
      arm_bitreversal_f32 (pSrc, pCfft->fftLen, pCfft->bitRevFactor, pCfft->pBitRevTable);
    }

    arm_split_rfft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
    return;
  }

  /* Inverse transform: the real split step runs first (pSrc -> pDst), then
   * the complex radix-4 inverse FFT and the optional bit reversal operate
   * in place on pDst. */
  arm_split_rifft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);

  arm_radix4_butterfly_inverse_f32 (pDst, pCfft->fftLen, pCfft->pTwiddle, pCfft->twidCoefModifier, pCfft->onebyfftLen);

  if (S->bitReverseFlagR == 1U)
  {
    arm_bitreversal_f32 (pDst, pCfft->fftLen, pCfft->bitRevFactor, pCfft->pBitRevTable);
  }
}
/**
@} end of RealFFT group
*/
/**
@brief Core Real FFT process
@param[in] pSrc points to input buffer
@param[in] fftLen length of FFT
@param[in] pATable points to twiddle Coef A buffer
@param[in] pBTable points to twiddle Coef B buffer
@param[out] pDst points to output buffer
@param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
@return none
*/
void arm_split_rfft_f32(
        float32_t * pSrc,
        uint32_t fftLen,
  const float32_t * pATable,
  const float32_t * pBTable,
        float32_t * pDst,
        uint32_t modifier)
{
  const uint32_t twStep = modifier * 2U;               /* floats between two used twiddle entries */
  const float32_t *pA = &pATable[twStep];              /* current entry of table A (re, im) */
  const float32_t *pB = &pBTable[twStep];              /* current entry of table B (only re is read) */
  float32_t *pLo = &pSrc[2];                           /* input bin i, walking upwards */
  float32_t *pHi = &pSrc[(2U * fftLen) - 1U];          /* imaginary part of the mirrored bin, walking downwards */
  float32_t *pOutLo = &pDst[2];                        /* output bin i, walking upwards */
  float32_t *pOutHi = &pDst[(4U * fftLen) - 1U];       /* conjugate output, walking downwards */
  uint32_t k;                                          /* loop counter */

  /* Bins 1 .. fftLen-1; bin 0 and bin fftLen are handled after the loop */
  for (k = fftLen - 1U; k > 0U; k--)
  {
    const float32_t aRe = pA[0];
    const float32_t aIm = pA[1];
    const float32_t bRe = pB[0];
    const float32_t xRe = pLo[0];     /* pSrc[2 * i]     */
    const float32_t xIm = pLo[1];     /* pSrc[2 * i + 1] */
    const float32_t yIm = pHi[0];     /* imaginary part of the mirrored bin */
    const float32_t yRe = pHi[-1];    /* real part of the mirrored bin */
    float32_t re, im;

    /* The imaginary coefficient of table B is never read: wherever the
     * reference formula would need it, the code uses aIm with the opposite
     * sign. The accumulation order below is kept as is. */
    re  = xRe * aRe;
    im  = xRe * aIm;
    re -= (xIm + yIm) * aIm;
    im += xIm * aRe;
    im -= yIm * bRe;
    im -= yRe * aIm;
    re += yRe * bRe;

    /* Output bin */
    pOutLo[0] = re;
    pOutLo[1] = im;

    /* Its complex conjugate, stored from the top of pDst downwards */
    pOutHi[0]  = -im;
    pOutHi[-1] = re;

    pLo += 2;
    pHi -= 2;
    pOutLo += 2;
    pOutHi -= 2;
    pA += twStep;
    pB += twStep;
  }

  /* Bin fftLen and bin 0 are purely real */
  pDst[2U * fftLen] = pSrc[0] - pSrc[1];
  pDst[(2U * fftLen) + 1U] = 0.0f;

  pDst[0] = pSrc[0] + pSrc[1];
  pDst[1] = 0.0f;
}
/**
@brief Core Real IFFT process
@param[in] pSrc points to input buffer
@param[in] fftLen length of FFT
@param[in] pATable points to twiddle Coef A buffer
@param[in] pBTable points to twiddle Coef B buffer
@param[out] pDst points to output buffer
@param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
@return none
*/
void arm_split_rifft_f32(
        float32_t * pSrc,
        uint32_t fftLen,
  const float32_t * pATable,
  const float32_t * pBTable,
        float32_t * pDst,
        uint32_t modifier)
{
  const uint32_t twStep = modifier * 2;                /* floats between two used twiddle entries */
  const float32_t *pA = &pATable[0];                   /* current entry of table A (re, im) */
  const float32_t *pB = &pBTable[0];                   /* current entry of table B (only re is read) */
  float32_t *pLo = &pSrc[0];                           /* input bin i, walking upwards */
  float32_t *pHi = &pSrc[(2U * fftLen) + 1U];          /* imaginary part of the mirrored bin, walking downwards */
  uint32_t k;                                          /* loop counter */

  /* One complex output per iteration, fftLen outputs in total */
  for (k = fftLen; k > 0U; k--)
  {
    const float32_t aRe = pA[0];
    const float32_t aIm = pA[1];
    const float32_t bRe = pB[0];
    const float32_t xRe = pLo[0];     /* pSrc[2 * i]     */
    const float32_t xIm = pLo[1];     /* pSrc[2 * i + 1] */
    const float32_t yIm = pHi[0];     /* imaginary part of the mirrored bin */
    const float32_t yRe = pHi[-1];    /* real part of the mirrored bin */
    float32_t re, im;

    /* The imaginary coefficient of table B is never read: wherever the
     * reference formula would need it, the code uses aIm with the opposite
     * sign. The accumulation order below is kept as is. */
    re  = xRe * aRe;
    im  = -(xRe) * aIm;
    re += (xIm + yIm) * aIm;
    im += xIm * aRe;
    im -= yIm * bRe;
    re += yRe * bRe;
    im += yRe * aIm;

    /* Store the complex output */
    pDst[0] = re;
    pDst[1] = im;
    pDst += 2;

    pLo += 2;
    pHi -= 2;
    pA += twStep;
    pB += twStep;
  }
}

View file

@ -0,0 +1,603 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_fast_f32.c
* Description: RFFT & RIFFT Floating point process function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Split stage of the fast real FFT (MVE build): combines the interleaved
 * complex values in p into the output spectrum written to pOut.
 *
 *   S     fast-RFFT instance; supplies the twiddle table (pTwiddleRFFT) and
 *         the complex length (S->Sint).fftLen
 *   p     interleaved (re, im) input, read only
 *         (presumably the output of the half-length complex FFT - confirm
 *         against the caller)
 *   pOut  interleaved (re, im) output; 2 * fftLen floats are written
 *
 * Per bin, following the reference formula
 *     XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)))
 * where XB is the conjugate of the mirrored bin.
 */
void stage_rfft_f32(
    const arm_rfft_fast_instance_f32 * S,
          float32_t * p,
          float32_t * pOut)
{
    uint32_t k;                                    /* Number of bins after the first one */
    float32_t twR, twI;                            /* RFFT twiddle coefficients (scalar tail) */
    const float32_t * pCoeff = S->pTwiddleRFFT;    /* Points to RFFT twiddle factors */
    float32_t *pA = p;                             /* increasing pointer */
    float32_t *pB = p;                             /* decreasing pointer */
    float32_t xAR, xAI, xBR, xBI;                  /* temporary variables */
    float32_t t1a, t1b;                            /* temporary variables */
    float32_t p0, p1, p2, p3;                      /* temporary variables */

    float32x4x2_t tw, xA, xB;
    float32x4x2_t tmp1, tmp2, res;
    uint32x4_t vecStridesFwd, vecStridesBkwd;
    int blockCnt;

    /* Element offsets {0, 2, 4, 6} and their negation: the negated offsets
     * make the gather loads below fetch every second float going backwards
     * from pB. */
    vecStridesFwd = vidupq_u32((uint32_t)0, 2);
    vecStridesBkwd = -vecStridesFwd;

    k = (S->Sint).fftLen - 1;

    /* First bin: pA and pB both still point at p, so t1a and t1b are twice
     * the real and twice the imaginary part of the first input value. The
     * two outputs are therefore (re + im) and (re - im). */
    xBR = pB[0];
    xBI = pB[1];
    xAR = pA[0];
    xAI = pA[1];

    /* The first twiddle pair is not needed for this bin; skip it */
    pCoeff += 2;

    t1a = xBR + xAR;
    t1b = xBI + xAI;

    *pOut++ = 0.5f * ( t1a + t1b );
    *pOut++ = 0.5f * ( t1a - t1b );

    pB = p + 2*k;
    pA += 2;

    /* Vector part: four bins per iteration */
    blockCnt = k >> 2;
    while (blockCnt > 0)
    {
        /* Four consecutive bins, de-interleaved into re / im lanes */
        xA = vld2q_f32(pA);
        pA += 8;

        /* The four mirrored bins are gathered backwards from pB. Both lanes
         * of xB are filled here; no contiguous load from pB is issued, since
         * its result would be overwritten and it would read up to six floats
         * beyond p + 2*fftLen on the first iteration (assuming p holds
         * 2*fftLen floats). */
        xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd);
        xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd);

        /* Conjugate the mirrored bins */
        xB.val[1] = vnegq_f32(xB.val[1]);
        pB -= 8;

        tw = vld2q_f32(pCoeff);
        pCoeff += 8;

        /* tmp1 = XA + XB, tmp2 = XB - XA */
        tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]);
        tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]);

        tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]);
        tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]);

        /* res = TW * tmp2 (complex product) */
        res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
        res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);

        res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
        res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);

        /* res = (res + tmp1) / 2 */
        res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] );
        res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] );

        res.val[0] = vmulq_n_f32(res.val[0], 0.5f);
        res.val[1] = vmulq_n_f32(res.val[1], 0.5f);

        /* Store re-interleaved */
        vst2q_f32(pOut, res);
        pOut += 8;

        blockCnt--;
    }

    /* Scalar tail: the remaining k % 4 bins */
    blockCnt = k & 3;
    while (blockCnt > 0)
    {
        xBI = pB[1];
        xBR = pB[0];
        xAR = pA[0];
        xAI = pA[1];

        twR = *pCoeff++;
        twI = *pCoeff++;

        /* XB is the conjugate of the mirrored bin, hence the sum (not the
         * difference) of the imaginary parts in t1b. */
        t1a = xBR - xAR ;
        t1b = xBI + xAI ;

        p0 = twR * t1a;
        p1 = twI * t1a;
        p2 = twR * t1b;
        p3 = twI * t1b;

        *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); /* real part */
        *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); /* imaginary part */

        pA += 2;
        pB -= 2;
        blockCnt--;
    }
}
/* Prepares data for inverse cfft */
/*
 * MVE (Helium) implementation.
 *
 * S    : real FFT instance; S->pTwiddleRFFT supplies the twiddle factors and
 *        (S->Sint).fftLen the number of complex samples.
 * p    : packed half spectrum, read as interleaved (real, imag) pairs.
 * pOut : receives (S->Sint).fftLen interleaved complex values for the
 *        following inverse complex FFT.
 *
 * The first output pair is built from p[0] and p[1] only. The remaining
 * fftLen - 1 pairs combine a sample read upward from p + 2 (pA) with a sample
 * read downward from the end of the buffer (pB): four pairs per iteration in
 * the vector loop, then one pair per iteration in the scalar tail.
 */
void merge_rfft_f32(
  const arm_rfft_fast_instance_f32 * S,
  float32_t * p,
  float32_t * pOut)
{
  uint32_t k;                                   /* Loop Counter */
  float32_t twR, twI;                           /* RFFT Twiddle coefficients */
  const float32_t *pCoeff = S->pTwiddleRFFT;    /* Points to RFFT Twiddle factors */
  float32_t *pA = p;                            /* increasing pointer */
  float32_t *pB = p;                            /* decreasing pointer */
  float32_t xAR, xAI, xBR, xBI;                 /* temporary variables */
  float32_t t1a, t1b, r, s, t, u;               /* temporary variables */
  float32x4x2_t tw, xA, xB;
  float32x4x2_t tmp1, tmp2, res;
  uint32x4_t vecStridesFwd, vecStridesBkwd;
  int blockCnt;

  /* Element offsets 0, 2, 4, 6 and their negation: the negated offsets let a
     gather walk backwards over every other float starting at pB. */
  vecStridesFwd = vidupq_u32((uint32_t)0, 2);
  vecStridesBkwd = -vecStridesFwd;

  k = (S->Sint).fftLen - 1;

  /* First pair: uses only p[0] and p[1]; its twiddle pair is skipped. */
  xAR = pA[0];
  xAI = pA[1];
  pCoeff += 2 ;

  *pOut++ = 0.5f * ( xAR + xAI );
  *pOut++ = 0.5f * ( xAR - xAI );

  pB = p + 2*k ;
  pA += 2 ;

  /* Vector loop: four complex samples per iteration. */
  blockCnt = k >> 2;
  while (blockCnt > 0)
  {
    /* G is half of the frequency complex spectrum */
    //for k = 2:N
    //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
    xA = vld2q_f32(pA);
    pA += 8;

    /* Backward samples are fetched with gathers only. The contiguous
       vld2q_f32(pB) that used to precede them was dead (both lanes were
       overwritten) and, on the first iteration, read past the end of p
       because pB starts at the last complex sample. */
    xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd);
    xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd);
    /* conj(xB) */
    xB.val[1] = vnegq_f32(xB.val[1]);
    pB -= 8;

    tw = vld2q_f32(pCoeff);
    /* The imaginary twiddle part is negated here, unlike in the forward stage. */
    tw.val[1] = vnegq_f32(tw.val[1]);
    pCoeff += 8;

    /* tmp1 = xA + conj(xB), tmp2 = conj(xB) - xA */
    tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]);
    tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]);

    tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]);
    tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]);

    /* res = tw * tmp2 (complex product) */
    res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
    res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);

    res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
    res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);

    /* res = 0.5 * (res + tmp1) */
    res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] );
    res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] );

    res.val[0] = vmulq_n_f32(res.val[0], 0.5f);
    res.val[1] = vmulq_n_f32(res.val[1], 0.5f);

    vst2q_f32(pOut, res);
    pOut += 8;

    blockCnt--;
  }

  /* Scalar tail: the k & 3 samples the vector loop did not cover. */
  blockCnt = k & 3;
  while (blockCnt > 0)
  {
    /* G is half of the frequency complex spectrum */
    //for k = 2:N
    //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
    xBI = pB[1]    ;
    xBR = pB[0]    ;
    xAR = pA[0];
    xAI = pA[1];

    twR = *pCoeff++;
    twI = *pCoeff++;

    t1a = xAR - xBR ;
    t1b = xAI + xBI ;

    r = twR * t1a;
    s = twI * t1b;
    t = twI * t1a;
    u = twR * t1b;

    // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
    // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
    *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
    *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI

    pA += 2;
    pB -= 2;
    blockCnt--;
  }
}
#else
/*
 * Real FFT extraction (scalar version).
 *
 * S    : real FFT instance; supplies pTwiddleRFFT and (S->Sint).fftLen.
 * p    : output of the complex FFT, interleaved (real, imag).
 * pOut : receives (S->Sint).fftLen interleaved complex values; the first pair
 *        packs two real values (see the bin-0 handling below).
 */
void stage_rfft_f32(
  const arm_rfft_fast_instance_f32 * S,
        float32_t * p,
        float32_t * pOut)
{
        uint32_t   remaining = (S->Sint).fftLen - 1;   /* pairs left to combine */
  const float32_t *twiddle   = S->pTwiddleRFFT;        /* RFFT twiddle factors */
        float32_t *lo        = p;                      /* walks upward */
        float32_t *hi        = p;                      /* walks downward */
        float32_t  loRe, loIm, hiRe, hiIm;
        float32_t  twRe, twIm;
        float32_t  sumRe, sumIm, diffRe;
        float32_t  p0, p1, p2, p3;

   /* Pack first and last sample of the frequency domain together.
      Both pointers still address bin 0 here, so each sum doubles p[0] / p[1]. */
   hiRe = hi[0];
   hiIm = hi[1];
   loRe = lo[0];
   loIm = lo[1];

   /* The first twiddle pair is not used for bin 0; step over it. */
   twiddle += 2;

   sumRe = hiRe + loRe;
   sumIm = hiIm + loIm;

   pOut[0] = 0.5f * ( sumRe + sumIm );
   pOut[1] = 0.5f * ( sumRe - sumIm );
   pOut += 2;

   hi  = p + 2*remaining;
   lo += 2;

   /* XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l))), with XB the
      conjugate of the mirrored sample. The body always runs at least once. */
   do
   {
      hiIm = hi[1];
      hiRe = hi[0];
      loRe = lo[0];
      loIm = lo[1];

      twRe = twiddle[0];
      twIm = twiddle[1];
      twiddle += 2;

      diffRe = hiRe - loRe;
      sumIm  = hiIm + loIm;

      /* Partial products of tw * (xB - xA) */
      p0 = twRe * diffRe;
      p1 = twIm * diffRe;
      p2 = twRe * sumIm;
      p3 = twIm * sumIm;

      pOut[0] = 0.5f * (loRe + hiRe + p0 + p3 );
      pOut[1] = 0.5f * (loIm - hiIm + p1 - p2 );
      pOut += 2;

      lo += 2;
      hi -= 2;
   } while (--remaining > 0U);
}
/*
 * Prepares data for inverse cfft (scalar version).
 *
 * S    : real FFT instance; supplies pTwiddleRFFT and (S->Sint).fftLen.
 * p    : packed half spectrum, interleaved (real, imag).
 * pOut : receives (S->Sint).fftLen interleaved complex values.
 */
void merge_rfft_f32(
  const arm_rfft_fast_instance_f32 * S,
        float32_t * p,
        float32_t * pOut)
{
        uint32_t   remaining;                          /* pairs left to combine */
  const float32_t *twiddle = S->pTwiddleRFFT + 2;      /* first twiddle pair is skipped */
        float32_t *lo      = p + 2;                    /* walks upward */
        float32_t *hi;                                 /* walks downward */
        float32_t  loRe, loIm, hiRe, hiIm;
        float32_t  twRe, twIm;
        float32_t  diffRe, sumIm, r, s, t, u;

   /* First pair is built from the two values stored in p[0] and p[1]. */
   loRe = p[0];
   loIm = p[1];

   pOut[0] = 0.5f * ( loRe + loIm );
   pOut[1] = 0.5f * ( loRe - loIm );
   pOut += 2;

   remaining = (S->Sint).fftLen - 1;
   hi = p + 2*remaining;

   /* G is half of the frequency complex spectrum:
      Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2)))) */
   for (; remaining > 0U; remaining--)
   {
      hiIm = hi[1];
      hiRe = hi[0];
      loRe = lo[0];
      loIm = lo[1];

      twRe = twiddle[0];
      twIm = twiddle[1];
      twiddle += 2;

      diffRe = loRe - hiRe;
      sumIm  = loIm + hiIm;

      r = twRe * diffRe;
      s = twIm * sumIm;
      t = twIm * diffRe;
      u = twRe * sumIm;

      pOut[0] = 0.5f * (loRe + hiRe - r - s );
      pOut[1] = 0.5f * (loIm - hiIm + t - u );
      pOut += 2;

      lo += 2;
      hi -= 2;
   }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@ingroup groupTransforms
*/
/**
@defgroup RealFFT Real FFT Functions
@par
The CMSIS DSP library includes specialized algorithms for computing the
FFT of real data sequences. The FFT is defined over complex data but
in many applications the input is real. Real FFT algorithms take advantage
of the symmetry properties of the FFT and have a speed advantage over complex
algorithms of the same length.
@par
The Fast RFFT algorithm relies on the mixed radix CFFT, which saves processor usage.
@par
The real length N forward FFT of a sequence is computed using the steps shown below.
@par
\image html RFFT.gif "Real Fast Fourier Transform"
@par
The real sequence is initially treated as if it were complex to perform a CFFT.
Later, a processing stage reshapes the data to obtain half of the frequency spectrum
in complex format. Except the first complex number that contains the two real numbers
X[0] and X[N/2] all the data is complex. In other words, the first complex sample
contains two real values packed.
@par
The input for the inverse RFFT should keep the same format as the output of the
forward RFFT. A first processing stage pre-process the data to later perform an
inverse CFFT.
@par
\image html RIFFT.gif "Real Inverse Fast Fourier Transform"
@par
The algorithms for floating-point, Q15, and Q31 data are slightly different
and we describe each algorithm in turn.
@par Floating-point
The main functions are \ref arm_rfft_fast_f32() and \ref arm_rfft_fast_init_f32().
The older functions \ref arm_rfft_f32() and \ref arm_rfft_init_f32() have been deprecated
but are still documented.
@par
The FFT of a real N-point sequence has even symmetry in the frequency domain.
The second half of the data equals the conjugate of the first half flipped in frequency.
Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers.
These are packed into the output array in alternating real and imaginary components:
@par
X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
real[(N/2)-1], imag[(N/2)-1] }
@par
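It happens that the first complex number (real[0], imag[0]) is actually
all real. real[0] represents the DC offset, and the imaginary part of that
bin is 0. Because that slot would otherwise be unused, the floating-point
functions store the real value X[N/2] in imag[0] (the packing described above).
(real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
the first harmonic and so on.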
@par
The real FFT functions pack the frequency domain data in this fashion.
The forward transform outputs the data in this form and the inverse
transform expects input data in this form. The function always performs
the needed bitreversal so that the input and output data is always in
normal order. The functions support lengths of [32, 64, 128, ..., 4096]
samples.
@par Q15 and Q31
The real algorithms are defined in a similar manner and utilize N/2 complex
transforms behind the scenes.
@par
The complex transforms used internally include scaling to prevent fixed-point
overflows. The overall scaling equals 1/(fftLen/2).
Due to the use of complex transform internally, the source buffer is
modified by the rfft.
@par
A separate instance structure must be defined for each transform used but
twiddle factor and bit reversal tables can be reused.
@par
There is also an associated initialization function for each data type.
The initialization function performs the following operations:
- Sets the values of the internal structure fields.
- Initializes twiddle factor table and bit reversal table pointers.
- Initializes the internal complex FFT data structure.
@par
Use of the initialization function is optional **except for MVE versions where it is mandatory**.
If you don't use the initialization functions, then the structures should be initialized with code
similar to the one below:
<pre>
arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
</pre>
where <code>fftLenReal</code> is the length of the real transform;
<code>fftLenBy2</code> length of the internal complex transform (fftLenReal/2).
<code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
<code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
output (=1).
<code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
The value is based on the FFT length;
<code>pTwiddleAReal</code> points to the A array of twiddle coefficients;
<code>pTwiddleBReal</code> points to the B array of twiddle coefficients;
<code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
must also be initialized.
@par
Note that with MVE versions you can't initialize instance structures directly and **must
use the initialization function**.
*/
/**
@addtogroup RealFFT
@{
*/
/**
  @brief         Processing function for the floating-point real FFT.
  @param[in]     S         points to an arm_rfft_fast_instance_f32 structure
  @param[in]     p         points to input buffer (Source buffer is modified by this function.)
  @param[out]    pOut      points to output buffer
  @param[in]     ifftFlag
                   - value = 0: RFFT
                   - value = 1: RIFFT
  @return        none
*/
void arm_rfft_fast_f32(
  const arm_rfft_fast_instance_f32 * S,
  float32_t * p,
  float32_t * pOut,
  uint8_t ifftFlag)
{
   const arm_cfft_instance_f32 * cfft = &(S->Sint);

   if (!ifftFlag)
   {
      /* Forward: complex FFT in place on the input, then real FFT extraction */
      arm_cfft_f32( cfft, p, ifftFlag, 1);
      stage_rfft_f32(S, p, pOut);
      return;
   }

   /* Inverse: real FFT compression into pOut, then complex IFFT in place there */
   merge_rfft_f32(S, p, pOut);
   arm_cfft_f32( cfft, pOut, ifftFlag, 1);
}
/**
* @} end of RealFFT group
*/

View file

@ -0,0 +1,228 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_f64.c
* Description: RFFT & RIFFT Double precision Floating point process function
*
* $Date: 29. November 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/*
 * Real FFT extraction, double precision.
 *
 * S    : real FFT instance; supplies pTwiddleRFFT and (S->Sint).fftLen.
 * p    : output of the complex FFT, interleaved (real, imag).
 * pOut : receives (S->Sint).fftLen interleaved complex values; the first pair
 *        packs two real values.
 */
void stage_rfft_f64(
  const arm_rfft_fast_instance_f64 * S,
        float64_t * p,
        float64_t * pOut)
{
        uint32_t   remaining = (S->Sint).fftLen - 1;   /* pairs left to combine */
  const float64_t *twiddle   = S->pTwiddleRFFT;        /* RFFT twiddle factors */
        float64_t *lo        = p;                      /* walks upward */
        float64_t *hi        = p;                      /* walks downward */
        float64_t  loRe, loIm, hiRe, hiIm;
        float64_t  twRe, twIm;
        float64_t  sumRe, sumIm, diffRe;
        float64_t  p0, p1, p2, p3;

   /* Pack first and last sample of the frequency domain together.
      Both pointers still address bin 0 here. */
   hiRe = hi[0];
   hiIm = hi[1];
   loRe = lo[0];
   loIm = lo[1];

   /* The first twiddle pair is not used for bin 0; step over it. */
   twiddle += 2;

   sumRe = hiRe + loRe;
   sumIm = hiIm + loIm;

   pOut[0] = 0.5 * ( sumRe + sumIm );
   pOut[1] = 0.5 * ( sumRe - sumIm );
   pOut += 2;

   hi  = p + 2*remaining;
   lo += 2;

   /* XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l))), with XB the
      conjugate of the mirrored sample. The body always runs at least once. */
   do
   {
      hiIm = hi[1];
      hiRe = hi[0];
      loRe = lo[0];
      loIm = lo[1];

      twRe = twiddle[0];
      twIm = twiddle[1];
      twiddle += 2;

      diffRe = hiRe - loRe;
      sumIm  = hiIm + loIm;

      /* Partial products of tw * (xB - xA) */
      p0 = twRe * diffRe;
      p1 = twIm * diffRe;
      p2 = twRe * sumIm;
      p3 = twIm * sumIm;

      pOut[0] = 0.5 * (loRe + hiRe + p0 + p3 );
      pOut[1] = 0.5 * (loIm - hiIm + p1 - p2 );
      pOut += 2;

      lo += 2;
      hi -= 2;
   } while (--remaining > 0U);
}
/*
 * Prepares data for inverse cfft, double precision.
 *
 * S    : real FFT instance; supplies pTwiddleRFFT and (S->Sint).fftLen.
 * p    : packed half spectrum, interleaved (real, imag).
 * pOut : receives (S->Sint).fftLen interleaved complex values.
 */
void merge_rfft_f64(
  const arm_rfft_fast_instance_f64 * S,
        float64_t * p,
        float64_t * pOut)
{
        uint32_t   remaining;                          /* pairs left to combine */
  const float64_t *twiddle = S->pTwiddleRFFT + 2;      /* first twiddle pair is skipped */
        float64_t *lo      = p + 2;                    /* walks upward */
        float64_t *hi;                                 /* walks downward */
        float64_t  loRe, loIm, hiRe, hiIm;
        float64_t  twRe, twIm;
        float64_t  diffRe, sumIm, r, s, t, u;

   /* First pair is built from the two values stored in p[0] and p[1]. */
   loRe = p[0];
   loIm = p[1];

   pOut[0] = 0.5 * ( loRe + loIm );
   pOut[1] = 0.5 * ( loRe - loIm );
   pOut += 2;

   remaining = (S->Sint).fftLen - 1;
   hi = p + 2*remaining;

   /* G is half of the frequency complex spectrum:
      Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2)))) */
   for (; remaining > 0U; remaining--)
   {
      hiIm = hi[1];
      hiRe = hi[0];
      loRe = lo[0];
      loIm = lo[1];

      twRe = twiddle[0];
      twIm = twiddle[1];
      twiddle += 2;

      diffRe = loRe - hiRe;
      sumIm  = loIm + hiIm;

      r = twRe * diffRe;
      s = twIm * sumIm;
      t = twIm * diffRe;
      u = twRe * sumIm;

      pOut[0] = 0.5 * (loRe + hiRe - r - s );
      pOut[1] = 0.5 * (loIm - hiIm + t - u );
      pOut += 2;

      lo += 2;
      hi -= 2;
   }
}
/**
@ingroup groupTransforms
*/
/**
@addtogroup RealFFT
@{
*/
/**
  @brief         Processing function for the Double Precision floating-point real FFT.
  @param[in,out] S         points to an arm_rfft_fast_instance_f64 structure
                           (the length of its internal complex FFT instance is rewritten on every call)
  @param[in]     p         points to input buffer (Source buffer is modified by this function.)
  @param[out]    pOut      points to output buffer
  @param[in]     ifftFlag
                   - value = 0: RFFT
                   - value = 1: RIFFT
  @return        none
*/
void arm_rfft_fast_f64(
  arm_rfft_fast_instance_f64 * S,
  float64_t * p,
  float64_t * pOut,
  uint8_t ifftFlag)
{
   arm_cfft_instance_f64 * cfft = &(S->Sint);

   /* The internal complex transform runs on half the real length */
   cfft->fftLen = S->fftLenRFFT / 2;

   if (!ifftFlag)
   {
      /* Forward: complex FFT in place on the input, then real FFT extraction */
      arm_cfft_f64( cfft, p, ifftFlag, 1);
      stage_rfft_f64(S, p, pOut);
      return;
   }

   /* Inverse: real FFT compression into pOut, then complex IFFT in place there */
   merge_rfft_f64(S, p, pOut);
   arm_cfft_f64( cfft, pOut, ifftFlag, 1);
}
/**
* @} end of RealFFT group
*/

View file

@ -0,0 +1,352 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_init_f32.c
* Description: Split Radix Decimation in Frequency CFFT Floating point processing function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup RealFFT
@{
*/
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32))
/**
  @private
  @brief         Initialization function for the 32pt floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f32 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
                   - otherwise the error status reported by arm_cfft_init_f32()
 */
static arm_status arm_rfft_32_fast_init_f32( arm_rfft_fast_instance_f32 * S )
{
  arm_status cfftStatus;

  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* The internal complex FFT has half the real length */
  cfftStatus = arm_cfft_init_f32(&(S->Sint), 16);
  if (cfftStatus == ARM_MATH_SUCCESS)
  {
    S->fftLenRFFT = 32U;
    S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32;
  }

  return cfftStatus;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64))
/**
  @private
  @brief         Initialization function for the 64pt floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f32 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
                   - otherwise the error status reported by arm_cfft_init_f32()
 */
static arm_status arm_rfft_64_fast_init_f32( arm_rfft_fast_instance_f32 * S )
{
  arm_status cfftStatus;

  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* The internal complex FFT has half the real length */
  cfftStatus = arm_cfft_init_f32(&(S->Sint), 32);
  if (cfftStatus == ARM_MATH_SUCCESS)
  {
    S->fftLenRFFT = 64U;
    S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64;
  }

  return cfftStatus;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128))
/**
  @private
  @brief         Initialization function for the 128pt floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f32 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
                   - otherwise the error status reported by arm_cfft_init_f32()
 */
static arm_status arm_rfft_128_fast_init_f32( arm_rfft_fast_instance_f32 * S )
{
  arm_status cfftStatus;

  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* The internal complex FFT has half the real length */
  cfftStatus = arm_cfft_init_f32(&(S->Sint), 64);
  if (cfftStatus == ARM_MATH_SUCCESS)
  {
    S->fftLenRFFT = 128;
    S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128;
  }

  return cfftStatus;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256))
/**
  @private
  @brief         Initialization function for the 256pt floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f32 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
                   - otherwise the error status reported by arm_cfft_init_f32()
 */
static arm_status arm_rfft_256_fast_init_f32( arm_rfft_fast_instance_f32 * S )
{
  arm_status cfftStatus;

  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* The internal complex FFT has half the real length */
  cfftStatus = arm_cfft_init_f32(&(S->Sint), 128);
  if (cfftStatus == ARM_MATH_SUCCESS)
  {
    S->fftLenRFFT = 256U;
    S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256;
  }

  return cfftStatus;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512))
/**
  @private
  @brief         Initialization function for the 512pt floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f32 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
                   - otherwise the error status reported by arm_cfft_init_f32()
 */
static arm_status arm_rfft_512_fast_init_f32( arm_rfft_fast_instance_f32 * S )
{
  arm_status cfftStatus;

  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* The internal complex FFT has half the real length */
  cfftStatus = arm_cfft_init_f32(&(S->Sint), 256);
  if (cfftStatus == ARM_MATH_SUCCESS)
  {
    S->fftLenRFFT = 512U;
    S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512;
  }

  return cfftStatus;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024))
/**
  @private
  @brief         Initialization function for the 1024pt floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f32 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
                   - otherwise the error status reported by arm_cfft_init_f32()
 */
static arm_status arm_rfft_1024_fast_init_f32( arm_rfft_fast_instance_f32 * S )
{
  arm_status cfftStatus;

  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* The internal complex FFT has half the real length */
  cfftStatus = arm_cfft_init_f32(&(S->Sint), 512);
  if (cfftStatus == ARM_MATH_SUCCESS)
  {
    S->fftLenRFFT = 1024U;
    S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024;
  }

  return cfftStatus;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048))
/**
  @private
  @brief         Initialization function for the 2048pt floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f32 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
                   - otherwise the error status reported by arm_cfft_init_f32()
 */
static arm_status arm_rfft_2048_fast_init_f32( arm_rfft_fast_instance_f32 * S )
{
  arm_status cfftStatus;

  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* The internal complex FFT has half the real length */
  cfftStatus = arm_cfft_init_f32(&(S->Sint), 1024);
  if (cfftStatus == ARM_MATH_SUCCESS)
  {
    S->fftLenRFFT = 2048U;
    S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048;
  }

  return cfftStatus;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096))
/**
  @private
  @brief         Initialization function for the 4096pt floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f32 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
                   - otherwise the error status reported by arm_cfft_init_f32()
 */
static arm_status arm_rfft_4096_fast_init_f32( arm_rfft_fast_instance_f32 * S )
{
  arm_status cfftStatus;

  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* The internal complex FFT has half the real length */
  cfftStatus = arm_cfft_init_f32(&(S->Sint), 2048);
  if (cfftStatus == ARM_MATH_SUCCESS)
  {
    S->fftLenRFFT = 4096U;
    S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096;
  }

  return cfftStatus;
}
#endif
/**
  @brief         Initialization function for the floating-point real FFT.
  @param[in,out] S       points to an arm_rfft_fast_instance_f32 structure
  @param[in]     fftLen  length of the Real Sequence
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
  @par           Description
                   The parameter <code>fftLen</code> specifies the length of RFFT/RIFFT process.
                   Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096.
                   A length whose tables were compiled out is reported as unsupported.
  @par
                   This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
 */
arm_status arm_rfft_fast_init_f32(
  arm_rfft_fast_instance_f32 * S,
  uint16_t fftLen)
{
  /* Stays an argument error unless a compiled-in length matches below */
  arm_status status = ARM_MATH_ARGUMENT_ERROR;

  switch (fftLen)
  {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32))
  case 32U:
    status = arm_rfft_32_fast_init_f32( S );
    break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64))
  case 64U:
    status = arm_rfft_64_fast_init_f32( S );
    break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128))
  case 128U:
    status = arm_rfft_128_fast_init_f32( S );
    break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256))
  case 256U:
    status = arm_rfft_256_fast_init_f32( S );
    break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512))
  case 512U:
    status = arm_rfft_512_fast_init_f32( S );
    break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024))
  case 1024U:
    status = arm_rfft_1024_fast_init_f32( S );
    break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048))
  case 2048U:
    status = arm_rfft_2048_fast_init_f32( S );
    break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096))
  case 4096U:
    status = arm_rfft_4096_fast_init_f32( S );
    break;
#endif
  default:
    break;
  }

  return status;
}
/**
@} end of RealFFT group
*/

View file

@ -0,0 +1,344 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_init_f64.c
* Description: Split Radix Decimation in Frequency CFFT Double Precision Floating point processing function
*
* $Date: 29. November 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupTransforms
*/
/**
@addtogroup RealFFT
@{
*/
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_16) && defined(ARM_TABLE_BITREVIDX_FLT64_16) && defined(ARM_TABLE_TWIDDLECOEF_F64_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_32))
/**
  @brief         Initialization function for the 32pt double precision floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f64 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
 */
static arm_status arm_rfft_32_fast_init_f64( arm_rfft_fast_instance_f64 * S )
{
  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* Internal complex FFT: half the real length, with its own tables */
  S->Sint.fftLen       = 16U;
  S->fftLenRFFT        = 32U;
  S->Sint.bitRevLength = ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH;
  S->Sint.pBitRevTable = (uint16_t *)armBitRevIndexTableF64_16;
  S->Sint.pTwiddle     = (float64_t *) twiddleCoefF64_16;

  /* Twiddle factors of the real FFT stage */
  S->pTwiddleRFFT      = (float64_t *) twiddleCoefF64_rfft_32;

  return ARM_MATH_SUCCESS;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_BITREVIDX_FLT64_32) && defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_64))
/**
  @brief         Initialization function for the 64pt Double Precision floating-point real FFT.
  @param[in,out] S  points to an arm_rfft_fast_instance_f64 structure
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : S is null
 */
static arm_status arm_rfft_64_fast_init_f64( arm_rfft_fast_instance_f64 * S )
{
  if (!S)
  {
    return ARM_MATH_ARGUMENT_ERROR;
  }

  /* Internal complex FFT: half the real length, with its own tables */
  S->Sint.fftLen       = 32U;
  S->fftLenRFFT        = 64U;
  S->Sint.bitRevLength = ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH;
  S->Sint.pBitRevTable = (uint16_t *)armBitRevIndexTableF64_32;
  S->Sint.pTwiddle     = (float64_t *) twiddleCoefF64_32;

  /* Twiddle factors of the real FFT stage */
  S->pTwiddleRFFT      = (float64_t *) twiddleCoefF64_rfft_64;

  return ARM_MATH_SUCCESS;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_BITREVIDX_FLT64_64) && defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_128))
/**
  @brief         Set up an instance for the 128-point double-precision real FFT.
  @param[in,out] S  points to the arm_rfft_fast_instance_f64 structure to fill in
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : instance initialised
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
  @par
                   The embedded complex FFT instance is configured for 64 points.
 */
static arm_status arm_rfft_128_fast_init_f64( arm_rfft_fast_instance_f64 * S )
{
  arm_status status = ARM_MATH_ARGUMENT_ERROR;

  if (S)
  {
    arm_cfft_instance_f64 * const pCfft = &(S->Sint);

    /* Real FFT parameters */
    S->fftLenRFFT   = 128U;
    S->pTwiddleRFFT = (float64_t *) twiddleCoefF64_rfft_128;

    /* Embedded 64-point complex FFT parameters */
    pCfft->fftLen       = 64U;
    pCfft->bitRevLength = ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH;
    pCfft->pBitRevTable = (uint16_t *) armBitRevIndexTableF64_64;
    pCfft->pTwiddle     = (float64_t *) twiddleCoefF64_64;

    status = ARM_MATH_SUCCESS;
  }

  return status;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_BITREVIDX_FLT64_128) && defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_256))
/**
  @brief         Set up an instance for the 256-point double-precision real FFT.
  @param[in,out] S  points to the arm_rfft_fast_instance_f64 structure to fill in
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : instance initialised
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
  @par
                   The embedded complex FFT instance is configured for 128 points.
 */
static arm_status arm_rfft_256_fast_init_f64( arm_rfft_fast_instance_f64 * S )
{
  arm_status status = ARM_MATH_ARGUMENT_ERROR;

  if (S)
  {
    arm_cfft_instance_f64 * const pCfft = &(S->Sint);

    /* Real FFT parameters */
    S->fftLenRFFT   = 256U;
    S->pTwiddleRFFT = (float64_t *) twiddleCoefF64_rfft_256;

    /* Embedded 128-point complex FFT parameters */
    pCfft->fftLen       = 128U;
    pCfft->bitRevLength = ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH;
    pCfft->pBitRevTable = (uint16_t *) armBitRevIndexTableF64_128;
    pCfft->pTwiddle     = (float64_t *) twiddleCoefF64_128;

    status = ARM_MATH_SUCCESS;
  }

  return status;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_BITREVIDX_FLT64_256) && defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_512))
/**
  @brief         Set up an instance for the 512-point double-precision real FFT.
  @param[in,out] S  points to the arm_rfft_fast_instance_f64 structure to fill in
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : instance initialised
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
  @par
                   The embedded complex FFT instance is configured for 256 points.
 */
static arm_status arm_rfft_512_fast_init_f64( arm_rfft_fast_instance_f64 * S )
{
  arm_status status = ARM_MATH_ARGUMENT_ERROR;

  if (S)
  {
    arm_cfft_instance_f64 * const pCfft = &(S->Sint);

    /* Real FFT parameters */
    S->fftLenRFFT   = 512U;
    S->pTwiddleRFFT = (float64_t *) twiddleCoefF64_rfft_512;

    /* Embedded 256-point complex FFT parameters */
    pCfft->fftLen       = 256U;
    pCfft->bitRevLength = ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH;
    pCfft->pBitRevTable = (uint16_t *) armBitRevIndexTableF64_256;
    pCfft->pTwiddle     = (float64_t *) twiddleCoefF64_256;

    status = ARM_MATH_SUCCESS;
  }

  return status;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_BITREVIDX_FLT64_512) && defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_1024))
/**
  @brief         Set up an instance for the 1024-point double-precision real FFT.
  @param[in,out] S  points to the arm_rfft_fast_instance_f64 structure to fill in
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : instance initialised
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
  @par
                   The embedded complex FFT instance is configured for 512 points.
 */
static arm_status arm_rfft_1024_fast_init_f64( arm_rfft_fast_instance_f64 * S )
{
  arm_status status = ARM_MATH_ARGUMENT_ERROR;

  if (S)
  {
    arm_cfft_instance_f64 * const pCfft = &(S->Sint);

    /* Real FFT parameters */
    S->fftLenRFFT   = 1024U;
    S->pTwiddleRFFT = (float64_t *) twiddleCoefF64_rfft_1024;

    /* Embedded 512-point complex FFT parameters */
    pCfft->fftLen       = 512U;
    pCfft->bitRevLength = ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH;
    pCfft->pBitRevTable = (uint16_t *) armBitRevIndexTableF64_512;
    pCfft->pTwiddle     = (float64_t *) twiddleCoefF64_512;

    status = ARM_MATH_SUCCESS;
  }

  return status;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_BITREVIDX_FLT64_1024) && defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_2048))
/**
  @brief         Set up an instance for the 2048-point double-precision real FFT.
  @param[in,out] S  points to the arm_rfft_fast_instance_f64 structure to fill in
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : instance initialised
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
  @par
                   The embedded complex FFT instance is configured for 1024 points.
 */
static arm_status arm_rfft_2048_fast_init_f64( arm_rfft_fast_instance_f64 * S )
{
  arm_status status = ARM_MATH_ARGUMENT_ERROR;

  if (S)
  {
    arm_cfft_instance_f64 * const pCfft = &(S->Sint);

    /* Real FFT parameters */
    S->fftLenRFFT   = 2048U;
    S->pTwiddleRFFT = (float64_t *) twiddleCoefF64_rfft_2048;

    /* Embedded 1024-point complex FFT parameters */
    pCfft->fftLen       = 1024U;
    pCfft->bitRevLength = ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH;
    pCfft->pBitRevTable = (uint16_t *) armBitRevIndexTableF64_1024;
    pCfft->pTwiddle     = (float64_t *) twiddleCoefF64_1024;

    status = ARM_MATH_SUCCESS;
  }

  return status;
}
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_BITREVIDX_FLT64_2048) && defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_4096))
/**
  @brief         Set up an instance for the 4096-point double-precision real FFT.
  @param[in,out] S  points to the arm_rfft_fast_instance_f64 structure to fill in
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : instance initialised
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
  @par
                   The embedded complex FFT instance is configured for 2048 points.
 */
static arm_status arm_rfft_4096_fast_init_f64( arm_rfft_fast_instance_f64 * S )
{
  arm_status status = ARM_MATH_ARGUMENT_ERROR;

  if (S)
  {
    arm_cfft_instance_f64 * const pCfft = &(S->Sint);

    /* Real FFT parameters */
    S->fftLenRFFT   = 4096U;
    S->pTwiddleRFFT = (float64_t *) twiddleCoefF64_rfft_4096;

    /* Embedded 2048-point complex FFT parameters */
    pCfft->fftLen       = 2048U;
    pCfft->bitRevLength = ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH;
    pCfft->pBitRevTable = (uint16_t *) armBitRevIndexTableF64_2048;
    pCfft->pTwiddle     = (float64_t *) twiddleCoefF64_2048;

    status = ARM_MATH_SUCCESS;
  }

  return status;
}
#endif
/**
  @brief         Initialization function for the double-precision floating-point real FFT.
  @param[in,out] S       points to an arm_rfft_fast_instance_f64 structure
  @param[in]     fftLen  length of the real sequence
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length,
                                                    the tables for that length were not compiled in,
                                                    or <code>S</code> is a null pointer
  @par           Description
                   The parameter <code>fftLen</code> specifies the length of the RFFT/RIFFT process.
                   Supported FFT lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096.
  @par
                   The call is forwarded to the length-specific initializer, which sets the
                   twiddle factor table pointers and the bit reversal table pointer.
 */
arm_status arm_rfft_fast_init_f64(
  arm_rfft_fast_instance_f64 * S,
  uint16_t fftLen)
{
  /* Each case exists only when the tables needed for that length are built in. */
  switch (fftLen)
  {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_16) && defined(ARM_TABLE_BITREVIDX_FLT64_16) && defined(ARM_TABLE_TWIDDLECOEF_F64_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_32))
    case 32U:
      return arm_rfft_32_fast_init_f64( S );
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_BITREVIDX_FLT64_32) && defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_64))
    case 64U:
      return arm_rfft_64_fast_init_f64( S );
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_BITREVIDX_FLT64_64) && defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_128))
    case 128U:
      return arm_rfft_128_fast_init_f64( S );
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_BITREVIDX_FLT64_128) && defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_256))
    case 256U:
      return arm_rfft_256_fast_init_f64( S );
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_BITREVIDX_FLT64_256) && defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_512))
    case 512U:
      return arm_rfft_512_fast_init_f64( S );
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_BITREVIDX_FLT64_512) && defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_1024))
    case 1024U:
      return arm_rfft_1024_fast_init_f64( S );
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_BITREVIDX_FLT64_1024) && defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_2048))
    case 2048U:
      return arm_rfft_2048_fast_init_f64( S );
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_BITREVIDX_FLT64_2048) && defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_4096))
    case 4096U:
      return arm_rfft_4096_fast_init_f64( S );
#endif
    default:
      break;
  }

  /* Unsupported length, or support for it was compiled out */
  return ARM_MATH_ARGUMENT_ERROR;
}
/**
@} end of RealFFT group
*/

View file

@ -0,0 +1,139 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_init_f32.c
* Description: RFFT & RIFFT Floating point initialisation function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@addtogroup RealFFT
@{
*/
/**
  @brief         Initialization function for the floating-point RFFT/RIFFT.
  @deprecated    Do not use this function. It has been superseded by \ref arm_rfft_fast_init_f32 and will be removed in the future.
  @param[in,out] S               points to an instance of the floating-point RFFT/RIFFT structure
  @param[in,out] S_CFFT          points to an instance of the floating-point CFFT/CIFFT structure
  @param[in]     fftLenReal      length of the FFT.
  @param[in]     ifftFlagR       flag that selects transform direction
                   - value = 0: forward transform
                   - value = 1: inverse transform
  @param[in]     bitReverseFlag  flag that enables / disables bit reversal of output
                   - value = 0: disables bit reversal of output
                   - value = 1: enables bit reversal of output
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length,
                                                    or the underlying complex FFT could not be initialised
  @par           Description
                   The parameter <code>fftLenReal</code> specifies length of RFFT/RIFFT Process.
                   Supported FFT Lengths are 128, 512, 2048, 8192.
  @par
                   The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
                   Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
  @par
                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
                   Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
  @par
                   This function also initializes Twiddle factor table.
 */
arm_status arm_rfft_init_f32(
  arm_rfft_instance_f32 * S,
  arm_cfft_radix4_instance_f32 * S_CFFT,
  uint32_t fftLenReal,
  uint32_t ifftFlagR,
  uint32_t bitReverseFlag)
{
  /* Status of the length validation */
  arm_status status = ARM_MATH_SUCCESS;
  /* Status reported by the complex FFT initialisation */
  arm_status cfftStatus;

  /* Initialize the Real FFT length */
  S->fftLenReal = (uint16_t) fftLenReal;

  /* Initialize the Complex FFT length (half of the 16-bit real length) */
  S->fftLenBy2 = (uint16_t) fftLenReal / 2U;

  /* Initialize the Twiddle coefficientA pointer */
  S->pTwiddleAReal = (float32_t *) realCoefA;

  /* Initialize the Twiddle coefficientB pointer */
  S->pTwiddleBReal = (float32_t *) realCoefB;

  /* Initialize the Flag for selection of RFFT or RIFFT */
  S->ifftFlagR = (uint8_t) ifftFlagR;

  /* Initialize the Flag for calculation Bit reversal or not */
  S->bitReverseFlagR = (uint8_t) bitReverseFlag;

  /* Select the twiddle table stride for the requested length.
     The caller's untruncated value is checked: switching on the 16-bit copy
     would accept e.g. 0x10080 as if it were 128. */
  switch (fftLenReal)
  {
    case 8192U:
      S->twidCoefRModifier = 1U;
      break;

    case 2048U:
      S->twidCoefRModifier = 4U;
      break;

    case 512U:
      S->twidCoefRModifier = 16U;
      break;

    case 128U:
      S->twidCoefRModifier = 64U;
      break;

    default:
      /* Reporting argument error if rfftSize is not valid value */
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  /* Init Complex FFT Instance */
  S->pCfft = S_CFFT;

  /* Initialise the CFFT (or CIFFT when ifftFlagR is set) for fftLenReal/2 points,
     with bit reversal disabled */
  cfftStatus = arm_cfft_radix4_init_f32(S->pCfft, S->fftLenBy2, (S->ifftFlagR != 0U) ? 1U : 0U, 0U);

  /* A failed CFFT setup must not be reported as success */
  if (status == ARM_MATH_SUCCESS)
  {
    status = cfftStatus;
  }

  /* return the status of RFFT Init function */
  return (status);
}
/**
@} end of RealFFT group
*/

View file

@ -0,0 +1,239 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_init_q15.c
* Description: RFFT & RIFFT Q15 initialisation function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
#include "arm_const_structs.h"
/**
@addtogroup RealFFT
@{
*/
/**
  @brief         Initialization function for the Q15 RFFT/RIFFT.
  @param[in,out] S               points to an instance of the Q15 RFFT/RIFFT structure
  @param[in]     fftLenReal      length of the FFT
  @param[in]     ifftFlagR       flag that selects transform direction
                   - value = 0: forward transform
                   - value = 1: inverse transform
  @param[in]     bitReverseFlag  flag that enables / disables bit reversal of output
                   - value = 0: disables bit reversal of output
                   - value = 1: enables bit reversal of output
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length
  @par           Details
                   The parameter <code>fftLenReal</code> specifies length of RFFT/RIFFT Process.
                   Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192.
                   A length whose tables were excluded from the build is reported as unsupported.
  @par
                   The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
                   Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
  @par
                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
                   Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
  @par
                   This function also initializes Twiddle factor table.
 */
arm_status arm_rfft_init_q15(
  arm_rfft_instance_q15 * S,
  uint32_t fftLenReal,
  uint32_t ifftFlagR,
  uint32_t bitReverseFlag)
{
  /* Initialise the default arm status */
  arm_status status = ARM_MATH_SUCCESS;

  /* Initialize the Real FFT length */
  S->fftLenReal = (uint16_t) fftLenReal;

  /* Initialize the Twiddle coefficientA pointer */
  S->pTwiddleAReal = (q15_t *) realCoefAQ15;

  /* Initialize the Twiddle coefficientB pointer */
  S->pTwiddleBReal = (q15_t *) realCoefBQ15;

  /* Initialize the Flag for selection of RFFT or RIFFT */
  S->ifftFlagR = (uint8_t) ifftFlagR;

  /* Initialize the Flag for calculation Bit reversal or not */
  S->bitReverseFlagR = (uint8_t) bitReverseFlag;

  /* Select the coefficient modifier and the half-length complex FFT.
     The caller's untruncated value is checked: switching on the 16-bit copy
     would accept e.g. 0x10020 as if it were 32. */
  switch (fftLenReal)
  {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
    case 8192U:
      S->twidCoefRModifier = 1U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 4096);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len4096;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
    case 4096U:
      S->twidCoefRModifier = 2U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 2048);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len2048;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
    case 2048U:
      S->twidCoefRModifier = 4U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 1024);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len1024;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
    case 1024U:
      S->twidCoefRModifier = 8U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 512);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len512;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
    case 512U:
      S->twidCoefRModifier = 16U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 256);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len256;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
    case 256U:
      S->twidCoefRModifier = 32U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 128);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len128;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
    case 128U:
      S->twidCoefRModifier = 64U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 64);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len64;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
    case 64U:
      S->twidCoefRModifier = 128U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 32);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len32;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
    case 32U:
      S->twidCoefRModifier = 256U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q15(&(S->cfftInst), 16);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q15_len16;
#endif
      break;
#endif
    default:
      /* Reporting argument error if rfftSize is not valid value */
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  /* return the status of RFFT Init function */
  return (status);
}
/**
@} end of RealFFT group
*/

View file

@ -0,0 +1,237 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_init_q31.c
* Description: RFFT & RIFFT Q31 initialisation function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
#include "arm_const_structs.h"
/**
@addtogroup RealFFT
@{
*/
/**
  @brief         Initialization function for the Q31 RFFT/RIFFT.
  @param[in,out] S               points to an instance of the Q31 RFFT/RIFFT structure
  @param[in]     fftLenReal      length of the FFT
  @param[in]     ifftFlagR       flag that selects transform direction
                   - value = 0: forward transform
                   - value = 1: inverse transform
  @param[in]     bitReverseFlag  flag that enables / disables bit reversal of output
                   - value = 0: disables bit reversal of output
                   - value = 1: enables bit reversal of output
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length
  @par           Details
                   The parameter <code>fftLenReal</code> specifies length of RFFT/RIFFT Process.
                   Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192.
                   A length whose tables were excluded from the build is reported as unsupported.
  @par
                   The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
                   Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
  @par
                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
                   Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
  @par
                   This function also initializes Twiddle factor table.
 */
arm_status arm_rfft_init_q31(
  arm_rfft_instance_q31 * S,
  uint32_t fftLenReal,
  uint32_t ifftFlagR,
  uint32_t bitReverseFlag)
{
  /* Initialise the default arm status */
  arm_status status = ARM_MATH_SUCCESS;

  /* Initialize the Real FFT length */
  S->fftLenReal = (uint16_t) fftLenReal;

  /* Initialize the Twiddle coefficientA pointer */
  S->pTwiddleAReal = (q31_t *) realCoefAQ31;

  /* Initialize the Twiddle coefficientB pointer */
  S->pTwiddleBReal = (q31_t *) realCoefBQ31;

  /* Initialize the Flag for selection of RFFT or RIFFT */
  S->ifftFlagR = (uint8_t) ifftFlagR;

  /* Initialize the Flag for calculation Bit reversal or not */
  S->bitReverseFlagR = (uint8_t) bitReverseFlag;

  /* Select the coefficient modifier and the half-length complex FFT.
     The caller's untruncated value is checked: switching on the 16-bit copy
     would accept e.g. 0x10020 as if it were 32. */
  switch (fftLenReal)
  {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
    case 8192U:
      S->twidCoefRModifier = 1U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 4096);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len4096;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
    case 4096U:
      S->twidCoefRModifier = 2U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 2048);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len2048;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
    case 2048U:
      S->twidCoefRModifier = 4U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 1024);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len1024;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
    case 1024U:
      S->twidCoefRModifier = 8U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 512);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len512;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
    case 512U:
      S->twidCoefRModifier = 16U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 256);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len256;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
    case 256U:
      S->twidCoefRModifier = 32U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 128);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len128;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
    case 128U:
      S->twidCoefRModifier = 64U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 64);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len64;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
    case 64U:
      S->twidCoefRModifier = 128U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 32);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len32;
#endif
      break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
    case 32U:
      S->twidCoefRModifier = 256U;
#if defined(ARM_MATH_MVEI)
      status = arm_cfft_init_q31(&(S->cfftInst), 16);
      if (status != ARM_MATH_SUCCESS)
      {
        return (status);
      }
#else
      S->pCfft = &arm_cfft_sR_q31_len16;
#endif
      break;
#endif
    default:
      /* Reporting argument error if rfftSize is not valid value */
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

  /* return the status of RFFT Init function */
  return (status);
}
/**
@} end of RealFFT group
*/

View file

@ -0,0 +1,592 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_q15.c
* Description: RFFT & RIFFT Q15 process function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* ----------------------------------------------------------------------
* Internal functions prototypes
* -------------------------------------------------------------------- */
/* Split stage of the forward real FFT; arm_rfft_q15() calls it after the complex FFT. */
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pATable,
const q15_t * pBTable,
q15_t * pDst,
uint32_t modifier);
/* Split stage of the inverse real FFT; arm_rfft_q15() calls it before the complex IFFT. */
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pATable,
const q15_t * pBTable,
q15_t * pDst,
uint32_t modifier);
/**
@addtogroup RealFFT
@{
*/
/**
  @brief         Processing function for the Q15 RFFT/RIFFT.
  @param[in]     S     points to an instance of the Q15 RFFT/RIFFT structure
  @param[in]     pSrc  points to input buffer (Source buffer is modified by this function.)
  @param[out]    pDst  points to output buffer
  @return        none
  @par           Input and output formats
                   Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
                   Hence the output format is different for different RFFT sizes.
                   The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
  @par
                   \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
  @par
                   \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
  @par
                   If the input buffer is of length N, the output buffer must have length 2*N.
                   The input buffer is modified by this function.
 */
void arm_rfft_q15(
  const arm_rfft_instance_q15 * S,
        q15_t * pSrc,
        q15_t * pDst)
{
  /* Complex FFT instance: embedded in S for MVE builds, referenced by pointer otherwise */
#if defined(ARM_MATH_MVEI)
  const arm_cfft_instance_q15 *S_CFFT = &(S->cfftInst);
#else
  const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
#endif
  /* Half of the real FFT length, passed to the split stages */
  uint32_t L2 = S->fftLenReal >> 1U;
  uint32_t i;

  /* Calculation of RIFFT of input */
  if (S->ifftFlagR == 1U)
  {
    /* Real IFFT core process */
    arm_split_rifft_q15 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);

    /* Complex IFFT process */
    arm_cfft_q15 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);

    /* Scale the result up by one bit. The shift is done on the unsigned bit
       pattern because left-shifting a negative signed value is undefined
       behaviour in ISO C; the stored bits are the same. */
    for (i = 0; i < S->fftLenReal; i++)
    {
      pDst[i] = (q15_t) ((uint16_t) pDst[i] << 1U);
    }
  }
  else
  {
    /* Calculation of RFFT of input */

    /* Complex FFT process (performed in place on pSrc) */
    arm_cfft_q15 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);

    /* Real FFT core process */
    arm_split_rfft_q15 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
  }
}
/**
@} end of RealFFT group
*/
/**
@brief Core Real FFT process
@param[in] pSrc points to input buffer
@param[in] fftLen length of FFT
@param[in] pATable points to twiddle Coef A buffer
@param[in] pBTable points to twiddle Coef B buffer
@param[out] pDst points to output buffer
@param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
@return none
@par
The function implements a Real FFT
*/
#if defined(ARM_MATH_MVEI)
/*
 * Helium (MVE) variant of the real-FFT split stage.
 * Each loop pass handles eight complex bins: it loads them with vld2q, gathers the
 * matching twiddle values and the mirrored input bins, then stores the forward
 * results contiguously and scatters their conjugates to the mirrored output slots.
 * NOTE(review): blkCnt is fftLen >> 3 and there is no tail loop, so a remainder of
 * fftLen modulo 8 is not processed here - presumably fftLen is always a multiple of 8; confirm.
 * NOTE(review): the loop loads 2 * fftLen values starting at pSrc[2], i.e. up to
 * pSrc[2 * fftLen + 1]; confirm the source buffer is large enough for that.
 */
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pATable,
const q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
q15_t const *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
/* pDst2 / pSrc2 are biased by -14 so that the descending offsets 14..0 below reach the last element first */
q15_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4U * fftLen) - 1U - 14]; /* temp pointers for output buffer */
q15_t const *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2U * fftLen) - 1U - 14]; /* temp pointers for input buffer */
q15_t const *pVecSrc1;
q15_t *pVecDst1;
q15x8x2_t vecIn, vecSum;
uint32_t blkCnt;
uint16x8_t vecStridesFwd, vecStridesBkwd;
q15x8_t vecInBkwd, vecCoefFwd0, vecCoefFwd1;
/*
* Init coefficient pointers: skip the first twiddle pair (bin 0 is handled after the loop)
*/
pCoefA = &pATable[modifier * 2U];
pCoefB = &pBTable[modifier * 2U];
/*
* scatter / gather offsets
* for ascending & descending addressing
*/
vecStridesFwd = vidupq_u16((uint32_t)0, 2); // 0, 2, 4, 6, 8, 10, 12, 14
vecStridesBkwd = vddupq_u16(14, 2); // 14, 12, 10, 8, 6, 4, 2, 0
/* scale the ascending offsets so consecutive lanes step 2 * modifier table entries */
vecStridesFwd = vecStridesFwd * (uint16_t) modifier;
pVecSrc1 = (q15_t const *) pSrc1;
pVecDst1 = pDst1;
/* eight complex bins per iteration */
blkCnt = fftLen >> 3;
while (blkCnt > 0U)
{
/* twiddle A: first value of each pair in vecCoefFwd0 (CoefA1), second value in vecCoefFwd1 (CoefA2) */
vecCoefFwd0 = vldrhq_gather_shifted_offset(pCoefA, vecStridesFwd);
vecCoefFwd1 = vldrhq_gather_shifted_offset(&pCoefA[1], vecStridesFwd);
/* load 16 consecutive input values, split into val[0] / val[1] (used below as real / imaginary parts) */
vecIn = vld2q(pVecSrc1);
pVecSrc1 += 16;
/*
* outR = *pSrc1 * CoefA1;
*/
vecSum.val[0] = vrmulhq(vecIn.val[0], vecCoefFwd0);
/*
* outI = *pSrc1++ * CoefA2;
*/
vecSum.val[1] = vrmulhq(vecIn.val[0], vecCoefFwd1);
/* mirrored input, odd-indexed values in descending order */
vecInBkwd = vldrhq_gather_shifted_offset(pSrc2, vecStridesBkwd);
/*
* outR -= (*pSrc1 + *pSrc2) * CoefA2;
*/
vecInBkwd = vqaddq(vecIn.val[1], vecInBkwd);
vecSum.val[0] = vqsubq(vecSum.val[0], vrmulhq(vecInBkwd, vecCoefFwd1));
/* reload the mirrored values: vecInBkwd was overwritten by the sum above */
vecInBkwd = vldrhq_gather_shifted_offset(pSrc2, vecStridesBkwd);
/*
* outI += *pSrc1++ * CoefA1;
*/
vecSum.val[1] = vqaddq(vecSum.val[1], vrmulhq(vecIn.val[1], vecCoefFwd0));
/* vecCoefFwd0 is reused from here on for the first value of each twiddle B pair (CoefB1) */
vecCoefFwd0 = vldrhq_gather_shifted_offset(pCoefB, vecStridesFwd);
/*
* outI -= *pSrc2-- * CoefB1;
*/
vecSum.val[1] = vqsubq(vecSum.val[1], vrmulhq(vecInBkwd, vecCoefFwd0));
/* mirrored input, even-indexed values (one element below the previous gather) in descending order */
vecInBkwd = vldrhq_gather_shifted_offset(&pSrc2[-1], vecStridesBkwd);
/*
* outI -= *pSrc2 * CoefA2;
*/
vecSum.val[1] = vqsubq(vecSum.val[1], vrmulhq(vecInBkwd, vecCoefFwd1));
/*
* outR += *pSrc2-- * CoefB1;
*/
vecSum.val[0] = vqaddq(vecSum.val[0], vrmulhq(vecInBkwd, vecCoefFwd0));
/* store the eight forward bins, re-interleaved */
vst2q(pVecDst1, vecSum);
pVecDst1 += 16;
/*
* write complex conjugate output: negated val[1] and unchanged val[0], in descending order
*/
vecSum.val[1] = -vecSum.val[1];
vstrhq_scatter_shifted_offset(pDst2, vecStridesBkwd, vecSum.val[1]);
vstrhq_scatter_shifted_offset(&pDst2[-1], vecStridesBkwd, vecSum.val[0]);
/*
* update fwd and backwd offsets
*/
vecStridesFwd = vecStridesFwd + (uint16_t)(modifier * 16U);
/*
* The backward offsets stay constant and the base pointers are moved instead:
* 16-bit offsets cannot go negative (subtracting 16 from vecStridesBkwd would
* wrap into a positive 32K-65K jump).
*/
pSrc2 = pSrc2 - 16;
pDst2 = pDst2 - 16;
blkCnt--;
}
/* Bin fftLen and bin 0 are derived directly from the first input pair; their second components are zero.
   These stores come after the loop, so they overwrite whatever the loop left in these slots. */
pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[(2U * fftLen) + 1U] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
}
#else
/*
 * Scalar variant of the real-FFT split stage (used when ARM_MATH_MVEI is not defined).
 * For bins i = 1 .. fftLen - 1 it combines input bin i with mirrored input bin
 * (fftLen - i) using the A/B twiddle tables, writes the result to pDst[2i], pDst[2i + 1]
 * and its conjugate to pDst[4 * fftLen - 2i], pDst[4 * fftLen - 2i + 1].
 * Bins 0 and fftLen are filled in after the loop.
 * Two implementations are selected at compile time: one built on the packed 16-bit
 * SIMD helpers (ARM_MATH_DSP) and a plain C one.
 */
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pATable,
const q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
const q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q15_t *pSrc1, *pSrc2; /* ascending (bin i) and descending (bin fftLen - i) input pointers */
#if defined (ARM_MATH_DSP)
q15_t *pD1, *pD2; /* ascending output pointer and descending conjugate-output pointer */
#endif
/* Init coefficient pointers: start at the twiddle pair for bin 1 */
pCoefA = &pATable[modifier * 2];
pCoefB = &pBTable[modifier * 2];
pSrc1 = &pSrc[2];
pSrc2 = &pSrc[(2U * fftLen) - 2U];
#if defined (ARM_MATH_DSP)
/* NOTE: this assignment is redundant, the for statement below re-initialises i */
i = 1U;
pD1 = pDst + 2;
pD2 = pDst + (4U * fftLen) - 2;
for (i = fftLen - 1; i > 0; i--)
{
/*
Formulas evaluated per bin (n stands for fftLen, pIn for pSrc):
outR = ( pSrc[2 * i] * pATable[2 * i]
- pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i]
+ pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ pIn[2 * i] * pATable[2 * i + 1]
+ pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
- pIn[2 * n - 2 * i + 1] * pBTable[2 * i])
*/
#ifndef ARM_MATH_BIG_ENDIAN
/* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
outR = __SMUSD(read_q15x2 (pSrc1), read_q15x2((q15_t *) pCoefA));
#else
/* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
outR = -(__SMUSD(read_q15x2 (pSrc1), read_q15x2((q15_t *) pCoefA)));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* pSrc[2 * n - 2 * i] * pBTable[2 * i] + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
/* accumulate onto outR, then keep the upper half of the 32-bit sum */
outR = __SMLAD(read_q15x2 (pSrc2), read_q15x2((q15_t *) pCoefB), outR) >> 16U;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
/* read_q15x2_da also moves pSrc2 - presumably back by one complex sample (helper from arm_math.h); verify */
#ifndef ARM_MATH_BIG_ENDIAN
outI = __SMUSDX(read_q15x2_da (&pSrc2), read_q15x2((q15_t *) pCoefB));
#else
outI = __SMUSDX(read_q15x2 ((q15_t *) pCoefB), read_q15x2_da (&pSrc2));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
/* read_q15x2_ia also moves pSrc1 - presumably forward by one complex sample (helper from arm_math.h); verify */
outI = __SMLADX(read_q15x2_ia (&pSrc1), read_q15x2 ((q15_t *) pCoefA), outI);
/* write output */
*pD1++ = (q15_t) outR;
*pD1++ = outI >> 16U;
/* write complex conjugate output */
pD2[0] = (q15_t) outR;
pD2[1] = -(outI >> 16U);
pD2 -= 2;
/* update coefficient pointer: 2 * modifier table entries per bin */
pCoefB = pCoefB + (2U * modifier);
pCoefA = pCoefA + (2U * modifier);
}
/* Bins fftLen and 0 come straight from the first input pair; their second components are zero */
pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
pDst[2U * fftLen + 1U] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
pDst[1] = 0;
#else
i = 1U;
while (i < fftLen)
{
/*
n stands for fftLen:
outR = ( pSrc[2 * i] * pATable[2 * i]
- pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i]
+ pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
outR = *pSrc1 * *pCoefA;
outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
outR = outR + (*pSrc2 * *pCoefB);
/* keep the upper half of the 32-bit sum */
outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16;
/*
pIn stands for pSrc:
outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ pIn[2 * i] * pATable[2 * i + 1]
+ pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
- pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
outI = *pSrc2 * *(pCoefB + 1);
outI = outI - (*(pSrc2 + 1) * *pCoefB);
outI = outI + (*(pSrc1 + 1) * *pCoefA);
outI = outI + (*pSrc1 * *(pCoefA + 1));
/* update input pointers: one complex sample forward / backward */
pSrc1 += 2U;
pSrc2 -= 2U;
/* write output */
pDst[2U * i] = (q15_t) outR;
pDst[2U * i + 1U] = outI >> 16U;
/* write complex conjugate output */
pDst[(4U * fftLen) - (2U * i)] = (q15_t) outR;
pDst[((4U * fftLen) - (2U * i)) + 1U] = -(outI >> 16U);
/* update coefficient pointer: 2 * modifier table entries per bin */
pCoefB = pCoefB + (2U * modifier);
pCoefA = pCoefA + (2U * modifier);
i++;
}
/* Bins fftLen and 0 come straight from the first input pair; their second components are zero */
pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[2U * fftLen + 1U] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
#endif /* #if defined (ARM_MATH_DSP) */
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@brief Core Real IFFT process
@param[in] pSrc points to input buffer
@param[in] fftLen length of FFT
@param[in] pATable points to twiddle Coef A buffer
@param[in] pBTable points to twiddle Coef B buffer
@param[out] pDst points to output buffer
@param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
@return none
@par
The function implements a Real IFFT
*/
#if defined(ARM_MATH_MVEI)
/*
 * Helium (MVE) variant of the real-IFFT split stage.
 * Each loop pass handles eight complex bins: it loads them with vld2q starting at
 * pSrc[0], gathers the matching twiddle values and the mirrored input bins
 * (walking down from pSrc[2 * fftLen + 1]), and stores the results contiguously in pDst.
 * NOTE(review): blkCnt is fftLen >> 3 and there is no tail loop, so a remainder of
 * fftLen modulo 8 is not processed here - presumably fftLen is always a multiple of 8; confirm.
 */
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pATable,
const q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
q15_t const *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
/* pSrc2 is biased by -14 so that the descending offsets 14..0 below reach pSrc[2 * fftLen + 1] first */
q15_t const *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2U * fftLen) + 1U - 14U];
q15_t *pDst1 = &pDst[0];
q15_t const *pVecSrc1;
q15_t *pVecDst1;
q15x8x2_t vecIn, vecSum;
uint32_t blkCnt;
uint16x8_t vecStridesFwd, vecStridesBkwd;
q15x8_t vecInBkwd, vecCoefFwd0, vecCoefFwd1;
/*
* Init coefficient pointers: start at the first twiddle pair
*/
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
/*
* scatter / gather offsets
* for ascending & descending addressing
*/
vecStridesFwd = vidupq_u16((uint32_t)0, 2); // 0, 2, 4, 6, 8, 10, 12, 14
vecStridesBkwd = vddupq_u16(14, 2); // 14, 12, 10, 8, 6, 4, 2, 0
/* scale the ascending offsets so consecutive lanes step 2 * modifier table entries */
vecStridesFwd = vecStridesFwd * (uint16_t) modifier;
pVecSrc1 = (q15_t const *) pSrc1;
pVecDst1 = pDst1;
/* eight complex bins per iteration */
blkCnt = fftLen >> 3;
while (blkCnt > 0U)
{
/* twiddle A: first value of each pair in vecCoefFwd0 (CoefA1), second value in vecCoefFwd1 (CoefA2) */
vecCoefFwd0 = vldrhq_gather_shifted_offset(pCoefA, vecStridesFwd);
vecCoefFwd1 = vldrhq_gather_shifted_offset(&pCoefA[1], vecStridesFwd);
/* load 16 consecutive input values, split into val[0] / val[1] (used below as real / imaginary parts) */
vecIn = vld2q(pVecSrc1);
pVecSrc1 += 16;
/*
* outR = *pSrc1 * CoefA1;
*/
vecSum.val[0] = vmulhq(vecIn.val[0], vecCoefFwd0);
/*
* outI = -(*pSrc1++) * CoefA2;
* (val[0] is negated in place; it is not used again in this iteration)
*/
vecIn.val[0] = vnegq(vecIn.val[0]);
vecSum.val[1] = vmulhq(vecIn.val[0], vecCoefFwd1);
/* mirrored input, odd-indexed values in descending order */
vecInBkwd = vldrhq_gather_shifted_offset(pSrc2, vecStridesBkwd);
/*
* outR += (*pSrc1 + *pSrc2) * CoefA2;
*/
vecInBkwd = vqaddq(vecIn.val[1], vecInBkwd);
vecSum.val[0] = vqaddq(vecSum.val[0], vmulhq(vecInBkwd, vecCoefFwd1));
/* reload the mirrored values: vecInBkwd was overwritten by the sum above */
vecInBkwd = vldrhq_gather_shifted_offset(pSrc2, vecStridesBkwd);
/*
* outI += *pSrc1++ * CoefA1;
*/
vecSum.val[1] = vqaddq(vecSum.val[1], vmulhq(vecIn.val[1], vecCoefFwd0));
/* vecCoefFwd0 is reused from here on for the first value of each twiddle B pair (CoefB1) */
vecCoefFwd0 = vldrhq_gather_shifted_offset(pCoefB, vecStridesFwd);
/*
* outI -= *pSrc2-- * CoefB1;
*/
vecSum.val[1] = vqsubq(vecSum.val[1], vmulhq(vecInBkwd, vecCoefFwd0));
/* mirrored input, even-indexed values (one element below the previous gather) in descending order */
vecInBkwd = vldrhq_gather_shifted_offset(&pSrc2[-1], vecStridesBkwd);
/*
* outI += *pSrc2 * CoefA2;
*/
vecSum.val[1] = vqaddq(vecSum.val[1], vmulhq(vecInBkwd, vecCoefFwd1));
/*
* outR += *pSrc2-- * CoefB1;
*/
vecSum.val[0] = vqaddq(vecSum.val[0], vmulhq(vecInBkwd, vecCoefFwd0));
/* store the eight result bins, re-interleaved */
vst2q(pVecDst1, vecSum);
pVecDst1 += 16;
/*
* update fwd and backwd offsets
*/
vecStridesFwd = vecStridesFwd + (uint16_t)(modifier * 16U);
/*
* The backward offsets stay constant and the base pointer is moved instead:
* 16-bit offsets cannot go negative (subtracting 16 from vecStridesBkwd would
* wrap into a positive 32K-65K jump).
*/
pSrc2 = pSrc2 - 16;
blkCnt--;
}
}
#else
/*
 * Scalar variant of the real-IFFT split stage (used when ARM_MATH_MVEI is not defined).
 * Produces fftLen complex values in pDst. Output i combines input bin i (read upwards
 * from pSrc[0]) with mirrored input bin (fftLen - i) (read downwards from pSrc[2 * fftLen])
 * using the A/B twiddle tables. The arithmetic is done either with the packed 16-bit
 * SIMD helpers (ARM_MATH_DSP) or in plain C.
 */
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pATable,
const q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
const q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q15_t *pSrc1, *pSrc2; /* ascending and descending input pointers */
q15_t *pDst1 = &pDst[0]; /* ascending output pointer */
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
pSrc1 = &pSrc[0];
pSrc2 = &pSrc[2 * fftLen];
i = fftLen;
while (i > 0U)
{
/*
Formulas evaluated per bin (n stands for fftLen, pIn for pSrc):
outR = ( pIn[2 * i] * pATable[2 * i]
+ pIn[2 * i + 1] * pATable[2 * i + 1]
+ pIn[2 * n - 2 * i] * pBTable[2 * i]
- pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = ( pIn[2 * i + 1] * pATable[2 * i]
- pIn[2 * i] * pATable[2 * i + 1]
- pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
- pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
#if defined (ARM_MATH_DSP)
#ifndef ARM_MATH_BIG_ENDIAN
/* pIn[2 * n - 2 * i] * pBTable[2 * i] - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
outR = __SMUSD(read_q15x2(pSrc2), read_q15x2((q15_t *) pCoefB));
#else
/* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] + pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
outR = -(__SMUSD(read_q15x2(pSrc2), read_q15x2((q15_t *) pCoefB)));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] + pIn[2 * n - 2 * i] * pBTable[2 * i] */
/* accumulate onto outR, then keep the upper half of the 32-bit sum */
outR = __SMLAD(read_q15x2(pSrc1), read_q15x2 ((q15_t *) pCoefA), outR) >> 16U;
/* -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] + pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
/* read_q15x2_da also moves pSrc2 - presumably back by one complex sample (helper from arm_math.h); verify */
outI = __SMUADX(read_q15x2_da (&pSrc2), read_q15x2((q15_t *) pCoefB));
/* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
/* the previous outI enters negated as the accumulator; read_q15x2_ia also moves pSrc1 - presumably forward by one complex sample; verify */
#ifndef ARM_MATH_BIG_ENDIAN
outI = __SMLSDX(read_q15x2 ((q15_t *) pCoefA), read_q15x2_ia (&pSrc1), -outI);
#else
outI = __SMLSDX(read_q15x2_ia (&pSrc1), read_q15x2 ((q15_t *) pCoefA), outI);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* write output: outR and the upper half of outI packed into one 32-bit store, halfword order depends on endianness */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst1, __PKHBT(outR, (outI >> 16U), 16));
#else
write_q15x2_ia (&pDst1, __PKHBT((outI >> 16U), outR, 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
#else /* #if defined (ARM_MATH_DSP) */
/* plain C evaluation of the outR formula above; the upper half of the 32-bit sum is kept */
outR = *pSrc2 * *pCoefB;
outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
outR = outR + (*pSrc1 * *pCoefA);
outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;
/* plain C evaluation of the outI formula above; shifted down when stored */
outI = *(pSrc1 + 1) * *pCoefA;
outI = outI - (*pSrc1 * *(pCoefA + 1));
outI = outI - (*pSrc2 * *(pCoefB + 1));
outI = outI - (*(pSrc2 + 1) * *(pCoefB));
/* update input pointers: one complex sample forward / backward */
pSrc1 += 2U;
pSrc2 -= 2U;
/* write output */
*pDst1++ = (q15_t) outR;
*pDst1++ = (q15_t) (outI >> 16);
#endif /* #if defined (ARM_MATH_DSP) */
/* update coefficient pointer: 2 * modifier table entries per bin */
pCoefB = pCoefB + (2 * modifier);
pCoefA = pCoefA + (2 * modifier);
i--;
}
}
#endif /* defined(ARM_MATH_MVEI) */

View file

@ -0,0 +1,500 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_q31.c
* Description: FFT & RIFFT Q31 process function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* ----------------------------------------------------------------------
* Internal functions prototypes
* -------------------------------------------------------------------- */
/* Split stage run after the complex FFT on the forward (RFFT) path; defined later in this file. */
void arm_split_rfft_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pATable,
const q31_t * pBTable,
q31_t * pDst,
uint32_t modifier);
/* Split stage run before the complex IFFT on the inverse (RIFFT) path; defined later in this file. */
void arm_split_rifft_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pATable,
const q31_t * pBTable,
q31_t * pDst,
uint32_t modifier);
/**
@addtogroup RealFFT
@{
*/
/**
@brief Processing function for the Q31 RFFT/RIFFT.
@param[in] S points to an instance of the Q31 RFFT/RIFFT structure
@param[in] pSrc points to input buffer (Source buffer is modified by this function)
@param[out] pDst points to output buffer
@return none
@par Input and output formats
Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
Hence the output format is different for different RFFT sizes.
The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
@par
\image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
@par
\image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
@par
If the input buffer is of length N, the output buffer must have length 2*N.
The input buffer is modified by this function.
*/
void arm_rfft_q31(
  const arm_rfft_instance_q31 * S,
        q31_t * pSrc,
        q31_t * pDst)
{
  /* Complex FFT instance: embedded in the RFFT instance for MVE builds,
     referenced through a pointer otherwise. */
#if defined(ARM_MATH_MVEI)
  const arm_cfft_instance_q31 *S_CFFT = &(S->cfftInst);
#else
  const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
#endif
  /* The complex transform runs on half the real length. */
  uint32_t L2 = S->fftLenReal >> 1U;
  uint32_t i;

  if (S->ifftFlagR == 1U)
  {
    /* Inverse path: split stage writes into pDst, then the complex IFFT
       runs in place on pDst. */
    arm_split_rifft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
    arm_cfft_q31 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);

    /* Upscale every output sample by one bit.
       The shift is performed on the unsigned bit pattern: left-shifting a
       negative signed value is undefined behaviour in C. The result still
       wraps (it does not saturate), exactly as the previous signed shift did
       on two's-complement targets. */
    for (i = 0U; i < S->fftLenReal; i++)
    {
      pDst[i] = (q31_t) ((uint32_t) pDst[i] << 1U);
    }
  }
  else
  {
    /* Forward path: complex FFT in place on pSrc (this overwrites the input),
       then the split stage produces the spectrum in pDst. */
    arm_cfft_q31 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
    arm_split_rfft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
  }
}
/**
@} end of RealFFT group
*/
/**
@brief Core Real FFT process
@param[in] pSrc points to input buffer
@param[in] fftLen length of FFT
@param[in] pATable points to twiddle Coef A buffer
@param[in] pBTable points to twiddle Coef B buffer
@param[out] pDst points to output buffer
@param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
@return none
*/
#if defined(ARM_MATH_MVEI)
/*
 * Helium (MVE) variant of the Q31 real-FFT split stage.
 * Each loop pass handles four complex bins: it loads them with vld2q, gathers the
 * matching twiddle values and the mirrored input bins, then stores the forward
 * results contiguously and scatters their conjugates to the mirrored output slots.
 * NOTE(review): blkCnt is fftLen >> 2 and there is no tail loop, so a remainder of
 * fftLen modulo 4 is not processed here - presumably fftLen is always a multiple of 4; confirm.
 * NOTE(review): the loop loads 2 * fftLen values starting at pSrc[2], i.e. up to
 * pSrc[2 * fftLen + 1]; confirm the source buffer is large enough for that.
 */
void arm_split_rfft_q31(
q31_t *pSrc,
uint32_t fftLen,
const q31_t *pATable,
const q31_t *pBTable,
q31_t *pDst,
uint32_t modifier)
{
q31_t const *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q31_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4U * fftLen) - 1U]; /* temp pointers for output buffer */
q31_t const *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2U * fftLen) - 1U]; /* temp pointers for input buffer */
q31_t const *pVecSrc1;
q31_t *pVecDst1;
q31x4x2_t vecIn, vecSum;
uint32_t blkCnt;
uint32x4_t vecStridesFwd, vecStridesBkwd;
q31x4_t vecInBkwd, vecCoefFwd0, vecCoefFwd1;
/*
* Init coefficient pointers: skip the first twiddle pair (bin 0 is handled after the loop)
*/
pCoefA = &pATable[modifier * 2U];
pCoefB = &pBTable[modifier * 2U];
/*
* scatter / gather offsets
* for ascending & descending addressing
*/
vecStridesFwd = vidupq_u32((uint32_t)0, 2);
/* descending offsets are the negated ascending ones, taken before the modifier scaling below */
vecStridesBkwd = -vecStridesFwd;
/* scale the ascending offsets so consecutive lanes step 2 * modifier table entries */
vecStridesFwd = vecStridesFwd * modifier;
pVecSrc1 = (q31_t const *) pSrc1;
pVecDst1 = pDst1;
/* four complex bins per iteration */
blkCnt = fftLen >> 2;
while (blkCnt > 0U)
{
/* twiddle A: first value of each pair in vecCoefFwd0 (CoefA1), second value in vecCoefFwd1 (CoefA2) */
vecCoefFwd0 = vldrwq_gather_shifted_offset(pCoefA, vecStridesFwd);
vecCoefFwd1 = vldrwq_gather_shifted_offset(&pCoefA[1], vecStridesFwd);
/* load 8 consecutive input values, split into val[0] / val[1] (used below as real / imaginary parts) */
vecIn = vld2q(pVecSrc1);
pVecSrc1 += 8;
/*
* outR = *pSrc1 * CoefA1;
*/
vecSum.val[0] = vmulhq(vecIn.val[0], vecCoefFwd0);
/*
* outI = *pSrc1++ * CoefA2;
*/
vecSum.val[1] = vmulhq(vecIn.val[0], vecCoefFwd1);
/* mirrored input, odd-indexed values in descending order */
vecInBkwd = vldrwq_gather_shifted_offset(pSrc2, vecStridesBkwd);
/*
* outR -= (*pSrc1 + *pSrc2) * CoefA2;
*/
vecInBkwd = vqaddq(vecIn.val[1], vecInBkwd);
vecSum.val[0] = vqsubq(vecSum.val[0], vmulhq(vecInBkwd, vecCoefFwd1));
/* reload the mirrored values: vecInBkwd was overwritten by the sum above */
vecInBkwd = vldrwq_gather_shifted_offset(pSrc2, vecStridesBkwd);
/*
* outI += *pSrc1++ * CoefA1;
*/
vecSum.val[1] = vqaddq(vecSum.val[1], vmulhq(vecIn.val[1], vecCoefFwd0));
/* vecCoefFwd0 is reused from here on for the first value of each twiddle B pair (CoefB1) */
vecCoefFwd0 = vldrwq_gather_shifted_offset(pCoefB, vecStridesFwd);
/*
* outI -= *pSrc2-- * CoefB1;
*/
vecSum.val[1] = vqsubq(vecSum.val[1], vmulhq(vecInBkwd, vecCoefFwd0));
/* mirrored input, even-indexed values (one element below the previous gather) in descending order */
vecInBkwd = vldrwq_gather_shifted_offset(&pSrc2[-1], vecStridesBkwd);
/*
* outI -= *pSrc2 * CoefA2;
*/
vecSum.val[1] = vqsubq(vecSum.val[1], vmulhq(vecInBkwd, vecCoefFwd1));
/*
* outR += *pSrc2-- * CoefB1;
*/
vecSum.val[0] = vqaddq(vecSum.val[0], vmulhq(vecInBkwd, vecCoefFwd0));
/* store the four forward bins, re-interleaved */
vst2q(pVecDst1, vecSum);
pVecDst1 += 8;
/*
* write complex conjugate output: negated val[1] and unchanged val[0], in descending order
*/
vecSum.val[1] = -vecSum.val[1];
vstrwq_scatter_shifted_offset(pDst2, vecStridesBkwd, vecSum.val[1]);
vstrwq_scatter_shifted_offset(&pDst2[-1], vecStridesBkwd, vecSum.val[0]);
/*
* update fwd and backwd offsets (the base pointers pSrc2 / pDst2 stay fixed in this variant)
*/
vecStridesFwd = vecStridesFwd + (modifier * 8U);
vecStridesBkwd = vecStridesBkwd - 8;
blkCnt--;
}
/* Bin fftLen and bin 0 are derived directly from the first input pair; their second components are zero.
   These stores come after the loop, so they overwrite whatever the loop left in these slots. */
pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[(2U * fftLen) + 1U] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
}
#else
/*
 * Scalar Q31 real-FFT split stage.
 * For bins 1 .. fftLen - 1 the complex-FFT output bin is combined with its
 * mirrored bin through the A/B twiddle tables. Each result is written to the
 * lower half of pDst and its complex conjugate to the mirrored slot in the
 * upper half. Bins 0 and fftLen are derived from the first input pair.
 */
void arm_split_rfft_q31(
        q31_t * pSrc,
        uint32_t fftLen,
  const q31_t * pATable,
  const q31_t * pBTable,
        q31_t * pDst,
        uint32_t modifier)
{
  /* Twiddle walkers start at the pair for bin 1 and step 2 * modifier entries per bin */
  const q31_t *twA = &pATable[modifier * 2];
  const q31_t *twB = &pBTable[modifier * 2];

  q31_t *fwdOut = &pDst[2];                  /* ascending output (bin 1 upwards)          */
  q31_t *mirOut = &pDst[4 * fftLen - 1];     /* descending conjugate output               */
  q31_t *fwdIn  = &pSrc[2];                  /* ascending input (bin 1 upwards)           */
  q31_t *bwdIn  = &pSrc[2 * fftLen - 1];     /* descending input (bin fftLen - 1 downwards) */

  uint32_t binsLeft;

  for (binsLeft = fftLen - 1U; binsLeft > 0U; binsLeft--)
  {
    /* Only the first value of the B pair is read; the second value of the
       A pair is used wherever the B pair's second value would appear. */
    q31_t aRe = twA[0];
    q31_t aIm = twA[1];
    q31_t bRe = twB[0];

    /* Current bin and its mirror (bwdIn points at the mirror's second component) */
    q31_t xRe = fwdIn[0];
    q31_t xIm = fwdIn[1];
    q31_t yIm = bwdIn[0];
    q31_t yRe = bwdIn[-1];

    q31_t sumR;
    q31_t sumI;

    /* Real part: xRe*aRe - xIm*aIm - yIm*aIm + yRe*bRe (rounded after every step) */
    mult_32x32_keep32_R (sumR, xRe, aRe);
    multSub_32x32_keep32_R (sumR, xIm, aIm);
    multSub_32x32_keep32_R (sumR, yIm, aIm);
    multAcc_32x32_keep32_R (sumR, yRe, bRe);

    /* Imaginary part: xRe*aIm + xIm*aRe - yIm*bRe - yRe*aIm (rounded after every step) */
    mult_32x32_keep32_R (sumI, xRe, aIm);
    multAcc_32x32_keep32_R (sumI, xIm, aRe);
    multSub_32x32_keep32_R (sumI, yIm, bRe);
    multSub_32x32_keep32_R (sumI, yRe, aIm);

    /* Forward result */
    fwdOut[0] = sumR;
    fwdOut[1] = sumI;

    /* Conjugate mirror: the negated imaginary part is stored one slot above the real part */
    mirOut[0]  = -sumI;
    mirOut[-1] = sumR;

    /* Advance one complex sample in every stream */
    fwdOut += 2;
    mirOut -= 2;
    fwdIn  += 2;
    bwdIn  -= 2;
    twA += 2 * modifier;
    twB += 2 * modifier;
  }

  /* Bins fftLen and 0: half difference / half sum of the first input pair, zero second component */
  pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
  pDst[2 * fftLen + 1] = 0;
  pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
  pDst[1] = 0;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@brief Core Real IFFT process
@param[in] pSrc points to input buffer
@param[in] fftLen length of FFT
@param[in] pATable points to twiddle Coef A buffer
@param[in] pBTable points to twiddle Coef B buffer
@param[out] pDst points to output buffer
@param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
@return none
*/
#if defined(ARM_MATH_MVEI)
/*
 * Helium (MVE) variant of the Q31 real-IFFT split stage.
 * Each loop pass handles four complex bins: it loads them with vld2q starting at
 * pSrc[0], gathers the matching twiddle values and the mirrored input bins
 * (walking down from pSrc[2 * fftLen + 1]), and stores the results contiguously in pDst.
 * NOTE(review): blkCnt is fftLen >> 2 and there is no tail loop, so a remainder of
 * fftLen modulo 4 is not processed here - presumably fftLen is always a multiple of 4; confirm.
 */
void arm_split_rifft_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pATable,
const q31_t * pBTable,
q31_t * pDst,
uint32_t modifier)
{
q31_t const *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q31_t const *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2U * fftLen) + 1U]; /* ascending / descending input pointers */
q31_t const *pVecSrc1;
q31_t *pVecDst;
q31x4x2_t vecIn, vecSum;
uint32_t blkCnt;
uint32x4_t vecStridesFwd, vecStridesBkwd;
q31x4_t vecInBkwd, vecCoefFwd0, vecCoefFwd1;
/*
* Init coefficient pointers: start at the first twiddle pair
*/
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
/*
* scatter / gather offsets
* for ascending & descending addressing
*/
vecStridesFwd = vidupq_u32((uint32_t)0, 2);
/* descending offsets are the negated ascending ones, taken before the modifier scaling below */
vecStridesBkwd = -vecStridesFwd;
/* scale the ascending offsets so consecutive lanes step 2 * modifier table entries */
vecStridesFwd = vecStridesFwd * modifier;
pVecSrc1 = (q31_t const *) pSrc1;
pVecDst = pDst;
/* four complex bins per iteration */
blkCnt = fftLen >> 2;
while (blkCnt > 0U)
{
/* twiddle A: first value of each pair in vecCoefFwd0 (CoefA1), second value in vecCoefFwd1 (CoefA2) */
vecCoefFwd0 = vldrwq_gather_shifted_offset(pCoefA, vecStridesFwd);
vecCoefFwd1 = vldrwq_gather_shifted_offset(&pCoefA[1], vecStridesFwd);
/* load 8 consecutive input values, split into val[0] / val[1] (used below as real / imaginary parts) */
vecIn = vld2q(pVecSrc1);
pVecSrc1 += 8;
/*
* outR = *pSrc1 * CoefA1;
*/
vecSum.val[0] = vmulhq(vecIn.val[0], vecCoefFwd0);
/*
* outI = -(*pSrc1++) * CoefA2;
* (val[0] is negated in place; it is not used again in this iteration)
*/
vecIn.val[0] = (-vecIn.val[0]);
vecSum.val[1] = vmulhq(vecIn.val[0], vecCoefFwd1);
/* mirrored input, odd-indexed values in descending order */
vecInBkwd = vldrwq_gather_shifted_offset(pSrc2, vecStridesBkwd);
/*
* outR += (*pSrc1 + *pSrc2) * CoefA2;
*/
vecInBkwd = vqaddq(vecIn.val[1], vecInBkwd);
vecSum.val[0] = vqaddq(vecSum.val[0], vmulhq(vecInBkwd, vecCoefFwd1));
/* reload the mirrored values: vecInBkwd was overwritten by the sum above */
vecInBkwd = vldrwq_gather_shifted_offset(pSrc2, vecStridesBkwd);
/*
* outI += *pSrc1++ * CoefA1;
*/
vecSum.val[1] = vqaddq(vecSum.val[1], vmulhq(vecIn.val[1], vecCoefFwd0));
/* vecCoefFwd0 is reused from here on for the first value of each twiddle B pair (CoefB1) */
vecCoefFwd0 = vldrwq_gather_shifted_offset(pCoefB, vecStridesFwd);
/*
* outI -= *pSrc2-- * CoefB1;
*/
vecSum.val[1] = vqsubq(vecSum.val[1], vmulhq(vecInBkwd, vecCoefFwd0));
/* mirrored input, even-indexed values (one element below the previous gather) in descending order */
vecInBkwd = vldrwq_gather_shifted_offset(&pSrc2[-1], vecStridesBkwd);
/*
* outI += *pSrc2-- * CoefA2;
*/
vecSum.val[1] = vqaddq(vecSum.val[1], vmulhq(vecInBkwd, vecCoefFwd1));
/*
* outR += *pSrc2-- * CoefB1;
*/
vecSum.val[0] = vqaddq(vecSum.val[0], vmulhq(vecInBkwd, vecCoefFwd0));
/* store the four result bins, re-interleaved */
vst2q(pVecDst, vecSum);
pVecDst += 8;
/*
* update fwd and backwd offsets (the base pointer pSrc2 stays fixed in this variant)
*/
vecStridesFwd = vecStridesFwd + (modifier * 8U);
vecStridesBkwd = vecStridesBkwd - 8;
blkCnt--;
}
}
#else
/*
 * Scalar Q31 real-IFFT split stage.
 * Produces fftLen complex values in pDst. Output i combines input bin i
 * (read upwards from pSrc[0]) with its mirrored bin (read downwards from
 * pSrc[2 * fftLen + 1]) through the A/B twiddle tables. The result is the
 * input of the complex IFFT that follows.
 */
void arm_split_rifft_q31(
        q31_t * pSrc,
        uint32_t fftLen,
  const q31_t * pATable,
  const q31_t * pBTable,
        q31_t * pDst,
        uint32_t modifier)
{
  /* Twiddle walkers start at the first pair and step 2 * modifier entries per bin */
  const q31_t *twA = &pATable[0];
  const q31_t *twB = &pBTable[0];

  q31_t *fwdIn = &pSrc[0];                   /* ascending input                                  */
  q31_t *bwdIn = &pSrc[2 * fftLen + 1];      /* descending input, starts on a second component   */
  q31_t *out   = pDst;                       /* ascending output                                 */

  uint32_t binsLeft;

  for (binsLeft = fftLen; binsLeft > 0U; binsLeft--)
  {
    /* Only the first value of the B pair is read; the second value of the
       A pair is used wherever the B pair's second value would appear. */
    q31_t aRe = twA[0];
    q31_t aIm = twA[1];
    q31_t bRe = twB[0];

    /* Current bin and its mirror */
    q31_t xRe = fwdIn[0];
    q31_t xIm = fwdIn[1];
    q31_t yIm = bwdIn[0];
    q31_t yRe = bwdIn[-1];

    q31_t sumR;
    q31_t sumI;

    /* Real part: xRe*aRe + xIm*aIm + yIm*aIm + yRe*bRe (rounded after every step) */
    mult_32x32_keep32_R (sumR, xRe, aRe);
    multAcc_32x32_keep32_R (sumR, xIm, aIm);
    multAcc_32x32_keep32_R (sumR, yIm, aIm);
    multAcc_32x32_keep32_R (sumR, yRe, bRe);

    /* Imaginary part: -xRe*aIm + xIm*aRe - yIm*bRe + yRe*aIm (rounded after every step) */
    mult_32x32_keep32_R (sumI, xRe, -aIm);
    multAcc_32x32_keep32_R (sumI, xIm, aRe);
    multSub_32x32_keep32_R (sumI, yIm, bRe);
    multAcc_32x32_keep32_R (sumI, yRe, aIm);

    /* Store the complex result */
    out[0] = sumR;
    out[1] = sumI;

    /* Advance one complex sample in every stream */
    out   += 2;
    fwdIn += 2;
    bwdIn -= 2;
    twA += modifier * 2;
    twB += modifier * 2;
  }
}
#endif /* defined(ARM_MATH_MVEI) */