diff --git a/features/cmsis_nn_sample_code/.cproject b/features/cmsis_nn_sample_code/.cproject
new file mode 100644
index 0000000..d3f8226
--- /dev/null
+++ b/features/cmsis_nn_sample_code/.cproject
@@ -0,0 +1,339 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096" moduleId="org.eclipse.cdt.core.settings" name="DA1459x-00-Debug_eFLASH">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="${cross_rm} -rf" description="Applicable for DA1459x-00. Debug build configuration for executing using cached eFlash mode." errorParsers="org.eclipse.cdt.core.GASErrorParser;org.eclipse.cdt.core.GmakeErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.CWDLocator;org.eclipse.cdt.core.GCCErrorParser" id="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096" name="DA1459x-00-Debug_eFLASH" optionalBuildProperties="" parent="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug" postannouncebuildStep="" postbuildStep="" preannouncebuildStep="Generate linker scripts." prebuildStep="+${cross_make} generate_ldscripts DEVICE=DA14592_00 APP_CONFIG_H=&quot;${workspace_loc:/${ProjName}/config/custom_config_eflash.h}&quot; CC=&quot;${cross_prefix}${cross_c}${cross_suffix}&quot; BSP_CONFIG_DIR=&quot;${workspace_loc:/${ProjName}/sdk/config}&quot; MIDDLEWARE_CONFIG_DIR=&quot;${workspace_loc:/${ProjName}/sdk/middleware_config}&quot; LDSCRIPT_PATH=&quot;${workspace_loc:/${ProjName}/sdk/ldscripts}&quot;">
+					<folderInfo id="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096." name="/" resourcePath="">
+						<toolChain errorParsers="" id="ilg.gnuarmeclipse.managedbuild.cross.toolchain.elf.debug.1863304541" name="Cross ARM GCC" superClass="ilg.gnuarmeclipse.managedbuild.cross.toolchain.elf.debug">
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.level.154620095" name="Optimization Level" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.level" useByScannerDiscovery="true" value="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.level.debug" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.messagelength.474758369" name="Message length (-fmessage-length=0)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.messagelength" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.signedchar.336537408" name="'char' is signed (-fsigned-char)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.signedchar" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.functionsections.985345803" name="Function sections (-ffunction-sections)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.functionsections" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.datasections.2029539904" name="Data sections (-fdata-sections)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.datasections" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.level.484168442" name="Debug level" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.level" useByScannerDiscovery="true" value="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.level.max" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.format.1779000155" name="Debug format" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.format" useByScannerDiscovery="true"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.toolchain.name.1161857814" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.toolchain.name" useByScannerDiscovery="false" value="GNU Tools for ARM Embedded Processors" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.architecture.1625570490" name="Architecture" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.architecture" useByScannerDiscovery="false" value="ilg.gnuarmeclipse.managedbuild.cross.option.architecture.arm" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.family.2068937016" name="Arm family (-mcpu)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.family" useByScannerDiscovery="false" value="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.mcpu.cortex-m33" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.instructionset.1631350402" name="Instruction set" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.instructionset" useByScannerDiscovery="false" value="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.instructionset.thumb" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.prefix.54264426" name="Prefix" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.prefix" useByScannerDiscovery="false" value="arm-none-eabi-" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.c.995388255" name="C compiler" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.c" useByScannerDiscovery="false" value="gcc" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.cpp.49240279" name="C++ compiler" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.cpp" useByScannerDiscovery="false" value="g++" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.ar.1274595077" name="Archiver" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.ar" useByScannerDiscovery="false" value="ar" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.objcopy.833587580" name="Hex/Bin converter" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.objcopy" useByScannerDiscovery="false" value="objcopy" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.objdump.711826664" name="Listing generator" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.objdump" useByScannerDiscovery="false" value="objdump" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.size.341998617" name="Size command" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.size" useByScannerDiscovery="false" value="size" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.make.352972228" name="Build command" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.make" useByScannerDiscovery="false" value="make" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.rm.2131059372" name="Remove command" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.rm" useByScannerDiscovery="false" value="rm" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.createflash.1867524209" name="Create flash image" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.createflash" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.printsize.1487983495" name="Print size" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.printsize" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.abi.1494712284" name="Float ABI" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.abi" useByScannerDiscovery="true" value="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.abi.hard" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.unit.1788341005" name="FPU Type" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.unit" useByScannerDiscovery="true" value="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.unit.fpv5spd16" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.createlisting.2100074078" name="Create extended listing" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.createlisting" useByScannerDiscovery="false"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.warnings.toerrors.191453400" name="Generate errors instead of warnings (-Werror)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.warnings.toerrors" useByScannerDiscovery="true" value="false" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.warnings.allwarn.950320884" name="Enable all common warnings (-Wall)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.warnings.allwarn" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.toolchain.id.1703827256" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.toolchain.id" useByScannerDiscovery="false" value="1287942917" valueType="string"/>
+							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="ilg.gnuarmeclipse.managedbuild.cross.targetPlatform.578124921" isAbstract="false" osList="all" superClass="ilg.gnuarmeclipse.managedbuild.cross.targetPlatform"/>
+							<builder buildPath="${workspace_loc:/${ProjName}}/${ConfigName}" command="${cross_make}" errorParsers="org.eclipse.cdt.core.GmakeErrorParser;org.eclipse.cdt.core.CWDLocator" id="ilg.gnuarmeclipse.managedbuild.cross.builder.1514294789" incrementalBuildTarget="all --silent" keepEnvironmentInBuildfile="false" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="ilg.gnuarmeclipse.managedbuild.cross.builder"/>
+							<tool command="${cross_prefix}${cross_c}${cross_suffix}" commandLinePattern="${COMMAND} ${cross_toolchain_flags} ${FLAGS} -c ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" errorParsers="org.eclipse.cdt.core.GASErrorParser;org.eclipse.cdt.core.GCCErrorParser" id="ilg.gnuarmeclipse.managedbuild.cross.tool.assembler.1291307647" name="Cross ARM GNU Assembler" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.assembler">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.usepreprocessor.1355416852" name="Use preprocessor" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.usepreprocessor" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.defs.1007658281" name="Defined symbols (-D)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.defs" useByScannerDiscovery="true" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="dg_configDEVICE=DA14592_00"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.include.paths.864011018" name="Include paths (-I)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.include.paths" useByScannerDiscovery="true" valueType="includePath">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/config}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/middleware_config}&quot;"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.include.files.1421804521" name="Include files (-include)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.include.files" useByScannerDiscovery="true" valueType="includeFiles">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/config/custom_config_eflash.h}&quot;"/>
+								</option>
+								<inputType id="ilg.gnuarmeclipse.managedbuild.cross.tool.assembler.input.1485472214" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.assembler.input"/>
+							</tool>
+							<tool command="${cross_prefix}${cross_c}${cross_suffix}" commandLinePattern="${COMMAND} ${cross_toolchain_flags} ${FLAGS} -c ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" errorParsers="org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GCCErrorParser" id="ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.38177201" name="Cross ARM C Compiler" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler">
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.include.paths.986718821" name="Include paths (-I)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.include.paths" useByScannerDiscovery="false" valueType="includePath">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/adapters/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/test-includes}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/nnlib/Include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/util/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/memory/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/config}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/middleware_config}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/bsp_include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/FreeRTOS/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/FreeRTOS/portable/GCC/DA1459x}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/osal}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/sys_man/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/peripherals/include}&quot;"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.defs.1469170733" name="Defined symbols (-D)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.defs" useByScannerDiscovery="false" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="dg_configDEVICE=DA14592_00"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.include.files.222385028" name="Include files (-include)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.include.files" useByScannerDiscovery="false" valueType="includeFiles">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/config/custom_config_eflash.h}&quot;"/>
+								</option>
+								<inputType id="ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.923498863" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input"/>
+							</tool>
+							<tool id="ilg.gnuarmeclipse.managedbuild.cross.tool.cpp.compiler.441050096" name="Cross ARM C++ Compiler" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.cpp.compiler"/>
+							<tool command="${cross_prefix}${cross_c}${cross_suffix}" commandLinePattern="${COMMAND} ${cross_toolchain_flags} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" errorParsers="" id="ilg.gnuarmeclipse.managedbuild.cross.tool.c.linker.1375159764" name="Cross ARM C Linker" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.c.linker">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.gcsections.2066962453" name="Remove unused sections (-Xlinker --gc-sections)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.gcsections" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.usenewlibnano.570889905" name="Use newlib-nano (--specs=nano.specs)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.usenewlibnano" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.scriptfile.1154531936" name="Script files (-T)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.scriptfile" useByScannerDiscovery="false" valueType="stringList">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/${ConfigName}/mem.ld&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/${ConfigName}/sections.ld&quot;"/>
+								</option>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.other.1908324097" name="Other linker flags" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.other" useByScannerDiscovery="false" value="--specs=nosys.specs " valueType="string"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.paths.1795463792" name="Library search path (-L)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.paths" useByScannerDiscovery="false" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/nnlib}&quot;"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.libs.1564601547" name="Libraries (-l)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.libs" useByScannerDiscovery="false" valueType="libs">
+									<listOptionValue builtIn="false" value="cmsis-nn"/>
+								</option>
+								<inputType id="ilg.gnuarmeclipse.managedbuild.cross.tool.c.linker.input.1229111067" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.c.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="ilg.gnuarmeclipse.managedbuild.cross.tool.cpp.linker.1087516810" name="Cross ARM C++ Linker" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.cpp.linker">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.gcsections.1814997365" name="Remove unused sections (-Xlinker --gc-sections)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.gcsections" value="true" valueType="boolean"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.scriptfile.842273280" name="Script files (-T)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.scriptfile" valueType="stringList">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/${ConfigName}/mem.ld&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/${ConfigName}/sections.ld&quot;"/>
+								</option>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.other.817586435" name="Other linker flags" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.other" value="--specs=nosys.specs " valueType="string"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.paths.1759512064" name="Library search path (-L)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/nnlib}&quot;"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.libs.192033802" name="Libraries (-l)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="cmsis-nn"/>
+								</option>
+							</tool>
+							<tool id="ilg.gnuarmeclipse.managedbuild.cross.tool.archiver.1016519406" name="Cross ARM GNU Archiver" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.archiver"/>
+							<tool command="${cross_prefix}${cross_objcopy}${cross_suffix}" commandLinePattern="${COMMAND} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT}" errorParsers="" id="ilg.gnuarmeclipse.managedbuild.cross.tool.createflash.260431120" name="Cross ARM GNU Create Flash Image" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.createflash">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createflash.choice.1273059889" name="Output file format (-O)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createflash.choice" useByScannerDiscovery="false" value="ilg.gnuarmeclipse.managedbuild.cross.option.createflash.choice.binary" valueType="enumerated"/>
+							</tool>
+							<tool id="ilg.gnuarmeclipse.managedbuild.cross.tool.createlisting.1584262900" name="Cross ARM GNU Create Listing" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.createlisting">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.source.886706428" name="Display source (--source|-S)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.source" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.allheaders.48627378" name="Display all headers (--all-headers|-x)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.allheaders" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.demangle.1344581427" name="Demangle names (--demangle|-C)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.demangle" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.linenumbers.939154929" name="Display line numbers (--line-numbers|-l)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.linenumbers" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.wide.948295020" name="Wide lines (--wide|-w)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.wide" value="true" valueType="boolean"/>
+							</tool>
+							<tool command="${cross_prefix}${cross_size}${cross_suffix}" commandLinePattern="${COMMAND} ${FLAGS}" errorParsers="" id="ilg.gnuarmeclipse.managedbuild.cross.tool.printsize.876066347" name="Cross ARM GNU Print Size" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.printsize">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.printsize.format.1423818040" name="Size format" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.printsize.format" useByScannerDiscovery="false"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+			<storageModule moduleId="ilg.gnuarmeclipse.managedbuild.packs"/>
+			<storageModule moduleId="packages"/>
+			<storageModule moduleId="ilg.gnumcueclipse.managedbuild.packs"/>
+		</cconfiguration>
+		<cconfiguration id="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096.802618131">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096.802618131" moduleId="org.eclipse.cdt.core.settings" name="DA1459x-00-Release_eFLASH">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="${cross_rm} -rf" description="Applicable for DA1459x-00. Release build configuration for executing using cached eFlash mode." errorParsers="org.eclipse.cdt.core.GASErrorParser;org.eclipse.cdt.core.GmakeErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.CWDLocator;org.eclipse.cdt.core.GCCErrorParser" id="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096.802618131" name="DA1459x-00-Release_eFLASH" parent="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug" postannouncebuildStep="" postbuildStep="" preannouncebuildStep="Generate linker scripts." prebuildStep="+${cross_make} generate_ldscripts DEVICE=DA14592_00 LD_DEFS=-DRELEASE_BUILD APP_CONFIG_H=&quot;${workspace_loc:/${ProjName}/config/custom_config_eflash.h}&quot; CC=&quot;${cross_prefix}${cross_c}${cross_suffix}&quot; BSP_CONFIG_DIR=&quot;${workspace_loc:/${ProjName}/sdk/config}&quot; MIDDLEWARE_CONFIG_DIR=&quot;${workspace_loc:/${ProjName}/sdk/middleware_config}&quot; LDSCRIPT_PATH=&quot;${workspace_loc:/${ProjName}/sdk/ldscripts}&quot;">
+					<folderInfo id="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096.802618131." name="/" resourcePath="">
+						<toolChain errorParsers="" id="ilg.gnuarmeclipse.managedbuild.cross.toolchain.elf.debug.1602157892" name="Cross ARM GCC" superClass="ilg.gnuarmeclipse.managedbuild.cross.toolchain.elf.debug">
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.level.2138492062" name="Optimization Level" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.level" useByScannerDiscovery="true" value="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.level.size" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.messagelength.1226344198" name="Message length (-fmessage-length=0)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.messagelength" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.signedchar.1026562874" name="'char' is signed (-fsigned-char)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.signedchar" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.functionsections.2015963399" name="Function sections (-ffunction-sections)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.functionsections" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.datasections.545779600" name="Data sections (-fdata-sections)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.optimization.datasections" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.level.1119151870" name="Debug level" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.level" useByScannerDiscovery="true" value="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.level.max" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.format.1366188749" name="Debug format" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.debugging.format" useByScannerDiscovery="true"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.toolchain.name.755988015" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.toolchain.name" useByScannerDiscovery="false" value="GNU Tools for ARM Embedded Processors" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.architecture.83974778" name="Architecture" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.architecture" useByScannerDiscovery="false" value="ilg.gnuarmeclipse.managedbuild.cross.option.architecture.arm" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.family.1043895180" name="Arm family (-mcpu)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.family" useByScannerDiscovery="false" value="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.mcpu.cortex-m33" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.instructionset.1432105498" name="Instruction set" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.instructionset" useByScannerDiscovery="false" value="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.instructionset.thumb" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.prefix.647836589" name="Prefix" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.prefix" useByScannerDiscovery="false" value="arm-none-eabi-" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.c.1929511655" name="C compiler" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.c" useByScannerDiscovery="false" value="gcc" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.cpp.723106668" name="C++ compiler" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.cpp" useByScannerDiscovery="false" value="g++" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.ar.935381989" name="Archiver" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.ar" useByScannerDiscovery="false" value="ar" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.objcopy.435471685" name="Hex/Bin converter" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.objcopy" useByScannerDiscovery="false" value="objcopy" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.objdump.868998417" name="Listing generator" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.objdump" useByScannerDiscovery="false" value="objdump" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.size.2141845152" name="Size command" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.size" useByScannerDiscovery="false" value="size" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.make.1644580623" name="Build command" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.make" useByScannerDiscovery="false" value="make" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.command.rm.1850380968" name="Remove command" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.command.rm" useByScannerDiscovery="false" value="rm" valueType="string"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.createflash.404302810" name="Create flash image" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.createflash" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.printsize.2069269190" name="Print size" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.printsize" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.abi.1414818394" name="Float ABI" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.abi" useByScannerDiscovery="true" value="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.abi.hard" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.unit.1989016221" name="FPU Type" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.unit" useByScannerDiscovery="true" value="ilg.gnuarmeclipse.managedbuild.cross.option.arm.target.fpu.unit.fpv5spd16" valueType="enumerated"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.createlisting.1162359303" name="Create extended listing" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.addtools.createlisting" useByScannerDiscovery="false"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.warnings.toerrors.1365325531" name="Generate errors instead of warnings (-Werror)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.warnings.toerrors" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.warnings.allwarn.677466768" name="Enable all common warnings (-Wall)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.warnings.allwarn" useByScannerDiscovery="true" value="true" valueType="boolean"/>
+							<option id="ilg.gnuarmeclipse.managedbuild.cross.option.toolchain.id.391463991" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.toolchain.id" useByScannerDiscovery="false" value="1287942917" valueType="string"/>
+							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="ilg.gnuarmeclipse.managedbuild.cross.targetPlatform.188356459" isAbstract="false" osList="all" superClass="ilg.gnuarmeclipse.managedbuild.cross.targetPlatform"/>
+							<builder buildPath="${workspace_loc:/freertos_rtt}/Debug_QSPI" command="${cross_make}" errorParsers="org.eclipse.cdt.core.GmakeErrorParser;org.eclipse.cdt.core.CWDLocator" id="ilg.gnuarmeclipse.managedbuild.cross.builder.839508653" incrementalBuildTarget="all --silent" keepEnvironmentInBuildfile="false" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="ilg.gnuarmeclipse.managedbuild.cross.builder"/>
+							<tool command="${cross_prefix}${cross_c}${cross_suffix}" commandLinePattern="${COMMAND} ${cross_toolchain_flags} ${FLAGS} -c ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" errorParsers="org.eclipse.cdt.core.GASErrorParser;org.eclipse.cdt.core.GCCErrorParser" id="ilg.gnuarmeclipse.managedbuild.cross.tool.assembler.1984206173" name="Cross ARM GNU Assembler" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.assembler">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.usepreprocessor.1602379950" name="Use preprocessor" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.usepreprocessor" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.defs.479641675" name="Defined symbols (-D)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.defs" useByScannerDiscovery="true" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="RELEASE_BUILD"/>
+									<listOptionValue builtIn="false" value="dg_configDEVICE=DA14592_00"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.include.paths.424790205" name="Include paths (-I)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.include.paths" useByScannerDiscovery="true" valueType="includePath">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/config}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/middleware_config}&quot;"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.include.files.1558069312" name="Include files (-include)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.assembler.include.files" useByScannerDiscovery="true" valueType="includeFiles">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/config/custom_config_eflash.h}&quot;"/>
+								</option>
+								<inputType id="ilg.gnuarmeclipse.managedbuild.cross.tool.assembler.input.1143980627" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.assembler.input"/>
+							</tool>
+							<tool command="${cross_prefix}${cross_c}${cross_suffix}" commandLinePattern="${COMMAND} ${cross_toolchain_flags} ${FLAGS} -c ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" errorParsers="org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GCCErrorParser" id="ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.540208732" name="Cross ARM C Compiler" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler">
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.include.paths.1823017970" name="Include paths (-I)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.include.paths" useByScannerDiscovery="false" valueType="includePath">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/adapters/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/test-includes}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/nnlib/Include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/util/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/memory/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/config}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/middleware_config}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/bsp_include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/FreeRTOS/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/FreeRTOS/portable/GCC/DA1459x}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/osal}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/sys_man/include}&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/sdk/peripherals/include}&quot;"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.defs.1082336507" name="Defined symbols (-D)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.defs" useByScannerDiscovery="false" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="RELEASE_BUILD"/>
+									<listOptionValue builtIn="false" value="dg_configDEVICE=DA14592_00"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.include.files.1433985872" name="Include files (-include)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.compiler.include.files" useByScannerDiscovery="false" valueType="includeFiles">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/config/custom_config_eflash.h}&quot;"/>
+								</option>
+								<inputType id="ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.1449129890" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input"/>
+							</tool>
+							<tool id="ilg.gnuarmeclipse.managedbuild.cross.tool.cpp.compiler.64037965" name="Cross ARM C++ Compiler" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.cpp.compiler"/>
+							<tool command="${cross_prefix}${cross_c}${cross_suffix}" commandLinePattern="${COMMAND} ${cross_toolchain_flags} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" errorParsers="" id="ilg.gnuarmeclipse.managedbuild.cross.tool.c.linker.2036680014" name="Cross ARM C Linker" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.c.linker">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.gcsections.1354761410" name="Remove unused sections (-Xlinker --gc-sections)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.gcsections" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.usenewlibnano.2124232028" name="Use newlib-nano (--specs=nano.specs)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.usenewlibnano" useByScannerDiscovery="false" value="true" valueType="boolean"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.scriptfile.1408910793" name="Script files (-T)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.scriptfile" useByScannerDiscovery="false" valueType="stringList">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/${ConfigName}/mem.ld&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/${ConfigName}/sections.ld&quot;"/>
+								</option>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.other.907825633" name="Other linker flags" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.other" useByScannerDiscovery="false" value="--specs=nosys.specs " valueType="string"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.paths.1999768441" name="Library search path (-L)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.paths" useByScannerDiscovery="false" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/nnlib}&quot;"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.libs.1070032604" name="Libraries (-l)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.c.linker.libs" useByScannerDiscovery="false" valueType="libs">
+									<listOptionValue builtIn="false" value="cmsis-nn"/>
+								</option>
+								<inputType id="ilg.gnuarmeclipse.managedbuild.cross.tool.c.linker.input.1530853268" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.c.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="ilg.gnuarmeclipse.managedbuild.cross.tool.cpp.linker.1104728203" name="Cross ARM C++ Linker" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.cpp.linker">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.gcsections.1076017969" name="Remove unused sections (-Xlinker --gc-sections)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.gcsections" value="true" valueType="boolean"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.scriptfile.2002504783" name="Script files (-T)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.scriptfile" valueType="stringList">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/${ConfigName}/mem.ld&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/${ConfigName}/sections.ld&quot;"/>
+								</option>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.other.1829212354" name="Other linker flags" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.other" value="--specs=nosys.specs " valueType="string"/>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.paths.2011516568" name="Library search path (-L)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}/nnlib}&quot;"/>
+								</option>
+								<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.libs.1446792904" name="Libraries (-l)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.cpp.linker.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="cmsis-nn"/>
+								</option>
+							</tool>
+							<tool id="ilg.gnuarmeclipse.managedbuild.cross.tool.archiver.1312988192" name="Cross ARM GNU Archiver" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.archiver"/>
+							<tool command="${cross_prefix}${cross_objcopy}${cross_suffix}" commandLinePattern="${COMMAND} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT}" errorParsers="" id="ilg.gnuarmeclipse.managedbuild.cross.tool.createflash.2012742661" name="Cross ARM GNU Create Flash Image" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.createflash">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createflash.choice.110697509" name="Output file format (-O)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createflash.choice" useByScannerDiscovery="false" value="ilg.gnuarmeclipse.managedbuild.cross.option.createflash.choice.binary" valueType="enumerated"/>
+							</tool>
+							<tool id="ilg.gnuarmeclipse.managedbuild.cross.tool.createlisting.2143681484" name="Cross ARM GNU Create Listing" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.createlisting">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.source.1354773369" name="Display source (--source|-S)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.source" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.allheaders.1449473619" name="Display all headers (--all-headers|-x)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.allheaders" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.demangle.477953442" name="Demangle names (--demangle|-C)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.demangle" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.linenumbers.2037878361" name="Display line numbers (--line-numbers|-l)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.linenumbers" value="true" valueType="boolean"/>
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.wide.1061180303" name="Wide lines (--wide|-w)" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.createlisting.wide" value="true" valueType="boolean"/>
+							</tool>
+							<tool command="${cross_prefix}${cross_size}${cross_suffix}" commandLinePattern="${COMMAND} ${FLAGS}" errorParsers="" id="ilg.gnuarmeclipse.managedbuild.cross.tool.printsize.1611201775" name="Cross ARM GNU Print Size" superClass="ilg.gnuarmeclipse.managedbuild.cross.tool.printsize">
+								<option id="ilg.gnuarmeclipse.managedbuild.cross.option.printsize.format.2135535606" name="Size format" superClass="ilg.gnuarmeclipse.managedbuild.cross.option.printsize.format" useByScannerDiscovery="false"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+			<storageModule moduleId="ilg.gnuarmeclipse.managedbuild.packs"/>
+			<storageModule moduleId="packages"/>
+			<storageModule moduleId="ilg.gnumcueclipse.managedbuild.packs"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="freertos_retarget.null.435026441" name="freertos_retarget"/>
+	</storageModule>
+	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+	<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
+	<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
+	<storageModule moduleId="refreshScope"/>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611;ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.550104045;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.903589725">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1825295060.1205335715.60147864.130947607;ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1825295060.1205335715.60147864.130947607.;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.1132938837;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.1467360329">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.289030582.424810697;ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.289030582.424810697.;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.1508666502;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.1583153585">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="ilg.gnuarmeclipse.managedbuild.cross.config.elf.release.395694521;ilg.gnuarmeclipse.managedbuild.cross.config.elf.release.395694521.;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.1046827007;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.1876181439">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="ilg.gnuarmeclipse.managedbuild.cross.config.elf.release.395694521.301986592;ilg.gnuarmeclipse.managedbuild.cross.config.elf.release.395694521.301986592.;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.1600572290;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.1676171357">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="ilg.gnuarmeclipse.managedbuild.cross.config.elf.release.395694521.301986592.1987593638;ilg.gnuarmeclipse.managedbuild.cross.config.elf.release.395694521.301986592.1987593638.;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.1765826447;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.951246965">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.526921126;ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.526921126.;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.1063186517;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.705343494">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096;ilg.gnuarmeclipse.managedbuild.cross.config.elf.debug.76586611.1007346875.660827118.235708102.1847866934.384067096.;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.38177201;ilg.gnuarmeclipse.managedbuild.cross.tool.c.compiler.input.923498863">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="refreshScope" versionNumber="2">
+		<configuration configurationName="DA1459x-00-Release_eFlash"/>
+		<configuration configurationName="DA1469x-00-Release_RAM">
+			<resource resourceType="PROJECT" workspacePath="/cmsis_nn"/>
+		</configuration>
+		<configuration configurationName="DA1459x-00-Debug_eFlash"/>
+		<configuration configurationName="DA1469x-00-Debug_RAM">
+			<resource resourceType="PROJECT" workspacePath="/cmsis_nn"/>
+		</configuration>
+		<configuration configurationName="DA1469x-00-Release_QSPI">
+			<resource resourceType="PROJECT" workspacePath="/cmsis_nn"/>
+		</configuration>
+		<configuration configurationName="DA1469x-00-Debug_QSPI">
+			<resource resourceType="PROJECT" workspacePath="/cmsis_nn"/>
+		</configuration>
+	</storageModule>
+</cproject>
\ No newline at end of file
diff --git a/features/cmsis_nn_sample_code/.project b/features/cmsis_nn_sample_code/.project
new file mode 100644
index 0000000..5eda894
--- /dev/null
+++ b/features/cmsis_nn_sample_code/.project
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>cmsis_nn_sample_code</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+	<linkedResources>
+		<link>
+			<name>sdk</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
+		<link>
+			<name>startup</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/bsp/startup</locationURI>
+		</link>
+		<link>
+			<name>sdk/FreeRTOS</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/free_rtos</locationURI>
+		</link>
+		<link>
+			<name>sdk/adapters</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/middleware/adapters</locationURI>
+		</link>
+		<link>
+			<name>sdk/bsp_include</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/bsp/include</locationURI>
+		</link>
+		<link>
+			<name>sdk/config</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/bsp/config</locationURI>
+		</link>
+		<link>
+			<name>sdk/ldscripts</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/bsp/ldscripts/non_ble_projects</locationURI>
+		</link>
+		<link>
+			<name>sdk/memory</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/bsp/memory</locationURI>
+		</link>
+		<link>
+			<name>sdk/middleware_config</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/middleware/config</locationURI>
+		</link>
+		<link>
+			<name>sdk/osal</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/middleware/osal</locationURI>
+		</link>
+		<link>
+			<name>sdk/peripherals</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/bsp/peripherals</locationURI>
+		</link>
+		<link>
+			<name>sdk/sys_man</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/bsp/system/sys_man</locationURI>
+		</link>
+		<link>
+			<name>sdk/util</name>
+			<type>2</type>
+			<locationURI>SDKROOT/sdk/bsp/util</locationURI>
+		</link>
+	</linkedResources>
+	<variableList>
+		<variable>
+			<name>SDKROOT</name>
+			<value>$%7BWORKSPACE_LOC%7D</value>
+		</variable>
+	</variableList>
+</projectDescription>
diff --git a/features/cmsis_nn_sample_code/Readme.md b/features/cmsis_nn_sample_code/Readme.md
new file mode 100644
index 0000000..317add9
--- /dev/null
+++ b/features/cmsis_nn_sample_code/Readme.md
@@ -0,0 +1,37 @@
+# CMSIS NN Demonstration Example
+
+This example runs the ARM CMSIS NN library on the DA1459x family of devices. Four of the tests from the CMSIS library have been extracted and used as a sanity check.
+
+## HW and SW Configuration
+
+  - **Hardware Configuration**
+    - This example runs on the DA1459x family of devices.
+    - A [Pro Development Kit](https://www.renesas.com/us/en/products/wireless-connectivity/bluetooth-low-energy/da14592-016fdevkt-p-smartbond-da14592-bluetooth-low-energy-52-soc-development-kit-pro) (DevKit) is needed for this example.
+  - **Software Configuration**
+    - Download the latest [SDK](https://www.renesas.com/us/en/products/wireless-connectivity/bluetooth-low-energy/da14592-smartbond-multi-core-bluetooth-le-52-soc-embedded-flash?gad_source=1) version for the target family of devices.
+
+    - SEGGER J-Link tools are normally downloaded and installed as part of the [e2 Studio](https://www.renesas.com/us/en/software-tool/smartbond-development-tools) installation.
+
+## How to run the example
+
+### Initial Setup
+
+- Download the source code from [GitHub](https://github.com/dialog-semiconductor/BLE_SDK10_DA1459x_examples). 
+- Import the project into your workspace (there should be no path dependencies). If you are not familiar with these processes it's advised that you first familiarize yourself with the [Getting Started](https://lpccs-docs.renesas.com/um-b-166-da1459x_getting_started/index.html) guide.
+- Connect the target device to your host PC via USB1. The mentioned port is used to power the device and to support serial and JTAG interfaces. These two interfaces can be used both for flashing and debugging purposes.
+- Compile the source code (either in Release or Debug mode) and flash it into the chip. Please note that the Debug flavor should be used only for debugging purposes, since it significantly increases the size of the generated binary file. In addition, the source code is built to work with the embedded flash; working with external flash memory devices is out of the scope of this demonstration example.
+- Open a serial terminal (115200/8-N-1).
+- Once the application image is flashed, press the RESET button on the daughter board to start executing the application. 
+
+- The terminal should report, for each test vector, whether it ran successfully. The `#` character should also be printed every second, as the sample code is built on top of the `freertos_retarget` SDK sample code.
+
+```
+arm_fully_connected_s16 test success
+arm_fully_connected_s16_big test success
+int16xint8_arm_convolve_fast_s16 test success
+requantize_s64_arm_convolve_fast_s16 test success
+```
+
+## Known Limitations
+
+There are no known limitations for this example.
diff --git a/features/cmsis_nn_sample_code/config/custom_config_eflash.h b/features/cmsis_nn_sample_code/config/custom_config_eflash.h
new file mode 100644
index 0000000..ed4b278
--- /dev/null
+++ b/features/cmsis_nn_sample_code/config/custom_config_eflash.h
@@ -0,0 +1,66 @@
+/**
+ ****************************************************************************************
+ *
+ * @file custom_config_eflash.h
+ *
+ * @brief Board Support Package. User Configuration file for cached eFLASH mode.
+ *
+ * Copyright (C) 2020-2024 Renesas Electronics Corporation and/or its affiliates.
+ * All rights reserved. Confidential Information.
+ *
+ * This software ("Software") is supplied by Renesas Electronics Corporation and/or its
+ * affiliates ("Renesas"). Renesas grants you a personal, non-exclusive, non-transferable,
+ * revocable, non-sub-licensable right and license to use the Software, solely if used in
+ * or together with Renesas products. You may make copies of this Software, provided this
+ * copyright notice and disclaimer ("Notice") is included in all such copies. Renesas
+ * reserves the right to change or discontinue the Software at any time without notice.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS". RENESAS DISCLAIMS ALL WARRANTIES OF ANY KIND,
+ * WHETHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. TO THE
+ * MAXIMUM EXTENT PERMITTED UNDER LAW, IN NO EVENT SHALL RENESAS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE, EVEN IF RENESAS HAS BEEN ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGES. USE OF THIS SOFTWARE MAY BE SUBJECT TO TERMS AND CONDITIONS CONTAINED IN
+ * AN ADDITIONAL AGREEMENT BETWEEN YOU AND RENESAS. IN CASE OF CONFLICT BETWEEN THE TERMS
+ * OF THIS NOTICE AND ANY SUCH ADDITIONAL LICENSE AGREEMENT, THE TERMS OF THE AGREEMENT
+ * SHALL TAKE PRECEDENCE. BY CONTINUING TO USE THIS SOFTWARE, YOU AGREE TO THE TERMS OF
+ * THIS NOTICE.IF YOU DO NOT AGREE TO THESE TERMS, YOU ARE NOT PERMITTED TO USE THIS
+ * SOFTWARE.
+ *
+ ****************************************************************************************
+ */
+
+#ifndef CUSTOM_CONFIG_EFLASH_H_
+#define CUSTOM_CONFIG_EFLASH_H_
+
+#include "bsp_definitions.h"
+
+/* When defined, main.c declares and calls retarget_init() during system initialization. */
+#define CONFIG_RETARGET
+
+/* Cached execution from the embedded flash (the mode this configuration file targets). */
+#define dg_configEXEC_MODE                      MODE_IS_CACHED
+#define dg_configCODE_LOCATION                  NON_VOLATILE_IS_EMBEDDED_FLASH
+
+/* Non-zero: the idle hook in main.c calls sys_watchdog_idle_task_notify(). */
+#define dg_configUSE_WDOG                       ( 1 )
+
+/* NOTE(review): presumably enables the SDK software cursor debug aid - confirm in bsp_defaults.h. */
+#define dg_configUSE_SW_CURSOR                  ( 1 )
+
+/*************************************************************************************************\
+ * FreeRTOS specific config
+ */
+#define OS_FREERTOS                              /* Define this to use FreeRTOS */
+#define configTOTAL_HEAP_SIZE                    14000   /* This is the FreeRTOS Total Heap Size */
+
+/*************************************************************************************************\
+ * Peripheral specific config
+ *
+ * All three options below are set to 0 for this example.
+ */
+#define dg_configFLASH_ADAPTER                  ( 0 )
+#define dg_configNVMS_ADAPTER                   ( 0 )
+#define dg_configNVMS_VES                       ( 0 )
+
+/* Include bsp default values */
+#include "bsp_defaults.h"
+/* Include middleware default values */
+#include "middleware_defaults.h"
+
+#endif /* CUSTOM_CONFIG_EFLASH_H_ */
diff --git a/features/cmsis_nn_sample_code/main.c b/features/cmsis_nn_sample_code/main.c
new file mode 100644
index 0000000..1f8d155
--- /dev/null
+++ b/features/cmsis_nn_sample_code/main.c
@@ -0,0 +1,261 @@
+/**
+ ****************************************************************************************
+ *
+ * @file main.c
+ *
+ * @brief FreeRTOS template application with retarget and CMSIS NN library demonstration
+ *
+ * Copyright (C) 2015-2024 Renesas Electronics Corporation and/or its affiliates
+ * The MIT License (MIT)
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+ * OR OTHER DEALINGS IN THE SOFTWARE.
+ ****************************************************************************************
+ */
+#include <stdio.h>
+#include <stdbool.h>
+#include "osal.h"
+#include "sys_watchdog.h"
+#include "sys_clock_mgr.h"
+#include "sys_power_mgr.h"
+#include "test_arm_fully_connected_s16.h"
+#include "test_arm_convolve_fast_s16.h"
+
+/* Task priorities */
+#define mainTEMPLATE_TASK_PRIORITY              ( OS_TASK_PRIORITY_NORMAL )
+
+/* The period, expressed in OS ticks (equivalent to 200 ms), at which the template task counter is incremented. */
+#define mainCOUNTER_FREQUENCY_MS                OS_MS_2_TICKS(200)
+/*
+ * Perform any application specific hardware configuration.  The clocks,
+ * memory, etc. are configured before main() is called.
+ */
+static void prvSetupHardware( void );
+/*
+ * Task functions .
+ */
+static OS_TASK_FUNCTION(prvTemplateTask, pvParameters);
+
+/**
+ * @brief System initialization task
+ *
+ * Calls the clock manager to set up the system clock (sysclk_XTAL32M), the APB/AHB
+ * dividers and the low-power clock, initializes the hardware and, when CONFIG_RETARGET
+ * is defined, the retarget module. It then selects extended sleep, creates the
+ * CMSIS NN demo task and deletes itself.
+ */
+static OS_TASK_FUNCTION(system_init, pvParameters)
+{
+        OS_TASK nn_task = NULL;
+
+#ifdef CONFIG_RETARGET
+        extern void retarget_init(void);
+#endif
+
+        /* Clock tree: system clock first, then bus dividers, then the low-power clock. */
+        cm_sys_clk_init(sysclk_XTAL32M);
+        cm_apb_set_clock_divider(apb_div1);
+        cm_ahb_set_clock_divider(ahb_div1);
+        cm_lp_clk_init();
+
+        /* Hardware bring-up required by the demo. */
+        prvSetupHardware();
+
+#ifdef CONFIG_RETARGET
+        retarget_init();
+#endif
+
+        pm_set_wakeup_mode(true);
+
+        /* The system is allowed to enter extended sleep when idle. */
+        pm_sleep_mode_set(pm_mode_extended_sleep);
+
+        /* Spawn the task that runs the CMSIS NN test vectors. */
+        OS_TASK_CREATE( "CMSIS NN",                     /* Task name; for debugging only,
+                                                           not used by the kernel. */
+                        prvTemplateTask,                /* Task entry function. */
+                        NULL,                           /* Argument handed to the task. */
+                        1024,                           /* Stack size of the task, in bytes. */
+                        mainTEMPLATE_TASK_PRIORITY,     /* Task priority. */
+                        nn_task );                      /* Receives the task handle. */
+        OS_ASSERT(nn_task);
+
+        /* Initialization is complete, so this task removes itself. */
+        OS_TASK_DELETE(OS_GET_CURRENT_TASK());
+}
+
+/**
+ * @brief Application entry point
+ *
+ * Creates the SysInit task (which in turn creates the CMSIS NN task) and starts
+ * the scheduler. Not expected to return.
+ */
+int main( void )
+{
+        OS_TASK sys_init_task;
+        OS_BASE_TYPE rc;
+
+        /* Only the SysInit task is created here; it runs at the highest priority. */
+        rc = OS_TASK_CREATE("SysInit",                  /* Task name; for debugging only,
+                                                           not used by the kernel. */
+                        system_init,                    /* Task entry function. */
+                        NULL,                           /* Argument handed to the task. */
+                        configMINIMAL_STACK_SIZE * OS_STACK_WORD_SIZE,
+                                                        /* Stack size of the task, in bytes. */
+                        OS_TASK_PRIORITY_HIGHEST,       /* Task priority. */
+                        sys_init_task );                /* Receives the task handle. */
+        OS_ASSERT(rc == OS_TASK_CREATE_SUCCESS);
+
+        /* Hand control over to the scheduler. */
+        vTaskStartScheduler();
+
+        /* Reached only if the scheduler could not start, i.e. there was not enough
+           FreeRTOS heap to create the idle and/or timer tasks. Spin forever. */
+        for ( ;; ) {
+        }
+}
+
+/**
+ * @brief CMSIS NN demo task
+ *
+ * Runs the four CMSIS NN test vectors once and prints the outcome of each. It then
+ * wakes up every mainCOUNTER_FREQUENCY_MS ticks, increments a counter and prints
+ * a '#' each time one second's worth of wake-ups has elapsed.
+ */
+static OS_TASK_FUNCTION(prvTemplateTask, pvParameters)
+{
+        static uint32_t wake_count = 0;
+        OS_TICK_TIME last_wake;
+
+        /* Reference point for the periodic delay; captured once, before the tests run. */
+        last_wake = OS_GET_TICK_COUNT();
+
+        if (!fully_connected_int16_arm_fully_connected_s16()) {
+                printf("\r\narm_fully_connected_s16 test unsuccessful\r\n");
+        } else {
+                printf("\r\narm_fully_connected_s16 test success\r\n");
+        }
+
+        if (!fully_connected_int16_big_arm_fully_connected_s16()) {
+                printf("arm_fully_connected_s16_big test unsuccessful\r\n");
+        } else {
+                printf("arm_fully_connected_s16_big test success\r\n");
+        }
+
+        if (!int16xint8_arm_convolve_fast_s16()) {
+                printf("int16xint8_arm_convolve_fast_s16 test unsuccessful\r\n");
+        } else {
+                printf("int16xint8_arm_convolve_fast_s16 test success\r\n");
+        }
+
+        if (!requantize_s64_arm_convolve_fast_s16()) {
+                printf("requantize_s64_arm_convolve_fast_s16 test unsuccessful\r\n");
+        } else {
+                printf("requantize_s64_arm_convolve_fast_s16 test success\r\n");
+        }
+        fflush(stdout);
+
+        while (true) {
+                /* Block until the next period boundary; the period is given in ticks
+                   and the task consumes no CPU time while blocked. */
+                vTaskDelayUntil( &last_wake, mainCOUNTER_FREQUENCY_MS );
+                wake_count++;
+
+                /* Skip the heartbeat unless a whole second's worth of periods has passed. */
+                if ((wake_count % (1000 / OS_TICKS_2_MS(mainCOUNTER_FREQUENCY_MS))) != 0) {
+                        continue;
+                }
+
+                printf("#");
+                fflush(stdout);
+        }
+}
+
+/**
+ * @brief Initialize the peripherals domain after power-up.
+ *
+ * Passed as a callback to pm_system_init() by prvSetupHardware(). The body is
+ * empty: this example configures no peripherals here.
+ */
+static void periph_init(void)
+{
+        /* Nothing to configure for this example. */
+}
+
+/**
+ * @brief Hardware initialization
+ *
+ * Registers periph_init() with the power manager through pm_system_init().
+ */
+static void prvSetupHardware( void )
+{
+        pm_system_init(periph_init);
+}
+
+/**
+ * @brief Malloc fail hook
+ *
+ * This function will be called only if it is enabled in the configuration of the OS
+ * or in the OS abstraction layer header osal.h, by a relevant macro definition.
+ * It is a hook function that will execute when a call to OS_MALLOC() returns error.
+ * OS_MALLOC() is called internally by the kernel whenever a task, queue,
+ * timer or semaphore is created. It can be also called by the application.
+ * The size of the available heap is defined by OS_TOTAL_HEAP_SIZE in osal.h.
+ * The OS_GET_FREE_HEAP_SIZE() API function can be used to query the size of
+ * free heap space that remains, although it does not provide information on
+ * whether the remaining heap is fragmented.
+ *
+ * Heap exhaustion is treated as a fatal error, in the same way as the stack
+ * overflow hook, so that the failure is reported instead of being silently ignored.
+ */
+OS_APP_MALLOC_FAILED( void )
+{
+        /* Stop here so an out-of-heap condition is caught at its origin. */
+        ASSERT_ERROR(0);
+}
+
+/**
+ * @brief Application idle task hook
+ *
+ * This function will be called only if it is enabled in the configuration of the OS
+ * or in the OS abstraction layer header osal.h, by a relevant macro definition.
+ * It will be called on each iteration of the idle task.
+ * It is essential that code added to this hook function never attempts
+ * to block in any way (for example, call OS_QUEUE_GET() with a block time
+ * specified, or call OS_TASK_DELAY()). If the application makes use of the
+ * OS_TASK_DELETE() API function (as this demo application does) then it is also
+ * important that OS_APP_IDLE() is permitted to return to its calling
+ * function, because it is the responsibility of the idle task to clean up
+ * memory allocated by the kernel to any task that has since been deleted.
+ */
+OS_APP_IDLE( void )
+{
+#if dg_configUSE_WDOG
+        /* Compiled in only when dg_configUSE_WDOG is non-zero.
+           NOTE(review): presumably reports to the watchdog service that the idle
+           task is still running - confirm against sys_watchdog.h. */
+        sys_watchdog_idle_task_notify();
+#endif
+}
+
+/**
+ * @brief Application stack overflow hook
+ *
+ * Invoked when a stack overflow is detected. Run-time stack overflow checking takes
+ * place only if it is enabled in the configuration of the OS or in the OS abstraction
+ * layer header osal.h, by a relevant macro definition.
+ *
+ * @param pxTask      handle of the offending task (unused)
+ * @param pcTaskName  name of the offending task (unused)
+ */
+OS_APP_STACK_OVERFLOW( OS_TASK pxTask, char *pcTaskName )
+{
+        /* Neither argument is used. */
+        ( void ) pxTask;
+        ( void ) pcTaskName;
+
+        /* A stack overflow is treated as a fatal error. */
+        ASSERT_ERROR(0);
+}
+
+/**
+ * @brief Application tick hook
+ *
+ * This function will be called only if it is enabled in the configuration of the OS
+ * or in the OS abstraction layer header osal.h, by a relevant macro definition.
+ * This hook function is executed each time a tick interrupt occurs.
+ *
+ * The body is empty: this example performs no per-tick work.
+ */
+OS_APP_TICK( void )
+{
+        /* Nothing to do on a tick. */
+}
diff --git a/features/cmsis_nn_sample_code/makefile.targets b/features/cmsis_nn_sample_code/makefile.targets
new file mode 100644
index 0000000..6ec9f78
--- /dev/null
+++ b/features/cmsis_nn_sample_code/makefile.targets
@@ -0,0 +1,11 @@
+# Default directory of the linker-script templates (*.ld.h). It can be overridden on the
+# make command line, as the project's pre-build step does.
+LDSCRIPT_PATH=../ldscripts
+
+.PHONY: main-build pre-build generate_ldscripts FORCE
+# pre-build is an order-only prerequisite: it is made before main-build.
+main-build : | pre-build
+
+# Recipe-less phony target; listing it as a prerequisite forces the rule below to run every time.
+FORCE:
+
+# Entry point used by the pre-build step: regenerates both linker scripts.
+generate_ldscripts : mem.ld sections.ld
+
+# Generate <name>.ld by running the C preprocessor (-E, with -P suppressing linemarkers) over
+# $(LDSCRIPT_PATH)/<name>.ld.h, with the macros of $(APP_CONFIG_H) pre-loaded through -imacros.
+# CC, BSP_CONFIG_DIR, MIDDLEWARE_CONFIG_DIR, APP_CONFIG_H and DEVICE are expected from the caller.
+%.ld : $(LDSCRIPT_PATH)/%.ld.h FORCE
+	"$(CC)" -I "$(BSP_CONFIG_DIR)" -I "$(MIDDLEWARE_CONFIG_DIR)" $(PRE_BUILD_EXTRA_DEFS) -imacros "$(APP_CONFIG_H)" $(LD_DEFS) -Ddg_configDEVICE=$(DEVICE) -E -P -c "$<" -o "$@"
diff --git a/features/cmsis_nn_sample_code/nnlib/Include/arm_math_types.h b/features/cmsis_nn_sample_code/nnlib/Include/arm_math_types.h
new file mode 100644
index 0000000..7680eef
--- /dev/null
+++ b/features/cmsis_nn_sample_code/nnlib/Include/arm_math_types.h
@@ -0,0 +1,592 @@
+/******************************************************************************
+ * @file     arm_math_types.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _ARM_MATH_TYPES_H_
+
+#define _ARM_MATH_TYPES_H_
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/* Compiler specific diagnostic adjustment */
+#if   defined ( __CC_ARM )
+
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+
+#elif defined ( __GNUC__ )
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wsign-conversion"
+  #pragma GCC diagnostic ignored "-Wconversion"
+  #pragma GCC diagnostic ignored "-Wunused-parameter"
+
+#elif defined ( __ICCARM__ )
+
+#elif defined ( __TI_ARM__ )
+
+#elif defined ( __CSMC__ )
+
+#elif defined ( __TASKING__ )
+
+#elif defined ( _MSC_VER )
+
+#else
+  #error Unknown compiler
+#endif
+
+
+/* Included for intrinsics definitions */
+#if defined (_MSC_VER ) 
+#include <stdint.h>
+#define __STATIC_FORCEINLINE static __forceinline
+#define __STATIC_INLINE static __inline
+#define __ALIGNED(x) __declspec(align(x))
+
+#elif defined (__GNUC_PYTHON__)
+#include <stdint.h>
+#define  __ALIGNED(x) __attribute__((aligned(x)))
+#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) 
+#define __STATIC_INLINE static inline
+
+#else
+#include "cmsis_compiler.h"
+#endif
+
+
+
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+/* evaluate ARM DSP feature */
+#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
+  #define ARM_MATH_DSP                   1
+#endif
+
+#if defined(ARM_MATH_NEON)
+#include <arm_neon.h>
+#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+  #if !defined(ARM_MATH_NEON_FLOAT16)
+  #define ARM_MATH_NEON_FLOAT16
+  #endif
+#endif
+#endif
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+
+#if __ARM_FEATURE_MVE
+  #if !defined(ARM_MATH_MVEI)
+    #define ARM_MATH_MVEI
+  #endif
+#endif
+
+#if (__ARM_FEATURE_MVE & 2)
+  #if !defined(ARM_MATH_MVEF)
+    #define ARM_MATH_MVEF
+  #endif
+  #if !defined(ARM_MATH_MVE_FLOAT16)
+       #define ARM_MATH_MVE_FLOAT16
+  #endif
+#endif
+
+#endif /*!defined(ARM_MATH_AUTOVECTORIZE)*/
+
+
+#if defined (ARM_MATH_HELIUM)
+  #if !defined(ARM_MATH_MVEF)
+    #define ARM_MATH_MVEF
+  #endif
+
+  #if !defined(ARM_MATH_MVEI)
+    #define ARM_MATH_MVEI
+  #endif
+
+  #if !defined(ARM_MATH_MVE_FLOAT16)
+       #define ARM_MATH_MVE_FLOAT16
+  #endif
+#endif
+
+
+
+#if   defined ( __CC_ARM )
+  /* Enter low optimization region - place directly above function definition */
+  #if defined( __ARM_ARCH_7EM__ )
+    #define LOW_OPTIMIZATION_ENTER \
+       _Pragma ("push")         \
+       _Pragma ("O1")
+  #else
+    #define LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* Exit low optimization region - place directly after end of function definition */
+  #if defined ( __ARM_ARCH_7EM__ )
+    #define LOW_OPTIMIZATION_EXIT \
+       _Pragma ("pop")
+  #else
+    #define LOW_OPTIMIZATION_EXIT
+  #endif
+
+  /* Enter low optimization region - place directly above function definition */
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+
+  /* Exit low optimization region - place directly after end of function definition */
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined (__ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __GNUC__ )
+  #define LOW_OPTIMIZATION_ENTER \
+       __attribute__(( optimize("-O1") ))
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __ICCARM__ )
+  /* Enter low optimization region - place directly above function definition */
+  #if defined ( __ARM_ARCH_7EM__ )
+    #define LOW_OPTIMIZATION_ENTER \
+       _Pragma ("optimize=low")
+  #else
+    #define LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* Exit low optimization region - place directly after end of function definition */
+  #define LOW_OPTIMIZATION_EXIT
+
+  /* Enter low optimization region - place directly above function definition */
+  #if defined ( __ARM_ARCH_7EM__ )
+    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
+       _Pragma ("optimize=low")
+  #else
+    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* Exit low optimization region - place directly after end of function definition */
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __TI_ARM__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __CSMC__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __TASKING__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+       
+#elif defined ( _MSC_VER ) || defined(__GNUC_PYTHON__)
+      #define LOW_OPTIMIZATION_ENTER
+      #define LOW_OPTIMIZATION_EXIT
+      #define IAR_ONLY_LOW_OPTIMIZATION_ENTER 
+      #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+#endif
+
+
+
+/* Compiler specific diagnostic adjustment */
+#if   defined ( __CC_ARM )
+
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+
+#elif defined ( __GNUC__ )
+#pragma GCC diagnostic pop
+
+#elif defined ( __ICCARM__ )
+
+#elif defined ( __TI_ARM__ )
+
+#elif defined ( __CSMC__ )
+
+#elif defined ( __TASKING__ )
+
+#elif defined ( _MSC_VER )
+
+#else
+  #error Unknown compiler
+#endif
+
+#ifdef   __cplusplus
+}
+#endif
+
+#if __ARM_FEATURE_MVE
+#include <arm_mve.h>
+#endif
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+ /**
+   * @brief 8-bit fractional data type in 1.7 format.
+   */
+  typedef int8_t q7_t;
+
+  /**
+   * @brief 16-bit fractional data type in 1.15 format.
+   */
+  typedef int16_t q15_t;
+
+  /**
+   * @brief 32-bit fractional data type in 1.31 format.
+   */
+  typedef int32_t q31_t;
+
+  /**
+   * @brief 64-bit fractional data type in 1.63 format.
+   */
+  typedef int64_t q63_t;
+
+  /**
+   * @brief 32-bit floating-point type definition.
+   */
+  typedef float float32_t;
+
+  /**
+   * @brief 64-bit floating-point type definition.
+   */
+  typedef double float64_t;
+
+  /**
+   * @brief vector types
+   */
+#if defined(ARM_MATH_NEON) || (defined (ARM_MATH_MVEI)  && !defined(ARM_MATH_AUTOVECTORIZE))
+  /**
+   * @brief 64-bit fractional 128-bit vector data type in 1.63 format
+   */
+  typedef int64x2_t q63x2_t;
+
+  /**
+   * @brief 32-bit fractional 128-bit vector data type in 1.31 format.
+   */
+  typedef int32x4_t q31x4_t;
+
+  /**
+   * @brief 16-bit fractional 128-bit vector data type with 16-bit alignment in 1.15 format.
+   */
+  typedef __ALIGNED(2) int16x8_t q15x8_t;
+
+ /**
+   * @brief 8-bit fractional 128-bit vector data type with 8-bit alignment in 1.7 format.
+   */
+  typedef __ALIGNED(1) int8x16_t q7x16_t;
+
+    /**
+   * @brief 32-bit fractional 128-bit vector pair data type in 1.31 format.
+   */
+  typedef int32x4x2_t q31x4x2_t;
+
+  /**
+   * @brief 32-bit fractional 128-bit vector quadruplet data type in 1.31 format.
+   */
+  typedef int32x4x4_t q31x4x4_t;
+
+  /**
+   * @brief 16-bit fractional 128-bit vector pair data type in 1.15 format.
+   */
+  typedef int16x8x2_t q15x8x2_t;
+
+  /**
+   * @brief 16-bit fractional 128-bit vector quadruplet data type in 1.15 format.
+   */
+  typedef int16x8x4_t q15x8x4_t;
+
+  /**
+   * @brief 8-bit fractional 128-bit vector pair data type in 1.7 format.
+   */
+  typedef int8x16x2_t q7x16x2_t;
+
+  /**
+   * @brief 8-bit fractional 128-bit vector quadruplet data type in 1.7 format.
+   */
+   typedef int8x16x4_t q7x16x4_t;
+
+  /**
+   * @brief 32-bit fractional data type in 9.23 format.
+   */
+  typedef int32_t q23_t;
+
+  /**
+   * @brief 32-bit fractional 128-bit vector data type in 9.23 format.
+   */
+  typedef int32x4_t q23x4_t;
+
+  /**
+   * @brief 64-bit status 128-bit vector data type.
+   */
+  typedef int64x2_t status64x2_t;
+
+  /**
+   * @brief 32-bit status 128-bit vector data type.
+   */
+  typedef int32x4_t status32x4_t;
+
+  /**
+   * @brief 16-bit status 128-bit vector data type.
+   */
+  typedef int16x8_t status16x8_t;
+
+  /**
+   * @brief 8-bit status 128-bit vector data type.
+   */
+  typedef int8x16_t status8x16_t;
+
+
+#endif
+
+#if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF)  && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/
+  /**
+   * @brief 32-bit floating-point 128-bit vector type
+   */
+  typedef float32x4_t f32x4_t;
+
+  /**
+   * @brief 32-bit floating-point 128-bit vector pair data type
+   */
+  typedef float32x4x2_t f32x4x2_t;
+
+  /**
+   * @brief 32-bit floating-point 128-bit vector quadruplet data type
+   */
+  typedef float32x4x4_t f32x4x4_t;
+
+  /**
+   * @brief 32-bit ubiquitous 128-bit vector data type
+   */
+  typedef union _any32x4_t
+  {
+      float32x4_t     f;
+      int32x4_t       i;
+  } any32x4_t;
+
+#endif
+
+#if defined(ARM_MATH_NEON)
+  /**
+   * @brief 32-bit fractional 64-bit vector data type in 1.31 format.
+   */
+  typedef int32x2_t  q31x2_t;
+
+  /**
+   * @brief 16-bit fractional 64-bit vector data type in 1.15 format.
+   */
+  typedef  __ALIGNED(2) int16x4_t q15x4_t;
+
+  /**
+   * @brief 8-bit fractional 64-bit vector data type in 1.7 format.
+   */
+  typedef  __ALIGNED(1) int8x8_t q7x8_t;
+
+  /**
+   * @brief 32-bit float 64-bit vector data type.
+   */
+  typedef float32x2_t  f32x2_t;
+
+  /**
+   * @brief 32-bit floating-point 128-bit vector triplet data type
+   */
+  typedef float32x4x3_t f32x4x3_t;
+
+
+  /**
+   * @brief 32-bit fractional 128-bit vector triplet data type in 1.31 format
+   */
+  typedef int32x4x3_t q31x4x3_t;
+
+  /**
+   * @brief 16-bit fractional 128-bit vector triplet data type in 1.15 format
+   */
+  typedef int16x8x3_t q15x8x3_t;
+
+  /**
+   * @brief 8-bit fractional 128-bit vector triplet data type in 1.7 format
+   */
+  typedef int8x16x3_t q7x16x3_t;
+
+  /**
+   * @brief 32-bit floating-point 64-bit vector pair data type
+   */
+  typedef float32x2x2_t f32x2x2_t;
+
+  /**
+   * @brief 32-bit floating-point 64-bit vector triplet data type
+   */
+  typedef float32x2x3_t f32x2x3_t;
+
+  /**
+   * @brief 32-bit floating-point 64-bit vector quadruplet data type
+   */
+  typedef float32x2x4_t f32x2x4_t;
+
+
+  /**
+   * @brief 32-bit fractional 64-bit vector pair data type in 1.31 format
+   */
+  typedef int32x2x2_t q31x2x2_t;
+
+  /**
+   * @brief 32-bit fractional 64-bit vector triplet data type in 1.31 format
+   */
+  typedef int32x2x3_t q31x2x3_t;
+
+  /**
+   * @brief 32-bit fractional 64-bit vector quadruplet data type in 1.31 format
+   *
+   * Array of four int32x2_t vectors.
+   */
+  typedef int32x2x4_t q31x2x4_t;
+
+  /**
+   * @brief 16-bit fractional 64-bit vector pair data type in 1.15 format
+   *
+   * Array of two int16x4_t vectors.
+   */
+  typedef int16x4x2_t q15x4x2_t;
+
+  /**
+   * @brief 16-bit fractional 64-bit vector triplet data type in 1.15 format
+   *
+   * Array of three int16x4_t vectors.
+   */
+  typedef int16x4x3_t q15x4x3_t;
+
+  /**
+   * @brief 16-bit fractional 64-bit vector quadruplet data type in 1.15 format
+   *
+   * Array of four int16x4_t vectors.
+   */
+  typedef int16x4x4_t q15x4x4_t;
+
+  /**
+   * @brief 8-bit fractional 64-bit vector pair data type in 1.7 format
+   */
+  typedef int8x8x2_t q7x8x2_t;
+
+  /**
+   * @brief 8-bit fractional 64-bit vector triplet data type in 1.7 format
+   */
+  typedef int8x8x3_t q7x8x3_t;
+
+  /**
+   * @brief 8-bit fractional 64-bit vector quadruplet data type in 1.7 format
+   */
+  typedef int8x8x4_t q7x8x4_t;
+
+  /**
+   * @brief 32-bit ubiquitous 64-bit vector data type
+   */
+  typedef union _any32x2_t
+  {
+      float32x2_t     f;
+      int32x2_t       i;
+  } any32x2_t;
+
+
+  /**
+   * @brief 32-bit status 64-bit vector data type.
+   *
+   * Two 32-bit lanes, matching the 64-bit q31x2_t vector.
+   */
+  typedef int32x2_t status32x2_t;
+
+  /**
+   * @brief 16-bit status 64-bit vector data type.
+   *
+   * Four 16-bit lanes, matching the 64-bit q15x4_t vector.
+   */
+  typedef int16x4_t status16x4_t;
+
+  /**
+   * @brief 8-bit status 64-bit vector data type.
+   *
+   * Eight 8-bit lanes, matching the 64-bit q7x8_t vector.
+   */
+  typedef int8x8_t status8x8_t;
+
+#endif
+
+
+
+
+
+/* Largest finite values of the floating-point types. */
+#define F64_MAX   ((float64_t)DBL_MAX)
+#define F32_MAX   ((float32_t)FLT_MAX)
+
+
+
+/* Most negative finite values (-DBL_MAX / -FLT_MAX). Note that these are not
+   DBL_MIN / FLT_MIN, which denote the smallest positive normalized values. */
+#define F64_MIN   (-DBL_MAX)
+#define F32_MIN   (-FLT_MAX)
+
+
+
+/* Largest absolute values; same as F64_MAX / F32_MAX. */
+#define F64_ABSMAX   ((float64_t)DBL_MAX)
+#define F32_ABSMAX   ((float32_t)FLT_MAX)
+
+
+
+/* Smallest absolute value: zero. */
+#define F64_ABSMIN   ((float64_t)0.0)
+#define F32_ABSMIN   ((float32_t)0.0)
+
+
+/* Limits of the fixed-point types. The *_MIN macros cast the bit pattern with only
+   the top bit set to the signed type.
+   NOTE(review): presumably this yields the most negative value on the supported
+   two's-complement targets; the conversion itself is implementation-defined in C. */
+#define Q31_MAX   ((q31_t)(0x7FFFFFFFL))
+#define Q15_MAX   ((q15_t)(0x7FFF))
+#define Q7_MAX    ((q7_t)(0x7F))
+#define Q31_MIN   ((q31_t)(0x80000000L))
+#define Q15_MIN   ((q15_t)(0x8000))
+#define Q7_MIN    ((q7_t)(0x80))
+
+/* Largest and smallest absolute values of the fixed-point types. */
+#define Q31_ABSMAX   ((q31_t)(0x7FFFFFFFL))
+#define Q15_ABSMAX   ((q15_t)(0x7FFF))
+#define Q7_ABSMAX    ((q7_t)(0x7F))
+#define Q31_ABSMIN   ((q31_t)0)
+#define Q15_ABSMIN   ((q15_t)0)
+#define Q7_ABSMIN    ((q7_t)0)
+
+  /* Dimension C vector space */
+  #define CMPLX_DIM 2
+
+  /**
+   * @brief Error status returned by some functions in the library.
+   */
+
+  typedef enum
+  {
+    ARM_MATH_SUCCESS                 =  0,        /**< No error */
+    ARM_MATH_ARGUMENT_ERROR          = -1,        /**< One or more arguments are incorrect */
+    ARM_MATH_LENGTH_ERROR            = -2,        /**< Length of data buffer is incorrect */
+    ARM_MATH_SIZE_MISMATCH           = -3,        /**< Size of matrices is not compatible with the operation */
+    ARM_MATH_NANINF                  = -4,        /**< Not-a-number (NaN) or infinity is generated */
+    ARM_MATH_SINGULAR                = -5,        /**< Input matrix is singular and cannot be inverted */
+    ARM_MATH_TEST_FAILURE            = -6,        /**< Test Failed */
+    ARM_MATH_DECOMPOSITION_FAILURE   = -7         /**< Decomposition Failed */
+  } arm_status;
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*ifndef _ARM_MATH_TYPES_H_ */
diff --git a/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_math_types.h b/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_math_types.h
new file mode 100644
index 0000000..fc7080f
--- /dev/null
+++ b/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_math_types.h
@@ -0,0 +1,167 @@
+/******************************************************************************
+ * @file     arm_nn_math_types.h
+ * @brief    Compiler include and basic types
+ * @version  V1.0.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+   Copied from CMSIS/DSP/arm_math_types.h and modified
+*/
+
+#ifndef _ARM_NN_MATH_TYPES_H_
+
+#define _ARM_NN_MATH_TYPES_H_
+
+/* DSP include for enum arm_status. */
+#include "arm_math_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Compiler specific diagnostic adjustment */
+#if defined(__CC_ARM)
+
+#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+
+#elif defined(__GNUC__)
+
+#elif defined(__ICCARM__)
+
+#elif defined(__TI_ARM__)
+
+#elif defined(__CSMC__)
+
+#elif defined(__TASKING__)
+
+#elif defined(_MSC_VER)
+
+#else
+#error Unknown compiler
+#endif
+
+/* Included for intrinsics definitions */
+#if defined(_MSC_VER)
+#include <stdint.h>
+#ifndef __STATIC_FORCEINLINE
+#define __STATIC_FORCEINLINE static __forceinline
+#endif
+#ifndef __STATIC_INLINE
+#define __STATIC_INLINE static __inline
+#endif
+#ifndef __ALIGNED
+#define __ALIGNED(x) __declspec(align(x))
+#endif
+
+#elif defined(__GNUC_PYTHON__)
+#include <stdint.h>
+#ifndef __ALIGNED
+#define __ALIGNED(x) __attribute__((aligned(x)))
+#endif
+#ifndef __STATIC_FORCEINLINE
+#define __STATIC_FORCEINLINE static inline __attribute__((always_inline))
+#endif
+#ifndef __STATIC_INLINE
+#define __STATIC_INLINE static inline
+#endif
+
+#else
+#include "cmsis_compiler.h"
+#endif
+
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+#include <string.h>
+
+/* evaluate ARM DSP feature */
+#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
+#ifndef ARM_MATH_DSP
+#define ARM_MATH_DSP 1
+#endif
+#endif
+
+#if __ARM_FEATURE_MVE
+#ifndef ARM_MATH_MVEI
+#define ARM_MATH_MVEI
+#endif
+#endif
+
+/* Compiler specific diagnostic adjustment */
+#if defined(__CC_ARM)
+
+#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+
+#elif defined(__GNUC__)
+// #pragma GCC diagnostic pop
+
+#elif defined(__ICCARM__)
+
+#elif defined(__TI_ARM__)
+
+#elif defined(__CSMC__)
+
+#elif defined(__TASKING__)
+
+#elif defined(_MSC_VER)
+
+#else
+#error Unknown compiler
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#if __ARM_FEATURE_MVE
+#include <arm_mve.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Limits of the q31_t and q7_t fixed-point types used by the NN library
+ */
+
+#define NN_Q31_MAX ((q31_t)(0x7FFFFFFFL))
+#define NN_Q7_MAX ((q7_t)(0x7F))
+#define NN_Q31_MIN ((q31_t)(0x80000000L))
+#define NN_Q7_MIN ((q7_t)(0x80))
+
+/**
+ * @brief Error status returned by some functions in the library.
+ */
+
+typedef enum
+{
+    ARM_CMSIS_NN_SUCCESS = 0,        /**< No error */
+    ARM_CMSIS_NN_ARG_ERROR = -1,     /**< One or more arguments are incorrect */
+    ARM_CMSIS_NN_NO_IMPL_ERROR = -2, /**<  No implementation available */
+} arm_cmsis_nn_status;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*ifndef _ARM_NN_MATH_TYPES_H_ */
diff --git a/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_tables.h b/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_tables.h
new file mode 100644
index 0000000..327294d
--- /dev/null
+++ b/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_tables.h
@@ -0,0 +1,56 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_tables.h
+ * Description:  Extern declaration for NN tables
+ *
+ * $Date:        17. August 2021
+ * $Revision:    V.1.0.2
+ *
+ * Target Processor:  Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _ARM_NN_TABLES_H
+#define _ARM_NN_TABLES_H
+
+#include "arm_nn_math_types.h"
+
+/**
+ * @brief Lookup tables for the sigmoid and tanh activation functions (256 entries each, q15_t and q7_t variants)
+ *
+ */
+
+extern const q15_t sigmoidTable_q15[256];
+extern const q7_t sigmoidTable_q7[256];
+
+extern const q7_t tanhTable_q7[256];
+extern const q15_t tanhTable_q15[256];
+
+/**
+ * @brief 2-way tables for various activation functions
+ *
+ * 2-way table: H table for values larger than 1/4,
+ * L table for values smaller than 1/4, H table for the remainder.
+ * These exist only for the q15_t version; it does not make
+ * sense to have them for the q7_t type.
+ */
+extern const q15_t sigmoidHTable_q15[192];
+extern const q15_t sigmoidLTable_q15[128];
+
+#endif /* _ARM_NN_TABLES_H */
diff --git a/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_types.h b/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_types.h
new file mode 100644
index 0000000..c371236
--- /dev/null
+++ b/features/cmsis_nn_sample_code/nnlib/Include/arm_nn_types.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_types.h
+ * Description:  Public header file to contain the CMSIS-NN structs for the
+ *               TensorFlowLite micro compliant functions
+ *
+ * $Date:        19. March 2021
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ * -------------------------------------------------------------------- */
+
+#ifndef _ARM_NN_TYPES_H
+#define _ARM_NN_TYPES_H
+
+#include <stdint.h>
+
+/** CMSIS-NN object to contain the width and height of a tile (the type of the stride, padding and dilation members) */
+typedef struct
+{
+    int32_t w; /**< Width */
+    int32_t h; /**< Height */
+} cmsis_nn_tile;
+
+/** CMSIS-NN object used for the function context; it carries the additional buffer that some functions require. */
+typedef struct
+{
+    void *buf;    /**< Pointer to a buffer needed for the optimization */
+    int32_t size; /**< Buffer size (presumably in bytes, like the *_get_buffer_size() results - TODO confirm) */
+} cmsis_nn_context;
+
+/** CMSIS-NN object to contain the dimensions of the tensors */
+typedef struct
+{
+    int32_t n; /**< Generic dimension to contain either the batch size or output channels.
+                     Please refer to the function documentation for more information */
+    int32_t h; /**< Height */
+    int32_t w; /**< Width */
+    int32_t c; /**< Channels: C_IN for input and filter dims, C_OUT for output dims (per the function documentation) */
+} cmsis_nn_dims;
+
+/** CMSIS-NN object for the per-channel quantization parameters */
+typedef struct
+{
+    int32_t *multiplier; /**< Pointer to the multiplier values (applied to each output channel) */
+    int32_t *shift;      /**< Pointer to the shift values (applied to each output channel) */
+} cmsis_nn_per_channel_quant_params;
+
+/** CMSIS-NN object for the per-tensor quantization parameters (a single multiplier/shift pair) */
+typedef struct
+{
+    int32_t multiplier; /**< Multiplier value */
+    int32_t shift;      /**< Shift value */
+} cmsis_nn_per_tensor_quant_params;
+
+/** CMSIS-NN object for the quantized Relu activation: the result is clamped using min and max */
+typedef struct
+{
+    int32_t min; /**< Min value used to clamp the result */
+    int32_t max; /**< Max value used to clamp the result */
+} cmsis_nn_activation;
+
+/** CMSIS-NN object for the convolution layer parameters */
+typedef struct
+{
+    int32_t input_offset;  /**< Zero value for the input tensor */
+    int32_t output_offset; /**< Zero value for the output tensor */
+    cmsis_nn_tile stride;           /**< Stride, given as a (w, h) tile */
+    cmsis_nn_tile padding;          /**< Padding, given as a (w, h) tile */
+    cmsis_nn_tile dilation;         /**< Dilation, given as a (w, h) tile */
+    cmsis_nn_activation activation; /**< Min/max values used to clamp the result */
+} cmsis_nn_conv_params;
+
+/** CMSIS-NN object for Depthwise convolution layer parameters */
+typedef struct
+{
+    int32_t input_offset;  /**< Zero value for the input tensor */
+    int32_t output_offset; /**< Zero value for the output tensor */
+    int32_t ch_mult;       /**< Channel Multiplier. ch_mult * in_ch = out_ch */
+    cmsis_nn_tile stride;           /**< Stride, given as a (w, h) tile */
+    cmsis_nn_tile padding;          /**< Padding, given as a (w, h) tile */
+    cmsis_nn_tile dilation;         /**< Dilation, given as a (w, h) tile */
+    cmsis_nn_activation activation; /**< Min/max values used to clamp the result */
+} cmsis_nn_dw_conv_params;
+/** CMSIS-NN object for pooling layer parameters */
+typedef struct
+{
+    cmsis_nn_tile stride;           /**< Stride, given as a (w, h) tile */
+    cmsis_nn_tile padding;          /**< Padding, given as a (w, h) tile */
+    cmsis_nn_activation activation; /**< Min/max values used to clamp the result */
+} cmsis_nn_pool_params;
+
+/** CMSIS-NN object for Fully Connected layer parameters */
+typedef struct
+{
+    int32_t input_offset;  /**< Zero value for the input tensor */
+    int32_t filter_offset; /**< Zero value for the filter tensor. Not used */
+    int32_t output_offset; /**< Zero value for the output tensor */
+    cmsis_nn_activation activation; /**< Min/max values used to clamp the result */
+} cmsis_nn_fc_params;
+
+/** CMSIS-NN object for SVDF layer parameters */
+typedef struct
+{
+    int32_t rank;          /**< SVDF rank - NOTE(review): exact meaning is not documented in this header; confirm */
+    int32_t input_offset;  /**< Zero value for the input tensor */
+    int32_t output_offset; /**< Zero value for the output tensor */
+    cmsis_nn_activation input_activation;  /**< Min/max clamp pair; presumably for the input-side stage - confirm */
+    cmsis_nn_activation output_activation; /**< Min/max clamp pair; presumably for the final output - confirm */
+} cmsis_nn_svdf_params;
+
+#endif // _ARM_NN_TYPES_H
diff --git a/features/cmsis_nn_sample_code/nnlib/Include/arm_nnfunctions.h b/features/cmsis_nn_sample_code/nnlib/Include/arm_nnfunctions.h
new file mode 100644
index 0000000..4c20e99
--- /dev/null
+++ b/features/cmsis_nn_sample_code/nnlib/Include/arm_nnfunctions.h
@@ -0,0 +1,2314 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nnfunctions.h
+ * Description:  Public header file for CMSIS NN Library
+ *
+ * $Date:        17 August 2021
+ * $Revision:    V.7.3.1
+ *
+ * Target Processor:  Cortex-M CPUs
+ * -------------------------------------------------------------------- */
+
+/**
+   \mainpage CMSIS NN Software Library
+   *
+   * Introduction
+   * ------------
+   *
+   * This user manual describes the CMSIS NN software library,
+   * a collection of efficient neural network kernels developed to maximize the
+   * performance and minimize the memory footprint of neural networks on Cortex-M processor cores.
+   *
+   * The library is divided into a number of functions each covering a specific category:
+   * - Convolution Functions
+   * - Activation Functions
+   * - Fully-connected Layer Functions
+   * - SVDF Layer Functions
+   * - Pooling Functions
+   * - Softmax Functions
+   * - Basic math Functions
+   *
+   * The library has separate functions for operating on different weight and activation data
+   * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The description of the
+   * kernels are included in the function description. The implementation details are also
+   * described in this paper [1].
+   *
+   * Function Classification
+   * --------
+   * The functions can be classified into two segments
+   * - Legacy functions supporting ARM's internal symmetric quantization(8 bits).
+   * - Functions that support TensorFlow Lite framework with symmetric quantization(8 bits).
+   *
+   * The legacy functions can be identified with their suffix of _q7 or _q15 and no new development is done there.
+   * The article in [2] describes in detail how to run a network using the legacy functions.
+   *
+   * The functions supporting TensorFlow Lite framework are identified by the _s8 suffix and can be invoked from TFL
+   * micro. The functions are bit exact to TensorFlow Lite. Refer to the TensorFlow's documentation in [3] on how to run
+   * a TensorFlow Lite model using optimized CMSIS-NN kernels.
+   *
+   * Block Diagram
+   * --------
+   * \image html CMSIS-NN-OVERVIEW.PNG
+   *
+   * Examples
+   * --------
+   *
+   * The library ships with a number of examples which demonstrate how to use the library functions.
+   *
+   * Pre-processor Macros
+   * ------------
+   *
+   * Each library project has different pre-processor macros.
+   *
+   * - ARM_MATH_DSP:
+   *
+   * Define macro ARM_MATH_DSP if the silicon supports DSP instructions (DSP extension).
+   *
+   * - ARM_MATH_MVEI:
+   *
+   * Define macro ARM_MATH_MVEI if the silicon supports the M-Profile Vector Extension.
+   *
+   * - ARM_MATH_AUTOVECTORIZE
+   *  Used in conjunction with ARM_MATH_MVEI to let the compiler auto vectorize the functions that use inline
+   *  assembly. It does not affect functions that use C or intrinsics.
+   * - ARM_MATH_BIG_ENDIAN:
+   *
+   * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. This is supported only for the legacy
+   * functions, i.e., functions targeted at TensorFlow Lite do not support big endianness. By default library builds for
+   * little endian targets.
+   *
+   * - ARM_NN_TRUNCATE:
+   *
+   * Define macro ARM_NN_TRUNCATE to use floor instead of round-to-the-nearest-int for the computation.
+   *
+   *
+   * Copyright Notice
+   * ------------
+   *
+   * Copyright (C) 2010-2019 Arm Limited. All rights reserved.
+   *
+   * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601
+   *
+   * [2] Converting a Neural Network for Arm Cortex-M with CMSIS-NN
+   *
+   https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides/converting-a-neural-network-for-arm-cortex-m-with-cmsis-nn/single-page
+   * [3] https://www.tensorflow.org/lite/microcontrollers/library
+   *
+   * [4] https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN#legacy-vs-tfl-micro-compliant-apis
+   */
+
+/**
+ * @defgroup groupNN Neural Network Functions
+ * A collection of functions to perform basic operations for neural network layers. Functions with a _s8 suffix support
+ * TensorFlow Lite framework.
+ */
+
+#ifndef _ARM_NNFUNCTIONS_H
+#define _ARM_NNFUNCTIONS_H
+
+#include "arm_nn_math_types.h"
+#include "arm_nn_types.h"
+
+#define USE_INTRINSIC
+
+//#define ARM_NN_TRUNCATE /* This config the rounding model to floor or round to the nearest int */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Enum for specifying activation function types
+ *
+ */
+typedef enum
+{
+    ARM_SIGMOID = 0,
+    /**< Sigmoid activation function */
+    ARM_TANH = 1,
+    /**< Tanh activation function */
+} arm_nn_activation_type;
+
+/**
+ * @defgroup NNConv Convolution Functions
+ *
+ * Collection of convolution, depthwise convolution functions and their variants.
+ *
+ * The convolution is implemented in 2 steps: im2col and GEMM
+ *
+ * im2col is a process of converting each patch of image data into
+ * a column. After im2col, the convolution is computed as matrix-matrix
+ * multiplication.
+ *
+ * To reduce the memory footprint, the im2col is performed partially.
+ * In each iteration, only a few columns (i.e., patches) are generated and
+ * computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions.
+ *
+ */
+
+/**
+ * @brief s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in
+ *        cmsis-nn
+ *        to perform the convolution.
+ *
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                Range of conv_params->input_offset  : [-127, 128]
+ *                                Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int8
+ *
+ * @return     The function returns either
+ *                  <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail, or
+ *                  <code>ARM_MATH_SUCCESS</code> on successful completion.
+ *
+ */
+arm_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
+                                   const cmsis_nn_conv_params *conv_params,
+                                   const cmsis_nn_per_channel_quant_params *quant_params,
+                                   const cmsis_nn_dims *input_dims,
+                                   const q7_t *input_data,
+                                   const cmsis_nn_dims *filter_dims,
+                                   const q7_t *filter_data,
+                                   const cmsis_nn_dims *bias_dims,
+                                   const int32_t *bias_data,
+                                   const cmsis_nn_dims *output_dims,
+                                   q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for arm_convolve_wrapper_s8
+ *
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                Range of conv_params->input_offset  : [-127, 128]
+ *                                Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      input_dims     Input (activation) dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      filter_dims    Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
+ *                                filter dimensions
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ *
+ * @return         The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params,
+                                                const cmsis_nn_dims *input_dims,
+                                                const cmsis_nn_dims *filter_dims,
+                                                const cmsis_nn_dims *output_dims);
+
+/**
+ * @brief s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in
+ *        cmsis-nn
+ *        to perform the convolution.
+ *
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int16
+ *
+ * @return     The function returns either
+ *                  <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail, or
+ *                  <code>ARM_MATH_SUCCESS</code> on successful completion.
+ *
+ */
+arm_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_conv_params *conv_params,
+                                    const cmsis_nn_per_channel_quant_params *quant_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q15_t *input_data,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const q7_t *filter_data,
+                                    const cmsis_nn_dims *bias_dims,
+                                    const int64_t *bias_data,
+                                    const cmsis_nn_dims *output_dims,
+                                    q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for arm_convolve_wrapper_s16
+ *
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      input_dims     Input (activation) dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      filter_dims    Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
+ *                                filter dimensions
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ *
+ * @return         The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params,
+                                                 const cmsis_nn_dims *input_dims,
+                                                 const cmsis_nn_dims *filter_dims,
+                                                 const cmsis_nn_dims *output_dims);
+
+/**
+ * @brief Basic s8 convolution function
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_s8_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                Range of conv_params->input_offset  : [-127, 128]
+ *                                Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int8
+ *
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *    3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
+ *
+ */
+arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
+                           const cmsis_nn_conv_params *conv_params,
+                           const cmsis_nn_per_channel_quant_params *quant_params,
+                           const cmsis_nn_dims *input_dims,
+                           const q7_t *input_data,
+                           const cmsis_nn_dims *filter_dims,
+                           const q7_t *filter_data,
+                           const cmsis_nn_dims *bias_dims,
+                           const int32_t *bias_data,
+                           const cmsis_nn_dims *output_dims,
+                           q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for s8 convolution function
+ *
+ * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
+ *                                        are the spatial filter dimensions
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Basic s16 convolution function
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_s16_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int16
+ *
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7/q15 is used as data type even though it is s8/s16 data. It is done so to be consistent with existing APIs.
+ *    3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
+ *
+ */
+arm_status arm_convolve_s16(const cmsis_nn_context *ctx,
+                            const cmsis_nn_conv_params *conv_params,
+                            const cmsis_nn_per_channel_quant_params *quant_params,
+                            const cmsis_nn_dims *input_dims,
+                            const q15_t *input_data,
+                            const cmsis_nn_dims *filter_dims,
+                            const q7_t *filter_data,
+                            const cmsis_nn_dims *bias_dims,
+                            const int64_t *bias_data,
+                            const cmsis_nn_dims *output_dims,
+                            q15_t *output_data);
+/**
+ * @brief Optimized s16 convolution function
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_fast_s16_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions. (filter_dims->w * filter_dims->h * input_dims->c) must be
+ *                                less than 512 (see detail 4; NOTE(review): confirm the bound against the kernel)
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int16
+ *
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7/q15 is used as data type even though it is s8/s16 data. It is done so to be consistent with existing APIs.
+ *    3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
+ *    4. Implementation supports kernel volumes (filter width * filter height * input channels) < 512.
+ *
+ */
+
+arm_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
+                                 const cmsis_nn_conv_params *conv_params,
+                                 const cmsis_nn_per_channel_quant_params *quant_params,
+                                 const cmsis_nn_dims *input_dims,
+                                 const q15_t *input_data,
+                                 const cmsis_nn_dims *filter_dims,
+                                 const q7_t *filter_data,
+                                 const cmsis_nn_dims *bias_dims,
+                                 const int64_t *bias_data,
+                                 const cmsis_nn_dims *output_dims,
+                                 q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for s16 convolution function
+ *
+ * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
+ *                                        are the spatial filter dimensions
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Get the required buffer size for fast s16 convolution function
+ *
+ * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
+ *                                        are the spatial filter dimensions
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Basic Q7 convolution function (one size value per input/kernel/output; see the _nonsquare variant for x/y)
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
+                                     const uint16_t dim_im_in,
+                                     const uint16_t ch_im_in,
+                                     const q7_t *wt,
+                                     const uint16_t ch_im_out,
+                                     const uint16_t dim_kernel,
+                                     const uint16_t padding,
+                                     const uint16_t stride,
+                                     const q7_t *bias,
+                                     const uint16_t bias_shift,
+                                     const uint16_t out_shift,
+                                     q7_t *Im_out,
+                                     const uint16_t dim_im_out,
+                                     q15_t *bufferA,
+                                     q7_t *bufferB);
+
+/**
+ * @brief Basic Q7 convolution function (non-square shape)
+ * @param[in]       Im_in        pointer to input tensor
+ * @param[in]       dim_im_in_x  input tensor dimension x
+ * @param[in]       dim_im_in_y  input tensor dimension y
+ * @param[in]       ch_im_in     number of input tensor channels
+ * @param[in]       wt           pointer to kernel weights
+ * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x filter kernel size x
+ * @param[in]       dim_kernel_y filter kernel size y
+ * @param[in]       padding_x    padding size x
+ * @param[in]       padding_y    padding size y
+ * @param[in]       stride_x     convolution stride x
+ * @param[in]       stride_y     convolution stride y
+ * @param[in]       bias         pointer to bias
+ * @param[in]       bias_shift   amount of left-shift for bias
+ * @param[in]       out_shift    amount of right-shift for output
+ * @param[in,out]   Im_out       pointer to output tensor
+ * @param[in]       dim_im_out_x output tensor dimension x
+ * @param[in]       dim_im_out_y output tensor dimension y
+ * @param[in,out]   bufferA      pointer to buffer space for input
+ * @param[in,out]   bufferB      pointer to buffer space for output
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ */
+arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
+                                               const uint16_t dim_im_in_x,
+                                               const uint16_t dim_im_in_y,
+                                               const uint16_t ch_im_in,
+                                               const q7_t *wt,
+                                               const uint16_t ch_im_out,
+                                               const uint16_t dim_kernel_x,
+                                               const uint16_t dim_kernel_y,
+                                               const uint16_t padding_x,
+                                               const uint16_t padding_y,
+                                               const uint16_t stride_x,
+                                               const uint16_t stride_y,
+                                               const q7_t *bias,
+                                               const uint16_t bias_shift,
+                                               const uint16_t out_shift,
+                                               q7_t *Im_out,
+                                               const uint16_t dim_im_out_x,
+                                               const uint16_t dim_im_out_y,
+                                               q15_t *bufferA,
+                                               q7_t *bufferB);
+
+/**
+ * @brief Basic Q15 convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
+                                      const uint16_t dim_im_in,
+                                      const uint16_t ch_im_in,
+                                      const q15_t *wt,
+                                      const uint16_t ch_im_out,
+                                      const uint16_t dim_kernel,
+                                      const uint16_t padding,
+                                      const uint16_t stride,
+                                      const q15_t *bias,
+                                      const uint16_t bias_shift,
+                                      const uint16_t out_shift,
+                                      q15_t *Im_out,
+                                      const uint16_t dim_im_out,
+                                      q15_t *bufferA,
+                                      q7_t *bufferB);
+
+/**
+ * @brief Fast Q7 convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * This function is the version with the full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is a multiple of 4
+ *   ch_im_out is a multiple of 2
+ */
+arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
+                                    const uint16_t dim_im_in,
+                                    const uint16_t ch_im_in,
+                                    const q7_t *wt,
+                                    const uint16_t ch_im_out,
+                                    const uint16_t dim_kernel,
+                                    const uint16_t padding,
+                                    const uint16_t stride,
+                                    const q7_t *bias,
+                                    const uint16_t bias_shift,
+                                    const uint16_t out_shift,
+                                    q7_t *Im_out,
+                                    const uint16_t dim_im_out,
+                                    q15_t *bufferA,
+                                    q7_t *bufferB);
+
+/**
+ * @brief Fast Q7 convolution function (non-square shape)
+ * @param[in]       Im_in        pointer to input tensor
+ * @param[in]       dim_im_in_x  input tensor dimension x
+ * @param[in]       dim_im_in_y  input tensor dimension y
+ * @param[in]       ch_im_in     number of input tensor channels
+ * @param[in]       wt           pointer to kernel weights
+ * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x filter kernel size x
+ * @param[in]       dim_kernel_y filter kernel size y
+ * @param[in]       padding_x    padding size x
+ * @param[in]       padding_y    padding size y
+ * @param[in]       stride_x     convolution stride x
+ * @param[in]       stride_y     convolution stride y
+ * @param[in]       bias         pointer to bias
+ * @param[in]       bias_shift   amount of left-shift for bias
+ * @param[in]       out_shift    amount of right-shift for output
+ * @param[in,out]   Im_out       pointer to output tensor
+ * @param[in]       dim_im_out_x output tensor dimension x
+ * @param[in]       dim_im_out_y output tensor dimension y
+ * @param[in,out]   bufferA      pointer to buffer space for input
+ * @param[in,out]   bufferB      pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * This function is the version with the full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is a multiple of 4
+ *   ch_im_out is a multiple of 2
+ */
+
+arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
+                                              const uint16_t dim_im_in_x,
+                                              const uint16_t dim_im_in_y,
+                                              const uint16_t ch_im_in,
+                                              const q7_t *wt,
+                                              const uint16_t ch_im_out,
+                                              const uint16_t dim_kernel_x,
+                                              const uint16_t dim_kernel_y,
+                                              const uint16_t padding_x,
+                                              const uint16_t padding_y,
+                                              const uint16_t stride_x,
+                                              const uint16_t stride_y,
+                                              const q7_t *bias,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              q7_t *Im_out,
+                                              const uint16_t dim_im_out_x,
+                                              const uint16_t dim_im_out_y,
+                                              q15_t *bufferA,
+                                              q7_t *bufferB);
+
+/**
+ * @brief Fast Q7 version of 1x1 convolution (non-square shape)
+ * @param[in]       Im_in        pointer to input tensor
+ * @param[in]       dim_im_in_x  input tensor dimension x
+ * @param[in]       dim_im_in_y  input tensor dimension y
+ * @param[in]       ch_im_in     number of input tensor channels
+ * @param[in]       wt           pointer to kernel weights
+ * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x filter kernel size x
+ * @param[in]       dim_kernel_y filter kernel size y
+ * @param[in]       padding_x    padding size x
+ * @param[in]       padding_y    padding size y
+ * @param[in]       stride_x     convolution stride x
+ * @param[in]       stride_y     convolution stride y
+ * @param[in]       bias         pointer to bias
+ * @param[in]       bias_shift   amount of left-shift for bias
+ * @param[in]       out_shift    amount of right-shift for output
+ * @param[in,out]   Im_out       pointer to output tensor
+ * @param[in]       dim_im_out_x output tensor dimension x
+ * @param[in]       dim_im_out_y output tensor dimension y
+ * @param[in,out]   bufferA      pointer to buffer space for input
+ * @param[in,out]   bufferB      pointer to buffer space for output
+ * @return     The function returns either
+ *                          <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail, or
+ *                          <code>ARM_MATH_SUCCESS</code> on successful completion.
+ *
+ * This function implements convolution with a 1x1 kernel size (i.e., dim_kernel_x=1
+ * and dim_kernel_y=1). It can be used for the
+ * second half of MobileNets after depthwise separable convolution.
+ *
+ * This function is the version with the full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is a multiple of 4
+ *   ch_im_out is a multiple of 2
+ */
+arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
+                                                  const uint16_t dim_im_in_x,
+                                                  const uint16_t dim_im_in_y,
+                                                  const uint16_t ch_im_in,
+                                                  const q7_t *wt,
+                                                  const uint16_t ch_im_out,
+                                                  const uint16_t dim_kernel_x,
+                                                  const uint16_t dim_kernel_y,
+                                                  const uint16_t padding_x,
+                                                  const uint16_t padding_y,
+                                                  const uint16_t stride_x,
+                                                  const uint16_t stride_y,
+                                                  const q7_t *bias,
+                                                  const uint16_t bias_shift,
+                                                  const uint16_t out_shift,
+                                                  q7_t *Im_out,
+                                                  const uint16_t dim_im_out_x,
+                                                  const uint16_t dim_im_out_y,
+                                                  q15_t *bufferA,
+                                                  q7_t *bufferB);
+
+/**
+ * @brief Fast s8 version for 1x1 convolution (non-square shape)
+ *
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                Range of conv_params->input_offset  : [-127, 128]
+ *                                Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int8
+ *
+ * @return     The function returns either
+ *                  <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail, or
+ *                  <code>ARM_MATH_SUCCESS</code> on successful completion.
+ *
+ * @details
+ *   - Supported framework : TensorFlow Lite Micro
+ *   - The following constraints on the arguments apply
+ *      -# input_dims->c is a multiple of 4
+ *      -# conv_params->padding.w = conv_params->padding.h = 0
+ *      -# conv_params->stride.w = conv_params->stride.h = 1
+ *
+ */
+arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_conv_params *conv_params,
+                                    const cmsis_nn_per_channel_quant_params *quant_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q7_t *input_data,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const q7_t *filter_data,
+                                    const cmsis_nn_dims *bias_dims,
+                                    const int32_t *bias_data,
+                                    const cmsis_nn_dims *output_dims,
+                                    q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for arm_convolve_1x1_s8_fast
+ *
+ * @param[in]       input_dims            Input (activation) dimensions
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims);
+
+/**
+ * @brief 1xn convolution
+ *
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                Range of conv_params->input_offset  : [-127, 128]
+ *                                Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal
+ *                                spatial filter dimension
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int8
+ *
+ * @return     The function returns either
+ *                  <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail, or
+ *                  <code>ARM_MATH_SUCCESS</code> on successful completion.
+ *
+ * @details
+ *   - Supported framework : TensorFlow Lite Micro
+ *   - The following constraints on the arguments apply
+ *      -# input_dims->n equals 1
+ *      -# output_dims->w is a multiple of 4
+ *      -# Explicit constraints (since it is for 1xN convolution)
+ *      -## input_dims->h equals 1
+ *      -## output_dims->h equals 1
+ *      -## filter_dims->h equals 1
+ * @todo  Remove constraint on output_dims->w to make the function generic.
+ *
+ */
+arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
+                                 const cmsis_nn_conv_params *conv_params,
+                                 const cmsis_nn_per_channel_quant_params *quant_params,
+                                 const cmsis_nn_dims *input_dims,
+                                 const q7_t *input_data,
+                                 const cmsis_nn_dims *filter_dims,
+                                 const q7_t *filter_data,
+                                 const cmsis_nn_dims *bias_dims,
+                                 const int32_t *bias_data,
+                                 const cmsis_nn_dims *output_dims,
+                                 q7_t *output_data);
+
+/**
+ * @brief Get the required additional buffer size for 1xn convolution
+ *
+ * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the
+ *                                        horizontal spatial filter dimension
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Q7 version of convolution for RGB image
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * This kernel is written exclusively for convolution with ch_im_in
+ * equal to 3. This applies to the first layer of CNNs, whose input
+ * is an image in RGB format.
+ */
+
+arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
+                                   const uint16_t dim_im_in,
+                                   const uint16_t ch_im_in,
+                                   const q7_t *wt,
+                                   const uint16_t ch_im_out,
+                                   const uint16_t dim_kernel,
+                                   const uint16_t padding,
+                                   const uint16_t stride,
+                                   const q7_t *bias,
+                                   const uint16_t bias_shift,
+                                   const uint16_t out_shift,
+                                   q7_t *Im_out,
+                                   const uint16_t dim_im_out,
+                                   q15_t *bufferA,
+                                   q7_t *bufferB);
+
+/**
+ * @brief Fast Q15 convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * This function is the version with the full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is a multiple of 2
+ *   ch_im_out is a multiple of 2
+ *   dim_im_out is a multiple of 2
+ */
+
+arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
+                                     const uint16_t dim_im_in,
+                                     const uint16_t ch_im_in,
+                                     const q15_t *wt,
+                                     const uint16_t ch_im_out,
+                                     const uint16_t dim_kernel,
+                                     const uint16_t padding,
+                                     const uint16_t stride,
+                                     const q15_t *bias,
+                                     const uint16_t bias_shift,
+                                     const uint16_t out_shift,
+                                     q15_t *Im_out,
+                                     const uint16_t dim_im_out,
+                                     q15_t *bufferA,
+                                     q7_t *bufferB);
+
+/**
+ * @brief Fast Q15 convolution function (non-square shape)
+ * @param[in]       Im_in        pointer to input tensor
+ * @param[in]       dim_im_in_x  input tensor dimension x
+ * @param[in]       dim_im_in_y  input tensor dimension y
+ * @param[in]       ch_im_in     number of input tensor channels
+ * @param[in]       wt           pointer to kernel weights
+ * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x filter kernel size x
+ * @param[in]       dim_kernel_y filter kernel size y
+ * @param[in]       padding_x    padding size x
+ * @param[in]       padding_y    padding size y
+ * @param[in]       stride_x     convolution stride x
+ * @param[in]       stride_y     convolution stride y
+ * @param[in]       bias         pointer to bias
+ * @param[in]       bias_shift   amount of left-shift for bias
+ * @param[in]       out_shift    amount of right-shift for output
+ * @param[in,out]   Im_out       pointer to output tensor
+ * @param[in]       dim_im_out_x output tensor dimension x
+ * @param[in]       dim_im_out_y output tensor dimension y
+ * @param[in,out]   bufferA      pointer to buffer space for input
+ * @param[in,out]   bufferB      pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * @details
+ *
+ * <b>Buffer size:</b>
+ *
+ * bufferA size: 2*ch_im_in*dim_kernel_x*dim_kernel_y
+ *
+ * bufferB size: 0
+ *
+ * <b>Input dimension constraints:</b>
+ *
+ * ch_im_in is a multiple of 2
+ *
+ * ch_im_out is a multiple of 2
+ *
+ */
+
+arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
+                                               const uint16_t dim_im_in_x,
+                                               const uint16_t dim_im_in_y,
+                                               const uint16_t ch_im_in,
+                                               const q15_t *wt,
+                                               const uint16_t ch_im_out,
+                                               const uint16_t dim_kernel_x,
+                                               const uint16_t dim_kernel_y,
+                                               const uint16_t padding_x,
+                                               const uint16_t padding_y,
+                                               const uint16_t stride_x,
+                                               const uint16_t stride_y,
+                                               const q15_t *bias,
+                                               const uint16_t bias_shift,
+                                               const uint16_t out_shift,
+                                               q15_t *Im_out,
+                                               const uint16_t dim_im_out_x,
+                                               const uint16_t dim_im_out_y,
+                                               q15_t *bufferA,
+                                               q7_t *bufferB);
+
+/**
+ * @brief Q7 depthwise separable convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * This function is the version with the full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is a multiple of 2
+ *   ch_im_out is a multiple of 2
+ */
+
+arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
+                                               const uint16_t dim_im_in,
+                                               const uint16_t ch_im_in,
+                                               const q7_t *wt,
+                                               const uint16_t ch_im_out,
+                                               const uint16_t dim_kernel,
+                                               const uint16_t padding,
+                                               const uint16_t stride,
+                                               const q7_t *bias,
+                                               const uint16_t bias_shift,
+                                               const uint16_t out_shift,
+                                               q7_t *Im_out,
+                                               const uint16_t dim_im_out,
+                                               q15_t *bufferA,
+                                               q7_t *bufferB);
+
+/**
+ * @brief Q7 depthwise separable convolution function (non-square shape)
+ * @param[in]       Im_in         pointer to input tensor
+ * @param[in]       dim_im_in_x   input tensor dimension x
+ * @param[in]       dim_im_in_y   input tensor dimension y
+ * @param[in]       ch_im_in      number of input tensor channels
+ * @param[in]       wt            pointer to kernel weights
+ * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x  filter kernel size x
+ * @param[in]       dim_kernel_y  filter kernel size y
+ * @param[in]       padding_x     padding sizes x
+ * @param[in]       padding_y     padding sizes y
+ * @param[in]       stride_x      convolution stride x
+ * @param[in]       stride_y      convolution stride y
+ * @param[in]       bias          pointer to bias
+ * @param[in]       bias_shift    amount of left-shift for bias
+ * @param[in]       out_shift     amount of right-shift for output
+ * @param[in,out]   Im_out        pointer to output tensor
+ * @param[in]       dim_im_out_x  output tensor dimension x
+ * @param[in]       dim_im_out_y  output tensor dimension y
+ * @param[in,out]   bufferA       pointer to buffer space for input
+ * @param[in,out]   bufferB       pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * This function is the version with the full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is a multiple of 2
+ *   ch_im_out is a multiple of 2
+ */
+arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
+                                                         const uint16_t dim_im_in_x,
+                                                         const uint16_t dim_im_in_y,
+                                                         const uint16_t ch_im_in,
+                                                         const q7_t *wt,
+                                                         const uint16_t ch_im_out,
+                                                         const uint16_t dim_kernel_x,
+                                                         const uint16_t dim_kernel_y,
+                                                         const uint16_t padding_x,
+                                                         const uint16_t padding_y,
+                                                         const uint16_t stride_x,
+                                                         const uint16_t stride_y,
+                                                         const q7_t *bias,
+                                                         const uint16_t bias_shift,
+                                                         const uint16_t out_shift,
+                                                         q7_t *Im_out,
+                                                         const uint16_t dim_im_out_x,
+                                                         const uint16_t dim_im_out_y,
+                                                         q15_t *bufferA,
+                                                         q7_t *bufferB);
+
+/**
+ * @brief Wrapper function to pick the right optimized s8 depthwise convolution function
+ *
+ * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
+ *                                definition file to see if an additional buffer is required.
+ *                                Optional function {API}_get_buffer_size() provides the buffer
+ *                                size if required.
+ * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                dw_conv_params->dilation is not used.
+ *                                Range of dw_conv_params->input_offset : [-127, 128]
+ *                                Range of dw_conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each
+ *                                output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                                Batch argument N is not used and assumed to be 1.
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in, out] output_data    Output data pointer. Data type: int8
+ * @return     The function returns
+ *                <code>ARM_MATH_SUCCESS</code>   -  Successful completion.
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - Picks one of the following functions
+ *        -# arm_depthwise_conv_s8()
+ *        -# arm_depthwise_conv_3x3_s8() - Cortex-M CPUs with DSP extension only
+ *        -# arm_depthwise_conv_s8_opt()
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *    - Check details of arm_depthwise_conv_s8_opt() for potential data that can be accessed outside of the
+ * boundary.
+ */
+arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
+                                         const cmsis_nn_dw_conv_params *dw_conv_params,
+                                         const cmsis_nn_per_channel_quant_params *quant_params,
+                                         const cmsis_nn_dims *input_dims,
+                                         const q7_t *input_data,
+                                         const cmsis_nn_dims *filter_dims,
+                                         const q7_t *filter_data,
+                                         const cmsis_nn_dims *bias_dims,
+                                         const int32_t *bias_data,
+                                         const cmsis_nn_dims *output_dims,
+                                         q7_t *output_data);
+
+/**
+ * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8()
+ *
+ * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                dw_conv_params->dilation is not used.
+ *                                Range of dw_conv_params->input_offset : [-127, 128]
+ *                                Range of dw_conv_params->output_offset : [-128, 127]
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                                Batch argument N is not used and assumed to be 1.
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      output_dims    Output tensor dimensions. Format: [1, H, W, C_OUT]
+ * @return                        Size of additional memory required for optimizations in bytes.
+ *
+ */
+int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                      const cmsis_nn_dims *input_dims,
+                                                      const cmsis_nn_dims *filter_dims,
+                                                      const cmsis_nn_dims *output_dims);
+
+/**
+ * @brief Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions.
+ *
+ * @param[in, out] ctx            Function context (e.g. temporary buffer).
+ *                                Check the function definition file to see if an
+ *                                additional buffer is required.
+ *                                Optional function {API}_get_buffer_size() provides the
+ *                                buffer size if an additional buffer is required.
+ * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                dw_conv_params->dilation is not used.
+ *                                Range of dw_conv_params->input_offset : [-127, 128]
+ *                                Range of dw_conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each
+ *                                output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
+ *                                Batch argument N is not used.
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in, out] output_data    Output data pointer. Data type: int8
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ */
+arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
+                                 const cmsis_nn_dw_conv_params *dw_conv_params,
+                                 const cmsis_nn_per_channel_quant_params *quant_params,
+                                 const cmsis_nn_dims *input_dims,
+                                 const q7_t *input_data,
+                                 const cmsis_nn_dims *filter_dims,
+                                 const q7_t *filter_data,
+                                 const cmsis_nn_dims *bias_dims,
+                                 const int32_t *bias_data,
+                                 const cmsis_nn_dims *output_dims,
+                                 q7_t *output_data);
+
+/**
+ * @brief Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on
+ *        the input arguments (documented below). Refer to arm_depthwise_conv_s8() for function
+ *        argument details.
+ *
+ * @return     The function returns one of the following
+ *                <code>ARM_MATH_SIZE_MISMATCH</code> - Unsupported dimension of tensors
+ *                <code>ARM_MATH_ARGUMENT_ERROR</code> - Unsupported pad size along the x axis
+ *                <code>ARM_MATH_SUCCESS</code> - Successful operation
+ *
+ * @details
+ *   - Supported framework : TensorFlow Lite Micro
+ *   - The following constraints on the arguments apply
+ *      -# Number of input channels equals number of output channels
+ *      -# Filter height and width both equal 3
+ *      -# Padding along x is either 0 or 1.
+ *
+ */
+arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
+                                     const cmsis_nn_dw_conv_params *dw_conv_params,
+                                     const cmsis_nn_per_channel_quant_params *quant_params,
+                                     const cmsis_nn_dims *input_dims,
+                                     const q7_t *input_data,
+                                     const cmsis_nn_dims *filter_dims,
+                                     const q7_t *filter_data,
+                                     const cmsis_nn_dims *bias_dims,
+                                     const int32_t *bias_data,
+                                     const cmsis_nn_dims *output_dims,
+                                     q7_t *output_data);
+
+/**
+ * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel.
+ *        Refer to arm_depthwise_conv_s8() for function argument details.
+ *
+ * @return     The function returns one of the following
+ *                <code>ARM_MATH_SIZE_MISMATCH</code> - input channel != output channel or
+ *                                                      ch_mult != 1
+ *                <code>ARM_MATH_SUCCESS</code> - Successful operation
+ *
+ * @note       If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read out
+ *             for the following if MVE optimizations (Arm Helium Technology) are used.
+ *               - Output shift
+ *               - Output multiplier
+ *               - Output bias
+ *               - kernel
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - The following constraints on the arguments apply
+ *        -# Number of input channels equals number of output channels or ch_mult equals 1
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *    - Recommended when number of channels is 4 or greater.
+ *
+ */
+arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
+                                     const cmsis_nn_dw_conv_params *dw_conv_params,
+                                     const cmsis_nn_per_channel_quant_params *quant_params,
+                                     const cmsis_nn_dims *input_dims,
+                                     const q7_t *input_data,
+                                     const cmsis_nn_dims *filter_dims,
+                                     const q7_t *filter_data,
+                                     const cmsis_nn_dims *bias_dims,
+                                     const int32_t *bias_data,
+                                     const cmsis_nn_dims *output_dims,
+                                     q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for optimized s8 depthwise convolution
+ * function with constraint that in_channel equals out_channel.
+ * @param[in]       input_dims     Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
+ *                                 Batch argument N is not used.
+ * @param[in]       filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @defgroup FC Fully-connected Layer Functions
+ *
+ * Collection of fully-connected and matrix multiplication functions.
+ *
+ * Fully-connected layer is basically a matrix-vector multiplication
+ * with bias. The matrix is the weights and the input/output vectors
+ * are the activation values. Supported {weight, activation} precisions
+ * include {8-bit, 8-bit}, {16-bit, 16-bit}, and {8-bit, 16-bit}.
+ *
+ * Here we have two types of kernel functions. The basic function
+ * implements the function using the regular GEMV approach. The opt functions
+ * operate with weights in interleaved formats.
+ *
+ */
+
+/**
+ * @brief Q7 basic fully-connected layer function
+ * @param[in]       pV          pointer to input vector (q7_t)
+ * @param[in]       pM          pointer to matrix weights (q7_t)
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias (q7_t)
+ * @param[in,out]   pOut        pointer to output vector (q7_t)
+ * @param[in,out]   vec_buffer  pointer to buffer space for input (q15_t)
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+
+arm_status arm_fully_connected_q7(const q7_t *pV,
+                                  const q7_t *pM,
+                                  const uint16_t dim_vec,
+                                  const uint16_t num_of_rows,
+                                  const uint16_t bias_shift,
+                                  const uint16_t out_shift,
+                                  const q7_t *bias,
+                                  q7_t *pOut,
+                                  q15_t *vec_buffer);
+
+/**
+ * @brief Basic s8 Fully Connected function.
+ *
+ * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
+ *                                definition file to see if an additional buffer is required.
+ *                                Optional function {API}_get_buffer_size() provides the buffer
+ *                                size if an additional buffer is required.
+ * @param[in]      fc_params      Fully Connected layer parameters.
+ *                                Range of fc_params->input_offset  : [-127, 128]
+ *                                fc_params->filter_offset : 0
+ *                                Range of fc_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-tensor quantization info.
+ *                                It contains the multiplier and shift values to be applied to the output tensor.
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ *                                Input dimension is taken as Nx(H * W * C_IN)
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Two dimensional filter dimensions. Format: [N, C]
+ *                                N : accumulation depth and equals (H * W * C_IN) from input_dims
+ *                                C : output depth and equals C_OUT in output_dims
+ *                                H & W : Not used
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ *                                N, H, W : Not used
+ * @param[in]      bias_data      Bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, C_OUT]
+ *                                N : Batches
+ *                                C_OUT : Output depth
+ *                                H & W : Not used.
+ * @param[in, out] output_data    Output data pointer. Data type: int8
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ */
+arm_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
+                                  const cmsis_nn_fc_params *fc_params,
+                                  const cmsis_nn_per_tensor_quant_params *quant_params,
+                                  const cmsis_nn_dims *input_dims,
+                                  const q7_t *input_data,
+                                  const cmsis_nn_dims *filter_dims,
+                                  const q7_t *filter_data,
+                                  const cmsis_nn_dims *bias_dims,
+                                  const int32_t *bias_data,
+                                  const cmsis_nn_dims *output_dims,
+                                  q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for S8 basic fully-connected and
+ * matrix multiplication layer function for TF Lite
+ * @param[in]      filter_dims    dimensions of the filter
+ * @return         The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Basic s16 Fully Connected function.
+ *
+ * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
+ *                                definition file to see if an additional buffer is required.
+ *                                Optional function {API}_get_buffer_size() provides the buffer
+ *                                size if an additional buffer is required.
+ * @param[in]      fc_params      Fully Connected layer parameters.
+ *                                fc_params->input_offset  : 0
+ *                                fc_params->filter_offset : 0
+ *                                fc_params->output_offset : 0
+ * @param[in]      quant_params   Per-tensor quantization info.
+ *                                It contains the multiplier and shift values to be applied to the output tensor.
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ *                                Input dimension is taken as Nx(H * W * C_IN)
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Two dimensional filter dimensions. Format: [N, C]
+ *                                N : accumulation depth and equals (H * W * C_IN) from input_dims
+ *                                C : output depth and equals C_OUT in output_dims
+ *                                H & W : Not used
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ *                                N, H, W : Not used
+ * @param[in]      bias_data      Bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, C_OUT]
+ *                                N : Batches
+ *                                C_OUT : Output depth
+ *                                H & W : Not used.
+ * @param[in, out] output_data    Output data pointer. Data type: int16
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - q15 is used as data type even though it is s16 data. It is done so to be consistent with existing APIs.
+ */
+arm_status arm_fully_connected_s16(const cmsis_nn_context *ctx,
+                                   const cmsis_nn_fc_params *fc_params,
+                                   const cmsis_nn_per_tensor_quant_params *quant_params,
+                                   const cmsis_nn_dims *input_dims,
+                                   const q15_t *input_data,
+                                   const cmsis_nn_dims *filter_dims,
+                                   const q7_t *filter_data,
+                                   const cmsis_nn_dims *bias_dims,
+                                   const int64_t *bias_data,
+                                   const cmsis_nn_dims *output_dims,
+                                   q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for S16 basic fully-connected and
+ * matrix multiplication layer function for TF Lite
+ * @param[in]      filter_dims    dimensions of the filter
+ * @return         The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Q7 opt fully-connected layer function
+ * @param[in]       pV          pointer to input vector (q7_t)
+ * @param[in]       pM          pointer to matrix weights (q7_t)
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias (q7_t)
+ * @param[in,out]   pOut        pointer to output vector (q7_t)
+ * @param[in,out]   vec_buffer  pointer to buffer space for input (q15_t)
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+
+arm_status arm_fully_connected_q7_opt(const q7_t *pV,
+                                      const q7_t *pM,
+                                      const uint16_t dim_vec,
+                                      const uint16_t num_of_rows,
+                                      const uint16_t bias_shift,
+                                      const uint16_t out_shift,
+                                      const q7_t *bias,
+                                      q7_t *pOut,
+                                      q15_t *vec_buffer);
+
+/**
+ * @brief Q15 basic fully-connected layer function
+ * @param[in]       pV          pointer to input vector (q15_t)
+ * @param[in]       pM          pointer to matrix weights (q15_t)
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias (q15_t)
+ * @param[in,out]   pOut        pointer to output vector (q15_t)
+ * @param[in,out]   vec_buffer  pointer to buffer space for input (q15_t)
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+
+arm_status arm_fully_connected_q15(const q15_t *pV,
+                                   const q15_t *pM,
+                                   const uint16_t dim_vec,
+                                   const uint16_t num_of_rows,
+                                   const uint16_t bias_shift,
+                                   const uint16_t out_shift,
+                                   const q15_t *bias,
+                                   q15_t *pOut,
+                                   q15_t *vec_buffer);
+
+/**
+ * @brief Q15 opt fully-connected layer function
+ * @param[in]       pV          pointer to input vector (q15_t)
+ * @param[in]       pM          pointer to matrix weights (q15_t)
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias (q15_t)
+ * @param[in,out]   pOut        pointer to output vector (q15_t)
+ * @param[in,out]   vec_buffer  pointer to buffer space for input (q15_t)
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+
+arm_status arm_fully_connected_q15_opt(const q15_t *pV,
+                                       const q15_t *pM,
+                                       const uint16_t dim_vec,
+                                       const uint16_t num_of_rows,
+                                       const uint16_t bias_shift,
+                                       const uint16_t out_shift,
+                                       const q15_t *bias,
+                                       q15_t *pOut,
+                                       q15_t *vec_buffer);
+
+/**
+ * @brief Mixed Q15-Q7 fully-connected layer function
+ * @param[in]       pV          pointer to input vector (q15_t)
+ * @param[in]       pM          pointer to matrix weights (q7_t)
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias (q7_t)
+ * @param[in,out]   pOut        pointer to output vector (q15_t)
+ * @param[in,out]   vec_buffer  pointer to buffer space for input (q15_t)
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+
+arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV,
+                                              const q7_t *pM,
+                                              const uint16_t dim_vec,
+                                              const uint16_t num_of_rows,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              const q7_t *bias,
+                                              q15_t *pOut,
+                                              q15_t *vec_buffer);
+
+/**
+ * @brief Mixed Q15-Q7 opt fully-connected layer function
+ * @param[in]       pV          pointer to input vector (q15_t)
+ * @param[in]       pM          pointer to matrix weights (q7_t)
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias (q7_t)
+ * @param[in,out]   pOut        pointer to output vector (q15_t)
+ * @param[in,out]   vec_buffer  pointer to buffer space for input (q15_t)
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+
+arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
+                                                  const q7_t *pM,
+                                                  const uint16_t dim_vec,
+                                                  const uint16_t num_of_rows,
+                                                  const uint16_t bias_shift,
+                                                  const uint16_t out_shift,
+                                                  const q7_t *bias,
+                                                  q15_t *pOut,
+                                                  q15_t *vec_buffer);
+
+/**
+ * @brief Matrix-Multiplication Kernels for Convolution
+ *
+ * These functions are used within convolution layer functions for
+ * matrix multiplication.
+ *
+ * The implementation is similar to CMSIS-DSP arm_mat_mult functions
+ * with one Q7 and one Q15 operand. The Q15 operand is the im2col
+ * output, which always has 2 columns.
+ *
+ */
+
+/**
+ * @brief Matrix-multiplication function for convolution
+ * @param[in]       pA          pointer to operand A
+ * @param[in]       pInBuffer   pointer to operand B, always consists of 2 vectors
+ * @param[in]       ch_im_out   numRow of A
+ * @param[in]       numCol_A    numCol of A
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        the bias
+ * @param[in,out]   pOut        pointer to output
+ * @return     The function returns the incremented output pointer
+ */
+
+q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t *pA,
+                                    const q15_t *pInBuffer,
+                                    const uint16_t ch_im_out,
+                                    const uint16_t numCol_A,
+                                    const uint16_t bias_shift,
+                                    const uint16_t out_shift,
+                                    const q7_t *bias,
+                                    q7_t *pOut);
+/**
+ * @brief Matrix-multiplication function for convolution with per-channel requantization.
+ * @param[in]       input_a          pointer to operand A
+ * @param[in]       input_b          pointer to operand B, always consists of 2 vectors.
+ * @param[in]       output_ch        number of rows of A
+ * @param[in]       out_shift        pointer to per output channel requantization shift parameter.
+ * @param[in]       out_mult         pointer to per output channel requantization multiplier parameter.
+ * @param[in]       out_offset       output tensor offset.
+ * @param[in]       activation_min   minimum value to clamp the output to. Range : int8
+ * @param[in]       activation_max   maximum value to clamp the output to. Range : int8
+ * @param[in]       num_col_a        number of columns of A
+ * @param[in]       output_bias      per output channel bias. Range : int32
+ * @param[in,out]   out_0            pointer to output
+ * @return     The function returns one of the two
+ *              1. The incremented output pointer for a successful operation or
+ *              2. NULL if implementation is not available.
+ *
+ * @details   This function does the matrix multiplication of weight matrix for all output channels
+ *            with 2 columns from im2col and produces two elements/output_channel. The outputs are
+ *            clamped in the range provided by activation min and max.
+ *            Supported framework: TensorFlow Lite Micro.
+ */
+q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
+                                    const q15_t *input_b,
+                                    const uint16_t output_ch,
+                                    const int32_t *out_shift,
+                                    const int32_t *out_mult,
+                                    const int32_t out_offset,
+                                    const int16_t activation_min,
+                                    const int16_t activation_max,
+                                    const uint16_t num_col_a,
+                                    const int32_t *const output_bias,
+                                    q7_t *out_0);
+
+/**
+ * @brief Matrix-multiplication of re-ordered input B with A.
+ *
+ * @details  For arguments, refer to arm_nn_mat_mult_kernel_s8_s16. The re-ordering is a consequence
+ *           of sign extension done by the SXTB16 instruction on input_b. The outputs are clamped in the range
+ *           provided by activation min and max.
+ *
+ *   - Supported framework : TensorFlow Lite Micro
+ *   - The following constraints on the arguments apply
+ *      -# num_col_a is a multiple of 4
+ *      -# output_ch is a multiple of 2
+ *
+ */
+q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
+                                              const q15_t *input_b,
+                                              const uint16_t output_ch,
+                                              const int32_t *out_shift,
+                                              const int32_t *out_mult,
+                                              const int32_t out_offset,
+                                              const int16_t activation_min,
+                                              const int16_t activation_max,
+                                              const uint16_t num_col_a,
+                                              const int32_t *const output_bias,
+                                              q7_t *out_0);
+
+/**
+ * @brief Matrix-multiplication function for convolution with reordered columns
+ * @param[in]       pA          pointer to operand A
+ * @param[in]       pInBuffer   pointer to operand B, always consists of 2 vectors
+ * @param[in]       ch_im_out   numRow of A
+ * @param[in]       numCol_A    numCol of A
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        the bias
+ * @param[in,out]   pOut        pointer to output
+ * @return     The function returns the incremented output pointer
+ *
+ * @details  This function assumes that data in pInBuffer are reordered
+ */
+q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t *pA,
+                                              const q15_t *pInBuffer,
+                                              const uint16_t ch_im_out,
+                                              const uint16_t numCol_A,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              const q7_t *bias,
+                                              q7_t *pOut);
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+ *  Other functions
+ *  These layers are typically not timing critical
+ *  Basic implementation is supported here
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup BasicMath Basic math functions
+ *
+ * Element wise add and multiplication functions.
+ *
+ */
+
+/**
+ * @brief s8 element wise add of two vectors
+ * @param[in]       input_1_vect            pointer to input vector 1
+ * @param[in]       input_2_vect            pointer to input vector 2
+ * @param[in]       input_1_offset          offset for input 1. Range: -127 to 128
+ * @param[in]       input_1_mult            multiplier for input 1
+ * @param[in]       input_1_shift           shift for input 1
+ * @param[in]       input_2_offset          offset for input 2. Range: -127 to 128
+ * @param[in]       input_2_mult            multiplier for input 2
+ * @param[in]       input_2_shift           shift for input 2
+ * @param[in]       left_shift              input left shift
+ * @param[in,out]   output                  pointer to output vector
+ * @param[in]       out_offset              output offset
+ * @param[in]       out_mult                output multiplier
+ * @param[in]       out_shift               output shift
+ * @param[in]       out_activation_min      minimum value to clamp output to
+ * @param[in]       out_activation_max      maximum value to clamp output to
+ * @param[in]       block_size              number of samples
+ * @return          The function returns    ARM_MATH_SUCCESS
+ */
+arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
+                                  const int8_t *input_2_vect,
+                                  const int32_t input_1_offset,
+                                  const int32_t input_1_mult,
+                                  const int32_t input_1_shift,
+                                  const int32_t input_2_offset,
+                                  const int32_t input_2_mult,
+                                  const int32_t input_2_shift,
+                                  const int32_t left_shift,
+                                  int8_t *output,
+                                  const int32_t out_offset,
+                                  const int32_t out_mult,
+                                  const int32_t out_shift,
+                                  const int32_t out_activation_min,
+                                  const int32_t out_activation_max,
+                                  const uint32_t block_size);
+
+/**
+ * @brief s8 element wise multiplication
+ * @param[in]       input_1_vect            pointer to input vector 1
+ * @param[in]       input_2_vect            pointer to input vector 2
+ * @param[in]       input_1_offset          offset for input 1. Range: -127 to 128
+ * @param[in]       input_2_offset          offset for input 2. Range: -127 to 128
+ * @param[in,out]   output                  pointer to output vector
+ * @param[in]       out_offset              output offset
+ * @param[in]       out_mult                output multiplier
+ * @param[in]       out_shift               output shift
+ * @param[in]       out_activation_min      minimum value to clamp output to
+ * @param[in]       out_activation_max      maximum value to clamp output to
+ * @param[in]       block_size              number of samples
+ * @return          The function returns    ARM_MATH_SUCCESS
+ *
+ * @details   Supported framework: TensorFlow Lite micro
+ */
+arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
+                                  const int8_t *input_2_vect,
+                                  const int32_t input_1_offset,
+                                  const int32_t input_2_offset,
+                                  int8_t *output,
+                                  const int32_t out_offset,
+                                  const int32_t out_mult,
+                                  const int32_t out_shift,
+                                  const int32_t out_activation_min,
+                                  const int32_t out_activation_max,
+                                  const uint32_t block_size);
+/**
+ * @defgroup Acti Activation Functions
+ *
+ * Perform activation layers, including ReLU (Rectified Linear Unit),
+ * sigmoid and tanh
+ *
+ */
+
+/**
+ * @brief Q7 RELU function
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ * @return none.
+ */
+
+void arm_relu_q7(q7_t *data, uint16_t size);
+
+/**
+ * @brief s8 ReLU6 function
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ */
+
+void arm_relu6_s8(q7_t *data, uint16_t size);
+
+/**
+ * @brief Q15 RELU function
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ * @return none.
+ */
+
+void arm_relu_q15(q15_t *data, uint16_t size);
+
+/**
+ * @brief Q7 neural network activation function using direct table look-up
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ * @param[in]       int_width   bit-width of the integer part, assumed to be smaller than 3
+ * @param[in]       type        type of activation functions
+ * @return none.
+ */
+
+void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
+
+/**
+ * @brief Q15 neural network activation function using direct table look-up
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ * @param[in]       int_width   bit-width of the integer part, assumed to be <= 3
+ * @param[in]       type        type of activation functions
+ * @return none.
+ *
+ * @details
+ *
+ * This is the direct table look-up approach.
+ *
+ * Here the integer part of the fixed-point format (int_width) is assumed to be <= 3.
+ * A value of more than 3 does not make much sense, because it makes no difference
+ * compared with saturation followed by any of these activation functions.
+ */
+
+void arm_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
+
+/**
+ * @defgroup Pooling Pooling Functions
+ *
+ * Perform pooling functions, including max pooling and average pooling
+ *
+ */
+
+/**
+ * @brief Q7 max pooling function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @return none.
+ *
+ */
+
+void arm_maxpool_q7_HWC(q7_t *Im_in,
+                        const uint16_t dim_im_in,
+                        const uint16_t ch_im_in,
+                        const uint16_t dim_kernel,
+                        const uint16_t padding,
+                        const uint16_t stride,
+                        const uint16_t dim_im_out,
+                        q7_t *bufferA,
+                        q7_t *Im_out);
+
+/**
+ * @brief Q7 average pooling function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @return none.
+ *
+ */
+
+void arm_avepool_q7_HWC(q7_t *Im_in,
+                        const uint16_t dim_im_in,
+                        const uint16_t ch_im_in,
+                        const uint16_t dim_kernel,
+                        const uint16_t padding,
+                        const uint16_t stride,
+                        const uint16_t dim_im_out,
+                        q7_t *bufferA,
+                        q7_t *Im_out);
+
+/**
+ * @brief s8 average pooling function.
+ *
+ * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
+ *                                definition file to see if an additional buffer is required.
+ *                                Optional function {API}_get_buffer_size() provides the buffer
+ *                                size if an additional buffer is required.
+ * @param[in]      pool_params    Pooling parameters
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                                Argument 'N' is not used.
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [H, W]
+ *                                Argument N and C are not used.
+ * @param[in]      output_dims    Output tensor dimensions. Format: [H, W, C_OUT]
+ *                                Argument N is not used.
+ *                                C_OUT equals C_IN.
+ * @param[in, out] output_data    Output data pointer. Data type: int8
+ * @return                        The function returns
+ *                                    <code>ARM_MATH_SUCCESS</code> - Successful operation
+ *
+ * @details
+ *    - Supported Framework: TensorFlow Lite
+ *
+ */
+arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
+                          const cmsis_nn_pool_params *pool_params,
+                          const cmsis_nn_dims *input_dims,
+                          const q7_t *input_data,
+                          const cmsis_nn_dims *filter_dims,
+                          const cmsis_nn_dims *output_dims,
+                          q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for S8 average pooling function
+ * @param[in]       dim_dst_width         output tensor dimension
+ * @param[in]       ch_src                number of input tensor channels
+ * @return          The function returns  required buffer size in bytes
+ *
+ */
+int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width, const int ch_src);
+
+/**
+ * @brief s8 max pooling function.
+ *
+ * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
+ *                                definition file to see if an additional buffer is required.
+ *                                Optional function {API}_get_buffer_size() provides the buffer
+ *                                size if an additional buffer is required.
+ * @param[in]      pool_params    Pooling parameters
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                                Argument 'N' is not used.
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [H, W]
+ *                                Argument N and C are not used.
+ * @param[in]      output_dims    Output tensor dimensions. Format: [H, W, C_OUT]
+ *                                Argument N is not used.
+ *                                C_OUT equals C_IN.
+ * @param[in, out] output_data    Output data pointer. Data type: int8
+ * @return                        The function returns
+ *                                    <code>ARM_MATH_SUCCESS</code> - Successful operation
+ *
+ * @details
+ *    - Supported Framework: TensorFlow Lite
+ *
+ */
+arm_status arm_max_pool_s8(const cmsis_nn_context *ctx,
+                           const cmsis_nn_pool_params *pool_params,
+                           const cmsis_nn_dims *input_dims,
+                           const q7_t *input_data,
+                           const cmsis_nn_dims *filter_dims,
+                           const cmsis_nn_dims *output_dims,
+                           q7_t *output_data);
+/**
+ * @defgroup Softmax Softmax Functions
+ *
+ * EXP(2) based softmax functions.
+ *
+ */
+
+/**
+ * @brief Q7 softmax function
+ * @param[in]       vec_in      pointer to input vector
+ * @param[in]       dim_vec     input vector dimension
+ * @param[out]      p_out       pointer to output vector
+ *
+ * @note This function is an optimized version which is not bit-accurate with
+ *       TensorFlow Lite's kernel
+ *
+ */
+
+void arm_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out);
+
+/**
+ * @brief Q7 softmax function with batch parameter
+ * @param[in]       vec_in      pointer to input vector
+ * @param[in]       nb_batches  number of batches
+ * @param[in]       dim_vec     input vector dimension
+ * @param[out]      p_out       pointer to output vector
+ * @return none.
+ *
+ * @note This function is an optimized version which is not bit-accurate with
+ *       TensorFlow Lite's kernel
+ *
+ */
+
+void arm_softmax_with_batch_q7(const q7_t *vec_in, const uint16_t nb_batches, const uint16_t dim_vec, q7_t *p_out);
+/**
+ * @brief Q15 softmax function
+ * @param[in]       vec_in      pointer to input vector
+ * @param[in]       dim_vec     input vector dimension
+ * @param[out]      p_out       pointer to output vector
+ * @return none.
+ *
+ * @note This function is an optimized version which is not bit-accurate with
+ *       TensorFlow Lite's kernel
+ *
+ */
+
+void arm_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out);
+
+/**
+ * @brief S8 softmax function
+ * @param[in]  input     Pointer to the input tensor
+ * @param[in]  num_rows  Number of rows in the input tensor
+ * @param[in]  row_size  Number of elements in each input row
+ * @param[in]  mult      Input quantization multiplier
+ * @param[in]  shift     Input quantization shift within the range [0, 31]
+ * @param[in]  diff_min  Minimum difference with max in row. Used to check if
+ *                       the quantized exponential operation can be performed
+ * @param[out] output    Pointer to the output tensor
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+
+void arm_softmax_s8(const int8_t *input,
+                    const int32_t num_rows,
+                    const int32_t row_size,
+                    const int32_t mult,
+                    const int32_t shift,
+                    const int32_t diff_min,
+                    int8_t *output);
+
+/**
+ * @brief U8 softmax function
+ * @param[in]  input     Pointer to the input tensor
+ * @param[in]  num_rows  Number of rows in the input tensor
+ * @param[in]  row_size  Number of elements in each input row
+ * @param[in]  mult      Input quantization multiplier
+ * @param[in]  shift     Input quantization shift within the range [0, 31]
+ * @param[in]  diff_min  Minimum difference with max in row. Used to check if
+ *                       the quantized exponential operation can be performed
+ * @param[out] output    Pointer to the output tensor
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+
+void arm_softmax_u8(const uint8_t *input,
+                    const int32_t num_rows,
+                    const int32_t row_size,
+                    const int32_t mult,
+                    const int32_t shift,
+                    const int32_t diff_min,
+                    uint8_t *output);
+
+/**
+ * @brief uint8 depthwise convolution function with asymmetric quantization
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @param[in]     input     Pointer to input tensor
+ * @param[in]     input_x   Width of input tensor
+ * @param[in]     input_y   Height of input tensor
+ * @param[in]     input_ch  Channels in input tensor
+ * @param[in]     kernel    Pointer to kernel weights
+ * @param[in]     kernel_x  Width of kernel
+ * @param[in]     kernel_y  Height of kernel
+ * @param[in]     ch_mult   Number of channel multiplier
+ * @param[in]     pad_x     Padding sizes x
+ * @param[in]     pad_y     Padding sizes y
+ * @param[in]     stride_x  stride along the width
+ * @param[in]     stride_y  stride along the height
+ * @param[in]     dilation_x Dilation along width. Not used and intended for future enhancement.
+ * @param[in]     dilation_y Dilation along height. Not used and intended for future enhancement.
+ * @param[in]     bias       Pointer to optional bias values. If no bias is
+ *                           available, NULL is expected
+ * @param[in]     input_offset  Input tensor zero offset
+ * @param[in]     filter_offset Kernel tensor zero offset
+ * @param[in]     output_offset Output tensor zero offset
+ * @param[in,out] output        Pointer to output tensor
+ * @param[in]     output_x  Width of output tensor
+ * @param[in]     output_y  Height of output tensor
+ * @param[in]     output_activation_min   Minimum value to clamp the output to. Range : {0, 255}
+ * @param[in]     output_activation_max   Maximum value to clamp the output to. Range : {0, 255}
+ * @param[in]     out_shift  Amount of right-shift for output
+ * @param[in]     out_mult   Output multiplier for requantization
+ * @return        The function returns the following
+ *                <code>ARM_MATH_SUCCESS</code> - Successful operation
+ *
+ */
+arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
+                                            const uint16_t input_x,
+                                            const uint16_t input_y,
+                                            const uint16_t input_ch,
+                                            const uint8_t *kernel,
+                                            const uint16_t kernel_x,
+                                            const uint16_t kernel_y,
+                                            const int16_t ch_mult,
+                                            const int16_t pad_x,
+                                            const int16_t pad_y,
+                                            const int16_t stride_x,
+                                            const int16_t stride_y,
+                                            const int16_t dilation_x,
+                                            const int16_t dilation_y,
+                                            const int32_t *bias,
+                                            const int32_t input_offset,
+                                            const int32_t filter_offset,
+                                            const int32_t output_offset,
+                                            uint8_t *output,
+                                            const uint16_t output_x,
+                                            const uint16_t output_y,
+                                            const int32_t output_activation_min,
+                                            const int32_t output_activation_max,
+                                            const int32_t out_shift,
+                                            const int32_t out_mult);
+
+/**
+ * @defgroup Reshape Reshape Functions
+ *
+ */
+
+/**
+ * @brief Reshape a s8 vector into another with different shape
+ * @param[in]  input      points to the s8 input vector
+ * @param[out] output     points to the s8 output vector
+ * @param[in]  total_size total size of the input and output vectors in bytes
+ *
+ * @note The output is expected to be in a memory area that does not overlap with the input's
+ *
+ */
+void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size);
+
+/**
+ * @defgroup Concatenation Concatenation Functions
+ *
+ */
+
+/**
+ * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis
+ *        This function should be called for each input tensor to concatenate. The argument offset_x
+ *        will be used to store the input tensor in the correct position in the output tensor
+ *
+ *        i.e.    offset_x = 0
+ *                for(i = 0; i < num_input_tensors; ++i)
+ *                {
+ *                    arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x)
+ *                    offset_x += input_x[i]
+ *                }
+ *
+ *        This function assumes that the output tensor has:
+ *        -# The same height of the input tensor
+ *        -# The same number of channels of the input tensor
+ *        -# The same batch size of the input tensor
+ *
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
+ *      does not involve any arithmetic operation
+ *
+ * @param[in]  input    Pointer to input tensor
+ * @param[in]  input_x  Width of input tensor
+ * @param[in]  input_y  Height of input tensor
+ * @param[in]  input_z  Channels in input tensor
+ * @param[in]  input_w  Batch size in input tensor
+ * @param[out] output   Pointer to output tensor
+ * @param[in]  output_x Width of output tensor
+ * @param[in]  offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor
+ *                      It is user responsibility to provide the correct value
+ *
+ * <b> Input constraints</b>
+ * offset_x is less than output_x
+ *
+ */
+void arm_concatenation_s8_x(const int8_t *input,
+                            const uint16_t input_x,
+                            const uint16_t input_y,
+                            const uint16_t input_z,
+                            const uint16_t input_w,
+                            int8_t *output,
+                            const uint16_t output_x,
+                            const uint32_t offset_x);
+
+/**
+ * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis
+ *        This function should be called for each input tensor to concatenate. The argument offset_y
+ *        will be used to store the input tensor in the correct position in the output tensor
+ *
+ *        i.e.    offset_y = 0
+ *                for(i = 0; i < num_input_tensors; ++i)
+ *                {
+ *                    arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y)
+ *                    offset_y += input_y[i]
+ *                }
+ *
+ *        This function assumes that the output tensor has:
+ *        -# The same width of the input tensor
+ *        -# The same number of channels of the input tensor
+ *        -# The same batch size of the input tensor
+ *
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
+ *       does not involve any arithmetic operation
+ *
+ * @param[in]  input    Pointer to input tensor
+ * @param[in]  input_x  Width of input tensor
+ * @param[in]  input_y  Height of input tensor
+ * @param[in]  input_z  Channels in input tensor
+ * @param[in]  input_w  Batch size in input tensor
+ * @param[out] output   Pointer to output tensor
+ * @param[in]  output_y Height of output tensor
+ * @param[in]  offset_y The offset on the Y axis to start concatenating the input tensor
+ *                      It is user responsibility to provide the correct value
+ *
+ * <b> Input constraints</b>
+ * offset_y is less than output_y
+ *
+ */
+void arm_concatenation_s8_y(const int8_t *input,
+                            const uint16_t input_x,
+                            const uint16_t input_y,
+                            const uint16_t input_z,
+                            const uint16_t input_w,
+                            int8_t *output,
+                            const uint16_t output_y,
+                            const uint32_t offset_y);
+
+/**
+ * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis
+ *        This function should be called for each input tensor to concatenate. The argument offset_z
+ *        will be used to store the input tensor in the correct position in the output tensor
+ *
+ *        i.e.    offset_z = 0
+ *                for(i = 0; i < num_input_tensors; ++i)
+ *                {
+ *                    arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z)
+ *                    offset_z += input_z[i]
+ *                }
+ *
+ *        This function assumes that the output tensor has:
+ *        -# The same width of the input tensor
+ *        -# The same height of the input tensor
+ *        -# The same batch size of the input tensor
+ *
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
+ *       does not involve any arithmetic operation
+ *
+ * @param[in]  input    Pointer to input tensor
+ * @param[in]  input_x  Width of input tensor
+ * @param[in]  input_y  Height of input tensor
+ * @param[in]  input_z  Channels in input tensor
+ * @param[in]  input_w  Batch size in input tensor
+ * @param[out] output   Pointer to output tensor
+ * @param[in]  output_z Channels in output tensor
+ * @param[in]  offset_z The offset on the Z axis to start concatenating the input tensor
+ *                      It is user responsibility to provide the correct value
+ *
+ * <b> Input constraints</b>
+ * offset_z is less than output_z
+ *
+ */
+void arm_concatenation_s8_z(const int8_t *input,
+                            const uint16_t input_x,
+                            const uint16_t input_y,
+                            const uint16_t input_z,
+                            const uint16_t input_w,
+                            int8_t *output,
+                            const uint16_t output_z,
+                            const uint32_t offset_z);
+
+/**
+ * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size)
+ *        This function should be called for each input tensor to concatenate. The argument offset_w
+ *        will be used to store the input tensor in the correct position in the output tensor
+ *
+ *        i.e.    offset_w = 0
+ *                for(i = 0; i < num_input_tensors; ++i)
+ *                {
+ *                    arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w)
+ *                    offset_w += input_w[i]
+ *                }
+ *
+ *        This function assumes that the output tensor has:
+ *        -# The same width of the input tensor
+ *        -# The same height of the input tensor
+ *        -# The same number of channels of the input tensor
+ *
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
+ *       does not involve any arithmetic operation
+ *
+ * @param[in]  input    Pointer to input tensor
+ * @param[in]  input_x  Width of input tensor
+ * @param[in]  input_y  Height of input tensor
+ * @param[in]  input_z  Channels in input tensor
+ * @param[in]  input_w  Batch size in input tensor
+ * @param[out] output   Pointer to output tensor
+ * @param[in]  offset_w The offset on the W axis to start concatenating the input tensor
+ *                      It is user responsibility to provide the correct value
+ *
+ */
+void arm_concatenation_s8_w(const int8_t *input,
+                            const uint16_t input_x,
+                            const uint16_t input_y,
+                            const uint16_t input_z,
+                            const uint16_t input_w,
+                            int8_t *output,
+                            const uint32_t offset_w);
+/**
+ * @defgroup SVDF SVDF Layer Functions
+ *
+ */
+
+/**
+ * @brief s8 SVDF function
+ *
+ * @param[in]   input_ctx Temporary scratch buffer
+ * @param[in]   output_ctx Temporary output scratch buffer
+ * @param[in]   svdf_params SVDF Parameters
+ *              Range of svdf_params->input_offset  : [-128, 127]
+ *              Range of svdf_params->output_offset  : [-128, 127]
+ * @param[in]   input_quant_params Input quantization parameters
+ * @param[in]   output_quant_params Output quantization parameters
+ * @param[in]   input_dims Input tensor dimensions
+ * @param[in]   input_data Pointer to input tensor
+ * @param[in]   state_dims State tensor dimensions
+ * @param[in]   state_data Pointer to state tensor
+ * @param[in]   weights_feature_dims Weights (feature) tensor dimensions
+ * @param[in]   weights_feature_data Pointer to the weights (feature) tensor
+ * @param[in]   weights_time_dims Weights (time) tensor dimensions
+ * @param[in]   weights_time_data Pointer to the weights (time) tensor
+ * @param[in]   bias_dims Bias tensor dimensions
+ * @param[in]   bias_data Pointer to bias tensor
+ * @param[in]   output_dims Output tensor dimensions
+ * @param[out]  output_data Pointer to the output tensor
+ *
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *
+ */
+arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
+                       const cmsis_nn_context *output_ctx,
+                       const cmsis_nn_svdf_params *svdf_params,
+                       const cmsis_nn_per_tensor_quant_params *input_quant_params,
+                       const cmsis_nn_per_tensor_quant_params *output_quant_params,
+                       const cmsis_nn_dims *input_dims,
+                       const q7_t *input_data,
+                       const cmsis_nn_dims *state_dims,
+                       q15_t *state_data,
+                       const cmsis_nn_dims *weights_feature_dims,
+                       const q7_t *weights_feature_data,
+                       const cmsis_nn_dims *weights_time_dims,
+                       const q15_t *weights_time_data,
+                       const cmsis_nn_dims *bias_dims,
+                       const q31_t *bias_data,
+                       const cmsis_nn_dims *output_dims,
+                       q7_t *output_data);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/features/cmsis_nn_sample_code/nnlib/Include/arm_nnsupportfunctions.h b/features/cmsis_nn_sample_code/nnlib/Include/arm_nnsupportfunctions.h
new file mode 100644
index 0000000..71eadb1
--- /dev/null
+++ b/features/cmsis_nn_sample_code/nnlib/Include/arm_nnsupportfunctions.h
@@ -0,0 +1,1071 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nnsupportfunctions.h
+ * Description:  Public header file of support functions for CMSIS NN Library
+ *
+ * $Date:        24. Aug 2021
+ * $Revision:    V.5.10.0
+ *
+ * Target Processor:  Cortex-M CPUs
+ * -------------------------------------------------------------------- */
+
+#ifndef _ARM_NNSUPPORTFUNCTIONS_H_
+#define _ARM_NNSUPPORTFUNCTIONS_H_
+
+#include "arm_nn_math_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)    /* shift amount if positive, else 0 */
+#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))  /* magnitude of a non-positive shift, else 0 */
+#define MASK_IF_ZERO(x) ((x) == 0 ? ~0 : 0)                 /* all-ones when x == 0, else 0 */
+#define MASK_IF_NON_ZERO(x) ((x) != 0 ? ~0 : 0)             /* all-ones when x != 0, else 0 */
+#define SELECT_USING_MASK(mask, a, b) (((mask) & (a)) ^ (~(mask) & (b))) /* bits of a where mask is set, b elsewhere */
+/* Arguments and whole expansions are parenthesized so the macros compose safely; arguments may be evaluated twice. */
+#define MAX(A, B) ((A) > (B) ? (A) : (B))
+#define MIN(A, B) ((A) < (B) ? (A) : (B))
+#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l))              /* note the order: upper bound h, then lower bound l */
+#define REDUCE_MULTIPLIER(_mult) (((_mult) < 0x7FFF0000) ? (((_mult) + (1 << 15)) >> 16) : 0x7FFF)
+
+/**
+ * @brief Packs four 8-bit values into one 32-bit word: v0 in bits 0-7, v1 in 8-15, v2 in 16-23, v3 in 24-31.
+ */
+#define PACK_Q7x4_32x1(v0, v1, v2, v3)                                                                                 \
+    ((((int32_t)(v0) << 0) & (int32_t)0x000000FF) | (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) |                     \
+     (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | (((int32_t)(v3) << 24) & (int32_t)0xFF000000))
+
+/**
+ * @brief Union for SIMD access of q31/q15/q7 types; all three members overlay the same storage
+ */
+union arm_nnword
+{
+    q31_t word;
+    /**< q31 type: the storage viewed as a single value */
+    q15_t half_words[2];
+    /**< q15 type: the storage viewed as two elements */
+    q7_t bytes[4];
+    /**< q7 type: the storage viewed as four elements */
+};
+
+/**
+ * @brief Pair of 32-bit halves (unsigned low, signed high) used as a member of union arm_nn_long_long
+ */
+struct arm_nn_double
+{
+    uint32_t low;
+    int32_t high;
+};
+/** @brief Union for data type long long: an int64_t overlaid with a struct arm_nn_double */
+union arm_nn_long_long
+{
+    int64_t long_long;
+    struct arm_nn_double word;
+};
+
+/**
+ * @defgroup nndata_convert Neural Network Data Conversion Functions
+ *
+ * Perform data type conversion in-between neural network operations
+ *
+ */
+
+/**
+ * @brief Converts the elements of the q7 vector to q15 vector without left-shift
+ * @param[in]       *pSrc points to the q7 input vector
+ * @param[out]      *pDst points to the q15 output vector
+ * @param[in]       blockSize length of the input vector
+ *
+ */
+void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);
+
+/**
+ * @brief Non-saturating addition of elements of a q7 vector
+ * @param[in]       *input Pointer to the q7 input vector
+ * @param[out]      *output Pointer to the q31 output variable.
+ * @param[in]       block_size length of the input vector
+ * \par Description:
+ *
+ * 2^24 samples can be added without saturating the result.
+ *
+ * The equation used for the conversion process is:
+ *
+ * <pre>
+ *  sum = input[0] + input[1] + .. + input[block_size -1]
+ * </pre>
+ *
+ * */
+void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size);
+
+/**
+ * @brief  Converts the elements of the q7 vector to reordered q15 vector without left-shift
+ * @param[in]       *pSrc points to the q7 input vector
+ * @param[out]      *pDst points to the q15 output vector
+ * @param[in]       blockSize length of the input vector
+ * @return none.
+ *
+ */
+void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);
+
+/**
+ * @brief Converts the elements from a q7 vector to a q15 vector with an added offset
+ * @param[in]    src        pointer to the q7 input vector
+ * @param[out]   dst        pointer to the q15 output vector
+ * @param[in]    block_size length of the input vector
+ * @param[in]    offset     q7 offset to be added to each input vector element.
+ *
+ * \par Description:
+ *
+ * The equation used for the conversion process is:
+ *
+ * <pre>
+ *  dst[n] = (q15_t) src[n] + offset;   0 <= n < block_size.
+ * </pre>
+ *
+ */
+void arm_q7_to_q15_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset);
+
+/**
+ * @brief Converts the elements of the q7 vector to reordered q15 vector with an added offset
+ * @param[in]       src        pointer to the q7 input vector
+ * @param[out]      dst        pointer to the q15 output vector
+ * @param[in]       block_size length of the input vector
+ * @param[in]       offset     offset to be added to each input vector element.
+ * @return none.
+ *
+ * @details  This function does the q7 to q15 expansion with re-ordering of bytes. Re-ordering is a consequence of
+ *           the sign extension intrinsic(DSP extension). The tail (i.e., last (N % 4) elements) retains its
+ * original order.
+ *
+ */
+void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset);
+
+/**
+ * @brief Converts the elements from a q7 vector and accumulate to a q15 vector
+ * @param[in]    *src       points to the q7 input vector
+ * @param[out]   *dst       points to the q15 output vector
+ * @param[in]    block_size length of the input vector
+ *
+ * \par Description:
+ *
+ * The equation used for the conversion process is:
+ *
+ * <pre>
+ *  dst[n] += (q15_t) src[n] ;   0 <= n < block_size.
+ * </pre>
+ *
+ */
+void arm_nn_accumulate_q7_to_q15(q15_t *dst, const q7_t *src, uint32_t block_size);
+
+/**
+ * @brief Depthwise conv on an im2col buffer where the input channel equals output channel.
+ * @param[in]    row     pointer to row
+ * @param[in]    col     pointer to im2col buffer, always consists of 2 columns.
+ * @param[in]    num_ch   number of channels
+ * @param[in]    out_shift  pointer to per output channel requantization shift parameter.
+ * @param[in]    out_mult   pointer to per output channel requantization multiplier parameter.
+ * @param[in]    out_offset      output tensor offset.
+ * @param[in]    activation_min   minimum value to clamp the output to. Range : int8
+ * @param[in]    activation_max   maximum value to clamp the output to. Range : int8
+ * @param[in]    kernel_size   number of elements in one column.
+ * @param[in]    output_bias per output channel bias. Range : int32
+ * @param[out]   out         pointer to output
+ * @return     The function returns one of the two
+ *              1. The incremented output pointer for a successful operation or
+ *              2. NULL if implementation is not available.
+ *
+ * @details     Supported framework: TensorFlow Lite micro.
+ */
+q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row,
+                                    const q15_t *col,
+                                    const uint16_t num_ch,
+                                    const int32_t *out_shift,
+                                    const int32_t *out_mult,
+                                    const int32_t out_offset,
+                                    const int32_t activation_min,
+                                    const int32_t activation_max,
+                                    const uint16_t kernel_size,
+                                    const int32_t *const output_bias,
+                                    q7_t *out);
+
+/**
+ * @brief General Matrix-multiplication function with per-channel requantization.
+ * @param[in]       input_row    pointer to row operand
+ * @param[in]       input_col    pointer to col operand
+ * @param[in]       output_ch    number of rows of input_row
+ * @param[in]       col_batches  number of column batches. Range: 1 to 4
+ * @param[in]       output_shift  pointer to per output channel requantization shift parameter.
+ * @param[in]       output_mult   pointer to per output channel requantization multiplier parameter.
+ * @param[in]       out_offset    output tensor offset.
+ * @param[in]       col_offset    input tensor(col) offset.
+ * @param[in]       row_offset    kernel offset(row). Not used.
+ * @param[in]       out_activation_min   minimum value to clamp the output to. Range : int8
+ * @param[in]       out_activation_max   maximum value to clamp the output to. Range : int8
+ * @param[in]       row_len       number of elements in each row
+ * @param[in]       bias          per output channel bias. Range : int32
+ * @param[in,out]   out           pointer to output
+ * @return     The function returns one of the two
+ *              1. The incremented output pointer for a successful operation or
+ *              2. NULL if implementation is not available.
+ *
+ * @details   Supported framework: TensorFlow Lite
+ */
+q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
+                         const q7_t *input_col,
+                         const uint16_t output_ch,
+                         const uint16_t col_batches,
+                         const int32_t *output_shift,
+                         const int32_t *output_mult,
+                         const int32_t out_offset,
+                         const int32_t col_offset,
+                         const int32_t row_offset,
+                         const int16_t out_activation_min,
+                         const int16_t out_activation_max,
+                         const uint16_t row_len,
+                         const int32_t *const bias,
+                         q7_t *out);
+/**
+ * @brief Matrix-multiplication function for convolution with per-channel requantization for 16 bits convolution.
+ * @param[in]       input_a     pointer to operand A
+ * @param[in]       input_b     pointer to operand B, always consists of 2 vectors.
+ * @param[in]       output_ch   number of rows of A
+ * @param[in]       out_shift  pointer to per output channel requantization shift parameter.
+ * @param[in]       out_mult   pointer to per output channel requantization multiplier parameter.
+ * @param[in]       activation_min   minimum value to clamp the output to. Range : int16
+ * @param[in]       activation_max   maximum value to clamp the output to. Range : int16
+ * @param[in]       num_col_a   number of columns of A
+ * @param[in]       output_bias per output channel bias. Range : int64
+ * @param[in,out]   out_0       pointer to output
+ * @return     The function returns one of the two
+ *              1. The incremented output pointer for a successful operation or
+ *              2. NULL if implementation is not available.
+ *
+ * @details   This function does the matrix multiplication of weight matrix for all output channels
+ *            with 2 columns from im2col and produces two elements/output_channel. The outputs are
+ *            clamped in the range provided by activation min and max.
+ *            Supported framework: TensorFlow Lite micro.
+ */
+q15_t *arm_nn_mat_mult_kernel_s16(const q7_t *input_a,
+                                  const q15_t *input_b,
+                                  const int32_t output_ch,
+                                  const int32_t *out_shift,
+                                  const int32_t *out_mult,
+                                  const int16_t activation_min,
+                                  const int16_t activation_max,
+                                  const int32_t num_col_a,
+                                  const int64_t *const output_bias,
+                                  q15_t *out_0);
+/**
+ * @brief General Matrix-multiplication without requantization for one row & one column
+ * @param[in]       row_elements  number of row elements
+ * @param[in]       row_base      pointer to row operand
+ * @param[in]       col_base      pointer to col operand
+ * @param[out]      sum_col       pointer to store sum of column elements
+ * @param[out]      output        pointer to store result of multiply-accumulate
+ * @return     Status code (arm_status). The multiply-accumulated result is written to output, not returned.
+ *
+ * @details Pseudo-code
+ *      *output = 0
+ *      sum_col = 0
+ *      for (i = 0; i < row_elements; i++)
+ *          *output += row_base[i] * col_base[i]
+ *          sum_col += col_base[i]
+ *
+ */
+arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements,
+                                     const int8_t *row_base,
+                                     const int8_t *col_base,
+                                     int32_t *const sum_col,
+                                     int32_t *const output);
+
+/**
+ * @brief General Matrix-multiplication without requantization for four rows and one column
+ * @param[in]       row_elements  number of row elements
+ * @param[in]       offset        offset between rows. Can be the same as row_elements.
+ *                                For e.g, in a 1x1 conv scenario with stride as 1.
+ * @param[in]       row_base      pointer to row operand
+ * @param[in]       col_base      pointer to col operand
+ * @param[out]      sum_col       pointer to store sum of column elements
+ * @param[out]      output        pointer to store result(4 int32's) of multiply-accumulate
+ * @return     Status code (arm_status). The four multiply-accumulated results are written to output, not returned.
+ *
+ * @details Pseudo-code
+ *      output[0] = 0
+ *         ..
+ *      output[3] = 0
+ *      sum_col = 0
+ *      for (i = 0; i < row_elements; i++)
+ *          output[0] += row_base[i] * col_base[i]
+ *                ..
+ *          output[3] += row_base[i + (row_elements * 3)] * col_base[i]
+ *          sum_col += col_base[i]
+ */
+arm_status arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
+                                     const int32_t offset,
+                                     const int8_t *row_base,
+                                     const int8_t *col_base,
+                                     int32_t *const sum_col,
+                                     int32_t *const output);
+
+/**
+ * @brief General Matrix-multiplication function with per-channel requantization.
+ *        This function assumes:
+ *        - LHS input matrix NOT transposed (nt)
+ *        - RHS input matrix transposed (t)
+ *
+ *  @note This operation also performs the broadcast bias addition before the requantization
+ *
+ * @param[in]  lhs                Pointer to the LHS input matrix
+ * @param[in]  rhs                Pointer to the RHS input matrix
+ * @param[in]  bias               Pointer to the bias vector. The length of this vector is equal to the number of
+ * output columns (or RHS input rows)
+ * @param[out] dst                Pointer to the output matrix with "m" rows and "n" columns
+ * @param[in]  dst_multipliers    Pointer to the multipliers vector needed for the per-channel requantization.
+ *                                The length of this vector is equal to the number of output columns (or RHS input
+ * rows)
+ * @param[in]  dst_shifts         Pointer to the shifts vector needed for the per-channel requantization. The length
+ * of this vector is equal to the number of output columns (or RHS input rows)
+ * @param[in]  lhs_rows           Number of LHS input rows
+ * @param[in]  rhs_rows           Number of RHS input rows
+ * @param[in]  rhs_cols           Number of LHS/RHS input columns
+ * @param[in]  lhs_offset         Offset to be applied to the LHS input value
+ * @param[in]  dst_offset         Offset to be applied to the output result
+ * @param[in]  activation_min     Minimum value to clamp down the output. Range : int8
+ * @param[in]  activation_max     Maximum value to clamp up the output. Range : int8
+ *
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
+                                   const q7_t *rhs,
+                                   const q31_t *bias,
+                                   q7_t *dst,
+                                   const int32_t *dst_multipliers,
+                                   const int32_t *dst_shifts,
+                                   const int32_t lhs_rows,
+                                   const int32_t rhs_rows,
+                                   const int32_t rhs_cols,
+                                   const int32_t lhs_offset,
+                                   const int32_t dst_offset,
+                                   const int32_t activation_min,
+                                   const int32_t activation_max);
+
+/**
+ * @brief s8 Vector by Matrix (transposed) multiplication
+ *
+ * @param[in]      lhs             Input left-hand side vector
+ * @param[in]      rhs             Input right-hand side matrix (transposed)
+ * @param[in]      bias            Input bias
+ * @param[out]     dst             Output vector
+ * @param[in]      lhs_offset      Offset to be added to the input values of the left-hand side vector.
+ *                                 Range: -127 to 128
+ * @param[in]      rhs_offset      Not used
+ * @param[in]      dst_offset      Offset to be added to the output values. Range: -127 to 128
+ * @param[in]      dst_multiplier  Output multiplier
+ * @param[in]      dst_shift       Output shift
+ * @param[in]      rhs_cols        Number of columns in the right-hand side input matrix
+ * @param[in]      rhs_rows        Number of rows in the right-hand side input matrix
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int8
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int8
+ *
+ * @return         The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
+                                    const q7_t *rhs,
+                                    const q31_t *bias,
+                                    q7_t *dst,
+                                    const int32_t lhs_offset,
+                                    const int32_t rhs_offset,
+                                    const int32_t dst_offset,
+                                    const int32_t dst_multiplier,
+                                    const int32_t dst_shift,
+                                    const int32_t rhs_cols,
+                                    const int32_t rhs_rows,
+                                    const int32_t activation_min,
+                                    const int32_t activation_max);
+
+/**
+ * @brief s16 Vector by Matrix (transposed) multiplication
+ *
+ * @param[in]      lhs             Input left-hand side vector
+ * @param[in]      rhs             Input right-hand side matrix (transposed)
+ * @param[in]      bias            Input bias
+ * @param[out]     dst             Output vector
+ * @param[in]      dst_multiplier  Output multiplier
+ * @param[in]      dst_shift       Output shift
+ * @param[in]      rhs_cols        Number of columns in the right-hand side input matrix
+ * @param[in]      rhs_rows        Number of rows in the right-hand side input matrix
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int16
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int16
+ *
+ * @return         The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+arm_status arm_nn_vec_mat_mult_t_s16(const q15_t *lhs,
+                                     const q7_t *rhs,
+                                     const q63_t *bias,
+                                     q15_t *dst,
+                                     const int32_t dst_multiplier,
+                                     const int32_t dst_shift,
+                                     const int32_t rhs_cols,
+                                     const int32_t rhs_rows,
+                                     const int32_t activation_min,
+                                     const int32_t activation_max);
+
+/**
+ * @brief s8 Vector by Matrix (transposed) multiplication with s16 output
+ *
+ * @param[in]      lhs             Input left-hand side vector
+ * @param[in]      rhs             Input right-hand side matrix (transposed)
+ * @param[out]     dst             Output vector
+ * @param[in]      lhs_offset      Offset to be added to the input values of the left-hand side
+ *                                 vector. Range: -127 to 128
+ * @param[in]      rhs_offset      Not used
+ * @param[in]      scatter_offset  Address offset for dst. First output is stored at 'dst', the
+ *                                 second at 'dst + scatter_offset' and so on.
+ * @param[in]      dst_multiplier  Output multiplier
+ * @param[in]      dst_shift       Output shift
+ * @param[in]      rhs_cols        Number of columns in the right-hand side input matrix
+ * @param[in]      rhs_rows        Number of rows in the right-hand side input matrix
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int16
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int16
+ *
+ * @return         The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+arm_status arm_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs,
+                                         const q7_t *rhs,
+                                         q15_t *dst,
+                                         const int32_t lhs_offset,
+                                         const int32_t rhs_offset,
+                                         const int32_t scatter_offset,
+                                         const int32_t dst_multiplier,
+                                         const int32_t dst_shift,
+                                         const int32_t rhs_cols,
+                                         const int32_t rhs_rows,
+                                         const int32_t activation_min,
+                                         const int32_t activation_max);
+
+/**
+ * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where
+ *        the padding is -lhs_offset(Range: int8). Dimensions are the same for lhs and rhs.
+ *
+ * @param[in]      lhs             Input left-hand side matrix
+ * @param[in]      rhs             Input right-hand side matrix (transposed)
+ * @param[in]      lhs_offset      LHS matrix offset(input offset). Range: -127 to 128
+ * @param[in]      num_ch          Number of channels in LHS/RHS
+ * @param[in]      out_shift       Per channel output shift. Length of vector is equal to number of channels
+ * @param[in]      out_mult        Per channel output multiplier. Length of vector is equal to number of channels
+ * @param[in]      out_offset      Offset to be added to the output values. Range: -127 to 128
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int8
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int8
+ * @param[in]       row_x_col       (row_dimension * col_dimension) of LHS/RHS matrix
+ * @param[in]      output_bias     Per channel output bias. Length of vector is equal to number of channels
+ * @param[out]     out             Output pointer
+ *
+ * @return         The function returns one of the two
+ *                  - Updated output pointer if an implementation is available
+ *                  - NULL if no implementation is available.
+ *
+ * @note           If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be
+ *                 read for each of the following.
+ *                  - Output shift
+ *                  - Output multiplier
+ *                  - Output bias
+ *                  - rhs
+ */
+q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
+                                           const q7_t *rhs,
+                                           const int32_t lhs_offset,
+                                           const uint16_t num_ch,
+                                           const int32_t *out_shift,
+                                           const int32_t *out_mult,
+                                           const int32_t out_offset,
+                                           const int32_t activation_min,
+                                           const int32_t activation_max,
+                                           const uint16_t row_x_col,
+                                           const int32_t *const output_bias,
+                                           q7_t *out);
+
+/**
+ * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases.
+ *        Dimensions are the same for lhs and rhs.
+ *
+ * @param[in]      lhs             Input left-hand side matrix
+ * @param[in]      rhs             Input right-hand side matrix (transposed)
+ * @param[in]      lhs_offset      LHS matrix offset(input offset). Range: -127 to 128
+ * @param[in]      num_ch          Number of channels in LHS/RHS
+ * @param[in]      out_shift       Per channel output shift. Length of vector is equal to number of channels.
+ * @param[in]      out_mult        Per channel output multiplier. Length of vector is equal to number of channels.
+ * @param[in]      out_offset      Offset to be added to the output values. Range: -127 to 128
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int8
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int8
+ * @param[in]       row_x_col       (row_dimension * col_dimension) of LHS/RHS matrix
+ * @param[in]      output_bias     Per channel output bias. Length of vector is equal to number of channels.
+ * @param[out]     out             Output pointer
+ *
+ * @return         The function returns one of the two
+ *                  - Updated output pointer if an implementation is available
+ *                  - NULL if no implementation is available.
+ *
+ * @note           If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be
+ *                 read for each of the following.
+ *                  - Output shift
+ *                  - Output multiplier
+ *                  - Output bias
+ *                  - rhs
+ */
+q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
+                                    const q7_t *rhs,
+                                    const int32_t lhs_offset,
+                                    const uint16_t num_ch,
+                                    const int32_t *out_shift,
+                                    const int32_t *out_mult,
+                                    const int32_t out_offset,
+                                    const int32_t activation_min,
+                                    const int32_t activation_max,
+                                    const uint16_t row_x_col,
+                                    const int32_t *const output_bias,
+                                    q7_t *out);
+
+/**
+  @brief         Fetch two consecutive q15 elements as one word, then advance the caller's pointer by two.
+  @param[in]     in_q15   Address of the read pointer; it is updated to point past the elements read.
+  @return        q31 value holding the 4 bytes that were read
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15)
+{
+    const q15_t *cursor = *in_q15;
+    q31_t packed; /* filled by a raw 4-byte copy, no pointer cast */
+    memcpy(&packed, cursor, 4);
+    *in_q15 = cursor + 2; /* step past the two q15 elements just read */
+
+    return packed;
+}
+
+/**
+  @brief         Fetch four consecutive q7 elements as one word, then advance the caller's pointer by four.
+  @param[in]     in_q7       Address of the read pointer; it is updated to point past the elements read.
+  @return        q31 value holding the 4 bytes that were read
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7)
+{
+    const q7_t *cursor = *in_q7;
+    q31_t packed;
+    memcpy(&packed, cursor, 4); /* raw 4-byte copy, no pointer cast */
+    *in_q7 = cursor + 4;
+    return packed;
+}
+
+/**
+  @brief         Fetch two consecutive q15 elements as one word; the pointer is not modified.
+  @param[in]     in_q15   Address of the first element to read.
+  @return        q31 value holding the 4 bytes that were read
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2(const q15_t *in_q15)
+{
+    q31_t packed; /* receives the first 4 bytes found at in_q15 */
+
+    memcpy(&packed, in_q15, 4);
+    return packed;
+}
+
+/**
+  @brief         Fetch four consecutive q7 elements as one word; the pointer is not modified.
+  @param[in]     in_q7       Address of the first element to read.
+  @return        q31 value holding the 4 bytes that were read
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4(const q7_t *in_q7)
+{
+    q31_t packed; /* receives the first 4 bytes found at in_q7 */
+
+    memcpy(&packed, in_q7, 4);
+    return packed;
+}
+
+/**
+  @brief         Store the four bytes of a q31 word at a q7 pointer, then advance that pointer by four.
+  @param[in,out] in       Address of the write pointer (despite its name, this is the destination)
+  @param[in]     value    Four bytes to copy
+  @return        none
+ */
+__STATIC_FORCEINLINE void arm_nn_write_q7x4_ia(q7_t **in, q31_t value)
+{
+    /* memcpy returns its destination, so the store and the 4-element advance fit in one statement */
+    *in = (q7_t *)memcpy(*in, &value, 4) + 4;
+}
+
+/**
+ * @brief           Fill block_size bytes at dst with val. Uses an MVE inline-assembly loop when
+ *                  ARM_MATH_MVEI is defined and the standard memset() otherwise.
+ * @param[in, out]  dst         Destination pointer
+ * @param[in]       val         Value to set
+ * @param[in]       block_size  Number of bytes to set.
+ */
+__STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst, const q7_t val, uint32_t block_size)
+{
+#if defined(ARM_MATH_MVEI)
+    __asm volatile("   vdup.8                  q0, %[set_val]             \n"
+                   "   wlstp.8                 lr, %[cnt], 1f             \n"
+                   "2:                                                    \n"
+                   "   vstrb.8                 q0, [%[in]], 16            \n"
+                   "   letp                    lr, 2b                     \n"
+                   "1:                                                    \n"
+                   : [ in ] "+r"(dst)
+                   : [ cnt ] "r"(block_size), [ set_val ] "r"(val)
+                   : "q0", "memory", "r14"); /* q0 holds the fill pattern; lr (r14) is the loop counter */
+#else
+    memset(dst, val, block_size);
+#endif
+}
+
+#if defined(ARM_MATH_DSP)
+
+/**
+ * @brief Read four q7 values (one 32-bit word) and sign-extend them into two words of two q15
+ *        values each (the in-order counterpart of read_and_pad_reordered).
+ * @return The source pointer advanced past the four values that were read.  */
+__STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
+{
+    q31_t inA = arm_nn_read_q7x4_ia(&source); /* also advances the local copy of source by 4 */
+    q31_t inAbuf1 = __SXTB16_RORn((uint32_t)inA, 8); /* extend after rotating right by 8 bits */
+    q31_t inAbuf2 = __SXTB16(inA);                   /* extend the un-rotated word */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+    *out2 = (int32_t)(__PKHTB(inAbuf1, inAbuf2, 16));
+    *out1 = (int32_t)(__PKHBT(inAbuf2, inAbuf1, 16));
+#else /* big endian: the same two packed words, assigned to the opposite outputs */
+    *out1 = (int32_t)(__PKHTB(inAbuf1, inAbuf2, 16));
+    *out2 = (int32_t)(__PKHBT(inAbuf2, inAbuf1, 16));
+#endif
+
+    return source;
+}
+
+/**
+ * @brief Read four q7 values (one 32-bit word) and sign-extend them into two words of two q15
+ *        values each, leaving them in the order the extension intrinsic produces (reordered).
+ * @return The source pointer advanced past the four values that were read.  */
+__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
+{
+    q31_t inA = arm_nn_read_q7x4_ia(&source); /* also advances the local copy of source by 4 */
+#ifndef ARM_MATH_BIG_ENDIAN
+    *out2 = __SXTB16(__ROR((uint32_t)inA, 8)); /* extend after rotating right by 8 bits */
+    *out1 = __SXTB16(inA);                     /* extend the un-rotated word */
+#else /* big endian: the same two values, assigned to the opposite outputs */
+    *out1 = __SXTB16(__ROR((uint32_t)inA, 8));
+    *out2 = __SXTB16(inA);
+#endif
+
+    return source;
+}
+
+/**
+ * @brief read and expand one q7 word into two q15 words with reordering, then add offset to both via __QADD16
+ */
+__STATIC_FORCEINLINE const q7_t *
+read_and_pad_reordered_with_offset(const q7_t *source, q31_t *out1, q31_t *out2, q31_t offset)
+{
+    q31_t inA = arm_nn_read_q7x4_ia(&source); /* also advances the local copy of source by 4 */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+    *out2 = __SXTB16(__ROR((uint32_t)inA, 8)); /* extend after rotating right by 8 bits */
+    *out1 = __SXTB16(inA);                     /* extend the un-rotated word */
+#else /* big endian: the same two values, assigned to the opposite outputs */
+    *out1 = __SXTB16(__ROR((uint32_t)inA, 8));
+    *out2 = __SXTB16(inA);
+#endif
+    /* NOTE(review): offset presumably carries the same q15 value in both halfwords - confirm with callers */
+    *out1 = __QADD16(*out1, offset);
+    *out2 = __QADD16(*out2, offset);
+    return source;
+}
+
+#endif
+
+/**
+ * @defgroup NNBasicMath Basic Math Functions for Neural Network Computation
+ *
+ * Basic Math Functions for Neural Network Computation
+ *
+ */
+
+/**
+ * @brief           q15 vector multiplication with variable output shifts
+ * @param[in]       *pSrcA        pointer to the first input vector
+ * @param[in]       *pSrcB        pointer to the second input vector
+ * @param[out]      *pDst         pointer to the output vector
+ * @param[in]       out_shift     amount of right-shift for output
+ * @param[in]       blockSize     number of samples in each vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function uses saturating arithmetic.
+ * Results outside of the allowable q15 range [0x8000 0x7FFF] will be saturated.
+ */
+
+void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize);
+
+/**
+ * @brief           q7 vector multiplication with variable output shifts
+ * @param[in]       *pSrcA        pointer to the first input vector
+ * @param[in]       *pSrcB        pointer to the second input vector
+ * @param[out]      *pDst         pointer to the output vector
+ * @param[in]       out_shift     amount of right-shift for output
+ * @param[in]       blockSize     number of samples in each vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function uses saturating arithmetic.
+ * Results outside of the allowable q7 range [0x80 0x7F] will be saturated.
+ */
+
+void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize);
+
+/**
+ * @brief Rounding offset for a right shift by out_shift: (1 << out_shift) / 2, or 0 if ARM_NN_TRUNCATE is defined
+ */
+#ifndef ARM_NN_TRUNCATE
+#define NN_ROUND(out_shift) ((0x1u << (out_shift)) >> 1)
+#else
+#define NN_ROUND(out_shift) 0
+#endif
+
+// Short aliases for the quantization helpers, used to avoid long lines.
+#define MUL_SAT(a, b) arm_nn_doubling_high_mult((a), (b))
+#define MUL_SAT_MVE(a, b) arm_doubling_high_mult_mve_32x4((a), (b))
+#define MUL_POW2(a, b) arm_nn_mult_by_power_of_two((a), (b))
+// The *_MVE aliases expand to functions that are only defined when ARM_MATH_MVEI is set.
+#define DIV_POW2(a, b) arm_nn_divide_by_power_of_two((a), (b))
+#define DIV_POW2_MVE(a, b) arm_divide_by_power_of_two_mve((a), (b))
+// Aliases for the softmax helper functions
+#define EXP_ON_NEG(x) arm_nn_exp_on_negative_values((x))
+#define ONE_OVER1(x) arm_nn_one_over_one_plus_x_for_x_in_0_1((x))
+
+/**
+ * @brief           Saturating doubling high multiply. Result matches
+ *                  NEON instruction VQRDMULH.
+ * @param[in]       m1        Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX}
+ * @param[in]       m2        Multiplier. Range: {NN_Q31_MIN, NN_Q31_MAX}
+ * @return          Result of multiplication.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult(const q31_t m1, const q31_t m2)
+{
+    // NN_Q31_MIN * NN_Q31_MIN is the one input pair whose doubled product does
+    // not fit in q31, so it is saturated up front
+    if ((m1 == m2) && (m1 == (int32_t)NN_Q31_MIN))
+    {
+        return NN_Q31_MAX;
+    }
+
+    // Rounding offset to add for a right shift of 31. Its sign follows the sign
+    // of the product because the division below truncates toward zero.
+    const q63_t half = 1 << 30;
+    const q63_t nudge = ((m1 < 0) ^ (m2 < 0)) ? (1 - half) : half;
+
+    // Gets resolved as a SMLAL instruction
+    const q63_t acc = nudge + (q63_t)m1 * m2;
+
+    // Utilize all of the upper 32 bits. This is the doubling step
+    // as well.
+    const q31_t high_product = (int32_t)(acc / (1ll << 31));
+
+    return high_product;
+}
+
+/**
+ * @brief           Doubling high multiply without saturation. This is intended
+ *                  for requantization where the scale is a positive integer
+ *
+ * @param[in]       m1        Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX}
+ * @param[in]       m2        Multiplier Range: {NN_Q31_MIN, NN_Q31_MAX}
+ * @return          Result of multiplication.
+ * @note            The result of this matches that of neon instruction
+ *                  VQRDMULH for m1 in range {NN_Q31_MIN, NN_Q31_MAX} and m2 in
+ *                  range {NN_Q31_MIN + 1, NN_Q31_MAX}. Saturation occurs when
+ *                  m1 equals m2 equals NN_Q31_MIN and that is not handled by
+ *                  this function.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult_no_sat(const q31_t m1, const q31_t m2)
+{
+    union arm_nn_long_long acc;
+
+    // Seed the accumulator with the rounding offset for a right shift of 31:
+    // low word = 2^30, high word = 0
+    acc.word.low = 1 << 30;
+    acc.word.high = 0;
+
+    // Gets resolved as a SMLAL instruction
+    acc.long_long = acc.long_long + (q63_t)m1 * m2;
+
+    // Shifting right by 31 uses all of the upper 32 bits and is the doubling
+    // step as well. The value is narrowed to 32 bits without saturation.
+    const q31_t high_product = (int32_t)(acc.long_long >> 31);
+
+    return high_product;
+}
+
+/**
+ * @brief           Rounding divide by power of two.
+ * @param[in]       dividend - Dividend
+ * @param[in]       exponent - Divisor = power(2, exponent)
+ *                             Range: [0, 31]
+ * @return          Rounded result of division. Midpoint is rounded away from zero.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
+{
+    // Build the mask with an unsigned shift: 1 << 31 overflows a signed int when exponent == 31
+    const q31_t remainder_mask = (q31_t)((1u << exponent) - 1u);
+    const int32_t remainder = remainder_mask & dividend;
+
+    // Basic division (right shift of the signed dividend)
+    q31_t result = dividend >> exponent;
+
+    // Adjust 'result' for rounding (mid point away from zero)
+    q31_t threshold = remainder_mask >> 1;
+    if (result < 0)
+    {
+        threshold++;
+    }
+    if (remainder > threshold)
+    {
+        result++;
+    }
+
+    return result;
+}
+
+/**
+ * @brief           Requantize a given value.
+ * @param[in]       val         Value to be requantized
+ * @param[in]       multiplier  multiplier. Range {NN_Q31_MIN + 1, Q32_MAX}
+ * @param[in]       shift       left or right shift for 'val * multiplier'
+ *
+ * @return          Returns (val * multiplier)/(2 ^ shift)
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
+{
+    const q31_t scaled = arm_nn_doubling_high_mult_no_sat(val * (1 << LEFT_SHIFT(shift)), multiplier);
+    return arm_nn_divide_by_power_of_two(scaled, RIGHT_SHIFT(shift));
+}
+
+/**
+ * @brief           Requantize a given 64 bit value.
+ * @param[in]       val                 Value to be requantized
+ * @param[in]       reduced_multiplier  Reduced multiplier from range {NN_Q31_MIN + 1, Q32_MAX} to {Q16_MIN + 1,
+ * Q16_MAX}
+ * @param[in]       shift               left or right shift for 'val * multiplier'
+ *
+ * @return          Returns (val * multiplier)/(2 ^ shift)
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_requantize_s64(const q63_t val, const q31_t reduced_multiplier, const q31_t shift)
+{
+    const q63_t product = val * reduced_multiplier;
+
+    // 64->32 bit reduction, stopping one bit short of the final position
+    q31_t reduced = product >> (14 - shift);
+    // Insert the rounding bit and apply the last shift position
+    reduced = (reduced + 1) >> 1;
+    return reduced;
+}
+
+/**
+ * @brief           memcpy optimized for MVE
+ * @param[in, out]  dst         Destination pointer
+ * @param[in]       src         Source pointer.
+ * @param[in]       block_size  Number of bytes to copy.
+ *
+ */
+__STATIC_FORCEINLINE void arm_memcpy_q7(q7_t *__RESTRICT dst, const q7_t *__RESTRICT src, uint32_t block_size)
+{
+#if defined(ARM_MATH_MVEI) // MVE build: inline-assembly copy loop
+    __asm volatile("   wlstp.8                 lr, %[cnt], 1f             \n" // loop start: count taken from block_size, 1f is the exit label
+                   "2:                                                    \n"
+                   "   vldrb.8                 q0, [%[in]], 16            \n" // load from src into q0, post-increment src by 16
+                   "   vstrb.8                 q0, [%[out]], 16           \n" // store q0 to dst, post-increment dst by 16
+                   "   letp                    lr, 2b                     \n" // loop end: back to label 2 while the count in lr is not exhausted
+                   "1:                                                    \n"
+                   : [ in ] "+r"(src), [ out ] "+r"(dst) // both pointers are read and written by the asm
+                   : [ cnt ] "r"(block_size)             // byte count, input only
+                   : "q0", "memory", "r14");             // r14 is lr, the loop counter register used above
+#else // no MVE: defer to the C library
+    memcpy(dst, src, block_size);
+#endif
+}
+
+#if defined(ARM_MATH_MVEI)
+/**
+ * @brief           Vector saturating doubling high multiply returning high half.
+ * @param[in]       m1        Multiplicand
+ * @param[in]       m2        Multiplier
+ * @return          Result of multiplication.
+ *
+ */
+__STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve(const int32x4_t m1, const q31_t m2)
+{
+    return vqrdmulhq_n_s32(m1, m2); // single intrinsic call: vector m1 with scalar m2
+}
+
+/**
+ * @brief           Vector rounding divide by power of two.
+ * @param[in]       dividend - Dividend vector
+ * @param[in]       exponent - Divisor = power(2, exponent)
+ *                             Range: [0, 31]
+ * @return          Rounded result of division. Midpoint is rounded away from zero.
+ *
+ */
+__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t dividend, const q31_t exponent)
+{
+    const int32x4_t shift = vdupq_n_s32(-exponent);                      // negated exponent in every lane
+    const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31); // (dividend & shift) shifted right by 31 in each lane
+    const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup);     // saturating add of the fixup to the dividend
+    return vrshlq_s32(fixed_up_dividend, shift); // NOTE(review): presumably a rounding right shift because shift is negative - confirm
+}
+
+/**
+ * @brief           Requantize a given vector.
+ * @param[in]       val         Vector to be requantized
+ * @param[in]       multiplier  multiplier
+ * @param[in]       shift       shift
+ *
+ * @return          Returns (val * multiplier)/(2 ^ shift)
+ *
+ */
+__STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const q31_t multiplier, const q31_t shift)
+{
+    const int32x4_t scaled = arm_doubling_high_mult_mve(vshlq_s32(val, vdupq_n_s32(LEFT_SHIFT(shift))), multiplier);
+    return arm_divide_by_power_of_two_mve(scaled, RIGHT_SHIFT(shift));
+}
+
+__STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve_32x4(const int32x4_t m1, const int32x4_t m2)
+{
+    return vqrdmulhq_s32(m1, m2); // single intrinsic call: both operands are vectors here
+}
+
+__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve_32x4(const int32x4_t dividend, const int32x4_t exponent)
+{
+    const int32x4_t shift = -exponent;                                   // per-lane negated exponent
+    const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31); // (dividend & shift) shifted right by 31 in each lane
+    const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup);     // saturating add of the fixup to the dividend
+    return vrshlq_s32(fixed_up_dividend, shift); // NOTE(review): presumably a rounding right shift where shift is negative - confirm
+}
+
+__STATIC_FORCEINLINE int32x4_t arm_requantize_mve_32x4(const int32x4_t val,
+                                                       const int32x4_t multiplier,
+                                                       const int32x4_t shift)
+{
+    const int32x4_t zeros = vdupq_n_s32(0);
+    const mve_pred16_t positive_lanes = vcmpgtq_n_s32(shift, 0);
+
+    // Per lane: a positive shift is applied before the multiply, any other shift (negated) after it
+    const int32x4_t left_shift = vpselq_s32(shift, zeros, positive_lanes);
+    const int32x4_t right_shift = -vpselq_s32(zeros, shift, positive_lanes);
+    const int32x4_t scaled = arm_doubling_high_mult_mve_32x4(vshlq_s32(val, left_shift), multiplier);
+    return arm_divide_by_power_of_two_mve_32x4(scaled, right_shift);
+}
+#endif
+
+// @note The following functions are used only for softmax layer, scaled bits = 5 assumed
+
+__STATIC_FORCEINLINE int32_t arm_nn_exp_on_negative_values(int32_t val)
+{
+    int32_t mask = 0;
+    int32_t shift = 24; // split point below, then the first bit of 'remainder' that is tested
+    // Split val: low 24 bits minus 2^24, and 'remainder', whose low 24 bits are zero
+    const int32_t val_mod_minus_quarter = (val & ((1 << shift) - 1)) - (1 << shift);
+    const int32_t remainder = val_mod_minus_quarter - val;
+    const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28);
+    const int32_t x2 = MUL_SAT(x, x);
+    // Polynomial in x. NOTE(review): the constants presumably come from gemmlowp's exp_on_negative_values - confirm
+    int32_t result = 1895147668 +
+        MUL_SAT(1895147668, x + DIV_POW2(MUL_SAT(DIV_POW2(MUL_SAT(x2, x2), 2) + MUL_SAT(x2, x), 715827883) + x2, 1));
+    // Each use tests bit 'shift' of remainder and post-increments shift; a set bit presumably selects result * x
+#define SELECT_IF_NON_ZERO(x)                                                                                          \
+    {                                                                                                                  \
+        mask = MASK_IF_NON_ZERO(remainder & (1 << shift++));                                                           \
+        result = SELECT_USING_MASK(mask, MUL_SAT(result, x), result);                                                  \
+    }
+    // Seven uses: bits 24..30 of remainder, one multiplier constant per bit
+    SELECT_IF_NON_ZERO(1672461947)
+    SELECT_IF_NON_ZERO(1302514674)
+    SELECT_IF_NON_ZERO(790015084)
+    SELECT_IF_NON_ZERO(290630308)
+    SELECT_IF_NON_ZERO(39332535)
+    SELECT_IF_NON_ZERO(720401)
+    SELECT_IF_NON_ZERO(242)
+
+#undef SELECT_IF_NON_ZERO
+    // NOTE(review): looks like val == 0 yields NN_Q31_MAX instead of the computed result - confirm against MASK_IF_ZERO
+    mask = MASK_IF_ZERO(val);
+    return SELECT_USING_MASK(mask, NN_Q31_MAX, result);
+}
+
+// Multiply val by 2^exp; values beyond +/-thresh select NN_Q31_MAX / NN_Q31_MIN instead of the shifted value.
+__STATIC_FORCEINLINE q31_t arm_nn_mult_by_power_of_two(const int32_t val, const int32_t exp)
+{
+    const int32_t thresh = (int32_t)((1u << (31 - exp)) - 1u); // unsigned shift: 1 << 31 overflows a signed int
+    int32_t result = (int32_t)((uint32_t)val << exp);          // unsigned shift: signed overflow in << is undefined
+    result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), NN_Q31_MAX, result);
+    return SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), NN_Q31_MIN, result);
+}
+
+__STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val)
+{
+    const int64_t sum = (int64_t)val + (int64_t)NN_Q31_MAX;
+    const int32_t half_denominator = (int32_t)((sum + (sum >= 0 ? 1 : -1)) / 2L);
+    const int32_t pow2_29 = (1 << 29);
+    // Initial estimate from half_denominator, followed by three identical refinement steps
+    int32_t x = 1515870810 + MUL_SAT(half_denominator, -1010580540);
+    for (int32_t step = 0; step < 3; step++)
+    {
+        x += MUL_POW2(MUL_SAT(x, pow2_29 - MUL_SAT(half_denominator, x)), 2);
+    }
+    return MUL_POW2(x, 1);
+}
+
+/**
+  @brief         Write 2 q15 elements and post increment pointer.
+  @param[in]     dest_q15  Pointer to pointer that holds address of destination.
+  @param[in]     src_q31   Input value to be written.
+  @return        none
+ */
+__STATIC_FORCEINLINE void arm_nn_write_q15x2_ia(q15_t **dest_q15, q31_t src_q31)
+{
+    // Byte-wise copy of the 4-byte value through memcpy rather than a q31_t store
+    memcpy(*dest_q15, &src_q31, 4);
+    // Step the caller's pointer past the two q15 elements just written
+    *dest_q15 += 2;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/features/cmsis_nn_sample_code/nnlib/libcmsis-nn.a b/features/cmsis_nn_sample_code/nnlib/libcmsis-nn.a
new file mode 100644
index 0000000..f44974e
Binary files /dev/null and b/features/cmsis_nn_sample_code/nnlib/libcmsis-nn.a differ
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/biases_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/biases_data.h
new file mode 100644
index 0000000..80674a5
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/biases_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int64_t fully_connected_int16_biases[11] = {-5, 45, 53, -33, 31, 51, 43, 35, 37, -1, 46};
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/config_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/config_data.h
new file mode 100644
index 0000000..2607db8
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/config_data.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#define FULLY_CONNECTED_INT16_OUT_CH 11
+#define FULLY_CONNECTED_INT16_IN_CH 7
+#define FULLY_CONNECTED_INT16_INPUT_W 3
+#define FULLY_CONNECTED_INT16_INPUT_H 3
+#define FULLY_CONNECTED_INT16_DST_SIZE 22
+#define FULLY_CONNECTED_INT16_INPUT_SIZE 63
+#define FULLY_CONNECTED_INT16_OUT_ACTIVATION_MIN -32766
+#define FULLY_CONNECTED_INT16_OUT_ACTIVATION_MAX 32767
+#define FULLY_CONNECTED_INT16_INPUT_BATCHES 2
+#define FULLY_CONNECTED_INT16_INPUT_OFFSET 0
+#define FULLY_CONNECTED_INT16_OUTPUT_OFFSET 0
+#define FULLY_CONNECTED_INT16_OUTPUT_MULTIPLIER 1073741824
+#define FULLY_CONNECTED_INT16_OUTPUT_SHIFT 1
+#define FULLY_CONNECTED_INT16_ACCUMULATION_DEPTH 63
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/input_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/input_data.h
new file mode 100644
index 0000000..f112806
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/input_data.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t fully_connected_int16_input[126] = {
+    -3,  -38, 24,  8,   -11, -43, 47,  45,  6,   -10, -3,  -52, -23, 9,   32,  3,   -7,  -2,  -5,  28, -40,
+    -21, -39, 6,   -45, 35,  -8,  -16, -16, 2,   -9,  -42, -35, 12,  -37, 33,  14,  -47, -32, -38, 40, 13,
+    12,  46,  12,  -46, 27,  -42, 33,  3,   -43, 53,  3,   -50, -35, 27,  -3,  -18, 12,  -39, -47, 28, 1,
+    30,  8,   -50, 11,  -23, 1,   4,   -31, 17,  2,   -35, -20, 51,  -32, -42, -37, -37, 36,  -12, 38, -12,
+    -28, -46, -49, -56, 17,  -32, 6,   -19, -47, 5,   -17, -29, 1,   50,  -39, 1,   39,  8,   -19, 56, 5,
+    -44, -28, -49, -36, 23,  51,  -7,  -10, 15,  53,  50,  -15, -52, -52, -1,  -46, 50,  -52, -49, 38, 9};
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/output_ref_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/output_ref_data.h
new file mode 100644
index 0000000..e264016
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/output_ref_data.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t fully_connected_int16_output_ref[22] = {-428,  1672,  -2140, 1843,  -13255, 7356, -9406, -6522,
+                                                    -1898, 7253,  -5511, -4247, -9077,  372,  -6992, -13817,
+                                                    9870,  -1640, 6758,  5351,  -4067,  -373};
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/test_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/test_data.h
new file mode 100644
index 0000000..c9e0a61
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/test_data.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_ref_data.h"
+#include "weights_data.h"
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/weights_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/weights_data.h
new file mode 100644
index 0000000..642743a
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16/weights_data.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t fully_connected_int16_weights[693] = {
+    -31, -15, -29, -30, -42, 48,  44,  -18, 17,  50,  0,   57,  17,  50,  -1,  -10, 22,  -19, -20, -13, 21,  -29, 12,
+    38,  28,  11,  38,  -12, -35, -10, -47, -28, -55, -50, 3,   46,  -47, -2,  26,  8,   46,  15,  9,   -8,  18,  26,
+    20,  49,  -43, 20,  -11, 36,  -50, 18,  -47, 52,  -51, 31,  -38, -23, -1,  -5,  -15, -53, -39, 35,  -24, -17, 10,
+    50,  32,  -31, 26,  -3,  24,  1,   51,  -9,  47,  -15, -26, 35,  -13, -32, -20, -18, -16, 17,  -25, -31, -29, -13,
+    6,   57,  -52, 22,  -38, 0,   -46, 21,  -53, 45,  -5,  17,  -8,  28,  10,  -25, 27,  25,  -16, 22,  -27, 48,  12,
+    30,  -4,  41,  -43, 42,  58,  -20, 27,  13,  17,  19,  13,  -45, 25,  58,  7,   19,  29,  50,  -21, 14,  -48, 57,
+    -9,  -26, -4,  -15, -20, 14,  37,  -21, -50, 7,   52,  49,  34,  -10, 27,  -8,  -1,  -28, 47,  14,  -45, -17, -2,
+    42,  22,  40,  51,  -3,  -1,  49,  -7,  -37, 38,  -8,  36,  6,   30,  -24, 42,  44,  -13, 40,  -26, -11, -7,  -19,
+    32,  -33, 58,  -7,  -26, 22,  42,  -2,  -27, -46, 9,   5,   -34, 41,  -51, 28,  -17, -11, 21,  -28, -8,  45,  -52,
+    15,  34,  51,  -32, -29, 17,  -37, -17, 39,  13,  -1,  -33, -8,  29,  16,  -49, -20, 55,  21,  10,  -47, -31, -51,
+    11,  -44, 45,  39,  8,   -37, -47, 16,  22,  7,   6,   38,  35,  17,  15,  -39, -32, -37, -54, 33,  -8,  17,  -23,
+    49,  -33, 10,  -8,  27,  20,  -10, 15,  -33, 12,  49,  28,  -49, 34,  28,  -8,  -24, -13, 1,   -36, -49, 57,  29,
+    32,  -11, -3,  19,  56,  19,  -38, 49,  6,   14,  -45, 21,  -47, -15, -16, 54,  31,  4,   -26, -17, -18, 27,  -29,
+    9,   55,  12,  -25, 0,   7,   16,  -16, -4,  -45, -27, -22, 43,  52,  -47, 20,  54,  -2,  -17, 58,  19,  -7,  4,
+    -7,  -26, -2,  15,  15,  -21, -3,  1,   6,   10,  -27, 54,  -7,  -8,  -13, 21,  29,  -42, 51,  7,   2,   -16, -42,
+    -26, 4,   -52, -50, 7,   33,  56,  -31, 45,  58,  25,  25,  -50, -42, -31, -54, 25,  38,  -54, 58,  -38, 23,  11,
+    -4,  19,  34,  -55, 15,  -44, -16, -40, 9,   49,  -42, -16, -55, 19,  -45, 16,  -39, 29,  0,   35,  -53, 11,  -20,
+    -55, -50, 37,  -27, 22,  5,   -12, 31,  51,  -47, 50,  -49, 9,   11,  38,  -25, -2,  -12, -22, -10, 58,  -20, -10,
+    -9,  3,   -26, 43,  -32, 7,   17,  36,  -18, 24,  3,   37,  -6,  -28, 36,  -14, 51,  50,  -19, 37,  8,   49,  -43,
+    38,  10,  -18, 3,   -16, 26,  1,   -26, -7,  54,  52,  -54, 24,  -3,  56,  22,  5,   19,  14,  10,  24,  -3,  25,
+    5,   -47, -31, 14,  -49, 38,  -38, 4,   -38, 12,  -55, -9,  -30, 9,   9,   9,   -32, -40, -38, 33,  2,   -29, -4,
+    -15, -38, -35, -25, -44, -14, -54, -42, -50, -15, 5,   0,   -45, -22, 40,  -23, 18,  22,  -33, 5,   9,   -13, -16,
+    0,   -46, 14,  29,  -40, -8,  3,   1,   12,  1,   -38, 53,  -23, 9,   -4,  12,  13,  -52, -3,  -51, 51,  18,  -42,
+    40,  -3,  16,  -55, -44, 5,   46,  11,  15,  24,  -53, -16, -25, 45,  -48, -6,  5,   -39, 27,  1,   34,  -44, 14,
+    -5,  55,  13,  48,  18,  -49, -39, 7,   42,  41,  52,  56,  -5,  58,  -43, -10, 13,  43,  37,  -20, 52,  18,  -12,
+    4,   4,   23,  44,  -46, -35, 44,  -27, 13,  24,  -7,  -29, -33, 35,  10,  12,  -28, 12,  35,  -35, 40,  -7,  -55,
+    -28, 21,  11,  -12, 28,  -21, -8,  -34, 2,   -13, -27, -8,  3,   8,   -10, -6,  8,   -12, -53, -28, 14,  37,  -42,
+    -50, -11, -7,  -18, -13, 8,   -7,  -8,  -50, 29,  -56, 27,  -52, 30,  27,  40,  -43, 13,  11,  -45, 3,   18,  19,
+    -15, -19, -15, 5,   -23, 51,  -31, -15, 40,  -47, 12,  2,   -27, -26, -7,  -26, -40, 53,  24,  -52, 21,  16,  50,
+    -19, -26, 12,  -31, -30, 23,  -52, 20,  37,  20,  -28, -16, 35,  16,  -48, -47, 29,  30,  13,  -11, -5,  28,  -51,
+    0,   58,  -49};
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/biases_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/biases_data.h
new file mode 100644
index 0000000..587dd55
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/biases_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int64_t fully_connected_int16_big_biases[11] = {-1, 3, 4, 2, -1, 3, 3, -2, -5, -3, -4};
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/config_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/config_data.h
new file mode 100644
index 0000000..00f0442
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/config_data.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#define FULLY_CONNECTED_INT16_BIG_OUT_CH 11
+#define FULLY_CONNECTED_INT16_BIG_IN_CH 7
+#define FULLY_CONNECTED_INT16_BIG_INPUT_W 10
+#define FULLY_CONNECTED_INT16_BIG_INPUT_H 10
+#define FULLY_CONNECTED_INT16_BIG_DST_SIZE 33
+#define FULLY_CONNECTED_INT16_BIG_INPUT_SIZE 700
+#define FULLY_CONNECTED_INT16_BIG_OUT_ACTIVATION_MIN -32766
+#define FULLY_CONNECTED_INT16_BIG_OUT_ACTIVATION_MAX 32767
+#define FULLY_CONNECTED_INT16_BIG_INPUT_BATCHES 3
+#define FULLY_CONNECTED_INT16_BIG_INPUT_OFFSET 0
+#define FULLY_CONNECTED_INT16_BIG_OUTPUT_OFFSET 0
+#define FULLY_CONNECTED_INT16_BIG_OUTPUT_MULTIPLIER 1073741824
+#define FULLY_CONNECTED_INT16_BIG_OUTPUT_SHIFT 1
+#define FULLY_CONNECTED_INT16_BIG_ACCUMULATION_DEPTH 700
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/input_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/input_data.h
new file mode 100644
index 0000000..b69e08c
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/input_data.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t fully_connected_int16_big_input[2100] = {
+    -3, 1,  -4, -3, 1,  3,  -2, 3,  4,  -4, 4,  1,  -2, -2, -4, 0,  2,  -3, 3,  3,  -1, -3, 4,  -4, -2, -2, 3,  2,  0,
+    -4, 3,  -2, -1, 2,  2,  3,  -2, -1, -2, -3, -1, 2,  -5, -1, 0,  3,  2,  -5, 1,  3,  -4, 4,  -4, -4, -2, 4,  -3, -1,
+    -4, -4, -4, -3, -2, 3,  -5, 3,  2,  1,  2,  4,  -3, 0,  0,  2,  4,  -2, -3, 2,  1,  2,  -1, -5, -3, 2,  3,  -1, 2,
+    -4, 2,  -4, -5, -4, 1,  -2, 0,  -5, -3, 1,  3,  4,  3,  0,  -4, 0,  4,  2,  4,  3,  -5, -1, -4, 3,  4,  1,  -2, -2,
+    1,  -5, 0,  -2, -4, -3, -4, 0,  -2, -3, -5, 0,  -5, -4, 1,  3,  1,  -2, -5, -2, -5, 2,  1,  0,  -3, -3, 3,  -3, -1,
+    4,  -2, -1, -4, 4,  2,  4,  0,  3,  -3, 1,  -5, -5, 2,  3,  2,  4,  1,  -5, 1,  2,  1,  3,  4,  0,  4,  2,  3,  0,
+    -2, 0,  0,  4,  0,  -5, -5, 4,  2,  -1, -2, 0,  -1, -3, 1,  4,  2,  -4, 0,  4,  3,  -4, 2,  -5, 0,  1,  -3, 0,  -5,
+    4,  -5, 3,  0,  -3, 4,  2,  2,  0,  -3, -1, 1,  -2, 3,  -3, 4,  -2, -3, -1, 0,  -5, 4,  0,  2,  -2, 2,  -3, 4,  2,
+    1,  -2, -3, 3,  -5, -4, -2, -1, 0,  4,  4,  -4, -1, -2, -1, 4,  -1, -1, -1, 0,  -1, -5, 2,  -3, 2,  3,  3,  2,  -1,
+    0,  4,  1,  -2, 0,  -1, 4,  1,  -1, 0,  -1, 1,  1,  -3, -3, 0,  -3, 1,  -3, 0,  3,  -1, 0,  -1, 0,  4,  -5, 3,  -5,
+    -1, 3,  -3, -4, 3,  -4, -3, -4, 2,  -1, 0,  -3, 2,  -2, 3,  -5, 1,  -2, -1, -4, 1,  -2, -4, -3, -3, 1,  2,  1,  -3,
+    4,  -5, -3, 2,  0,  -1, 3,  -3, 3,  2,  -1, -5, 3,  -5, 4,  1,  1,  4,  -3, -5, 0,  -5, 0,  3,  1,  3,  0,  2,  -4,
+    2,  -1, 1,  -1, -1, 1,  4,  4,  0,  -2, 2,  2,  -1, 1,  0,  -3, -3, 3,  4,  3,  3,  -1, 4,  1,  -3, 4,  -1, 3,  -3,
+    -3, -4, 4,  2,  -4, -2, -4, 1,  1,  -5, -5, -2, -3, -1, 1,  -3, -2, -2, -2, -3, -5, -3, 0,  3,  -1, 2,  -1, -5, -4,
+    4,  4,  0,  3,  -3, 0,  4,  2,  3,  -3, 0,  3,  -4, 1,  -1, -3, 1,  -2, -1, -1, -3, -4, -2, 4,  3,  -2, 0,  2,  4,
+    0,  -4, 2,  1,  -5, -4, 4,  -3, 3,  3,  -4, -3, 1,  -1, 4,  -5, -3, -3, -4, 0,  4,  1,  -3, 3,  1,  3,  -3, 0,  -5,
+    1,  -5, -4, 3,  -5, -1, 1,  -5, -4, 2,  0,  2,  3,  -5, -5, -2, 1,  0,  -1, -1, -1, -2, 1,  4,  -4, -2, 0,  1,  2,
+    -4, 0,  1,  -1, -2, 3,  4,  -2, -4, 0,  -2, 0,  0,  4,  1,  0,  -4, 4,  -5, 2,  -3, 4,  4,  1,  -3, -1, -1, 2,  3,
+    -2, 0,  0,  3,  -3, 4,  -3, -5, -3, 2,  0,  4,  -2, 3,  -4, 3,  3,  -1, -2, -5, -2, -1, 2,  0,  4,  2,  0,  4,  -2,
+    0,  3,  3,  0,  2,  -1, 4,  3,  1,  3,  -3, 4,  2,  2,  2,  -2, 4,  0,  -1, 0,  1,  0,  -3, -2, -4, 1,  0,  -1, 3,
+    2,  -5, -1, 0,  -4, -2, -4, -5, -1, 0,  -3, -5, 0,  1,  -3, -3, 4,  3,  -1, -3, 3,  -5, 0,  -2, -1, 1,  -5, -2, -2,
+    -1, 2,  0,  -3, -4, 2,  -1, -1, -2, -4, -2, 0,  -3, -1, -2, -4, -3, -2, -3, -1, -1, -3, 2,  -3, 3,  -2, 2,  3,  -1,
+    1,  -4, -4, 0,  3,  4,  -3, -1, 4,  -4, 2,  -2, -1, 2,  -1, 3,  -3, 3,  0,  -2, -5, 2,  4,  -1, -1, 3,  -2, 1,  -1,
+    -4, 2,  3,  0,  3,  3,  4,  4,  2,  4,  4,  -1, 4,  -4, 4,  4,  1,  2,  -3, 2,  -4, 2,  0,  -2, 2,  0,  -3, 1,  4,
+    -3, -1, 3,  -1, 0,  0,  -4, -3, 1,  -2, -1, -3, 2,  -5, 0,  -1, -1, 1,  0,  -2, 0,  1,  0,  1,  1,  4,  -1, 3,  0,
+    2,  0,  3,  0,  -2, 4,  3,  -5, -1, -3, 0,  -1, -3, 0,  3,  -4, 2,  -4, 4,  -3, 1,  1,  -5, -2, -1, 3,  2,  2,  0,
+    4,  3,  -1, -2, -4, 0,  0,  4,  4,  2,  0,  0,  -4, -5, 1,  -2, -2, 2,  -1, -3, -2, 0,  0,  -4, 3,  2,  2,  3,  -4,
+    -5, -2, -5, 4,  -4, 2,  4,  4,  3,  -4, 4,  4,  -3, -2, 1,  -1, -5, -5, 2,  2,  -5, 3,  3,  -3, -5, -5, 0,  -1, -2,
+    0,  3,  2,  -4, -4, 2,  -1, -1, -3, -3, -2, 1,  -4, 4,  4,  -2, -2, 0,  3,  -4, -5, -5, 2,  -5, 4,  4,  -5, -3, -2,
+    -2, 4,  4,  1,  4,  1,  -1, -2, 0,  -3, 2,  1,  3,  -3, 0,  -1, 2,  -5, -5, 3,  2,  3,  4,  -2, -3, 3,  -1, -5, 2,
+    4,  0,  4,  -1, 4,  -2, -2, -5, 1,  -4, -4, -2, 4,  2,  -4, 0,  1,  -5, 0,  2,  4,  -2, -4, 2,  -3, 4,  1,  1,  -4,
+    -3, -4, 1,  1,  1,  4,  -2, 4,  -3, -2, 1,  1,  -1, -3, 0,  1,  4,  0,  -4, 4,  4,  -2, -3, 2,  -1, 1,  4,  -5, 0,
+    -3, -5, 4,  -3, -1, 4,  4,  0,  -4, -4, 3,  -1, 0,  -3, -1, -4, -5, 2,  -5, 4,  2,  -2, -2, 4,  4,  -2, -5, -4, 2,
+    -4, -3, -4, -3, -5, -2, 1,  -5, -1, -5, -3, 4,  3,  0,  -4, 2,  0,  2,  4,  -2, -3, 1,  -5, -2, 4,  3,  -5, -2, 4,
+    0,  -5, -1, 3,  -1, -1, -2, 3,  0,  4,  1,  -4, 2,  4,  1,  4,  -4, 3,  -1, -4, -3, -4, -2, -2, -4, 3,  -4, -5, 0,
+    -3, -3, -1, -3, 4,  1,  2,  -5, 0,  0,  -3, 3,  -2, 4,  -5, -3, -4, 1,  -5, 1,  0,  3,  2,  3,  -3, 4,  1,  -4, 2,
+    -5, -5, -3, -2, -4, 0,  -5, 2,  -1, -2, 1,  0,  4,  2,  -3, -5, 1,  4,  -4, 0,  -2, -3, -3, -3, 1,  0,  2,  -5, -1,
+    -3, -5, -5, 2,  3,  4,  -4, 3,  2,  -2, -1, -2, -3, 0,  4,  4,  1,  -4, 2,  -4, 2,  -5, 4,  -1, 1,  2,  -4, -4, -3,
+    3,  -3, -4, 0,  4,  0,  1,  -3, -1, 4,  -2, 3,  -5, 1,  3,  0,  -1, -2, 1,  4,  4,  -1, 2,  0,  -4, -5, 2,  1,  4,
+    -3, 3,  -3, -3, 1,  2,  -4, -2, -1, -2, -3, -3, -1, -3, 1,  -4, 4,  -4, -1, 3,  1,  0,  -3, 1,  1,  -1, 2,  4,  1,
+    3,  3,  -1, -2, -1, -3, 3,  4,  4,  1,  3,  0,  3,  4,  4,  0,  4,  -3, 0,  -1, 1,  3,  -4, 3,  -3, -4, -5, -5, -1,
+    -4, 4,  4,  4,  -1, -2, -5, 4,  -2, 1,  -3, -5, 1,  4,  4,  4,  -2, -5, -1, -2, 2,  1,  -5, 4,  -2, 3,  -5, 0,  -1,
+    0,  2,  -2, -5, 4,  -2, 1,  2,  4,  -4, -3, -4, 2,  4,  -1, 3,  -2, 1,  -3, 0,  1,  -4, 4,  2,  -2, 3,  -1, -5, 4,
+    1,  1,  3,  -5, 1,  -4, -1, -2, -5, -2, 2,  -3, -4, -3, -3, 4,  -5, -1, 0,  1,  1,  -4, -2, 0,  -5, -4, -2, 4,  -4,
+    -1, 3,  -4, 0,  -5, 3,  1,  -4, -5, -4, -3, 1,  -2, -2, -5, 0,  3,  0,  -4, 3,  1,  2,  -1, 1,  4,  -2, -3, -1, 3,
+    -2, 2,  3,  -1, -1, 3,  2,  1,  -5, -1, -5, 3,  2,  2,  2,  1,  -5, -5, -1, 0,  0,  0,  -1, 1,  -5, -2, 1,  -4, 2,
+    -2, 2,  4,  -4, 0,  -3, -1, -4, 0,  -1, -1, -1, 4,  1,  -5, -5, -5, -5, 2,  1,  0,  -1, 1,  1,  -4, 3,  -5, 0,  0,
+    4,  2,  -4, -1, -2, -2, 0,  2,  -5, -1, 2,  1,  1,  3,  -4, 4,  -4, -1, 3,  1,  3,  -3, -4, -3, 1,  -2, -4, 1,  0,
+    -1, 2,  1,  -2, -5, -4, 2,  -2, 1,  -4, 1,  2,  -5, -2, 1,  -1, -4, -5, 0,  4,  1,  -4, 0,  2,  3,  -5, -2, -4, -4,
+    -2, 2,  -1, 3,  -3, 3,  0,  3,  3,  -5, 0,  -4, 2,  0,  4,  -3, -4, -4, 2,  3,  1,  3,  -4, -2, -3, 1,  2,  -4, 0,
+    -4, -5, 0,  3,  -2, 4,  -4, -4, -5, 4,  -1, -2, 4,  -4, 0,  -1, 1,  4,  -2, -4, -4, -4, 2,  2,  -4, -2, 3,  3,  -5,
+    -3, -4, -5, -3, 1,  -3, -4, 3,  -5, 0,  -4, -1, -4, 2,  0,  -1, 4,  -3, 1,  0,  -5, -3, 0,  4,  1,  -5, -1, -2, 0,
+    0,  -4, 2,  -5, -2, 0,  3,  4,  2,  1,  -4, -1, 2,  -3, 4,  -4, -4, 0,  -4, 2,  1,  2,  4,  3,  -2, -4, -1, 3,  -4,
+    1,  3,  -4, 0,  -4, -3, 2,  -1, 1,  3,  4,  -5, 3,  -5, -2, -2, -1, -2, 3,  -5, -1, -3, 0,  2,  -4, -3, 0,  -2, 4,
+    -2, -3, -5, -4, -1, -2, -4, -4, -1, 3,  -2, -5, 2,  4,  -4, -4, 0,  1,  0,  -2, -2, -2, 4,  2,  -2, 2,  0,  -1, 2,
+    3,  4,  -2, -5, 0,  4,  2,  -4, 0,  2,  -5, 0,  -3, -3, -4, -3, -2, 3,  -3, -3, 2,  2,  -2, 4,  -3, -1, -3, -1, -2,
+    3,  -4, -1, 2,  -1, -1, 1,  1,  3,  3,  -3, -5, -4, 0,  -1, 3,  -5, 2,  -1, 0,  2,  -4, -2, -5, -1, 4,  -5, -1, 4,
+    2,  -4, -2, 4,  2,  -2, -2, -4, 3,  0,  1,  2,  -2, 0,  -4, -5, 4,  3,  1,  3,  3,  -4, 0,  -5, -4, 2,  3,  4,  0,
+    -4, 1,  -4, 0,  -1, 3,  2,  -3, 0,  -1, 4,  3,  -3, 3,  0,  0,  0,  -4, -3, 3,  -4, 3,  -1, -4, 1,  -4, 2,  2,  -3,
+    2,  -1, 3,  0,  0,  -2, 1,  -3, -2, -1, -4, 1,  -3, 4,  1,  4,  -5, 0,  1,  1,  3,  -3, 4,  4,  0,  -5, 4,  1,  0,
+    -2, -5, 0,  4,  -3, -2, -1, 0,  -2, 4,  -1, -5, 4,  0,  -4, 1,  -1, 3,  2,  0,  3,  1,  3,  3,  -5, -2, 1,  4,  -3,
+    0,  4,  -4, 4,  -4, -4, -2, -5, 3,  -1, 1,  4,  3,  -5, 4,  -5, -2, -2, -5, -3, -2, -5, 0,  -2, -1, -4, 4,  1,  -2,
+    3,  -4, -2, -5, 2,  -5, -5, 0,  -4, -3, 2,  -3, 3,  4,  1,  -4, 4,  1,  -3, -1, -1, 4,  -2, -4, -3, 3,  3,  4,  3,
+    1,  -5, 0,  -3, 3,  0,  4,  -5, -4, 3,  -3, -2, -5, 4,  2,  0,  -4, -5, 2,  3,  -1, -1, 4,  3,  2,  0,  -3, 3,  0,
+    2,  -4, 1,  -4, -5, 1,  -3, 4,  0,  -5, 4,  -4, -3, -4, 3,  2,  4,  3,  -5, 0,  3,  4,  -2, 1,  -5, 3,  0,  -2, 4,
+    2,  4,  -3, -2, -2, -5, 1,  -3, 1,  -5, 1,  -1, 2,  -5, 4,  -2, 4,  -4, -2, -2, -1, 2,  -4, 0,  3,  -3, 2,  -1, -2,
+    -2, -4, -4, -3, 1,  -2, 2,  0,  -5, 3,  -2, 2,  -3, -1, 4,  3,  -5, -2, -2, 4,  4,  -2, -3, -1, -1, 4,  -5, 1,  3,
+    2,  -3, 3,  -3, -2, 0,  3,  3,  -3, 2,  4,  2,  0,  3,  -2, -2, 2,  1,  -1, -1, 2,  4,  -4, 1,  2,  1,  -3, -2, 3,
+    -3, -2, -2, -3, -4, -4, -1, -2, -5, 1,  0,  -2, 1,  4,  -1, -2, 0,  4,  -3, 4,  2,  -3, 2,  4,  4,  4,  -5, 4,  -3,
+    0,  4,  3,  -2, 0,  3,  -2, 1,  4,  3,  4,  -4, 1,  3,  3,  -5, 0,  3,  4,  3,  1,  -2, -2, 4,  -5, -4, 1,  4,  -3,
+    -5, 0,  0,  -4, -4, -1, 3,  -5, -5, 2,  -3, 1,  -3, 4,  -3, -2, -2, 0,  0,  -1, -1, 3,  2,  3,  3,  3,  -2, -5, -4,
+    -3, -4, -4, -1, -2, 3,  -2, -1, -4, 1,  -1, -4, -4, 3,  -1, -4, 1,  -2, 3,  4,  -1, 2,  -5, -4, 2,  -5, -3, 4,  -1,
+    3,  -3, 3,  -5, 1,  -3, 2,  1,  -1, 2,  4,  -2};
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/output_ref_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/output_ref_data.h
new file mode 100644
index 0000000..12e57e4
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/output_ref_data.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t fully_connected_int16_big_output_ref[33] = {-134, 14,   -41,  201, -125, -154, 211, 14,  172, 221, -31,
+                                                        352,  83,   104,  -4,  225,  269,  378, -65, 33,  98,  500,
+                                                        -136, -197, -351, 133, 223,  -74,  208, 476, 527, -97, -155};
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/test_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/test_data.h
new file mode 100644
index 0000000..c9e0a61
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/test_data.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_ref_data.h"
+#include "weights_data.h"
diff --git a/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/weights_data.h b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/weights_data.h
new file mode 100644
index 0000000..6f4c26b
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/fully_connected_int16_big/weights_data.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t fully_connected_int16_big_weights[7700] = {
+    1,  -3, 0,  -3, 0,  -1, -2, -5, -4, -5, -5, -4, 2,  1,  3,  -2, -4, 0,  -2, -1, 2,  -2, -4, 4,  1,  -5, 0,  -5, 0,
+    -3, -3, -5, -4, -4, 4,  -1, 2,  2,  3,  4,  -3, 3,  1,  -1, -1, 4,  4,  -2, 1,  -3, 2,  4,  1,  3,  -1, -4, 4,  2,
+    3,  1,  1,  -1, -1, -5, -4, -1, -5, -5, -4, -4, -1, -2, -4, -2, 0,  3,  1,  3,  -1, -2, -5, -3, 2,  -3, 0,  -2, -3,
+    0,  -2, 3,  4,  -1, -4, 0,  0,  -2, 1,  2,  -1, -4, -2, -2, 1,  4,  2,  -1, 0,  2,  -1, 4,  -4, 3,  -3, -5, 0,  1,
+    1,  -3, 3,  -1, 1,  -5, -1, 3,  -4, 4,  -5, -4, -5, -2, 0,  -3, 0,  -3, 0,  4,  -4, 1,  4,  -4, 4,  1,  -5, 4,  -5,
+    -3, -2, 3,  2,  -1, 3,  0,  -5, -2, 4,  3,  -5, 3,  3,  0,  -2, -2, -3, 4,  1,  4,  4,  -1, 3,  2,  3,  2,  -5, -4,
+    1,  -4, -5, 1,  1,  4,  -4, -3, 2,  -2, -2, 0,  -1, -2, -3, 3,  -2, -2, -2, -4, 2,  -5, -1, -4, 3,  3,  -5, 4,  -2,
+    -1, -1, 2,  0,  -5, 0,  -1, 1,  4,  -3, 0,  3,  3,  -1, 3,  -1, -5, -4, -2, 0,  3,  2,  -4, -3, -5, -1, 3,  1,  -3,
+    -1, -1, 2,  -3, -2, -2, 4,  -5, -2, 0,  0,  -3, 4,  3,  2,  0,  -4, -1, -4, 0,  1,  0,  -2, -3, 4,  -4, -4, 0,  -5,
+    1,  -4, 1,  2,  2,  1,  -4, 3,  -5, -2, 2,  0,  -5, -4, -3, -2, 4,  -5, -3, 1,  2,  0,  1,  -3, 4,  -1, -2, -4, -2,
+    1,  0,  1,  -4, 1,  -2, 3,  -3, 1,  0,  -1, 4,  3,  2,  -4, -1, -3, 1,  -1, 2,  -4, 0,  2,  -2, 4,  -3, 0,  -4, -2,
+    2,  -1, -3, 3,  4,  1,  4,  -4, -3, 3,  0,  -3, -5, 0,  -3, 3,  2,  4,  -4, 4,  -2, 4,  -4, -1, -4, -3, 0,  -2, 1,
+    3,  -4, -1, -5, -2, 2,  1,  4,  -5, -1, 2,  -1, 0,  1,  0,  -2, -3, 2,  -4, -5, -5, 4,  2,  -1, 4,  -5, -5, 2,  2,
+    -3, 2,  -5, -5, 2,  -3, -4, 0,  -3, 2,  2,  -3, 0,  0,  -3, -3, -2, -2, -1, -4, 4,  -2, -1, 2,  -1, -1, 2,  2,  2,
+    -5, 2,  -3, 3,  2,  -5, -1, 1,  -3, 1,  -5, -1, 2,  2,  -2, 2,  -3, 2,  -3, 0,  -2, -1, 0,  -4, -2, -4, -4, -1, -2,
+    -4, 2,  -2, 2,  2,  2,  -3, 0,  2,  3,  0,  -2, -1, 2,  1,  -4, 0,  1,  -4, -3, 2,  -4, 0,  4,  -5, 2,  -1, 2,  0,
+    -4, 4,  -2, 4,  0,  3,  0,  -3, -5, -4, -2, 3,  -1, 4,  -1, -5, -3, -3, -5, -2, 1,  0,  2,  1,  3,  2,  -2, 1,  -1,
+    4,  0,  -3, -3, -1, -5, -1, -3, 1,  -3, 2,  -5, 2,  -2, 0,  -4, -5, -5, -4, -5, -5, -5, 4,  2,  2,  -3, 4,  3,  3,
+    4,  3,  2,  1,  3,  -1, -5, 1,  0,  -1, 0,  -4, 4,  3,  2,  0,  3,  1,  -5, 3,  -3, 1,  3,  -4, 1,  3,  -4, 1,  -3,
+    -1, 2,  -4, -2, 3,  1,  2,  3,  3,  -3, 0,  0,  -4, -2, -1, 2,  -2, 3,  3,  -1, 4,  -2, 3,  0,  -3, -5, -4, -1, -2,
+    -2, -2, 2,  0,  1,  2,  -2, 3,  4,  -4, 2,  -2, -5, -1, -4, 2,  -2, 2,  4,  -1, 0,  -2, 0,  3,  -1, -2, 2,  2,  2,
+    2,  -2, -4, 2,  -5, 2,  -3, 4,  -1, 2,  -1, 4,  -3, -5, 1,  0,  -1, 3,  -1, -3, 2,  -5, -1, -4, -3, 3,  -1, 3,  -4,
+    0,  -3, -1, 0,  -4, 2,  -3, 3,  4,  -5, 0,  -1, 2,  -5, -3, 4,  -3, -3, -5, 4,  0,  -2, -1, -1, -3, 4,  -3, -3, 4,
+    -4, -4, -3, -2, 4,  1,  -5, 3,  -2, -3, -4, 0,  -2, 0,  2,  3,  -4, 0,  1,  -4, -1, -5, 2,  4,  3,  -1, 4,  0,  1,
+    -3, -4, -1, -5, 3,  4,  0,  -1, 1,  0,  1,  2,  -2, -2, 3,  4,  0,  -4, -3, -2, -5, -5, -2, -2, -3, -3, -5, -2, -1,
+    0,  1,  -3, -3, 3,  -4, 2,  -4, 3,  -5, -5, 2,  1,  3,  -3, 4,  -3, -4, -1, 2,  4,  -4, 2,  4,  -3, -5, -2, -5, 0,
+    -1, 0,  1,  -4, -4, 0,  -1, -1, -2, 4,  4,  -4, 2,  0,  1,  1,  -4, 3,  1,  0,  2,  -4, 1,  -3, -1, 4,  -2, -2, -1,
+    4,  1,  2,  4,  2,  2,  4,  1,  1,  -2, 3,  4,  -5, -4, 0,  -2, -4, -1, -1, -1, -1, 4,  -4, -1, -1, -5, -1, 3,  -1,
+    2,  4,  1,  -4, 0,  -2, -5, -3, -1, 0,  3,  -5, -4, -1, 1,  0,  -1, 1,  0,  -5, 3,  4,  0,  2,  -4, 1,  1,  2,  2,
+    0,  -3, 2,  3,  -5, -3, -5, -4, -1, 0,  4,  0,  0,  -1, 3,  1,  -3, 1,  1,  -5, -3, 3,  4,  2,  4,  2,  0,  2,  -1,
+    -3, 4,  2,  1,  2,  -1, 2,  -1, -2, 3,  -4, 0,  0,  -5, 4,  -4, 4,  -5, -2, -5, 0,  -3, 1,  -5, -3, 3,  -5, -4, 3,
+    -2, 2,  -5, -3, -3, 3,  -2, -4, -2, -1, -5, -5, 0,  2,  3,  -3, 4,  4,  -5, -1, 3,  3,  0,  -4, 4,  -5, -3, -3, 0,
+    -3, 2,  -4, 0,  0,  -5, -4, -2, -2, -1, 3,  4,  -1, 3,  2,  -3, -4, -1, 0,  3,  4,  -5, -1, 3,  4,  -1, 2,  -4, -5,
+    -5, 1,  -2, 4,  4,  3,  1,  -5, -5, 3,  0,  -2, -3, -2, -3, -2, 0,  -3, 4,  1,  2,  -3, 4,  3,  2,  0,  -5, -1, -4,
+    -2, 0,  -5, 2,  -4, 0,  -1, 4,  2,  -5, -1, 2,  2,  4,  -5, -3, 0,  4,  1,  -1, 0,  -3, 3,  -2, 4,  4,  3,  0,  2,
+    4,  3,  2,  4,  3,  3,  0,  2,  4,  -2, 2,  -1, 3,  0,  2,  -5, 4,  -5, -4, -2, 1,  2,  -4, -5, -1, 0,  3,  -3, 0,
+    2,  3,  -1, 3,  -4, -2, 3,  0,  -4, 2,  1,  2,  0,  2,  -2, -1, -4, -2, -5, 3,  3,  1,  3,  -3, -2, -3, -3, 4,  -5,
+    3,  1,  0,  4,  1,  1,  0,  0,  1,  1,  -5, -4, -1, 3,  3,  -5, 0,  -5, 4,  -3, 2,  1,  0,  1,  -5, 2,  0,  1,  -5,
+    -4, 4,  -1, 4,  0,  -2, 0,  3,  3,  4,  -3, 0,  2,  2,  -2, 0,  0,  0,  -3, 2,  3,  4,  -3, 1,  -4, 1,  2,  -4, -4,
+    2,  2,  3,  3,  3,  -2, -4, -3, 0,  3,  1,  -1, -5, -1, 4,  -4, 2,  1,  -1, -1, -3, -2, 3,  -4, -4, -1, 0,  -4, 0,
+    -2, -3, -2, 3,  -2, 3,  -5, 3,  0,  -2, 0,  4,  -2, 2,  -2, 2,  4,  3,  -2, -1, 4,  3,  -5, -2, -3, 1,  -2, 0,  -1,
+    3,  -5, -3, 4,  -2, 4,  -4, -5, 0,  1,  -2, 4,  2,  -5, 3,  -5, 4,  -2, -1, 2,  -3, -1, 3,  1,  -5, 2,  2,  4,  -3,
+    -5, 4,  -4, 0,  -3, 3,  3,  -2, 4,  -5, -2, 1,  0,  2,  0,  2,  2,  -1, 0,  -2, 1,  -5, 1,  3,  3,  -3, 1,  4,  0,
+    3,  2,  1,  4,  1,  -4, -2, 4,  1,  -1, -1, 4,  -4, 0,  1,  -1, 3,  3,  -3, 0,  -2, -4, -5, -5, -4, -1, -5, -2, -3,
+    3,  2,  -4, 4,  -1, 3,  3,  4,  4,  3,  3,  0,  0,  0,  -2, 2,  -2, -5, -5, 3,  -1, -5, 4,  -3, 1,  -4, 4,  1,  -3,
+    0,  -3, 2,  -4, -4, 4,  -1, -4, -5, 4,  -1, 1,  0,  2,  0,  -1, -2, 1,  -3, -5, -4, -1, 0,  -1, -5, 1,  -2, 3,  -3,
+    -3, -4, 1,  -1, 3,  2,  3,  -3, 2,  2,  -5, 1,  3,  -1, 1,  -3, 0,  -2, 2,  -3, -4, 1,  2,  -1, 1,  -3, -3, 4,  -3,
+    2,  2,  2,  4,  -2, -4, -1, -2, 4,  -1, -1, -4, 3,  0,  -5, -2, -3, -4, -1, 1,  -1, 4,  3,  2,  -2, -2, -1, 4,  3,
+    -5, 0,  -2, -4, 2,  -2, -4, -2, -4, 0,  4,  4,  1,  -5, 4,  4,  -3, 1,  0,  2,  -5, -1, -1, 1,  -2, 2,  -4, -5, -1,
+    1,  3,  -4, 3,  -4, 3,  -5, -4, -2, -4, -4, -3, -4, 3,  -3, 0,  0,  1,  -4, 3,  0,  4,  -4, -3, 1,  2,  3,  -3, 4,
+    2,  -3, -4, 0,  4,  -5, 2,  4,  2,  4,  -5, 4,  -5, 3,  -1, 2,  -1, -5, 4,  3,  0,  1,  1,  1,  -5, 1,  4,  -3, -3,
+    -2, 4,  0,  -4, -3, -4, 4,  1,  0,  1,  0,  -4, -1, 0,  3,  -4, -4, -2, 4,  4,  -1, -2, 4,  -4, 4,  3,  -5, -3, 4,
+    1,  2,  -1, -3, 1,  1,  -4, 1,  -3, -2, 4,  0,  2,  -3, 0,  -3, -5, -5, -5, -2, 0,  3,  1,  -1, 2,  2,  -1, 2,  -2,
+    4,  -2, 4,  -1, -1, 3,  -4, -1, -4, 4,  -3, 4,  -1, 3,  0,  -1, -3, -3, 2,  1,  0,  -2, 3,  1,  2,  -4, 2,  -1, 3,
+    4,  -5, 4,  -5, 4,  4,  2,  -1, 1,  3,  -3, 1,  -1, -1, 4,  -2, -1, 1,  2,  -5, 4,  2,  -3, 0,  -1, 0,  3,  2,  -2,
+    -5, -2, 4,  -5, -4, -4, -4, 4,  -2, 4,  -3, 0,  3,  2,  -2, 2,  0,  -1, -1, -4, 3,  -4, -4, 0,  -1, -5, -3, 3,  2,
+    -1, 3,  -2, -5, 0,  -1, 1,  -5, -4, 2,  -5, 1,  1,  3,  2,  -1, -3, 0,  -5, -4, -5, 2,  -4, -3, 2,  1,  -4, 2,  -3,
+    -2, 0,  1,  3,  3,  4,  -2, 2,  -2, -3, 4,  4,  -2, 0,  4,  4,  3,  -1, -2, -4, 3,  3,  2,  -1, 3,  -2, 2,  -2, 3,
+    0,  0,  4,  -5, 4,  -5, -1, 2,  -5, 3,  0,  1,  3,  0,  -1, -2, -5, 2,  3,  -1, 4,  -2, 4,  4,  -4, -2, -5, -5, -1,
+    3,  0,  -5, -2, 2,  -2, 4,  -4, -3, 0,  2,  -1, 3,  2,  -2, -1, -4, 2,  -5, -2, 2,  2,  3,  4,  4,  -5, -3, 4,  -1,
+    0,  4,  1,  -3, -3, 2,  2,  2,  4,  -2, -3, -4, 1,  3,  -3, -5, 4,  -1, -4, -1, -5, 4,  -5, -2, 3,  3,  3,  1,  1,
+    4,  3,  0,  -4, 4,  2,  -3, 3,  -5, 4,  -3, -2, -5, -1, 1,  -2, -3, -3, -3, 4,  0,  2,  -3, -3, 1,  2,  -5, -2, 2,
+    3,  2,  3,  -4, 1,  2,  4,  1,  4,  3,  -1, 0,  1,  -2, -5, -1, -3, -2, 4,  -3, -4, -1, -2, -4, -5, 0,  -1, -5, -4,
+    -4, -1, 1,  -5, -3, -1, 2,  -1, 4,  1,  3,  3,  -5, -4, 2,  2,  -2, 0,  -5, 1,  -2, -1, 3,  1,  -4, -3, 2,  4,  2,
+    -5, 2,  -4, 3,  -1, 0,  3,  -5, 1,  4,  3,  -2, 3,  3,  -2, 3,  -2, 1,  0,  -5, 3,  -4, -4, -4, 0,  -5, 4,  -5, -2,
+    -4, 4,  -5, 0,  1,  -4, -1, 0,  3,  3,  -3, -1, 1,  -3, -5, 4,  -4, -4, -3, -3, 0,  -4, -3, 3,  -2, -4, 0,  -3, -4,
+    -3, -1, -4, -2, -4, 4,  -2, 4,  -2, 3,  4,  2,  -4, 2,  -4, -5, 2,  2,  1,  -5, 2,  0,  4,  4,  1,  0,  2,  3,  -2,
+    2,  1,  -1, -1, 3,  1,  1,  -3, 3,  2,  -5, -1, 0,  3,  -3, -2, -2, 4,  2,  4,  -5, -4, 2,  3,  0,  4,  4,  1,  -5,
+    4,  4,  -4, 2,  -4, 1,  4,  -5, -3, -5, -2, -1, -1, -3, -4, 2,  -2, 0,  0,  -3, 3,  0,  1,  4,  -4, -5, -4, -2, 0,
+    -2, 4,  2,  3,  -1, 3,  -5, 3,  -2, -3, 3,  4,  3,  -4, -3, -3, -2, 3,  4,  -2, 2,  -3, 1,  3,  2,  -3, 2,  4,  -1,
+    3,  0,  2,  3,  1,  2,  3,  -5, 4,  3,  3,  -2, 2,  -2, -2, -4, 3,  0,  -4, -3, 4,  1,  2,  2,  -3, 0,  4,  -3, -2,
+    -3, -4, -4, -2, 0,  3,  -2, 2,  -5, -4, 0,  4,  3,  3,  0,  -5, -4, -2, -1, -3, 2,  3,  -3, -3, 1,  -3, -4, -5, 2,
+    1,  1,  -3, 1,  -5, 3,  4,  1,  -2, 1,  2,  0,  -2, -2, -4, -3, -5, -3, -5, 3,  -4, 1,  2,  -4, -4, 1,  4,  -5, 4,
+    -5, 3,  -5, -5, -3, -2, -3, 0,  -1, 4,  0,  -3, -4, 1,  -1, -3, -1, 2,  2,  -3, 4,  -5, -1, -1, -2, -3, -3, 4,  1,
+    -2, 2,  -1, -3, -4, 3,  -3, 2,  -2, 1,  -5, 0,  4,  -3, -4, 1,  -4, -3, -4, 0,  -4, 3,  4,  3,  -5, 0,  -4, 1,  -2,
+    4,  -4, -3, -4, -4, -1, 1,  4,  -4, -4, -5, -3, 1,  -3, 3,  -3, 2,  -3, 0,  -4, -1, 1,  2,  0,  -4, 4,  3,  4,  1,
+    -1, 1,  2,  0,  -3, 1,  -4, -3, -5, -5, 0,  1,  2,  -4, -2, 3,  -1, 4,  3,  1,  3,  -3, -5, 1,  -2, 1,  -5, -3, -3,
+    -4, 0,  -2, -3, 3,  0,  4,  -1, 4,  -4, 1,  -4, 3,  -4, 3,  -3, 3,  -1, -1, -3, -4, -2, -1, 4,  3,  -3, 2,  -2, 4,
+    -5, -2, -4, -4, -3, 3,  -2, -1, -2, 2,  3,  3,  -1, 3,  4,  3,  1,  -5, 4,  -2, 3,  -2, 0,  2,  2,  -5, -4, 1,  -1,
+    -2, 2,  0,  -1, -2, -2, 3,  2,  -3, -2, 3,  2,  -2, -3, 3,  1,  2,  -4, 4,  2,  3,  -4, -3, 0,  -4, 3,  0,  2,  -4,
+    -1, -4, -2, -2, 0,  -2, -1, 4,  2,  1,  -3, 1,  2,  -1, 2,  -3, -1, 4,  -5, 0,  1,  -3, 4,  2,  -5, -2, -4, 3,  -4,
+    -4, 3,  -4, 3,  2,  0,  3,  -1, 2,  4,  -4, -5, -5, -5, -4, -4, -4, 3,  -2, 4,  -1, 4,  0,  -4, 1,  3,  -3, 2,  -4,
+    1,  -5, -2, -4, 4,  2,  -3, -4, -4, -4, 2,  -1, 4,  -4, -2, 1,  -4, 4,  4,  3,  -3, 3,  3,  4,  -3, 4,  -4, 0,  4,
+    2,  0,  2,  -4, 1,  4,  2,  4,  2,  4,  -4, -4, 0,  -3, 0,  3,  -4, -5, 2,  3,  -3, 1,  1,  1,  4,  -5, -5, -4, -4,
+    1,  -5, -1, 1,  0,  -1, -4, -2, -3, 0,  4,  4,  -2, 1,  -3, 3,  0,  -5, -4, -5, -2, 4,  1,  2,  0,  -2, -5, 2,  4,
+    4,  0,  -4, 1,  -3, 2,  3,  -5, 2,  3,  -4, -2, 4,  -4, -1, -5, 0,  -4, -4, -4, 3,  -1, -2, -3, -4, -1, 1,  -3, 2,
+    -4, -2, 3,  -4, 0,  0,  4,  2,  2,  -2, 0,  -4, 1,  -3, 3,  0,  1,  -3, 4,  -5, 1,  -5, -2, -1, 4,  2,  -2, 0,  -5,
+    -1, -2, 0,  -4, -1, 1,  -4, 0,  -5, -4, -4, -4, -4, -5, -1, 4,  3,  -2, -4, -2, -2, -4, -5, -5, -1, -4, 3,  4,  -4,
+    -3, 2,  -3, 2,  -5, -4, 3,  4,  -3, 2,  2,  -3, -1, -4, 2,  2,  1,  4,  1,  -5, 4,  -3, -3, 2,  2,  -3, -2, 4,  -1,
+    -3, -5, 0,  -1, -4, 3,  -4, 0,  3,  -2, 4,  -3, 0,  -4, 4,  2,  4,  -4, 1,  0,  0,  -1, -2, 2,  3,  -5, 1,  4,  -4,
+    3,  1,  4,  4,  -1, -1, -2, -4, 0,  4,  3,  0,  -3, -2, -5, 1,  2,  2,  -2, -2, 1,  -4, -5, 3,  -3, -1, 1,  -5, -3,
+    3,  -2, -4, -1, -2, -2, -2, -3, -5, 0,  -2, 0,  -4, 1,  -3, -2, 2,  -3, 1,  1,  4,  3,  2,  0,  1,  -1, -2, 2,  -3,
+    -5, 4,  3,  -3, 4,  -5, -3, -3, 2,  4,  -1, -2, 1,  -5, 2,  -2, 2,  2,  1,  -2, 0,  2,  -3, -5, 4,  4,  0,  1,  -5,
+    1,  -1, -3, -5, 0,  2,  3,  2,  -2, 2,  -4, 4,  2,  0,  -1, 1,  -1, 0,  -2, 3,  2,  -1, -5, -1, -1, -2, 0,  2,  4,
+    -1, 2,  -1, -2, 4,  1,  2,  -5, -1, -2, 2,  -2, -1, 4,  -4, 3,  -5, -3, -3, -3, 1,  -4, -3, 2,  -2, -5, 0,  2,  -2,
+    0,  1,  0,  -3, -4, 1,  -5, -5, -4, -4, -4, 0,  -1, -4, -2, -2, 1,  -2, -5, 3,  2,  -4, 1,  1,  -1, 0,  3,  -1, -3,
+    3,  -1, 2,  2,  3,  4,  -3, -2, -2, 2,  1,  -3, -2, -2, 0,  2,  -3, 2,  -1, 0,  -1, 1,  1,  1,  1,  -4, 0,  2,  4,
+    -3, -4, -2, 1,  -1, -1, 4,  -1, -1, -1, 4,  -4, -5, 1,  1,  -4, -3, 2,  1,  2,  -2, 3,  3,  -1, 3,  -5, 3,  1,  -1,
+    -1, 4,  3,  -4, -5, -1, 1,  4,  -4, -3, -5, -2, -4, 3,  -1, 1,  -1, 2,  -5, -1, -2, 3,  1,  -4, -5, -5, 2,  -1, -5,
+    -4, 1,  -5, -2, -1, -1, 0,  -5, -2, -4, 2,  2,  -4, 0,  4,  -2, -5, 1,  0,  -4, 4,  1,  1,  3,  -5, -5, 2,  -5, 2,
+    -1, -1, -5, 2,  0,  2,  -5, 1,  -1, 3,  -3, 0,  -1, -4, 4,  -1, -4, -1, 3,  1,  4,  4,  1,  2,  -1, 0,  -3, 1,  -5,
+    3,  3,  3,  -3, -4, 4,  -3, 4,  1,  2,  -3, -1, 1,  -1, 1,  -5, -5, -4, 4,  -3, -2, 4,  -1, 3,  0,  4,  -5, -3, 3,
+    -2, -4, 0,  -1, 0,  4,  -5, -4, 2,  4,  -3, 2,  2,  -5, -4, 0,  -4, -2, 1,  -3, 0,  1,  4,  0,  -1, -1, -4, 1,  0,
+    3,  4,  -4, -3, 1,  4,  -4, 1,  4,  1,  3,  3,  -2, 1,  0,  4,  0,  -5, 2,  3,  -2, 4,  0,  2,  -5, -1, -5, -5, -1,
+    -1, 2,  -5, -2, 2,  -3, -5, 3,  -1, 4,  2,  2,  -5, -4, -2, -2, -3, 4,  3,  2,  -2, 3,  -2, 0,  -5, -2, 3,  4,  0,
+    0,  1,  -5, -1, 2,  4,  -3, -1, -5, -5, -1, 0,  2,  4,  0,  4,  -5, 1,  -4, 2,  2,  3,  3,  4,  -2, 3,  2,  -1, 1,
+    2,  -5, 4,  3,  -1, 3,  3,  -4, 0,  -1, -1, -5, 0,  -5, 4,  1,  2,  0,  -2, -4, -4, 0,  2,  -5, -2, 4,  -2, 3,  -5,
+    -5, 1,  -3, 0,  3,  -4, 2,  3,  4,  -2, 1,  1,  -1, -4, 0,  4,  -2, 2,  3,  -3, -3, -3, -5, -1, -4, 0,  1,  4,  1,
+    -3, 4,  -2, 3,  2,  0,  0,  1,  -1, 0,  2,  2,  -2, 1,  3,  2,  -1, -5, 2,  -5, -5, -5, -1, -4, -2, 4,  -4, -4, -5,
+    3,  4,  2,  2,  4,  1,  0,  2,  -2, 4,  2,  -2, -1, -1, -5, -2, -5, -5, 4,  -3, 4,  2,  4,  -2, -4, -3, -5, -5, -5,
+    1,  3,  -2, 0,  4,  4,  -2, 3,  -3, -5, 1,  -4, -4, 1,  -5, -4, -3, 1,  0,  -2, -5, -1, -3, -3, 0,  1,  1,  0,  2,
+    0,  2,  -1, -2, -5, -4, -4, 2,  -5, -3, -5, 2,  -3, 4,  -1, 2,  -4, 1,  4,  -3, -3, -1, -5, 4,  1,  0,  3,  3,  4,
+    -1, -3, 4,  -5, -3, -5, -3, -4, -2, -2, 3,  -5, -2, -2, -3, 3,  1,  2,  3,  4,  -3, 0,  1,  4,  -5, -2, -4, 0,  0,
+    1,  -4, 3,  -5, 0,  -3, 1,  -3, -5, 1,  1,  4,  -5, 2,  3,  3,  3,  -5, 3,  1,  3,  0,  -5, -2, 2,  4,  -1, -3, -4,
+    -3, -2, -2, 2,  0,  -2, 0,  -2, 1,  -3, -3, -1, -1, -5, -4, -2, -4, 3,  3,  0,  3,  -1, -1, 2,  3,  2,  4,  -5, 4,
+    -2, -3, 3,  -5, 4,  1,  -2, 2,  2,  -5, -4, -4, 3,  4,  -3, -1, -4, -5, 1,  4,  -1, 2,  3,  1,  3,  -5, 3,  4,  -5,
+    0,  2,  -3, -3, -4, 0,  2,  3,  -4, -5, 4,  3,  -5, -4, -4, 2,  0,  0,  -5, -3, -5, 0,  4,  1,  -2, 0,  -5, 1,  -2,
+    -5, -3, -3, -3, -5, -2, -2, 1,  -1, 0,  3,  -2, -4, -2, 0,  2,  -5, -2, -5, -4, 4,  4,  -3, 0,  3,  -4, -1, -4, 4,
+    3,  -1, -4, -1, -4, 2,  1,  -2, -1, -5, -5, -5, -4, -5, 0,  0,  -3, -3, -1, 1,  -3, 1,  -3, 1,  4,  3,  4,  0,  -2,
+    3,  2,  -4, -4, -1, 3,  0,  3,  -5, -5, -2, 4,  -5, -2, 2,  -5, -4, -5, -4, 3,  2,  -5, -3, 4,  1,  2,  2,  3,  3,
+    -1, -1, -1, 2,  -4, -1, -5, 1,  3,  0,  1,  4,  -2, 0,  2,  -5, -5, 1,  -3, 0,  3,  2,  -1, 1,  0,  -2, 1,  -2, -2,
+    3,  -2, 0,  -4, 2,  -2, -2, 0,  0,  -1, 4,  -4, 2,  -2, 3,  1,  -4, -3, -5, 0,  -2, -3, -1, -2, 2,  1,  -5, 1,  -1,
+    0,  2,  -1, 2,  0,  -2, 4,  0,  -2, 1,  -3, -5, 4,  -2, -1, -3, 1,  2,  -4, -1, 0,  -3, 0,  -2, -1, 0,  3,  2,  3,
+    -5, -4, 0,  4,  -4, -2, -1, -5, -4, 2,  1,  -1, 1,  0,  4,  -2, 2,  -4, 0,  4,  2,  -3, 0,  4,  2,  0,  1,  -4, -4,
+    -1, -5, 0,  -3, 2,  -5, 3,  0,  -2, 1,  0,  -2, 0,  -1, -2, 4,  -3, -4, -1, 1,  2,  2,  -3, -4, -3, -2, -5, 0,  4,
+    4,  -4, -5, 2,  -1, -4, -2, 0,  -2, 4,  0,  1,  -3, -2, -1, 4,  -1, -2, -4, 3,  1,  3,  3,  0,  0,  0,  -2, -1, 2,
+    -4, -2, 1,  4,  -4, -3, 3,  -2, 0,  -5, -2, -1, -2, 2,  -4, 2,  -4, 0,  -5, -1, 0,  0,  -3, 1,  2,  1,  0,  -3, 1,
+    3,  0,  -3, 2,  2,  -1, -1, 3,  -4, -4, -4, 0,  -5, -4, 3,  1,  4,  0,  -4, -3, -5, 4,  -5, -2, 3,  -2, -2, -1, -1,
+    -4, -1, -5, 4,  4,  -3, -4, -1, -3, -4, -2, -2, 2,  -1, -1, -1, 1,  -1, 4,  -5, 0,  4,  -3, 1,  -1, 0,  -1, 4,  -4,
+    0,  0,  -3, 4,  -5, -5, 2,  4,  -2, 0,  -5, -4, -2, 2,  0,  -5, 1,  2,  -2, -1, -1, 0,  -3, -3, -2, -2, 1,  1,  -1,
+    -3, 4,  -1, -4, 0,  -4, 4,  4,  -5, -4, 1,  4,  4,  4,  -3, -3, 0,  4,  1,  -2, 3,  -4, -3, 3,  4,  -1, 1,  -2, -1,
+    2,  2,  -3, -3, -1, -4, 1,  -4, -1, 3,  -5, -2, 4,  -2, 3,  -2, -5, -1, 1,  -5, 3,  2,  1,  -4, -4, -4, -5, -2, -1,
+    -5, 1,  1,  -1, -1, -5, -2, 2,  -1, -3, 0,  1,  -4, 1,  1,  -5, 4,  2,  0,  2,  -3, -4, -4, 2,  2,  -1, 2,  2,  -2,
+    2,  -1, -4, -4, -4, -3, 2,  -2, 4,  -3, -5, 2,  3,  2,  -4, -4, 1,  -1, 4,  4,  2,  4,  1,  -5, 4,  2,  -2, 3,  3,
+    -2, 1,  3,  -5, 4,  2,  2,  -1, 2,  -5, 1,  -3, -5, -2, 0,  2,  0,  -5, -4, -3, 2,  4,  3,  3,  -4, 1,  -5, -2, -4,
+    -3, 3,  0,  3,  -3, -3, 1,  1,  2,  4,  -4, -3, -1, -4, 2,  1,  3,  3,  3,  -5, 3,  0,  -5, 1,  3,  -2, -1, -4, -5,
+    -2, -5, 3,  0,  2,  -2, 1,  4,  -2, -1, -3, 2,  -4, -1, -3, 1,  -5, 3,  -2, 1,  3,  4,  -3, -5, 0,  1,  3,  3,  1,
+    2,  1,  -2, 4,  -3, -2, -3, -5, -1, -2, 2,  -5, -1, 0,  4,  3,  -3, 0,  2,  0,  -5, 1,  -5, 4,  4,  2,  -3, 2,  3,
+    1,  -4, -1, 3,  -2, -3, -4, -2, -5, -1, 4,  -3, -4, -1, 1,  -2, 2,  -1, 3,  1,  -3, -2, -5, -3, 0,  -4, -3, -1, 4,
+    4,  -4, 1,  4,  -4, 1,  -3, 2,  -5, -2, 0,  -1, -2, -5, 3,  3,  0,  0,  -3, 3,  -4, 1,  1,  -3, -4, 1,  -2, -4, 3,
+    -4, 3,  -3, 4,  -4, -5, -5, 4,  -4, 4,  1,  -1, -1, -5, -2, 4,  4,  2,  -3, 4,  -5, -2, 1,  2,  3,  2,  1,  -5, -4,
+    0,  4,  -4, 0,  -3, -2, -4, -3, -4, 2,  0,  -5, -1, -5, -5, -5, 3,  4,  0,  1,  -3, -3, 3,  2,  1,  4,  3,  2,  -3,
+    -2, -5, -1, -1, 4,  -2, -3, -2, -3, -2, -5, -1, -1, -5, -2, 4,  -1, -5, -5, -4, -5, -3, 1,  0,  -1, -5, -5, -4, -4,
+    -5, 0,  -4, 0,  1,  2,  0,  -1, 3,  4,  -5, -4, 3,  4,  -4, -4, 2,  -2, 1,  -5, -2, -2, -1, -5, 1,  2,  -4, 1,  2,
+    -3, -2, 2,  3,  1,  -2, 2,  -2, 1,  -2, -3, 0,  0,  2,  4,  0,  -2, -1, 3,  3,  -1, 2,  4,  1,  2,  0,  1,  2,  0,
+    0,  -4, -4, 4,  -5, -3, -3, 2,  4,  -3, 1,  3,  3,  3,  3,  -1, 3,  -1, 1,  -2, -4, -1, -4, -5, -4, 2,  -2, -3, 4,
+    4,  -3, 2,  -4, -3, 3,  -2, 4,  -1, 4,  -1, 1,  1,  -2, -5, -2, 3,  2,  -4, 1,  3,  -2, -5, 1,  -3, -1, -1, -2, -3,
+    1,  -1, -3, -1, 3,  -2, -4, 3,  0,  0,  -1, 2,  0,  2,  3,  -1, 3,  0,  -1, 1,  -2, 3,  2,  4,  -1, -5, 0,  -2, -2,
+    3,  -2, -5, -2, -4, 4,  -5, 0,  2,  4,  -4, -4, 4,  -4, -4, 0,  3,  3,  -1, -2, 0,  2,  3,  -3, -1, -4, 3,  -2, 4,
+    4,  -3, 0,  -2, 4,  -4, -2, -5, 0,  -5, -5, 4,  0,  -1, -2, -3, 2,  2,  4,  -5, 0,  -5, 3,  -3, -5, 1,  2,  -2, -5,
+    -1, 1,  -1, 3,  2,  4,  -5, -3, 0,  -5, 0,  -2, -1, -2, 4,  -1, -1, 2,  0,  -3, 2,  -2, -1, 4,  -3, -2, -3, -4, 3,
+    -3, -2, -1, -2, 0,  -2, 1,  2,  3,  -3, -2, -2, -5, -4, -1, 3,  4,  -1, 3,  0,  3,  -2, 3,  4,  1,  1,  3,  -1, -4,
+    4,  -3, -4, -5, -2, 0,  -5, -2, 4,  -4, 2,  -4, -5, 4,  -1, -2, 0,  2,  -1, 1,  3,  4,  -4, -2, 2,  3,  -2, -5, 4,
+    0,  -4, 2,  -3, 0,  0,  4,  0,  -3, 0,  -1, -4, -2, -1, -1, 4,  3,  -4, -5, -2, -2, -4, 4,  2,  -5, -1, 4,  -2, 0,
+    3,  -2, -4, 1,  -2, -4, -3, -1, 3,  -3, -2, 2,  -4, -5, 1,  1,  4,  4,  1,  3,  1,  2,  -2, -4, 0,  -2, -3, -4, -2,
+    -2, 1,  -1, -2, -5, -1, 0,  -5, 3,  -1, -3, -2, 1,  -5, -4, 4,  -3, -4, -1, 2,  0,  -3, 2,  3,  -2, 0,  2,  -5, 1,
+    3,  -3, 2,  3,  0,  -5, -4, -3, -3, 4,  4,  -2, -3, -3, 4,  -3, -1, -1, 2,  -1, -4, 0,  1,  3,  -1, 1,  0,  -4, 1,
+    0,  -5, -1, -5, -3, -1, -1, -2, 2,  3,  -1, -4, -3, 2,  0,  -1, 3,  1,  1,  3,  -4, 0,  1,  -1, 4,  0,  -5, -4, -2,
+    -1, -2, -2, -2, -3, -2, -3, 1,  0,  -5, -4, 3,  -4, 1,  -3, -5, -2, -3, 0,  3,  -3, 4,  2,  -5, 1,  4,  3,  -3, -5,
+    2,  -2, -1, 1,  -2, 4,  4,  4,  4,  0,  -5, 3,  0,  0,  2,  -3, -5, -5, -5, -2, -4, -1, 4,  4,  1,  -1, 3,  -4, -1,
+    -5, 0,  2,  2,  -5, -1, 1,  -3, -5, -2, -2, -1, -4, 0,  0,  3,  0,  -1, 3,  0,  -2, 4,  -1, 0,  1,  3,  0,  0,  3,
+    0,  3,  0,  1,  -3, -2, -5, 1,  4,  4,  0,  -2, 0,  1,  -2, 1,  -4, 0,  0,  4,  -2, 2,  -5, 0,  1,  -2, 4,  3,  2,
+    4,  -4, 0,  1,  1,  -2, 2,  -4, 2,  -3, -3, -1, 0,  2,  -3, 1,  -3, 2,  -1, -1, 1,  -5, -3, -1, -1, -3, 0,  3,  1,
+    1,  -2, 2,  -2, -2, -2, -1, -1, -4, -3, -3, -4, 1,  4,  -4, -4, -3, -1, -5, -2, -2, 3,  -1, -1, 3,  -5, 4,  -3, 1,
+    3,  -4, -1, 4,  -5, -1, -2, -3, -4, -4, -5, -5, -3, 1,  4,  -1, -4, -3, 2,  -3, 0,  -3, 1,  2,  -1, -2, -5, -5, -2,
+    3,  -1, -1, -4, -4, 3,  1,  1,  -5, -2, -3, -1, -3, -1, 2,  -2, -5, -5, 0,  2,  -1, -1, 3,  -1, -3, 3,  3,  -2, -1,
+    1,  -2, 4,  2,  2,  0,  -5, 4,  4,  1,  3,  1,  3,  1,  -2, -5, -3, 3,  3,  3,  -3, -4, 1,  -5, -4, 0,  2,  -1, 0,
+    -1, -4, 3,  4,  -5, -5, 3,  -5, 4,  4,  0,  -2, 2,  2,  -5, -1, 1,  3,  3,  3,  3,  -2, 0,  0,  -4, -2, 1,  -3, 2,
+    0,  -2, 3,  -5, -4, -1, 2,  -5, -1, 0,  4,  -3, -2, 2,  -4, -3, 0,  -1, 0,  2,  -1, -3, -2, 4,  3,  0,  -3, -2, -5,
+    -2, 2,  4,  3,  -5, -2, -5, 3,  -4, 4,  0,  -5, 2,  4,  1,  -2, -5, -1, 0,  0,  -5, 0,  -4, 1,  3,  0,  1,  0,  -3,
+    -4, -5, -3, 2,  0,  -5, 1,  -2, -2, -2, 1,  -4, 0,  -2, 3,  3,  2,  -2, 2,  -5, -3, -4, 0,  -1, -4, -5, 2,  1,  0,
+    -1, 1,  -3, -5, -4, 0,  4,  3,  -3, 0,  -1, 1,  -5, 3,  -2, -2, 4,  -4, -5, 4,  -2, -4, 0,  2,  2,  4,  0,  -4, -2,
+    2,  2,  -3, -5, 2,  -3, -1, 4,  2,  -5, 0,  -1, 2,  0,  -4, 4,  3,  -2, -1, 1,  0,  -4, 1,  -2, -2, 0,  -1, -3, -3,
+    -2, -2, -4, -4, -3, 1,  -3, -5, 0,  -4, 2,  4,  3,  3,  2,  4,  -3, -5, 1,  -1, 0,  -3, -2, 2,  1,  -1, 2,  -5, 2,
+    -3, -2, -2, -3, 3,  -3, -1, 0,  0,  2,  -4, -3, -5, 1,  1,  4,  -2, -2, -1, -5, 2,  -1, 2,  4,  -1, 4,  -4, 3,  -5,
+    -1, -3, -1, 0,  3,  2,  0,  -2, 1,  3,  1,  -2, -2, 4,  3,  -3, 0,  -4, 3,  4,  -1, 2,  0,  -2, 1,  -2, -4, -1, 3,
+    2,  3,  -4, 2,  -3, 0,  -4, -2, -1, 3,  0,  4,  1,  -4, -5, 4,  4,  -1, -3, 0,  -5, -5, 0,  2,  4,  -2, 3,  -4, 2,
+    0,  -3, -1, -2, 2,  0,  -1, -5, 3,  -3, -5, -3, 1,  -3, -4, -4, -5, -5, 1,  -1, 4,  -2, 2,  -3, 0,  0,  2,  0,  0,
+    -5, 4,  4,  0,  3,  -2, 3,  -1, 0,  0,  -2, 2,  3,  -3, -2, -1, -2, -1, 4,  2,  -3, -2, -3, -4, -4, -4, 3,  -3, 0,
+    2,  -5, 4,  -1, 3,  -4, 4,  -3, 4,  -2, -2, 4,  4,  -2, -2, 2,  2,  -1, 0,  1,  -2, -4, -4, -2, 4,  2,  -4, -5, -2,
+    3,  -4, -3, 3,  0,  1,  1,  -3, 3,  -4, 3,  2,  1,  2,  0,  -3, 0,  0,  -3, 2,  -3, 2,  0,  4,  -4, -2, 3,  -5, -5,
+    0,  -3, 3,  -3, 3,  -1, -5, 3,  4,  -2, 0,  -1, 0,  2,  4,  -5, -4, 1,  -2, 2,  3,  1,  -5, -5, -2, -1, -5, 4,  0,
+    3,  2,  4,  2,  2,  3,  -4, 2,  -4, 4,  3,  0,  -3, -3, -1, -5, 4,  -5, 0,  -1, -1, 3,  0,  -5, 3,  4,  -2, -3, 2,
+    2,  2,  1,  1,  3,  1,  4,  -5, -1, 4,  3,  -3, -1, -2, 3,  -4, -2, -2, -1, 0,  0,  -4, 4,  -3, 0,  4,  3,  3,  2,
+    2,  -1, 0,  3,  -2, 0,  4,  3,  -1, -4, -1, 2,  -5, 0,  0,  3,  -2, 0,  -3, 4,  3,  -5, 2,  -5, -4, 1,  0,  -1, -5,
+    -4, 3,  -2, -5, 1,  -2, 3,  1,  4,  -3, -2, -1, 3,  4,  0,  1,  -4, -3, -5, -5, 4,  -4, -1, 1,  -5, -3, 0,  -3, 2,
+    3,  -3, 1,  2,  -2, 3,  2,  -2, -4, -2, 2,  3,  -2, -5, 3,  3,  0,  4,  -3, 2,  -5, 3,  -2, -2, -4, 3,  -3, -4, -1,
+    4,  3,  -4, -1, -5, 2,  4,  2,  -5, 1,  2,  3,  3,  -4, 1,  -3, 4,  -1, -1, 2,  2,  0,  -1, 2,  -1, 3,  -5, 4,  -1,
+    -1, -5, -5, -1, -2, 2,  -4, 0,  -4, 4,  -2, 0,  -1, 0,  -3, -1, 3,  4,  1,  1,  -1, -3, -5, 0,  2,  -4, 2,  4,  -1,
+    0,  -3, -4, -4, 2,  2,  -1, 2,  -2, 3,  4,  3,  0,  -4, -4, -3, 4,  1,  3,  -3, 3,  3,  -1, 2,  2,  0,  2,  1,  -1,
+    0,  4,  -5, 0,  2,  -3, 2,  3,  4,  0,  -5, 3,  1,  0,  2,  3,  1,  -3, 4,  3,  -2, 4,  2,  4,  -3, -3, 4,  2,  -5,
+    -3, 1,  1,  2,  3,  4,  -3, -4, 2,  2,  3,  4,  -4, -1, -3, -4, 3,  3,  3,  2,  -4, -2, -2, -5, -3, -2, 2,  0,  4,
+    0,  -3, 4,  -2, 4,  4,  -3, -5, 4,  -4, 0,  -4, -5, 4,  -2, -5, 0,  -5, 3,  -3, -1, 3,  -5, -2, -4, 4,  0,  2,  -4,
+    4,  -3, -4, -3, -2, -5, -3, -2, 3,  -4, 0,  -5, 1,  3,  -5, 0,  -2, 0,  -4, -1, -5, 0,  -3, -5, -5, -3, -4, 3,  -5,
+    1,  -1, 2,  3,  3,  2,  -1, -1, -1, 2,  -1, -2, -1, -4, 2,  2,  -5, 1,  -3, -3, 2,  0,  0,  -2, 2,  -2, 1,  3,  3,
+    1,  -1, -2, -2, -3, 2,  1,  3,  -5, -4, -4, 3,  -2, -5, -1, 4,  -2, 1,  -2, -3, 2,  3,  -4, -4, 1,  -4, -1, 3,  2,
+    -2, 3,  -3, -3, 1,  3,  4,  1,  -1, -4, -4, 0,  -1, -1, -2, 3,  3,  -1, -1, -1, -3, -1, -1, -2, -2, 1,  -5, 1,  2,
+    3,  3,  4,  -5, 3,  -4, 2,  2,  -2, -4, -4, -4, -5, -5, -4, 1,  -5, 2,  3,  2,  -1, -3, -3, -5, 0,  2,  -1, 0,  -4,
+    1,  -3, -3, 3,  2,  4,  -3, -4, 1,  -4, 1,  0,  -5, -5, -2, 3,  -1, -5, 1,  -5, -5, -2, 2,  -4, 2,  -4, 1,  -2, -2,
+    -3, -5, -3, -5, 0,  0,  -1, 4,  -4, -5, 0,  -1, 3,  -1, 0,  -5, 3,  0,  -4, 3,  2,  1,  0,  2,  1,  -4, 3,  -2, -1,
+    0,  4,  0,  -4, -3, -3, -3, -3, 4,  3,  0,  1,  0,  -3, -4, -5, 2,  -5, -3, -4, -3, -2, -3, -4, -5, -4, 2,  -1, -1,
+    2,  -2, -1, 1,  -1, 4,  -3, 0,  3,  -5, -2, 3,  -3, -5, -2, 3,  -4, -1, 4,  -5, -5, -1, -3, 1,  -1, 2,  -5, 1,  -4,
+    0,  0,  -5, -1, -5, 4,  -1, -3, -1, -4, -3, 4,  3,  -4, 1,  -4, -4, -2, -4, 4,  1,  1,  -3, 1,  4,  -2, 2,  4,  2,
+    -4, 2,  4,  1,  -2, 3,  -3, -5, 1,  2,  -5, 0,  1,  4,  2,  -2, -5, 4,  -2, 3,  0,  -2, -3, -1, 1,  4,  0,  1,  1,
+    4,  -5, 2,  4,  -5, -5, 0,  4,  -5, -5, -4, -3, 2,  -1, 3,  -2, 2,  4,  1,  -1, -5, 3,  -4, -1, -1, 4,  0,  3,  2,
+    3,  2,  -1, 4,  -5, -2, -3, 1,  -3, 0,  2,  -3, -3, -3, 0,  2,  -2, -4, 4,  3,  3,  -3, 2,  -2, 1,  0,  -3, 4,  -1,
+    -5, -4, -2, 1,  3,  -4, 3,  2,  2,  1,  4,  2,  4,  -4, -1, -4, 0,  1,  3,  -3, -1, -1, -3, 3,  3,  2,  0,  2,  0,
+    4,  -4, 4,  -5, -2, 1,  2,  2,  0,  -1, -1, 0,  2,  -4, -4, 2,  4,  3,  -1, -1, 0,  -1, -1, -2, -5, 0,  -4, 4,  -2,
+    1,  -1, 2,  -5, -5, -1, 0,  -5, -4, -3, 1,  3,  3,  4,  2,  -3, 4,  -2, 4,  -2, 4,  -3, -2, 1,  -5, 0,  1,  3,  -5,
+    4,  3,  -3, -2, -2, 2,  -3, -5, 2,  4,  -2, 1,  3,  -1, 4,  3,  -4, -3, 3,  1,  -3, -3, 2,  4,  1,  -1, -3, 0,  0,
+    -2, 0,  -5, 4,  4,  -4, -2, 4,  -2, -3, 0,  -2, -1, -3, -5, 2,  -3, 2,  -4, 1,  4,  1,  -5, -1, -1, -4, -4, 0,  -2,
+    -5, 1,  4,  -1, -3, -1, -5, -5, 2,  3,  1,  -4, 0,  -5, 1,  4,  -3, -5, 2,  3,  1,  -3, -3, 1,  -5, -3, -1, 1,  -5,
+    0,  0,  3,  -4, -2, -3, 4,  -1, -2, -2, -2, -1, -1, -1, -3, -4, 1,  -5, -5, 2,  -4, -1, -1, -2, 1,  1,  4,  -4, 4,
+    -5, 2,  -5, -4, -3, -5, -2, -5, -4, -2, 4,  3,  -2, -3, 3,  -2, 0,  0,  -3, -1, 1,  3,  -2, -1, -1, -4, -3, 2,  0,
+    -2, 2,  3,  -5, 2,  -2, 1,  1,  -3, -2, 2,  1,  -3, 0,  1,  4,  -1, -1, -3, 0,  -5, 3,  2,  2,  3,  -1, -4, 3,  -1,
+    3,  3,  3,  4,  -3, -5, 1,  1,  -4, 0,  2,  2,  -3, 4,  3,  2,  0,  3,  0,  1,  -1, 0,  -2, -4, -2, 3,  0,  0,  3,
+    0,  2,  0,  3,  -4, 4,  0,  -2, 0,  4,  -5, -1, -4, -1, -2, 2,  -5, 0,  3,  -2, -1, 3,  0,  3,  2,  2,  3,  -1, 1,
+    0,  3,  -3, 3,  -2, 3,  1,  -4, -1, -2, 4,  1,  -3, 4,  1,  -5, -3, 2,  2,  -5, -4, 4,  -5, -4, 4,  3,  -5, 0,  -2,
+    4,  -2, -2, 4,  0,  1,  -2, -3, 3,  -3, 3,  -2, -2, 4,  -3, 1,  -3, 4,  2,  4,  -1, -4, 1,  1,  -3, 2,  -3, 2,  -5,
+    3,  -3, -5, -5, 1,  1,  -5, 4,  -1, -3, -4, 1,  1,  -1, -3, -2, 4,  -4, 3,  -3, -3, 1,  3,  4,  -4, 2,  3,  -4, 1,
+    2,  0,  1,  1,  -5, -1, -5, -3, -2, 2,  1,  4,  4,  3,  1,  -1, -1, 2,  -4, 2,  -3, -3, -3, -3, -4, -3, -2, -4, 3,
+    3,  -1, 3,  -2, 2,  -2, -1, -2, -5, 0,  -3, -5, -4, 3,  3,  0,  2,  -4, 3,  2,  -3, -1, -5, 2,  2,  -2, -2, 2,  -2,
+    -2, 3,  -1, 1,  3,  2,  -1, -1, -1, 4,  -4, 4,  -3, -3, 4,  -5, -2, -1, -4, 3,  1,  4,  -2, 3,  1,  -3, 4,  -5, 1,
+    2,  -3, -5, 2,  -3, 1,  -3, 3,  2,  0,  -4, 1,  4,  -4, 2,  1,  1,  1,  -4, 1,  3,  -5, 0,  -2, 3,  -3, -4, -4, -3,
+    -4, -2, -1, -2, -5, -4, -1, -5, -4, -2, -3, 1,  -1, -3, -3, 4,  1,  0,  1,  3,  -3, -2, -3, 4,  -3, 3,  2,  -2, 4,
+    2,  -5, 0,  1,  -3, 3,  -5, 0,  -4, -2, 2,  3,  -5, 4,  -5, 0,  4,  -1, 0,  -4, 4,  2,  -1, 3,  -1, 4,  -2, 1,  -3,
+    0,  -4, -3, -1, 2,  3,  1,  -1, -2, -5, 2,  2,  0,  -5, -3, -4, 4,  -4, -2, -3, 3,  1,  -4, -4, 4,  -5, 3,  0,  4,
+    -4, -5, 4,  -3, -2, -5, -1, -5, -3, 3,  -1, 1,  -5, -1, -5, -3, 1,  -1, 2,  -5, -4, 0,  -5, -1, -5, -1, 3,  -5, 2,
+    -2, -3, -2, 2,  0,  -2, -1, -4, -1, -5, -1, 4,  3,  -1, -1, -3, -3, -3, 2,  0,  0,  2,  -3, -3, -5, 1,  0,  -1, -5,
+    -3, 3,  -1, 3,  1,  2,  3,  4,  -2, 4,  -4, 0,  -4, 3,  -2, 2,  -1, 2,  0,  -3, -4, 0,  0,  -1, 0,  4,  -1, -4, 3,
+    1,  3,  3,  -5, 4,  3,  2,  4,  3,  0,  1,  4,  1,  0,  -5, 2,  -3, -3, -3, 4,  2,  2,  -2, 0,  -2, -3, -2, 0,  2,
+    3,  2,  -2, -5, 2,  3,  -5, -4, -2, -4, 2,  1,  -2, -4, 0,  0,  2,  -2, -1, -5, 3,  1,  3,  -1, 0,  0,  -4, -1, -5,
+    2,  -5, 4,  -4, -2, -4, 4,  0,  -5, 3,  1,  -3, -4, -2, 0,  -3, -4, 1,  -5, -5, 3,  0,  2,  2,  4,  3,  -5, -1, -5,
+    -4, 0,  -2, 0,  2,  -1, 4,  -4, -4, 4,  -5, 4,  3,  0,  -2, -5, 0,  2,  -4, -3, -4, 2,  0,  -2, -1, 4,  -2, 1,  0,
+    4,  -5, -2, 4,  4,  -4, 4,  1,  3,  4,  -1, -1, 1,  -1, -1, -2, -4, -3, 4,  0,  2,  0,  1,  -4, -2, 0,  4,  -3, 3,
+    4,  -5, -3, -4, -5, 2,  0,  3,  -5, -2, -2, -3, 4,  -2, 1,  -1, -4, -2, -1, -3, 1,  -4, 0,  -1, -4, -3, -2, 3,  1,
+    -5, 2,  -2, 1,  -4, -5, 4,  -1, -5, -4, 1,  -1, -4, -1, -3, -4, 2,  1,  4,  -2, -5, 1,  0,  -4, 0,  -5, -3, -3, 4,
+    -1, 0,  -2, 1,  -1, -3, -1, -2, -3, -3, -1, -1, -1, -4, -4, -2, 0,  -1, -2, -5, 2,  -4, 4,  -4, 0,  4,  1,  1,  -4,
+    3,  -5, -1, -5, 2,  0,  3,  -4, -1, -4, -2, -1, 0,  -3, 2,  3,  -5, -5, 4,  4,  -2, 4,  -1, 0,  -2, 2,  -2, 2,  -3,
+    -1, -1, 1,  0,  -5, -3, -3, 0,  -5, 2,  -5, -1, 2,  -3, 3,  4,  2,  -5, 4,  2,  1,  1,  -1, -4, -5, 1,  -4, 1,  1,
+    -4, 4,  -1, 0,  3,  -1, -3, 1,  0,  0,  3,  3,  -3, 1,  -1, 3,  -5, -2, 0,  3,  -2, 2,  0,  -4, -3, -2, -3, 1,  3,
+    -2, -4, -3, 2,  -3, -1, 0,  -2, 2,  -2, -3, 4,  2,  -5, -5, 1,  -1, -5, -4, 0,  -5, -3, -5, -1, 4,  -1, 1,  2,  1,
+    -2, 2,  3,  0,  0,  1,  3,  -1, 4,  -5, 1,  4,  2,  -2, -2, 1,  4,  -5, 0,  4,  -4, 2,  -5, 4,  -1, 2,  1,  1,  1,
+    -1, 0,  3,  2,  4,  -5, 0,  -3, 3,  -3, -2, -1, -2, -2, -2, 1,  4,  -2, -1, -4, 3,  -1, 1,  -1, 0,  0,  3,  -2, 1,
+    1,  -3, -4, -1, -2, -5, -1, -1, 0,  0,  4,  2,  4,  3,  2,  0,  0,  -1, 1,  -1, 1,  -2, 3,  3,  -3, -5, 1,  -4, 0,
+    -3, 2,  0,  -2, 2,  3,  1,  0,  -3, 4,  -5, 1,  2,  -1, 2,  1,  3,  -1, -2, -4, -2, -1, 4,  1,  0,  -3, -4, 1,  2,
+    -4, -3, -5, 3,  -4, -2, -1, -3, 2,  -3, -2, 0,  0,  2,  -1, -4, -5, 1,  3,  2,  -5, 4,  -3, 1,  -2, 4,  -1, -5, -1,
+    4,  -2, -4, -1, -3, -2, -5, 4,  2,  0,  4,  1,  3,  -4, -2, 1,  2,  4,  2,  -1, -5, -3, 0,  1,  -4, -4, -2, 0,  -3,
+    4,  1,  -3, 1,  -1, -2, -4, -4, 0,  -4, -5, 4,  0,  0,  1,  -5, -4, -5, -2, -1, -2, -4, 0,  -1, 2,  -1, -4, 2,  2,
+    1,  -2, 2,  3,  1,  -4, -4, -3, 2,  -1, -1, 1,  -2, -3, -5, -1, 3,  1,  -1, 4,  -3, -5, 0,  0,  2,  1,  2,  -2, 0,
+    -5, -5, -2, 2,  1,  4,  -1, -1, -1, -5, 2,  2,  -1, -3, -3, 2,  -4, 0,  -1, -1, 1,  0,  0,  3,  -5, -2, 0,  -3, 3,
+    -4, 2,  -1, 4,  -3, 2,  1,  4,  2,  -1, 3,  -5, 4,  -5, -4, 3,  4,  -4, -5, -3, -2, 0,  1,  -5, 4,  -2, -2, -2, 4,
+    0,  -3, 0,  4,  -2, 2,  -1, 4,  -3, 3,  -5, -2, -3, -5, -3, 4,  3,  2,  -1, -1, -1, 2,  1,  -3, -3, 0,  -4, -4, -4,
+    -3, -1, 4,  0,  4,  -4, -3, -1, 4,  -2, 1,  -1, -1, -4, -5, 2,  -3, -5, -3, 2,  -1, -2, -5, -2, 3,  0,  -2, 2,  -1,
+    -2, -5, -4, 2,  0,  -1, 2,  4,  -5, -1, -1, -4, 2,  2,  -2, -1, -5, -3, -5, 2,  2,  4,  2,  4,  4,  3,  1,  -5, 1,
+    1,  -1, 1,  -4, -2, 0,  -3, -4, -4, -1, -3, -5, 4,  0,  -2, -2, -4, -1, 3,  -4, -2, -4, 2,  1,  1,  -3, -2, -2, 1,
+    -5, -3, -5, 3,  -5, 2,  -4, -2, -1, 0,  1,  2,  0,  3,  -5, 3,  0,  -5, -2, 2,  -3, 1,  -2, 3,  3,  -4, -5, -5, 1,
+    3,  -2, -3, -2, -3, -2, 1,  4,  -4, 4,  2,  4,  3,  -2, -2, -3, 2,  1,  0,  0,  -2, -1, 2,  -4, -3, 2,  2,  -3, -1,
+    -2, -2, -5, -4, -5, 2,  2,  4,  -3, 1,  3,  -3, -2, 1,  -2, 0,  -3, -1, 3,  -2, -4, 1,  3,  2,  -1, -3, 1,  0,  2,
+    -5, -3, 4,  1,  -2, -3, 4,  4,  2,  -4, -1, -4, -4, -5, 2,  -2, -3, -5, -1, -4, -4, 0,  0,  3,  -1, -1, 0,  -4, -2,
+    1,  0,  -3, 4,  2,  4,  2,  -2, -2, -4, -1, 0,  0,  -5, -3, 0,  3,  -2, -4, 2,  -4, -3, 3,  3,  0,  3,  1,  2,  -1,
+    -4, 4,  -1, -1, -4, 3,  0,  2,  3,  1,  -3, -3, 1,  -3, -4, 4,  -5, 1,  -2, 1,  1,  2,  -2, -3, -1, 0,  -3, -3, -5,
+    3,  -4, 0,  0,  -4, 3,  -2, -5, -3, -1, -3, -1, -2, 1,  2,  1,  -4, -3, 2,  4,  -4, -1, -2, 1,  -5, 1,  4,  -1, 4,
+    1,  0,  -4, 2,  2,  3,  -3, 3,  -2, -5, -5, 3,  0,  4,  -5, -1, -2, -2, 3,  2,  2,  -1, 0,  2,  1,  1,  4,  -1, -5,
+    2,  2,  -2, 2,  0,  -3, -3, 1,  -4, -5, -2, -3, -3, -2, -1, 3,  -3, 2,  -1, -3, -5, -4, 3,  -2, -1, -4, 2,  3,  -3,
+    -1, 2,  2,  -4, 4,  -2, -4, 2,  -3, -4, -1, -3, -3, 0,  -3};
diff --git a/features/cmsis_nn_sample_code/test-includes/int16xint8/biases_data.h b/features/cmsis_nn_sample_code/test-includes/int16xint8/biases_data.h
new file mode 100644
index 0000000..a4a87a4
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/int16xint8/biases_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int64_t int16xint8_biases[4] = {-260092, -1040368, -780276, -520184};
diff --git a/features/cmsis_nn_sample_code/test-includes/int16xint8/config_data.h b/features/cmsis_nn_sample_code/test-includes/int16xint8/config_data.h
new file mode 100644
index 0000000..79f7e52
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/int16xint8/config_data.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#define INT16XINT8_OUT_CH 4
+#define INT16XINT8_IN_CH 3
+#define INT16XINT8_INPUT_W 7
+#define INT16XINT8_INPUT_H 8
+#define INT16XINT8_DST_SIZE 48
+#define INT16XINT8_INPUT_SIZE 168
+#define INT16XINT8_OUT_ACTIVATION_MIN -32768
+#define INT16XINT8_OUT_ACTIVATION_MAX 32767
+#define INT16XINT8_INPUT_BATCHES 1
+#define INT16XINT8_INPUT_OFFSET 0
+#define INT16XINT8_OUTPUT_OFFSET 0
+#define INT16XINT8_FILTER_X 2
+#define INT16XINT8_FILTER_Y 4
+#define INT16XINT8_STRIDE_X 2
+#define INT16XINT8_STRIDE_Y 3
+#define INT16XINT8_PAD_X 0
+#define INT16XINT8_PAD_Y 1
+#define INT16XINT8_OUTPUT_W 4
+#define INT16XINT8_OUTPUT_H 3
diff --git a/features/cmsis_nn_sample_code/test-includes/int16xint8/input_data.h b/features/cmsis_nn_sample_code/test-includes/int16xint8/input_data.h
new file mode 100644
index 0000000..abd5bd6
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/int16xint8/input_data.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t int16xint8_input[168] = {
+    0,      -16384, 16384,  -16384, -16384, -8192,  8192,   0,      -32768, 0,      -32768, -16384, 0,      -32768,
+    -16384, -32768, 8192,   0,      0,      -24576, 8192,   -8192,  -16384, 16384,  -32768, 8192,   -16384, -8192,
+    8192,   -16384, 16384,  -8192,  16384,  -32768, -24576, -8192,  -24576, -8192,  -16384, -32768, 0,      -8192,
+    -24576, 24576,  -16384, 16384,  24576,  8192,   -32768, -24576, -8192,  8192,   24576,  8192,   -24576, -16384,
+    -32768, 0,      0,      8192,   8192,   24576,  -8192,  -16384, -16384, -24576, 0,      16384,  24576,  -32768,
+    24576,  8192,   0,      -32768, -24576, 0,      -8192,  -24576, -16384, -16384, -8192,  16384,  8192,   -16384,
+    24576,  -24576, -32768, 24576,  0,      -32768, -16384, 0,      24576,  16384,  0,      -16384, 8192,   8192,
+    24576,  16384,  8192,   -8192,  -24576, -8192,  8192,   24576,  -24576, 16384,  8192,   0,      -16384, -16384,
+    0,      -8192,  -32768, 0,      -24576, -8192,  24576,  -8192,  8192,   -16384, 0,      -16384, -24576, 24576,
+    8192,   24576,  -24576, -32768, -24576, 0,      -8192,  16384,  0,      -32768, 16384,  8192,   -24576, 8192,
+    0,      8192,   -16384, -32768, 24576,  -8192,  -32768, 16384,  16384,  -32768, 0,      8192,   8192,   0,
+    -16384, -32768, 0,      -32768, 8192,   -24576, 8192,   16384,  16384,  0,      16384,  0,      8192,   16384};
diff --git a/features/cmsis_nn_sample_code/test-includes/int16xint8/output_mult_data.h b/features/cmsis_nn_sample_code/test-includes/int16xint8/output_mult_data.h
new file mode 100644
index 0000000..41ef7d7
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/int16xint8/output_mult_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t int16xint8_output_mult[4] = {1082212997, 1082212997, 1082212997, 1082212997};
diff --git a/features/cmsis_nn_sample_code/test-includes/int16xint8/output_ref_data.h b/features/cmsis_nn_sample_code/test-includes/int16xint8/output_ref_data.h
new file mode 100644
index 0000000..a3fc92c
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/int16xint8/output_ref_data.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t int16xint8_output_ref[48] = {0,   -9, -6, -47, 2,  7,  15, 23,  27,  11, 1,   -13, 24, -5,  -8,  -6,
+                                         -36, 12, -1, 20,  5,  47, 62, 33,  26,  24, 39,  2,   0,  -32, -11, 37,
+                                         14,  -6, 6,  -6,  -3, 14, 20, -10, -11, 0,  -17, 33,  45, -6,  22,  7};
diff --git a/features/cmsis_nn_sample_code/test-includes/int16xint8/output_shift_data.h b/features/cmsis_nn_sample_code/test-includes/int16xint8/output_shift_data.h
new file mode 100644
index 0000000..d5e9299
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/int16xint8/output_shift_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t int16xint8_output_shift[4] = {-17, -17, -17, -17};
diff --git a/features/cmsis_nn_sample_code/test-includes/int16xint8/test_data.h b/features/cmsis_nn_sample_code/test-includes/int16xint8/test_data.h
new file mode 100644
index 0000000..1af706c
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/int16xint8/test_data.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/features/cmsis_nn_sample_code/test-includes/int16xint8/weights_data.h b/features/cmsis_nn_sample_code/test-includes/int16xint8/weights_data.h
new file mode 100644
index 0000000..2b6e3a1
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/int16xint8/weights_data.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t int16xint8_weights[96] = {
+    32,   -127, 64,   -95, 0,    0,    95,   -64, -127, 0,   -64, -64,  -127, 32,  32,   -32, -95, -95, 64,  64,
+    0,    0,    -127, 0,   -95,  -95,  -127, -32, 0,    0,   -95, 0,    -64,  95,  -127, -32, 32,  0,   -64, -95,
+    -127, -64,  64,   -32, -127, -64,  -64,  0,   -32,  -95, 0,   -127, 64,   32,  64,   64,  -95, 32,  -64, -64,
+    32,   -64,  0,    95,  -127, -127, -95,  95,  -64,  -64, -64, -127, -64,  95,  32,   95,  95,  95,  0,   64,
+    -64,  -32,  -127, 64,  -127, 32,   64,   95,  -127, 64,  64,  -95,  95,   -64, -95,  95};
diff --git a/features/cmsis_nn_sample_code/test-includes/requantize_s64/biases_data.h b/features/cmsis_nn_sample_code/test-includes/requantize_s64/biases_data.h
new file mode 100644
index 0000000..b4c6a1e
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/requantize_s64/biases_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int64_t requantize_s64_biases[2] = {2147483647, 2147483647};
diff --git a/features/cmsis_nn_sample_code/test-includes/requantize_s64/config_data.h b/features/cmsis_nn_sample_code/test-includes/requantize_s64/config_data.h
new file mode 100644
index 0000000..525b810
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/requantize_s64/config_data.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#define REQUANTIZE_S64_OUT_CH 2
+#define REQUANTIZE_S64_IN_CH 2
+#define REQUANTIZE_S64_INPUT_W 3
+#define REQUANTIZE_S64_INPUT_H 2
+#define REQUANTIZE_S64_DST_SIZE 4
+#define REQUANTIZE_S64_INPUT_SIZE 12
+#define REQUANTIZE_S64_OUT_ACTIVATION_MIN -32768
+#define REQUANTIZE_S64_OUT_ACTIVATION_MAX 32767
+#define REQUANTIZE_S64_INPUT_BATCHES 1
+#define REQUANTIZE_S64_INPUT_OFFSET 0
+#define REQUANTIZE_S64_OUTPUT_OFFSET 0
+#define REQUANTIZE_S64_FILTER_X 2
+#define REQUANTIZE_S64_FILTER_Y 2
+#define REQUANTIZE_S64_STRIDE_X 1
+#define REQUANTIZE_S64_STRIDE_Y 1
+#define REQUANTIZE_S64_PAD_X 0
+#define REQUANTIZE_S64_PAD_Y 0
+#define REQUANTIZE_S64_OUTPUT_W 2
+#define REQUANTIZE_S64_OUTPUT_H 1
diff --git a/features/cmsis_nn_sample_code/test-includes/requantize_s64/input_data.h b/features/cmsis_nn_sample_code/test-includes/requantize_s64/input_data.h
new file mode 100644
index 0000000..7a47dba
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/requantize_s64/input_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t requantize_s64_input[12] = {-14, 10, -14, -12, -6, 14, -6, -8, 4, 8, 10, -14};
diff --git a/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_mult_data.h b/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_mult_data.h
new file mode 100644
index 0000000..74a3970
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_mult_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t requantize_s64_output_mult[2] = {1082196484, 1623294726};
diff --git a/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_ref_data.h b/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_ref_data.h
new file mode 100644
index 0000000..3db6f0c
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_ref_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t requantize_s64_output_ref[4] = {32767, 32767, 32767, 32767}; // expected int16 output checked with validate_s16(); stdint type keeps this header self-contained
diff --git a/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_shift_data.h b/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_shift_data.h
new file mode 100644
index 0000000..c9332d5
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/requantize_s64/output_shift_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t requantize_s64_output_shift[2] = {-5, -5}; // assigned to quant_params.shift by the requantize_s64 convolution test
diff --git a/features/cmsis_nn_sample_code/test-includes/requantize_s64/test_data.h b/features/cmsis_nn_sample_code/test-includes/requantize_s64/test_data.h
new file mode 100644
index 0000000..1af706c
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/requantize_s64/test_data.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/features/cmsis_nn_sample_code/test-includes/requantize_s64/weights_data.h b/features/cmsis_nn_sample_code/test-includes/requantize_s64/weights_data.h
new file mode 100644
index 0000000..d59cb47
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/requantize_s64/weights_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t requantize_s64_weights[16] = {-127, 32, 32, 0, 95, 32, 95, -95, -42, -85, -127, -106, 0, -64, 0, 106}; // int8 filter weights passed as kernel_data; stdint type keeps this header self-contained
diff --git a/features/cmsis_nn_sample_code/test-includes/test_arm_convolve_fast_s16.h b/features/cmsis_nn_sample_code/test-includes/test_arm_convolve_fast_s16.h
new file mode 100644
index 0000000..168c01c
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/test_arm_convolve_fast_s16.h
@@ -0,0 +1,6 @@
+
+#ifndef INC_TEST_ARM_CONVOLVE_FAST_S16_H
+#define INC_TEST_ARM_CONVOLVE_FAST_S16_H
+int int16xint8_arm_convolve_fast_s16(void);     /* returns true (1) if every check passes, false (0) otherwise */
+int requantize_s64_arm_convolve_fast_s16(void); /* returns true (1) if every check passes, false (0) otherwise */
+#endif /* INC_TEST_ARM_CONVOLVE_FAST_S16_H */
diff --git a/features/cmsis_nn_sample_code/test-includes/test_arm_fully_connected_s16.h b/features/cmsis_nn_sample_code/test-includes/test_arm_fully_connected_s16.h
new file mode 100644
index 0000000..c204ea2
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/test_arm_fully_connected_s16.h
@@ -0,0 +1,6 @@
+
+#ifndef INC_TEST_ARM_FULLY_CONNECTED_S16_H
+#define INC_TEST_ARM_FULLY_CONNECTED_S16_H
+int fully_connected_int16_arm_fully_connected_s16(void);     /* returns true (1) if every check passes, false (0) otherwise */
+int fully_connected_int16_big_arm_fully_connected_s16(void); /* returns true (1) if every check passes, false (0) otherwise */
+#endif /* INC_TEST_ARM_FULLY_CONNECTED_S16_H */
diff --git a/features/cmsis_nn_sample_code/test-includes/validate.h b/features/cmsis_nn_sample_code/test-includes/validate.h
new file mode 100644
index 0000000..e72a5cf
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test-includes/validate.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+static inline int validate(int8_t *act, const int8_t *ref, int size)
+{
+    /* Element-wise comparison of act[] against ref[]; every mismatch is
+     * reported on stdout and a failure summary is printed at the end.
+     * Returns true (1) when all size elements match, false (0) otherwise. */
+    int mismatches = 0;
+    int checked = 0;
+
+    for (int idx = 0; idx < size; ++idx)
+    {
+        const int got = act[idx];
+        const int want = ref[idx];
+
+        checked++;
+        if (got == want)
+        {
+            continue;
+        }
+        mismatches++;
+        printf("ERROR at pos %d: Act: %d Ref: %d\r\n", idx, got, want);
+    }
+
+    if (mismatches != 0)
+    {
+        printf("%d of %d failed\r\n", mismatches, checked);
+    }
+    return mismatches == 0;
+}
+
+static inline int validate_s16(int16_t *act, const int16_t *ref, int size)
+{
+    /* 16-bit variant: compares act[] with ref[] element by element, prints
+     * each differing position plus a final failure count, and returns
+     * true (1) only when no element differs. */
+    int failed = 0;
+    int seen = 0;
+
+    for (int pos = 0; pos < size; ++pos)
+    {
+        const int16_t actual = act[pos];
+        const int16_t expected = ref[pos];
+        const bool differs = (actual != expected);
+
+        seen++;
+        if (differs)
+        {
+            failed++;
+            printf("ERROR at pos %d: Act: %d Ref: %d\r\n", pos, actual, expected);
+        }
+    }
+
+    if (failed > 0)
+    {
+        printf("%d of %d failed\r\n", failed, seen);
+    }
+    return (failed > 0) ? false : true;
+}
diff --git a/features/cmsis_nn_sample_code/test_arm_convolve_fast_s16.c b/features/cmsis_nn_sample_code/test_arm_convolve_fast_s16.c
new file mode 100644
index 0000000..da41933
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test_arm_convolve_fast_s16.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+
+#include <arm_nnfunctions.h>
+
+#include "int16xint8/test_data.h"
+#include "requantize_s64/test_data.h"
+#include "validate.h"
+
+/* Runs the int16xint8 convolution data through arm_convolve_wrapper_s16 and then */
+/* arm_convolve_fast_s16; returns true (1) only if every check passes, else false. */
+int int16xint8_arm_convolve_fast_s16(void)
+{
+    q15_t output[INT16XINT8_DST_SIZE] = {0};
+    int ret_value = true;
+
+    cmsis_nn_context ctx;
+    cmsis_nn_conv_params conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims;
+    cmsis_nn_dims output_dims;
+
+    const q63_t *bias_data = int16xint8_biases;
+    const q7_t *kernel_data = int16xint8_weights;
+    const q15_t *input_data = int16xint8_input;
+    const q15_t *output_ref = int16xint8_output_ref;
+    const int32_t output_ref_size = INT16XINT8_DST_SIZE;
+
+    input_dims.n = INT16XINT8_INPUT_BATCHES;
+    input_dims.w = INT16XINT8_INPUT_W;
+    input_dims.h = INT16XINT8_INPUT_H;
+    input_dims.c = INT16XINT8_IN_CH;
+    filter_dims.w = INT16XINT8_FILTER_X;
+    filter_dims.h = INT16XINT8_FILTER_Y;
+    output_dims.w = INT16XINT8_OUTPUT_W;
+    output_dims.h = INT16XINT8_OUTPUT_H;
+    output_dims.c = INT16XINT8_OUT_CH;
+
+    conv_params.padding.w = INT16XINT8_PAD_X;
+    conv_params.padding.h = INT16XINT8_PAD_Y;
+    conv_params.stride.w = INT16XINT8_STRIDE_X;
+    conv_params.stride.h = INT16XINT8_STRIDE_Y;
+
+    conv_params.input_offset = 0;
+    conv_params.output_offset = 0;
+    conv_params.activation.min = INT16XINT8_OUT_ACTIVATION_MIN;
+    conv_params.activation.max = INT16XINT8_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)int16xint8_output_mult;
+    quant_params.shift = (int32_t *)int16xint8_output_shift;
+    /* Pass 1: call the wrapper; both its status and its output must match. */
+    int buf_size = arm_convolve_wrapper_s16_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size; /* keep the context consistent with the allocated scratch buffer */
+    arm_status result = arm_convolve_wrapper_s16(&ctx,
+                                                 &conv_params,
+                                                 &quant_params,
+                                                 &input_dims,
+                                                 input_data,
+                                                 &filter_dims,
+                                                 kernel_data,
+                                                 &bias_dims,
+                                                 bias_data,
+                                                 &output_dims,
+                                                 output);
+    free(ctx.buf);
+    ret_value &= (result == ARM_MATH_SUCCESS); /* compare the status directly: arm_status is not an int8_t buffer */
+    ret_value &= validate_s16(output, output_ref, output_ref_size);
+    /* Pass 2: call the fast kernel directly, reusing the same descriptors. */
+    buf_size = arm_convolve_fast_s16_get_buffer_size(&input_dims, &filter_dims);
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
+    result = arm_convolve_fast_s16(&ctx,
+                                   &conv_params,
+                                   &quant_params,
+                                   &input_dims,
+                                   input_data,
+                                   &filter_dims,
+                                   kernel_data,
+                                   &bias_dims,
+                                   bias_data,
+                                   &output_dims,
+                                   output);
+    free(ctx.buf);
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+    ret_value &= (result == ARM_MATH_SUCCESS);
+    ret_value &= validate_s16(output, output_ref, output_ref_size);
+#else
+    ret_value &= (result == ARM_MATH_ARGUMENT_ERROR); /* in this configuration the fast kernel is expected to reject the call */
+#endif
+    return ret_value;
+}
+
+/* Runs the requantize_s64 convolution data through arm_convolve_wrapper_s16 and */
+/* then arm_convolve_fast_s16, checking the returned status and the int16 output */
+/* of each call; returns true (1) only if every check passes, else false (0). */
+int requantize_s64_arm_convolve_fast_s16(void)
+{
+    q15_t output[REQUANTIZE_S64_DST_SIZE] = {0};
+    int ret_value = true;
+
+    cmsis_nn_context ctx;
+    cmsis_nn_conv_params conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims;
+    cmsis_nn_dims output_dims;
+
+    const q63_t *bias_data = requantize_s64_biases;
+    const q7_t *kernel_data = requantize_s64_weights;
+    const q15_t *input_data = requantize_s64_input;
+    const q15_t *output_ref = requantize_s64_output_ref;
+    const int32_t output_ref_size = REQUANTIZE_S64_DST_SIZE;
+
+    input_dims.n = REQUANTIZE_S64_INPUT_BATCHES;
+    input_dims.w = REQUANTIZE_S64_INPUT_W;
+    input_dims.h = REQUANTIZE_S64_INPUT_H;
+    input_dims.c = REQUANTIZE_S64_IN_CH;
+    filter_dims.w = REQUANTIZE_S64_FILTER_X;
+    filter_dims.h = REQUANTIZE_S64_FILTER_Y;
+    output_dims.w = REQUANTIZE_S64_OUTPUT_W;
+    output_dims.h = REQUANTIZE_S64_OUTPUT_H;
+    output_dims.c = REQUANTIZE_S64_OUT_CH;
+
+    conv_params.padding.w = REQUANTIZE_S64_PAD_X;
+    conv_params.padding.h = REQUANTIZE_S64_PAD_Y;
+    conv_params.stride.w = REQUANTIZE_S64_STRIDE_X;
+    conv_params.stride.h = REQUANTIZE_S64_STRIDE_Y;
+
+    conv_params.input_offset = REQUANTIZE_S64_INPUT_OFFSET;
+    conv_params.output_offset = REQUANTIZE_S64_OUTPUT_OFFSET;
+    conv_params.activation.min = REQUANTIZE_S64_OUT_ACTIVATION_MIN;
+    conv_params.activation.max = REQUANTIZE_S64_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)requantize_s64_output_mult;
+    quant_params.shift = (int32_t *)requantize_s64_output_shift;
+    /* Pass 1: call the wrapper; both its status and its output must match. */
+    int buf_size = arm_convolve_wrapper_s16_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size; /* keep the context consistent with the allocated scratch buffer */
+    arm_status result = arm_convolve_wrapper_s16(&ctx,
+                                                 &conv_params,
+                                                 &quant_params,
+                                                 &input_dims,
+                                                 input_data,
+                                                 &filter_dims,
+                                                 kernel_data,
+                                                 &bias_dims,
+                                                 bias_data,
+                                                 &output_dims,
+                                                 output);
+    free(ctx.buf);
+    ret_value &= (result == ARM_MATH_SUCCESS); /* compare the status directly: arm_status is not an int8_t buffer */
+    ret_value &= validate_s16(output, output_ref, output_ref_size);
+    /* Pass 2: call the fast kernel directly, reusing the same descriptors. */
+    buf_size = arm_convolve_fast_s16_get_buffer_size(&input_dims, &filter_dims);
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
+    result = arm_convolve_fast_s16(&ctx,
+                                   &conv_params,
+                                   &quant_params,
+                                   &input_dims,
+                                   input_data,
+                                   &filter_dims,
+                                   kernel_data,
+                                   &bias_dims,
+                                   bias_data,
+                                   &output_dims,
+                                   output);
+    free(ctx.buf);
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+    ret_value &= (result == ARM_MATH_SUCCESS);
+    ret_value &= validate_s16(output, output_ref, output_ref_size);
+#else
+    ret_value &= (result == ARM_MATH_ARGUMENT_ERROR); /* in this configuration the fast kernel is expected to reject the call */
+#endif
+    return ret_value;
+}
diff --git a/features/cmsis_nn_sample_code/test_arm_fully_connected_s16.c b/features/cmsis_nn_sample_code/test_arm_fully_connected_s16.c
new file mode 100644
index 0000000..9593883
--- /dev/null
+++ b/features/cmsis_nn_sample_code/test_arm_fully_connected_s16.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arm_nnfunctions.h>
+#include <stdlib.h>
+//#include <unity.h>
+
+#include "fully_connected_int16/test_data.h"
+#include "fully_connected_int16_big/test_data.h"
+#include "validate.h"
+
+/* Runs arm_fully_connected_s16 on the fully_connected_int16 data set; returns true (1) */
+/* when the status is ARM_MATH_SUCCESS and the output matches the reference, else false. */
+int fully_connected_int16_arm_fully_connected_s16(void)
+{
+    q15_t output[FULLY_CONNECTED_INT16_DST_SIZE] = {0};
+    int ret_value = true;
+
+    cmsis_nn_context ctx;
+    cmsis_nn_fc_params fc_params;
+    cmsis_nn_per_tensor_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims;
+    cmsis_nn_dims output_dims;
+
+    const q63_t *bias_data = fully_connected_int16_biases;
+    const q7_t *kernel_data = fully_connected_int16_weights;
+    const q15_t *input_data = fully_connected_int16_input;
+    const q15_t *output_ref = fully_connected_int16_output_ref;
+    const int32_t output_ref_size = FULLY_CONNECTED_INT16_DST_SIZE;
+
+    input_dims.n = FULLY_CONNECTED_INT16_INPUT_BATCHES;
+    input_dims.w = FULLY_CONNECTED_INT16_INPUT_W;
+    input_dims.h = FULLY_CONNECTED_INT16_INPUT_H;
+    input_dims.c = FULLY_CONNECTED_INT16_IN_CH;
+    filter_dims.n = FULLY_CONNECTED_INT16_ACCUMULATION_DEPTH;
+    filter_dims.c = FULLY_CONNECTED_INT16_OUT_CH;
+    filter_dims.h = FULLY_CONNECTED_INT16_INPUT_H;
+    filter_dims.w = FULLY_CONNECTED_INT16_INPUT_W;
+    output_dims.n = FULLY_CONNECTED_INT16_INPUT_BATCHES;
+    output_dims.c = FULLY_CONNECTED_INT16_OUT_CH;
+
+    fc_params.input_offset = 0;
+    fc_params.filter_offset = 0;
+    fc_params.output_offset = 0;
+    fc_params.activation.min = FULLY_CONNECTED_INT16_OUT_ACTIVATION_MIN;
+    fc_params.activation.max = FULLY_CONNECTED_INT16_OUT_ACTIVATION_MAX;
+
+    quant_params.multiplier = FULLY_CONNECTED_INT16_OUTPUT_MULTIPLIER;
+    quant_params.shift = FULLY_CONNECTED_INT16_OUTPUT_SHIFT;
+
+    int32_t buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
+
+    arm_status result = arm_fully_connected_s16(&ctx,
+                                                &fc_params,
+                                                &quant_params,
+                                                &input_dims,
+                                                input_data,
+                                                &filter_dims,
+                                                kernel_data,
+                                                &bias_dims,
+                                                bias_data,
+                                                &output_dims,
+                                                output);
+    free(ctx.buf);
+    ret_value &= (result == ARM_MATH_SUCCESS); /* compare the status directly: arm_status is not an int8_t buffer */
+    ret_value &= validate_s16(output, output_ref, output_ref_size);
+    return ret_value;
+}
+
+/* Runs arm_fully_connected_s16 on the fully_connected_int16_big data set; returns true (1) */
+/* when the status is ARM_MATH_SUCCESS and the output matches the reference, else false. */
+int fully_connected_int16_big_arm_fully_connected_s16(void)
+{
+    q15_t output[FULLY_CONNECTED_INT16_BIG_DST_SIZE] = {0};
+    int ret_value = true;
+
+    cmsis_nn_context ctx;
+    cmsis_nn_fc_params fc_params;
+    cmsis_nn_per_tensor_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims;
+    cmsis_nn_dims output_dims;
+
+    const q63_t *bias_data = fully_connected_int16_big_biases;
+    const q7_t *kernel_data = fully_connected_int16_big_weights;
+    const q15_t *input_data = fully_connected_int16_big_input;
+    const q15_t *output_ref = fully_connected_int16_big_output_ref;
+    const int32_t output_ref_size = FULLY_CONNECTED_INT16_BIG_DST_SIZE;
+
+    input_dims.n = FULLY_CONNECTED_INT16_BIG_INPUT_BATCHES;
+    input_dims.w = FULLY_CONNECTED_INT16_BIG_INPUT_W;
+    input_dims.h = FULLY_CONNECTED_INT16_BIG_INPUT_H;
+    input_dims.c = FULLY_CONNECTED_INT16_BIG_IN_CH;
+    filter_dims.n = FULLY_CONNECTED_INT16_BIG_ACCUMULATION_DEPTH;
+    filter_dims.c = FULLY_CONNECTED_INT16_BIG_OUT_CH;
+    filter_dims.h = FULLY_CONNECTED_INT16_BIG_INPUT_H;
+    filter_dims.w = FULLY_CONNECTED_INT16_BIG_INPUT_W;
+    output_dims.n = FULLY_CONNECTED_INT16_BIG_INPUT_BATCHES;
+    output_dims.c = FULLY_CONNECTED_INT16_BIG_OUT_CH;
+
+    fc_params.input_offset = 0;
+    fc_params.filter_offset = 0;
+    fc_params.output_offset = 0;
+    fc_params.activation.min = FULLY_CONNECTED_INT16_BIG_OUT_ACTIVATION_MIN;
+    fc_params.activation.max = FULLY_CONNECTED_INT16_BIG_OUT_ACTIVATION_MAX;
+
+    quant_params.multiplier = FULLY_CONNECTED_INT16_BIG_OUTPUT_MULTIPLIER;
+    quant_params.shift = FULLY_CONNECTED_INT16_BIG_OUTPUT_SHIFT;
+
+    int32_t buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
+
+    arm_status result = arm_fully_connected_s16(&ctx,
+                                                &fc_params,
+                                                &quant_params,
+                                                &input_dims,
+                                                input_data,
+                                                &filter_dims,
+                                                kernel_data,
+                                                &bias_dims,
+                                                bias_data,
+                                                &output_dims,
+                                                output);
+    free(ctx.buf);
+    ret_value &= (result == ARM_MATH_SUCCESS); /* compare the status directly: arm_status is not an int8_t buffer */
+    ret_value &= validate_s16(output, output_ref, output_ref_size);
+    return ret_value;
+}