Matches in SemOpenAlex for { <https://semopenalex.org/work/W4384699872> ?p ?o ?g. }
Showing items 1 to 82 of
82
with 100 items per page.
- W4384699872 endingPage "226" @default.
- W4384699872 startingPage "212" @default.
- W4384699872 abstract "Enabling On-Device Learning (ODL) for Ultra-Low-Power Micro-Controller Units (MCUs) is a key step for post-deployment adaptation and fine-tuning of Deep Neural Network (DNN) models in future TinyML applications. This paper tackles this challenge by introducing a novel reduced precision optimization technique for ODL primitives on MCU-class devices, leveraging the State-of-Art advancements in RISC-V RV32 architectures with support for vectorized 16-bit floating-point (FP16) Single-Instruction Multiple-Data (SIMD) operations. Our approach for the Forward and Backward steps of the Back-Propagation training algorithm is composed of specialized shape transform operators and Matrix Multiplication (MM) kernels, accelerated with parallelization and loop unrolling. When evaluated on a single training step of a 2D Convolution layer, the SIMD-optimized FP16 primitives result up to 1.72× faster than the FP32 baseline on a RISC-V-based 8+1-core MCU. An average computing efficiency of 3.11 Multiply and Accumulate operations per clock cycle (MAC/clk) and 0.81 MAC/clk is measured for the end-to-end training tasks of a ResNet8 and a DS-CNN for Image Classification and Keyword Spotting, respectively – requiring 17.1 ms and 6.4 ms on the target platform to compute a training step on a single sample. Overall, our approach results more than two orders of magnitude faster than existing ODL software frameworks for single-core MCUs and outperforms by 1.6 × previous FP32 parallel implementations on a Continual Learning setup." @default.
- W4384699872 created "2023-07-20" @default.
- W4384699872 creator A5030025275 @default.
- W4384699872 creator A5038717922 @default.
- W4384699872 creator A5043408422 @default.
- W4384699872 creator A5049655627 @default.
- W4384699872 date "2023-12-01" @default.
- W4384699872 modified "2023-09-25" @default.
- W4384699872 title "Reduced precision floating-point optimization for Deep Neural Network On-Device Learning on microcontrollers" @default.
- W4384699872 cites W2395579298 @default.
- W4384699872 cites W2604319603 @default.
- W4384699872 cites W2981638331 @default.
- W4384699872 cites W2995086119 @default.
- W4384699872 cites W3013557734 @default.
- W4384699872 cites W3015636663 @default.
- W4384699872 cites W3086809868 @default.
- W4384699872 cites W3130320103 @default.
- W4384699872 cites W3137875211 @default.
- W4384699872 cites W3170113470 @default.
- W4384699872 cites W3190679134 @default.
- W4384699872 cites W3203242999 @default.
- W4384699872 cites W3205667954 @default.
- W4384699872 cites W4213348099 @default.
- W4384699872 cites W4312235774 @default.
- W4384699872 doi "https://doi.org/10.1016/j.future.2023.07.020" @default.
- W4384699872 hasPublicationYear "2023" @default.
- W4384699872 type Work @default.
- W4384699872 citedByCount "0" @default.
- W4384699872 crossrefType "journal-article" @default.
- W4384699872 hasAuthorship W4384699872A5030025275 @default.
- W4384699872 hasAuthorship W4384699872A5038717922 @default.
- W4384699872 hasAuthorship W4384699872A5043408422 @default.
- W4384699872 hasAuthorship W4384699872A5049655627 @default.
- W4384699872 hasConcept C108583219 @default.
- W4384699872 hasConcept C113775141 @default.
- W4384699872 hasConcept C11413529 @default.
- W4384699872 hasConcept C150552126 @default.
- W4384699872 hasConcept C154945302 @default.
- W4384699872 hasConcept C169590947 @default.
- W4384699872 hasConcept C173018170 @default.
- W4384699872 hasConcept C173608175 @default.
- W4384699872 hasConcept C199360897 @default.
- W4384699872 hasConcept C2777904410 @default.
- W4384699872 hasConcept C41008148 @default.
- W4384699872 hasConcept C50644808 @default.
- W4384699872 hasConcept C76970557 @default.
- W4384699872 hasConcept C84211073 @default.
- W4384699872 hasConcept C9390403 @default.
- W4384699872 hasConceptScore W4384699872C108583219 @default.
- W4384699872 hasConceptScore W4384699872C113775141 @default.
- W4384699872 hasConceptScore W4384699872C11413529 @default.
- W4384699872 hasConceptScore W4384699872C150552126 @default.
- W4384699872 hasConceptScore W4384699872C154945302 @default.
- W4384699872 hasConceptScore W4384699872C169590947 @default.
- W4384699872 hasConceptScore W4384699872C173018170 @default.
- W4384699872 hasConceptScore W4384699872C173608175 @default.
- W4384699872 hasConceptScore W4384699872C199360897 @default.
- W4384699872 hasConceptScore W4384699872C2777904410 @default.
- W4384699872 hasConceptScore W4384699872C41008148 @default.
- W4384699872 hasConceptScore W4384699872C50644808 @default.
- W4384699872 hasConceptScore W4384699872C76970557 @default.
- W4384699872 hasConceptScore W4384699872C84211073 @default.
- W4384699872 hasConceptScore W4384699872C9390403 @default.
- W4384699872 hasLocation W43846998721 @default.
- W4384699872 hasOpenAccess W4384699872 @default.
- W4384699872 hasPrimaryLocation W43846998721 @default.
- W4384699872 hasRelatedWork W1585350690 @default.
- W4384699872 hasRelatedWork W1992878653 @default.
- W4384699872 hasRelatedWork W2008876287 @default.
- W4384699872 hasRelatedWork W2053732522 @default.
- W4384699872 hasRelatedWork W2074226157 @default.
- W4384699872 hasRelatedWork W2094969226 @default.
- W4384699872 hasRelatedWork W2127972303 @default.
- W4384699872 hasRelatedWork W2166191672 @default.
- W4384699872 hasRelatedWork W4245302940 @default.
- W4384699872 hasRelatedWork W7941220 @default.
- W4384699872 hasVolume "149" @default.
- W4384699872 isParatext "false" @default.
- W4384699872 isRetracted "false" @default.
- W4384699872 workType "article" @default.