Add NPU device type and three fp16 models for image classification
mingmingtasd committed Apr 29, 2024
1 parent 82021b5 commit 7ce7915
Showing 7 changed files with 553 additions and 43 deletions.
131 changes: 121 additions & 10 deletions common/utils.js
@@ -57,9 +57,17 @@ export async function buildConstantByNpy(builder, url) {
const dimensions = npArray.shape;
const type = dataTypeMap.get(npArray.dataType).type;
const TypedArrayConstructor = dataTypeMap.get(npArray.dataType).array;
const dataView = new Uint8Array(npArray.data.buffer);
const dataView2 = dataView.slice();
const typedArray = new TypedArrayConstructor(dataView2.buffer);
const typedArray = new TypedArrayConstructor(sizeOfShape(dimensions));
const dataView = new DataView(npArray.data.buffer);
const littleEndian = npArray.byteOrder === '<';
let getFuncName = `get` + type[0].toUpperCase() + type.substr(1);
if (type == 'float16') {
getFuncName = `getUint16`;
}
for (let i = 0; i < sizeOfShape(dimensions); ++i) {
typedArray[i] = dataView[getFuncName](
i * TypedArrayConstructor.BYTES_PER_ELEMENT, littleEndian);
}
return builder.constant({dataType: type, type, dimensions}, typedArray);
}
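
The new loop above reads each element through a DataView so the npy file's byte order is honored; because JavaScript has no native float16 type, fp16 weights are kept as raw 16-bit patterns in a Uint16Array via getUint16. A minimal standalone sketch of that read path, using hypothetical inline data rather than the repo's npy parser:

const bytes = new Uint8Array([0x00, 0x3c, 0x00, 0xc0]); // fp16 1.0 and -2.0, little-endian
const view = new DataView(bytes.buffer);
const fp16Bits = new Uint16Array(bytes.byteLength / Uint16Array.BYTES_PER_ELEMENT);
for (let i = 0; i < fp16Bits.length; ++i) {
  // No DataView.getFloat16 is assumed here; keep the raw bit pattern instead.
  fp16Bits[i] = view.getUint16(i * Uint16Array.BYTES_PER_ELEMENT, /* littleEndian */ true);
}
console.log(fp16Bits); // Uint16Array [15360, 49152], i.e. 0x3c00 and 0xc000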

@@ -94,6 +102,100 @@ export function stopCameraStream(id, stream) {
}
}

// ref: http://stackoverflow.com/questions/32633585/how-do-you-convert-to-half-floats-in-javascript
const toHalf = (function() {
const floatView = new Float32Array(1);
const int32View = new Int32Array(floatView.buffer);

/* This method is faster than the OpenEXR implementation (very often
* used, e.g. in Ogre), with the additional benefit of rounding, inspired
* by James Tursa's half-precision code. */
return function toHalf(val) {
floatView[0] = val;
const x = int32View[0];

let bits = (x >> 16) & 0x8000; /* Get the sign */
let m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
const e = (x >> 23) & 0xff; /* Using int is faster here */

/* If zero, or denormal, or exponent underflows too much for a denormal
* half, return signed zero. */
if (e < 103) {
return bits;
}

/* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
if (e > 142) {
bits |= 0x7c00;
/* If exponent was 0xff and one mantissa bit was set, it means NaN,
* not Inf, so make sure we set one mantissa bit too. */
bits |= ((e === 255) && (x & 0x007fffff)) ? 1 : 0;
return bits;
}

/* If exponent underflows but not too much, return a denormal */
if (e < 113) {
m |= 0x0800;
/* Extra rounding may overflow and set mantissa to 0 and exponent
* to 1, which is OK. */
bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
return bits;
}

bits |= ((e - 112) << 10) | (m >> 1);
/* Extra rounding. An overflow will set mantissa to 0 and increment
* the exponent, which is OK. */
bits += m & 1;
return bits;
};
})();

// This function converts a Float16 stored as the bits of a Uint16 into
// a JavaScript Number.
// Adapted from: https://gist.github.com/martinkallman/5049614
// input is a Uint16 (e.g., new Uint16Array([value])[0])
export function float16ToNumber(input) {
// Create a 32 bit DataView to store the input
const arr = new ArrayBuffer(4);
const dv = new DataView(arr);

// Set the Float16 into the last 16 bits of the dataview
// So our dataView is [00xx]
dv.setUint16(2, input, false);

// Get all 32 bits as a 32 bit integer
// (JS bitwise operations are performed on 32 bit signed integers)
const asInt32 = dv.getInt32(0, false);

// All bits aside from the sign
let rest = asInt32 & 0x7fff;
// Sign bit
let sign = asInt32 & 0x8000;
// Exponent bits
const exponent = asInt32 & 0x7c00;

// Shift the non-sign bits into place for a 32 bit Float
rest <<= 13;
// Shift the sign bit into place for a 32 bit Float
sign <<= 16;

// Adjust bias
// https://en.wikipedia.org/wiki/Half-precision_floating-point_format#Exponent_encoding
rest += 0x38000000;
// Denormals-as-zero
rest = (exponent === 0 ? 0 : rest);
// Re-insert sign bit
rest |= sign;

// Set the adjusted float32 (stored as int32) back into the dataview
dv.setInt32(0, rest, false);

// Get it back out as a float32 (which js will convert to a Number)
const asFloat32 = dv.getFloat32(0, false);

return asFloat32;
}
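
Taken together, toHalf and float16ToNumber give a round trip between JS Numbers and fp16 bit patterns. A small sketch (toHalf is module-private above, so this assumes it is also exported or run inside utils.js):

const bits = toHalf(1 / 3);            // 0x3555, the nearest fp16 value
const approx = float16ToNumber(bits);  // 0.333251953125
console.log(bits.toString(16), approx, Math.abs(approx - 1 / 3)); // '3555' 0.333251953125 ~8.1e-5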
/**
* This method is used to convert an input element to tensor data.
* @param {Object} inputElement, an object of HTML [<img> | <video>] element.
@@ -129,9 +231,16 @@ export function stopCameraStream(id, stream) {
* @return {Object} tensor, an object of input tensor.
*/
export function getInputTensor(inputElement, inputOptions) {
const dataType = inputOptions.dataType || 'float32';
const inputDimensions = inputOptions.inputDimensions;
const tensor = new Float32Array(
inputDimensions.slice(1).reduce((a, b) => a * b));
let tensor;
if (dataType === 'float16') {
tensor = new Uint16Array(
inputDimensions.slice(1).reduce((a, b) => a * b));
} else {
tensor = new Float32Array(
inputDimensions.slice(1).reduce((a, b) => a * b));
}

inputElement.width = inputElement.videoWidth ||
inputElement.naturalWidth;
@@ -188,11 +297,12 @@ export function getInputTensor(inputElement, inputOptions) {
value = pixels[h * width * imageChannels + w * imageChannels + c];
}
if (inputLayout === 'nchw') {
tensor[c * width * height + h * width + w] =
(value - mean[c]) / std[c];
tensor[c * width * height + h * width + w] = dataType === 'float16' ?
toHalf((value - mean[c]) / std[c]) : (value - mean[c]) / std[c];
} else {
tensor[h * width * channels + w * channels + c] =
(value - mean[c]) / std[c];
tensor[h * width * channels + w * channels + c] = dataType ===
'float16' ?
toHalf((value - mean[c]) / std[c]) : (value - mean[c]) / std[c];
}
}
}
@@ -494,7 +604,8 @@ export function getDefaultLayout(deviceType) {
// Windows or Mac platform.
if (deviceType.indexOf('cpu') != -1) {
return 'nhwc';
} else if (deviceType.indexOf('gpu') != -1) {
} else if (deviceType.indexOf('gpu') != -1 ||
deviceType.indexOf('npu') != -1) {
return 'nchw';
}
}
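
With the added branch, an NPU device gets the same default layout as a GPU. Assuming this is the Windows/macOS path that the comment above refers to, the expected behavior is roughly:

getDefaultLayout('cpu'); // 'nhwc'
getDefaultLayout('gpu'); // 'nchw'
getDefaultLayout('npu'); // 'nchw' (new in this commit)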
166 changes: 166 additions & 0 deletions image_classification/efficientnet_fp16_nchw.js
@@ -0,0 +1,166 @@
'use strict';

import {buildConstantByNpy, weightsOrigin} from '../common/utils.js';

// EfficientNet fp16 model with 'nchw' input layout
export class EfficientNetFP16Nchw {
constructor() {
this.context_ = null;
this.builder_ = null;
this.graph_ = null;
this.weightsUrl_ = weightsOrigin() +
'/test-data/models/efficientnet_fp16_nchw_optimized/weights/';
this.inputOptions = {
mean: [0.485, 0.456, 0.406],
std: [0.229, 0.224, 0.225],
norm: true,
inputLayout: 'nchw',
labelUrl: './labels/labels1000.txt',
inputDimensions: [1, 3, 224, 224],
dataType: 'float16',
};
this.outputDimensions = [1, 1000];
}

async buildConv_(input, name, blockName, clip = false, options = {}) {
let prefix = '';
if (blockName !== '') {
prefix = this.weightsUrl_ + 'block' + blockName + '_conv' +
name;
} else {
prefix = this.weightsUrl_ + 'conv' + name;
}
const weight = buildConstantByNpy(this.builder_, prefix + '_w.npy');
options.bias = await buildConstantByNpy(this.builder_, prefix + '_b.npy');
if (clip) {
return this.builder_.clamp(
this.builder_.conv2d(await input, await weight, options),
{minValue: 0, maxValue: 6});
}
return this.builder_.conv2d(await input, await weight, options);
}

async buildGemm_(input, name) {
const prefix = this.weightsUrl_ + 'dense' + name;
const weightName = prefix + '_w.npy';
const weight = buildConstantByNpy(this.builder_, weightName);
const biasName = prefix + '_b.npy';
const bias = buildConstantByNpy(this.builder_, biasName);
const options =
{c: this.builder_.reshape(await bias, [1, 1000])};
return this.builder_.gemm(await input, await weight, options);
}

async buildBottleneck_(input, blockName, group, pad = 1) {
const conv1 = this.buildConv_(input, '0', blockName, true);
const conv2 = this.buildConv_(conv1, '1', blockName, true,
{groups: group, padding: [pad, pad, pad, pad]});
const conv3 = this.buildConv_(conv2, '2', blockName);
return this.builder_.add(await conv3, await input);
}

async buildBottlenecks_(input, blockNames, group, pad = 1) {
let result = input;
for (let i = 0; i < blockNames.length; i++) {
const bottleneck = await this.buildBottleneck_(result, blockNames[i],
group, pad);
result = bottleneck;
}
return result;
}

async load(contextOptions) {
this.context_ = await navigator.ml.createContext(contextOptions);
this.builder_ = new MLGraphBuilder(this.context_);
const data = this.builder_.input('input', {
dataType: this.inputOptions.dataType,
dimensions: this.inputOptions.inputDimensions
});
// Block 0
const conv1 = this.buildConv_(
data, '0', '0', true, {padding: [0, 1, 0, 1], strides: [2, 2]});
const conv2 = this.buildConv_(conv1, '1', '0', true,
{groups: 32, padding: [1, 1, 1, 1]});
const conv3 = this.buildConv_(conv2, '2', '0');

// Block 1
const conv4 = this.buildConv_(conv3, '0', '1', true);
const conv5 = this.buildConv_(conv4, '1', '1', true,
{groups: 144, padding: [0, 1, 0, 1], strides: [2, 2]});
const conv6 = this.buildConv_(conv5, '2', '1');

// Block 2~4
const bottleneck4 = this.buildBottlenecks_(conv6,
['2', '3', '4'], 192);

// Block 5
const conv7 = this.buildConv_(bottleneck4, '0', '5', true);
const conv8 = this.buildConv_(conv7, '1', '5', true,
{groups: 192, padding: [1, 2, 1, 2], strides: [2, 2]});
const conv9 = this.buildConv_(conv8, '2', '5');

// Block 6~8
const bottleneck8 = this.buildBottlenecks_(conv9,
['6', '7', '8'], 336, 2);

// Block 9
const conv10 = this.buildConv_(bottleneck8, '0', '9', true);
const conv11 = this.buildConv_(conv10, '1', '9', true,
{groups: 336, padding: [0, 1, 0, 1], strides: [2, 2]});
const conv12 = this.buildConv_(conv11, '2', '9');

// Block 10~14
const bottleneck14 = this.buildBottlenecks_(conv12,
['10', '11', '12', '13', '14'], 672);

// Block 15
const conv13 = this.buildConv_(bottleneck14, '0', '15', true);
const conv14 = this.buildConv_(conv13, '1', '15', true,
{groups: 672, padding: [2, 2, 2, 2]});
const conv15 = this.buildConv_(conv14, '2', '15');

// Block 16~20
const bottleneck20 = await this.buildBottlenecks_(conv15,
['16', '17', '18', '19', '20'], 960, 2);

// Block 21
const conv16 = this.buildConv_(bottleneck20, '0', '21', true);
const conv17 = this.buildConv_(conv16, '1', '21', true,
{groups: 960, padding: [1, 2, 1, 2], strides: [2, 2]});
const conv18 = this.buildConv_(conv17, '2', '21');

// Block 22~28
const bottleneck28 = this.buildBottlenecks_(conv18,
['22', '23', '24', '25', '26', '27', '28'], 1632, 2);

// Block 29
const conv19 = this.buildConv_(bottleneck28, '0', '29', true);
const conv20 = this.buildConv_(conv19, '1', '29', true,
{groups: 1632, padding: [1, 1, 1, 1]});
const conv21 = this.buildConv_(conv20, '2', '29');

const conv22 = this.buildConv_(conv21, '0', '', true);
const pool1 = this.builder_.averagePool2d(await conv22);
const reshape = this.builder_.reshape(pool1, [1, 1280]);
return this.buildGemm_(reshape, '0');
}

async build(outputOperand) {
this.graph_ = await this.builder_.build({'output': outputOperand});
}

// Release the constant tensors of a model
dispose() {
// dispose() is only available in webnn-polyfill
if (this.graph_ !== null && 'dispose' in this.graph_) {
this.graph_.dispose();
}
}

async compute(inputBuffer, outputBuffer) {
const inputs = {'input': inputBuffer};
const outputs = {'output': outputBuffer};
const results = await this.context_.compute(this.graph_, inputs, outputs);
return results;
}
}
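
A hedged end-to-end usage sketch for the class above; the deviceType 'npu' context option, the result-handling of compute(), and the Uint16Array output buffer are assumptions inferred from this diff, not code from the commit:

import {EfficientNetFP16Nchw} from './efficientnet_fp16_nchw.js';
import {getInputTensor} from '../common/utils.js';

async function classify(imgElement) {
  const model = new EfficientNetFP16Nchw();
  // Assumption: the WebNN context options accept deviceType 'npu' after this commit.
  const outputOperand = await model.load({deviceType: 'npu'});
  await model.build(outputOperand);
  // For dataType 'float16', getInputTensor fills a Uint16Array of raw fp16 bits.
  const inputBuffer = getInputTensor(imgElement, model.inputOptions);
  // Assumption: fp16 outputs are also exchanged as raw bits in a Uint16Array.
  const outputBuffer = new Uint16Array(
      model.outputDimensions.reduce((a, b) => a * b));
  const results = await model.compute(inputBuffer, outputBuffer);
  // compute() may transfer the buffers; the scores come back on the result object.
  return results.outputs.output; // 1000 class scores as raw fp16 bit patterns
}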