Add NPU device type and three fp16 models for image classification
mingmingtasd committed Apr 29, 2024
1 parent 82021b5 commit 7ce7915
Showing 7 changed files with 553 additions and 43 deletions.
131 changes: 121 additions & 10 deletions common/utils.js
@@ -57,9 +57,17 @@ export async function buildConstantByNpy(builder, url) {
const dimensions = npArray.shape;
const type = dataTypeMap.get(npArray.dataType).type;
const TypedArrayConstructor = dataTypeMap.get(npArray.dataType).array;
const dataView = new Uint8Array(npArray.data.buffer);
const dataView2 = dataView.slice();
const typedArray = new TypedArrayConstructor(dataView2.buffer);
const typedArray = new TypedArrayConstructor(sizeOfShape(dimensions));
const dataView = new DataView(npArray.data.buffer);
const littleEndian = npArray.byteOrder === '<';
let getFuncName = `get` + type[0].toUpperCase() + type.substr(1);
if (type == 'float16') {
getFuncName = `getUint16`;
}
for (let i = 0; i < sizeOfShape(dimensions); ++i) {
typedArray[i] = dataView[getFuncName](
i * TypedArrayConstructor.BYTES_PER_ELEMENT, littleEndian);
}
return builder.constant({dataType: type, type, dimensions}, typedArray);
}
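
The new loop above reads each element through a DataView so the npy file's byte order is honored; because JavaScript has no native float16 type, fp16 weights are kept as raw 16-bit patterns in a Uint16Array via getUint16. A minimal standalone sketch of that read path, using hypothetical inline data rather than the repo's npy parser:

const bytes = new Uint8Array([0x00, 0x3c, 0x00, 0xc0]); // fp16 1.0 and -2.0, little-endian
const view = new DataView(bytes.buffer);
const fp16Bits = new Uint16Array(bytes.byteLength / Uint16Array.BYTES_PER_ELEMENT);
for (let i = 0; i < fp16Bits.length; ++i) {
  // No DataView.getFloat16 is assumed here; keep the raw bit pattern instead.
  fp16Bits[i] = view.getUint16(i * Uint16Array.BYTES_PER_ELEMENT, /* littleEndian */ true);
}
console.log(fp16Bits); // Uint16Array [15360, 49152], i.e. 0x3c00 and 0xc000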

@@ -94,6 +102,100 @@ export function stopCameraStream(id, stream) {
}
}

// ref: http://stackoverflow.com/questions/32633585/how-do-you-convert-to-half-floats-in-javascript
const toHalf = (function() {
const floatView = new Float32Array(1);
const int32View = new Int32Array(floatView.buffer);

/* This method is faster than the OpenEXR implementation (very often
* used, e.g. in Ogre), with the additional benefit of rounding, inspired
* by James Tursa's half-precision code. */
return function toHalf(val) {
floatView[0] = val;
const x = int32View[0];

let bits = (x >> 16) & 0x8000; /* Get the sign */
let m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
const e = (x >> 23) & 0xff; /* Using int is faster here */

/* If zero, or denormal, or exponent underflows too much for a denormal
* half, return signed zero. */
if (e < 103) {
return bits;
}

/* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
if (e > 142) {
bits |= 0x7c00;
/* If exponent was 0xff and one mantissa bit was set, it means NaN,
* not Inf, so make sure we set one mantissa bit too. */
bits |= ((e === 255) && (x & 0x007fffff)) ? 1 : 0;
return bits;
}

/* If exponent underflows but not too much, return a denormal */
if (e < 113) {
m |= 0x0800;
/* Extra rounding may overflow and set mantissa to 0 and exponent
* to 1, which is OK. */
bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
return bits;
}

bits |= ((e - 112) << 10) | (m >> 1);
/* Extra rounding. An overflow will set mantissa to 0 and increment
* the exponent, which is OK. */
bits += m & 1;
return bits;
};
})();

// This function converts a Float16 stored as the bits of a Uint16 into
// a JavaScript Number.
// Adapted from: https://gist.github.com/martinkallman/5049614
// input is a Uint16 (e.g., new Uint16Array([value])[0])
export function float16ToNumber(input) {
// Create a 32 bit DataView to store the input
const arr = new ArrayBuffer(4);
const dv = new DataView(arr);

// Set the Float16 into the last 16 bits of the dataview
// So our dataView is [00xx]
dv.setUint16(2, input, false);

// Get all 32 bits as a 32 bit integer
// (JS bitwise operations are performed on 32 bit signed integers)
const asInt32 = dv.getInt32(0, false);

// All bits aside from the sign
let rest = asInt32 & 0x7fff;
// Sign bit
let sign = asInt32 & 0x8000;
// Exponent bits
const exponent = asInt32 & 0x7c00;

// Shift the non-sign bits into place for a 32 bit Float
rest <<= 13;
// Shift the sign bit into place for a 32 bit Float
sign <<= 16;

// Adjust bias
// https://en.wikipedia.org/wiki/Half-precision_floating-point_format#Exponent_encoding
rest += 0x38000000;
// Denormals-as-zero
rest = (exponent === 0 ? 0 : rest);
// Re-insert sign bit
rest |= sign;

// Set the adjusted float32 (stored as int32) back into the dataview
dv.setInt32(0, rest, false);

// Get it back out as a float32 (which js will convert to a Number)
const asFloat32 = dv.getFloat32(0, false);

return asFloat32;
}
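
Taken together, toHalf and float16ToNumber give a round trip between JS Numbers and fp16 bit patterns. A small sketch (toHalf is module-private above, so this assumes it is also exported or run inside utils.js):

const bits = toHalf(1 / 3);            // 0x3555, the nearest fp16 value
const approx = float16ToNumber(bits);  // 0.333251953125
console.log(bits.toString(16), approx, Math.abs(approx - 1 / 3)); // '3555' 0.333251953125 ~8.1e-5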
/**
* This method is used to convert an input element to tensor data.
* @param {Object} inputElement, an object of HTML [<img> | <video>] element.
@@ -129,9 +231,16 @@ export function stopCameraStream(id, stream) {
* @return {Object} tensor, an object of input tensor.
*/
export function getInputTensor(inputElement, inputOptions) {
const dataType = inputOptions.dataType || 'float32';
const inputDimensions = inputOptions.inputDimensions;
const tensor = new Float32Array(
inputDimensions.slice(1).reduce((a, b) => a * b));
let tensor;
if (dataType === 'float16') {
tensor = new Uint16Array(
inputDimensions.slice(1).reduce((a, b) => a * b));
} else {
tensor = new Float32Array(
inputDimensions.slice(1).reduce((a, b) => a * b));
}

inputElement.width = inputElement.videoWidth ||
inputElement.naturalWidth;
@@ -188,11 +297,12 @@ export function getInputTensor(inputElement, inputOptions) {
value = pixels[h * width * imageChannels + w * imageChannels + c];
}
if (inputLayout === 'nchw') {
tensor[c * width * height + h * width + w] =
(value - mean[c]) / std[c];
tensor[c * width * height + h * width + w] = dataType === 'float16' ?
toHalf((value - mean[c]) / std[c]) : (value - mean[c]) / std[c];
} else {
tensor[h * width * channels + w * channels + c] =
(value - mean[c]) / std[c];
tensor[h * width * channels + w * channels + c] = dataType ===
'float16' ?
toHalf((value - mean[c]) / std[c]) : (value - mean[c]) / std[c];
}
}
}
@@ -494,7 +604,8 @@ export function getDefaultLayout(deviceType) {
// Windows or Mac platform.
if (deviceType.indexOf('cpu') != -1) {
return 'nhwc';
} else if (deviceType.indexOf('gpu') != -1) {
} else if (deviceType.indexOf('gpu') != -1 ||
deviceType.indexOf('npu') != -1) {
return 'nchw';
}
}
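
With the added branch, an NPU device gets the same default layout as a GPU. Assuming this is the Windows/macOS path that the comment above refers to, the expected behavior is roughly:

getDefaultLayout('cpu'); // 'nhwc'
getDefaultLayout('gpu'); // 'nchw'
getDefaultLayout('npu'); // 'nchw' (new in this commit)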
166 changes: 166 additions & 0 deletions image_classification/efficientnet_fp16_nchw.js
@@ -0,0 +1,166 @@
'use strict';

import {buildConstantByNpy, weightsOrigin} from '../common/utils.js';

// EfficientNet fp16 model with 'nchw' input layout
export class EfficientNetFP16Nchw {
constructor() {
this.context_ = null;
this.builder_ = null;
this.graph_ = null;
this.weightsUrl_ = weightsOrigin() +
'/test-data/models/efficientnet_fp16_nchw_optimized/weights/';
this.inputOptions = {
mean: [0.485, 0.456, 0.406],
std: [0.229, 0.224, 0.225],
norm: true,
inputLayout: 'nchw',
labelUrl: './labels/labels1000.txt',
inputDimensions: [1, 3, 224, 224],
dataType: 'float16',
};
this.outputDimensions = [1, 1000];
}

async buildConv_(input, name, blockName, clip = false, options = {}) {
let prefix = '';
if (blockName !== '') {
prefix = this.weightsUrl_ + 'block' + blockName + '_conv' +
name;
} else {
prefix = this.weightsUrl_ + 'conv' + name;
}
const weight = buildConstantByNpy(this.builder_, prefix + '_w.npy');
options.bias = await buildConstantByNpy(this.builder_, prefix + '_b.npy');
if (clip) {
return this.builder_.clamp(
this.builder_.conv2d(await input, await weight, options),
{minValue: 0, maxValue: 6});
}
return this.builder_.conv2d(await input, await weight, options);
}

async buildGemm_(input, name) {
const prefix = this.weightsUrl_ + 'dense' + name;
const weightName = prefix + '_w.npy';
const weight = buildConstantByNpy(this.builder_, weightName);
const biasName = prefix + '_b.npy';
const bias = buildConstantByNpy(this.builder_, biasName);
const options =
{c: this.builder_.reshape(await bias, [1, 1000])};
return this.builder_.gemm(await input, await weight, options);
}

async buildBottleneck_(input, blockName, group, pad = 1) {
const conv1 = this.buildConv_(input, '0', blockName, true);
const conv2 = this.buildConv_(conv1, '1', blockName, true,
{groups: group, padding: [pad, pad, pad, pad]});
const conv3 = this.buildConv_(conv2, '2', blockName);
return this.builder_.add(await conv3, await input);
}

async buildBottlenecks_(input, blockNames, group, pad = 1) {
let result = input;
for (let i = 0; i < blockNames.length; i++) {
const bottleneck = await this.buildBottleneck_(result, blockNames[i],
group, pad);
result = bottleneck;
}
return result;
}

async load(contextOptions) {
this.context_ = await navigator.ml.createContext(contextOptions);
this.builder_ = new MLGraphBuilder(this.context_);
const data = this.builder_.input('input', {
dataType: this.inputOptions.dataType,
dimensions: this.inputOptions.inputDimensions
});
// Block 0
const conv1 = this.buildConv_(
data, '0', '0', true, {padding: [0, 1, 0, 1], strides: [2, 2]});
const conv2 = this.buildConv_(conv1, '1', '0', true,
{groups: 32, padding: [1, 1, 1, 1]});
const conv3 = this.buildConv_(conv2, '2', '0');

// Block 1
const conv4 = this.buildConv_(conv3, '0', '1', true);
const conv5 = this.buildConv_(conv4, '1', '1', true,
{groups: 144, padding: [0, 1, 0, 1], strides: [2, 2]});
const conv6 = this.buildConv_(conv5, '2', '1');

// Block 2~4
const bottleneck4 = this.buildBottlenecks_(conv6,
['2', '3', '4'], 192);

// Block 5
const conv7 = this.buildConv_(bottleneck4, '0', '5', true);
const conv8 = this.buildConv_(conv7, '1', '5', true,
{groups: 192, padding: [1, 2, 1, 2], strides: [2, 2]});
const conv9 = this.buildConv_(conv8, '2', '5');

// Block 6~8
const bottleneck8 = this.buildBottlenecks_(conv9,
['6', '7', '8'], 336, 2);

// Block 9
const conv10 = this.buildConv_(bottleneck8, '0', '9', true);
const conv11 = this.buildConv_(conv10, '1', '9', true,
{groups: 336, padding: [0, 1, 0, 1], strides: [2, 2]});
const conv12 = this.buildConv_(conv11, '2', '9');

// Block 10~14
const bottleneck14 = this.buildBottlenecks_(conv12,
['10', '11', '12', '13', '14'], 672);

// Block 15
const conv13 = this.buildConv_(bottleneck14, '0', '15', true);
const conv14 = this.buildConv_(conv13, '1', '15', true,
{groups: 672, padding: [2, 2, 2, 2]});
const conv15 = this.buildConv_(conv14, '2', '15');

// Block 16~20
const bottleneck20 = await this.buildBottlenecks_(conv15,
['16', '17', '18', '19', '20'], 960, 2);

// Block 21
const conv16 = this.buildConv_(bottleneck20, '0', '21', true);
const conv17 = this.buildConv_(conv16, '1', '21', true,
{groups: 960, padding: [1, 2, 1, 2], strides: [2, 2]});
const conv18 = this.buildConv_(conv17, '2', '21');

// Block 22~28
const bottleneck28 = this.buildBottlenecks_(conv18,
['22', '23', '24', '25', '26', '27', '28'], 1632, 2);

// Block 29
const conv19 = this.buildConv_(bottleneck28, '0', '29', true);
const conv20 = this.buildConv_(conv19, '1', '29', true,
{groups: 1632, padding: [1, 1, 1, 1]});
const conv21 = this.buildConv_(conv20, '2', '29');

const conv22 = this.buildConv_(conv21, '0', '', true);
const pool1 = this.builder_.averagePool2d(await conv22);
const reshape = this.builder_.reshape(pool1, [1, 1280]);
return this.buildGemm_(reshape, '0');
}

async build(outputOperand) {
this.graph_ = await this.builder_.build({'output': outputOperand});
}

// Release the constant tensors of a model
dispose() {
// dispose() is only available in webnn-polyfill
if (this.graph_ !== null && 'dispose' in this.graph_) {
this.graph_.dispose();
}
}

async compute(inputBuffer, outputBuffer) {
const inputs = {'input': inputBuffer};
const outputs = {'output': outputBuffer};
const results = await this.context_.compute(this.graph_, inputs, outputs);
return results;
}
}
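
A hedged end-to-end usage sketch for the class above; the deviceType 'npu' context option, the result-handling of compute(), and the Uint16Array output buffer are assumptions inferred from this diff, not code from the commit:

import {EfficientNetFP16Nchw} from './efficientnet_fp16_nchw.js';
import {getInputTensor} from '../common/utils.js';

async function classify(imgElement) {
  const model = new EfficientNetFP16Nchw();
  // Assumption: the WebNN context options accept deviceType 'npu' after this commit.
  const outputOperand = await model.load({deviceType: 'npu'});
  await model.build(outputOperand);
  // For dataType 'float16', getInputTensor fills a Uint16Array of raw fp16 bits.
  const inputBuffer = getInputTensor(imgElement, model.inputOptions);
  // Assumption: fp16 outputs are also exchanged as raw bits in a Uint16Array.
  const outputBuffer = new Uint16Array(
      model.outputDimensions.reduce((a, b) => a * b));
  const results = await model.compute(inputBuffer, outputBuffer);
  // compute() may transfer the buffers; the scores come back on the result object.
  return results.outputs.output; // 1000 class scores as raw fp16 bit patterns
}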