<!-- index.html -->

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>gestureRecognition</title>
    <link rel="stylesheet" href="./css/main.css" />
    <!-- Import styles -->
    <link
      rel="stylesheet"
      href="https://unpkg.com/element-ui/lib/theme-chalk/index.css"
    />
    <!-- Load Vue first: the Element UI browser bundle reads the global Vue while it loads -->
    <script src="https://cdn.jsdelivr.net/npm/vue@2"></script>
    <!-- Import the Element UI component library -->
    <script src="https://unpkg.com/element-ui/lib/index.js"></script>
    <!-- TensorFlow.js core and the tf.data add-on (used for webcam capture) -->
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.0.0/dist/tf.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-data@2.0.0/dist/tf-data.min.js"></script>
  </head>

  <body>
    <div id="app">
      <div class="contain">
        <!-- Left pane: live webcam preview, capture/train controls and status line -->
        <div class="left">
          <video
            class="video"
            autoplay
            playsinline
            muted
            id="webcam"
            width="80%"
            height="80%"
            aria-label="Webcam preview"
          ></video>
          <!-- Each capture button stores the current frame under the class index it passes -->
          <div
            id="buttons"
            style="
              display: flex;
              justify-content: space-around;
              padding: 0 1rem;
            "
          >
            <button type="button" onclick="captureSample(0)">None</button>
            <button type="button" onclick="captureSample(1)">✊ (Rock)</button>
            <button type="button" onclick="captureSample(2)">🖐 (Paper)</button>
            <button type="button" onclick="captureSample(3)">✌️ (Scissors)</button>
            <button type="button" onclick="trainModel()">Train</button>
          </div>
          <!-- Status text is rewritten by script; aria-live lets assistive tech announce updates -->
          <h3 id="status" style="margin-left: 1.25rem" aria-live="polite"></h3>
        </div>
        <!-- Right pane: tutorial video -->
        <div class="right" style="display: flex; align-items: center">
          <video
            class="video video-teach"
            src="https://api.kexie.space/data/resource/hello/videos/ml-game.mp4"
            width="90%"
            height="auto"
            controls
          ></video>
        </div>
      </div>
    </div>
    <script>
      // Captured training samples; captureSample() pushes { image, category } objects here.
      let trainingData = [];

      // Display names of the classes; a sample's category is an index into this array.
      const labels = ["None", "✊ (Rock)", "🖐 (Paper)", "✌️ (Scissors)"];

      /**
       * Shows a message in the element whose id is "status".
       *
       * @param {string} text - Message to display.
       */
      function setText(text) {
        const statusElement = document.getElementById("status");
        statusElement.innerText = text;
      }

      /**
       * Classifies the current webcam frame and shows the winning label.
       *
       * Does nothing while hasTrained is false. Every tensor created here is
       * released even when prediction fails, because this function is invoked
       * repeatedly on a timer.
       *
       * @returns {Promise<void>}
       */
      async function predictImage() {
        if (!hasTrained) {
          return; // Skip prediction until trained
        }
        const img = await getWebcamImage();
        let result = null;
        let prediction;
        try {
          // tf.tidy frees the intermediate reshaped tensor; the returned output survives
          result = tf.tidy(() => model.predict(img.reshape([1, 224, 224, 3])));
          prediction = await result.data();
        } finally {
          img.dispose();
          if (result) {
            result.dispose();
          }
        }
        if (!hasTrained) {
          // Training (re)started while we were awaiting; leave its status text alone
          return;
        }
        // Get the index of the highest value in the prediction
        const id = prediction.indexOf(Math.max(...prediction));
        setText(labels[id]);
      }

      /**
       * Builds a transfer-learning classifier from a pre-trained layers model.
       *
       * The given model is cut at its "dropout" layer, every layer of the
       * resulting base is frozen, and a new dense head ending in a softmax over
       * labels.length classes is attached.
       *
       * @param {object} model - Pre-trained model that contains a layer named "dropout".
       * @returns {object} New model sharing the inputs of the pre-trained model.
       */
      function createTransferModel(model) {
        // Truncate the network: keep everything up to "dropout", the last layer
        // before the pre-trained classification (conv_pred) layer.
        const cutLayer = model.getLayer("dropout");
        const featureExtractor = tf.model({
          inputs: model.inputs,
          outputs: cutLayer.output,
        });

        // The convolutional base must not be updated during training.
        featureExtractor.layers.forEach((layer) => {
          layer.trainable = false;
        });

        // New classification head, fed by the truncated base's output.
        const featureTensor = featureExtractor.outputs[0];
        const head = tf.sequential({
          layers: [
            tf.layers.flatten({ inputShape: featureTensor.shape.slice(1) }),
            tf.layers.dense({ units: 100, activation: "relu" }),
            tf.layers.dense({ units: 100, activation: "relu" }),
            tf.layers.dense({ units: 10, activation: "relu" }),
            tf.layers.dense({
              units: labels.length,
              kernelInitializer: "varianceScaling",
              useBias: false,
              activation: "softmax",
            }),
          ],
        });

        // Join the frozen base and the new head into one model.
        return tf.model({
          inputs: featureExtractor.inputs,
          outputs: head.apply(featureTensor),
        });
      }

      /**
       * Trains the model on every sample currently held in trainingData.
       *
       * Sets hasTrained to false while training runs and back to true on
       * success. If the model has not been loaded yet, or no samples have been
       * captured, a status message is shown and nothing else changes. The
       * stacked input/target tensors are always released; the per-sample
       * images stay in trainingData so training can be repeated.
       *
       * @returns {Promise<void>} Rejects with the underlying error if training fails.
       */
      async function trainModel() {
        if (!model) {
          setText("Model is still loading...");
          return;
        }
        if (trainingData.length === 0) {
          // tf.stack([]) would throw; tell the user what is missing instead
          setText("Capture some samples before training");
          return;
        }

        hasTrained = false;
        setText("Training...");

        let xs = null;
        let ys = null;
        try {
          // Setup training data: stacked images and one-hot encoded categories
          xs = tf.stack(trainingData.map((sample) => sample.image));
          ys = tf.tensor2d(
            trainingData.map((sample) =>
              labels.map((_, index) => (index === sample.category ? 1 : 0))
            )
          );

          // Train the model on new image samples
          // NOTE(review): "categoricalCrossentropy" is the usual loss for a softmax
          // output; "meanSquaredError" is kept to preserve existing behaviour.
          model.compile({
            loss: "meanSquaredError",
            optimizer: "adam",
            metrics: ["acc"],
          });

          await model.fit(xs, ys, {
            epochs: 30,
            shuffle: true,
            callbacks: {
              onEpochEnd: (epoch, logs) => {
                console.log("Epoch #", epoch, logs);
              },
            },
          });
          hasTrained = true;
        } catch (err) {
          // Do not leave the status stuck on "Training..."
          setText("Training failed");
          throw err;
        } finally {
          if (xs) {
            xs.dispose();
          }
          if (ys) {
            ys.dispose();
          }
        }
      }

      // URL of the pre-trained Mobilenet v1 0.25 224x224 model
      const mobilenet =
        "https://storage.googleapis.com/tfjs-models/tfjs/mobilenet_v1_0.25_224/model.json";

      // Model used for training and prediction; null until the startup code assigns it.
      let model = null;
      // True only after trainModel() has completed; predictImage() returns early while false.
      let hasTrained = false;

      /**
       * Requests camera access and attaches the stream to the #webcam video.
       *
       * Prefers the standard promise-based navigator.mediaDevices.getUserMedia
       * and falls back to the legacy prefixed callback API only when the
       * standard one is missing.
       *
       * @returns {Promise<Event>} Resolves with the video's "loadeddata" event.
       *   Rejects with an Error when no camera API exists, or with the browser's
       *   own error when access is denied.
       */
      async function setupWebcam() {
        const webcamElement = document.getElementById("webcam");
        const constraints = { video: true };

        let stream;
        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
          stream = await navigator.mediaDevices.getUserMedia(constraints);
        } else {
          const legacyGetUserMedia =
            navigator.getUserMedia ||
            navigator.webkitGetUserMedia ||
            navigator.mozGetUserMedia ||
            navigator.msGetUserMedia;
          if (!legacyGetUserMedia) {
            throw new Error("getUserMedia is not supported by this browser");
          }
          // The legacy API is callback-based and must be invoked on navigator
          stream = await new Promise((resolve, reject) => {
            legacyGetUserMedia.call(navigator, constraints, resolve, reject);
          });
        }

        return new Promise((resolve) => {
          // Register the listener before attaching the stream so the event cannot be missed
          webcamElement.addEventListener("loadeddata", resolve, { once: true });
          webcamElement.srcObject = stream;
        });
      }

      /**
       * Resizes an image tensor with bilinear interpolation.
       *
       * @param {object} imageTensor - Image tensor handed to tf.image.resizeBilinear.
       * @param {number} targetHeight - Output height.
       * @param {number} targetWidth - Output width.
       * @returns {Promise<object>} Promise for the resized tensor.
       */
      async function resizeImage(imageTensor, targetHeight, targetWidth) {
        const targetSize = [targetHeight, targetWidth];
        return tf.image.resizeBilinear(imageTensor, targetSize);
      }

      /**
       * Captures one webcam frame, resizes it to 224x224 and scales pixel
       * values from [0, 255] to [-1, 1].
       *
       * Only the returned tensor stays alive: the raw frame and every
       * intermediate tensor are disposed here. The caller owns the result and
       * must dispose it when finished.
       *
       * @returns {Promise<object>} Normalized float image tensor.
       */
      async function getWebcamImage() {
        const frame = await webcam.capture();
        try {
          // tf.tidy needs a synchronous callback, so resizeBilinear is called
          // directly instead of going through the async resizeImage() helper.
          return tf.tidy(() =>
            tf.image
              .resizeBilinear(frame.toFloat(), [224, 224])
              // 127.5 maps 0 to -1 and 255 to exactly 1 (127 overshoots to ~1.008)
              .div(127.5)
              .sub(1)
          );
        } finally {
          frame.dispose();
        }
      }

      /**
       * Captures the current webcam frame and stores it as a training sample.
       *
       * @param {number} category - Index into labels identifying the sample's class.
       * @returns {Promise<void>}
       */
      async function captureSample(category) {
        if (!webcam) {
          // The buttons are clickable before startup has created the webcam iterator
          setText("Webcam is not ready yet");
          return;
        }
        const image = await getWebcamImage();
        trainingData.push({ image, category });
        setText("Captured: " + labels[category]);
      }

      // Webcam source created by tf.data.webcam; null until startup below has finished.
      let webcam = null;

      // Startup: load the model, open the camera, then begin periodic prediction.
      (async () => {
        try {
          // Load the model
          const pretrained = await tf.loadLayersModel(mobilenet);
          model = createTransferModel(pretrained);
          await setupWebcam();
          webcam = await tf.data.webcam(document.getElementById("webcam"));
        } catch (err) {
          // Surface the failure instead of leaving an unhandled rejection and a blank page
          console.error("Initialization failed", err);
          setText(
            "Initialization failed: " +
              (err && err.message ? err.message : "model or webcam unavailable")
          );
          return;
        }
        // Setup prediction every 200 ms
        setInterval(() => {
          predictImage().catch((err) => console.error("Prediction failed", err));
        }, 200);
      })();
    </script>
  </body>
</html>