# celeba_progan.py

# -*- coding: utf-8 -*-
"""CelebA_ProGAN.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1jJSvIb0ShUrpKcis66MurtWwKEDUd8Xs

# **One time execute cell**
**Run this to save time and only see the generated faces and not the entire paper!**
"""

# Prerequisites:
# TensorFlow 2.0+ (pip install --upgrade tensorflow)
# Pillow, imported as PIL (pip install --upgrade pillow)
# TensorFlow Hub (pip install --upgrade tensorflow-hub)
# Matplotlib (pip install --upgrade matplotlib)

# AUTHOR: Aryan Mishra (https://www.github.com/ahmishra)

# GOOGLE COLAB NOTEBOOK: https://www.shorturl.at/kqzE2
# GITHUB: https://github.com/ahmishra/CelebA-ProGAN

# Timer and logging setup for the quick-start cell.
from time import time
import os
# Set before TensorFlow is imported further down, so the value is already in
# the environment when TensorFlow loads (presumably this quiets TensorFlow's
# native log output -- confirm against the TensorFlow docs).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Reference point for the "Finished in ...s" message printed later.
start = time()
# Gates the [INFO]/[GEN] progress messages printed by this cell.
verbose = True

if verbose:
    print("[INFO] Loading libraries")

from tensorflow.image import convert_image_dtype
from tensorflow.random import normal
from tensorflow import constant
from tensorflow_hub import load
from tensorflow import Variable
from tensorflow import uint8

from matplotlib.pyplot import imshow, show, tick_params
from PIL.Image import fromarray

if verbose:
    print("[INFO] Loading helpers")

def display_image(image):
  """Plot `image` with matplotlib, hiding axis ticks and tick labels.

  The input is wrapped in a tensor and converted to uint8 before plotting.
  """
  pixels = convert_image_dtype(constant(image), uint8).numpy()
  # Hide the ticks and labels so only the picture is visible.
  tick_params(
      left=False,
      right=False,
      labelleft=False,
      labelbottom=False,
      bottom=False,
  )
  imshow(pixels)
  show()

if verbose:
    print("[GEN] Generating face...")

# Length of the latent vector fed to the generator.
latent_dim = 512
# Load the pre-trained ProGAN module from TF Hub and keep its 'default'
# signature as the callable generator.
progan = load("https://tfhub.dev/google/progan-128/1").signatures['default']
# One latent vector sampled from a normal distribution (a batch of size 1).
initial_vector = normal([1, latent_dim])
vector = Variable(initial_vector)
# Run the generator and take the first (only) image of the returned batch.
image = progan(vector.read_value())['default'][0]

if verbose:
    print("[INFO] Displaying generated face...")
    # Measured from `start`, so it excludes the display_image call below.
    print(f"Finished in {round(time() - start, 2)}s")
    
display_image(image)

"""# Get started (Full paper below)

## **Generating artificial faces with [CelebA Progressive GAN Model](https://tfhub.dev/google/progan-128/1)**

<br>

#### Author: **[Aryan Mishra](https://github.com/ahmishra)**
#### Model download: **https://tfhub.dev/google/progan-128/1**

<br>

This Colab demonstrates use of a TF Hub module based on a generative adversarial network (GAN). The module maps from N-dimensional vectors, called latent space, to RGB images.

Two examples are provided:
* **Mapping** from latent space to images, and
* Given a target image, **using gradient descent to find** a latent vector that generates an image similar to the target image.

<br>

## Optional prerequisites

* Familiarity with [low level Tensorflow concepts](https://www.tensorflow.org/guide/eager).
* [Generative Adversarial Network](https://en.wikipedia.org/wiki/Generative_adversarial_network) on Wikipedia.
* Paper on Progressive GANs: [Progressive Growing of GANs for Improved Quality, Stability, and Variation](https://arxiv.org/abs/1710.10196).

### More models
[Here](https://tfhub.dev/s?module-type=image-generator) you can find all models currently hosted on [tfhub.dev](https://tfhub.dev) that can generate images.

# **FULL PAPER 👇**

# Setup
"""

# Install the extra packages used below when they are not already importable.
# The notebook's `!pip ...` shell magics are only valid inside IPython/Colab
# and are a SyntaxError in a plain .py file, so pip is invoked through the
# current interpreter instead.
# - imageio: creating animations.
# - scikit-image (imported as skimage): image pre-processing.
# - tensorflow_docs: provides the `embed` helper imported below.
import importlib.util
import subprocess
import sys

for _module_name, _pip_args in (
    ("imageio", ["-q", "install", "imageio"]),
    ("skimage", ["-q", "install", "scikit-image"]),
    ("tensorflow_docs", ["install", "git+https://github.com/tensorflow/docs"]),
):
  if importlib.util.find_spec(_module_name) is None:
    # Like the original shell magics, a failed install does not abort the
    # script; the imports that follow will report anything still missing.
    subprocess.run([sys.executable, "-m", "pip", *_pip_args], check=False)

from absl import logging

import imageio
import PIL.Image
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
tf.random.set_seed(0)

import tensorflow_hub as hub
from tensorflow_docs.vis import embed
import time

try:
  from google.colab import files
except ImportError:
  pass

from IPython import display
from skimage import transform

# We could retrieve this value from module.get_input_shapes() if we didn't know
# beforehand which module we will be using.
latent_dim = 512


def interpolate_hypersphere(v1, v2, num_steps):
  """Interpolate between two vectors on the hypersphere through `v1`.

  `v2` is first rescaled to have the same norm as `v1`. Each step is a linear
  blend of `v1` and the rescaled `v2`, rescaled again to the norm of `v1`.

  Args:
    v1: Start vector. Must be non-zero.
    v2: End vector. Must be non-zero, and `v1` and `v2` must not both lie on
      a line going through the origin.
    num_steps: Number of vectors to produce. With a single step only the
      start vector is returned.

  Returns:
    The interpolated vectors combined with `tf.stack`, first step first.
  """
  v1_norm = tf.norm(v1)
  v2_norm = tf.norm(v2)
  v2_normalized = v2 * (v1_norm / v2_norm)

  # With num_steps == 1 there is no interval to divide: dividing by
  # (num_steps - 1) == 0 would turn the only vector into NaNs. Clamping the
  # divisor to 1 makes that single step evaluate to v1.
  last_step = max(num_steps - 1, 1)

  vectors = []
  for step in range(num_steps):
    interpolated = v1 + (v2_normalized - v1) * step / last_step
    interpolated_norm = tf.norm(interpolated)
    # Rescale so every step has the same norm as v1.
    interpolated_normalized = interpolated * (v1_norm / interpolated_norm)
    vectors.append(interpolated_normalized)
  return tf.stack(vectors)


ANIM_FILE = "./animation.gif"

def display_image(image):
  """Convert `image` to uint8 and return it as a PIL image.

  Nothing is drawn here; showing the returned object is left to the caller.
  """
  as_uint8 = tf.image.convert_image_dtype(tf.constant(image), tf.uint8)
  pixel_array = as_uint8.numpy()
  return PIL.Image.fromarray(pixel_array)

def animate(images):
  """Save `images` as an animation in ANIM_FILE and return its embedding.

  Pixel values are multiplied by 255, clipped to [0, 255] and cast to uint8
  before the frames are written.
  """
  scaled = np.array(images) * 255
  frames = np.clip(scaled, 0, 255).astype(np.uint8)
  imageio.mimsave(ANIM_FILE, frames)
  return embed.embed_file(ANIM_FILE)

logging.set_verbosity(logging.ERROR)

"""# Latent space interpolation

### Random vectors

Latent space interpolation between two randomly initialized vectors. We will use a TF Hub module [progan-128](https://tfhub.dev/google/progan-128/1) that contains a pre-trained Progressive GAN.
"""

# Loading up ProGAN

# Why are we using a pre-trained ProGAN?
# Unfortunately, training a Progressive GAN is very computationally intensive and expensive. Google Colab currently offers only around 12 GB of RAM and 10 GB
# of GPU RAM, whereas training a ProGAN needs around 32 GB of RAM and 16-24 GB of GPU RAM; on top of that, the GPU and CPU must sustain long, heavy workloads for 48+ hours.
# Since Google Colab doesn't offer specs that high, we use a model pre-trained by Google on its own high-end servers.


progan = hub.load("https://tfhub.dev/google/progan-128/1").signatures['default']

def interpolate_between_vectors(num_steps=50):
  """Generate images along an interpolation between two random latent vectors.

  Args:
    num_steps: Number of interpolation steps, and therefore of generated
      images. Defaults to 50, the value that was previously hard-coded.

  Returns:
    The 'default' output of the generator for the interpolated vectors.
  """
  v1 = tf.random.normal([latent_dim])
  v2 = tf.random.normal([latent_dim])

  # Creates a tensor with `num_steps` steps of interpolation between v1 and v2.
  vectors = interpolate_hypersphere(v1, v2, num_steps)

  # Uses module to generate images from the latent space.
  interpolated_images = progan(vectors)['default']

  return interpolated_images

# Generate the interpolated images, then save them as an animation and embed it.
interpolated_images = interpolate_between_vectors()
animate(interpolated_images)

"""## Finding closest vector in latent space
Fix a target image. As an example use an image generated from the module or upload your own.
"""

image_from_module_space = True

def get_module_space_image():
  """Generate one image from a freshly sampled random latent vector."""
  latent = tf.random.normal([1, latent_dim])
  generated_batch = progan(latent)['default']
  # The batch holds a single entry; return it without the batch axis.
  return generated_batch[0]

def upload_image():
  """Prompt for a file upload in Colab and return the image resized to 128x128.

  Returns:
    The first uploaded image after `skimage.transform.resize` to 128x128.
    Channels beyond the first three (e.g. an alpha channel) are dropped so
    the result can be concatenated with 3-channel images later in the file.

  Raises:
    RuntimeError: If `google.colab` cannot be imported (not running in Colab).
    ValueError: If no file was uploaded.
  """
  # The module-level import of `files` is skipped silently on ImportError,
  # which would surface here as a confusing NameError. Import locally and fail
  # with a clear message instead.
  try:
    from google.colab import files as colab_files
  except ImportError as err:
    raise RuntimeError(
        "upload_image() requires Google Colab (google.colab is unavailable)."
    ) from err

  uploaded = colab_files.upload()
  if not uploaded:
    raise ValueError("No image was uploaded.")

  # Only the first uploaded file is used.
  image = np.asarray(imageio.imread(next(iter(uploaded.values()))))
  # Guard on ndim so a 2-D (single-channel) image is not sliced along its width.
  if image.ndim == 3 and image.shape[-1] > 3:
    image = image[..., :3]
  return transform.resize(image, [128, 128])

# Choose the target image: either one generated by the module itself or one
# uploaded by the user, depending on `image_from_module_space`.
if image_from_module_space:
  target_image = get_module_space_image()
else:
  target_image = upload_image()

# display_image returns a PIL image; presumably the notebook renders it as the
# cell's last expression -- in a plain script the value is simply discarded.
display_image(target_image)

"""After defining a loss function between the target image and the image generated by a latent space variable, we can use gradient descent to find variable values that minimize the loss."""

# Random starting point for the latent-vector search below (a batch of size 1).
initial_vector = tf.random.normal([1, latent_dim])

# The image the generator produces for that starting vector.
display_image(progan(initial_vector)['default'][0])

def find_closest_latent_vector(initial_vector, num_optimization_steps,
                               steps_per_image, target=None):
  """Search by gradient descent for a latent vector that reproduces a target.

  Runs Adam (learning rate 0.01) on a copy of `initial_vector`, minimising the
  summed absolute difference between the generated image and the target plus
  a regulariser on the latent vector's norm. Prints one '.' per step, with a
  line break every 100 steps.

  Args:
    initial_vector: Starting latent vector; wrapped in a `tf.Variable`, so the
      argument itself is not modified.
    num_optimization_steps: Number of gradient steps to run.
    steps_per_image: A generated image is recorded every this many steps.
    target: Image to approximate; only its first three channels are used.
      Defaults to the module-level `target_image`, which is what this function
      always used before the parameter existed.

  Returns:
    A tuple `(images, losses)`: the recorded generated images as numpy arrays,
    and the loss value of every step.
  """
  if target is None:
    target = target_image
  # Loop-invariant: slice the target to its first three channels once.
  target_rgb = target[:, :, :3]
  # The latent vectors were sampled from a normal distribution. We can get
  # more realistic images if we regularize the length of the latent vector to
  # the average length of vector from this distribution.
  expected_norm = np.sqrt(latent_dim)

  images = []
  losses = []

  vector = tf.Variable(initial_vector)
  optimizer = tf.optimizers.Adam(learning_rate=0.01)
  loss_fn = tf.losses.MeanAbsoluteError(reduction="sum")

  for step in range(num_optimization_steps):
    if (step % 100) == 0:
      print()
    print('.', end='')
    # Only the differentiable computation needs to run under the tape.
    with tf.GradientTape() as tape:
      image = progan(vector.read_value())['default'][0]
      target_image_difference = loss_fn(image, target_rgb)
      regularizer = tf.abs(tf.norm(vector) - expected_norm)
      loss = target_image_difference + regularizer

    # Bookkeeping on concrete numpy values, outside the tape.
    if (step % steps_per_image) == 0:
      images.append(image.numpy())
    losses.append(loss.numpy())

    grads = tape.gradient(loss, [vector])
    optimizer.apply_gradients(zip(grads, [vector]))

  return images, losses


# Search settings: total number of gradient steps, and how often a generated
# image is kept for the animation.
num_optimization_steps=200
steps_per_image=5
images, loss = find_closest_latent_vector(initial_vector, num_optimization_steps, steps_per_image)

# Plot the per-step loss with the y-axis starting at 0.
plt.plot(loss)
plt.ylim([0,max(plt.ylim())])

# Animate the images recorded during the search.
animate(np.stack(images))

# Last recorded image joined with the target along axis 1 (presumably side by
# side, assuming height x width x channels arrays -- confirm).
display_image(np.concatenate([images[-1], target_image], axis=1))