From c762b862fb7b7391ba78aa41b22c5df027d5f697 Mon Sep 17 00:00:00 2001 From: Itay Zandbank Date: Tue, 1 May 2018 19:35:52 +0300 Subject: [PATCH 1/2] Create a PyPI package, adapt to Python 3 --- README.md | 130 +++++++++++++++++++++++++++++++++++++--- image_color_analysis.py | 104 ++++++++++++++++++++++++++++++++ setup.py | 32 ++++++++++ 3 files changed, 259 insertions(+), 7 deletions(-) create mode 100644 image_color_analysis.py create mode 100644 setup.py diff --git a/README.md b/README.md index 08f88d2..ae3327a 100644 --- a/README.md +++ b/README.md @@ -9,21 +9,27 @@ The method can be readily applied to any other national Web (or Web archive), an The following is a tutorial for using the tool on a given folder of images. We will compare the color histogram of Google images results to "Donald Trump" and "Hillary Clinton" (as of November 2016). Two demo folders are included in this tutorial. +### Tools +There are 2 ways to run this tool +1. Use "Jupyter Notebook". +2. Install image-color-analysis as package -#### Preparations +#### "Jupyter Notebook" + +##### Preparations 1. Please Download and install [Anaconda, version Python 2.7](https://www.continuum.io/downloads). 2. Please 'download zip' the project folders from [Github](https://github.com/omilab/image-color-analysis/archive/master.zip). 3. Unzip the folder -#### A Step by Step Guide for using the tool +##### A Step by Step Guide for using the tool 1. In Anaconda, open "Jupyter Notebook". 2. When notebook opens - it automatically opens your default browser and shows your file directory. 3. Open your downloaded and extracted folder. 4. Open the file "image_color_analysis.ipynb". -#### Now, let's have some fun! +##### Now, let's have some fun! The script in the file you just opened is divided into three sections: @@ -33,14 +39,14 @@ The script in the file you just opened is divided into three sections: Although the code is annotated, below is an explanation of each section. 
-##### Part 1: Creating a collage +###### Part 1: Creating a collage In this section, we first specify the location of an images folder and load the images. Then, we create a collage from all the images. This is done by calculating the maximal width of all images and the sum of the heights of all images. Finally, it creates a new image with an alpha channel. That is, all images are arranged one below the other, and the empty spaces between them are marked as transparent. Click the 'run' button to start this procedure. When it's done, it prints the time it took to build the collage (this give a good indication of the process when analyzing a large corpus). The generated collage pops up. You may want to view or save it. -##### Part 2: Building a K-Means model and running it on the collage +###### Part 2: Building a K-Means model and running it on the collage In order to calculate and identify clusters of colors in the dataset, we first need to convert the images into a numerical representation - a color array. The array has four dimensions: Red, Green, Blue (RGB) and Alpha (the transparent color we added to mark the empty spaces in the previous section). Subsequently, the collage is represented as a matrix of the total height * total width * 4(that is, RGB+A). @@ -52,7 +58,7 @@ Finally, we remove all transparent colors, as they are not necessary for the cal Click the 'run' button to start this procedure. When it's done, it prints the time it took to build the collage. (The larger your collage is and the larger the number of clusters, the longer it will take to complete). -##### Part 3: Generating the color histogram +###### Part 3: Generating the color histogram This section calculates the proportion of colors for each section. Then, it normalizes the histogram, so that the proportions sum to 1. Finally, it generates an image that puts the width of each color in a histogram. 
@@ -61,7 +67,7 @@ Finally, it generates an image that puts the width of each color in a histogram. Click the 'run' button to start this procedure. The resulting histogram that pops up summarizes the color composition of your corpus! -##### N.B. +###### N.B. this is the bit of code where you may change the name of the demo folder to your own folder of images: @@ -71,4 +77,114 @@ This is the bit of code where you may change the number of clusters (=colors in kmeans_model = KMeans(n_clusters=YOUR NUMBER) +#### Install image-color-analysis + +##### Preparations + +1. Please download and install Python 3.6 +2. Install the package +``` +pip install image-color-analysis +``` +3. Create a new Python file and import the package +``` +from image_color_analysis import * +``` +##### Now, let's have some fun! +The package has 4 functions: +1. images_collage +2. k_means +3. colors_bar +4. analyze_folder + +###### images_collage: Creating a collage + The function creates a collage of all the images in a folder. + The function receives a directory/folder path. + The function returns a collage as an image (object's type: PIL.Image). + + ```python +import image_color_analysis + +folder = 'YOUR FOLDER' +image = image_color_analysis.images_collage(folder) + # show the collage +image.show() + + # save the image in png format (you can choose another format as well) +image.save('collage.png') +``` + +In order to load the image: + ```python +from PIL import Image +im = Image.open('collage.png') +``` + +###### k_means: Building a K-Means model +The function fits a K-Means model to a collage. +The function receives a collage image (object PIL.Image) and the number of clusters (default value = 5). 
+The function returns the model it creates (object's type: KMeans). + + ```python +import image_color_analysis + +folder = 'YOUR FOLDER' +image = image_color_analysis.images_collage(folder) +# train model with 5 clusters (k=5) +model_5_clusters = image_color_analysis.k_means(image) +# train model with 8 clusters (k=8) +model_8_clusters = image_color_analysis.k_means(image, 8) +``` + +If you want to save the model to a file and load it later: + + ```python +import image_color_analysis +import pickle +folder = 'YOUR FOLDER' +image = image_color_analysis.images_collage(folder) +# train model with 5 clusters (k=5) +model_5_clusters = image_color_analysis.k_means(image) +# save the model +model_file_name = 'model.pkl' +pickle.dump(model_5_clusters, open(model_file_name, 'wb')) + +# load the model +loaded_model = pickle.load(open(model_file_name, 'rb')) + +``` + +###### colors_bar: Generating the color histogram +The function creates a color bar image from a KMeans model and saves it to a file. +The function receives a model (object sklearn.cluster.KMeans) and a file path (path and file name for saving the color bar image). +The function returns a color bar image (object's type: PIL.Image). + ```python +import image_color_analysis + +folder = 'YOUR FOLDER' +image = image_color_analysis.images_collage(folder) +# train model with 5 clusters (k=5) +model_5_clusters = image_color_analysis.k_means(image) +color_bar_path = "new_color_bar.png" +color_bar_image = image_color_analysis.colors_bar(model_5_clusters,color_bar_path) + # show the color bar +color_bar_image.show() +``` + +###### analyze_folder: +This function runs all the functions, creates a color bar and presents it to the user. +The function receives: + 1. directory/folder path of images + 2. optional - file name for saving the color bar (default = 'colors_bar.png' in current dir) + 3. 
optional - number of clusters in k-means (default - 5)
+
+ ```python
+import image_color_analysis
+
+folder = 'YOUR FOLDER'
+# 5 clusters and and create file 'colors_bar.png'
+image_color_analysis.analyze_folder(folder)
+# 7 clusters and and create file 'new_colors_bar.jpg'
+image_color_analysis.analyze_folder(folder, 'new_colors_bar.jpg', 7)
+```
\ No newline at end of file
diff --git a/image_color_analysis.py b/image_color_analysis.py
new file mode 100644
index 0000000..5d3db27
--- /dev/null
+++ b/image_color_analysis.py
@@ -0,0 +1,125 @@
+from sklearn.cluster import KMeans
+import numpy as np
+import time
+from PIL import Image
+import os
+from PIL import ImageDraw
+
+
+def images_collage(images_path):
+    """Stack every image found in a folder into one tall RGBA collage.
+
+    Images are pasted one below the other at x = 0. The canvas is as wide as
+    the widest image, so the area beside narrower images stays transparent.
+
+    @param images_path: Images' directory from which the function creates collage
+    @return: Object of type PIL.Image
+    @raise ValueError: If the directory is empty
+    """
+    startTime = time.time()
+
+    # load images from the folder; sorted() keeps the stacking order
+    # reproducible because os.listdir() returns entries in arbitrary order
+    images = []
+    for image_name in sorted(os.listdir(images_path)):
+        # copy() reads the pixel data, so the file can be closed right away
+        with Image.open(os.path.join(images_path, image_name)) as img:
+            images.append(img.copy())
+
+    if not images:
+        raise ValueError('no images found in: {}'.format(images_path))
+
+    # calculate the total height and the max width of the collage
+    total_height = sum(img.size[1] for img in images)
+    max_width = max(img.size[0] for img in images)
+
+    # create a collage with alpha channel
+    # every image will be placed below the previous one
+    collage = Image.new('RGBA', (max_width, total_height))
+    y = 0
+    for img in images:
+        collage.paste(img, (0, y))
+        y += img.size[1]
+
+    endTime = time.time()
+
+    print('creating collage time: ', endTime - startTime)
+    print('total_height: ', total_height)
+    print('max_width: ', max_width)
+
+    return collage
+
+
+def k_means(collage, clusters=5):
+    """Fit a K-Means model on the non-transparent pixels of a collage.
+
+    @param collage: Object of type PIL.Image
+    @param clusters: Number of clusters (k) for k-mean (=final number of colors in the histogram)
+    @return: Object of type KMeans model
+    """
+    startTime = time.time()
+    # convert the collage to a color array (total_height X max_width X 4);
+    # convert('RGBA') guarantees the 4 channels the reshape below relies on
+    collage_array = np.array(collage.convert('RGBA'))
+
+    # reshape the array to one row per pixel
+    collage_array = collage_array.reshape((-1, 4))
+
+    # remove all transparent colors (pixels whose four channels are all 0)
+    collage_array = collage_array[~np.all(collage_array == 0, axis=1)]
+
+    # fit k-means model with the requested number of clusters
+    kmeans_model = KMeans(n_clusters=clusters)
+    kmeans_model.fit(collage_array)
+
+    endTime = time.time()
+    print('fitting model time: ', endTime - startTime)
+    return kmeans_model
+
+
+def colors_bar(kmeans_model, color_bar_save_path):
+    """Draw the cluster colors as a 300x300 bar image and save it to a file.
+
+    Every cluster gets a vertical stripe whose width is proportional to its
+    share of the pixels; stripes go from the most to the least used color.
+
+    @param kmeans_model: Object of type KMeans model
+    @param color_bar_save_path: The file name for the color bar image
+    @return: Object of type PIL.Image
+    """
+    # count the pixels assigned to each cluster; minlength keeps the counts
+    # aligned with cluster_centers_ even if a cluster received no pixels
+    centers = kmeans_model.cluster_centers_
+    hist = np.bincount(kmeans_model.labels_, minlength=len(centers))
+
+    # normalize the histogram, such that it sums to one
+    hist = hist.astype("float")
+    hist /= hist.sum()
+
+    startX = 0
+
+    # create a bar image that displays the most used colors
+    im = Image.new('RGB', (300, 300), (0, 0, 0))
+    dr = ImageDraw.Draw(im)
+
+    for (percent, color) in sorted(zip(hist, centers), key=lambda t: t[0], reverse=True):
+        endX = startX + (float(percent) * 300)
+        # the bar image is RGB, so only the first three channels are used
+        dr.rectangle([int(startX), 0, int(endX), 300], fill=tuple(map(int, color[:3])))
+        startX = endX
+
+    im.save(color_bar_save_path)
+    return im
+
+
+def analyze_folder(images_path, colors_bar_path='colors_bar.png', clusters=5):
+    """Run the whole pipeline on a folder and display the resulting color bar.
+
+    @param images_path: Images' directory from which the function creates collage
+    @param colors_bar_path: The file path for the color bar image, default = 'colors_bar.png' in current dir
+    @param clusters: Number of clusters (k) for k-mean, default = 5
+    """
+    collage = images_collage(images_path)
+    model = k_means(collage, clusters)
+    color_bar_image = colors_bar(model, colors_bar_path)
+    color_bar_image.show()
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..1dc1869
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,40 @@
+import io
+import os
+from setuptools import setup
+
+HERE = os.path.dirname(os.path.abspath(__file__))
+
+# allow setup.py to be run from any path
+os.chdir(HERE)
+
+# io.open takes an explicit encoding on both Python 2 and Python 3
+with io.open(os.path.join(HERE, 'README.md'), encoding='utf-8') as readme:
+    README = readme.read()
+
+with io.open(os.path.join(HERE, 'LICENSE'), encoding='utf-8') as license_file:
+    LICENSE = license_file.read()
+
+setup(name='image_color_analysis',
+      version='1.0',
+      # the PyPI project is named scikit-learn; "sklearn" is only a deprecated alias
+      install_requires=['Pillow>=5.1.0', 'scikit-learn', 'numpy>=1.14.3', 'scipy>=1.0.1'],
+      include_package_data=True,
+      license=LICENSE,  # example license
+      description='image color analysis',
+      long_description=README,
+      long_description_content_type='text/markdown',
+      # the project is a single module, so it is listed in py_modules;
+      # an empty packages list would ship a distribution without any code
+      py_modules=['image_color_analysis'],
+      classifiers=[
+          'Environment :: Console',
+          'Intended Audience :: Science/Research',
+          'License :: Other/Proprietary License',
+          'Operating System :: OS Independent',
+          'Programming Language :: Python',
+          'Programming Language :: Python :: 3.6',
+      ],
+      # author='Yael Segal',
+      # author_email='devorawitty@chelem.co.il',
+      zip_safe=False)
+
From 2da0db73b00c9763481a11e9a765a85e55ed7962 Mon Sep 17 00:00:00 2001
From: Itay Zandbank
Date: Tue, 1 May 2018 19:55:24 +0300
Subject: [PATCH 2/2] Python 3, a setup package.

---
 __init__.py             | 1 +
 image_color_analysis.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 __init__.py

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..b8aa757
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1 @@
+from image_color_analysis import *
\ No newline at end of file
diff --git a/image_color_analysis.py b/image_color_analysis.py
index 5d3db27..39d0e65 100644
--- a/image_color_analysis.py
+++ b/image_color_analysis.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+
 from sklearn.cluster import KMeans
 import numpy as np
 import time