diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..8d557fc86d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,11 @@ +language: python +python: + - "3.5" +install: + - pip install pillow + - pip install numpy + - pip install requests + - pip install beautifulsoup4 + - pip install opencv-python +script: + - python3 tests/tests.py diff --git a/README.md b/README.md index 8b627e2794..9756b49296 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# TextRecognitionDataGenerator +# TextRecognitionDataGenerator [![TravisCI](https://travis-ci.org/Belval/TextRecognitionDataGenerator.svg?branch=master)](https://travis-ci.org/Belval/TextRecognitionDataGenerator) A synthetic data generator for text recognition ## What is it for? diff --git a/background_generator.py b/TextRecognitionDataGenerator/background_generator.py similarity index 100% rename from background_generator.py rename to TextRecognitionDataGenerator/background_generator.py diff --git a/computer_text_generator.py b/TextRecognitionDataGenerator/computer_text_generator.py similarity index 62% rename from computer_text_generator.py rename to TextRecognitionDataGenerator/computer_text_generator.py index 7bafaf1238..8d12116ae7 100644 --- a/computer_text_generator.py +++ b/TextRecognitionDataGenerator/computer_text_generator.py @@ -8,14 +8,14 @@ class ComputerTextGenerator(object): @classmethod - def generate(cls, text, font): - image_font = ImageFont.truetype(font=os.path.join('fonts', font), size=32) + def generate(cls, text, font, text_color): + image_font = ImageFont.truetype(font=font, size=32) text_width, text_height = image_font.getsize(text) txt_img = Image.new('L', (text_width, text_height), 255) txt_draw = ImageDraw.Draw(txt_img) - txt_draw.text((0, 0), text, fill=random.randint(1, 80), font=image_font) + txt_draw.text((0, 0), text, fill=random.randint(1, 80) if text_color < 0 else text_color, font=image_font) return txt_img diff --git a/data_generator.py b/TextRecognitionDataGenerator/data_generator.py similarity index 77% rename from data_generator.py rename to TextRecognitionDataGenerator/data_generator.py index 4830a4b6f4..f366a27c35 100644 --- a/data_generator.py +++ b/TextRecognitionDataGenerator/data_generator.py @@ -4,38 +4,29 @@ from PIL import Image, ImageFilter -from computer_text_generator import ComputerTextGenerator +from TextRecognitionDataGenerator.computer_text_generator import ComputerTextGenerator try: - from handwritten_text_generator import HandwrittenTextGenerator + from TextRecognitionDataGenerator.handwritten_text_generator import HandwrittenTextGenerator except ImportError as e: print('Missing modules for handwritten text generation.') -from background_generator import BackgroundGenerator +from TextRecognitionDataGenerator.background_generator import BackgroundGenerator class FakeTextDataGenerator(object): @classmethod - def generate(cls, index, text, font, out_dir, height, extension, skewing_angle, random_skew, blur, random_blur, background_type, is_handwritten): + def generate(cls, index, text, font, out_dir, height, extension, skewing_angle, random_skew, blur, random_blur, background_type, is_handwritten, text_color=-1): image = None if is_handwritten: image = HandwrittenTextGenerator.generate(text) else: - image = ComputerTextGenerator.generate(text, font) + image = ComputerTextGenerator.generate(text, font, text_color) random_angle = random.randint(0-skewing_angle, skewing_angle) - # Somehow the handwritten text always has a little bit of angle. - # this fixes it. - if is_handwritten: - random_angle = 0 - skewing_angle = 0 - rotated_img = image.rotate(skewing_angle if not random_skew else random_angle, expand=1) new_text_width, new_text_height = rotated_img.size - # We create our background a bit bigger than the text - background = None - if background_type == 0: background = BackgroundGenerator.gaussian_noise(new_text_height + 10, new_text_width + 10) elif background_type == 1: diff --git a/dicts/de.txt b/TextRecognitionDataGenerator/dicts/de.txt similarity index 100% rename from dicts/de.txt rename to TextRecognitionDataGenerator/dicts/de.txt diff --git a/dicts/en.txt b/TextRecognitionDataGenerator/dicts/en.txt similarity index 100% rename from dicts/en.txt rename to TextRecognitionDataGenerator/dicts/en.txt diff --git a/dicts/es.txt b/TextRecognitionDataGenerator/dicts/es.txt similarity index 100% rename from dicts/es.txt rename to TextRecognitionDataGenerator/dicts/es.txt diff --git a/dicts/fr.txt b/TextRecognitionDataGenerator/dicts/fr.txt similarity index 100% rename from dicts/fr.txt rename to TextRecognitionDataGenerator/dicts/fr.txt diff --git a/fonts/AllerDisplay.ttf b/TextRecognitionDataGenerator/fonts/AllerDisplay.ttf similarity index 100% rename from fonts/AllerDisplay.ttf rename to TextRecognitionDataGenerator/fonts/AllerDisplay.ttf diff --git a/fonts/Aller_Bd.ttf b/TextRecognitionDataGenerator/fonts/Aller_Bd.ttf similarity index 100% rename from fonts/Aller_Bd.ttf rename to TextRecognitionDataGenerator/fonts/Aller_Bd.ttf diff --git a/fonts/Aller_BdIt.ttf b/TextRecognitionDataGenerator/fonts/Aller_BdIt.ttf similarity index 100% rename from fonts/Aller_BdIt.ttf rename to TextRecognitionDataGenerator/fonts/Aller_BdIt.ttf diff --git a/fonts/Aller_It.ttf b/TextRecognitionDataGenerator/fonts/Aller_It.ttf similarity index 100% rename from fonts/Aller_It.ttf rename to TextRecognitionDataGenerator/fonts/Aller_It.ttf diff --git a/fonts/Aller_Lt.ttf b/TextRecognitionDataGenerator/fonts/Aller_Lt.ttf similarity index 100% rename from fonts/Aller_Lt.ttf rename to TextRecognitionDataGenerator/fonts/Aller_Lt.ttf diff --git a/fonts/Aller_LtIt.ttf b/TextRecognitionDataGenerator/fonts/Aller_LtIt.ttf similarity index 100% rename from fonts/Aller_LtIt.ttf rename to TextRecognitionDataGenerator/fonts/Aller_LtIt.ttf diff --git a/fonts/Aller_Rg.ttf b/TextRecognitionDataGenerator/fonts/Aller_Rg.ttf similarity index 100% rename from fonts/Aller_Rg.ttf rename to TextRecognitionDataGenerator/fonts/Aller_Rg.ttf diff --git a/fonts/Amatic-Bold.ttf b/TextRecognitionDataGenerator/fonts/Amatic-Bold.ttf similarity index 100% rename from fonts/Amatic-Bold.ttf rename to TextRecognitionDataGenerator/fonts/Amatic-Bold.ttf diff --git a/fonts/AmaticSC-Regular.ttf b/TextRecognitionDataGenerator/fonts/AmaticSC-Regular.ttf similarity index 100% rename from fonts/AmaticSC-Regular.ttf rename to TextRecognitionDataGenerator/fonts/AmaticSC-Regular.ttf diff --git a/fonts/BEBAS___.ttf b/TextRecognitionDataGenerator/fonts/BEBAS___.ttf similarity index 100% rename from fonts/BEBAS___.ttf rename to TextRecognitionDataGenerator/fonts/BEBAS___.ttf diff --git a/fonts/Capture_it.ttf b/TextRecognitionDataGenerator/fonts/Capture_it.ttf similarity index 100% rename from fonts/Capture_it.ttf rename to TextRecognitionDataGenerator/fonts/Capture_it.ttf diff --git a/fonts/Capture_it_2.ttf b/TextRecognitionDataGenerator/fonts/Capture_it_2.ttf similarity index 100% rename from fonts/Capture_it_2.ttf rename to TextRecognitionDataGenerator/fonts/Capture_it_2.ttf diff --git a/fonts/CaviarDreams.ttf b/TextRecognitionDataGenerator/fonts/CaviarDreams.ttf similarity index 100% rename from fonts/CaviarDreams.ttf rename to TextRecognitionDataGenerator/fonts/CaviarDreams.ttf diff --git a/fonts/CaviarDreams_BoldItalic.ttf b/TextRecognitionDataGenerator/fonts/CaviarDreams_BoldItalic.ttf similarity index 100% rename from fonts/CaviarDreams_BoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/CaviarDreams_BoldItalic.ttf diff --git a/fonts/CaviarDreams_Italic.ttf b/TextRecognitionDataGenerator/fonts/CaviarDreams_Italic.ttf similarity index 100% rename from fonts/CaviarDreams_Italic.ttf rename to TextRecognitionDataGenerator/fonts/CaviarDreams_Italic.ttf diff --git a/fonts/Caviar_Dreams_Bold.ttf b/TextRecognitionDataGenerator/fonts/Caviar_Dreams_Bold.ttf similarity index 100% rename from fonts/Caviar_Dreams_Bold.ttf rename to TextRecognitionDataGenerator/fonts/Caviar_Dreams_Bold.ttf diff --git a/fonts/DroidSans-Bold.ttf b/TextRecognitionDataGenerator/fonts/DroidSans-Bold.ttf similarity index 100% rename from fonts/DroidSans-Bold.ttf rename to TextRecognitionDataGenerator/fonts/DroidSans-Bold.ttf diff --git a/fonts/DroidSans.ttf b/TextRecognitionDataGenerator/fonts/DroidSans.ttf similarity index 100% rename from fonts/DroidSans.ttf rename to TextRecognitionDataGenerator/fonts/DroidSans.ttf diff --git a/fonts/FFF_Tusj.ttf b/TextRecognitionDataGenerator/fonts/FFF_Tusj.ttf similarity index 100% rename from fonts/FFF_Tusj.ttf rename to TextRecognitionDataGenerator/fonts/FFF_Tusj.ttf diff --git a/fonts/Lato-Black.ttf b/TextRecognitionDataGenerator/fonts/Lato-Black.ttf similarity index 100% rename from fonts/Lato-Black.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Black.ttf diff --git a/fonts/Lato-BlackItalic.ttf b/TextRecognitionDataGenerator/fonts/Lato-BlackItalic.ttf similarity index 100% rename from fonts/Lato-BlackItalic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-BlackItalic.ttf diff --git a/fonts/Lato-Bold.ttf b/TextRecognitionDataGenerator/fonts/Lato-Bold.ttf similarity index 100% rename from fonts/Lato-Bold.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Bold.ttf diff --git a/fonts/Lato-BoldItalic.ttf b/TextRecognitionDataGenerator/fonts/Lato-BoldItalic.ttf similarity index 100% rename from fonts/Lato-BoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-BoldItalic.ttf diff --git a/fonts/Lato-Hairline.ttf b/TextRecognitionDataGenerator/fonts/Lato-Hairline.ttf similarity index 100% rename from fonts/Lato-Hairline.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Hairline.ttf diff --git a/fonts/Lato-HairlineItalic.ttf b/TextRecognitionDataGenerator/fonts/Lato-HairlineItalic.ttf similarity index 100% rename from fonts/Lato-HairlineItalic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-HairlineItalic.ttf diff --git a/fonts/Lato-Heavy.ttf b/TextRecognitionDataGenerator/fonts/Lato-Heavy.ttf similarity index 100% rename from fonts/Lato-Heavy.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Heavy.ttf diff --git a/fonts/Lato-HeavyItalic.ttf b/TextRecognitionDataGenerator/fonts/Lato-HeavyItalic.ttf similarity index 100% rename from fonts/Lato-HeavyItalic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-HeavyItalic.ttf diff --git a/fonts/Lato-Italic.ttf b/TextRecognitionDataGenerator/fonts/Lato-Italic.ttf similarity index 100% rename from fonts/Lato-Italic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Italic.ttf diff --git a/fonts/Lato-Light.ttf b/TextRecognitionDataGenerator/fonts/Lato-Light.ttf similarity index 100% rename from fonts/Lato-Light.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Light.ttf diff --git a/fonts/Lato-LightItalic.ttf b/TextRecognitionDataGenerator/fonts/Lato-LightItalic.ttf similarity index 100% rename from fonts/Lato-LightItalic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-LightItalic.ttf diff --git a/fonts/Lato-Medium.ttf b/TextRecognitionDataGenerator/fonts/Lato-Medium.ttf similarity index 100% rename from fonts/Lato-Medium.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Medium.ttf diff --git a/fonts/Lato-MediumItalic.ttf b/TextRecognitionDataGenerator/fonts/Lato-MediumItalic.ttf similarity index 100% rename from fonts/Lato-MediumItalic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-MediumItalic.ttf diff --git a/fonts/Lato-Regular.ttf b/TextRecognitionDataGenerator/fonts/Lato-Regular.ttf similarity index 100% rename from fonts/Lato-Regular.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Regular.ttf diff --git a/fonts/Lato-Semibold.ttf b/TextRecognitionDataGenerator/fonts/Lato-Semibold.ttf similarity index 100% rename from fonts/Lato-Semibold.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Semibold.ttf diff --git a/fonts/Lato-SemiboldItalic.ttf b/TextRecognitionDataGenerator/fonts/Lato-SemiboldItalic.ttf similarity index 100% rename from fonts/Lato-SemiboldItalic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-SemiboldItalic.ttf diff --git a/fonts/Lato-Thin.ttf b/TextRecognitionDataGenerator/fonts/Lato-Thin.ttf similarity index 100% rename from fonts/Lato-Thin.ttf rename to TextRecognitionDataGenerator/fonts/Lato-Thin.ttf diff --git a/fonts/Lato-ThinItalic.ttf b/TextRecognitionDataGenerator/fonts/Lato-ThinItalic.ttf similarity index 100% rename from fonts/Lato-ThinItalic.ttf rename to TextRecognitionDataGenerator/fonts/Lato-ThinItalic.ttf diff --git a/fonts/OpenSans-Bold.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-Bold.ttf similarity index 100% rename from fonts/OpenSans-Bold.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-Bold.ttf diff --git a/fonts/OpenSans-BoldItalic.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-BoldItalic.ttf similarity index 100% rename from fonts/OpenSans-BoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-BoldItalic.ttf diff --git a/fonts/OpenSans-ExtraBold.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-ExtraBold.ttf similarity index 100% rename from fonts/OpenSans-ExtraBold.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-ExtraBold.ttf diff --git a/fonts/OpenSans-ExtraBoldItalic.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-ExtraBoldItalic.ttf similarity index 100% rename from fonts/OpenSans-ExtraBoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-ExtraBoldItalic.ttf diff --git a/fonts/OpenSans-Italic.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-Italic.ttf similarity index 100% rename from fonts/OpenSans-Italic.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-Italic.ttf diff --git a/fonts/OpenSans-Light.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-Light.ttf similarity index 100% rename from fonts/OpenSans-Light.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-Light.ttf diff --git a/fonts/OpenSans-LightItalic.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-LightItalic.ttf similarity index 100% rename from fonts/OpenSans-LightItalic.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-LightItalic.ttf diff --git a/fonts/OpenSans-Regular.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-Regular.ttf similarity index 100% rename from fonts/OpenSans-Regular.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-Regular.ttf diff --git a/fonts/OpenSans-Semibold.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-Semibold.ttf similarity index 100% rename from fonts/OpenSans-Semibold.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-Semibold.ttf diff --git a/fonts/OpenSans-SemiboldItalic.ttf b/TextRecognitionDataGenerator/fonts/OpenSans-SemiboldItalic.ttf similarity index 100% rename from fonts/OpenSans-SemiboldItalic.ttf rename to TextRecognitionDataGenerator/fonts/OpenSans-SemiboldItalic.ttf diff --git a/fonts/Pacifico.ttf b/TextRecognitionDataGenerator/fonts/Pacifico.ttf similarity index 100% rename from fonts/Pacifico.ttf rename to TextRecognitionDataGenerator/fonts/Pacifico.ttf diff --git a/fonts/Raleway-Black.ttf b/TextRecognitionDataGenerator/fonts/Raleway-Black.ttf similarity index 100% rename from fonts/Raleway-Black.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-Black.ttf diff --git a/fonts/Raleway-BlackItalic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-BlackItalic.ttf similarity index 100% rename from fonts/Raleway-BlackItalic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-BlackItalic.ttf diff --git a/fonts/Raleway-Bold.ttf b/TextRecognitionDataGenerator/fonts/Raleway-Bold.ttf similarity index 100% rename from fonts/Raleway-Bold.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-Bold.ttf diff --git a/fonts/Raleway-BoldItalic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-BoldItalic.ttf similarity index 100% rename from fonts/Raleway-BoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-BoldItalic.ttf diff --git a/fonts/Raleway-ExtraBold.ttf b/TextRecognitionDataGenerator/fonts/Raleway-ExtraBold.ttf similarity index 100% rename from fonts/Raleway-ExtraBold.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-ExtraBold.ttf diff --git a/fonts/Raleway-ExtraBoldItalic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-ExtraBoldItalic.ttf similarity index 100% rename from fonts/Raleway-ExtraBoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-ExtraBoldItalic.ttf diff --git a/fonts/Raleway-ExtraLight.ttf b/TextRecognitionDataGenerator/fonts/Raleway-ExtraLight.ttf similarity index 100% rename from fonts/Raleway-ExtraLight.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-ExtraLight.ttf diff --git a/fonts/Raleway-ExtraLightItalic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-ExtraLightItalic.ttf similarity index 100% rename from fonts/Raleway-ExtraLightItalic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-ExtraLightItalic.ttf diff --git a/fonts/Raleway-Italic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-Italic.ttf similarity index 100% rename from fonts/Raleway-Italic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-Italic.ttf diff --git a/fonts/Raleway-Light.ttf b/TextRecognitionDataGenerator/fonts/Raleway-Light.ttf similarity index 100% rename from fonts/Raleway-Light.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-Light.ttf diff --git a/fonts/Raleway-LightItalic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-LightItalic.ttf similarity index 100% rename from fonts/Raleway-LightItalic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-LightItalic.ttf diff --git a/fonts/Raleway-Medium.ttf b/TextRecognitionDataGenerator/fonts/Raleway-Medium.ttf similarity index 100% rename from fonts/Raleway-Medium.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-Medium.ttf diff --git a/fonts/Raleway-MediumItalic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-MediumItalic.ttf similarity index 100% rename from fonts/Raleway-MediumItalic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-MediumItalic.ttf diff --git a/fonts/Raleway-Regular.ttf b/TextRecognitionDataGenerator/fonts/Raleway-Regular.ttf similarity index 100% rename from fonts/Raleway-Regular.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-Regular.ttf diff --git a/fonts/Raleway-SemiBold.ttf b/TextRecognitionDataGenerator/fonts/Raleway-SemiBold.ttf similarity index 100% rename from fonts/Raleway-SemiBold.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-SemiBold.ttf diff --git a/fonts/Raleway-SemiBoldItalic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-SemiBoldItalic.ttf similarity index 100% rename from fonts/Raleway-SemiBoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-SemiBoldItalic.ttf diff --git a/fonts/Raleway-Thin.ttf b/TextRecognitionDataGenerator/fonts/Raleway-Thin.ttf similarity index 100% rename from fonts/Raleway-Thin.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-Thin.ttf diff --git a/fonts/Raleway-ThinItalic.ttf b/TextRecognitionDataGenerator/fonts/Raleway-ThinItalic.ttf similarity index 100% rename from fonts/Raleway-ThinItalic.ttf rename to TextRecognitionDataGenerator/fonts/Raleway-ThinItalic.ttf diff --git a/fonts/Roboto-Black.ttf b/TextRecognitionDataGenerator/fonts/Roboto-Black.ttf similarity index 100% rename from fonts/Roboto-Black.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-Black.ttf diff --git a/fonts/Roboto-BlackItalic.ttf b/TextRecognitionDataGenerator/fonts/Roboto-BlackItalic.ttf similarity index 100% rename from fonts/Roboto-BlackItalic.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-BlackItalic.ttf diff --git a/fonts/Roboto-Bold.ttf b/TextRecognitionDataGenerator/fonts/Roboto-Bold.ttf similarity index 100% rename from fonts/Roboto-Bold.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-Bold.ttf diff --git a/fonts/Roboto-BoldItalic.ttf b/TextRecognitionDataGenerator/fonts/Roboto-BoldItalic.ttf similarity index 100% rename from fonts/Roboto-BoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-BoldItalic.ttf diff --git a/fonts/Roboto-Italic.ttf b/TextRecognitionDataGenerator/fonts/Roboto-Italic.ttf similarity index 100% rename from fonts/Roboto-Italic.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-Italic.ttf diff --git a/fonts/Roboto-Light.ttf b/TextRecognitionDataGenerator/fonts/Roboto-Light.ttf similarity index 100% rename from fonts/Roboto-Light.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-Light.ttf diff --git a/fonts/Roboto-LightItalic.ttf b/TextRecognitionDataGenerator/fonts/Roboto-LightItalic.ttf similarity index 100% rename from fonts/Roboto-LightItalic.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-LightItalic.ttf diff --git a/fonts/Roboto-Medium.ttf b/TextRecognitionDataGenerator/fonts/Roboto-Medium.ttf similarity index 100% rename from fonts/Roboto-Medium.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-Medium.ttf diff --git a/fonts/Roboto-MediumItalic.ttf b/TextRecognitionDataGenerator/fonts/Roboto-MediumItalic.ttf similarity index 100% rename from fonts/Roboto-MediumItalic.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-MediumItalic.ttf diff --git a/fonts/Roboto-Regular.ttf b/TextRecognitionDataGenerator/fonts/Roboto-Regular.ttf similarity index 100% rename from fonts/Roboto-Regular.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-Regular.ttf diff --git a/fonts/Roboto-Thin.ttf b/TextRecognitionDataGenerator/fonts/Roboto-Thin.ttf similarity index 100% rename from fonts/Roboto-Thin.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-Thin.ttf diff --git a/fonts/Roboto-ThinItalic.ttf b/TextRecognitionDataGenerator/fonts/Roboto-ThinItalic.ttf similarity index 100% rename from fonts/Roboto-ThinItalic.ttf rename to TextRecognitionDataGenerator/fonts/Roboto-ThinItalic.ttf diff --git a/fonts/RobotoCondensed-Bold.ttf b/TextRecognitionDataGenerator/fonts/RobotoCondensed-Bold.ttf similarity index 100% rename from fonts/RobotoCondensed-Bold.ttf rename to TextRecognitionDataGenerator/fonts/RobotoCondensed-Bold.ttf diff --git a/fonts/RobotoCondensed-BoldItalic.ttf b/TextRecognitionDataGenerator/fonts/RobotoCondensed-BoldItalic.ttf similarity index 100% rename from fonts/RobotoCondensed-BoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/RobotoCondensed-BoldItalic.ttf diff --git a/fonts/RobotoCondensed-Italic.ttf b/TextRecognitionDataGenerator/fonts/RobotoCondensed-Italic.ttf similarity index 100% rename from fonts/RobotoCondensed-Italic.ttf rename to TextRecognitionDataGenerator/fonts/RobotoCondensed-Italic.ttf diff --git a/fonts/RobotoCondensed-Light.ttf b/TextRecognitionDataGenerator/fonts/RobotoCondensed-Light.ttf similarity index 100% rename from fonts/RobotoCondensed-Light.ttf rename to TextRecognitionDataGenerator/fonts/RobotoCondensed-Light.ttf diff --git a/fonts/RobotoCondensed-LightItalic.ttf b/TextRecognitionDataGenerator/fonts/RobotoCondensed-LightItalic.ttf similarity index 100% rename from fonts/RobotoCondensed-LightItalic.ttf rename to TextRecognitionDataGenerator/fonts/RobotoCondensed-LightItalic.ttf diff --git a/fonts/RobotoCondensed-Regular.ttf b/TextRecognitionDataGenerator/fonts/RobotoCondensed-Regular.ttf similarity index 100% rename from fonts/RobotoCondensed-Regular.ttf rename to TextRecognitionDataGenerator/fonts/RobotoCondensed-Regular.ttf diff --git a/fonts/SEASRN__.ttf b/TextRecognitionDataGenerator/fonts/SEASRN__.ttf similarity index 100% rename from fonts/SEASRN__.ttf rename to TextRecognitionDataGenerator/fonts/SEASRN__.ttf diff --git a/fonts/Sansation-Bold.ttf b/TextRecognitionDataGenerator/fonts/Sansation-Bold.ttf similarity index 100% rename from fonts/Sansation-Bold.ttf rename to TextRecognitionDataGenerator/fonts/Sansation-Bold.ttf diff --git a/fonts/Sansation-BoldItalic.ttf b/TextRecognitionDataGenerator/fonts/Sansation-BoldItalic.ttf similarity index 100% rename from fonts/Sansation-BoldItalic.ttf rename to TextRecognitionDataGenerator/fonts/Sansation-BoldItalic.ttf diff --git a/fonts/Sansation-Italic.ttf b/TextRecognitionDataGenerator/fonts/Sansation-Italic.ttf similarity index 100% rename from fonts/Sansation-Italic.ttf rename to TextRecognitionDataGenerator/fonts/Sansation-Italic.ttf diff --git a/fonts/Sansation-Light.ttf b/TextRecognitionDataGenerator/fonts/Sansation-Light.ttf similarity index 100% rename from fonts/Sansation-Light.ttf rename to TextRecognitionDataGenerator/fonts/Sansation-Light.ttf diff --git a/fonts/Sansation-LightItalic.ttf b/TextRecognitionDataGenerator/fonts/Sansation-LightItalic.ttf similarity index 100% rename from fonts/Sansation-LightItalic.ttf rename to TextRecognitionDataGenerator/fonts/Sansation-LightItalic.ttf diff --git a/fonts/Sansation-Regular.ttf b/TextRecognitionDataGenerator/fonts/Sansation-Regular.ttf similarity index 100% rename from fonts/Sansation-Regular.ttf rename to TextRecognitionDataGenerator/fonts/Sansation-Regular.ttf diff --git a/fonts/Walkway_Black.ttf b/TextRecognitionDataGenerator/fonts/Walkway_Black.ttf similarity index 100% rename from fonts/Walkway_Black.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_Black.ttf diff --git a/fonts/Walkway_Bold.ttf b/TextRecognitionDataGenerator/fonts/Walkway_Bold.ttf similarity index 100% rename from fonts/Walkway_Bold.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_Bold.ttf diff --git a/fonts/Walkway_Oblique.ttf b/TextRecognitionDataGenerator/fonts/Walkway_Oblique.ttf similarity index 100% rename from fonts/Walkway_Oblique.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_Oblique.ttf diff --git a/fonts/Walkway_Oblique_Black.ttf b/TextRecognitionDataGenerator/fonts/Walkway_Oblique_Black.ttf similarity index 100% rename from fonts/Walkway_Oblique_Black.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_Oblique_Black.ttf diff --git a/fonts/Walkway_Oblique_Bold.ttf b/TextRecognitionDataGenerator/fonts/Walkway_Oblique_Bold.ttf similarity index 100% rename from fonts/Walkway_Oblique_Bold.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_Oblique_Bold.ttf diff --git a/fonts/Walkway_Oblique_SemiBold.ttf b/TextRecognitionDataGenerator/fonts/Walkway_Oblique_SemiBold.ttf similarity index 100% rename from fonts/Walkway_Oblique_SemiBold.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_Oblique_SemiBold.ttf diff --git a/fonts/Walkway_Oblique_UltraBold.ttf b/TextRecognitionDataGenerator/fonts/Walkway_Oblique_UltraBold.ttf similarity index 100% rename from fonts/Walkway_Oblique_UltraBold.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_Oblique_UltraBold.ttf diff --git a/fonts/Walkway_SemiBold.ttf b/TextRecognitionDataGenerator/fonts/Walkway_SemiBold.ttf similarity index 100% rename from fonts/Walkway_SemiBold.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_SemiBold.ttf diff --git a/fonts/Walkway_UltraBold.ttf b/TextRecognitionDataGenerator/fonts/Walkway_UltraBold.ttf similarity index 100% rename from fonts/Walkway_UltraBold.ttf rename to TextRecognitionDataGenerator/fonts/Walkway_UltraBold.ttf diff --git a/handwritten_model/model-29.data-00000-of-00001 b/TextRecognitionDataGenerator/handwritten_model/model-29.data-00000-of-00001 similarity index 100% rename from handwritten_model/model-29.data-00000-of-00001 rename to TextRecognitionDataGenerator/handwritten_model/model-29.data-00000-of-00001 diff --git a/handwritten_model/model-29.index b/TextRecognitionDataGenerator/handwritten_model/model-29.index similarity index 100% rename from handwritten_model/model-29.index rename to TextRecognitionDataGenerator/handwritten_model/model-29.index diff --git a/handwritten_model/model-29.meta b/TextRecognitionDataGenerator/handwritten_model/model-29.meta similarity index 100% rename from handwritten_model/model-29.meta rename to TextRecognitionDataGenerator/handwritten_model/model-29.meta diff --git a/handwritten_model/translation.pkl b/TextRecognitionDataGenerator/handwritten_model/translation.pkl similarity index 100% rename from handwritten_model/translation.pkl rename to TextRecognitionDataGenerator/handwritten_model/translation.pkl diff --git a/handwritten_text_generator.py b/TextRecognitionDataGenerator/handwritten_text_generator.py similarity index 100% rename from handwritten_text_generator.py rename to TextRecognitionDataGenerator/handwritten_text_generator.py diff --git a/run.py b/TextRecognitionDataGenerator/run.py similarity index 98% rename from run.py rename to TextRecognitionDataGenerator/run.py index a9cfdcdeb9..9d2bd5c797 100644 --- a/run.py +++ b/TextRecognitionDataGenerator/run.py @@ -6,7 +6,7 @@ from bs4 import BeautifulSoup from PIL import Image, ImageFont -from data_generator import FakeTextDataGenerator +from TextRecognitionDataGenerator.data_generator import FakeTextDataGenerator from multiprocessing import Pool def parse_arguments(): @@ -168,7 +168,7 @@ def load_fonts(): Load all fonts in the fonts directory """ - return [font for font in os.listdir('fonts')] + return [os.path.join('fonts', font) for font in os.listdir('fonts')] def create_strings_from_file(filename, count): """ diff --git a/texts/random_1.txt b/TextRecognitionDataGenerator/texts/random_1.txt similarity index 100% rename from texts/random_1.txt rename to TextRecognitionDataGenerator/texts/random_1.txt diff --git a/texts/random_2.txt b/TextRecognitionDataGenerator/texts/random_2.txt similarity index 100% rename from texts/random_2.txt rename to TextRecognitionDataGenerator/texts/random_2.txt diff --git a/texts/random_3.txt b/TextRecognitionDataGenerator/texts/random_3.txt similarity index 100% rename from texts/random_3.txt rename to TextRecognitionDataGenerator/texts/random_3.txt diff --git a/texts/test.txt b/TextRecognitionDataGenerator/texts/test.txt similarity index 100% rename from texts/test.txt rename to TextRecognitionDataGenerator/texts/test.txt diff --git a/samples/1.jpg b/samples/1.jpg deleted file mode 100644 index d1c626a69c..0000000000 Binary files a/samples/1.jpg and /dev/null differ diff --git a/samples/10.jpg b/samples/10.jpg deleted file mode 100644 index eb290a483d..0000000000 Binary files a/samples/10.jpg and /dev/null differ diff --git a/samples/11.jpg b/samples/11.jpg deleted file mode 100644 index 409533a63c..0000000000 Binary files a/samples/11.jpg and /dev/null differ diff --git a/samples/12.jpg b/samples/12.jpg deleted file mode 100644 index 2dcef91f1f..0000000000 Binary files a/samples/12.jpg and /dev/null differ diff --git a/samples/13.jpg b/samples/13.jpg deleted file mode 100644 index 42ce735152..0000000000 Binary files a/samples/13.jpg and /dev/null differ diff --git a/samples/14.jpg b/samples/14.jpg deleted file mode 100644 index 5c7f9d10f2..0000000000 Binary files a/samples/14.jpg and /dev/null differ diff --git a/samples/15.jpg b/samples/15.jpg deleted file mode 100644 index c5f692927c..0000000000 Binary files a/samples/15.jpg and /dev/null differ diff --git a/samples/16.jpg b/samples/16.jpg deleted file mode 100644 index 6e2c891019..0000000000 Binary files a/samples/16.jpg and /dev/null differ diff --git a/samples/17.jpg b/samples/17.jpg deleted file mode 100644 index 8b9ff2da0d..0000000000 Binary files a/samples/17.jpg and /dev/null differ diff --git a/samples/18.jpg b/samples/18.jpg deleted file mode 100644 index 11e3498c39..0000000000 Binary files a/samples/18.jpg and /dev/null differ diff --git a/samples/19.jpg b/samples/19.jpg deleted file mode 100644 index 12e8198ba8..0000000000 Binary files a/samples/19.jpg and /dev/null differ diff --git a/samples/2.jpg b/samples/2.jpg deleted file mode 100644 index b4045eadb7..0000000000 Binary files a/samples/2.jpg and /dev/null differ diff --git a/samples/20.jpg b/samples/20.jpg deleted file mode 100644 index 2b4456e9ee..0000000000 Binary files a/samples/20.jpg and /dev/null differ diff --git a/samples/21.jpg b/samples/21.jpg deleted file mode 100644 index 122cb14141..0000000000 Binary files a/samples/21.jpg and /dev/null differ diff --git a/samples/22.jpg b/samples/22.jpg deleted file mode 100644 index c1a6ca56b8..0000000000 Binary files a/samples/22.jpg and /dev/null differ diff --git a/samples/3.jpg b/samples/3.jpg deleted file mode 100644 index 042407c69b..0000000000 Binary files a/samples/3.jpg and /dev/null differ diff --git a/samples/4.jpg b/samples/4.jpg deleted file mode 100644 index 485a57ef3c..0000000000 Binary files a/samples/4.jpg and /dev/null differ diff --git a/samples/5.jpg b/samples/5.jpg deleted file mode 100644 index 59c4a5991c..0000000000 Binary files a/samples/5.jpg and /dev/null differ diff --git a/samples/6.jpg b/samples/6.jpg deleted file mode 100644 index 84cbcb6ba9..0000000000 Binary files a/samples/6.jpg and /dev/null differ diff --git a/samples/7.jpg b/samples/7.jpg deleted file mode 100644 index 5b44bbbedb..0000000000 Binary files a/samples/7.jpg and /dev/null differ diff --git a/samples/8.jpg b/samples/8.jpg deleted file mode 100644 index 584d30285a..0000000000 Binary files a/samples/8.jpg and /dev/null differ diff --git a/samples/9.jpg b/samples/9.jpg deleted file mode 100644 index bf6f367303..0000000000 Binary files a/samples/9.jpg and /dev/null differ diff --git a/tests/expected_results/TEST TEST TEST_0.jpg b/tests/expected_results/TEST TEST TEST_0.jpg new file mode 100644 index 0000000000..8a9f6632f4 Binary files /dev/null and b/tests/expected_results/TEST TEST TEST_0.jpg differ diff --git a/tests/expected_results/TEST TEST TEST_1.png b/tests/expected_results/TEST TEST TEST_1.png new file mode 100644 index 0000000000..b12089cfe7 Binary files /dev/null and b/tests/expected_results/TEST TEST TEST_1.png differ diff --git a/tests/expected_results/TEST TEST TEST_2.jpg b/tests/expected_results/TEST TEST TEST_2.jpg new file mode 100644 index 0000000000..623d788a37 Binary files /dev/null and b/tests/expected_results/TEST TEST TEST_2.jpg differ diff --git a/tests/expected_results/TEST TEST TEST_3.jpg b/tests/expected_results/TEST TEST TEST_3.jpg new file mode 100644 index 0000000000..71792887b7 Binary files /dev/null and b/tests/expected_results/TEST TEST TEST_3.jpg differ diff --git a/tests/expected_results/gaussian_background.jpg b/tests/expected_results/gaussian_background.jpg new file mode 100644 index 0000000000..47e9f513f3 Binary files /dev/null and b/tests/expected_results/gaussian_background.jpg differ diff --git a/tests/expected_results/white_background.jpg b/tests/expected_results/white_background.jpg new file mode 100644 index 0000000000..d2c823e678 Binary files /dev/null and b/tests/expected_results/white_background.jpg differ diff --git a/tests/font.ttf b/tests/font.ttf new file mode 100644 index 0000000000..db433349b7 Binary files /dev/null and b/tests/font.ttf differ diff --git a/tests/test.txt b/tests/test.txt new file mode 100644 index 0000000000..49678a4b57 --- /dev/null +++ b/tests/test.txt @@ -0,0 +1,3 @@ +TEST1 +TEST2 +TEST3 \ No newline at end of file diff --git a/tests/tests.py b/tests/tests.py new file mode 100644 index 0000000000..7ed8cb8b5f --- /dev/null +++ b/tests/tests.py @@ -0,0 +1,172 @@ +import os +import sys +import unittest +import hashlib + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +try: + os.mkdir('tests/out') +except: + pass + +from TextRecognitionDataGenerator.data_generator import FakeTextDataGenerator +from TextRecognitionDataGenerator.background_generator import BackgroundGenerator +from TextRecognitionDataGenerator.run import ( + create_strings_from_file, + create_strings_from_dict, + create_strings_from_wikipedia +) + +def md5(filename): + hash_md5 = hashlib.md5() + with open(filename, "rb") as f: + hash_md5.update(f.read()) + h = hash_md5.hexdigest() + return h + +class DataGenerator(unittest.TestCase): + def test_create_string_from_wikipedia(self): + """ + Test that the function returns different output if called twice. + (And that it doesn't throw of course) + """ + + strings = create_strings_from_wikipedia(20, 2, 'en') + + self.assertTrue( + len(strings) == 2 and + strings[0] != strings[1] and + len(strings[0].split(' ')) >= 20 and + len(strings[1].split(' ')) >= 20 + ) + + def test_create_string_from_file(self): + strings = create_strings_from_file('tests/test.txt', 6) + + self.assertTrue( + len(strings) == 6 and + strings[0] != strings[1] and + strings[0] == strings[3] + ) + + def test_create_strings_from_dict(self): + strings = create_strings_from_dict(3, False, 2, ['TEST\n', 'TEST\n', 'TEST\n', 'TEST\n']) + + self.assertTrue( + len(strings) == 2 and + len(strings[0].split(' ')) == 3 + ) + + def test_generate_data_with_format(self): + FakeTextDataGenerator.generate( + 0, + 'TEST TEST TEST', + 'tests/font.ttf', + 'tests/out/', + 64, + 'jpg', + 0, + False, + 0, + False, + 1, + False, + 1 + ) + + self.assertTrue( + md5('tests/out/TEST TEST TEST_0.jpg') == md5('tests/expected_results/TEST TEST TEST_0.jpg') + ) + + os.remove('tests/out/TEST TEST TEST_0.jpg') + + def test_generate_data_with_extension(self): + FakeTextDataGenerator.generate( + 1, + 'TEST TEST TEST', + 'tests/font.ttf', + 'tests/out/', + 32, + 'png', + 0, + False, + 0, + False, + 1, + False, + 1 + ) + + self.assertTrue( + md5('tests/out/TEST TEST TEST_1.png') == md5('tests/expected_results/TEST TEST TEST_1.png') + ) + + os.remove('tests/out/TEST TEST TEST_1.png') + + def test_generate_data_with_skew_angle(self): + FakeTextDataGenerator.generate( + 2, + 'TEST TEST TEST', + 'tests/font.ttf', + 'tests/out/', + 64, + 'jpg', + 15, + False, + 0, + False, + 1, + False, + 1 + ) + + self.assertTrue( + md5('tests/out/TEST TEST TEST_2.jpg') == md5('tests/expected_results/TEST TEST TEST_2.jpg') + ) + + os.remove('tests/out/TEST TEST TEST_2.jpg') + + def test_generate_data_with_blur(self): + FakeTextDataGenerator.generate( + 3, + 'TEST TEST TEST', + 'tests/font.ttf', + 'tests/out/', + 64, + 'jpg', + 0, + False, + 3, + False, + 1, + False, + 1 + ) + + self.assertTrue( + md5('tests/out/TEST TEST TEST_3.jpg') == md5('tests/expected_results/TEST TEST TEST_3.jpg') + ) + + os.remove('tests/out/TEST TEST TEST_3.jpg') + + def test_generate_data_with_white_background(self): + BackgroundGenerator.plain_white(64, 128).save('tests/out/white_background.jpg') + + self.assertTrue( + md5('tests/out/white_background.jpg') == md5('tests/expected_results/white_background.jpg') + ) + + os.remove('tests/out/white_background.jpg') + + def test_generate_data_with_gaussian_background(self): + BackgroundGenerator.gaussian_noise(64, 128).save('tests/out/gaussian_background.jpg') + + self.assertTrue( + md5('tests/out/gaussian_background.jpg') == md5('tests/expected_results/gaussian_background.jpg') + ) + + os.remove('tests/out/gaussian_background.jpg') + +if __name__=='__main__': + unittest.main()