-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
facebook-swing-tiny-coco image segmentation
- Loading branch information
Showing
14 changed files
with
309 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,5 +15,6 @@ old | |
*.flac | ||
*.jpg | ||
*photo* | ||
*.png | ||
|
||
|
5 changes: 5 additions & 0 deletions
5
3-experiments/4-text-to-speech/espnet-tacotron2-notokenbad.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from espnet2.bin.tts_inference import Text2Speech | ||
|
||
# Tag of the pretrained ESPnet Tacotron2 (LJSpeech) checkpoint to load.
MODEL_TAG = "espnet/kan-bayashi_ljspeech_tts_train_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.best"

# Sentence to synthesize.
INPUT_TEXT = "text to generate speech from"

model = Text2Speech.from_pretrained(MODEL_TAG)

# Only the first element of the model output is kept; the rest is discarded.
outputs = model(INPUT_TEXT)
speech, *_ = outputs
26 changes: 26 additions & 0 deletions
26
3-experiments/4-text-to-speech/huggingface-inference-api/coquiai404.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import requests | ||
|
||
# Prediction endpoint of the coqui-ai/TTS Space and the header sent with every request.
API_URL = "https://huggingface.co/spaces/coqui-ai/TTS/+/api/predict"
headers = {"Content-Type": "application/json"}


def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    reply = requests.post(API_URL, json=payload, headers=headers)
    reply.raise_for_status()
    return reply.json()
|
||
# Payload format sent to the coqui-ai/TTS Space.
payload = {
    "data": ["The answer to the universe is 42"]
}

result = query(payload)

# result['data'] holds a list of results; the first entry is assumed to be
# the URL of the generated audio.
audio_url = result['data'][0]

# Check the download status so that an HTTP error page is never written
# into the .wav file as if it were audio.
audio_response = requests.get(audio_url)
audio_response.raise_for_status()
audio_bytes = audio_response.content

# Save the audio bytes to a .wav file.
with open('output.wav', 'wb') as f:
    f.write(audio_bytes)
print("Audio guardado en output.wav")
26 changes: 26 additions & 0 deletions
26
...s/4-text-to-speech/huggingface-inference-api/huggingface-spaces-facebook-fastspeech404.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import requests | ||
|
||
# Prediction endpoint of the facebook/fastspeech2-en-ljspeech Space and the
# header sent with every request.
API_URL = "https://hf.space/embed/facebook/fastspeech2-en-ljspeech/+/api/predict"
headers = {"Content-Type": "application/json"}


def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    reply = requests.post(API_URL, json=payload, headers=headers)
    reply.raise_for_status()
    return reply.json()
|
||
# Payload format sent to the fastspeech2 Space.
payload = {
    "data": ["The answer to the universe is 42"]
}

result = query(payload)

# result['data'] holds a list of results; the first entry is assumed to
# describe the generated audio, with its location under the 'name' key.
audio_url = result['data'][0]['name']

# Check the download status so that an HTTP error page is never written
# into the .wav file as if it were audio.
audio_response = requests.get(audio_url)
audio_response.raise_for_status()
audio_bytes = audio_response.content

# Save the audio bytes to a .wav file.
with open('output.wav', 'wb') as f:
    f.write(audio_bytes)
print("Audio guardado en output.wav")
18 changes: 18 additions & 0 deletions
18
3-experiments/4-text-to-speech/huggingface-inference-api/huggingface-speak-ng404.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import requests | ||
|
||
# Hosted inference endpoint for the "espeak-ng" model id and the header sent
# with every request.
API_URL = "https://api-inference.huggingface.co/models/espeak-ng"
headers = {"Content-Type": "application/json"}


def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the raw response body as bytes.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    reply = requests.post(API_URL, json=payload, headers=headers)
    reply.raise_for_status()
    return reply.content
|
||
# Request synthesis of a fixed sentence; the reply body is treated as audio bytes.
request_body = {"inputs": "The answer to the universe is 42"}
audio_bytes = query(request_body)

# Save the audio bytes to a .wav file.
with open('output.wav', 'wb') as wav_file:
    wav_file.write(audio_bytes)
print("Audio guardado en output.wav")
1 change: 1 addition & 0 deletions
1
3-experiments/4-text-to-speech/huggingface-microsoft-tts5/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
https://huggingface.co/microsoft/speecht5_tts |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
24 changes: 24 additions & 0 deletions
24
3-experiments/4-text-to-speech/transformers-tacotron2-nomodel.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | ||
|
||
import wave

import numpy as np

cache_dir = ".cache"

# Name of the TTS model.
# NOTE(review): this is an ESPnet checkpoint; it may not be loadable through
# the transformers Auto* classes at all -- confirm before relying on this script.
model_name = "espnet/kan-bayashi_ljspeech_tts_train_tacotron2"

# Load the model and the tokenizer.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir=cache_dir)
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

# Create the text-to-speech pipeline. "text-to-speech" is the registered task
# name; "text2speech" is not a known pipeline task.
text_to_speech = pipeline("text-to-speech", model=model, tokenizer=tokenizer)

# Example text.
text = "Hello, how are you?"

# Generate the audio. The pipeline result is a dict holding the waveform under
# "audio" and its sample rate under "sampling_rate".
outputs = text_to_speech(text)

# Convert the float waveform to 16-bit PCM; writing the raw array bytes to
# disk would not produce a valid WAV file.
# Assumes a mono waveform in the [-1, 1] range -- TODO confirm for this model.
samples = np.asarray(outputs["audio"], dtype=np.float32).squeeze()
pcm16 = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")

# Save the audio as a proper WAV container (mono, 16-bit).
with wave.open("output.wav", "wb") as wav_file:
    wav_file.setnchannels(1)
    wav_file.setsampwidth(2)
    wav_file.setframerate(int(outputs["sampling_rate"]))
    wav_file.writeframes(pcm16.tobytes())
print("Audio guardado en output.wav")
File renamed without changes.
99 changes: 99 additions & 0 deletions
99
...noptic-segmentation/facebook-swing-tiny-coco/hf-iapi-facebook-swig-tiny-coco-overlayed.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import requests | ||
from pycocotools import mask | ||
import matplotlib.pyplot as plt | ||
from PIL import Image, ImageDraw, ImageOps, ImageFont | ||
from dotenv import find_dotenv, load_dotenv | ||
import os | ||
import base64 | ||
import io | ||
import random | ||
import numpy as np | ||
|
||
# Load variables from a .env file (when one is found) and read the API token
# from the environment; the token is None when the variable is not set.
load_dotenv(find_dotenv())
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

# Hosted inference endpoint for the Mask2Former panoptic model and the
# bearer-token header sent with every request.
API_URL = "https://api-inference.huggingface.co/models/facebook/mask2former-swin-tiny-coco-panoptic"
headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
|
||
def segment_image(image_path):
    """Upload the raw bytes of the file at ``image_path`` to ``API_URL``.

    Returns the decoded JSON body of the reply. The HTTP status is not
    checked here, so an error payload is returned to the caller as-is.
    """
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()
    reply = requests.post(API_URL, headers=headers, data=image_bytes)
    return reply.json()
|
||
def decode_mask(mask_str, size):
    """Decode a base64-encoded image into a grayscale ("L") PIL image.

    Args:
        mask_str: base64 text of an encoded image file.
        size: ``(width, height)`` the decoded image is resized to.
    """
    raw_bytes = base64.b64decode(mask_str)
    decoded = Image.open(io.BytesIO(raw_bytes))
    return decoded.resize(size).convert("L")
|
||
_LABEL_FONT_PATH = "/System/Library/Fonts/Arial.ttf"  # Path to Arial font on macOS
_LABEL_FONT_SIZE = 30


def _load_label_font():
    """Return the label font, falling back to Pillow's built-in font if Arial cannot be opened."""
    try:
        return ImageFont.truetype(_LABEL_FONT_PATH, _LABEL_FONT_SIZE)
    except OSError:
        return ImageFont.load_default()


def overlay_masks_on_image(image_path, segments, transparency=0.4):
    """Draw every segment mask and its label on top of the image at ``image_path``.

    Args:
        image_path: path of the image that was segmented.
        segments: iterable of dicts with the keys 'label', 'score' and 'mask'
            (the mask being a base64-encoded image accepted by ``decode_mask``).
        transparency: blend factor passed to ``Image.blend`` between the
            original image and the coloured mask layer.

    Returns:
        An RGBA PIL image with the coloured masks blended in and one
        "label: score" caption centred on each non-empty mask.
    """
    original_image = Image.open(image_path).convert("RGBA")
    overlay = Image.new("RGBA", original_image.size, (255, 255, 255, 0))

    # Separate layer for the captions so they are not dimmed by the blend below.
    text_layer = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(text_layer)
    font = _load_label_font()  # loaded once instead of once per segment

    for segment in segments:
        label = segment['label']
        score = segment['score']
        print(label + " " + str(score))

        mask_image = decode_mask(segment['mask'], original_image.size)
        color = generate_random_color()

        # Tint the mask with a random colour and use the mask itself as alpha.
        color_mask = ImageOps.colorize(mask_image, black="black", white=color)
        color_mask.putalpha(mask_image)
        overlay = Image.alpha_composite(overlay, color_mask)

        # np.where yields (row indices, column indices): rows are the y axis
        # and columns the x axis of the image.
        rows, cols = np.where(np.asarray(mask_image) > 0)
        if rows.size == 0:
            # Empty mask: there is no centroid to anchor a caption to.
            continue
        centroid = (float(cols.mean()), float(rows.mean()))

        # anchor="mm" centres the text on the centroid, replacing a fixed
        # guess of the rendered text size.
        draw.text(
            centroid,
            f"{label}: {score}",
            fill=(255, 255, 255, 255),
            font=font,
            anchor="mm",
        )

    # Adjust the transparency of the mask layer against the original image.
    overlay = Image.blend(original_image, overlay, transparency)

    # Put the captions on top of the blended result.
    return Image.alpha_composite(overlay, text_layer)
|
||
def generate_random_color():
    """Return a random ``(r, g, b)`` tuple; each channel is an int from 0 to 255 inclusive."""
    return tuple(random.randint(0, 255) for _ in range(3))
|
||
|
||
def main():
    """Segment ``cats.jpg``, then show and save the overlay, or print the API error payload."""
    image_path = "cats.jpg"
    response = segment_image(image_path)

    # Anything other than a list is treated as an error reply from the API.
    if not isinstance(response, list):
        print("Error in segmentation:", response)
        return

    result = overlay_masks_on_image(image_path, response)
    result.show()
    result.save("overlayed_image.png")
|
||
# Run the demo only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
109 changes: 109 additions & 0 deletions
109
...anoptic-segmentation/facebook-swing-tiny-coco/hf-iapi-facebook-swig-tiny-coco-segments.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import requests | ||
from pycocotools import mask | ||
import matplotlib.pyplot as plt | ||
from PIL import Image, ImageDraw, ImageOps | ||
from pprint import pprint | ||
from dotenv import find_dotenv, load_dotenv | ||
import os | ||
import base64 | ||
import io | ||
from pprint import pprint | ||
#load_dotenv(find_dotenv()) | ||
# The token is read from the process environment; it is None when the
# variable is not set.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

# Hosted inference endpoint for the Mask2Former panoptic model and the
# bearer-token header sent with every request.
API_URL = "https://api-inference.huggingface.co/models/facebook/mask2former-swin-tiny-coco-panoptic"
headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
#print(headers) | ||
|
||
#def query(filename): | ||
# with open(filename, "rb") as f: | ||
# data = f.read() | ||
# response = requests.post(API_URL, headers=headers, data=data) | ||
# return response.json() | ||
|
||
#output = query("cats.jpg") | ||
# Print the type of the 'output' variable | ||
# print(type(output)) | ||
|
||
|
||
def segment_image(image_path):
    """Upload the raw bytes of the file at ``image_path`` to ``API_URL``.

    Returns the decoded JSON body of the reply. The HTTP status is not
    checked here, so an error payload is returned to the caller as-is.
    """
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()
    reply = requests.post(API_URL, headers=headers, data=image_bytes)
    return reply.json()
|
||
|
||
|
||
|
||
def draw_segmented_image(image_path, segments):
    """Paint each segment's mask over the image at ``image_path`` in the segment's own colour.

    Each segment is read as ``segment['mask']['data']`` (encoded image bytes)
    and ``segment['color']`` (an RGB sequence).
    NOTE(review): this schema differs from the base64 'mask' strings handled
    elsewhere in this file -- confirm which API reply it was written for.

    Returns:
        The RGBA composite of the original image and the half-opaque masks.
    """
    base = Image.open(image_path).convert("RGBA")
    overlay = Image.new("RGBA", base.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)  # not used below

    for segment in segments:
        raw_mask = Image.open(io.BytesIO(segment['mask']['data']))
        raw_mask = raw_mask.resize(base.size)
        rgb = tuple(segment['color'])
        # Binarise the mask: values above 128 become 255, everything else 0.
        binary_mask = raw_mask.convert("L").point(lambda p: 255 if p > 128 else 0)
        # Solid layer in the segment colour at alpha 128, pasted where the mask is set.
        solid = Image.new("RGBA", base.size, rgb + (128,))
        overlay = Image.composite(solid, overlay, binary_mask)

    return Image.alpha_composite(base, overlay)
|
||
def decode_mask(mask_str):
    """Decode a base64-encoded image into a grayscale ("L") PIL image at its native size.

    NOTE: a second ``decode_mask`` defined further down in this file rebinds
    the name, so this version is not the one in effect once the module has
    finished loading.
    """
    raw_bytes = base64.b64decode(mask_str)
    return Image.open(io.BytesIO(raw_bytes)).convert("L")
|
||
def overlay_mask_on_image(original_image, mask_image, color=(255, 0, 0), alpha=0.5):
    """Return ``original_image`` with ``mask_image`` painted over it in ``color``.

    Args:
        original_image: PIL image to draw on; it is converted to RGBA.
        mask_image: grayscale mask, used both as the tint source and as the
            alpha channel of the painted layer.
        color: colour that white mask pixels are mapped to.
        alpha: accepted for API compatibility; not used by the body.
    """
    # Map the mask from black..white to black..color, with the mask as alpha.
    tinted = ImageOps.colorize(mask_image, black="black", white=color)
    tinted.putalpha(mask_image)

    # Bring the tinted layer to the size of the original before compositing.
    tinted = tinted.resize(original_image.size, resample=Image.BILINEAR)

    base_rgba = original_image.convert("RGBA")
    return Image.alpha_composite(base_rgba, tinted)
|
||
|
||
|
||
|
||
|
||
|
||
def decode_mask(mask_str, size=None):
    """Decode a base64-encoded image into a grayscale ("L") PIL image.

    This definition replaces an earlier one-argument ``decode_mask`` in this
    file; ``size`` is optional so that both call forms remain valid.

    Args:
        mask_str: base64 text of an encoded image file.
        size: optional ``(width, height)`` to resize the decoded image to;
            when omitted the image keeps its native size.
    """
    mask_data = base64.b64decode(mask_str)
    mask_image = Image.open(io.BytesIO(mask_data))
    if size is not None:
        mask_image = mask_image.resize(size)
    return mask_image.convert("L")
|
||
def decode_and_display_mask(mask_str):
    """Decode a base64-encoded mask to grayscale and open it in the default image viewer."""
    raw_bytes = base64.b64decode(mask_str)
    grayscale = Image.open(io.BytesIO(raw_bytes)).convert("L")
    grayscale.show()  # Display the mask image
|
||
|
||
image_path = "cats.jpg"
response = segment_image(image_path)

if isinstance(response, list):  # Check if the result is a list of segments
    original_image = Image.open(image_path).convert("RGBA")

    # Occurrences seen so far per label, so that two segments with the same
    # label do not overwrite each other's output file.
    label_counts = {}

    for segment in response:
        label = segment['label']
        score = segment['score']
        # Named mask_str so the loop does not rebind the imported
        # pycocotools `mask` module.
        mask_str = segment['mask']
        print(f"Label: {label}, Score: {score}")

        mask_image = decode_mask(mask_str, original_image.size)
        overlay_image = overlay_mask_on_image(original_image, mask_image)

        overlay_image.show()  # Display the image with overlay

        # The first occurrence keeps the plain name; later ones get _2, _3, ...
        label_counts[label] = label_counts.get(label, 0) + 1
        occurrence = label_counts[label]
        suffix = "" if occurrence == 1 else f"_{occurrence}"
        overlay_image.save(f"overlay_{label}{suffix}.png")
else:
    print("Error in segmentation:", response)
#if 'segments' in segments: | ||
# segmented_image = draw_segmented_image(image_path, segments['segments']) | ||
# segmented_image.show() | ||
#else: | ||
# print("Error in segmentation:", segments) |