-
Notifications
You must be signed in to change notification settings - Fork 0
/
sisco_vis.py
367 lines (305 loc) · 14.6 KB
/
sisco_vis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
import os
import sys
import rospy
import PySimpleGUI as sg
import time
from pathlib import Path
import numpy as np
from threading import Thread
import ctypes
import json
import argparse
from context.context4task import *
from context.core import *
from llm_utils import LLM_UTILS
# Define a class for SiSCo_Vis
class SiSCo_Vis:
    """GUI and OpenCV front end for synthesizing and showing SiSCo visual signals.

    The class collects object properties through a PySimpleGUI form, asks the
    configured LLM (via ``LLM_UTILS``) for SVG-based signals, and renders the
    result in an OpenCV window.

    Attributes set in ``__init__``:
        font, font_size: font family and size used for PySimpleGUI elements.
        text_size, input_text_size: default ``size`` for Text / Input elements.
        kill_display: set to True (e.g. from another thread) to stop
            ``display_signal``; it is reset to False when the display closes.
        llms: ``LLM_UTILS`` instance used for every model call.
    """

    # Name of the OpenCV window used by display_signal.
    WINDOW_NAME = "visualize"
    # Number of animation replays before display_signal returns on its own.
    MAX_REPLAYS = 1000
    # Per-frame increment of the blending weight handed to animated_image.
    ALPHA_STEP = 0.08

    def __init__(self, model="4.0-turbo") -> None:
        """
        Initialize SiSCo_Vis class.

        Parameters:
        - model (str): The model to use. Defaults to "4.0-turbo".
        """
        # Initialize font settings
        self.font = 'Roboto'
        self.font_size = 20
        self.text_size = 20
        self.input_text_size = 50
        # Flag to control the display
        self.kill_display = False
        # Initialize LLM_UTILS object with the specified model
        self.llms = LLM_UTILS(model=model)

    def get_sg_font(self) -> tuple:
        """
        Get the font and font size used for PySimpleGUI elements.

        Returns:
        - tuple: A tuple containing the font name and font size.
        """
        return (self.font, self.font_size)

    def get_sg_text(self, text, size=None, justification='left'):
        """
        Get a PySimpleGUI Text element with specified text, size, and justification.

        Parameters:
        - text (str): The text to display in the Text element.
        - size (int, tuple or None): The size of the Text element. Defaults to None,
          which uses the class's default text size.
        - justification (str): The justification of the text within the element.
          Defaults to 'left'.

        Returns:
        - sg.Text: A PySimpleGUI Text element with the specified properties.
        """
        if size is None:
            size = self.text_size
        return sg.Text(text, font=self.get_sg_font(), size=size, justification=justification)

    def get_sg_input_text(self, key, size=None):
        """
        Get a PySimpleGUI Input element with specified key and size.

        Parameters:
        - key (str): The key of the Input element.
        - size (int, tuple or None): The size of the Input element. Defaults to None,
          which uses the class's default input text size.

        Returns:
        - sg.Input: A PySimpleGUI Input element with the specified properties.
        """
        if size is None:
            size = self.input_text_size
        return sg.Input(key=key, size=size)

    def get_sg_image(self, img_path, expand_x=False, expand_y=False):
        """
        Get a PySimpleGUI Image element with specified image path and expansion settings.

        Parameters:
        - img_path (str or Path): The path to the image file; non-str values are
          converted with ``str()``.
        - expand_x (bool): Whether to expand the image horizontally. Defaults to False.
        - expand_y (bool): Whether to expand the image vertically. Defaults to False.

        Returns:
        - sg.Image: A PySimpleGUI Image element with the specified properties.
        """
        if not isinstance(img_path, str):
            img_path = str(img_path)
        return sg.Image(img_path, expand_x=expand_x, expand_y=expand_y)

    @staticmethod
    def _overlay(canvas, patch, top, left):
        """Return a copy of ``canvas`` with ``patch`` added at row ``top``, column ``left``.

        The target window is sized from ``patch`` itself, so it always matches the
        patch shape. (A symmetric ``a:-a`` slice is empty when ``a == 0`` and is one
        pixel too large when the size difference is odd.)
        """
        layer = np.copy(canvas)
        height, width = patch.shape[:2]
        layer[top:top + height, left:left + width, :] += patch
        return layer

    def _stop_requested(self, key) -> bool:
        """Return True when 'q' was pressed or ``kill_display`` has been set."""
        # Mask to the low byte: waitKey may return extra high bits on some platforms.
        return (key & 0xFF) == ord('q') or self.kill_display

    def _close_display(self) -> None:
        """Destroy the OpenCV windows and clear the ``kill_display`` flag."""
        cv2.destroyAllWindows()
        # One more event-loop tick so the window actually disappears.
        cv2.waitKey(1)
        self.kill_display = False

    def display_signal(self, obj_vss_rsp, inst_vss_rsp, only_icon=False):
        """
        Display visual signals using OpenCV.

        The first SVG found in each response is rasterized and composited onto a
        black 800x1600 canvas. With ``only_icon`` the object icon is shown statically
        at the goal position; otherwise the layers are blended frame by frame through
        ``animated_image`` and the animation is replayed up to ``MAX_REPLAYS`` times.
        Pressing "q" in the window or setting ``self.kill_display`` stops the display.

        Parameters:
        - obj_vss_rsp (str): Object visual signal response.
        - inst_vss_rsp (str): Instruction visual signal response.
        - only_icon (bool): Whether to display only the icon without animation.
          Defaults to False.

        Returns:
        - None
        """
        # Convert SVG codes to images
        inst_img = svg_to_cv(svg_code=get_svg_files(inst_vss_rsp)[0], show=False)
        (start_x, start_y), (goal_x, goal_y), ori = get_start_goal_orientation(inst_vss_rsp)

        # Define transformations according to icon_size
        icon_size = ICON_RES
        cv_icon_ctr_x = icon_size[0] // 2
        cv_icon_ctr_y = icon_size[1] // 2

        # Create object icon and rotated object icon
        obj_img_init = svg_to_cv(svg_code=get_svg_files(obj_vss_rsp)[0], show=False)
        rotation = cv2.getRotationMatrix2D((cv_icon_ctr_x, cv_icon_ctr_y), -ori, 1.0)
        obj_img_rot = cv2.warpAffine(obj_img_init, rotation, (icon_size[1], icon_size[0]))

        proj_x = 1.6  # meters
        proj_y = 0.8  # meters
        outer_image = np.zeros((int(1000 * proj_y), int(1000 * proj_x), 3), dtype=np.uint8)

        # Offsets that center the instruction image on the canvas; start/goal
        # coordinates are shifted by the same amount.
        x_offset = int((outer_image.shape[1] - inst_img.shape[1]) / 2)
        y_offset = int((outer_image.shape[0] - inst_img.shape[0]) / 2)
        start_x = start_x + x_offset
        start_y = start_y + y_offset
        goal_x = goal_x + x_offset
        goal_y = goal_y + y_offset

        # Each layer is the black canvas plus exactly one element.
        start_layer = self._overlay(outer_image, obj_img_init,
                                    start_y - cv_icon_ctr_y, start_x - cv_icon_ctr_x)
        inst_layer = self._overlay(outer_image, inst_img, y_offset, x_offset)
        goal_rot_layer = self._overlay(outer_image, obj_img_rot,
                                       goal_y - cv_icon_ctr_y, goal_x - cv_icon_ctr_x)
        goal_icon_layer = self._overlay(outer_image, obj_img_init,
                                        goal_y - cv_icon_ctr_y, goal_x - cv_icon_ctr_x)

        # Loop-invariant input for animated_image.
        visual_signals = [outer_image, start_layer, inst_layer, goal_rot_layer]

        print("press \"q\" to stop the visualization..")
        counter = 0
        while counter < self.MAX_REPLAYS:
            if only_icon:
                cv2.imshow(self.WINDOW_NAME, goal_icon_layer)
                if self._stop_requested(cv2.waitKey(1)):
                    self._close_display()
                    return
            else:
                # Animation Starts: all three blending weights advance in lockstep,
                # so a single alpha drives them.
                alpha = 0.0
                while alpha < 1.0:
                    proj_image = animated_image(visual_signals, alpha, alpha, alpha)
                    cv2.imshow(self.WINDOW_NAME, proj_image)
                    key = cv2.waitKey(1)
                    alpha += self.ALPHA_STEP
                    if self._stop_requested(key):
                        self._close_display()
                        return
                # NOTE(review): indentation was lost in the reviewed copy; the replay
                # counter and the one-second pause are assumed to belong to the
                # animation branch only - confirm against the original file.
                counter += 1
                time.sleep(1)

        # Replay budget exhausted: do not leave the window behind.
        self._close_display()
        return

    @staticmethod
    def _response_text(response):
        """Return ``response`` if it is already a str, else its first choice's message content."""
        if isinstance(response, str):
            return response
        return response.choices[0].message.content

    def _query_llm(self, prompt, temperature, max_token):
        """Send ``prompt`` through ``self.llms`` and return the reply as text."""
        messages = self.llms.generate_openai_message(prompt=prompt, printout=False)
        response = self.llms.call_model_response(messages=messages, temperature=temperature,
                                                 max_tokens=max_token)
        return self._response_text(response)

    def synthesize_signals(self, properties, temperature=0.0, max_token=16000, obj_=True, inst_=True, nli_=False):
        """
        Synthesizes signals based on input properties and options.

        A task-master prompt is built from ``properties`` and answered first; its
        response is then fed into each enabled signal prompt.

        Args:
        - properties (dict): Dictionary containing task-related properties. The keys
          read are "structure", "object_description", "object_color", "orientation",
          "instruction" and "goal_position"; missing keys are passed on as None.
        - temperature (float): Temperature parameter for text generation (default: 0.0).
        - max_token (int): Maximum number of tokens for text generation (default: 16000).
        - obj_ (bool): Flag to enable object visual signal synthesis (default: True).
        - inst_ (bool): Flag to enable instruction visual signal synthesis (default: True).
        - nli_ (bool): Flag to enable natural language interaction signal synthesis
          (default: False).

        Returns:
        - tuple: (object visual signal, instruction visual signal, natural language
          interaction signal); an entry is None when its flag is disabled.
        """
        # Task manager prompt and response generation
        task_context = firstExperimentTaskDescription(
            structure=properties.get("structure"),
            object_description=properties.get("object_description"),
            object_color=properties.get("object_color"),
            orientation=properties.get("orientation"),
            instruction=properties.get("instruction"),
            goal_position=properties.get("goal_position"),
        )
        tm_rsp = self._query_llm(generate_task_master_prompt(task_context), temperature, max_token)

        # Object visual signal synthesis
        obj_vss_rsp = None
        if obj_:
            obj_vss_rsp = self._query_llm(generate_obj_vss_prompt(response=tm_rsp),
                                          temperature, max_token)

        # Instruction visual signal synthesis
        inst_vss_rsp = None
        if inst_:
            inst_vss_rsp = self._query_llm(generate_inst_vss_prompt(response=tm_rsp),
                                           temperature, max_token)

        # Natural language interaction signal synthesis
        nlis_rsp = None
        if nli_:
            nlis_rsp = self._query_llm(generate_nlis_prompt(response=tm_rsp),
                                       temperature, max_token)

        return obj_vss_rsp, inst_vss_rsp, nlis_rsp

    def show_signal(self, properties, temperature=0.0):
        """
        Synthesize visual signals based on properties and display them.

        Parameters:
        - properties (dict): Properties for synthesizing visual signals.
        - temperature (float): Temperature for model response, default is 0.0.

        Returns:
        - None
        """
        # Synthesize object and instruction visual signals
        obj_vss_rsp, inst_vss_rsp, _ = self.synthesize_signals(properties=properties,
                                                               temperature=temperature)
        # Reset kill_display flag
        self.kill_display = False
        # Display the visual signals
        self.display_signal(obj_vss_rsp, inst_vss_rsp)
        return

    def vis(self):
        """
        Display a GUI window to generate and visualize a unique visual signal.

        The form shows a read-only example and four editable fields. Pressing the
        "Generate Visual Signal" button starts ``show_signal`` on a background thread
        with the entered values and closes the form; "OK" or closing the window
        exits without generating anything.

        Returns:
        - None
        """
        # Set default temperature
        temperature = 0.0

        # Example input column
        col_exp = [
            [self.get_sg_text('Properties', size=100)],
            [sg.HSeparator()],
            [self.get_sg_text('Object Description', size=15), sg.VSeparator(),
             sg.Input('Carrots', disabled=True)],
            [self.get_sg_text('Color', size=15), sg.VSeparator(),
             sg.Input('Orange', disabled=True)],
            [self.get_sg_text('Orientation', size=15), sg.VSeparator(),
             sg.Input('Vertical', disabled=True)],
            [self.get_sg_text('Instruction', size=15), sg.VSeparator(),
             sg.Input('Insert from bottom-right', disabled=True)],
        ]

        # User input column
        col_user = [
            [self.get_sg_text('Properties', size=100)],
            [sg.HSeparator()],
            [self.get_sg_text('Object Description', size=15), sg.VSeparator(),
             sg.Input(key='Object_Description')],
            [self.get_sg_text('Color', size=15), sg.VSeparator(), sg.Input(key='Color')],
            [self.get_sg_text('Orientation', size=15), sg.VSeparator(),
             sg.Input(key='Orientation')],
            [self.get_sg_text('Instruction', size=15), sg.VSeparator(),
             sg.Input(key='Instruction')],
            [sg.HSeparator()],
            [sg.Button("Generate Visual Signal (window will close once button is pressed)",
                       key="VSS")],
            [sg.Text("Note: After clicking the button above, please wait for 20 seconds.",
                     font=("Arial", 20), text_color="blue")],
            [sg.HSeparator()],
            [sg.Button("OK", key="OK")],
        ]

        # Define the layout of the GUI window. A layout row must be a list of
        # elements, so each group of rows is wrapped in a Column element.
        layout = [
            [self.get_sg_text("\nHere, you can create your own unique visual signal.",
                              size=(100, 2))],
            [sg.HSeparator()],
            [self.get_sg_text("Here is example input!", size=100)],
            [sg.Column(col_exp)],
            [sg.HSeparator()],
            [self.get_sg_text("Provide your input below. It could be anything!", size=100)],
            [sg.Column(col_user)],
        ]

        # Create the Window
        window = sg.Window('SiSCo Visualization', layout, font=self.get_sg_font(),
                           size=(1200, 800), finalize=True, location=(0, 0),
                           keep_on_top=True, resizable=True)
        # Raise "VSS_Button" as soon as the mouse button goes down on the VSS button.
        window['VSS'].bind("<Button>", "_Button")

        # Event Loop to process "events" and get the "values" of the inputs
        while True:
            event, sg_values = window.read()
            # A closed window reports sg.WIN_CLOSED on every read; without this
            # check the loop would never terminate.
            if event in (sg.WIN_CLOSED, "OK"):
                break
            if event == "VSS_Button":
                # Prepare properties based on user input
                properties = {
                    "structure": " ",
                    "object_description": sg_values.get("Object_Description"),
                    "object_color": sg_values.get("Color"),
                    "orientation": sg_values.get("Orientation"),
                    "instruction": sg_values.get("Instruction"),
                    "goal_position": [500, 250],
                }
                # Start a new thread to show the visual signal
                print("\nWait while LLM synthesize the signal..\n")
                th = Thread(target=self.show_signal, args=(properties, temperature))
                th.start()
                break

        window.close()
        return
if __name__ == "__main__":
    # Script entry point: build the visualizer with its default model and
    # open the GUI form right away.
    SiSCo_Vis().vis()