Add ten/five crop augmentation #110
base: master
```diff
@@ -24,6 +24,7 @@ def __init__(self, args) -> None:
         super().__init__(
             feature_type=args.feature_type,
             on_extraction=args.on_extraction,
+            save_option=args.save_option,
             tmp_path=args.tmp_path,
             output_path=args.output_path,
             keep_tmp_files=args.keep_tmp_files,
```
```diff
@@ -38,15 +39,24 @@ def __init__(self, args) -> None:
         self.extraction_fps = args.extraction_fps
         self.step_size = 64 if args.step_size is None else args.step_size
         self.stack_size = 64 if args.stack_size is None else args.stack_size
+        self.aug_type = args.augment
         self.resize_transforms = torchvision.transforms.Compose([
             torchvision.transforms.ToPILImage(),
             ResizeImproved(self.min_side_size),
             PILToTensor(),
             ToFloat(),
         ])
+        if self.aug_type is None:
+            aug_transform = TensorCenterCrop(self.central_crop_size)
+        elif self.aug_type == 'five_crop':
+            aug_transform = torchvision.transforms.FiveCrop(self.central_crop_size)
+            self.num_crop = 5
+        elif self.aug_type == 'ten_crop':
+            aug_transform = torchvision.transforms.TenCrop(self.central_crop_size)
+            self.num_crop = 10
         self.i3d_transforms = {
             'rgb': torchvision.transforms.Compose([
-                TensorCenterCrop(self.central_crop_size),
+                aug_transform,
                 ScaleTo1_1(),
                 PermuteAndUnsqueeze()
             ]),
```

> **Reviewer** (on the `aug_transform` line): any reason why we can't do it for the flow?
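Context for this thread (not part of the diff): torchvision's `FiveCrop`/`TenCrop` return a *tuple* of crops rather than a single tensor, which is what forces the `isinstance(..., tuple)` branches in the hunks below. A minimal sketch of that behavior, with an assumed 224-pixel crop size standing in for `self.central_crop_size`:

```python
import torch
import torchvision

crop_size = 224  # stand-in for self.central_crop_size
five_crop = torchvision.transforms.FiveCrop(crop_size)

# fake (C, H, W) tensor standing in for a resized video frame
frame = torch.rand(3, 256, 341)

crops = five_crop(frame)
print(type(crops), len(crops))  # <class 'tuple'> 5
print(crops[0].shape)           # torch.Size([3, 224, 224])
```

`TenCrop` behaves the same way but also returns the horizontally flipped versions, hence `self.num_crop = 10`.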
```diff
@@ -82,8 +92,12 @@ def extract(self, video_path: str) -> Dict[str, np.ndarray]:
         # timestamp when the last frame in the stack begins (when the old frame of the last pair ends)
         timestamps_ms = []
         rgb_stack = []
-        feats_dict = {stream: [] for stream in self.streams}
+        if self.aug_type is not None:
+            feats_dict = {stream: [[] for _ in range(self.num_crop)] for stream in self.streams}
+        else:
+            feats_dict = {stream: [] for stream in self.streams}
 
         # sometimes when the target fps is 1 or 2, the first frame of the reencoded video is missing
         # and cap.read returns None but the rest of the frames are ok. timestep is 0.0 for the 2nd frame in
         # this case
```

> **Reviewer** (on the `feats_dict` change): Why treat each crop as a separate tensor instead of a batch dimension:
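One way to read the reviewer's suggestion: fold the crops into the batch dimension so the model runs once per stack instead of once per crop. A sketch under that assumption (illustrative only, not code from the PR; `model` stands in for `models[stream]`, and the `features=True` call mirrors the usage already in this diff):

```python
import torch

def run_crops_as_batch(model, crops, feature_dim=1024):
    """Run a tuple of per-crop inputs through the model as a single batch.

    crops: tuple of num_crop tensors, each shaped like a normal model
           input, e.g. (B, C, T, H, W) for I3D.
    Returns features shaped (num_crop, B, feature_dim).
    """
    num_crop = len(crops)
    batch = torch.cat(crops, dim=0)                # (num_crop * B, ...)
    feats = model(batch, features=True)            # (num_crop * B, feature_dim)
    return feats.view(num_crop, -1, feature_dim)   # (num_crop, B, feature_dim)
```

With a leading crop axis like this, `feats_dict[stream]` could stay a single list per stream instead of `num_crop` parallel Python lists.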
```diff
@@ -113,7 +127,11 @@ def extract(self, video_path: str) -> Dict[str, np.ndarray]:
                 if len(rgb_stack) - 1 == self.stack_size:
                     batch_feats_dict = self.run_on_a_stack(rgb_stack, stack_counter, padder)
                     for stream in self.streams:
-                        feats_dict[stream].extend(batch_feats_dict[stream].tolist())
+                        if isinstance(batch_feats_dict[stream], tuple):
+                            for i in range(len(batch_feats_dict[stream])):
+                                feats_dict[stream][i].extend(batch_feats_dict[stream][i].tolist())
+                        else:
+                            feats_dict[stream].extend(batch_feats_dict[stream].tolist())
                     # leaving the elements if step_size < stack_size so they will not be loaded again
                     # if step_size == stack_size one element is left because the flow between the last element
                     # in the prev list and the first element in the current list
```
```diff
@@ -161,8 +179,11 @@ def run_on_a_stack(self, rgb_stack, stack_counter, padder=None) -> Dict[str, tor
                 raise NotImplementedError
             # apply transforms depending on the stream (flow or rgb)
             stream_slice = self.i3d_transforms[stream](stream_slice)
-            # extract features for a stream
-            batch_feats_dict[stream] = models[stream](stream_slice, features=True)  # (B, 1024)
+            if isinstance(stream_slice, tuple):
+                # extract features for a stream
+                batch_feats_dict[stream] = tuple([models[stream](stream_crop, features=True) for stream_crop in stream_slice])
+            else:
+                batch_feats_dict[stream] = models[stream](stream_slice, features=True)  # (B, 1024)
             # add features to the output dict
             self.maybe_show_pred(stream_slice, self.name2module['model'][stream], stack_counter)
```
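One detail the diff leaves open: `maybe_show_pred` still receives `stream_slice`, which is a tuple on the augmented path. A defensive helper, purely as a sketch (the choice to visualize the first crop is my assumption, not behavior from the PR):

```python
def pick_slice_for_visualization(stream_slice):
    """Return a single tensor suitable for maybe_show_pred.

    On the augmented path stream_slice is a tuple of crops; picking the
    first crop here is an assumption, not behavior from the PR.
    """
    if isinstance(stream_slice, tuple):
        return stream_slice[0]
    return stream_slice
```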
---
```diff
@@ -145,13 +145,17 @@ def __call__(self, tensor: torch.FloatTensor) -> torch.FloatTensor:
 class ScaleTo1_1(object):
 
-    def __call__(self, tensor: torch.FloatTensor) -> torch.FloatTensor:
+    def __call__(self, tensor):
+        if isinstance(tensor, tuple):
+            return tuple([(2 * t / 255) - 1 for t in tensor])
         return (2 * tensor / 255) - 1
 
 
 class PermuteAndUnsqueeze(object):
 
-    def __call__(self, tensor: torch.FloatTensor) -> torch.FloatTensor:
+    def __call__(self, tensor):
+        if isinstance(tensor, tuple):
+            return tuple([t.permute(1, 0, 2, 3).unsqueeze(0) for t in tensor])
         return tensor.permute(1, 0, 2, 3).unsqueeze(0)
```

> **Reviewer** (on both rewritten `__call__` signatures): lost typing
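Addressing the "lost typing" comments, a sketch of how the annotations could be kept while still accepting crop tuples (my suggestion, not code from the PR):

```python
from typing import Tuple, Union

import torch

TensorOrCrops = Union[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]


class ScaleTo1_1:

    def __call__(self, tensor: TensorOrCrops) -> TensorOrCrops:
        # map pixel values from [0, 255] to [-1, 1], per crop if a tuple is given
        if isinstance(tensor, tuple):
            return tuple((2 * t / 255) - 1 for t in tensor)
        return (2 * tensor / 255) - 1
```

The same `TensorOrCrops` alias would work for `PermuteAndUnsqueeze.__call__`.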
---
```diff
@@ -50,9 +50,18 @@ def show_predictions_on_dataset(logits: torch.FloatTensor, dataset: Union[str, L
         print(f'{logit:8.3f} | {smax:.3f} | {cls}')
     print()
 
-def make_path(output_root, video_path, output_key, ext):
+def make_path(output_root, video_path, output_key, ext, idx=None):
     # extract file name and change the extention
-    fname = f'{Path(video_path).stem}_{output_key}{ext}'
+    if idx is not None:
+        if output_key is not None:
+            fname = f'{Path(video_path).stem}_{output_key}_{idx}{ext}'
+        else:
+            fname = f'{Path(video_path).stem}_{idx}{ext}'
+    else:
+        if output_key is not None:
+            fname = f'{Path(video_path).stem}_{output_key}{ext}'
+        else:
+            fname = f'{Path(video_path).stem}_{idx}{ext}'
     # construct the paths to save the features
     return os.path.join(output_root, fname)
```

> **Reviewer** (on `make_path`): we shouldn't resort to this. it became incredibly redundant. we need to save all features in one file
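Two notes on this hunk. First, the final `else` branch interpolates `idx` even though `idx` is `None` on that path, so a name like `video_None.npy` would be produced; presumably the suffix should be dropped entirely there. Second, a sketch of the reviewer's "one file" suggestion, assuming the per-crop features are arrays of equal shape (illustrative only, not from the PR):

```python
import numpy as np

def save_crops_in_one_file(path, crop_feats):
    """Stack per-crop feature lists and save them as a single .npy file.

    crop_feats: list of num_crop items, each (num_stacks, feature_dim).
    The saved array has shape (num_crop, num_stacks, feature_dim).
    """
    np.save(path, np.stack([np.asarray(f) for f in crop_feats], axis=0))
```

A consumer can then index the first axis per crop instead of globbing `_{idx}` files.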
```diff
@@ -131,8 +140,7 @@ def form_list_from_user_input(
     to_shuffle: bool = True,
 ) -> list:
     '''User specifies either list of videos in the cmd or a path to a file with video paths. This function
-    transforms the user input into a list of paths. Files are expected to be formatted with a single
-    video-path in each line.
+    transforms the user input into a list of paths.
 
     Args:
         video_paths (Union[str, ListConfig, None], optional): a list of video paths. Defaults to None.
```
> **Reviewer** (general comment): what's wrong with the `streams` argument in i3d?