Compare commits
3 commits
74dcc919d6
...
39e7cb081d
Author | SHA1 | Date | |
---|---|---|---|
|
39e7cb081d | ||
|
0d8a8409f9 | ||
|
f2e199949f |
3 changed files with 109 additions and 73 deletions
62
decoder.py
62
decoder.py
|
@ -7,7 +7,6 @@ import torch
|
|||
from creedsolo import RSCodec
|
||||
from raptorq import Decoder
|
||||
|
||||
from corner_training.models import QuantizedV2, QuantizedV5
|
||||
from decoding_utils import localize_corners_wrapper
|
||||
|
||||
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
|
@ -25,9 +24,7 @@ args = parser.parse_args()
|
|||
if args.version == 0:
|
||||
cheight = cwidth = max(args.height // 10, args.width // 10)
|
||||
elif args.version == 1:
|
||||
assert args.height * 3 % 80 == args.width * 3 % 80 == 0
|
||||
cheight = int(args.height * 0.15)
|
||||
cwidth = int(args.width * 0.15)
|
||||
cheight = cwidth = int(max(args.height, args.width) * 0.16)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
@ -39,8 +36,6 @@ rs_bytes = frame_bytes - (frame_bytes + 254) // 255 * int(args.level * 255) - 4
|
|||
rsc = RSCodec(int(args.level * 255))
|
||||
decoder = Decoder.with_defaults(args.size, rs_bytes)
|
||||
|
||||
input_crop_size = 1024
|
||||
|
||||
if args.version == 0:
|
||||
def find_corner(A, f):
|
||||
cx, cy = A.shape[:2]
|
||||
|
@ -92,7 +87,19 @@ if args.version == 0:
|
|||
return frame, (wcol, rcol, gcol, bcol)
|
||||
|
||||
elif args.version == 1:
|
||||
localize_corners = localize_corners_wrapper(args, input_crop_size)
|
||||
localize_corners = localize_corners_wrapper(args)
|
||||
|
||||
# ####
|
||||
# gtruth_frames = []
|
||||
# cap = cv2.VideoCapture("vid_mid_v1.mkv")
|
||||
# data = None
|
||||
# while data is None:
|
||||
# ret, raw_frame = cap.read()
|
||||
# if not ret:
|
||||
# print("End of stream")
|
||||
# break
|
||||
# gtruth_frames.append(cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB))
|
||||
# ####
|
||||
|
||||
if args.input.isdecimal():
|
||||
args.input = int(args.input)
|
||||
|
@ -113,12 +120,13 @@ while data is None:
|
|||
X, Y = raw_frame.shape[:2]
|
||||
raw_frame = raw_frame[X // 4: 3 * X // 4, Y // 4: 3 * Y // 4]
|
||||
elif args.version == 1:
|
||||
h, w, _ = raw_frame.shape
|
||||
raw_frame = raw_frame[(h - input_crop_size) // 2:-(h - input_crop_size) // 2,
|
||||
(w - input_crop_size) // 2:-(w - input_crop_size) // 2]
|
||||
pass
|
||||
# h, w, _ = raw_frame.shape
|
||||
# raw_frame = raw_frame[(h - input_crop_size) // 2:-(h - input_crop_size) // 2,
|
||||
# (w - input_crop_size) // 2:-(w - input_crop_size) // 2]
|
||||
|
||||
cv2.imshow("", raw_frame)
|
||||
cv2.waitKey(1)
|
||||
# cv2.imshow("", raw_frame)
|
||||
# cv2.waitKey(1)
|
||||
raw_frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB)
|
||||
|
||||
frame, (wcol, rcol, gcol, bcol) = localize_corners(raw_frame)
|
||||
|
@ -134,10 +142,31 @@ while data is None:
|
|||
# Convert to new color space
|
||||
# calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 192).astype(np.uint8)
|
||||
calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 128).astype(np.uint8)
|
||||
fig, axs = plt.subplots(1, 2)
|
||||
axs[0].imshow(frame)
|
||||
axs[1].imshow(calibrated_frame * 255)
|
||||
plt.show()
|
||||
|
||||
# fig, axs = plt.subplots(1, 2)
|
||||
# axs[0].imshow(frame)
|
||||
# axs[1].imshow(calibrated_frame * 255)
|
||||
# plt.show()
|
||||
#
|
||||
# closest_ind = None
|
||||
# closest_diff = 1
|
||||
# for i, gtruth_frame in enumerate(gtruth_frames):
|
||||
# diff = (gtruth_frame != calibrated_frame * 255).any(axis=2).mean()
|
||||
# if diff < closest_diff:
|
||||
# closest_ind = i
|
||||
# closest_diff = diff
|
||||
#
|
||||
# gtruth = gtruth_frames[closest_ind]
|
||||
# fig, axs = plt.subplots(1, 2)
|
||||
# correct_mask = np.logical_not((calibrated_frame * 255 != gtruth).any(axis=2))
|
||||
# calibrated_frame_copy = calibrated_frame.copy()
|
||||
# gtruth_copy = gtruth.copy()
|
||||
# calibrated_frame_copy[correct_mask] = [0, 0, 0]
|
||||
# gtruth_copy[correct_mask] = [0, 0, 0]
|
||||
# axs[0].imshow(gtruth_copy)
|
||||
# axs[1].imshow(calibrated_frame_copy * 255)
|
||||
# plt.show()
|
||||
|
||||
calibrated_frame = np.packbits(
|
||||
np.concatenate(
|
||||
(
|
||||
|
@ -155,6 +184,7 @@ while data is None:
|
|||
break
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
cap.release()
|
||||
with open(args.output, "wb") as f:
|
||||
f.write(data)
|
||||
|
|
|
@ -18,7 +18,7 @@ from corner_training.utils import get_gaussian_filter, get_bounded_slices
|
|||
torch.backends.quantized.engine = 'qnnpack'
|
||||
|
||||
|
||||
def localize_corners_wrapper(args, input_crop_size, debug=False):
|
||||
def localize_corners_wrapper(args, debug=False):
|
||||
stage1_model_checkpt_path = "checkpts/QuantizedV2_Stage1_128_9.pt"
|
||||
stage2_model_checkpt_path = "checkpts/QuantizedV5_Stage2_128_9.pt"
|
||||
|
||||
|
@ -38,27 +38,24 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
torch.ao.quantization.convert(stage2_model, inplace=True)
|
||||
stage2_model.load_state_dict(torch.load(stage2_model_checkpt_path, map_location=torch.device('cpu')))
|
||||
|
||||
stage1_size = 128
|
||||
stage2_size = input_crop_size // 16
|
||||
|
||||
assert stage1_size & 1 == 0, "Assuming even size when dividing into quadrants"
|
||||
assert stage2_size & 1 == 0, "Assuming even size when center cropping"
|
||||
stage1_model.eval()
|
||||
stage2_model.eval()
|
||||
|
||||
preprocess_img_stage1 = transforms.Compose([
|
||||
transforms.Lambda(lambda img: cv2.resize(img, (stage1_size, stage1_size), interpolation=cv2.INTER_NEAREST)),
|
||||
stage1_size = 128
|
||||
assert stage1_size & 1 == 0, "Assuming even size when dividing into quadrants"
|
||||
|
||||
np_to_fp32_tensor = transforms.Compose([
|
||||
transforms.ToImage(),
|
||||
transforms.ToDtype(torch.float32, scale=True),
|
||||
])
|
||||
|
||||
preprocess_img_stage1 = transforms.Compose([
|
||||
transforms.Lambda(lambda img: resize_keep_aspect(img, stage1_size)),
|
||||
np_to_fp32_tensor,
|
||||
])
|
||||
|
||||
gaussian_filter = get_gaussian_filter(4, 4) # for stage1 NMS heuristic
|
||||
|
||||
preprocess_img_stage2 = transforms.Compose([
|
||||
transforms.ToImage(),
|
||||
transforms.ToDtype(torch.float32, scale=True),
|
||||
])
|
||||
|
||||
# Transform cropped corners until they all look like top left corners, as that's what the model is trained on
|
||||
transforms_by_corner = [
|
||||
lambda img: img, # identity
|
||||
|
@ -75,9 +72,8 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
cropped_frame: Square numpy array
|
||||
"""
|
||||
orig_h, orig_w, _ = cropped_frame.shape
|
||||
assert orig_w == orig_h, "Assuming square img"
|
||||
assert orig_w % stage1_size == 0
|
||||
upscale_factor = orig_w // stage1_size # for stage 2
|
||||
stage2_size = max(orig_h, orig_w) // 16
|
||||
upscale_factor = min(orig_w, orig_h) / stage1_size # for stage 2
|
||||
|
||||
start_time = time.time()
|
||||
stage1_img = preprocess_img_stage1(cropped_frame)
|
||||
|
@ -93,27 +89,28 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
if debug:
|
||||
print(57, time.time() - start_time)
|
||||
|
||||
quad_size = stage1_size // 2
|
||||
quad_h = stage1_img.size(1) // 2 # might miss 1 pixel on edge if odd
|
||||
quad_w = stage1_img.size(2) // 2
|
||||
|
||||
corners_by_quad = dict()
|
||||
|
||||
for top_half in (0, 1): # TODO: bot/right to remove all 1 minuses
|
||||
for left_half in (0, 1):
|
||||
quad_i_start = quad_size * (1 - top_half)
|
||||
quad_j_start = quad_size * (1 - left_half)
|
||||
quad_i_start = quad_h * (1 - top_half)
|
||||
quad_j_start = quad_w * (1 - left_half)
|
||||
curr_quad_preds = stage1_pred[
|
||||
quad_i_start: quad_i_start + quad_size,
|
||||
quad_j_start: quad_j_start + quad_size,
|
||||
quad_i_start: quad_i_start + quad_h,
|
||||
quad_j_start: quad_j_start + quad_w,
|
||||
].clone()
|
||||
|
||||
max_locs = []
|
||||
for i in range(6): # expect 4 points, but get top 6 to be safe
|
||||
max_ind = torch.argmax(curr_quad_preds).item() # TODO: more efficient like segtree, maybe account for neighbors too
|
||||
max_loc = (max_ind // quad_size, max_ind % quad_size)
|
||||
max_loc = (max_ind // quad_w, max_ind % quad_w)
|
||||
max_locs.append(max_loc)
|
||||
|
||||
# TODO: improve, maybe scale Gaussian peak to val of max_loc, probably better to not subtract from a location multiple times
|
||||
preds_slice, gaussian_slice = get_bounded_slices((quad_size, quad_size), gaussian_filter.size(),
|
||||
preds_slice, gaussian_slice = get_bounded_slices((quad_h, quad_w), gaussian_filter.size(),
|
||||
*max_loc)
|
||||
curr_quad_preds[preds_slice] -= gaussian_filter[gaussian_slice]
|
||||
|
||||
|
@ -122,7 +119,7 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
|
||||
min_cost = 1e9
|
||||
min_square = None
|
||||
for potential_combo in itertools.combinations(max_locs, 4): # TODO: don't repeat symmetrical squares
|
||||
for potential_combo in itertools.combinations(max_locs, 4):
|
||||
curr_pts, curr_cost = score_combo(potential_combo)
|
||||
if curr_cost < min_cost:
|
||||
min_cost = curr_cost
|
||||
|
@ -139,7 +136,7 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
|
||||
outer_corners = []
|
||||
# corner_colors = [] # by center, currently rounding to the pixel in the original image
|
||||
origin = (quad_size, quad_size)
|
||||
origin = (quad_h, quad_w)
|
||||
for quad in range(4): # TODO: consistent (x, y) or (i, j)
|
||||
outer_corners.append(max((l2_dist(corner, origin), corner) for corner in corners_by_quad[quad])[1])
|
||||
# corner_colors.append(cropped_frame[int((sum(corner[0] for corner in corners_by_quad[quad]) / 4 * upscale_factor)),
|
||||
|
@ -152,7 +149,7 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
for left_half in (0, 1):
|
||||
corner_ind = top_half * 2 + left_half
|
||||
y, x = outer_corners[corner_ind]
|
||||
upscaled_y, upscaled_x = y * upscale_factor, x * upscale_factor
|
||||
upscaled_y, upscaled_x = round(y * upscale_factor), round(x * upscale_factor)
|
||||
|
||||
top = max(0, upscaled_y - stage2_size // 2)
|
||||
bottom = min(orig_h, upscaled_y + stage2_size // 2)
|
||||
|
@ -164,7 +161,7 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
corner_padding[(1 - top_half) * 2 + 1] = stage2_size - (bottom - top)
|
||||
corner_padding[(1 - left_half) * 2] = stage2_size - (right - left)
|
||||
cropped_corner_img = transforms_f.pad( # TODO: don't pad since that should speed up inference
|
||||
preprocess_img_stage2(cropped_frame[top:bottom, left:right]),
|
||||
np_to_fp32_tensor(cropped_frame[top:bottom, left:right]),
|
||||
corner_padding
|
||||
)
|
||||
stage2_imgs.append(cropped_corner_img)
|
||||
|
@ -195,8 +192,8 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
if debug:
|
||||
print(137, time.time() - start_time)
|
||||
|
||||
orig_pred_pts = [(orig_x * upscale_factor + stage2_pred_x - stage2_size // 2,
|
||||
orig_y * upscale_factor + stage2_pred_y - stage2_size // 2)
|
||||
orig_pred_pts = [(round(orig_x * upscale_factor) + stage2_pred_x - stage2_size // 2,
|
||||
round(orig_y * upscale_factor) + stage2_pred_y - stage2_size // 2)
|
||||
for (orig_y, orig_x), (stage2_pred_x, stage2_pred_y) in zip(outer_corners, stage2_pred_pts)]
|
||||
|
||||
if debug:
|
||||
|
@ -206,40 +203,37 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
|
|||
# plt.scatter(np.array(orig_pred_pts).T[0], np.array(orig_pred_pts).T[1])
|
||||
# plt.show()
|
||||
|
||||
cheight = int(args.height * 0.15)
|
||||
cwidth = int(args.width * 0.15)
|
||||
cch = int(args.height * 0.15) // 4 - 1 # 0-indexed
|
||||
ccw = int(args.width * 0.15) // 4 - 1
|
||||
corner_size = int(max(args.height, args.width) * 0.16)
|
||||
qtr_corner_size = corner_size // 4
|
||||
|
||||
grid_coords = np.float32([
|
||||
[qtr_corner_size, qtr_corner_size],
|
||||
[args.width - qtr_corner_size, qtr_corner_size],
|
||||
[qtr_corner_size, args.height - qtr_corner_size],
|
||||
[args.width - qtr_corner_size, args.height - qtr_corner_size],
|
||||
])
|
||||
|
||||
grid_coords -= 1/2
|
||||
|
||||
M = cv2.getPerspectiveTransform(
|
||||
np.float32(orig_pred_pts),
|
||||
np.float32(
|
||||
[
|
||||
[ccw, cch],
|
||||
[args.width - ccw - 1, cch],
|
||||
[ccw, args.height - cch - 1],
|
||||
[args.width - ccw - 1, args.height - cch - 1],
|
||||
]
|
||||
),
|
||||
grid_coords,
|
||||
)
|
||||
|
||||
cropped_frame = cv2.warpPerspective(cropped_frame, M, (args.width, args.height))
|
||||
|
||||
# 1-index
|
||||
cch += 1
|
||||
ccw += 1
|
||||
# cropped_frame = cv2.warpPerspective(cropped_frame, M, (args.width, args.height), flags=cv2.INTER_NEAREST)
|
||||
|
||||
padding = math.ceil(max(args.height, args.width) / 80) # arbitrary
|
||||
|
||||
# guessing wildly on +/- 1s
|
||||
white_sq = cropped_frame[cch + padding: cheight - cch - padding,
|
||||
ccw + padding: cwidth - ccw - padding]
|
||||
red_sq = cropped_frame[cch + padding: cheight - cch - padding,
|
||||
args.width - cwidth + ccw + padding: args.width - ccw - padding]
|
||||
green_sq = cropped_frame[args.height - cheight + cch + padding: args.height - cch - padding,
|
||||
ccw + padding: cwidth - ccw - padding]
|
||||
blue_sq = cropped_frame[args.height - cheight + cch + padding: args.height - cch - padding,
|
||||
args.width - cwidth + ccw + padding: args.width - ccw - padding]
|
||||
white_sq = cropped_frame[qtr_corner_size + padding: corner_size - qtr_corner_size - padding,
|
||||
qtr_corner_size + padding: corner_size - qtr_corner_size - padding]
|
||||
red_sq = cropped_frame[qtr_corner_size + padding: corner_size - qtr_corner_size - padding,
|
||||
args.width - corner_size + qtr_corner_size + padding: args.width - qtr_corner_size - padding]
|
||||
green_sq = cropped_frame[args.height - corner_size + qtr_corner_size + padding: args.height - qtr_corner_size - padding,
|
||||
qtr_corner_size + padding: corner_size - qtr_corner_size - padding]
|
||||
blue_sq = cropped_frame[args.height - corner_size + qtr_corner_size + padding: args.height - qtr_corner_size - padding,
|
||||
args.width - corner_size + qtr_corner_size + padding: args.width - qtr_corner_size - padding]
|
||||
|
||||
corner_colors = [white_sq.mean(axis=(0, 1)), red_sq.mean(axis=(0, 1)),
|
||||
green_sq.mean(axis=(0, 1)), blue_sq.mean(axis=(0, 1))]
|
||||
|
@ -288,6 +282,16 @@ def score_combo(combo):
|
|||
return hull, (max(side_lens) - min(side_lens)) / min(side_lens)
|
||||
|
||||
|
||||
def resize_keep_aspect(img: np.ndarray, min_len: int) -> np.ndarray:
    """Resize ``img`` so that its shorter side is ``min_len`` pixels long.

    The longer side is scaled by the same factor (rounded to the nearest
    pixel), so the aspect ratio is preserved up to rounding.

    Args:
        img: Image array whose first two axes are height and width. A
            trailing channel axis is optional.
        min_len: Target length, in pixels, of the shorter side.

    Returns:
        The resized image, resampled with nearest-neighbour interpolation.
    """
    # Only the first two axes are spatial. Slicing instead of unpacking three
    # values also accepts single-channel (H, W) arrays.
    h, w = img.shape[:2]

    # cv2.resize expects the target size as (width, height).
    if h < w:
        output_size = (round(min_len * w / h), min_len)
    else:
        output_size = (min_len, round(min_len * h / w))

    return cv2.resize(img, output_size, interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
|
||||
# Gift wrapping code, adapted from GeeksForGeeks.
|
||||
# "This code is contributed by Akarsh Somani, IIIT Kalyani"
|
||||
class Point:
|
||||
|
|
10
encoder.py
10
encoder.py
|
@ -20,10 +20,12 @@ args = parser.parse_args()
|
|||
if args.version == 0:
|
||||
cheight = cwidth = max(args.height // 10, args.width // 10)
|
||||
elif args.version == 1:
|
||||
# cell borders are 3.75% (3/80) of width/height
|
||||
assert args.height * 3 % 80 == args.width * 3 % 80 == 0 # TODO: less strict better ratio
|
||||
cheight = int(args.height * 0.15)
|
||||
cwidth = int(args.width * 0.15)
|
||||
# # cell borders are 3.75% (3/80) of width/height
|
||||
# assert args.height * 3 % 80 == args.width * 3 % 80 == 0 # TODO: less strict better ratio
|
||||
# cheight = int(args.height * 0.15)
|
||||
# cwidth = int(args.width * 0.15)
|
||||
|
||||
cheight = cwidth = int(max(args.height, args.width) * 0.16)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
Loading…
Reference in a new issue