diff --git a/decoder.py b/decoder.py
index e8e548f..b328411 100644
--- a/decoder.py
+++ b/decoder.py
@@ -7,7 +7,6 @@
 import torch
 from creedsolo import RSCodec
 from raptorq import Decoder
-from corner_training.models import QuantizedV2, QuantizedV5
 from decoding_utils import localize_corners_wrapper
 
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -25,9 +24,7 @@ args = parser.parse_args()
 
 if args.version == 0:
     cheight = cwidth = max(args.height // 10, args.width // 10)
 elif args.version == 1:
-    assert args.height * 3 % 80 == args.width * 3 % 80 == 0
-    cheight = int(args.height * 0.15)
-    cwidth = int(args.width * 0.15)
+    cheight = cwidth = int(max(args.height, args.width) * 0.16)
 else:
     raise NotImplementedError
@@ -39,8 +36,6 @@ rs_bytes = frame_bytes - (frame_bytes + 254) // 255 * int(args.level * 255) - 4
 rsc = RSCodec(int(args.level * 255))
 decoder = Decoder.with_defaults(args.size, rs_bytes)
 
-input_crop_size = 1024
-
 if args.version == 0:
     def find_corner(A, f):
         cx, cy = A.shape[:2]
@@ -92,25 +87,20 @@ if args.version == 0:
         return frame, (wcol, rcol, gcol, bcol)
 elif args.version == 1:
-    localize_corners = localize_corners_wrapper(args, input_crop_size)
+    localize_corners = localize_corners_wrapper(args)
 
 
 # ####
-# vid_frames = []
-# # cap = cv2.VideoCapture("/Users/kevinzhao/Downloads/IMG_0994.MOV")
-# cap = cv2.VideoCapture("vid_tiny_v1.mkv")
+# gtruth_frames = []
+# cap = cv2.VideoCapture("vid_mid_v1.mkv")
 # data = None
-# start_time = time.time()
 # while data is None:
 #     ret, raw_frame = cap.read()
 #     if not ret:
 #         print("End of stream")
 #         break
-#     vid_frames.append(raw_frame)
-#     gtruth = cv2.cvtColor(vid_frames[0], cv2.COLOR_BGR2RGB)
+#     gtruth_frames.append(cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB))
 # ####
-
-
 
 if args.input.isdecimal():
     args.input = int(args.input)
 cap = cv2.VideoCapture(args.input)
@@ -119,7 +109,6 @@ start_time = time.time()
 while data is None:
     try:
         ret, raw_frame = cap.read()
-        # raw_frame = cv2.resize(raw_frame, (1024, 1024), interpolation=cv2.INTER_NEAREST)  # TODO: remove
         if not ret:
             print("End of stream")
             break
@@ -131,12 +120,13 @@ while data is None:
             X, Y = raw_frame.shape[:2]
             raw_frame = raw_frame[X // 4: 3 * X // 4, Y // 4: 3 * Y // 4]
         elif args.version == 1:
-            h, w, _ = raw_frame.shape
-            raw_frame = raw_frame[(h - input_crop_size) // 2:-(h - input_crop_size) // 2,  # TODO: put back
-                                  (w - input_crop_size) // 2:-(w - input_crop_size) // 2]
+            pass
+            # h, w, _ = raw_frame.shape
+            # raw_frame = raw_frame[(h - input_crop_size) // 2:-(h - input_crop_size) // 2,
+            #                       (w - input_crop_size) // 2:-(w - input_crop_size) // 2]
 
-        cv2.imshow("", raw_frame)
-        cv2.waitKey(1)
+        # cv2.imshow("", raw_frame)
+        # cv2.waitKey(1)
 
         raw_frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB)
         frame, (wcol, rcol, gcol, bcol) = localize_corners(raw_frame)
@@ -151,21 +141,30 @@ while data is None:
 
         # Convert to new color space
         # calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 192).astype(np.uint8)
-        # calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 192).astype(np.uint8) * 255
-        calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 150).astype(np.uint8) * 255
-        # fig, axs = plt.subplots(1, 2)
-        # axs[0].imshow(frame)
-        # axs[1].imshow(calibrated_frame)
-        # plt.show()
+        calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 128).astype(np.uint8)
 
         # fig, axs = plt.subplots(1, 2)
-        # correct_mask = np.logical_not((calibrated_frame != gtruth).any(axis=2))
+        # axs[0].imshow(frame)
+        # axs[1].imshow(calibrated_frame * 255)
+        # plt.show()
+        #
+        # closest_ind = None
+        # closest_diff = 1
+        # for i, gtruth_frame in enumerate(gtruth_frames):
+        #     diff = (gtruth_frame != calibrated_frame * 255).any(axis=2).mean()
+        #     if diff < closest_diff:
+        #         closest_ind = i
+        #         closest_diff = diff
+        #
+        # gtruth = gtruth_frames[closest_ind]
+        # fig, axs = plt.subplots(1, 2)
+        # correct_mask = np.logical_not((calibrated_frame * 255 != gtruth).any(axis=2))
         # calibrated_frame_copy = calibrated_frame.copy()
         # gtruth_copy = gtruth.copy()
         # calibrated_frame_copy[correct_mask] = [0, 0, 0]
         # gtruth_copy[correct_mask] = [0, 0, 0]
         # axs[0].imshow(gtruth_copy)
-        # axs[1].imshow(calibrated_frame_copy)
+        # axs[1].imshow(calibrated_frame_copy * 255)
         # plt.show()
 
         calibrated_frame = np.packbits(
diff --git a/decoding_utils.py b/decoding_utils.py
index d3bf2d7..5ad74d2 100644
--- a/decoding_utils.py
+++ b/decoding_utils.py
@@ -18,7 +18,7 @@ from corner_training.utils import get_gaussian_filter, get_bounded_slices
 
 torch.backends.quantized.engine = 'qnnpack'
 
 
-def localize_corners_wrapper(args, input_crop_size, debug=False):
+def localize_corners_wrapper(args, debug=False):
     stage1_model_checkpt_path = "checkpts/QuantizedV2_Stage1_128_9.pt"
     stage2_model_checkpt_path = "checkpts/QuantizedV5_Stage2_128_9.pt"
@@ -38,27 +38,24 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
     torch.ao.quantization.convert(stage2_model, inplace=True)
     stage2_model.load_state_dict(torch.load(stage2_model_checkpt_path, map_location=torch.device('cpu')))
 
-    stage1_size = 128
-    stage2_size = input_crop_size // 16
-
-    assert stage1_size & 1 == 0, "Assuming even size when dividing into quadrants"
-    assert stage2_size & 1 == 0, "Assuming even size when center cropping"
 
     stage1_model.eval()
     stage2_model.eval()
-    preprocess_img_stage1 = transforms.Compose([
-        transforms.Lambda(lambda img: cv2.resize(img, (stage1_size, stage1_size), interpolation=cv2.INTER_NEAREST)),
+    stage1_size = 128
+    assert stage1_size & 1 == 0, "Assuming even size when dividing into quadrants"
+
+    np_to_fp32_tensor = transforms.Compose([
         transforms.ToImage(),
         transforms.ToDtype(torch.float32, scale=True),
     ])
 
+    preprocess_img_stage1 = transforms.Compose([
+        transforms.Lambda(lambda img: resize_keep_aspect(img, stage1_size)),
+        np_to_fp32_tensor,
+    ])
+
     gaussian_filter = get_gaussian_filter(4, 4)  # for stage1 NMS heuristic
 
-    preprocess_img_stage2 = transforms.Compose([
-        transforms.ToImage(),
-        transforms.ToDtype(torch.float32, scale=True),
-    ])
-
     # Transform cropped corners until they all look like top left corners, as that's what the model is trained on
     transforms_by_corner = [
         lambda img: img,  # identity
@@ -75,9 +72,8 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
             cropped_frame: Square numpy array
         """
         orig_h, orig_w, _ = cropped_frame.shape
-        assert orig_w == orig_h, "Assuming square img"
-        assert orig_w % stage1_size == 0
-        upscale_factor = orig_w // stage1_size  # for stage 2
+        stage2_size = max(orig_h, orig_w) // 16
+        upscale_factor = min(orig_w, orig_h) / stage1_size  # for stage 2
 
         start_time = time.time()
         stage1_img = preprocess_img_stage1(cropped_frame)
@@ -93,27 +89,28 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
         if debug:
             print(57, time.time() - start_time)
 
-        quad_size = stage1_size // 2
+        quad_h = stage1_img.size(1) // 2  # might miss 1 pixel on edge if odd
+        quad_w = stage1_img.size(2) // 2
 
         corners_by_quad = dict()
         for top_half in (0, 1):  # TODO: bot/right to remove all 1 minuses
             for left_half in (0, 1):
-                quad_i_start = quad_size * (1 - top_half)
-                quad_j_start = quad_size * (1 - left_half)
+                quad_i_start = quad_h * (1 - top_half)
+                quad_j_start = quad_w * (1 - left_half)
 
                 curr_quad_preds = stage1_pred[
-                    quad_i_start: quad_i_start + quad_size,
-                    quad_j_start: quad_j_start + quad_size,
+                    quad_i_start: quad_i_start + quad_h,
+                    quad_j_start: quad_j_start + quad_w,
                 ].clone()
 
                 max_locs = []
                 for i in range(6):  # expect 4 points, but get top 6 to be safe
                     max_ind = torch.argmax(curr_quad_preds).item()  # TODO: more efficient like segtree, maybe account for neighbors too
-                    max_loc = (max_ind // quad_size, max_ind % quad_size)
+                    max_loc = (max_ind // quad_w, max_ind % quad_w)
                     max_locs.append(max_loc)
 
                     # TODO: improve, maybe scale Gaussian peak to val of max_loc, probably better to not subtract from a location multiple times
-                    preds_slice, gaussian_slice = get_bounded_slices((quad_size, quad_size), gaussian_filter.size(),
+                    preds_slice, gaussian_slice = get_bounded_slices((quad_h, quad_w), gaussian_filter.size(),
                                                                      *max_loc)
                     curr_quad_preds[preds_slice] -= gaussian_filter[gaussian_slice]
@@ -122,7 +119,7 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
 
                 min_cost = 1e9
                 min_square = None
-                for potential_combo in itertools.combinations(max_locs, 4):  # TODO: don't repeat symmetrical squares
+                for potential_combo in itertools.combinations(max_locs, 4):
                     curr_pts, curr_cost = score_combo(potential_combo)
                     if curr_cost < min_cost:
                         min_cost = curr_cost
@@ -139,7 +136,7 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
 
         outer_corners = []
         # corner_colors = []  # by center, currently rounding to the pixel in the original image
-        origin = (quad_size, quad_size)
+        origin = (quad_h, quad_w)
         for quad in range(4):  # TODO: consistent (x, y) or (i, j)
             outer_corners.append(max((l2_dist(corner, origin), corner) for corner in corners_by_quad[quad])[1])
             # corner_colors.append(cropped_frame[int((sum(corner[0] for corner in corners_by_quad[quad]) / 4 * upscale_factor)),
@@ -152,7 +149,7 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
             for left_half in (0, 1):
                 corner_ind = top_half * 2 + left_half
                 y, x = outer_corners[corner_ind]
-                upscaled_y, upscaled_x = y * upscale_factor, x * upscale_factor
+                upscaled_y, upscaled_x = round(y * upscale_factor), round(x * upscale_factor)
 
                 top = max(0, upscaled_y - stage2_size // 2)
                 bottom = min(orig_h, upscaled_y + stage2_size // 2)
@@ -164,7 +161,7 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
                 corner_padding[(1 - top_half) * 2 + 1] = stage2_size - (bottom - top)
                 corner_padding[(1 - left_half) * 2] = stage2_size - (right - left)
                 cropped_corner_img = transforms_f.pad(  # TODO: don't pad since that should speed up inference
-                    preprocess_img_stage2(cropped_frame[top:bottom, left:right]),
+                    np_to_fp32_tensor(cropped_frame[top:bottom, left:right]),
                     corner_padding
                 )
                 stage2_imgs.append(cropped_corner_img)
@@ -195,8 +192,8 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
         if debug:
             print(137, time.time() - start_time)
 
-        orig_pred_pts = [(orig_x * upscale_factor + stage2_pred_x - stage2_size // 2,
-                          orig_y * upscale_factor + stage2_pred_y - stage2_size // 2)
+        orig_pred_pts = [(round(orig_x * upscale_factor) + stage2_pred_x - stage2_size // 2,
+                          round(orig_y * upscale_factor) + stage2_pred_y - stage2_size // 2)
                         for (orig_y, orig_x), (stage2_pred_x, stage2_pred_y) in zip(outer_corners, stage2_pred_pts)]
 
         if debug:
@@ -206,56 +203,18 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
         # plt.scatter(np.array(orig_pred_pts).T[0], np.array(orig_pred_pts).T[1])
         # plt.show()
 
-        cheight = int(args.height * 0.15)
-        cwidth = int(args.width * 0.15)
-        cch = int(args.height * 0.15) // 4
-        ccw = int(args.width * 0.15) // 4
-
-        # plt.imshow(cropped_frame)
-        # plt.show()
+        corner_size = int(max(args.height, args.width) * 0.16)
+        qtr_corner_size = corner_size // 4
 
         grid_coords = np.float32([
-            [ccw, cch],
-            [args.width - ccw, cch],
-            [ccw, args.height - cch],
-            [args.width - ccw, args.height - cch],
+            [qtr_corner_size, qtr_corner_size],
+            [args.width - qtr_corner_size, qtr_corner_size],
+            [qtr_corner_size, args.height - qtr_corner_size],
+            [args.width - qtr_corner_size, args.height - qtr_corner_size],
        ])
         grid_coords -= 1/2
 
-        #
-        # grid_coords *= orig_w / args.width
-        # torch_frame = transforms_f.perspective(
-        #     transforms.Compose([
-        #         # transforms.Lambda(
-        #         #     lambda img: cv2.resize(img, (stage1_size, stage1_size), interpolation=cv2.INTER_NEAREST)),
-        #         transforms.ToImage(),
-        #         transforms.ToDtype(torch.float32, scale=True),
-        #     ])(cropped_frame),
-        #     orig_pred_pts,
-        #     grid_coords,
-        # )
-        #
-        # torch_frame = cv2.resize(torch_frame.permute(1, 2, 0).numpy(), (args.width, args.height), interpolation=cv2.INTER_AREA)
-        # # torch_frame = cv2.resize(torch_frame.permute(1, 2, 0).numpy(), (args.width, args.height), interpolation=cv2.INTER_NEAREST)
-        #
-        # # torch_frame = transforms_f.resize(torch_frame, [args.height, args.width]).permute(1, 2, 0).numpy()
-        # # torch_frame = torch_frame.permute(1, 2, 0).numpy()
-        # cropped_frame = (torch_frame * 255).astype(np.uint8)
-        # plt.imshow(cropped_frame)
-        #
-        # plt.axis("off")
-        # plt.show()
-
-
-
-        # grid_coords = np.float32([
-        #     [ccw, cch],
-        #     [args.width - ccw - 1, cch],
-        #     [ccw, args.height - cch - 1],
-        #     [args.width - ccw - 1, args.height - cch - 1],
-        # ])
-
         M = cv2.getPerspectiveTransform(
             np.float32(orig_pred_pts),
             grid_coords,
         )
@@ -267,14 +226,14 @@ def localize_corners_wrapper(args, input_crop_size, debug=False):
         padding = math.ceil(max(args.height, args.width) / 80)  # arbitrary
 
         # guessing wildly on +/- 1s
-        white_sq = cropped_frame[cch + padding: cheight - cch - padding,
-                                 ccw + padding: cwidth - ccw - padding]
-        red_sq = cropped_frame[cch + padding: cheight - cch - padding,
-                               args.width - cwidth + ccw + padding: args.width - ccw - padding]
-        green_sq = cropped_frame[args.height - cheight + cch + padding: args.height - cch - padding,
-                                 ccw + padding: cwidth - ccw - padding]
-        blue_sq = cropped_frame[args.height - cheight + cch + padding: args.height - cch - padding,
-                                args.width - cwidth + ccw + padding: args.width - ccw - padding]
+        white_sq = cropped_frame[qtr_corner_size + padding: corner_size - qtr_corner_size - padding,
+                                 qtr_corner_size + padding: corner_size - qtr_corner_size - padding]
+        red_sq = cropped_frame[qtr_corner_size + padding: corner_size - qtr_corner_size - padding,
+                               args.width - corner_size + qtr_corner_size + padding: args.width - qtr_corner_size - padding]
+        green_sq = cropped_frame[args.height - corner_size + qtr_corner_size + padding: args.height - qtr_corner_size - padding,
+                                 qtr_corner_size + padding: corner_size - qtr_corner_size - padding]
+        blue_sq = cropped_frame[args.height - corner_size + qtr_corner_size + padding: args.height - qtr_corner_size - padding,
+                                args.width - corner_size + qtr_corner_size + padding: args.width - qtr_corner_size - padding]
         corner_colors = [white_sq.mean(axis=(0, 1)), red_sq.mean(axis=(0, 1)), green_sq.mean(axis=(0, 1)),
                          blue_sq.mean(axis=(0, 1))]
@@ -323,6 +282,16 @@ def score_combo(combo):
     return hull, (max(side_lens) - min(side_lens)) / min(side_lens)
 
 
+def resize_keep_aspect(img: np.ndarray, min_len: int) -> np.ndarray:
+    h, w, _ = img.shape
+    if h < w:
+        output_size = (round(min_len * w / h), min_len)
+    else:
+        output_size = (min_len, round(min_len * h / w))
+
+    return cv2.resize(img, output_size, interpolation=cv2.INTER_NEAREST)
+
+
 # Gift wrapping code, adapted from GeeksForGeeks.
 # "This code is contributed by Akarsh Somani, IIIT Kalyani"
 class Point:
diff --git a/encoder.py b/encoder.py
index caf9b59..dbeb2f6 100644
--- a/encoder.py
+++ b/encoder.py
@@ -20,10 +20,12 @@ args = parser.parse_args()
 
 if args.version == 0:
     cheight = cwidth = max(args.height // 10, args.width // 10)
 elif args.version == 1:
-    # cell borders are 0.0375% of width/height
-    assert args.height * 3 % 80 == args.width * 3 % 80 == 0  # TODO: less strict better ratio
-    cheight = int(args.height * 0.15)
-    cwidth = int(args.width * 0.15)
+    # # cell borders are 0.0375% of width/height
+    # assert args.height * 3 % 80 == args.width * 3 % 80 == 0  # TODO: less strict better ratio
+    # cheight = int(args.height * 0.15)
+    # cwidth = int(args.width * 0.15)
+
+    cheight = cwidth = int(max(args.height, args.width) * 0.16)
 else:
     raise NotImplementedError
@@ -74,6 +76,8 @@ def mkframe(packet):
     frame = np.unpackbits(frame)
     # Pad to be multiple of 3 so we can reshape into RGB channels
     frame = np.pad(frame, (0, (3 - len(frame)) % 3))
+    print(frame_size)
+    print(frame.shape)
     frame = np.reshape(frame, (frame_size, 3))
     frame = np.concatenate(
         (
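
Reviewer note, not part of the patch: a minimal sketch of the sizing convention this change moves to. Corner cells become int(max(height, width) * 0.16) pixels square, the stage-1 input is produced by resize_keep_aspect (shorter side pinned to 128, aspect ratio preserved, so the fixed 1024-pixel center crop is no longer needed), and upscale_factor becomes a float that is rounded at its use sites. resize_keep_aspect is copied verbatim from the patch; the 1080x1920 capture and the 720x720 code size are made-up values for illustration only.

    # sketch only; assumes the hunks above are applied as-is
    import cv2
    import numpy as np

    def resize_keep_aspect(img: np.ndarray, min_len: int) -> np.ndarray:
        # Scale the shorter side to min_len, keeping the aspect ratio (copied from the patch).
        h, w, _ = img.shape
        if h < w:
            output_size = (round(min_len * w / h), min_len)  # cv2.resize takes (width, height)
        else:
            output_size = (min_len, round(min_len * h / w))
        return cv2.resize(img, output_size, interpolation=cv2.INTER_NEAREST)

    frame = np.zeros((1080, 1920, 3), dtype=np.uint8)   # hypothetical camera frame
    stage1_img = resize_keep_aspect(frame, 128)
    print(stage1_img.shape)                              # (128, 228, 3): no square crop required
    print(min(frame.shape[0], frame.shape[1]) / 128)     # upscale_factor = 8.4375, a float

    height = width = 720                                 # hypothetical --height/--width
    corner_size = int(max(height, width) * 0.16)         # 115, replaces the 0.15 * height/width pair
    print(corner_size, corner_size // 4)                 # corner_size and qtr_corner_size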