diff --git a/demos/sdl2.c b/demos/sdl2.c
index 8039b2e0e20c9c3c4835fa59af95a62586ac8c4c..b5cdc0e7841f47866245fc4a767aba273be67939 100644
--- a/demos/sdl2.c
+++ b/demos/sdl2.c
@@ -284,7 +284,7 @@ static void render_frame(const struct pl_swapchain_frame *frame)
         .len = icc_profile.size,
     };
 
-    pl_rect2d_aspect_copy(&target.dst_rect, &image.src_rect, 0.0);
+    pl_rect2df_aspect_copy(&target.dst_rect, &image.src_rect, 0.0);
 
     const struct pl_tex *osd = osd_plane.texture;
     struct pl_overlay target_ol;
diff --git a/meson.build b/meson.build
index ab5c46eacd4daea479640645c058e824fc9d601f..d4e69ba8a296a9f2febc3b19bc81fcd82062f031 100644
--- a/meson.build
+++ b/meson.build
@@ -2,7 +2,7 @@ project('libplacebo', ['c', 'cpp'],
   license: 'LGPL2.1+',
   default_options: ['c_std=c99', 'cpp_std=c++11', 'warning_level=2'],
   meson_version: '>=0.49',
-  version: '2.65.0',
+  version: '2.66.0',
 )
 
 # Version number
diff --git a/src/common.c b/src/common.c
index 5cbabfc20e35991716862fd21f4b65784ee033b1..ff91a171353a7e94ae71b43a5d104f55adaecf83 100644
--- a/src/common.c
+++ b/src/common.c
@@ -41,6 +41,28 @@ void pl_rect3d_normalize(struct pl_rect3d *rc)
     };
 }
 
+void pl_rect2df_normalize(struct pl_rect2df *rc)
+{
+    // Snapshot the input so every edge is derived from the original values
+    const struct pl_rect2df in = *rc;
+    rc->x0 = PL_MIN(in.x0, in.x1);
+    rc->x1 = PL_MAX(in.x0, in.x1);
+    rc->y0 = PL_MIN(in.y0, in.y1);
+    rc->y1 = PL_MAX(in.y0, in.y1);
+}
+
+void pl_rect3df_normalize(struct pl_rect3df *rc)
+{
+    // Snapshot the input so every edge is derived from the original values
+    const struct pl_rect3df in = *rc;
+    rc->x0 = PL_MIN(in.x0, in.x1);
+    rc->x1 = PL_MAX(in.x0, in.x1);
+    rc->y0 = PL_MIN(in.y0, in.y1);
+    rc->y1 = PL_MAX(in.y0, in.y1);
+    rc->z0 = PL_MIN(in.z0, in.z1);
+    rc->z1 = PL_MAX(in.z0, in.z1);
+}
+
 struct pl_rect2d pl_rect2df_round(const struct pl_rect2df *rc)
 {
     return (struct pl_rect2d) {
@@ -321,44 +343,3 @@ void pl_rect2df_offset(struct pl_rect2df *rc, float offset_x, float offset_y)
     rc->y0 += offset_y;
     rc->y1 += offset_y;
 }
-
-float pl_rect2d_aspect(const struct pl_rect2d *rc)
-{
-    float w = abs(pl_rect_w(*rc)), h = abs(pl_rect_h(*rc));
-    return h ? (w / h) : 0.0;
-}
-
-void pl_rect2d_aspect_set(struct pl_rect2d *rc, float aspect, float panscan)
-{
-    struct pl_rect2df frc = { rc->x0, rc->y0, rc->x1, rc->y1 };
-    pl_rect2df_aspect_set(&frc, aspect, panscan);
-    *rc = pl_rect2df_round(&frc);
-}
-
-void pl_rect2d_aspect_fit(struct pl_rect2d *rc, const struct pl_rect2df *src,
-                          float panscan)
-{
-    struct pl_rect2df frc = { rc->x0, rc->y0, rc->x1, rc->y1 };
-    pl_rect2df_aspect_fit(&frc, src, panscan);
-    *rc = pl_rect2df_round(&frc);
-}
-
-void pl_rect2d_stretch(struct pl_rect2d *rc, float stretch_x, float stretch_y)
-{
-    struct pl_rect2df frc = { rc->x0, rc->y0, rc->x1, rc->y1 };
-    pl_rect2df_stretch(&frc, stretch_x, stretch_y);
-    *rc = pl_rect2df_round(&frc);
-}
-
-void pl_rect2d_offset(struct pl_rect2d *rc, int offset_x, int offset_y)
-{
-    if (rc->x1 < rc->x0)
-        offset_x = -offset_x;
-    if (rc->y1 < rc->y0)
-        offset_y = -offset_y;
-
-    rc->x0 += offset_x;
-    rc->x1 += offset_x;
-    rc->y0 += offset_y;
-    rc->y1 += offset_y;
-}
diff --git a/src/include/libplacebo/common.h b/src/include/libplacebo/common.h
index 8d8a79c44c10f90bce287450ded78b44d89aead6..ce0bc04fbafa35fd25213efc686f9de6daab3025 100644
--- a/src/include/libplacebo/common.h
+++ b/src/include/libplacebo/common.h
@@ -62,6 +62,9 @@ struct pl_rect3df {
 void pl_rect2d_normalize(struct pl_rect2d *rc);
 void pl_rect3d_normalize(struct pl_rect3d *rc);
 
+void pl_rect2df_normalize(struct pl_rect2df *rc);
+void pl_rect3df_normalize(struct pl_rect3df *rc);
+
 // Return the rounded form of a rect.
 struct pl_rect2d pl_rect2df_round(const struct pl_rect2df *rc);
 struct pl_rect3d pl_rect3df_round(const struct pl_rect3df *rc);
@@ -172,17 +175,4 @@ void pl_rect2df_offset(struct pl_rect2df *rc, float offset_x, float offset_y);
 // Scale a rect uniformly in both dimensions.
 #define pl_rect2df_zoom(rc, zoom) pl_rect2df_stretch((rc), (zoom), (zoom))
 
-// Variants of the functions above that operate directly on rounded rects.
-// Note: Applying multiple of these operations compounds rounding error in each
-// step. Consider doing the calculations on pl_rect2df and rounding at the end.
-float pl_rect2d_aspect(const struct pl_rect2d *rc);
-void pl_rect2d_aspect_set(struct pl_rect2d *rc, float aspect, float panscan);
-#define pl_rect2d_aspect_copy(rc, src, panscan) \
-    pl_rect2d_aspect_set((rc), pl_rect2df_aspect(src), (panscan))
-void pl_rect2d_aspect_fit(struct pl_rect2d *rc, const struct pl_rect2df *src,
-                          float panscan);
-void pl_rect2d_stretch(struct pl_rect2d *rc, float stretch_x, float stretch_y);
-void pl_rect2d_offset(struct pl_rect2d *rc, int offset_x, int offset_y);
-#define pl_rect2d_zoom(rc, zoom) pl_rect2d_stretch((rc), (zoom), (zoom))
-
 #endif // LIBPLACEBO_COMMON_H_
diff --git a/src/include/libplacebo/renderer.h b/src/include/libplacebo/renderer.h
index 11d7012494dbd6d6370d02fcd0425c82f044f7da..b3ccf57ba3c69197b7629338c1b060d19b630e8b 100644
--- a/src/include/libplacebo/renderer.h
+++ b/src/include/libplacebo/renderer.h
@@ -387,7 +387,12 @@ struct pl_render_target {
     // larger or smaller than the src_rect, or if the aspect ratio is
     // different, scaling will occur. `dst_rect` may be flipped, and may be
     // partially or wholly outside the bounds of the fbo. (Optional)
-    struct pl_rect2d dst_rect;
+    //
+    // Note: Because the GPU can only render to a whole number of pixels, the
+    // actual rendered area is rounded to the nearest pixel. The only reason
+    // this is `pl_rect2df` and not `pl_rect2d` is to make it easier to
+    // directly use `pl_rect2df_aspect_*` functions on it.
+    struct pl_rect2df dst_rect;
 
     // The color representation and space of the output. If this does not match
     // the color space of the source, libplacebo will convert the colors
diff --git a/src/renderer.c b/src/renderer.c
index 0fc212940543db0574c73274acf1a0de8e401edd..2a6990773cbff001850a72d3e78576d78d252ed9 100644
--- a/src/renderer.c
+++ b/src/renderer.c
@@ -243,6 +243,9 @@ struct pass_state {
     // also updates as the refplane evolves (e.g. due to user hook prescalers)
     struct pl_rect2df ref_rect;
 
+    // Integer version of `target.dst_rect`. Semantically identical.
+    struct pl_rect2d dst_rect;
+
     // Cached copies of the `image` / `target` for this rendering pass,
     // corrected to make sure all rects etc. are properly defaulted/inferred.
     struct pl_image image;
@@ -654,7 +657,7 @@ static bool pass_hook(struct pass_state *pass, struct img *img,
             .color = img->color,
             .components = img->comps,
             .src_rect = pass->ref_rect,
-            .dst_rect = pass->target.dst_rect,
+            .dst_rect = pass->dst_rect,
         };
 
         // TODO: Add some sort of `test` API function to the hooks that allows
@@ -1050,6 +1053,8 @@ static bool plane_user_hooks(struct pass_state *pass, struct plane_state *st,
     return pass_hook(pass, &st->img, plane_stages[st->type], params);
 }
 
+static void fix_rects(struct pass_state *pass, const struct pl_tex *ref_tex);
+
 // This scales and merges all of the source images, and initializes pass->img.
 static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass,
                             const struct pl_render_params *params)
@@ -1082,8 +1087,6 @@ static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass,
         *st = (struct plane_state) {
             .plane = image->planes[i],
             .img = {
-                .w = image->planes[i].texture->params.w,
-                .h = image->planes[i].texture->params.h,
                 .tex = image->planes[i].texture,
                 .repr = image->repr,
                 .color = image->color,
@@ -1111,16 +1114,8 @@ static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass,
     // Original ref texture, even after preprocessing
     const struct pl_tex *ref_tex = ref->plane.texture;
 
-    // At this point in time we can finally infer src_rect to ensure it's valid
-    if (!pl_rect_w(image->src_rect)) {
-        image->src_rect.x0 = 0;
-        image->src_rect.x1 = ref_tex->params.w;
-    }
-
-    if (!pl_rect_h(image->src_rect)) {
-        image->src_rect.y0 = 0;
-        image->src_rect.y1 = ref_tex->params.h;
-    }
+    // At this point in time we can finally infer/fix the image/target rects
+    fix_rects(pass, ref_tex);
 
     // Guess the source primaries based on resolution
     if (!image->color.primaries) {
@@ -1152,6 +1147,9 @@ static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass,
             .y1 = image->src_rect.y1 / rry - sy / ry,
         };
 
+        st->img.w = roundf(pl_rect_w(st->img.rect));
+        st->img.h = roundf(pl_rect_h(st->img.rect));
+
         if (st == ref) {
             // Make sure st->rc == src_rect
             pl_assert(rrx == 1 && rry == 1 && sx == 0 && sy == 0);
@@ -1300,17 +1298,15 @@ static bool pass_scale_main(struct pl_renderer *rr, struct pass_state *pass,
         return true;
     }
 
-    const struct pl_image *image = &pass->image;
-    const struct pl_render_target *target = &pass->target;
-
     struct img *img = &pass->img;
     struct pl_sample_src src = {
         .components = img->comps,
-        .new_w      = abs(pl_rect_w(target->dst_rect)),
-        .new_h      = abs(pl_rect_h(target->dst_rect)),
+        .new_w      = abs(pl_rect_w(pass->dst_rect)),
+        .new_h      = abs(pl_rect_h(pass->dst_rect)),
         .rect       = img->rect,
     };
 
+    const struct pl_image *image = &pass->image;
     bool need_fbo = image->num_overlays > 0;
     need_fbo |= rr->peak_detect_state && !params->allow_delayed_peak_detect;
 
@@ -1532,34 +1528,66 @@ fallback:
     bool ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
         .shader = &sh,
         .target = fbo,
-        .rect   = target->dst_rect,
+        .rect   = pass->dst_rect,
     });
 
     *img = (struct img) {0};
     return ok;
 }
 
-static void fix_rects(struct pl_image *image, struct pl_render_target *target)
+// Infers any missing src/dst rect, clips and rounds the dst rect to the FBO,
+// rescales the src rect to match, and stores the integer `pass->dst_rect`.
+static void fix_rects(struct pass_state *pass, const struct pl_tex *ref_tex)
 {
-    if ((!target->dst_rect.x0 && !target->dst_rect.x1) ||
-        (!target->dst_rect.y0 && !target->dst_rect.y1))
-    {
-        target->dst_rect = (struct pl_rect2d) {
-            0, 0, target->fbo->params.w, target->fbo->params.h,
-        };
-    }
+    struct pl_rect2df *src = &pass->image.src_rect,
+                      *dst = &pass->target.dst_rect;
 
-    // We always want to prefer flipping in the dst_rect over flipping in
-    // the src_rect. They're functionally equivalent either way.
-    if (image->src_rect.x0 > image->src_rect.x1) {
-        PL_SWAP(image->src_rect.x0, image->src_rect.x1);
-        PL_SWAP(target->dst_rect.x0, target->dst_rect.x1);
-    }
+    if ((!src->x0 && !src->x1) || (!src->y0 && !src->y1)) {
+        src->x1 = ref_tex->params.w;
+        src->y1 = ref_tex->params.h;
+    }
 
-    if (image->src_rect.y0 > image->src_rect.y1) {
-        PL_SWAP(image->src_rect.y0, image->src_rect.y1);
-        PL_SWAP(target->dst_rect.y0, target->dst_rect.y1);
+    if ((!dst->x0 && !dst->x1) || (!dst->y0 && !dst->y1)) {
+        dst->x1 = pass->target.fbo->params.w;
+        dst->y1 = pass->target.fbo->params.h;
     }
+
+    // Keep track of whether the end-to-end rendering is flipped
+    bool flipped_x = (src->x0 > src->x1) != (dst->x0 > dst->x1),
+         flipped_y = (src->y0 > src->y1) != (dst->y0 > dst->y1);
+
+    // Normalize both rects to make the math easier
+    pl_rect2df_normalize(src);
+    pl_rect2df_normalize(dst);
+
+    // Round the output rect and clip it to the framebuffer dimensions
+    float rx0 = roundf(PL_MAX(dst->x0, 0.0)),
+          ry0 = roundf(PL_MAX(dst->y0, 0.0)),
+          rx1 = roundf(PL_MIN(dst->x1, pass->target.fbo->params.w)),
+          ry1 = roundf(PL_MIN(dst->y1, pass->target.fbo->params.h));
+
+    // Adjust the src rect to the rounded crop, scaling each axis separately
+    float scale_x = pl_rect_w(*src) / pl_rect_w(*dst),
+          scale_y = pl_rect_h(*src) / pl_rect_h(*dst),
+          base_x = src->x0, base_y = src->y0;
+
+    src->x0 = base_x + (rx0 - dst->x0) * scale_x;
+    src->x1 = base_x + (rx1 - dst->x0) * scale_x;
+    src->y0 = base_y + (ry0 - dst->y0) * scale_y;
+    src->y1 = base_y + (ry1 - dst->y0) * scale_y;
+
+    // Update dst_rect to the rounded values and re-apply flip if needed. We
+    // always do this in the `dst_rect` rather than the `src_rect` because this
+    // allows e.g. polar sampling compute shaders to work.
+    *dst = (struct pl_rect2df) {
+        .x0 = flipped_x ? rx1 : rx0,
+        .y0 = flipped_y ? ry1 : ry0,
+        .x1 = flipped_x ? rx0 : rx1,
+        .y1 = flipped_y ? ry0 : ry1,
+    };
+
+    // Integer copy of the above, for convenience
+    pass->dst_rect = (struct pl_rect2d) { dst->x0, dst->y0, dst->x1, dst->y1 };
 }
 
 bool pl_render_image(struct pl_renderer *rr, const struct pl_image *pimage,
@@ -1580,10 +1608,11 @@ bool pl_render_image(struct pl_renderer *rr, const struct pl_image *pimage,
     struct pl_image *image = &pass.image;
     struct pl_render_target *target = &pass.target;
 
-    fix_rects(image, target);
     pl_color_space_infer(&image->color);
     pl_color_space_infer(&target->color);
 
+    // Note: the rects are fixed as part of `pass_read_image`
+
     // As a special case, don't infer the image primaries just yet, since
     // that's done in a resolution-dependent way in pass_read_image
     if (!pimage->color.primaries)
@@ -1655,10 +1684,10 @@ void pl_render_target_from_swapchain(struct pl_render_target *out_target,
 
 bool pl_render_target_partial(const struct pl_render_target *target)
 {
-    int x0 = PL_MIN(target->dst_rect.x0, target->dst_rect.x1),
-        y0 = PL_MIN(target->dst_rect.y0, target->dst_rect.y1),
-        x1 = PL_MAX(target->dst_rect.x0, target->dst_rect.x1),
-        y1 = PL_MAX(target->dst_rect.y0, target->dst_rect.y1),
+    int x0 = roundf(PL_MIN(target->dst_rect.x0, target->dst_rect.x1)),
+        y0 = roundf(PL_MIN(target->dst_rect.y0, target->dst_rect.y1)),
+        x1 = roundf(PL_MAX(target->dst_rect.x0, target->dst_rect.x1)),
+        y1 = roundf(PL_MAX(target->dst_rect.y0, target->dst_rect.y1)),
         fbo_w = target->fbo->params.w,
         fbo_h = target->fbo->params.h;