/**************************************************************************
 *
 * Copyright 2010-2021 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * Setup/binning code for screen-aligned quads.
 */

#include "util/u_math.h"
#include "util/u_memory.h"
#include "lp_perf.h"
#include "lp_setup_context.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
#include "lp_state_setup.h"


#define NUM_CHANNELS 4

#define UNDETERMINED_BLIT  -1


static inline int
subpixel_snap(float a)
{
   return util_iround(FIXED_ONE * a);
}


static inline float
fixed_to_float(int a)
{
   return a * (1.0f / FIXED_ONE);
}


/**
 * Alloc space for a new rectangle plus the input.a0/dadx/dady arrays
 * immediately after it.
 * The memory is allocated from the per-scene pool, not per-tile.
 * \param size  returns number of bytes allocated
 * \param nr_inputs  number of fragment shader inputs
 * \return pointer to rectangle space
 */
struct lp_rast_rectangle *
lp_setup_alloc_rectangle(struct lp_scene *scene, unsigned nr_inputs)
{
   unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
   struct lp_rast_rectangle *rect;
   unsigned bytes;

   bytes = sizeof(*rect) + (3 * input_array_sz);

   rect = lp_scene_alloc_aligned( scene, bytes, 16 );
   if (rect == NULL)
      return NULL;

   rect->inputs.stride = input_array_sz;

   return rect;
}


/**
 * The rectangle covers the whole tile- shade whole tile.
 * XXX no rectangle/triangle dependencies in this file - share it with
 * the same code in lp_setup_tri.c
 * \param tx, ty  the tile position in tiles, not pixels
 */
boolean
lp_setup_whole_tile(struct lp_setup_context *setup,
                    const struct lp_rast_shader_inputs *inputs,
                    int tx, int ty)
{
   struct lp_scene *scene = setup->scene;

   LP_COUNT(nr_fully_covered_64);

   /* if variant is opaque and scissor doesn't effect the tile */
   if (inputs->opaque) {
      /* Several things prevent this optimization from working:
       * - For layered rendering we can't determine if this covers the same layer
       * as previous rendering (or in case of clears those actually always cover
       * all layers so optimization is impossible). Need to use fb_max_layer and
       * not setup->layer_slot to determine this since even if there's currently
       * no slot assigned previous rendering could have used one.
       * - If there were any Begin/End query commands in the scene then those
       * would get removed which would be very wrong. Furthermore, if queries
       * were just active we also can't do the optimization since to get
       * accurate query results we unfortunately need to execute the rendering
       * commands.
       */
      if (!scene->fb.zsbuf && scene->fb_max_layer == 0 && !scene->had_queries) {
         /*
          * All previous rendering will be overwritten so reset the bin.
          */
         lp_scene_bin_reset( scene, tx, ty );
      }

      if (inputs->is_blit) {
         LP_COUNT(nr_blit_64);
         return lp_scene_bin_cmd_with_state( scene, tx, ty,
                                             setup->fs.stored,
                                             LP_RAST_OP_BLIT,
                                             lp_rast_arg_inputs(inputs) );
      }
      else {
         LP_COUNT(nr_shade_opaque_64);
         return lp_scene_bin_cmd_with_state( scene, tx, ty,
                                             setup->fs.stored,
                                             LP_RAST_OP_SHADE_TILE_OPAQUE,
                                             lp_rast_arg_inputs(inputs) );
      }
   }
   else {
      LP_COUNT(nr_shade_64);
      return lp_scene_bin_cmd_with_state( scene, tx, ty,
                                          setup->fs.stored,
                                          LP_RAST_OP_SHADE_TILE,
                                          lp_rast_arg_inputs(inputs) );
   }
}


boolean
lp_setup_is_blit(const struct lp_setup_context *setup,
                 const struct lp_rast_shader_inputs *inputs)
{
   const struct lp_fragment_shader_variant *variant =
      setup->fs.current.variant;

   if (variant->blit) {
      /*
       * Detect blits.
       */
      const struct lp_jit_texture *texture =
         &setup->fs.current.jit_context.textures[0];
      float dsdx, dsdy, dtdx, dtdy;

      /* XXX: dadx vs dady confusion below?
       */
      dsdx = GET_DADX(inputs)[1][0]*texture->width;
      dsdy = GET_DADX(inputs)[1][1]*texture->width;
      dtdx = GET_DADY(inputs)[1][0]*texture->height;
      dtdy = GET_DADY(inputs)[1][1]*texture->height;

      /*
       * We don't need to check s0/t0 tolerances
       * as we establish as pre-condition that there is no
       * texture filtering.
       */

      ASSERTED struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(&variant->key, 0);
      assert(samp0);
      assert(samp0->sampler_state.min_img_filter == PIPE_TEX_FILTER_NEAREST);
      assert(samp0->sampler_state.mag_img_filter == PIPE_TEX_FILTER_NEAREST);

      /*
       * Check for 1:1 match of texels to dest pixels
       */

      if (util_is_approx(dsdx, 1.0f, 1.0f/LP_MAX_WIDTH) &&
          util_is_approx(dsdy, 0.0f, 1.0f/LP_MAX_HEIGHT) &&
          util_is_approx(dtdx, 0.0f, 1.0f/LP_MAX_WIDTH) &&
          util_is_approx(dtdy, 1.0f, 1.0f/LP_MAX_HEIGHT)) {
         return true;
      }
      else {
#if 0
         debug_printf("dsdx = %f\n", dsdx);
         debug_printf("dsdy = %f\n", dsdy);
         debug_printf("dtdx = %f\n", dtdx);
         debug_printf("dtdy = %f\n", dtdy);
         debug_printf("\n");
#endif
         return FALSE;
      }
   }

   return FALSE;
}


static inline void
partial(struct lp_setup_context *setup,
        const struct lp_rast_rectangle *rect,
        unsigned ix, unsigned iy,
        unsigned mask)
{
   if (mask == 0) {
      assert(rect->box.x0 <= ix * TILE_SIZE);
      assert(rect->box.y0 <= iy * TILE_SIZE);
      assert(rect->box.x1 >= (ix+1) * TILE_SIZE - 1);
      assert(rect->box.y1 >= (iy+1) * TILE_SIZE - 1);

      lp_setup_whole_tile(setup, &rect->inputs, ix, iy);
   }
   else {
      LP_COUNT(nr_partially_covered_64);
      lp_scene_bin_cmd_with_state( setup->scene,
                                   ix, iy,
                                   setup->fs.stored,
                                   LP_RAST_OP_RECTANGLE,
                                   lp_rast_arg_rectangle(rect) );
   }
}


/**
 * Setup/bin a screen-aligned rect.
 * We need three corner vertices in order to correctly setup
 * interpolated parameters.  We *could* get away with just the
 * diagonal vertices but it'd cause ugliness elsewhere.
 *
 *   + -------v0
 *   |        |
 *  v2 ------ v1
 *
 * By an unfortunate mixup between GL and D3D coordinate spaces, half
 * of this file talks about clockwise rectangles (which were CCW in GL
 * coordinate space), while the other half prefers to work with D3D
 * CCW rectangles.
 */
static boolean
try_rect_cw(struct lp_setup_context *setup,
            const float (*v0)[4],
            const float (*v1)[4],
            const float (*v2)[4],
            boolean frontfacing)
{
   const struct lp_fragment_shader_variant *variant =
      setup->fs.current.variant;
   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
   struct lp_scene *scene = setup->scene;
   struct lp_rast_rectangle *rect;
   boolean cw;
   struct u_rect bbox;
   unsigned viewport_index = 0;
   unsigned layer = 0;
   const float (*pv)[4];

   /* x/y positions in fixed point */
   int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset);
   int x1 = subpixel_snap(v1[0][0] - setup->pixel_offset);
   int x2 = subpixel_snap(v2[0][0] - setup->pixel_offset);
   int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset);
   int y1 = subpixel_snap(v1[0][1] - setup->pixel_offset);
   int y2 = subpixel_snap(v2[0][1] - setup->pixel_offset);

   LP_COUNT(nr_rects);

   /* Cull clockwise rects without overflowing.
    */
   cw = (x2 < x1) ^ (y0 < y2);
   if (cw) {
      LP_COUNT(nr_culled_rects);
      return TRUE;
   }

   if (setup->flatshade_first) {
      pv = v0;
   }
   else {
      pv = v2;
   }
   if (setup->viewport_index_slot > 0) {
      unsigned *udata = (unsigned*)pv[setup->viewport_index_slot];
      viewport_index = lp_clamp_viewport_idx(*udata);
   }
   if (setup->layer_slot > 0) {
      layer = *(unsigned*)pv[setup->layer_slot];
      layer = MIN2(layer, scene->fb_max_layer);
   }

   /* Bounding rectangle (in pixels) */
   {
      /* Yes this is necessary to accurately calculate bounding boxes
       * with the two fill-conventions we support.  GL (normally) ends
       * up needing a bottom-left fill convention, which requires
       * slightly different rounding.
       */
      int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;

      bbox.x0 = (MIN3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER;
      bbox.x1 = (MAX3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER;
      bbox.y0 = (MIN3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
      bbox.y1 = (MAX3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;

      /* Inclusive coordinates:
       */
      bbox.x1--;
      bbox.y1--;
   }

   if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
      if (0) debug_printf("no intersection\n");
      LP_COUNT(nr_culled_rects);
      return TRUE;
   }

   u_rect_find_intersection(&setup->draw_regions[viewport_index], &bbox);

   rect = lp_setup_alloc_rectangle(scene, key->num_inputs);
   if (!rect)
      return FALSE;

#ifdef DEBUG
   rect->v[0][0] = v0[0][0];
   rect->v[0][1] = v0[0][1];
   rect->v[1][0] = v1[0][0];
   rect->v[1][1] = v1[0][1];
#endif

   rect->box.x0 = bbox.x0;
   rect->box.x1 = bbox.x1;
   rect->box.y0 = bbox.y0;
   rect->box.y1 = bbox.y1;

   /* Setup parameter interpolants:
    */
   setup->setup.variant->jit_function( v0,
				       v1,
				       v2,
				       frontfacing,
				       GET_A0(&rect->inputs),
				       GET_DADX(&rect->inputs),
				       GET_DADY(&rect->inputs),
                                       &setup->setup.variant->key );

   rect->inputs.frontfacing = frontfacing;
   rect->inputs.disable = FALSE;
   rect->inputs.is_blit = lp_setup_is_blit(setup, &rect->inputs);
   rect->inputs.opaque = variant->opaque;
   rect->inputs.layer = layer;
   rect->inputs.viewport_index = viewport_index;
   rect->inputs.view_index = setup->view_index;

   return lp_setup_bin_rectangle(setup, rect);
}


boolean
lp_setup_bin_rectangle(struct lp_setup_context *setup,
                       struct lp_rast_rectangle *rect)
{
   struct lp_scene *scene = setup->scene;
   unsigned ix0, iy0, ix1, iy1;
   unsigned i, j;
   unsigned left_mask = 0;
   unsigned right_mask = 0;
   unsigned top_mask = 0;
   unsigned bottom_mask = 0;

   /*
    * All fields of 'rect' are now set.  The remaining code here is
    * concerned with binning.
    */

   /* Convert to inclusive tile coordinates:
    */
   ix0 = rect->box.x0 / TILE_SIZE;
   iy0 = rect->box.y0 / TILE_SIZE;
   ix1 = rect->box.x1 / TILE_SIZE;
   iy1 = rect->box.y1 / TILE_SIZE;

   /*
    * Clamp to framebuffer size
    */
   assert(ix0 == MAX2(ix0, 0));
   assert(iy0 == MAX2(iy0, 0));
   assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
   assert(iy1 == MIN2(iy1, scene->tiles_y - 1));

   if (ix0 * TILE_SIZE != rect->box.x0)
      left_mask = RECT_PLANE_LEFT;

   if (ix1 * TILE_SIZE + TILE_SIZE - 1 != rect->box.x1)
      right_mask  = RECT_PLANE_RIGHT;

   if (iy0 * TILE_SIZE != rect->box.y0)
      top_mask    = RECT_PLANE_TOP;

   if (iy1 * TILE_SIZE + TILE_SIZE - 1 != rect->box.y1)
      bottom_mask = RECT_PLANE_BOTTOM;

   /* Determine which tile(s) intersect the rectangle's bounding box
    */
   if (iy0 == iy1 && ix0 == ix1) {
      partial(setup, rect, ix0, iy0,
              (left_mask | right_mask | top_mask | bottom_mask));
   }
   else if (ix0 == ix1) {
      unsigned mask = left_mask | right_mask;
      partial(setup, rect, ix0, iy0, mask | top_mask);
      for (i = iy0 + 1; i < iy1; i++)
         partial(setup, rect, ix0, i, mask);
      partial(setup, rect, ix0, iy1, mask | bottom_mask);
   }
   else if (iy0 == iy1) {
      unsigned mask = top_mask | bottom_mask;
      partial(setup, rect, ix0, iy0, mask | left_mask);
      for (i = ix0 + 1; i < ix1; i++)
         partial(setup, rect, i, iy0, mask);
      partial(setup, rect, ix1, iy0, mask | right_mask);
   }
   else {
      partial(setup, rect, ix0, iy0, left_mask  | top_mask);
      partial(setup, rect, ix0, iy1, left_mask  | bottom_mask);
      partial(setup, rect, ix1, iy0, right_mask | top_mask);
      partial(setup, rect, ix1, iy1, right_mask | bottom_mask);

      /* Top/Bottom fringes
       */
      for (i = ix0 + 1; i < ix1; i++) {
         partial(setup, rect, i, iy0, top_mask);
         partial(setup, rect, i, iy1, bottom_mask);
      }

      /* Left/Right fringes
       */
      for (i = iy0 + 1; i < iy1; i++) {
         partial(setup, rect, ix0, i, left_mask);
         partial(setup, rect, ix1, i, right_mask);
      }

      /* Full interior tiles
       */
      for (j = iy0 + 1; j < iy1; j++) {
         for (i = ix0 + 1; i < ix1; i++) {
            lp_setup_whole_tile(setup, &rect->inputs, i, j);
         }
      }
   }

   /* Catch any out-of-memory which occurred during binning.  Do this
    * once here rather than checking all the return values throughout.
    */
   if (lp_scene_is_oom(scene)) {
      /* Disable rasterization of this partially-binned rectangle.
       * We'll flush this scene and re-bin the entire rectangle:
       */
      rect->inputs.disable = TRUE;
      return FALSE;
   }

   return TRUE;
}


void
lp_rect_cw(struct lp_setup_context *setup,
         const float (*v0)[4],
         const float (*v1)[4],
         const float (*v2)[4],
         boolean frontfacing)
{
   if (!try_rect_cw(setup, v0, v1, v2, frontfacing)) {
      if (!lp_setup_flush_and_restart(setup))
         return;

      if (!try_rect_cw(setup, v0, v1, v2, frontfacing))
         return;
   }
}


/**
 * Take the six vertices for two triangles and try to determine if they
 * form a screen-aligned quad/rectangle.  If so, draw the rect directly,
 * else, draw as two regular triangles.
 */
static boolean
do_rect_ccw(struct lp_setup_context *setup,
            const float (*v0)[4],
            const float (*v1)[4],
            const float (*v2)[4],
            const float (*v3)[4],
            const float (*v4)[4],
            const float (*v5)[4],
            boolean front)
{
   const float (*rv0)[4], (*rv1)[4], (*rv2)[4], (*rv3)[4];  /* rect verts */

#define SAME_POS(A, B)   (A[0][0] == B[0][0] && \
                          A[0][1] == B[0][1] && \
                          A[0][2] == B[0][2] && \
                          A[0][3] == B[0][3])

   /* Only need to consider CCW orientations.  There are nine ways
    * that two counter-clockwise triangles can join up:
    */
   if (SAME_POS(v0, v3)) {
      if (SAME_POS(v2, v4)) {
         /*
          *    v5   v4/v2
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v3/v0   v1
          */
         rv0 = v5;
         rv1 = v0;
         rv2 = v1;
         rv3 = v2;
      }
      else if (SAME_POS(v1, v5)) {
         /*
          *    v4   v3/v0
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v5/v1   v2
          */
         rv0 = v4;
         rv1 = v1;
         rv2 = v2;
         rv3 = v0;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v0, v5)) {
      if (SAME_POS(v2, v3)) {
         /*
          *    v4   v3/v2
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v5/v0   v1
          */
         rv0 = v4;
         rv1 = v0;
         rv2 = v1;
         rv3 = v2;
      }
      else if (SAME_POS(v1, v4)) {
         /*
          *    v3   v5/v0
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v4/v1   v2
          */
         rv0 = v3;
         rv1 = v1;
         rv2 = v2;
         rv3 = v0;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v0, v4)) {
      if (SAME_POS(v2, v5)) {
         /*
          *    v3   v5/v2
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v4/v0   v1
          */
         rv0 = v3;
         rv1 = v0;
         rv2 = v1;
         rv3 = v2;
      }
      else if (SAME_POS(v1, v3)) {
         /*
          *    v5   v4/v0
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v3/v1   v2
          */
         rv0 = v5;
         rv1 = v1;
         rv2 = v2;
         rv3 = v0;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v2, v3)) {
      if (SAME_POS(v1, v4)) {
         /*
          *    v5   v4/v1
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v3/v2   v0
          */
         rv0 = v5;
         rv1 = v2;
         rv2 = v0;
         rv3 = v1;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v2, v5)) {
      if (SAME_POS(v1, v3)) {
         /*
          *    v4   v3/v1
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v5/v2   v0
          */
         rv0 = v4;
         rv1 = v2;
         rv2 = v0;
         rv3 = v1;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v2, v4)) {
      if (SAME_POS(v1, v5)) {
         /*
          *    v3   v5/v1
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v4/v2   v0
          */
         rv0 = v3;
         rv1 = v2;
         rv2 = v0;
         rv3 = v1;
      }
      else {
         goto emit_triangles;
      }
   }
   else {
      goto emit_triangles;
   }


#define SAME_X(A, B)   (A[0][0] == B[0][0])
#define SAME_Y(A, B)   (A[0][1] == B[0][1])

   /* The vertices are now counter clockwise, as such:
    *
    *  rv0 -------rv3
    *    |        |
    *  rv1 ------ rv2
    *
    * To render as a rectangle,
    *   * The X values should be the same at v0, v1 and v2, v3.
    *   * The Y values should be the same at v0, v3 and v1, v2.
    */
   if (SAME_Y(rv0, rv1)) {
      const float (*tmp)[4];
      tmp = rv0;
      rv0 = rv1;
      rv1 = rv2;
      rv2 = rv3;
      rv3 = tmp;
   }

   if (SAME_X(rv0, rv1) && SAME_X(rv2, rv3) &&
       SAME_Y(rv0, rv3) && SAME_Y(rv1, rv2)) {

      const struct lp_setup_variant_key *key = &setup->setup.variant->key;
      const unsigned n = key->num_inputs;
      unsigned i, j;

      /* We have a rectangle.  Check that the other attributes are
       * coplanar.
       */
      for (i = 0; i < n; i++) {
         for (j = 0; j < 4; j++) {
            if (key->inputs[i].usage_mask & (1<<j)) {
               unsigned k = key->inputs[i].src_index;
               float dxdx1, dxdx2, dxdy1, dxdy2;
               dxdx1 = rv0[k][j] - rv3[k][j];
               dxdx2 = rv1[k][j] - rv2[k][j];
               dxdy1 = rv0[k][j] - rv1[k][j];
               dxdy2 = rv3[k][j] - rv2[k][j];
               if (dxdx1 != dxdx2 ||
                   dxdy1 != dxdy2) {
                  goto emit_triangles;
               }
            }
         }
      }

      /* Note we're changing to clockwise here.  Fix this by reworking
       * lp_rect_cw to expect/operate on ccw rects.  Note that
       * function was previously misnamed.
       */
      lp_rect_cw(setup, rv0, rv2, rv1, front);
      return TRUE;
   }
   else {
      /* setup->quad(setup, rv0, rv1, rv2, rv3); */
   }

emit_triangles:
   return FALSE;
}


enum winding {
   WINDING_NONE = 0,
   WINDING_CCW,
   WINDING_CW
};


static inline enum winding
winding(const float (*v0)[4],
        const float (*v1)[4],
        const float (*v2)[4])
{
   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float ex = v0[0][0] - v2[0][0];
   const float ey = v0[0][1] - v2[0][1];
   const float fx = v1[0][0] - v2[0][0];
   const float fy = v1[0][1] - v2[0][1];

   /* det = cross(e,f).z */
   const float det = ex * fy - ey * fx;

   if (det < 0.0f)
      return WINDING_CCW;
   else if (det > 0.0f)
      return WINDING_CW;
   else
      return WINDING_NONE;
}


static boolean
setup_rect_cw(struct lp_setup_context *setup,
              const float (*v0)[4],
              const float (*v1)[4],
              const float (*v2)[4],
              const float (*v3)[4],
              const float (*v4)[4],
              const float (*v5)[4])
{
   enum winding winding0 = winding(v0, v1, v2);
   enum winding winding1 = winding(v3, v4, v5);

   if (winding0 == WINDING_CW &&
       winding1 == WINDING_CW) {
      return do_rect_ccw(setup, v0, v2, v1, v3, v5, v4, !setup->ccw_is_frontface);
   } else if (winding0 == WINDING_CW) {
      setup->triangle(setup, v0, v1, v2);
      return TRUE;
   } else if (winding1 == WINDING_CW) {
      setup->triangle(setup, v3, v4, v5);
      return TRUE;
   } else {
      return TRUE;
   }
}


static boolean
setup_rect_ccw(struct lp_setup_context *setup,
               const float (*v0)[4],
               const float (*v1)[4],
               const float (*v2)[4],
               const float (*v3)[4],
               const float (*v4)[4],
               const float (*v5)[4])
{
   enum winding winding0 = winding(v0, v1, v2);
   enum winding winding1 = winding(v3, v4, v5);

   if (winding0 == WINDING_CCW &&
       winding1 == WINDING_CCW) {
      return do_rect_ccw(setup, v0, v1, v2, v3, v4, v5, setup->ccw_is_frontface);
   } else if (winding0 == WINDING_CCW) {
      setup->triangle(setup, v0, v1, v2);
      return TRUE;
   } else if (winding1 == WINDING_CCW) {
      return FALSE;
      setup->triangle(setup, v3, v4, v5);
      return TRUE;
   } else {
      return TRUE;
   }
}


static boolean
setup_rect_noop(struct lp_setup_context *setup,
                const float (*v0)[4],
                const float (*v1)[4],
                const float (*v2)[4],
                const float (*v3)[4],
                const float (*v4)[4],
                const float (*v5)[4])
{
   return TRUE;
}


static boolean
setup_rect_both(struct lp_setup_context *setup,
                const float (*v0)[4],
                const float (*v1)[4],
                const float (*v2)[4],
                const float (*v3)[4],
                const float (*v4)[4],
                const float (*v5)[4])
{
   enum winding winding0 = winding(v0, v1, v2);
   enum winding winding1 = winding(v3, v4, v5);

   if (winding0 != winding1) {
      /* If we knew that the "front" parameter wasn't going to be
       * referenced, could rearrange one of the two triangles such
       * that they were both CCW.  Aero actually does send mixed
       * CW/CCW rectangles under some circumstances, but we catch them
       * explicitly.
       */
      return FALSE;
   }
   else if (winding0 == WINDING_CCW) {
      return do_rect_ccw(setup, v0, v1, v2, v3, v4, v5, setup->ccw_is_frontface);
   }
   else if (winding0 == WINDING_CW) {
      return do_rect_ccw(setup, v0, v2, v1, v3, v5, v4, !setup->ccw_is_frontface);
   } else {
      return TRUE;
   }
}


void
lp_setup_choose_rect( struct lp_setup_context *setup )
{
   if (setup->rasterizer_discard) {
      setup->rect = setup_rect_noop;
      return;
   }

   switch (setup->cullmode) {
   case PIPE_FACE_NONE:
      setup->rect = setup_rect_both;
      break;
   case PIPE_FACE_BACK:
      setup->rect = setup->ccw_is_frontface ? setup_rect_ccw : setup_rect_cw;
      break;
   case PIPE_FACE_FRONT:
      setup->rect = setup->ccw_is_frontface ? setup_rect_cw : setup_rect_ccw;
      break;
   default:
      setup->rect = setup_rect_noop;
      break;
   }
}
