/* Copyright (C) 1991, 1995 Aladdin Enterprises.  All rights reserved.
  
  This file is part of Aladdin Ghostscript.
  
  Aladdin Ghostscript is distributed with NO WARRANTY OF ANY KIND.  No author
  or distributor accepts any responsibility for the consequences of using it,
  or for whether it serves any particular purpose or works at all, unless he
  or she says so in writing.  Refer to the Aladdin Ghostscript Free Public
  License (the "License") for full details.
  
  Every copy of Aladdin Ghostscript must include a copy of the License,
  normally in a plain ASCII text file named PUBLIC.  The License grants you
  the right to copy, modify and redistribute Aladdin Ghostscript, but only
  under certain conditions described in the License.  Among other things, the
  License requires that the copyright notice and this notice be preserved on
  all copies.
*/

/* gxclist.c */
/* Command list writing for Ghostscript. */
#include "memory_.h"
#include "gx.h"
#include "gpcheck.h"
#include "gserrors.h"
#include "gsbitops.h"
#include "gsutil.h"			/* for gs_next_ids */
#include "gxdevice.h"
#include "gxdevmem.h"			/* must precede gxcldev.h */
#include "gxcldev.h"

/* Forward declarations of procedures */
/*
 * These are the device operations that the command list device supplies
 * itself; they are installed in gs_clist_device_procs below.  All are
 * declared `private' except clist_get_bits, which lives in gxclread.c.
 * The dev_proc_xxx() macros are defined elsewhere; presumably each one
 * expands to a prototype with the signature of device operation xxx.
 */
private dev_proc_open_device(clist_open);
private dev_proc_output_page(clist_output_page);
private dev_proc_fill_rectangle(clist_fill_rectangle);
private dev_proc_tile_rectangle(clist_tile_rectangle);
private dev_proc_copy_mono(clist_copy_mono);
private dev_proc_copy_color(clist_copy_color);
private dev_proc_copy_alpha(clist_copy_alpha);
extern dev_proc_get_bits(clist_get_bits);	/* in gxclread.c */
private dev_proc_get_band(clist_get_band);
private dev_proc_copy_rop(clist_copy_rop);

/* The device procedures */
/*
 * This is a positional initializer, so the order of the entries must
 * match the order of the members of gx_device_procs.
 * The clist_* entries are the operations declared above (implemented in
 * this file, or in gxclread.c for clist_get_bits).  The gx_forward_* and
 * gx_default_* entries come from other files: presumably the former pass
 * the call through to the target device and the latter are the generic
 * default implementations -- confirm against gxdevice.h.
 */
gx_device_procs gs_clist_device_procs =
{	clist_open,
	gx_forward_get_initial_matrix,
	gx_default_sync_output,
	clist_output_page,
	gx_default_close_device,
	gx_forward_map_rgb_color,
	gx_forward_map_color_rgb,
	clist_fill_rectangle,
	clist_tile_rectangle,
	clist_copy_mono,
	clist_copy_color,
	gx_default_draw_line,
	clist_get_bits,
	gx_forward_get_params,
	gx_forward_put_params,
	gx_forward_map_cmyk_color,
	gx_forward_get_xfont_procs,
	gx_forward_get_xfont_device,
	gx_forward_map_rgb_alpha_color,
	gx_forward_get_page_device,
	gx_forward_get_alpha_bits,
	clist_copy_alpha,
	clist_get_band,
	clist_copy_rop
};

/* ------ Define the command set and syntax ------ */

#ifdef DEBUG
/* Name tables used when printing opcodes in debugging output. */
/* cmd_op_names is indexed by the high nibble of the opcode; the misc */
/* and path tables are indexed by the low nibble within those groups. */
const char *cmd_op_names[16] = { cmd_op_name_strings };
const char *cmd_misc_op_names[16] = { cmd_misc_op_name_strings };
const char *cmd_path_op_names[16] = { cmd_path_op_name_strings };
/* Per-opcode statistics, indexed by the full opcode byte: */
/* how many times each opcode was counted, and the total size counted. */
private ulong far_data cmd_op_counts[256];
private ulong far_data cmd_op_sizes[256];
/* Tile cache statistics: cache resets, lookups that hit, tiles added. */
private ulong cmd_tile_reset, cmd_tile_found, cmd_tile_added;
/* Commands appended to the current list vs. commands that started */
/* a new chunk for a different list (see cmd_put_list_op). */
private ulong cmd_same_band, cmd_other_band;
/*
 * Record one occurrence of opcode op occupying size bytes, trace it if
 * the 'L' debugging flag is set, and hand the opcode back so the call
 * can wrap the expression that stores it.
 */
private int near
count_op(int op, uint size)
{	const int group = op & 0xf0;
	const int sub = op & 0xf;

	cmd_op_sizes[op] += size;
	cmd_op_counts[op] += 1;
	if ( !gs_debug_c('L') )
	  return op;
	if ( group == cmd_op_misc )
	  {	dprintf2(", %s(%u)\n", cmd_misc_op_names[sub], size);
	  }
	else if ( group == cmd_op_path )
	  {	dprintf2(", %s(%u)\n", cmd_path_op_names[sub], size);
	  }
	else
	  {	dprintf3(", %s %d(%u)\n", cmd_op_names[op >> 4], sub, size);
	  }
	fflush(dstderr);
	return op;
}
/* Undo the statistics recorded by a previous count_op(op, size). */
private void near
uncount_op(int op, uint size)
{	cmd_op_sizes[op] -= size;
	cmd_op_counts[op] -= 1;
}
/* Increment a statistics counter (DEBUG builds only). */
#  define count_add1(v) (v++)
#else
/*
 * Without DEBUG no statistics are kept: count_op reduces to its first
 * argument (so a store written there still takes place), and the other
 * two macros expand to DO_NOTHING.
 */
#  define count_op(store_op, size) store_op
#  define uncount_op(op, size) DO_NOTHING
#  define count_add1(v) DO_NOTHING
#endif

/* Initialize the device state */
/*
 * Carve up the device's buffer and reset all the writing state.
 * Returns 0 on success, or gs_error_limitcheck if the buffer is too
 * small to hold even one scan line, or too small for the band states
 * plus a minimal command buffer.
 */
private void clist_init_tiles(P1(gx_device_clist *));
private int
clist_open(gx_device *dev)
{	/*
	 * The buffer area (data, data_size) holds a tile cache and a
	 * set of block range bit masks when both writing and reading.
	 * The rest of the space is used for
	 * the command buffer and band state bookkeeping when writing,
	 * and for the rendering buffer (image device) when reading.
	 * For the moment, we divide the space up arbitrarily, except that
	 * we allocate almost no space for the tile cache if the device
	 * doesn't need halftoning.
	 *
	 * This routine requires only data, data_size, target, and mdev
	 * to have been set in the device structure, and is idempotent,
	 * so it can be used to check whether a given-size buffer
	 * is large enough.
	 */
	byte *data = cdev->data;
	uint size = cdev->data_size;
	/* Take the next n bytes off the front of the remaining space. */
#define alloc_data(n) data += (n), size -= (n)
	gx_device *target = cdev->target;
	uint raster, nbands, band;
	gx_clist_state *states;
	ulong state_size;

	cdev->ymin = cdev->ymax = -1;	/* render_init not done yet */
	/* The tile cache comes first in the buffer. */
	cdev->tile_data = data;
	cdev->tile_data_size = (size / 5) & -4;		/* arbitrary */
	if ( target->color_info.depth >= 15 )
	  {	/* No halftones, so we only need a tile cache for */
		/* user-defined patterns. */
#define min_tile_data 1024				/* arbitrary */
		if ( cdev->tile_data_size > min_tile_data )
		  cdev->tile_data_size = min_tile_data;
	  }
	alloc_data(cdev->tile_data_size);
	/* Each scan line of a band costs one raster line plus one pointer. */
	raster = gx_device_raster(target, 1) + sizeof(byte *);
	cdev->band_height = size / raster;
	if ( cdev->band_height == 0 )	/* can't even fit one scan line */
	  return_error(gs_error_limitcheck);
	nbands = target->height / cdev->band_height + 1;
	cdev->nbands = nbands;
	if_debug4('l', "[l]width=%d, raster=%d, band_height=%d, nbands=%d\n",
		  target->width, raster, cdev->band_height, cdev->nbands);
	state_size = nbands * (ulong)sizeof(gx_clist_state);
	if ( state_size + sizeof(cmd_prefix) + cmd_largest_size + raster + 4 > size )		/* not enough room */
	  return_error(gs_error_limitcheck);
	/* The band states and the command buffer share the space that */
	/* follows the tile cache. */
	cdev->mdev.base = data;
	cdev->states = states = (gx_clist_state *)data;
	alloc_data((uint)state_size);
	cdev->cbuf = data;
	cdev->cnext = data;
	cdev->cend = data + size;
	cdev->ccl = 0;
	/*
	 * Clear both ends of the all-bands list.  cmd_put_list_op decides
	 * whether a list is empty by testing tail, so a tail left over
	 * from an earlier use of the buffer would make it link the first
	 * new command through a stale pointer and never set head.
	 */
	cdev->all_bands.head = cdev->all_bands.tail = 0;
	for ( band = 0; band < nbands; band++, states++ )
	  *states = cls_initial;
#undef alloc_data
	/* Round up the size of the band mask so that */
	/* the bits, which follow it, stay aligned. */
	cdev->tile_band_mask_size =
	  ((nbands + (align_bitmap_mod * 8 - 1)) >> 3) &
	    ~(align_bitmap_mod - 1);
	/* The largest tile we can cache is what is left of the tile area */
	/* after two hash entries, one slot header, and one band mask. */
	cdev->tile_max_size =
	  cdev->tile_data_size -
	    (sizeof(tile_hash) * 2 + sizeof(tile_slot) +
	     cdev->tile_band_mask_size);
	cdev->tile_depth = 1;
	memset(&cdev->tile, 0, sizeof(cdev->tile));
	clist_init_tiles(cdev);
	return 0;
}

/* (Re)initialize the tile cache. */
/* Note that this routine may decrease cldev->tile.{size.x,size.y,raster} */
/* to allow storing fewer replicas of more tiles. */
/*
 * On entry cldev->tile and cldev->tile_depth describe the tiles to be
 * cached.  On exit the slot size, maximum slot count, and hash table
 * (size, mask, and location at the end of the tile data area) have been
 * recomputed, the whole tile area has been cleared, and every band's
 * current tile has been reset to &no_tile.
 */
private void
clist_init_tiles(register gx_device_clist *cldev)
{	uint rwidth = cldev->tile.rep_width, rheight = cldev->tile.rep_height;
	uint depth = cldev->tile_depth;
	uint bwidth = rwidth * depth;
	/* NOTE(review): presumably one cache slot per possible phase of */
	/* the unreplicated cell -- confirm the intent of this estimate. */
	uint tiles_wanted = rwidth * rheight;
	int hc;

	if_debug5('l', "[l]requested tile.size=%dx%d(%ux%u)x%d\n",
		  cldev->tile.size.x, cldev->tile.size.y,
		  (uint)rwidth, (uint)rheight, depth);

	/* If we can't fit all tiles_wanted cells in the cache, */
	/* but we could do so by reducing replication, do that. */
	for ( ; ; )
	  {	uint replicas;
		ulong tile_max_count;

		/* Recompute the slot layout for the current tile size: */
		/* a slot is a header, a band mask, and the tile bits. */
		cldev->tile.raster =
		  bitmap_raster(cldev->tile.size.x * depth);
		cldev->tile_slot_size =
		  sizeof(tile_slot) + cldev->tile_band_mask_size +
		    cldev->tile.raster * cldev->tile.size.y;
		/* Budget up to 3 hash entries per slot. */
		tile_max_count = cldev->tile_data_size /
		  (sizeof(tile_hash) * 3 /*(worst case)*/ +
		   cldev->tile_slot_size);
		if ( tile_max_count > 0x3fff )
		  tile_max_count = 0x3fff;	/* hc is only an int */
		cldev->tile_max_count = tile_max_count;
		if ( cldev->tile_max_count >= tiles_wanted )
		  { /* All possible tiles will fit. */
		    break;
		  }
		if ( (replicas = cldev->tile.size.y / rheight) > 1 )
		  { /* Reduce Y replication. */
		    /* (halve the number of vertical replicas) */
		    cldev->tile.size.y = rheight * (replicas >> 1);
		    continue;
		  }
		if ( (replicas = cldev->tile.size.x / rwidth) > 1 &&
		     bwidth * replicas > align_bitmap_mod
		   )
		  { /* Reduce X replication. */
		    /* (down to as many replicas as fit in */
		    /* align_bitmap_mod, but at least one) */
		    replicas = align_bitmap_mod / bwidth;
		    cldev->tile.size.x = rwidth * max(replicas, 1);
		    continue;
		  }
		/* We can't do any better than this. */
		break;
	  }

	/* Compute the hash table mask: start from twice the slot count */
	/* and fill in the low-order bits until the value is 2^k - 1. */
	hc = (cldev->tile_max_count - 1) * 2;
	while ( (hc + 1) & hc )
	  hc |= hc >> 1;	/* make mask */
	/* Drop to the next smaller mask if the table would need 3 or */
	/* more entries per slot. */
	if ( hc / 3 >= cldev->tile_max_count )
	  hc >>= 1;
	cldev->tile_hash_mask = hc;
	hc++;				/* make actual size */
	if_debug5('l', "[l]actual tile.size=%dx%d, slot_size=%d, max_count=%d, hc=%d\n",
		  cldev->tile.size.x, cldev->tile.size.y,
		  cldev->tile_slot_size, cldev->tile_max_count, hc);
	/* The hash table occupies the last hc entries of the tile area. */
	cldev->tile_hash_table =
	  (tile_hash *)(cldev->tile_data + cldev->tile_data_size) - hc;
	cldev->tile_count = 0;
	memset(cldev->tile_data, 0, cldev->tile_data_size);
	/* Set every byte of the hash table to 0xff; clist_change_tile */
	/* treats a negative slot_index as an empty entry. */
	memset(cldev->tile_hash_table, -1, hc * sizeof(tile_hash));
	/* No band has a current tile any more. */
	{ gx_clist_state *pcls;
	  int i;
	  for ( i = 0, pcls = cldev->states; i < cldev->nbands; i++, pcls++ )
	    pcls->tile = &no_tile;
	}
	count_add1(cmd_tile_reset);
}

/*
 * Finish a page.  If flush is set, both command list files are rewound
 * (discarding the page) and the recorded end of the band file is
 * zeroed; otherwise both files are positioned at their ends.  Either
 * way the writing state is then rebuilt by clist_open, whose result is
 * returned.  num_copies is not used here.
 */
private int
clist_output_page(gx_device *dev, int num_copies, int flush)
{	if ( !flush )
	  {	clist_fseek(cdev->cfile, 0L, SEEK_END);
		clist_fseek(cdev->bfile, 0L, SEEK_END);
	  }
	else
	  {	clist_rewind(cdev->cfile, true);
		clist_rewind(cdev->bfile, true);
		cdev->bfile_end_pos = 0;
	  }
	/* Reinitialize the writing state. */
	return clist_open(dev);
}

/* Print statistics. */
#ifdef DEBUG
/*
 * Print the accumulated command list statistics on the debugging
 * output: first the tile cache and band-switch counters, then one
 * entry per group of 16 opcodes.  For the misc and path groups each
 * opcode is listed by name as count(size); for every other group the
 * group totals are printed followed by the 16 per-opcode count(size)
 * pairs, with " -" standing for an opcode that was never used.
 */
void
cmd_print_stats(void)
{	int ci, cj;
	/* All of the counters are ulong, so they are printed with %lu. */
	dprintf3("[l]counts: reset = %lu, found = %lu, added = %lu\n",
	         cmd_tile_reset, cmd_tile_found, cmd_tile_added);
	dprintf2("           same_band = %lu, other_band = %lu\n",
		 cmd_same_band, cmd_other_band);
	for ( ci = 0; ci < 0x100; ci += 0x10 )
	   {	const char **subnames;
		/* Only the misc and path groups have named sub-opcodes. */
		switch ( ci )
		  {
		  case cmd_op_misc:
		    subnames = cmd_misc_op_names; break;
		  case cmd_op_path:
		    subnames = cmd_path_op_names; break;
		  default:
		    subnames = 0;
		  }
		if ( subnames != 0 )
		  { dprintf1("[l]  %s =", cmd_op_names[ci >> 4]);
		    /* Print the named sub-opcodes two per line. */
		    for ( cj = ci; cj < ci + 0x10; cj += 2 )
		      dprintf6("\n\t%s = %lu(%lu), %s = %lu(%lu)",
			       subnames[cj-ci],
			       cmd_op_counts[cj], cmd_op_sizes[cj],
			       subnames[cj-ci+1],
			       cmd_op_counts[cj+1], cmd_op_sizes[cj+1]);
		  }
		else
		  { ulong tcounts = 0, tsizes = 0;
		    /* Total the group first, then list its members. */
		    for ( cj = ci; cj < ci + 0x10; cj++ )
		      tcounts += cmd_op_counts[cj],
		      tsizes += cmd_op_sizes[cj];
		    dprintf3("[l]  %s (%lu,%lu) =\n\t",
			     cmd_op_names[ci >> 4], tcounts, tsizes);
		    for ( cj = ci; cj < ci + 0x10; cj++ )
		      if ( cmd_op_counts[cj] == 0 )
			dputs(" -");
		      else
			dprintf2(" %lu(%lu)", cmd_op_counts[cj],
				 cmd_op_sizes[cj]);
		  }
		dputs("\n");
	   }
}
#endif				/* DEBUG */

/* ------ Writing ------ */

/* Utilities */

/* Store the variables x, y, width, and height of the enclosing scope */
/* into the corresponding members of rect. */
#define cmd_set_rect(rect)\
  ((rect).x = x, (rect).y = y,\
   (rect).width = width, (rect).height = height)

/*
 * Write out the buffered commands of one list.  A cmd_block holding the
 * band number and the current position in cfile is written to bfile,
 * then the data of every chunk on the list, followed by an end_run
 * opcode, is written to cfile.  The list is left empty.  An empty list
 * writes nothing.  Returns 0, or the (negative) error code reported
 * for either file.
 */
private int
cmd_write_band(gx_device_clist *cldev, int band, cmd_list *pcl)
{	const cmd_prefix *pfx = pcl->head;
	clist_file_ptr cfile, bfile;
	cmd_block cb;
	char end;
	int code;

	if ( pfx == 0 )
	  return 0;		/* nothing buffered for this list */
	cfile = cldev->cfile;
	bfile = cldev->bfile;
	end = count_op(cmd_opv_end_run, 1);
	cb.band = band;
	cb.pos = clist_ftell(cfile);
	if_debug2('l', "[l]writing for band %d at %ld\n",
		  band, cb.pos);
	clist_fwrite_chars(&cb, sizeof(cb), bfile);
	/* The last chunk's link is not maintained while the list is */
	/* being built, so terminate the list before walking it. */
	pcl->tail->next = 0;
	while ( pfx != 0 )
	  {	/* The command bytes immediately follow the prefix. */
		clist_fwrite_chars(pfx + 1, pfx->size, cfile);
		pfx = pfx->next;
	  }
	pcl->head = pcl->tail = 0;
	clist_fwrite_chars(&end, 1, cfile);
	process_interrupts();
	/* Check the band file first, then the command file. */
	code = clist_ferror_code(bfile);
	if ( code >= 0 )
	  code = clist_ferror_code(cfile);
	if ( code < 0 )
	  return_error(code);
	return 0;
}

/* Write out the buffered commands, and reset the buffer. */
/*
 * The all-bands list is written first, then each band's list in order,
 * stopping at the first error.  Whether or not an error occurs, the
 * command buffer is empty on return and no list refers to it any more.
 * Returns the result of the last write, passed through
 * return_check_interrupt.
 */
private int
cmd_write_buffer(gx_device_clist *cldev)
{	int nbands = cldev->nbands;
	gx_clist_state *pcls;
	int band;
	int code = cmd_write_band(cldev, cmd_band_all, &cldev->all_bands);

	for ( band = 0, pcls = cldev->states;
	      code >= 0 && band < nbands; band++, pcls++
	    )
	  code = cmd_write_band(cldev, band, &pcls->list);
	/*
	 * If an error stopped the loop early, the remaining bands still
	 * have lists pointing into the buffer we are about to reuse.
	 * Empty them so that later commands are not linked onto stale
	 * chunks.  (cmd_write_band empties the list it was working on
	 * before it checks for errors.)
	 */
	for ( ; band < nbands; band++, pcls++ )
	  pcls->list.head = pcls->list.tail = 0;
	cldev->cnext = cldev->cbuf;
	cldev->ccl = 0;
	/* Clear tail as well as head: cmd_put_list_op tests tail to */
	/* decide whether the list is empty. */
	cldev->all_bands.head = cldev->all_bands.tail = 0;
#ifdef DEBUG
	if ( gs_debug_c('l') )
	  cmd_print_stats();
#endif
	return_check_interrupt(code);
}
/* Export under a different name for gxclread.c */
/* Writes out all buffered commands and empties the command buffer; */
/* returns whatever cmd_write_buffer returns. */
int
clist_flush_buffer(gx_device_clist *cldev)
{	return cmd_write_buffer(cldev);
}

/* Add a command to the appropriate band list, */
/* and allocate space for its data. */
/* Return the pointer to the data area. */
/* If an error occurs, set cldev->error_code and return 0. */
/*
 * cmd_headroom is the extra space a command may need beyond its own
 * size: a new chunk prefix plus the worst-case alignment padding.
 *
 * NOTE(review): if size + cmd_headroom exceeds the capacity of the
 * empty buffer, the retry below recurses without end; callers appear
 * to be responsible for never asking for that much -- confirm.
 */
#define cmd_headroom (sizeof(cmd_prefix) + arch_align_ptr_mod)
private byte *
cmd_put_list_op(gx_device_clist *cldev, cmd_list *pcl, uint size)
{	byte *dp = cldev->cnext;
	if ( size + cmd_headroom > cldev->cend - dp )
	  { /* Not enough room: write everything out, which empties */
	    /* the buffer, and try again.  error_code is set to the */
	    /* result of the write even when it succeeds. */
	    int code = cldev->error_code = cmd_write_buffer(cldev);
	    if ( code < 0 )
	      return 0;
	    return cmd_put_list_op(cldev, pcl, size);
	  }
	if ( cldev->ccl == pcl )
	  { /* We're adding another command for the same band. */
	    /* Tack it onto the end of the previous one. */
	    count_add1(cmd_same_band);
	    pcl->tail->size += size;
	  }
	else
	  { /* Skip to an appropriate alignment boundary. */
	    /* (We assume the command buffer itself is aligned.) */
	    /* The masked (negative) distance from cbuf is the padding */
	    /* needed to reach the next multiple of arch_align_ptr_mod */
	    /* (presumably a power of 2). */
	    cmd_prefix *cp =
	      (cmd_prefix *)(dp +
			     ((cldev->cbuf - dp) & (arch_align_ptr_mod - 1)));
	    count_add1(cmd_other_band);
	    dp = (byte *)(cp + 1);
	    /* Link the new chunk onto the end of the list; an empty */
	    /* list is recognized by tail == 0.  cp->next is not set */
	    /* here: cmd_write_band terminates the list when writing. */
	    if ( pcl->tail != 0 )
	      pcl->tail->next = cp;
	    else
	      pcl->head = cp;
	    pcl->tail = cp;
	    cldev->ccl = pcl;
	    cp->size = size;
	  }
	cldev->cnext = dp + size;
	return dp;
}
/*
 * Allocate size bytes for a command in the list of the band whose state
 * is pcls.  Returns what cmd_put_list_op returns: a pointer to the
 * space, or 0 on error with cldev->error_code set.
 * With DEBUG this is a real procedure so that it can trace the request
 * (band index, size, and space left) under the 'L' flag; otherwise it
 * is just a macro.
 */
#ifdef DEBUG
private byte *
cmd_put_op(gx_device_clist *cldev, gx_clist_state *pcls, uint size)
{	if_debug3('L', "[L]band %d: size=%u, left=%u",
		  (int)(pcls - cldev->states),
		  size, (uint)(cldev->cend - cldev->cnext));
	return cmd_put_list_op(cldev, &pcls->list, size);
}
#else
#  define cmd_put_op(cldev, pcls, size)\
     cmd_put_list_op(cldev, &(pcls)->list, size)
#endif
/* Call cmd_put_op and return properly if an error occurs. */
/*
 * On success dp points to csize bytes of command space, the opcode op
 * has been stored in the first byte, and (with DEBUG) the command has
 * been counted.  On failure the enclosing procedure returns
 * cldev->error_code.
 * Note that this expands to an `if' statement followed by an expression
 * with no trailing semicolon, so it must be used as a complete
 * statement inside braces, never as the unbraced body of an if or else.
 */
#define set_cmd_put_op(dp, cldev, pcls, op, csize)\
  if ( (dp = cmd_put_op(cldev, pcls, csize)) == 0 )\
    return (cldev)->error_code;\
  count_op(*dp = (op), csize)

/*
 * Allocate size bytes for a command that applies to all bands.
 * Unless the all-bands list is non-empty and its last chunk ends
 * exactly at the current end of the buffered data, everything buffered
 * is written out first.  Returns a pointer to the space, or 0 on error
 * with cldev->error_code set.
 */
private byte *
cmd_put_all_op(gx_device_clist *cldev, uint size)
{	cmd_list *all = &cldev->all_bands;
	int contiguous = 0;

	if_debug2('L', "[L]all-band: size=%u, left=%u",
		  size, (uint)(cldev->cend - cldev->cnext));
	if ( all->head != 0 )
	  {	/* Does the last all-bands chunk end where the */
		/* next command would start? */
		const cmd_prefix *last = all->tail;
		const byte *last_end = (const byte *)(last + 1) + last->size;

		contiguous = (cldev->cnext == last_end);
	  }
	if ( !contiguous )
	  {	cldev->error_code = cmd_write_buffer(cldev);
		if ( cldev->error_code < 0 )
		  return 0;
	  }
	return cmd_put_list_op(cldev, all, size);
}
/*
 * Call cmd_put_all_op and return properly if an error occurs.
 * On success dp points to csize bytes of command space with the opcode
 * op stored in the first byte; on failure the enclosing procedure
 * returns cldev->error_code.  Like set_cmd_put_op, this must be used
 * as a complete statement inside braces.
 */
#define set_cmd_put_all_op(dp, cldev, op, csize)\
  if ( (dp = cmd_put_all_op(cldev, csize)) == 0 )\
    return (cldev)->error_code;\
  count_op(*dp = (op), csize)

/*
 * Give back the last delta bytes of the most recently allocated
 * command: both the size of the list's last chunk and the buffer's
 * fill pointer are reduced by delta.
 * The statistics kept by count_op are not adjusted.
 */
private void
cmd_shorten_list_op(gx_device_clist *cldev, cmd_list *pcl, uint delta)
{	cmd_prefix *last = pcl->tail;

	cldev->cnext -= delta;
	last->size -= delta;
}
/* The same, for the list belonging to band state pcls. */
#define cmd_shorten_op(cldev, pcls, delta)\
  cmd_shorten_list_op(cldev, &(pcls)->list, delta)

/* Write a variable-size positive integer. */
/* (This works for negative integers also; they are written as though */
/* they were unsigned.) */
/*
 * The encoding holds 7 bits per byte, low-order bits first; every byte
 * except the last has 0x80 set.
 * w1byte / w2byte test whether a value fits in 1 or 2 such bytes.
 * cmd_sizew gives the encoded size of one value.  cmd_size2w gives the
 * encoded size of a pair as written by cmd_put2w, and cmd_sizexy does
 * the same for the x and y members of a point.
 * These macros evaluate their arguments more than once.
 */
#define w1byte(w) (!((w) & ~0x7f))
#define w2byte(w) (!((w) & ~0x3fff))
#define cmd_sizew(w)\
  (w1byte(w) ? 1 : w2byte(w) ? 2 : cmd_w_size((uint)(w)))
#define cmd_size2w(wx,wy)\
  (w1byte((wx) | (wy)) ? 2 :\
   cmd_w_size((uint)(wx)) + cmd_w_size((uint)(wy)))
#define cmd_sizexy(xy) cmd_size2w((xy).x, (xy).y)
/* Return the number of bytes needed to encode w at 7 bits per byte */
/* (always at least 1). */
private int near
cmd_w_size(register uint w)
{	register int nbytes;

	for ( nbytes = 1; w > 0x7f; w >>= 7 )
	  nbytes++;
	return nbytes;
}
/*
 * Store a variable-size integer (cmd_putw) or a pair of them
 * (cmd_put2w, cmd_putxy) at dp, and advance dp past what was written.
 * dp must be a modifiable pointer variable.  The 1- and 2-byte cases
 * are expanded inline; larger values go through cmd_w_put.
 * These macros evaluate their arguments more than once.
 */
#define cmd_putw(w,dp)\
  (w1byte(w) ? (*dp = w, ++dp) :\
   w2byte(w) ? (*dp = (w) | 0x80, dp[1] = (w) >> 7, dp += 2) :\
   (dp = cmd_w_put((uint)(w), dp)))
#define cmd_put2w(wx,wy,dp)\
  (w1byte((wx) | (wy)) ? (dp[0] = (wx), dp[1] = (wy), dp += 2) :\
   (dp = cmd_w_put((uint)(wy), cmd_w_put((uint)(wx), dp))))
#define cmd_putxy(xy,dp) cmd_put2w((xy).x, (xy).y, dp)
/* Store w at dp, 7 bits per byte with the low-order bits first and */
/* 0x80 set on every byte but the last.  Return the address just past */
/* the last byte stored. */
private byte *near
cmd_w_put(register uint w, register byte *dp)
{	for ( ; w > 0x7f; w >>= 7 )
	  *dp++ = w | 0x80;
	*dp++ = w;
	return dp;
}

/* Write a rectangle. */
/* cmd_size_rect returns the number of bytes that cmd_put_rect will */
/* store for *prect: the sum of the encoded sizes of its 4 members. */
private int
cmd_size_rect(register const gx_cmd_rect *prect)
{	int total = cmd_sizew(prect->x);

	total += cmd_sizew(prect->y);
	total += cmd_sizew(prect->width);
	total += cmd_sizew(prect->height);
	return total;
}
/* Store x, y, width, and height of *prect, in that order, as */
/* variable-size integers starting at dp.  Return the address just */
/* past the last byte stored. */
private byte *
cmd_put_rect(register const gx_cmd_rect *prect, register byte *dp)
{	register byte *out = dp;

	cmd_putw(prect->x, out);
	cmd_putw(prect->y, out);
	cmd_putw(prect->width, out);
	cmd_putw(prect->height, out);
	return out;
}

/*
 * Write a rectangle command with base opcode op for the band whose
 * state is pcls, using the shortest of four encodings.  The compact
 * encodings express the rectangle relative to the band's previous
 * rectangle (pcls->rect), which is replaced by the new one before
 * anything is written.  Returns 0, or the error code if no command
 * space could be obtained.
 */
private int
cmd_write_rect_cmd(gx_device *dev, gx_clist_state *pcls,
  int op, int x, int y, int width, int height)
{	int dx = x - pcls->rect.x;
	int dy = y - pcls->rect.y;
	int dwidth = width - pcls->rect.width;
	int dheight = height - pcls->rect.height;
	/* check_ranges_1 tests dx, dy, and dwidth against [rmin..rmax] */
	/* (one unsigned comparison each); check_ranges adds dheight. */
	/* Both are expanded with whatever rmin and rmax are defined as */
	/* at the point of use. */
#define check_ranges_1(rmin, rmax)\
  ((unsigned)(dx - rmin) <= (rmax - rmin) &&\
   (unsigned)(dy - rmin) <= (rmax - rmin) &&\
   (unsigned)(dwidth - rmin) <= (rmax - rmin))
#define check_ranges(rmin, rmax)\
  (check_ranges_1(rmin, rmax) &&\
   (unsigned)(dheight - rmin) <= (rmax - rmin))
#define rmin cmd_min_tiny
#define rmax cmd_max_tiny
	cmd_set_rect(pcls->rect);
	if ( dheight == 0 && check_ranges_1(rmin, rmax) )
	   {	/* "Tiny" form, 2 bytes: same height, the width change */
		/* is folded into the opcode, and the second byte */
		/* holds dx - rmin in its high 4 bits and dy - rmin */
		/* in its low 4 bits. */
		byte *dp;
		set_cmd_put_op(dp, cdev, pcls, op + 0x20 + dwidth - rmin, 2);
		dp[1] = (dx << 4) + dy - (rmin * 0x11);
	   }
#undef rmin
#undef rmax
#define rmin cmd_min_short
#define rmax cmd_max_short
	else if ( check_ranges(rmin, rmax) )
	   {	/* "Short" form: each delta is stored in one byte, */
		/* biased by rmin. */
		int dh = dheight - cmd_min_tiny;
		byte *dp;
		if ( (unsigned)dh <= cmd_max_tiny - cmd_min_tiny && dh != 0 &&
		     dy == 0
		   )
		   {	/* 3 bytes: dy is 0 and the height change is */
			/* folded into the opcode. */
			op += dh;
			set_cmd_put_op(dp, cdev, pcls, op + 0x10, 3);
			if_debug3('L', "    rs2:%d,%d,0,%d\n",
				  dx, dwidth, dheight);
		   }
		else
		   {	/* 5 bytes: all four deltas are stored. */
			set_cmd_put_op(dp, cdev, pcls, op + 0x10, 5);
			if_debug4('L', "    rs4:%d,%d,%d,%d\n",
				  dx, dwidth, dy, dheight);
			dp[3] = dy - rmin;
			dp[4] = dheight - rmin;
		   }
		dp[1] = dx - rmin;
		dp[2] = dwidth - rmin;
	   }
	/* Small dy and dheight (each -2..1): both are folded into the */
	/* low 4 bits of the opcode, followed by the full x and width. */
	/* The combination dy = dheight = -2 would give an offset of 0, */
	/* i.e. the plain opcode used by the full form below, so it is */
	/* excluded. */
	else if ( dy >= -2 && dy <= 1 && dheight >= -2 && dheight <= 1 &&
		  (dy + dheight) != -4
		)
	  {	byte *dp;
		int rcsize = 1 + cmd_sizew(x) + cmd_sizew(width);
		set_cmd_put_op(dp, cdev, pcls,
			       op + ((dy + 2) << 2) + dheight + 2, rcsize);
		++dp;
		cmd_put2w(x, width, dp);
	  }
	else
	   {	/* Full form: the opcode followed by all 4 values */
		/* of the new rectangle. */
		byte *dp;
		int rcsize = 1 + cmd_size_rect(&pcls->rect);
		set_cmd_put_op(dp, cdev, pcls, op, rcsize);
		if_debug5('L', "    r%d:%d,%d,%d,%d\n",
			  rcsize - 1, dx, dwidth, dy, dheight);
		cmd_put_rect(&pcls->rect, dp + 1);
	   }
	return 0;
}

/*
 * Write a command that changes the color cached in *pcolor to color,
 * using base opcode op, and update *pcolor.  Nothing is written if the
 * color is unchanged.  Returns 0, or the error code if no command
 * space could be obtained (in which case *pcolor is not updated).
 */
private int
cmd_put_color(gx_device *dev, gx_clist_state *pcls,
  int op, gx_color_index color, gx_color_index *pcolor)
{	byte *dp;
	long diff = (long)color - (long)(*pcolor);

	if ( diff == 0 )
	  return 0;
	if ( !(op & 0xf) && diff >= -7 && diff <= 7 )
	  {	/* Small change and the low 4 bits of the opcode are */
		/* free: fold diff + 8 (1..15, never 8) into them. */
		set_cmd_put_op(dp, cdev, pcls, op + (int)diff + 8, 1);
	  }
	else if ( color == gx_no_color_index )
	  {	/* We must handle this specially, because it may take */
		/* more bytes than the color depth. */
		/* (It is written as op + 8, the value the delta */
		/* form above never produces.) */
		set_cmd_put_op(dp, cdev, pcls, op + 8, 1);
	  }
	else
	   {	/* Write the color itself after the opcode, high-order */
		/* byte first.  cibytes1 is the total command size: */
		/* the number of color bytes plus 1 for the opcode. */
		int cibytes1 = (dev->color_info.depth + 15) >> 3;
		set_cmd_put_op(dp, cdev, pcls, op, cibytes1);
		/* Each case deliberately falls through to the next. */
		switch ( cibytes1 )
		  {
		  case 5: *++dp = (byte)(color >> 24);
		  case 4: *++dp = (byte)(color >> 16);
		  case 3: *++dp = (byte)(color >> 8);
		  case 2: dp[1] = (byte)color;
		  }
	   }
	*pcolor = color;
	return 0;
}
/* Set the band's color 0 / color 1 (pcls->colors[0] / [1]). */
#define cmd_set_color0(dev, pcls, color0)\
  cmd_put_color(dev, pcls, cmd_op_set_color0, color0, &(pcls)->colors[0])
#define cmd_set_color1(dev, pcls, color1)\
  cmd_put_color(dev, pcls, cmd_op_set_color1, color1, &(pcls)->colors[1])
/*
 * Bring the band's two tile colors up to date, writing a command for
 * each one that differs from the cached value.  Returns 0, or the
 * first error code returned by cmd_put_color.
 */
private int
cmd_set_tile_colors(gx_device *dev, gx_clist_state *pcls,
  gx_color_index color0, gx_color_index color1)
{	int code;

	if ( pcls->tile_colors[0] != color0 )
	   {	code = cmd_put_color(dev, pcls, cmd_opv_set_tile_color0,
				     color0, &pcls->tile_colors[0]);
		if ( code < 0 )
		  return code;
	   }
	if ( pcls->tile_colors[1] != color1 )
	   {	code = cmd_put_color(dev, pcls, cmd_opv_set_tile_color1,
				     color1, &pcls->tile_colors[1]);
		if ( code < 0 )
		  return code;
	   }
	return 0;
}

/* Enable or disable the logical operation. */
/*
 * enable must be 0 or 1.  A command is written only if the band's
 * lop_enabled state is currently the opposite value; if writing it
 * fails, the enclosing procedure returns cldev->error_code.
 * This expands to an `if' statement, so use it as a complete statement.
 */
#define cmd_enable_lop(cldev, pcls, enable)\
  if ( (pcls)->lop_enabled == ((enable) ^ 1) &&\
         cmd_put_enable_lop(cldev, pcls, enable) < 0\
     )\
    return (cldev)->error_code
#define cmd_disable_lop(cldev, pcls)\
  cmd_enable_lop(cldev, pcls, 0)
/*
 * Write the 1-byte command that enables or disables the logical
 * operation for a band, and record the new state in the band.
 * Returns 0, or the error code if no command space could be obtained
 * (in which case the band's state is not changed).
 */
private int
cmd_put_enable_lop(gx_device_clist *cldev, gx_clist_state *pcls, int enable)
{	byte *dp;
	byte opcode;

	if ( enable )
	  opcode = (byte)cmd_opv_enable_lop;
	else
	  opcode = (byte)cmd_opv_disable_lop;
	set_cmd_put_op(dp, cldev, pcls, opcode, 1);
	pcls->lop_enabled = enable;
	return 0;
}

/* ---------------- Driver interface ---------------- */

/*
 * Define macros for dividing up an operation into bands.
 * Note that BEGIN_RECT resets y and height.  It is OK for the code that
 * processes each band to reset height to a smaller (positive) value;
 * the vertical subdivision code in copy_mono, copy_color, and copy_alpha
 * makes use of this.
 *
 * The code between BEGIN_RECT and END_RECT is executed once for each
 * piece of the rectangle [y, y + height), with no piece crossing a
 * band boundary.  The macros expect cdev, y, and height to be in scope.
 * Within the body:
 *	band is the index of the band containing y;
 *	pcls points to that band's state;
 *	height is the number of scan lines from y to the end of the band
 *	  or the end of the rectangle, whichever comes first.
 * END_RECT then advances y by height.  Because the loop is a do-while,
 * the body is executed at least once even if height is not positive.
 * The body starts a new block, so it may begin with declarations.
 */
#define BEGIN_RECT\
   {	int yend = y + height;\
	int band_height = cdev->band_height;\
	do\
	   {	int band = y / band_height;\
		gx_clist_state *pcls = cdev->states + band;\
		int band_end = (band + 1) * band_height;\
		height = min(band_end, yend) - y;\
		   {
#define END_RECT\
		   }\
		y += height;\
	   }\
	while ( y < yend );\
   }

/*
 * Fill a rectangle with a solid color.  For each band the rectangle
 * touches: make sure the logical operation is disabled, make color the
 * band's color 1 if it is not already, and write a fill_rect command
 * for the part of the rectangle that lies in the band.
 * Returns 0, or the first error code encountered.
 */
private int
clist_fill_rectangle(gx_device *dev, int x, int y, int width, int height,
  gx_color_index color)
{	fit_fill(dev, x, y, width, height);
	BEGIN_RECT
	int code;

	cmd_disable_lop(cdev, pcls);
	if ( pcls->colors[1] != color )
	  {	code = cmd_set_color1(dev, pcls, color);
		if ( code < 0 )
		  return code;
	  }
	code = cmd_write_rect_cmd(dev, pcls, cmd_op_fill_rect, x, y,
				  width, height);
	if ( code < 0 )
	  return code;
	END_RECT
	return 0;
}

/*
 * Add an all-bands command giving the tile size and the size of the
 * replicated cell.  The opcode is set_tile_size when depth is 1 and
 * set_tile_size_colored otherwise; the depth itself is not written.
 * Returns 0, or the error code if no command space could be obtained.
 */
private int
cmd_put_tile_params(gx_device_clist *cldev, const gx_tile_bitmap *tile,
  int depth)
{	byte *dp;
	byte op;
	int tcsize;

	if ( depth == 1 )
	  op = (byte)cmd_opv_set_tile_size;
	else
	  op = (byte)cmd_opv_set_tile_size_colored;
	/* One byte for the opcode, then two pairs of integers. */
	tcsize = 1;
	tcsize += cmd_sizexy(tile->size);
	tcsize += cmd_size2w(tile->rep_width, tile->rep_height);
	set_cmd_put_all_op(dp, cldev, op, tcsize);
	dp++;
	cmd_putxy(tile->size, dp);
	cmd_put2w(tile->rep_width, tile->rep_height, dp);
	return 0;
}

/*
 * Record (px, py) as the band's tile phase and add a command that sets
 * it.  Note that the band's state is updated before the command space
 * is obtained.  Returns 0, or the error code if no space could be
 * obtained.
 */
private int
cmd_set_tile_phase(gx_device *dev, gx_clist_state *pcls,
  int px, int py)
{	byte *dp;
	int pcsize;

	pcls->tile_phase.y = py;
	pcls->tile_phase.x = px;
	/* The opcode byte is followed by the two phase values. */
	pcsize = cmd_sizexy(pcls->tile_phase) + 1;
	set_cmd_put_op(dp, cdev, pcls, (byte)cmd_opv_set_tile_phase, pcsize);
	dp += 1;
	cmd_putxy(pcls->tile_phase, dp);
	return 0;
}

/* Compare unequal tiles.  Return -1 if unrelated, */
/* or 1<=N<=49 for the size of the delta encoding. */
/*
 * old_data and new_data each hold tsize bytes of tile bits, which are
 * compared 16 bits at a time.  The tiles are "unrelated" if tsize
 * exceeds 128 or if more than 16 of the 16-bit words differ.
 * Otherwise the encoding is built in delta (which must have room for
 * 1 + 16 * 3 bytes):
 *	delta[0] = cmd_op_delta_tile_bits + (number of entries - 1);
 *	then one entry per differing word, either
 *	  <byte offset> <xor byte>  if only one byte of the word differs,
 *	  <0x80 + word offset> <xor byte> <xor byte>  if both bytes do,
 *	    with the two xor bytes in memory order.
 * The value returned includes the opcode byte.
 * NOTE(review): the data are accessed through bits16 pointers, so both
 * buffers are presumably 16-bit aligned and tsize even -- confirm.
 */
private int
tile_diff(const byte *old_data, const byte *new_data, uint tsize,
  byte _ss *delta)
{	register const bits16 *old2, *new2;
	register bits16 diff;
	int count;
	register int i;
	byte _ss *pd;
	if ( tsize > 128 ) return -1;
	old2 = (const bits16 *)old_data;
	new2 = (const bits16 *)new_data;
	count = 0;
	pd = delta + 1;			/* skip opcode */
	for ( i = 0; i < tsize; i += 2, old2++, new2++ )
	  if ( (diff = *new2 ^ *old2) != 0 )
	/* i_hi is the offset, within a 16-bit word, of the byte that */
	/* holds the word's high-order 8 bits; b_0 and b_1 extract the */
	/* word's first and second bytes in memory order. */
#if arch_is_big_endian
#  define i_hi 0
#  define b_0(w) ((w) >> 8)
#  define b_1(w) ((byte)(w))
#else
#  define i_hi 1
#  define b_0(w) ((byte)(w))
#  define b_1(w) ((w) >> 8)
#endif
	   {	if ( count == 16 ) return -1;
		if ( diff & 0xff00 )
		   {	if ( diff & 0xff )
				/* Both bytes differ. */
				*pd++ = 0x80 + i,
				*pd++ = b_0(diff),
				*pd++ = b_1(diff);
			else
				/* Only the high-order byte differs. */
				*pd++ = i + i_hi, *pd++ = diff >> 8;
		   }
		else			/* know diff != 0 */
			/* Only the low-order byte differs. */
			*pd++ = i + (1 - i_hi), *pd++ = (byte)diff;
		count++;
	   }
#undef b_0
#undef b_1
#undef i_hi
	if ( count == 0 )
	{	/* Tiles are identical.  This is highly unusual, */
		/* but not impossible. */
		/* Emit a single entry that changes nothing (xor of 0 */
		/* at offset 0), so that the opcode stays valid. */
		pd[0] = pd[1] = 0;
		pd += 2;
		count = 1;
	}
	delta[0] = (byte)cmd_op_delta_tile_bits + count - 1;
	return pd - delta;
}

/* Handle changing tiles for clist_tile_rectangle and clist_copy_rop. */
/*
 * Make tile (with the given depth) the current tile of the band whose
 * state is pcls.  The tile is looked up in the cache by its id and
 * entered if it is not there; this may first reinitialize the cache,
 * either because the tile parameters have changed (in which case an
 * all-bands command announcing the new parameters is written) or
 * because the cache is full.  Then a command is written for the band:
 * just the slot index if this band has already been sent the tile's
 * bits, otherwise the bits themselves in the most compact form
 * available.
 * Returns 0 on success; gs_error_unknownerror if the tile is too big
 * for the cache; or the error code from a failed command write.
 */
private int
clist_change_tile(gx_device_clist *cldev, gx_clist_state *pcls,
  const gx_tile_bitmap *tile, int depth)
{	tile_slot *old_tile, *new_tile;
	int slot_index;
	/* Look up the tile in the cache. */
top:	   {	gx_bitmap_id id = tile->id;
		/* Hash the id; collisions are resolved by probing at */
		/* a fixed odd stride through the (power of 2 sized) table. */
		uint probe = (uint)(id >> 16) + (uint)(id);
		old_tile = pcls->tile;
		for ( ; ; probe += 25 /* semi-random odd # */ )
		   {	tile_hash *hptr = cldev->tile_hash_table +
			  (probe & cldev->tile_hash_mask);
			if ( (slot_index = hptr->slot_index) < 0 ) /* empty entry */
			   {	/* Must change tiles.  Check whether the */
				/* tile size or depth has changed. */
				byte *new_bits;
				if ( tile->size.x < cldev->tile.size.x ||
				     tile->size.y < cldev->tile.size.y ||
				     tile->rep_width != cldev->tile.rep_width ||
				     tile->rep_height != cldev->tile.rep_height ||
				     depth != cldev->tile_depth
				   )
				   {	int code;
					gx_tile_bitmap save_tile;
					int save_depth;

					/* Give up if even one unreplicated */
					/* cell won't fit in the cache. */
					if ( bitmap_raster(tile->rep_width *
							   depth) *
					      tile->rep_height >
					       cldev->tile_max_size
					   )
					  return_error(gs_error_unknownerror);
					/* clist_init_tiles may adjust */
					/* the tile parameters.... */
					save_tile = cldev->tile;
					save_depth = cldev->tile_depth;
					cldev->tile = *tile;
					cldev->tile_depth = depth;
					clist_init_tiles(cldev);
					code = cmd_put_tile_params(cldev,
							&cldev->tile,
							cldev->tile_depth);
					if ( code < 0 )
					  { /* Couldn't announce the new */
					    /* parameters: go back to the */
					    /* old ones (the cache contents */
					    /* are lost all the same). */
					    cldev->tile = save_tile;
					    cldev->tile_depth = save_depth;
					    clist_init_tiles(cldev);
					    return code;
					  }
					/* The cache is now empty: start over. */
					goto top;
				   }
				if ( cldev->tile_count == cldev->tile_max_count )
				   {	/* Punt. */
					/* (The cache is full: empty it */
					/* and start over.) */
					clist_init_tiles(cldev);
					goto top;
				   }
				/* Claim the next free slot for this tile. */
				hptr->slot_index = slot_index =
				  cldev->tile_count++;
				new_tile = tile_slot_ptr(cldev, slot_index);
				new_tile->id = id;
				new_bits = ts_bits(cldev, new_tile);
				/* Copy the bits into the slot.  If the cached */
				/* layout is the same as the caller's, a single */
				/* memcpy will do; otherwise copy row by row, */
				/* keeping only the cached width. */
				if ( cldev->tile.size.x == tile->size.x &&
				     cldev->tile.raster == tile->raster
				   )
				  memcpy(new_bits, tile->data,
					 tile->raster * cldev->tile.size.y);
				else
				  { uint width = cldev->tile.size.x * depth;
				    uint height = cldev->tile.size.y;
				    uint raster = bitmap_raster(width);
				    /* Clear the slot first if a row has */
				    /* whole padding bytes that the copy */
				    /* below won't set. */
				    if ( raster * 8 - width >= 8 )
				      memset(new_bits, 0, raster * height);
				    bytes_copy_rectangle(new_bits,
							 raster,
							 tile->data,
							 tile->raster,
							 (width + 7) >> 3,
							 height);
				    /* Clean up trailing bits in last byte. */
				    if ( width & 7 )
				      { byte mask = (byte)(0xff00 >> (width & 7));
					byte *end = new_bits + (width >> 3);
					uint i;
					for ( i = 0; i < height; i++, end += raster )
					  *end &= mask;
				      }
				  }
				count_add1(cmd_tile_added);
				if_debug3('L', "[L]adding tile %d, hash=%d, id=0x%lx\n",
					 slot_index,
					 (int)(hptr - cldev->tile_hash_table),
					 id);
				break;
			   }
			else
			  new_tile = tile_slot_ptr(cldev, slot_index);
			/* The entry is in use: is it the tile we want? */
			if ( new_tile->id == id )
			   {	count_add1(cmd_tile_found);
				if_debug1('L', "[L]found tile %d\n",
					  slot_index);
				break;
			   }
		   }
	   }
	/*
	 * At this point we know:
	 *	cldev->tile.rep_{width,height} == tile->rep_{width,height}
	 *	cldev->tile.size.{x,y} <= tile->size.{x,y}
	 */
	/* Check whether this band knows about this tile yet. */
	/* (Each slot carries a bit mask with one bit per band.) */
	   {	int band_index = pcls - cldev->states;
		byte pmask = 1 << (band_index & 7);
		byte *ppresent = ts_mask(new_tile) + (band_index >> 3);
		if ( *ppresent & pmask )
		   {	/* Tile is known, just put out the index. */
			byte *dp;
			uint slot_size = cldev->tile_slot_size;
			int delta;
			/* If the new slot is within -8..+7 slots of the */
			/* band's current one, a 1-byte command with the */
			/* distance (biased by 8) in the opcode will do. */
			if ( old_tile != &no_tile &&
			     (delta =
			      (new_tile >= old_tile ?
			       ((byte *)new_tile - (byte *)old_tile) / slot_size :
			       -(((byte*)old_tile - (byte *)new_tile) / slot_size)) + 8,
			      !(delta & ~0xf))
			   )
			  { set_cmd_put_op(dp, cldev, pcls,
					   cmd_op_delta_tile_index + delta, 1);
			  }
			else
			if ( slot_index <= 255 )
			  { /* The index fits in a single byte. */
			    set_cmd_put_op(dp, cldev, pcls,
					   cmd_opv_set_tile_index, 2);
			    dp[1] = slot_index;
			  }
			else
			  { /* Write the index as a variable-size integer. */
			    int csize = 1 + cmd_sizew(slot_index);
			    set_cmd_put_op(dp, cldev, pcls,
					   cmd_opv_set_tile_index_long, csize);
			    ++dp;
			    cmd_putw(slot_index, dp);
			  }
		   }
		else
		   {	/* Tile is not known, put out the bits.  Use a */
			/* delta encoding or a short encoding if possible. */
			byte *new_data = ts_bits(cldev, new_tile);
			byte *dp;
			byte delta[1+16*3];
			/* csize covers the opcode and the slot index, */
			/* which are common to all three forms. */
			int csize = 1 + cmd_sizew(slot_index);
			int diff;

			/* Note that the band is marked as knowing the */
			/* tile before the command space is obtained. */
			*ppresent |= pmask;
			if ( old_tile != &no_tile &&
			     (diff = tile_diff(ts_bits(cldev, old_tile), new_data, cldev->tile.raster * cldev->tile.size.y, delta)) >= 0
			   )
			   {	/* Use delta representation */
				/* (diff counts the opcode byte, which */
				/* csize has already accounted for.) */
				int dcsize = csize - 1 + diff;
				set_cmd_put_op(dp, cldev, pcls,
					       delta[0], dcsize);
				--diff;
				memcpy(dp + csize, delta + 1, diff);
			   }
			else
			   {	int bwidth =
				  (tile->rep_width * depth + 7) >> 3;
				/* NOTE(review): both branches below use */
				/* the same opcode; presumably the reader */
				/* works out from the tile parameters */
				/* which layout follows -- confirm against */
				/* gxclread.c. */
				if ( bwidth <= 2 )
				   {	/* Short form: only the bytes of one */
					/* unreplicated cell, without row */
					/* padding. */
					int short_size =
					  bwidth * cldev->tile.rep_height;
					int bcsize = csize + short_size;
					set_cmd_put_op(dp, cldev, pcls,
						       cmd_opv_set_tile_bits_long,
						       bcsize);
					bytes_copy_rectangle(dp + csize,
							     bwidth,
							     new_data,
							     cldev->tile.raster,
							     bwidth,
							     cldev->tile.rep_height);
				   }
				else
				   {	/* Full rows, for rep_height rows. */
					int full_size =
					  cldev->tile.raster * cldev->tile.rep_height;
					int bcsize = csize + full_size;
					set_cmd_put_op(dp, cldev, pcls,
						       cmd_opv_set_tile_bits_long,
						       bcsize);
					memcpy(dp + csize, new_data, full_size);
				   }
			   }
			/* In every form the slot index follows the opcode. */
			++dp;
			cmd_putw(slot_index, dp);
		   }
	   }
	pcls->tile = new_tile;
	return 0;
}
/*
 * Tile a rectangle.  The tile is treated as colored (at the device's
 * depth) if both colors are gx_no_color_index, and as a 1-bit mask
 * otherwise.  For each band the rectangle touches: make sure the
 * logical operation is disabled and the band's current tile is the
 * right one; then bring the tile colors and phase up to date and write
 * a tile_rect command.  If the tile has no id, or can't be made the
 * band's current tile, that band's part of the rectangle is handed to
 * gx_default_tile_rectangle instead.
 * Returns 0, or the first error code encountered.
 */
private int
clist_tile_rectangle(gx_device *dev, const gx_tile_bitmap *tile, int x, int y,
  int width, int height, gx_color_index color0, gx_color_index color1,
  int px, int py)
{	int depth = 1;

	fit_fill(dev, x, y, width, height);
	if ( color0 == gx_no_color_index && color1 == gx_no_color_index )
	  depth = dev->color_info.depth;
	BEGIN_RECT
	int code;

	cmd_disable_lop(cdev, pcls);
	if ( tile->id != pcls->tile->id &&
	     (tile->id == gx_no_bitmap_id ||
	      clist_change_tile(cdev, pcls, tile, depth) < 0)
	   )
	  {	/* We can't use the tile cache for this one. */
		code = gx_default_tile_rectangle(dev, tile,
						 x, y, width, height,
						 color0, color1, px, py);
		if ( code < 0 )
		  return code;
	  }
	else
	  {	if ( color0 != pcls->tile_colors[0] ||
		     color1 != pcls->tile_colors[1]
		   )
		  {	code = cmd_set_tile_colors(dev, pcls, color0, color1);
			if ( code < 0 )
			  return code;
		  }
		if ( px != pcls->tile_phase.x || py != pcls->tile_phase.y )
		  {	code = cmd_set_tile_phase(dev, pcls, px, py);
			if ( code < 0 )
			  return code;
		  }
		code = cmd_write_rect_cmd(dev, pcls, cmd_op_tile_rect, x, y,
					  width, height);
		if ( code < 0 )
		  return code;
	  }
	END_RECT
	return 0;
}

/*
 * Copy a monochrome bitmap by writing copy_mono commands into the
 * command list.  data/data_x/raster describe the source bits; color0 and
 * color1 are the colors for 0 and 1 bits.
 *
 * Three encodings are used:
 *   - a "short" form (opcode cmd_op_copy_mono + bit offset + 1) when the
 *     bitmap is at least 2 pixels and at most 3 bytes wide and not too tall;
 *   - otherwise cmd_op_copy_mono followed by dx, row_bytes and the raw rows;
 *   - or cmd_op_copy_mono_rle if RLE shrinks the data to half or less.
 * A transfer that does not fit in cbuf_size is split, either by halving
 * the height or (for a single row) by halving the width recursively.
 * Returns 0 on success or a negative error code.
 */
private int
clist_copy_mono(gx_device *dev,
    const byte *data, int data_x, int raster, gx_bitmap_id id,
    int x, int y, int width, int height,
    gx_color_index color0, gx_color_index color1)
{	int y0;
	fit_copy(dev, data, data_x, raster, id, x, y, width, height);
	y0 = y;		/* remember the top row so we can locate each band's data */
	BEGIN_RECT
	gx_cmd_rect rect;
	uint dsize;
	int rsize;
	int bwidth;
	/* First source row for the current value of y. */
	const byte *row = data + (y - y0) * raster;
	byte *dp;

	cmd_disable_lop(cdev, pcls);
	if ( color0 != pcls->colors[0] )
	  {	int code = cmd_set_color0(dev, pcls, color0);
		if ( code < 0 )
		  return code;
	  }
	if ( color1 != pcls->colors[1] )
	  {	int code = cmd_set_color1(dev, pcls, color1);
		if ( code < 0 )
		  return code;
	  }
copy:	cmd_set_rect(rect);
	rsize = cmd_size_rect(&rect);
	if ( width >= 2 && (bwidth = (width + (data_x & 7) + 7) >> 3) <= 3 &&
	     height <= 255 &&
	     height <= (cbuf_size - cmd_largest_size) / align_bitmap_mod
	   )
	  {	/* Short form: only the bwidth bytes of each row that */
		/* actually hold pixels are written. */
		int rcsize;
		dsize = height * bwidth;
		rcsize = 1 + rsize + dsize;
		set_cmd_put_op(dp, cdev, pcls,
			       (byte)cmd_op_copy_mono + (data_x & 7) + 1,
			       rcsize);
		++dp;
		dp = cmd_put_rect(&rect, dp);
		row += data_x >> 3;
		bytes_copy_rectangle(dp, bwidth, row, raster, bwidth, height);
		pcls->rect = rect;
	  }
	else
	   {	int dx, row_bytes;
		if ( height == 1 )
		  {	/* We don't need to write the entire row, */
			/* only the part that is being copied. */
			int w1 = width;
			dx = data_x & 7;
			row += data_x >> 3;
copy1:			row_bytes = (uint)(dx + w1 + 7) >> 3;
			if ( row_bytes > cbuf_size )
			  {	/* Split a single (very long) row. */
				/* The right part [w2, w1) is written by */
				/* the recursive call; the left part */
				/* [0, w2) is handled below. */
				int w2 = w1 >> 1;
				int code = clist_copy_mono(dev, row, dx + w2,
					row_bytes, gx_no_bitmap_id, x + w2, y,
					w1 - w2, 1, color0, color1);
				if ( code < 0 )
				  return code;
				/* Exactly w2 pixels remain (not w1 - w2, */
				/* which would overlap by one pixel when */
				/* w1 is odd). */
				w1 = w2;
				rect.width = w1;
				rsize = cmd_size_rect(&rect);
				goto copy1;
			  }
			dsize = row_bytes;
		   }
		else
		  {	dx = data_x;
			row_bytes = raster;
			dsize = height * raster;
			if ( dsize > cbuf_size )
			  {	/* Split the transfer by reducing the */
				/* height. See the comment above BEGIN_RECT. */
				height >>= 1;
				goto copy;
			  }
		  }
		{	/* csize = opcode + rectangle + dx + row_bytes; */
			/* the bitmap data follow immediately. */
			uint csize =
			  1 + rsize + cmd_sizew(dx) + cmd_sizew(row_bytes);
			int rcsize = csize + dsize;
			set_cmd_put_op(dp, cdev, pcls,
				       (byte)cmd_op_copy_mono, rcsize);
			/* See if compressing with RLE is worthwhile. */
			if ( dsize >= 50 )
			  {	stream_RLE_state sstate;
				stream_cursor_read r;
				/* Cursors point at the byte before the */
				/* next one to be read or written. */
				byte *wbase = dp + (csize - 1);
				stream_cursor_write w;
				int status;
				uint wcount;
				sstate.EndOfData = true;
				sstate.record_size = 0;
				s_RLE_init_inline(&sstate);
				r.ptr = row - 1;
				r.limit = r.ptr + dsize;
				w.ptr = wbase;
				w.limit = w.ptr + dsize;
				status =
				  (*s_RLE_template.process)
				    ((stream_state *)&sstate, &r, &w, true);
				if ( status == 0 && (wcount = w.ptr - wbase) <= dsize >> 1 )
				  {	/* Use compressed representation. */
					/* Give back the unused space and */
					/* re-account the command under */
					/* the RLE opcode. */
					uncount_op(cmd_op_copy_mono, rcsize);
					cmd_shorten_op(cdev, pcls, dsize - wcount);
					count_op(*dp = (byte)cmd_op_copy_mono_rle,
						 csize + wcount);
					goto out;
				  }
			  }
			/* Not compressed: overwrite whatever the RLE */
			/* attempt left behind with the raw data. */
			memcpy(dp + csize, row, dsize);
out:			++dp;
			dp = cmd_put_rect(&rect, dp);
			cmd_putw(dx, dp);
			cmd_putw(row_bytes, dp);
			pcls->rect = rect;
		}
	   }
	END_RECT
	return 0;
}

/*
 * Copy a color bitmap by writing cmd_op_copy_color commands into the
 * command list.  Each command carries the rectangle, the pixel offset dx
 * of the first pixel within the data, the number of bytes per row, and
 * the raw data.
 * A transfer that does not fit in cbuf_size is split, either by halving
 * the height or (for a single row) by halving the width recursively.
 * Returns 0 on success or a negative error code.
 */
private int
clist_copy_color(gx_device *dev,
    const byte *data, int data_x, int raster, gx_bitmap_id id,
    int x, int y, int width, int height)
{	int y0;
	fit_copy(dev, data, data_x, raster, id, x, y, width, height);
	y0 = y;		/* remember the top row so we can locate each band's data */
	BEGIN_RECT
	int dx, row_bytes;
	int rwidth;	/* width actually covered by this command */
	uint dsize;
	/* First source row for the current value of y. */
	const byte *row = data + (y - y0) * raster;

	cmd_disable_lop(cdev, pcls);
copy:	rwidth = width;
	if ( height == 1 )
	  {	/* We don't need to write the entire row, */
		/* only the part that is being copied. */
		int w1 = width;
		int depth = dev->color_info.depth;
		uint dbit = data_x * depth;
		dx = (dbit & 7) / depth;
		row += dbit >> 3;
copy1:		row_bytes = (uint)((dbit & 7) + (w1 * depth) + 7) >> 3;
		if ( row_bytes > cbuf_size )
		  {	/* Split a single (very long) row. */
			/* The right part [w2, w1) is written by the */
			/* recursive call; the left part [0, w2) is */
			/* handled below. */
			int w2 = w1 >> 1;
			int code = clist_copy_color(dev, row, dx + w2,
				row_bytes, gx_no_bitmap_id, x + w2, y,
				w1 - w2, 1);
			if ( code < 0 )
			  return code;
			/* Exactly w2 pixels remain (not w1 - w2, which */
			/* would overlap by one pixel when w1 is odd). */
			w1 = w2;
			goto copy1;
		  }
		/* The command must describe only the pixels whose data */
		/* it carries, as clist_copy_mono does for its rectangle. */
		rwidth = w1;
		dsize = row_bytes;
	  }
	else
	  {	dx = data_x;
		row_bytes = raster;
		dsize = height * raster;
		if ( dsize > cbuf_size )
		  {	/* Split the transfer by reducing the */
			/* height. See the comment above BEGIN_RECT. */
			height >>= 1;
			goto copy;
		  }
	  }
	  {	gx_cmd_rect rect;
		int rcsize;
		byte *dp;

		cmd_set_rect(rect);
		rect.width = rwidth;
		/* opcode + rectangle + dx + row_bytes + data */
		rcsize = 1 + cmd_size_rect(&rect) + cmd_sizew(dx) +
		  cmd_sizew(row_bytes) + dsize;
		set_cmd_put_op(dp, cdev, pcls,
			       (byte)cmd_op_copy_color, rcsize);
		++dp;
		dp = cmd_put_rect(&rect, dp);
		pcls->rect = rect;
		cmd_putw(dx, dp);
		cmd_putw(row_bytes, dp);
		memcpy(dp, row, dsize);
	  }
	END_RECT
	return 0;
}

/*
 * Copy an alpha (multi-bit-per-pixel mask) bitmap by writing
 * cmd_op_copy_alpha + depth commands into the command list; the color is
 * recorded as color 1 of the band.  depth is the number of bits per
 * pixel of the source data.
 * A transfer that does not fit in cbuf_size is split, either by halving
 * the height or (for a single row) by halving the width recursively.
 * Returns 0 on success or a negative error code.
 */
private int
clist_copy_alpha(gx_device *dev, const byte *data, int data_x,
  int raster, gx_bitmap_id id, int x, int y, int width, int height,
  gx_color_index color, int depth)
{	/* I don't like copying the entire body of clist_copy_color */
	/* just to change 2 arguments and 1 opcode, */
	/* but I don't see any alternative that doesn't require */
	/* another level of procedure call even in the common case. */
	int y0;
	fit_copy(dev, data, data_x, raster, id, x, y, width, height);
	y0 = y;		/* remember the top row so we can locate each band's data */
	BEGIN_RECT
	int dx, row_bytes;
	int rwidth;	/* width actually covered by this command */
	uint dsize;
	/* First source row for the current value of y. */
	const byte *row = data + (y - y0) * raster;

	cmd_disable_lop(cdev, pcls);
	if ( color != pcls->colors[1] )
	  {	int code = cmd_set_color1(dev, pcls, color);
		if ( code < 0 )
		  return code;
	  }
copy:	rwidth = width;
	if ( height == 1 )
	  {	/* We don't need to write the entire row, */
		/* only the part that is being copied. */
		int w1 = width;
		uint dbit = data_x * depth;
		dx = (dbit & 7) / depth;
		row += dbit >> 3;
copy1:		row_bytes = (uint)((dbit & 7) + (w1 * depth) + 7) >> 3;
		if ( row_bytes > cbuf_size )
		  {	/* Split a single (very long) row. */
			/* The right part [w2, w1) is written by the */
			/* recursive call; the left part [0, w2) is */
			/* handled below. */
			int w2 = w1 >> 1;
			int code = clist_copy_alpha(dev, row, dx + w2,
				row_bytes, gx_no_bitmap_id, x + w2, y,
				w1 - w2, 1, color, depth);
			if ( code < 0 )
			  return code;
			/* Exactly w2 pixels remain (not w1 - w2, which */
			/* would overlap by one pixel when w1 is odd). */
			w1 = w2;
			goto copy1;
		  }
		/* The command must describe only the pixels whose data */
		/* it carries, as clist_copy_mono does for its rectangle. */
		rwidth = w1;
		dsize = row_bytes;
	  }
	else
	  {	dx = data_x;
		row_bytes = raster;
		dsize = height * raster;
		if ( dsize > cbuf_size )
		  {	/* Split the transfer by reducing the */
			/* height. See the comment above BEGIN_RECT. */
			height >>= 1;
			goto copy;
		  }
	  }
	  {	gx_cmd_rect rect;
		int rcsize;
		byte *dp;

		cmd_set_rect(rect);
		rect.width = rwidth;
		/* opcode + rectangle + dx + row_bytes + data */
		rcsize = 1 + cmd_size_rect(&rect) + cmd_sizew(dx) +
		  cmd_sizew(row_bytes) + dsize;
		set_cmd_put_op(dp, cdev, pcls,
			       cmd_op_copy_alpha + depth, rcsize);
		++dp;
		dp = cmd_put_rect(&rect, dp);
		pcls->rect = rect;
		cmd_putw(dx, dp);
		cmd_putw(row_bytes, dp);
		memcpy(dp, row, dsize);
	  }
	END_RECT
	return 0;
}

/*
 * Find the band containing scan line y.
 * y is first clamped to the range [0, dev->height].  The first scan line
 * of the band is stored in *band_start; the return value is the number of
 * scan lines from there to the end of the band or the end of the page,
 * whichever comes first.
 */
private int
clist_get_band(gx_device *dev, int y, int *band_start)
{	int yc = y;
	int first, rest;

	if ( yc < 0 )
	  yc = 0;
	else if ( yc >= dev->height )
	  yc = dev->height;
	/* Round down to a multiple of the band height. */
	first = yc - yc % cdev->band_height;
	*band_start = first;
	rest = dev->height - first;
	return min(rest, cdev->band_height);
}

/*
 * Perform a RasterOp by writing commands into the command list.
 * For each band this sets the logical operation, sets up the texture
 * (tile, phase and tile colors) if the operation uses it, and then
 * writes the source with clist_fill_rectangle (constant source),
 * clist_copy_mono (2-color source) or clist_copy_color (scolors == 0).
 * Returns 0 on success or a negative error code.
 */
private int
clist_copy_rop(gx_device *dev,
  const byte *sdata, int sourcex, uint sraster, gx_bitmap_id id,
  const gx_color_index *scolors,
  const gx_tile_bitmap *texture, const gx_color_index *tcolors,
  int x, int y, int width, int height,
  int phase_x, int phase_y, gs_logical_operation_t command)
{	gs_rop3_t rop = command & lop_rop_mask;
	gx_tile_bitmap tile_with_id;
	const gx_tile_bitmap *tile = texture;
	int y0;

	/*
	 * A constant source (both source colors equal) is written with
	 * clist_fill_rectangle and doesn't read sdata, so only the
	 * rectangle needs adjusting.  Every other case reads sdata, so
	 * sdata and sourcex must be adjusted along with the rectangle.
	 */
	if ( scolors != 0 && scolors[0] == scolors[1] )
	  {	fit_fill(dev, x, y, width, height);
	  }
	else
	  {	fit_copy(dev, sdata, sourcex, sraster, id,
			 x, y, width, height);
	  }
	y0 = y;		/* sdata corresponds to this scan line */
	/*
	 * We shouldn't need to put the logic below inside BEGIN/END_RECT,
	 * but the lop_enabled flags are per-band.
	 */
	BEGIN_RECT
	int code;

	if ( command != pcls->lop )
	  {	byte *dp;
		set_cmd_put_op(dp, cdev, pcls,
			       cmd_opv_set_lop, 1 + cmd_sizew(command));
		++dp;
		cmd_putw(command, dp);
		pcls->lop = command;
	  }
	cmd_enable_lop(cdev, pcls, 1);
	if ( rop3_uses_T(rop) )
	  {	if ( tcolors == 0 || tcolors[0] != tcolors[1] )
		  { if ( tile->id != pcls->tile->id )
		      { /* Change tile.  If there is no id, generate one. */
			if ( tile->id == gx_no_bitmap_id )
			  { tile_with_id = *tile;
			    tile_with_id.id = gs_next_ids(1);
			    tile = &tile_with_id;
			  }
			code = clist_change_tile(cdev, pcls, tile,
						 (tcolors != 0 ? 1 :
						  dev->color_info.depth));
			if ( code < 0 )
			  return code;
			/* NOTE(review): the phase is only updated when */
			/* the tile changes -- confirm this is intended. */
			if ( phase_x != pcls->tile_phase.x ||
			     phase_y != pcls->tile_phase.y
			   )
			  { code = cmd_set_tile_phase(dev, pcls, phase_x,
						      phase_y);
			    if ( code < 0 )
			      return code;
			  }
		      }
		  }
		/* Set the tile colors. */
		code = (tcolors != 0 ?
			cmd_set_tile_colors(dev, pcls, tcolors[0],
					    tcolors[1]) :
			cmd_set_tile_colors(dev, pcls, gx_no_color_index,
					    gx_no_color_index));
		if ( code < 0 )
		  return code;
	  }
	/* Set lop_enabled to -1 so that fill_rectangle / copy_* */
	/* won't attempt to set it to 0. */
	pcls->lop_enabled = -1;
	if ( scolors != 0 && scolors[0] == scolors[1] )
	  code = clist_fill_rectangle(dev, x, y, width, height,
				      scolors[1]);
	else
	  {	/* y advances from band to band, so the source data */
		/* must advance with it.  Once the data pointer has */
		/* moved, the bitmap no longer matches the caller's id. */
		const byte *row = sdata + (y - y0) * sraster;
		gx_bitmap_id row_id = (y == y0 ? id : gx_no_bitmap_id);

		if ( scolors != 0 )
		  code = clist_copy_mono(dev, row, sourcex, sraster, row_id,
					 x, y, width, height,
					 scolors[0], scolors[1]);
		else
		  code = clist_copy_color(dev, row, sourcex, sraster, row_id,
					  x, y, width, height);
	  }
	pcls->lop_enabled = 1;
	/* Report the failure to the caller rather than hiding it. */
	if ( code < 0 )
	  return code;
	END_RECT
	return 0;
}

/* Write a path.  This takes care of breaking up the path if necessary. */
/*
 * Each piece is written as a command of at most max_cmd_path bytes:
 *	dp[0] = opcode, dp[1] = number of data bytes, dp[2..] = path data.
 * If the path doesn't fit in one command, every piece but the last is
 * rewritten as cmd_opv_append, and only the last piece carries op.
 * *ppos is the current position, updated as the path is encoded.
 * Returns 0 on success or a negative error code from clist_encode_path.
 */
private int clist_encode_path(P4(gs_path_enum *, stream_cursor_write *,
  gs_fixed_point *, bool));
private int
cmd_put_path(gx_device_clist *cldev, gx_clist_state *pcls, gx_path *ppath,
  gs_fixed_point *ppos, byte op, bool do_close)
{	gs_path_enum cenum;
	gx_path_enum_init(&cenum, ppath);
	for ( ; ; )
	  {	byte *dp;
		stream_cursor_write cw;
		int code, len, delta;
		/* The data length must fit in one byte: 2 + 255 = 257. */
#define max_cmd_path min(cbuf_size, 257)
		set_cmd_put_op(dp, cldev, pcls, op, max_cmd_path);
		/*
		 * A write cursor points at the last byte written, and its
		 * limit at the last byte that may be written.  The data
		 * start at dp[2], and the last byte we reserved is
		 * dp[max_cmd_path - 1].
		 */
		cw.limit = (cw.ptr = dp + 1) + (max_cmd_path - 2);
		code = clist_encode_path(&cenum, &cw, ppos, do_close);
		len = cw.ptr + 1 - dp;
		/* Shorten the path command. */
		dp[1] = len - 2;
		delta = max_cmd_path - len;
		uncount_op(op, delta);
		cmd_shorten_op(cldev, pcls, delta);
		/* code > 0 means the path is finished; < 0 is an error. */
		if ( code )
		  return min(code, 0);
		/* Replace the path command with an append. */
		uncount_op(op, len);
		count_op(*dp = cmd_opv_append, len);
#undef max_cmd_path
	  }
}

/* Encode a command list path. */
/* Return 1 if finished, 0 if we ran out of room, */
/* or a negative error code if the enumerator returns an unknown element. */
/*
 * penum supplies the path elements; pw is the output cursor (pw->ptr is
 * the last byte written, pw->limit the last byte that may be written);
 * *ppos is the current point on entry and is updated on normal return.
 * Each element is written as an opcode byte followed by its operands,
 * which are coordinate differences in a variable-length encoding:
 *	3 bytes if is_diff3 (first byte has the top bit clear),
 *	4 bytes if is_diff4 (first byte is 0x80 + 6 bits),
 *	otherwise 0xc0 followed by all sizeof(fixed) bytes.
 * Closepath elements are only written if do_close is true.
 */
private int
clist_encode_path(gs_path_enum *penum, stream_cursor_write *pw,
  gs_fixed_point *ppos, bool do_close)
{	byte *q = pw->ptr;
	byte *end = pw->limit;
	fixed px = ppos->x, py = ppos->y;
	fixed vs[6];
	/* The largest element: an opcode plus 6 full-size operands. */
	const int max_op_size = 1 + 6 * (1 + sizeof(fixed));
#define A vs[0]
#define B vs[1]
#define C vs[2]
#define D vs[3]
#define E vs[4]
#define F vs[5]
	/* Nonzero until the enumerator reports the end of the path, */
	/* so that stopping for lack of room is reported as unfinished. */
	int pe_op = 1;

	/* Only fetch another element if the largest one would fit. */
	while ( end - q >= max_op_size &&
	        (pe_op = gx_path_enum_next(penum, (gs_fixed_point *)vs)) != 0
	      )
	  { byte opcode;
	    int i;
	    fixed nx, ny;

	    switch ( pe_op )
	      {
	      case gs_pe_moveto:
		A = (nx = A) - px, B = (ny = B) - py;
		opcode = cp_rmoveto;
		break;
	      case gs_pe_lineto:
		A = (nx = A) - px, B = (ny = B) - py;
		/* Horizontal and vertical lines need only 1 operand. */
		if ( B == 0 )
		  opcode = cp_hlineto;
		else if ( A == 0 )
		  A = B, opcode = cp_vlineto;
		else
		  opcode = cp_rlineto;
		break;
	      case gs_pe_closepath:
		if ( do_close )
		  { *++q = cp_closepath;
		    px = A, py = B;
		  }
		continue;
	      case gs_pe_curveto:
		/* Make each point relative to the previous one. */
		E = (nx = E) - C, F = (ny = F) - D;
		C -= A, D -= B;
		A -= px, B -= py;
		/* Drop operands that are known to be zero. */
		if ( B == 0 && E == 0 )
		  B = C, C = D, D = F, opcode = cp_hvcurveto;
		else if ( A == 0 && F == 0 )
		  A = B, B = C, C = D, D = E, opcode = cp_vhcurveto;
		else if ( C == 0 && D == 0 )
		  C = E, D = F, opcode = cp_ccurveto;
		else
		  opcode = cp_rrcurveto;
		break;
	      default:
		return_error(gs_error_rangecheck);
	      }
	    *++q = opcode;
	    for ( i = 0; i < clist_path_op_num_operands[opcode]; ++i )
	      { fixed d = vs[i];
		/* Each branch writes the high-order bytes and leaves */
		/* q at the last byte of the operand; the 2 low-order */
		/* bytes are stored in common below. */
		if ( is_diff3(d) )
		  q[1] = (d >> 16) & 0x7f, q += 3;
		else if ( is_diff4(d) )
		  q[1] = ((d >> 24) & 0x3f) + 0x80,
		    q[2] = (byte)(d >> 16),
		    q += 4;
		else
		  { int b;
		    *++q = 0xc0;
		    for ( b = sizeof(fixed) - 1; b > 1; --b )
		      *++q = (byte)(d >> (b * 8));
		    q += 2;
		  }
		q[-1] = (byte)(d >> 8);
		*q = (byte)d;
	      }
	    px = nx, py = ny;
	  }
#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
	pw->ptr = q;
	ppos->x = px, ppos->y = py;
	/* Finished only if the enumerator reported the end of the path. */
	return (pe_op == 0 ? 1 : 0);
}
