#include "c.h"
//#define DODEBUG
#define NORMAL_REG_ORDER
#define readsreg(p) \
	(generic((p)->op)==INDIR && (p)->kids[0]->op==VREG+P)
#define setsrc(d) ((d) && (d)->x.regnode && \
	(d)->x.regnode->set == src->x.regnode->set && \
	(d)->x.regnode->mask&src->x.regnode->mask)

#define relink(a, b) ((b)->x.prev = (a), (a)->x.next = (b))

static Symbol   askfixedreg     ARGS((Symbol));
static Symbol   askreg          ARGS((Symbol, unsigned*));
static void     blkunroll       ARGS((int, int, int, int, int, int, int[]));
static void     docall          ARGS((Node));
static void     dumpcover       ARGS((Node, int, int));
static void     dumpregs        ARGS((char *, char *, char *,char *));
static void     dumprule        ARGS((int));
void     dumptree        ARGS((Node,int));
unsigned	emitasm		ARGS((Node, int));
static void     genreload       ARGS((Node, Symbol, int));
static void     genspill        ARGS((Symbol, Node, Symbol));
static Symbol   getreg          ARGS((Symbol, unsigned*, Node,int));
static int      getrule         ARGS((Node, int));
void     linearize       ARGS((Node, Node));
int      moveself        ARGS((Node));
static void     prelabel        ARGS((Node));
static Node*    prune           ARGS((Node, Node*));
static void     putreg          ARGS((Symbol));
static void     ralloc          ARGS((Node));
static void     reduce          ARGS((Node, int));
static int      reprune         ARGS((Node*, int, int, Node));
int      requate         ARGS((Node));
static Node     reuse           ARGS((Node, int));
static void     rewrite         ARGS((Node));
static Symbol   spillee         ARGS((Symbol, Node));
static void     spillr          ARGS((Symbol, Node));
static int      trashes         ARGS((Node, Node));
static int      uses            ARGS((Node, unsigned));

int offset;

int maxoffset;

int framesize;
int argoffset;

int maxargoffset;

int dalign, salign;
int bflag = 0;  /* omit */
int dflag = 0;

int swap;

static int unsafe=1;

unsigned (*emitter) ARGS((Node, int)) = emitasm;
static char NeedsReg[] = {
	0,                      /* unused */
	1,                      /* CNST */
	0, 0,                   /* ARG ASGN */
	1,                      /* INDIR  */
	1, 1, 1, 1,             /* CVC CVD CVF CVI */
	1, 1, 1, 1,             /* CVP CVS CVU NEG */
	1,                      /* CALL */
	1,                      /* LOAD */
	0,                      /* RET */
	1, 1, 1,                /* ADDRG ADDRF ADDRL */
	1, 1, 1, 1, 1,          /* ADD SUB LSH MOD RSH */
	1, 1, 1, 1,             /* BAND BCOM BOR BXOR */
	1, 1,                   /* DIV MUL */
	0, 0, 0, 0, 0, 0,       /* EQ GE GT LE LT NE */
	0, 0,                   /* JUMP LABEL   */
	1
};
Symbol rmap[16];
Node head;

unsigned freemask[2];
unsigned usedmask[2];
unsigned tmask[2];
unsigned vmask[2];
Symbol mkreg(char *fmt,int n,int mask,int set)
{
	Symbol p;

	NEW0(p, PERM);
	p->name = fmt+1;
	p->x.name = stringf(fmt, n);
	NEW0(p->x.regnode, PERM);
	p->x.regnode->number = (short)n;
	p->x.regnode->mask = mask<<n;
	p->x.regnode->set = (short)set;
	return p;
}
Symbol mkwildcard(Symbol *syms)
{
	Symbol p;

	NEW0(p, PERM);
	p->x.name = "wildcard";
	p->x.wildcard = syms;
	return p;
}
void mkauto(Symbol p)
{
	assert(p->sclass == AUTO);
	offset = roundup(offset + p->type->size, p->type->align);
	p->x.offset = -offset;
	if (hasExceptions)
		p->x.offset -= 24;
	p->x.name = stringd(p->x.offset);
}
void blockbeg(Env *e)
{
	e->offset = offset;
	e->freemask[IREG] = freemask[IREG];
	e->freemask[FREG] = freemask[FREG];
}
void blockend(Env *e)
{
	if (offset > maxoffset)
		maxoffset = offset;
	offset = e->offset;
	freemask[IREG] = e->freemask[IREG];
	freemask[FREG] = e->freemask[FREG];
}
int mkactual(int align,int size)
{
	int n = roundup(argoffset, align);

	argoffset = n + size;
	return n;
}
static void docall(Node p)
{
	FunctionHasCalls = 1;
	p->syms[0] = intconst(argoffset);
	if (argoffset > maxargoffset)
		maxargoffset = argoffset;
	argoffset = 0;
}
void blkcopy(int dreg,int doff,int sreg,int soff,int size,int tmp[])
{
	assert(size >= 0);
	if (size == 0)
		return;
	else if (size <= 2)
		blkunroll(size, dreg, doff, sreg, soff, size, tmp);
	else if (size == 3) {
		blkunroll(2, dreg, doff,   sreg, soff,   2, tmp);
		blkunroll(1, dreg, doff+2, sreg, soff+2, 1, tmp);
	}
	else if (size <= 16) {
		blkunroll(4, dreg, doff, sreg, soff, size&~3, tmp);
		blkcopy(dreg, doff+(size&~3),
	                sreg, soff+(size&~3), size&3, tmp);
	}
	else
		(*IR->x.blkloop)(dreg, doff, sreg, soff, size, tmp);
}
static void blkunroll(int k,int dreg,int doff,int sreg,int soff,int size,int tmp[])
{
	int i;

	assert(IR->x.max_unaligned_load);
	if (k > IR->x.max_unaligned_load
	&& (k > salign || k > dalign))
		k = IR->x.max_unaligned_load;
	for (i = 0; i+k < size; i += 2*k) {
		(*IR->x.blkfetch)(k, soff+i,   sreg, tmp[0]);
		(*IR->x.blkfetch)(k, soff+i+k, sreg, tmp[1]);
		(*IR->x.blkstore)(k, doff+i,   dreg, tmp[0]);
		(*IR->x.blkstore)(k, doff+i+k, dreg, tmp[1]);
	}
	if (i < size) {
		(*IR->x.blkfetch)(k, i+soff, sreg, tmp[0]);
		(*IR->x.blkstore)(k, i+doff, dreg, tmp[0]);
	}
}
void parseflags(int argc,char *argv[])
{
	int i;

	for (i = 0; i < argc; i++)
		if (strcmp(argv[i], "-d") == 0)
			dflag = 1;
		else if (strcmp(argv[i], "-b") == 0)	/* omit */
			bflag = 1;			/* omit */
}
/*------------------------------------------------------------------------
 Procedure:     getrule ID:1
 Purpose:       This function just encapsulates the call to the
                interface rule function. 14.3.382
 Input:
 Output:
 Errors:
------------------------------------------------------------------------*/
static int getrule(Node p,int nt)
{
	int rulenum;

	assert(p);
	rulenum = (*IR->x._rule)(p->x.state, nt);
	if (rulenum == 0) {
		assert(0);
	}
	return rulenum;
}
static void reduce(Node p,int nt)
{
	int rulenum, i;
	short *nts;
	Node kids[10];

	p = reuse(p, nt);
#ifdef PURIFY
	rulenum = getrule(p, nt);
#else
	rulenum = (*IR->x._rule)(p->x.state, nt);
#endif
	nts = IR->x._nts[rulenum];
	p->x.rulenum = rulenum;
	(*IR->x._kids)(p, rulenum, kids);
	for (i = 0; nts[i]; i++)
		reduce(kids[i], nts[i]);
	if (IR->x._isinstruction[rulenum]) {
		assert(p->x.inst == 0 || p->x.inst == nt);
		p->x.inst = (short)nt;
		if (p->syms[RX] && p->syms[RX]->temporary) {
#ifdef DODEBUG
			fprint(2, "(using %s)\n", p->syms[RX]->name);
#endif
			p->syms[RX]->x.usecount++;
		}
	}
}
/*------------------------------------------------------------------------
 Procedure:     reuse ID:1
 Purpose:       The call to reuse sees if the reduction of node p using
                nonterminal nt uses a 'bonus match'. If so, it returns
                the common subexpression instead of p. The objective is
                to reprocess the common subexpression and ignore the
                temporary. It collaborates with reduce to recalculate
                expressions that are cheaper to recalculate than to
                store into a register.14.3.383
 Input:
 Output:
 Errors:
------------------------------------------------------------------------*/
static Node reuse(Node p,int nt)
{
	struct _state {
		short cost[1];
	};
	Symbol r = p->syms[RX];

	if (generic(p->op) == INDIR && p->kids[0]->op == VREG+P
	&& r->u.t.cse && p->x.mayrecalc
	&& ((struct _state*)r->u.t.cse->x.state)->cost[nt] == 0)
		return r->u.t.cse;
	else
		return p;
}

int mayrecalc(Node p)
{
	Node q;

	assert(p && p->syms[RX]);
	if (!p->syms[RX]->u.t.cse)
		return 0;
	for (q = head; q && q->x.listed; q = q->link)
		if (generic(q->op) == ASGN
		&& trashes(q->kids[0], p->syms[RX]->u.t.cse))
			return 0;
	p->x.mayrecalc = 1;
	return 1;
}
static int trashes(Node p,Node q)
{
	assert(p);
	if (!q)
		return 0;
	/*
	This read before syms[0]. It should be syms[RX] in my opinion. I leave both
	*/
	else if (p->op == q->op && 
		(p->syms[RX] == q->syms[RX] || p->syms[0] == q->syms[0]))
		return 1;
	else
		return trashes(p, q->kids[0])
		    || trashes(p, q->kids[1]);
}
static Node *prune(Node p,Node pp[])
{
	if (p == NULL)
		return pp;
#if 0 /* here I have a problem. The origianl sources didn't make this assertion. Where does this come from? */
	if(p->x.kids[2] != NULL) {
		assert(0);
	}
#endif
	p->x.kids[0] = p->x.kids[1] = p->x.kids[2] = NULL;
	if (p->x.inst == 0)
		return prune(p->kids[1], prune(p->kids[0], pp));
	else if (p->syms[RX] && p->syms[RX]->temporary
	&& p->syms[RX]->x.usecount < 2) {
		p->x.inst = 0;
#ifdef DODEBUG
		fprint(2, "(clobbering %s)\n", p->syms[RX]->name);
#endif
		return prune(p->kids[1], prune(p->kids[0], pp));
	}
	else {
		prune(p->kids[1], prune(p->kids[0], &p->x.kids[0]));
		*pp = p;
		return pp + 1;
	}
}

#define ck(i) return (i) ? 0 : LBURG_MAX

int range(Node p,int lo,int hi)
{
	Symbol s = p->syms[0];

	switch (p->op) {
	case ADDRFP: ck(s->x.offset >= lo && s->x.offset <= hi);
	case ADDRLP: ck(s->x.offset >= lo && s->x.offset <= hi);
	case CNSTC:  ck(s->u.c.v.sc >= lo && s->u.c.v.sc <= hi);
	case CNSTI:  ck(s->u.c.v.i  >= lo && s->u.c.v.i  <= hi);
	case CNSTS:  ck(s->u.c.v.ss >= lo && s->u.c.v.ss <= hi);
	case CNSTU:  ck(s->u.c.v.u  >= (unsigned)lo && s->u.c.v.u  <= (unsigned)hi);
	case CNSTP:  ck(s->u.c.v.p  == 0  && lo <= 0 && hi >= 0);
	}
	return LBURG_MAX;
}
void dumptree(Node p,int level)
{
	fprint(2, "%s(", IR->x._opname[p->op]);
	if (IR->x._arity[p->op] == 0 && p->syms[0])
		fprint(2, "%s", p->syms[0]->name);
	else if (IR->x._arity[p->op] == 1)
		dumptree(p->kids[0],level+1);
	else if (IR->x._arity[p->op] == 2) {
		dumptree(p->kids[0],level+1);
		fprint(2, ", ");
		dumptree(p->kids[1],level+1);
	}
	fprint(2, ")");
}
void ildumptree(Node p,int level)
{
	fprintf(ilFile, "%s(", IR->x._opname[p->op]);
	if (IR->x._arity[p->op] == 0 && p->syms[0]) {
		if (p->syms[0]->name == NULL)
			fprintf(ilFile,"%s",p->syms[0]->x.name);
		else
			fprintf(ilFile, "%s", p->syms[0]->name);
	}
	else if (IR->x._arity[p->op] == 1)
		ildumptree(p->kids[0],level+1);
	else if (IR->x._arity[p->op] == 2) {
		ildumptree(p->kids[0],level+1);
		fprintf(ilFile, ", ");
		ildumptree(p->kids[1],level+1);
	}
	fprintf(ilFile, ")");
}
static void ildumprule(int rulenum)
{
	assert(rulenum);
	fprintf(ilFile, "%s / %s", IR->x._string[rulenum],
		IR->x._templates[rulenum]);
	if (!IR->x._isinstruction[rulenum])
		fprintf(ilFile, "\n");
}
void ildumpcover(Node p,int nt,int in)
{
	int rulenum, i;
	short *nts;
	Node kids[10];

	p = reuse(p, nt);
	rulenum = (*IR->x._rule)(p->x.state, nt);
	nts = IR->x._nts[rulenum];
	for (i = 0; i < in; i++)
		fprintf(ilFile, " ");
	ildumprule(rulenum);
	(*IR->x._kids)(p, rulenum, kids);
	for (i = 0; nts[i]; i++)
		ildumpcover(kids[i], nts[i], in+1);
}
#ifdef DODEBUG

static void dumpcover(Node p,int nt,int in)
{
	int rulenum, i;
	short *nts;
	Node kids[10];

	p = reuse(p, nt);
#ifdef PURIFY
	rulenum = getrule(p, nt);
#else
	rulenum = (*IR->x._rule)(p->x.state, nt);
#endif
	nts = IR->x._nts[rulenum];
	fprint(2, "dumpcover(%x) = ", p);
	for (i = 0; i < in; i++)
		fprint(2, " ");
	dumprule(rulenum);
	(*IR->x._kids)(p, rulenum, kids);
	for (i = 0; nts[i]; i++)
		dumpcover(kids[i], nts[i], in+1);
}

static void dumprule(int rulenum)
{
	assert(rulenum);
	fprint(2, "%s / %s", IR->x._string[rulenum],
		IR->x._templates[rulenum]);
	if (!IR->x._isinstruction[rulenum])
		fprint(2, "\n");
}
#endif /* DODEBUG */
/*------------------------------------------------------------------------
 Procedure:     emitasm ID:1
 Purpose:       Processes a partially linearized tree. List elements are
                the roots of subtrees for instructions. It traces the
                sub-instructions which correspond to  addressing modes
                and other calculations inside a logical instruction. The
                driver of emitasm, 'emit' ensures that this instructions
                are seen in the right order.
                It sets the x.emitted flag as it emits them. 14.6.392
 Input:
 Output:
 Errors:
------------------------------------------------------------------------*/
extern int hasAllocA;
unsigned emitasm(Node p,int nt)
{
	int rulenum;
	short *nts; /* Used only in a recursive call
	               Contains the nonterminals for the rule's pattern */
	register char *fmt;
	Node kids[10];
	int isinstruction;
	char *oldbp;
	extern int cseg;

	p = reuse(p, nt);
#ifdef PURIFY
	rulenum = getrule(p, nt);
#else
	rulenum = (*IR->x._rule)(p->x.state, nt);
#endif
	nts = IR->x._nts[rulenum];
	fmt = IR->x._templates[rulenum];
	assert(fmt);
	oldbp = bp;
	isinstruction = IR->x._isinstruction[rulenum];
	if (p->x.intrinsic) {
		if (p->x.emitted) {
			outs(p->syms[RX]->x.name);
			return 0;
		}
		p->x.emitted = 1;
		Intrinsic(p);
		return 0;
	}
	if (isinstruction && p->x.emitted) {
		/* When an instruction has already been emitted, only the name of the
		   register where the instruction left its result will be output
		*/
		outs(p->syms[RX]->x.name);
	}
	else if (*fmt == '#')
		/* This is an escape hatch to generate machine specific code like
		   structure arguments
		*/
		(*IR->x.emit2)(p);
	else {
		if (*fmt == '?') {
			/* Omit redundant expressions */
			fmt++;
			assert(p->x.kids[0]);
			/* This was updated after my complaints about redundant expressions */
			if (p->syms[RX] == p->x.kids[0]->syms[RX])
				while (*fmt++ != '\n')
					;
		}
		if (p->x.Flags && (rulenum == 207 || rulenum == 209)) {
			while (*fmt++ != '\n')
				;
		}
		for ((*IR->x._kids)(p, rulenum, kids); *fmt; fmt++)
			if (*fmt != '%')
				*bp++ = *fmt;
			else if (*++fmt == 'F')
				/* Emit the framesize, for accessing local variables */
				print("%d", framesize);
			else if (*fmt >= '0' && *fmt <= '9') {
				int n = *fmt - '0';
				/* Emit recursively the subtree corresponding to the 'digit'-th
			       nonterminal for the pattern, counting from zero, left to
				   right.
				 */
				emitasm(kids[n], nts[n]);
				{
					if (p->x.intrinsicArg) {
						IntrinsicArg(kids[n],nts[n]);
					}
				}
				if (rulenum > 230 /*&& nt == 2*/) { /* call instruction */
						if (kids[n]->syms && kids[n]->syms[0] &&
					         kids[n]->syms[0]->Flags) {
						*bp++ = '\n';
						return(0);
					}
					else if (oldbp[7] == 'a' && !strncmp(oldbp+6,"_alloca",7)) {
						int c = oldbp[13];
						if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'z') ||
								(c >= '0' && c <= '9'))) {
							hasAllocA = 1;
							*bp++ = '\n';
							return 0;
						}
					}
				}
			}
			else if (*fmt >= 'a' && *fmt < 'a' + NELEMS(p->syms))
				/* This evaluates to a register name */
				outs(p->syms[*fmt - 'a']->x.name);
			else
				*bp++ = *fmt;
	}
	if (OptimizeFlag && hasExceptions == 0 && isinstruction && !p->x.emitted) {
		if (rulenum == 193 && cseg == 1 /*&& p->syms[0]->ref != 0.0*/) { /* LABELV */
			SendToOptimizer();
		}
	}
	return 0;
}


void emit(Node p)
{
	for (; p; p = p->x.next) {
		assert(p->x.registered);
		if (p->x.equatable && requate(p) || moveself(p)) {
			;
		}
		else {
			char *oldbp = bp;
			(*emitter)(p, p->x.inst);
			p->x.Asm = oldbp;
			if (IntermediateLanguageFile) {
				int rulenum;

				ildumptree(p,0);
				fprintf(ilFile,"\n");
				rulenum = getrule(p, p->x.inst);
				ildumprule(rulenum);
				fprintf(ilFile,"%s",oldbp);
			}
			oldbp = bp;
		}
		
		p->x.emitted = 1;
	}
}
int moveself(Node p)
{
	if (p->x.kids[0] == NULL) return 0;
	return p->x.copy
	&& p->syms[RX]->x.name == p->x.kids[0]->syms[RX]->x.name;
}
int move(Node p)
{
	p->x.copy = 1;
	return 1;
}
int requate(Node q)
{
	Symbol src = q->x.kids[0]->syms[RX];
	Symbol tmp = q->syms[RX];
	Node p;
	int n = 0;

#ifdef DODEBUG
	fprint(2, "(requate(%x): tmp=%s src=%s)\n", q, tmp->x.name, src->x.name);
#endif
	for (p = q->x.next; p; p = p->x.next)
		if (p->x.copy && p->syms[RX] == src
		&&  p->x.kids[0]->syms[RX] == tmp)
#ifdef DODEBUG
			fprint(2, "(requate arm 0 at %x)\n", p),
#endif
			p->syms[RX] = tmp;
		else if (setsrc(p->syms[RX]) && !moveself(p) && !readsreg(p))
			return 0;
#if OLD
		else if (generic(p->op) == ASGN && p->kids[0]->op == ADDRLP
		&& p->kids[0]->syms[0]->temporary
		&& p->kids[1]->syms[RX]->x.name == tmp->x.name)
			return 0;
#else
		else if (p->x.spills)
			return 0;
		
#endif
		else if (generic(p->op) == CALL && p->x.next)
			return 0;
		else if (p->op == LABEL+V && p->x.next)
			return 0;
		else if (p->syms[RX] == tmp && readsreg(p))
#ifdef DODEBUG
			fprint(2, "(requate arm 5 at %x)\n", p),
#endif
			n++;
		else if (p->syms[RX] == tmp)
			break;
#ifdef DODEBUG
	fprint(2, "(requate arm 7 at %x)\n", p);
#endif
	assert(n > 0);
	for (p = q->x.next; p; p = p->x.next)
		if (p->syms[RX] == tmp && readsreg(p)) {
			p->syms[RX] = src;
			if (--n <= 0)
				break;
		}
	return 1;
}


static void prelabel(Node p)
{
	if (p == NULL)
		return;
	prelabel(p->kids[0]);
	prelabel(p->kids[1]);
	if (NeedsReg[opindex(p->op)])
		setreg(p, rmap[optype(p->op)]);
	switch (generic(p->op)) {
	case ADDRF: case ADDRL:
		if (p->syms[0]->sclass == REGISTER)
			p->op = VREG+P;
		break;
	case INDIR:
		if (p->kids[0]->op == VREG+P)
			setreg(p, p->kids[0]->syms[0]);
		break;
	case ASGN:
		if (p->kids[0]->op == VREG+P) {
#ifdef DODEBUG
			fprint(2, "(cse=%x)\n", p->kids[0]->syms[0]->u.t.cse);
#endif
			rtarget(p, 1, p->kids[0]->syms[0]);
		}
		break;
	}
	(IR->x.target)(p);
}
void setreg(Node p,Symbol r)
{
	p->syms[RX] = r;
}
void rtarget(Node p,int n,Symbol r)
{
	Node q = p->kids[n];

	assert(q);
	assert(r->sclass == REGISTER || !r->x.wildcard);
	assert(q->syms[RX]);
	if (r != q->syms[RX] && !q->syms[RX]->x.wildcard) {
lab1:
		q = newnode(LOAD + optype(q->op),
			q, NULL, q->syms[0]);
		if (r->u.t.cse == p->kids[n])
			r->u.t.cse = q;
		p->kids[n] = p->x.kids[n] = q;
		q->x.kids[0] = q->kids[0];
		setreg(q,r);
	}
	else if (q->kids[1] == NULL ||
		p->kids[0]->syms[0] == NULL ||
		p->kids[0]->syms[0]->u.t.cse ||
		!(q->kids[0]->syms[RX]->x.name[0] == '?' &&
		p->kids[0]->syms[0] == q->kids[1]->syms[RX]))
		setreg(q, r);
	else {
//		printf("not taken line %d\n",src.y);
		goto lab1;
	}
//	q->x.unsafe = 1;
#ifdef DODEBUG
	fprint(2, "(targeting %x->x.kids[%d]=%x to %s)\n", p, n, p->kids[n], r->x.name);
#endif
}
static void rewrite(Node p)
{
	assert(p->x.inst == 0);
	prelabel(p);
#ifdef DODEBUG
	dumptree(p,0);
	fprint(2, "\n");
#endif
	(*IR->x._label)(p);
#ifdef DODEBUG
	dumpcover(p, 1, 0);
#endif
	reduce(p, 1);
}
/*
The interface function gen receives a forest from the front end and makes
several passes over the trees.
*/
Node gen(Node forest)
{
	int i;
	struct node sentinel;
	Node dummy, p;

	head = forest;
	if (OptimizeFlag)
		unsafe = head->x.unsafe;
	else
		unsafe = 1;
	/*
	The first pass calls rewrite to select instructions, and the second
	prunes the subinstructions out of the tree.
	*/
	for (p = forest; p; p = p->link) {
		assert(p->count == 0);
		if (generic(p->op) == CALL)
			docall(p);
		else /* If the call returns no value, or if the returned
		        value is ignored, then the call itself appears in the forst.
				This statement recognizes this pattern */
		if (generic(p->op) == ASGN
		&& generic(p->kids[1]->op) == CALL)
			docall(p->kids[1]);
		else if (generic(p->op) == ARG)
			(*IR->x.doarg)(p);
		rewrite(p);
		p->x.listed = 1;
	}
	for (p = forest; p; p = p->link)
		prune(p, &dummy);
	relink(&sentinel, &sentinel);
	for (p = forest; p; p = p->link)
		linearize(p, &sentinel);
	forest = sentinel.x.next;
	assert(forest);
	sentinel.x.next->x.prev = NULL;
	sentinel.x.prev->x.next = NULL;
	for (p = forest; p; p = p->x.next)
		for (i = 0; i < NELEMS(p->x.kids) && p->x.kids[i]; i++) {
			assert(p->x.kids[i]->syms[RX]);
			if (p->x.kids[i]->syms[RX]->temporary) {
				p->x.kids[i]->x.prevuse =
					p->x.kids[i]->syms[RX]->x.lastuse;
				p->x.kids[i]->syms[RX]->x.lastuse = p->x.kids[i];
			}
		}
#if 0 // This is the patch proposed by Chris Fraser end of April 97.
	for (p = forest; p; p = p->x.next)
		if (p->x.copy && p->x.kids[0] && p->x.kids[0]->syms[RX]->u.t.cse) {
			Symbol dst = p->syms[RX];
			Symbol temp = p->x.kids[0]->syms[RX];
			Node q;

			assert(temp->x.lastuse);
			for (q = temp->u.t.cse; q; q = q->x.next)
				if (p != q && dst == q->syms[RX]
				|| (q->op == LABELV || q->op == JUMPV || generic(q->op)==RET ||
				    generic(q->op)==EQ || generic(q->op)==NE ||
				    generic(q->op)==LE || generic(q->op)==LT ||
				    generic(q->op)==GE || generic(q->op)==GT ||
					q->op == ASGNB || /* This costed me a week */
				    (generic(q->op) == CALL && dst->sclass != REGISTER)))
					break;
			if (!q)
				for (q = temp->x.lastuse; q; q = q->x.prevuse)
					q->syms[RX] = dst;
		}
#endif
	for (p = forest; p; p = p->x.next) {
		if (p->x.unsafe || (p->x.next && p->x.next->x.unsafe))
			unsafe = 1;
		ralloc(p);
		if (p->x.listed && NeedsReg[opindex(p->op)]
		&& rmap[optype(p->op)]) {
			assert(generic(p->op) == CALL || generic(p->op) == LOAD);
			putreg(p->syms[RX]);
		}
	}
	unsafe = 1;
	return forest;
}
int notarget(Node p)
{
	return p->syms[RX]->x.wildcard ? 0 : LBURG_MAX;
}
static void putreg(Symbol r)
{
	assert(r && r->x.regnode);
	freemask[r->x.regnode->set] |= r->x.regnode->mask;
#ifdef DODEBUG
	dumpregs("(freeing %s)\n", r->x.name, NULL,NULL);
#endif
}
static Symbol askfixedreg(Symbol s)
{
	Regnode r = s->x.regnode;
	int n = r->set;

	if (r->mask&~freemask[n])
		return NULL;
	else {
		freemask[n] &= ~r->mask;
		usedmask[n] |=  r->mask;
		return s;
	}
}
extern int clobbering;

static Symbol askreg(Symbol rs,unsigned rmask[])
{
	int i;
	Symbol r;

	if (rs->x.wildcard == NULL)
		return askfixedreg(rs);
	if (unsafe)
	for (i = 31; i >= 0; i--) {
		r = rs->x.wildcard[i];
		if (r != NULL
		&& !(r->x.regnode->mask&~rmask[r->x.regnode->set])
		&& askfixedreg(r)) {
			return r;
		}
	}
	else {
		r = rs->x.wildcard[2]; // EDX
		if (r != NULL
			&& !(r->x.regnode->mask&~rmask[r->x.regnode->set])
			&& askfixedreg(r))
				return r;
		r = rs->x.wildcard[1]; // ECX
		if (r != NULL
			&& !(r->x.regnode->mask&~rmask[r->x.regnode->set])
			&& askfixedreg(r))
			return r;
		r = rs->x.wildcard[0]; // EAX
		if (r != NULL
			&& !(r->x.regnode->mask&~rmask[r->x.regnode->set])
			&& askfixedreg(r))
			return r;
	/* It is better to start with the eax, ebx ecx, etc registers.
	   This limits the number of unnecessary moves and makes esi edi
	   available for further optimizations.
	*/
		for (i = 0; i < 32; i++) {
			Symbol r = rs->x.wildcard[i];
			if (r != NULL
			&& !(r->x.regnode->mask&~rmask[r->x.regnode->set])
			&& askfixedreg(r))
				return r;
		}
	}
	return NULL;
}

static Symbol getreg(Symbol s,unsigned mask[],Node p,int tmpflag)
{
	Symbol r;

	if (p->x.dangerousChildren)
		mask[0] &= ~1;
	r = askreg(s, mask);

	if (r == NULL) {
		r = spillee(s, p);
		assert(r);
		spill(r->x.regnode->mask, r->x.regnode->set, p);
		putreg(r);
		r = askreg(s, mask);
		if (r == NULL) {
			r = askreg(s,mask);
			assert(r);
		}
	}
	assert(r->x.regnode);
	r->x.regnode->vbl = NULL;
	return r;
}
int askregvar(Symbol p,Symbol regs)
{
	Symbol r;

	assert(p);
	if (p->sclass != REGISTER)
		return 0;
	else if (!isscalar(p->type)) {
		p->sclass = AUTO;
		return 0;
	}
	else if (p->temporary && p->u.t.cse) {
		p->x.name = "?";
		return 1;
	}
	/*
	test if SetupRegisterVariables has assigned this already...
	*/
	else if (p->x.regnode && OptimizeFlag)
		return 1;
	else if ((r = askreg(regs, vmask)) != NULL) {
		p->x.regnode = r->x.regnode;
		p->x.regnode->vbl = p;
		p->x.name = r->x.name;
#ifdef DODEBUG
		dumpregs("(allocating %s to symbol %s)\n", p->x.name, p->name,NULL);
#endif
		return 1;
	}
	else {
		p->sclass = AUTO;
		return 0;
	}
}
void linearize(Node p,Node next)
{
	int i;

	for (i = 0; i < NELEMS(p->x.kids) && p->x.kids[i]; i++)
		linearize(p->x.kids[i], next);
	relink(next->x.prev, p);
	relink(p, next);
#ifdef DODEBUG
	fprint(2, "(listing %x)\n", p);
#endif
}
static void ralloc(Node p)
{
	int i,tmpflag=0;
	unsigned mask[2];

	mask[0] = tmask[0];
	mask[1] = tmask[1];
	assert(p);
#ifdef DODEBUG
	fprint(2, "(rallocing %x: %s)\n", p,IR->x._opname[p->op]);
#endif
	for (i = 0; i < NELEMS(p->x.kids) && p->x.kids[i]; i++) {
		Node kid = p->x.kids[i];
		Symbol r = kid->syms[RX];
		assert(r && kid->x.registered);
		if (r->sclass != REGISTER && r->x.lastuse == kid)
			putreg(r);
	}
	if (!p->x.registered && NeedsReg[opindex(p->op)]
	&& rmap[optype(p->op)]) {
		Symbol sym = p->syms[RX], set = sym;
		assert(sym);
		if (sym->temporary && sym->u.t.cse) {
			tmpflag = 1;
			set = rmap[optype(p->op)];
		}
		assert(set);
		if (set->sclass != REGISTER) {
			Symbol r;
			char *prule = IR->x._templates[getrule(p, p->x.inst)];
			if (*prule == '?')
				for (i = 1; i < NELEMS(p->x.kids) && p->x.kids[i]; i++) {
					Symbol r = p->x.kids[i]->syms[RX];
					assert(p->x.kids[i]->x.registered);
					assert(r && r->x.regnode);
//					assert(sym->x.wildcard || sym != r);

					if (!(sym->x.wildcard || sym != r)) {
#ifdef DODEBUG
						dumpregs("error in %s %x %s\n",r->x.name,(char *)p,opname(p->op));
						dumptree(p,0);
						dumptree(p->x.kids[i],0);
#endif
						assert(0);
					}
					mask[r->x.regnode->set] &= ~r->x.regnode->mask;
				}
			/* Avoid ESI and EDI when converting int to char, because
			those registers have no byte registers
			*/
			if (OptimizeFlag 
				&& (p->x.next && ((optype(p->x.next->op) == C) )
				    || optype(p->op) == C)) {
				mask[0] &= ~((1 << 6)|(1<< 7));
			}
			if (p->x.intrinsicArg) {
				r = AssignIntrinsicArg(p);
				if (r == NULL)
					r = getreg(set, mask, p, tmpflag);
			}
			else 
				r = getreg(set, mask, p,tmpflag);
			if (sym->temporary && sym->u.t.cse) {
				Node q;
				r->x.lastuse = sym->x.lastuse;
				for (q = sym->x.lastuse; q; q = q->x.prevuse) {
					q->syms[RX] = r;
					q->x.registered = 1;
					if (q->x.copy)
						q->x.equatable = 1;
				}
			} else {
				p->syms[RX] = r;
				r->x.lastuse = p;
			}
#ifdef DODEBUG
			dumpregs("(allocating %s to node %x %s)\n", r->x.name, (char *) p,opname(p->op));
			dumptree(p,0);
#endif
		}
	}
	p->x.registered = 1;
	(*IR->x.clobber)(p);
}
static Symbol spillee(Symbol set,Node here)
{
	Symbol bestreg = NULL;
	int bestdist = -1, i;

	assert(set);
	if (!set->x.wildcard)
		return set;
	if (FunctionInfo.leafFunction == 0)
	for (i = 31; i >= 0; i--) {
		Symbol ri = set->x.wildcard[i];
		if (ri != NULL && ri->x.lastuse
		&& ri->x.regnode->mask&tmask[ri->x.regnode->set]) {
			Regnode rn = ri->x.regnode;
			Node q = here;
			int dist = 0;
			for (; q && !uses(q, rn->mask); q = q->x.next)
				dist++;
			if (q && dist > bestdist) {
				bestdist = dist;
				bestreg = ri;
			}
		}
	}
	else {
		for (i = 0; i < 32; i++) {
			Symbol ri = set->x.wildcard[i];
			if (ri != NULL && ri->x.lastuse
			&& ri->x.regnode->mask&tmask[ri->x.regnode->set]) {
				Regnode rn = ri->x.regnode;
				Node q = here;
				int dist = 0;
				for (; q && !uses(q, rn->mask); q = q->x.next)
					dist++;
				if (q && dist > bestdist) {
					bestdist = dist;
					bestreg = ri;
				}
			}
		}
	}
	return bestreg;
}
static int uses(Node p,unsigned mask)
{
	int i;
	Node q;

	for (i = 0; i < NELEMS(p->x.kids)
		&& (q = p->x.kids[i]) != NULL; i++)
		if (q->x.registered
		&& mask&q->syms[RX]->x.regnode->mask)
			return 1;
	return 0;
}
static void spillr(Symbol r,Node here)
{
	int i;
	Node p = r->x.lastuse;
	Symbol tmp;
	assert(p);
	while (p->x.prevuse) {
		assert(r == p->syms[RX]);
		p = p->x.prevuse;
	}
	assert(p->x.registered && !readsreg(p));
	tmp = newtemp(AUTO, optype(p->op));
	genspill(r, p, tmp);
	for (p = here->x.next; p; p = p->x.next)
		for (i = 0; i < NELEMS(p->x.kids) && p->x.kids[i]; i++) {
			Node k = p->x.kids[i];

			if (k->x.registered && k->syms[RX] == r)
				genreload(p, tmp, i);
		}
	putreg(r);
}
int NrOfSpills;
static void genspill(Symbol r,Node last,Symbol tmp)
{
	Node p, q;
	Symbol s;
	unsigned ty;
#ifdef DODEBUG
	fprint(2, "(spilling %s to local %s)\n", r->x.name, tmp->x.name);
	fprint(2, "(genspill: ");
	dumptree(last,0);
	fprint(2, ")\n");
#endif
	NrOfSpills++;
	ty = optype(last->op);
	if (ty == U)
		ty = I;
	NEW0(s, FUNC);
	s->sclass = REGISTER;
	s->x.name = r->x.name;
	s->x.regnode = r->x.regnode;
	s->x.regnode->vbl = s;
	q = newnode(ADDRLP, NULL, NULL, s);
	q = newnode(INDIR + ty, q, NULL, NULL);
	p = newnode(ADDRLP, NULL, NULL, tmp);
	p = newnode(ASGN + ty, p, q, NULL);
	p->x.spills = 1;
	rewrite(p);
	prune(p, &q);
	q = last->x.next;
	linearize(p, q);
	for (p = last->x.next; p != q; p = p->x.next) {
		ralloc(p);
		assert(!p->x.listed || !NeedsReg[opindex(p->op)] || !rmap[optype(p->op)]);
	}
}

static void genreload(Node p,Symbol tmp,int i)
{
	Node q;
	int ty;
#ifdef DODEBUG
	fprint(2, "(replacing %x with a reload from %s)\n", p->x.kids[i], tmp->x.name);
	fprint(2, "(genreload: ");
	dumptree(p->x.kids[i],0);
	fprint(2, ")\n");
#endif
	ty = optype(p->x.kids[i]->op);
	if (ty == U)
		ty = I;
	q = newnode(ADDRLP, NULL, NULL, tmp);
	p->x.kids[i] = newnode(INDIR + ty, q, NULL, NULL);
	rewrite(p->x.kids[i]);
	prune(p->x.kids[i], &q);
	reprune(&p->kids[1], reprune(&p->kids[0], 0, i, p), i, p);
	prune(p, &q);
	linearize(p->x.kids[i], p);
}
static int reprune(Node *pp,int k,int n,Node p)
{
	struct node x, *q = *pp;

	if (q == NULL || k > n)
		return k;
	else if (q->x.inst == 0)
		return reprune(&q->kids[1],
			reprune(&q->kids[0], k, n, p), n, p);
	else if (k == n) {
#ifdef DODEBUG
		fprint(2, "(reprune changes %x from %x to %x)\n", pp, *pp, p->x.kids[n]);
#endif
		*pp = p->x.kids[n];
		x = *p;		/* explain this */
		(IR->x.target)(&x);
	}
	return k + 1;
}
void spill(unsigned mask,int n,Node here)
{
	int i;
	Node p;

	here->x.spills = 1;
	usedmask[n] |= mask;
	if (mask&~freemask[n])
		for (p = here; p; p = p->x.next)
			for (i = 0; i < NELEMS(p->x.kids) && p->x.kids[i]; i++) {
				Symbol r = p->x.kids[i]->syms[RX];
				assert(r);
				if (p->x.kids[i]->x.registered && r->x.regnode->set == n
				&& r->x.regnode->mask&mask)
					spillr(r, here);
			}
}
#ifdef DODEBUG
//enum { EAX=0, ECX=1, EDX=2, EBX=3, ESI=6, EDI=7 };

static void dumpregs(char *msg,char *a,char *b,char *c) 
{
	fprint(2, msg, a, b, c);
	fprint(2, "(free[0]=%x)", freemask[0]);
	if (freemask[0] & 1) fprint(2," eax");
	if (freemask[0] & (1 << 1)) fprint(2," ecx");
	if (freemask[0] & (1 << 2)) fprint(2," edx");
	if (freemask[0] & (1 << 3)) fprint(2," ebx");
	if (freemask[0] & (1 << 6)) fprint(2," esi");
	if (freemask[0] & (1 << 7)) fprint(2," edi");
	fprint(2,"\n");
//	fprint(2, "(free[1]=%x)", freemask[1]);
}
#endif
int getregnum(Node p)
{
	assert(p && p->syms[RX] && p->syms[RX]->x.regnode);
	return p->syms[RX]->x.regnode->number;
}


unsigned regloc(Symbol p)
{
	assert(p && p->sclass == REGISTER && p->sclass == REGISTER && p->x.regnode);
	return p->x.regnode->set<<8 | p->x.regnode->number;
}

