root/src/cmd/8g/gsubr.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. clearp
  2. prog
  3. dumpdata
  4. gbranch
  5. patch
  6. unpatch
  7. newplist
  8. gused
  9. gjmp
  10. ggloblnod
  11. gargsize
  12. ggloblsym
  13. gtrack
  14. isfat
  15. afunclit
  16. optoas
  17. foptoas
  18. ginit
  19. gclean
  20. anyregalloc
  21. regalloc
  22. regfree
  23. nodreg
  24. nodindreg
  25. nodarg
  26. gconreg
  27. nswap
  28. ncon
  29. ismem
  30. split64
  31. splitclean
  32. bignodes
  33. memname
  34. gmove
  35. floatmove
  36. floatmove_387
  37. floatmove_sse
  38. samaddr
  39. gins
  40. naddr
  41. dotaddable
  42. sudoclean
  43. sudoaddable

// Derived from Inferno utils/8c/txt.c
// http://code.google.com/p/inferno-os/source/browse/utils/8c/txt.c
//
//      Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//      Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//      Portions Copyright © 1997-1999 Vita Nuova Limited
//      Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//      Portions Copyright © 2004,2006 Bruce Ellis
//      Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//      Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//      Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include <u.h>
#include <libc.h>
#include "gg.h"
#include "../../pkg/runtime/funcdata.h"

// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 8l for all GOOS.
// At the same time, can raise StackBig in ../../pkg/runtime/stack.h.
uint32 unmappedzero = 4096;

#define CASE(a,b)       (((a)<<16)|((b)<<0))

void
clearp(Prog *p)
{
        p->as = AEND;
        p->from.type = D_NONE;
        p->from.index = D_NONE;
        p->to.type = D_NONE;
        p->to.index = D_NONE;
        p->pc = pcloc;
        pcloc++;
}

static int ddumped;
static Prog *dfirst;
static Prog *dpc;

/*
 * generate and return proc with p->as = as,
 * linked into program.  pc is next instruction.
 */
Prog*
prog(int as)
{
        Prog *p;

        if(as == ADATA || as == AGLOBL) {
                if(ddumped)
                        fatal("already dumped data");
                if(dpc == nil) {
                        dpc = mal(sizeof(*dpc));
                        dfirst = dpc;
                }
                p = dpc;
                dpc = mal(sizeof(*dpc));
                p->link = dpc;
        } else {
                p = pc;
                pc = mal(sizeof(*pc));
                clearp(pc);
                p->link = pc;
        }

        if(lineno == 0) {
                if(debug['K'])
                        warn("prog: line 0");
        }

        p->as = as;
        p->lineno = lineno;
        return p;
}

void
dumpdata(void)
{
        ddumped = 1;
        if(dfirst == nil)
                return;
        newplist();
        *pc = *dfirst;
        pc = dpc;
        clearp(pc);
}

/*
 * generate a branch.
 * t is ignored.
 * likely values are for branch prediction:
 *      -1 unlikely
 *      0 no opinion
 *      +1 likely
 */
Prog*
gbranch(int as, Type *t, int likely)
{
        Prog *p;

        USED(t);
        p = prog(as);
        p->to.type = D_BRANCH;
        p->to.u.branch = P;
        if(likely != 0) {
                p->from.type = D_CONST;
                p->from.offset = likely > 0;
        }
        return p;
}

/*
 * patch previous branch to jump to to.
 */
void
patch(Prog *p, Prog *to)
{
        if(p->to.type != D_BRANCH)
                fatal("patch: not a branch");
        p->to.u.branch = to;
        p->to.offset = to->pc;
}

Prog*
unpatch(Prog *p)
{
        Prog *q;

        if(p->to.type != D_BRANCH)
                fatal("unpatch: not a branch");
        q = p->to.u.branch;
        p->to.u.branch = P;
        p->to.offset = 0;
        return q;
}

/*
 * start a new Prog list.
 */
Plist*
newplist(void)
{
        Plist *pl;

        pl = linknewplist(ctxt);
        
        pc = mal(sizeof(*pc));
        clearp(pc);
        pl->firstpc = pc;

        return pl;
}

void
gused(Node *n)
{
        gins(ANOP, n, N);       // used
}

Prog*
gjmp(Prog *to)
{
        Prog *p;

        p = gbranch(AJMP, T, 0);
        if(to != P)
                patch(p, to);
        return p;
}

void
ggloblnod(Node *nam)
{
        Prog *p;

        p = gins(AGLOBL, nam, N);
        p->lineno = nam->lineno;
        p->from.sym->gotype = linksym(ngotype(nam));
        p->to.sym = nil;
        p->to.type = D_CONST;
        p->to.offset = nam->type->width;
        if(nam->readonly)
                p->from.scale = RODATA;
        if(nam->type != T && !haspointers(nam->type))
                p->from.scale |= NOPTR;
}

void
gargsize(int32 size)
{
        Node n1, n2;
        
        nodconst(&n1, types[TINT32], PCDATA_ArgSize);
        nodconst(&n2, types[TINT32], size);
        gins(APCDATA, &n1, &n2);
}

void
ggloblsym(Sym *s, int32 width, int dupok, int rodata)
{
        Prog *p;

        p = gins(AGLOBL, N, N);
        p->from.type = D_EXTERN;
        p->from.index = D_NONE;
        p->from.sym = linksym(s);
        p->to.type = D_CONST;
        p->to.index = D_NONE;
        p->to.offset = width;
        if(dupok)
                p->from.scale |= DUPOK;
        if(rodata)
                p->from.scale |= RODATA;
}

void
gtrack(Sym *s)
{
        Prog *p;
        
        p = gins(AUSEFIELD, N, N);
        p->from.type = D_EXTERN;
        p->from.index = D_NONE;
        p->from.sym = linksym(s);
}

int
isfat(Type *t)
{
        if(t != T)
        switch(t->etype) {
        case TSTRUCT:
        case TARRAY:
        case TSTRING:
        case TINTER:    // maybe remove later
                return 1;
        }
        return 0;
}

/*
 * naddr of func generates code for address of func.
 * if using opcode that can take address implicitly,
 * call afunclit to fix up the argument.
 */
void
afunclit(Addr *a, Node *n)
{
        if(a->type == D_ADDR && a->index == D_EXTERN) {
                a->type = D_EXTERN;
                a->index = D_NONE;
                a->sym = linksym(n->sym);
        }
}

/*
 * return Axxx for Oxxx on type t.
 */
int
optoas(int op, Type *t)
{
        int a;

        if(t == T)
                fatal("optoas: t is nil");

        a = AGOK;
        switch(CASE(op, simtype[t->etype])) {
        default:
                fatal("optoas: no entry %O-%T", op, t);
                break;

        case CASE(OADDR, TPTR32):
                a = ALEAL;
                break;

        case CASE(OEQ, TBOOL):
        case CASE(OEQ, TINT8):
        case CASE(OEQ, TUINT8):
        case CASE(OEQ, TINT16):
        case CASE(OEQ, TUINT16):
        case CASE(OEQ, TINT32):
        case CASE(OEQ, TUINT32):
        case CASE(OEQ, TINT64):
        case CASE(OEQ, TUINT64):
        case CASE(OEQ, TPTR32):
        case CASE(OEQ, TPTR64):
        case CASE(OEQ, TFLOAT32):
        case CASE(OEQ, TFLOAT64):
                a = AJEQ;
                break;

        case CASE(ONE, TBOOL):
        case CASE(ONE, TINT8):
        case CASE(ONE, TUINT8):
        case CASE(ONE, TINT16):
        case CASE(ONE, TUINT16):
        case CASE(ONE, TINT32):
        case CASE(ONE, TUINT32):
        case CASE(ONE, TINT64):
        case CASE(ONE, TUINT64):
        case CASE(ONE, TPTR32):
        case CASE(ONE, TPTR64):
        case CASE(ONE, TFLOAT32):
        case CASE(ONE, TFLOAT64):
                a = AJNE;
                break;

        case CASE(OLT, TINT8):
        case CASE(OLT, TINT16):
        case CASE(OLT, TINT32):
        case CASE(OLT, TINT64):
                a = AJLT;
                break;

        case CASE(OLT, TUINT8):
        case CASE(OLT, TUINT16):
        case CASE(OLT, TUINT32):
        case CASE(OLT, TUINT64):
                a = AJCS;
                break;

        case CASE(OLE, TINT8):
        case CASE(OLE, TINT16):
        case CASE(OLE, TINT32):
        case CASE(OLE, TINT64):
                a = AJLE;
                break;

        case CASE(OLE, TUINT8):
        case CASE(OLE, TUINT16):
        case CASE(OLE, TUINT32):
        case CASE(OLE, TUINT64):
                a = AJLS;
                break;

        case CASE(OGT, TINT8):
        case CASE(OGT, TINT16):
        case CASE(OGT, TINT32):
        case CASE(OGT, TINT64):
                a = AJGT;
                break;

        case CASE(OGT, TUINT8):
        case CASE(OGT, TUINT16):
        case CASE(OGT, TUINT32):
        case CASE(OGT, TUINT64):
        case CASE(OLT, TFLOAT32):
        case CASE(OLT, TFLOAT64):
                a = AJHI;
                break;

        case CASE(OGE, TINT8):
        case CASE(OGE, TINT16):
        case CASE(OGE, TINT32):
        case CASE(OGE, TINT64):
                a = AJGE;
                break;

        case CASE(OGE, TUINT8):
        case CASE(OGE, TUINT16):
        case CASE(OGE, TUINT32):
        case CASE(OGE, TUINT64):
        case CASE(OLE, TFLOAT32):
        case CASE(OLE, TFLOAT64):
                a = AJCC;
                break;

        case CASE(OCMP, TBOOL):
        case CASE(OCMP, TINT8):
        case CASE(OCMP, TUINT8):
                a = ACMPB;
                break;

        case CASE(OCMP, TINT16):
        case CASE(OCMP, TUINT16):
                a = ACMPW;
                break;

        case CASE(OCMP, TINT32):
        case CASE(OCMP, TUINT32):
        case CASE(OCMP, TPTR32):
                a = ACMPL;
                break;

        case CASE(OAS, TBOOL):
        case CASE(OAS, TINT8):
        case CASE(OAS, TUINT8):
                a = AMOVB;
                break;

        case CASE(OAS, TINT16):
        case CASE(OAS, TUINT16):
                a = AMOVW;
                break;

        case CASE(OAS, TINT32):
        case CASE(OAS, TUINT32):
        case CASE(OAS, TPTR32):
                a = AMOVL;
                break;

        case CASE(OADD, TINT8):
        case CASE(OADD, TUINT8):
                a = AADDB;
                break;

        case CASE(OADD, TINT16):
        case CASE(OADD, TUINT16):
                a = AADDW;
                break;

        case CASE(OADD, TINT32):
        case CASE(OADD, TUINT32):
        case CASE(OADD, TPTR32):
        case CASE(OADDPTR, TPTR32):
                a = AADDL;
                break;

        case CASE(OSUB, TINT8):
        case CASE(OSUB, TUINT8):
                a = ASUBB;
                break;

        case CASE(OSUB, TINT16):
        case CASE(OSUB, TUINT16):
                a = ASUBW;
                break;

        case CASE(OSUB, TINT32):
        case CASE(OSUB, TUINT32):
        case CASE(OSUB, TPTR32):
                a = ASUBL;
                break;

        case CASE(OINC, TINT8):
        case CASE(OINC, TUINT8):
                a = AINCB;
                break;

        case CASE(OINC, TINT16):
        case CASE(OINC, TUINT16):
                a = AINCW;
                break;

        case CASE(OINC, TINT32):
        case CASE(OINC, TUINT32):
        case CASE(OINC, TPTR32):
                a = AINCL;
                break;

        case CASE(ODEC, TINT8):
        case CASE(ODEC, TUINT8):
                a = ADECB;
                break;

        case CASE(ODEC, TINT16):
        case CASE(ODEC, TUINT16):
                a = ADECW;
                break;

        case CASE(ODEC, TINT32):
        case CASE(ODEC, TUINT32):
        case CASE(ODEC, TPTR32):
                a = ADECL;
                break;

        case CASE(OCOM, TINT8):
        case CASE(OCOM, TUINT8):
                a = ANOTB;
                break;

        case CASE(OCOM, TINT16):
        case CASE(OCOM, TUINT16):
                a = ANOTW;
                break;

        case CASE(OCOM, TINT32):
        case CASE(OCOM, TUINT32):
        case CASE(OCOM, TPTR32):
                a = ANOTL;
                break;

        case CASE(OMINUS, TINT8):
        case CASE(OMINUS, TUINT8):
                a = ANEGB;
                break;

        case CASE(OMINUS, TINT16):
        case CASE(OMINUS, TUINT16):
                a = ANEGW;
                break;

        case CASE(OMINUS, TINT32):
        case CASE(OMINUS, TUINT32):
        case CASE(OMINUS, TPTR32):
                a = ANEGL;
                break;

        case CASE(OAND, TINT8):
        case CASE(OAND, TUINT8):
                a = AANDB;
                break;

        case CASE(OAND, TINT16):
        case CASE(OAND, TUINT16):
                a = AANDW;
                break;

        case CASE(OAND, TINT32):
        case CASE(OAND, TUINT32):
        case CASE(OAND, TPTR32):
                a = AANDL;
                break;

        case CASE(OOR, TINT8):
        case CASE(OOR, TUINT8):
                a = AORB;
                break;

        case CASE(OOR, TINT16):
        case CASE(OOR, TUINT16):
                a = AORW;
                break;

        case CASE(OOR, TINT32):
        case CASE(OOR, TUINT32):
        case CASE(OOR, TPTR32):
                a = AORL;
                break;

        case CASE(OXOR, TINT8):
        case CASE(OXOR, TUINT8):
                a = AXORB;
                break;

        case CASE(OXOR, TINT16):
        case CASE(OXOR, TUINT16):
                a = AXORW;
                break;

        case CASE(OXOR, TINT32):
        case CASE(OXOR, TUINT32):
        case CASE(OXOR, TPTR32):
                a = AXORL;
                break;

        case CASE(OLROT, TINT8):
        case CASE(OLROT, TUINT8):
                a = AROLB;
                break;

        case CASE(OLROT, TINT16):
        case CASE(OLROT, TUINT16):
                a = AROLW;
                break;

        case CASE(OLROT, TINT32):
        case CASE(OLROT, TUINT32):
        case CASE(OLROT, TPTR32):
                a = AROLL;
                break;

        case CASE(OLSH, TINT8):
        case CASE(OLSH, TUINT8):
                a = ASHLB;
                break;

        case CASE(OLSH, TINT16):
        case CASE(OLSH, TUINT16):
                a = ASHLW;
                break;

        case CASE(OLSH, TINT32):
        case CASE(OLSH, TUINT32):
        case CASE(OLSH, TPTR32):
                a = ASHLL;
                break;

        case CASE(ORSH, TUINT8):
                a = ASHRB;
                break;

        case CASE(ORSH, TUINT16):
                a = ASHRW;
                break;

        case CASE(ORSH, TUINT32):
        case CASE(ORSH, TPTR32):
                a = ASHRL;
                break;

        case CASE(ORSH, TINT8):
                a = ASARB;
                break;

        case CASE(ORSH, TINT16):
                a = ASARW;
                break;

        case CASE(ORSH, TINT32):
                a = ASARL;
                break;

        case CASE(OHMUL, TINT8):
        case CASE(OMUL, TINT8):
        case CASE(OMUL, TUINT8):
                a = AIMULB;
                break;

        case CASE(OHMUL, TINT16):
        case CASE(OMUL, TINT16):
        case CASE(OMUL, TUINT16):
                a = AIMULW;
                break;

        case CASE(OHMUL, TINT32):
        case CASE(OMUL, TINT32):
        case CASE(OMUL, TUINT32):
        case CASE(OMUL, TPTR32):
                a = AIMULL;
                break;

        case CASE(OHMUL, TUINT8):
                a = AMULB;
                break;

        case CASE(OHMUL, TUINT16):
                a = AMULW;
                break;

        case CASE(OHMUL, TUINT32):
        case CASE(OHMUL, TPTR32):
                a = AMULL;
                break;

        case CASE(ODIV, TINT8):
        case CASE(OMOD, TINT8):
                a = AIDIVB;
                break;

        case CASE(ODIV, TUINT8):
        case CASE(OMOD, TUINT8):
                a = ADIVB;
                break;

        case CASE(ODIV, TINT16):
        case CASE(OMOD, TINT16):
                a = AIDIVW;
                break;

        case CASE(ODIV, TUINT16):
        case CASE(OMOD, TUINT16):
                a = ADIVW;
                break;

        case CASE(ODIV, TINT32):
        case CASE(OMOD, TINT32):
                a = AIDIVL;
                break;

        case CASE(ODIV, TUINT32):
        case CASE(ODIV, TPTR32):
        case CASE(OMOD, TUINT32):
        case CASE(OMOD, TPTR32):
                a = ADIVL;
                break;

        case CASE(OEXTEND, TINT16):
                a = ACWD;
                break;

        case CASE(OEXTEND, TINT32):
                a = ACDQ;
                break;
        }
        return a;
}

#define FCASE(a, b, c)  (((a)<<16)|((b)<<8)|(c))
int
foptoas(int op, Type *t, int flg)
{
        int et, a;

        a = AGOK;
        et = simtype[t->etype];

        if(use_sse)
                goto sse;

        // If we need Fpop, it means we're working on
        // two different floating-point registers, not memory.
        // There the instruction only has a float64 form.
        if(flg & Fpop)
                et = TFLOAT64;

        // clear Frev if unneeded
        switch(op) {
        case OADD:
        case OMUL:
                flg &= ~Frev;
                break;
        }

        switch(FCASE(op, et, flg)) {
        case FCASE(OADD, TFLOAT32, 0):
                return AFADDF;
        case FCASE(OADD, TFLOAT64, 0):
                return AFADDD;
        case FCASE(OADD, TFLOAT64, Fpop):
                return AFADDDP;

        case FCASE(OSUB, TFLOAT32, 0):
                return AFSUBF;
        case FCASE(OSUB, TFLOAT32, Frev):
                return AFSUBRF;

        case FCASE(OSUB, TFLOAT64, 0):
                return AFSUBD;
        case FCASE(OSUB, TFLOAT64, Frev):
                return AFSUBRD;
        case FCASE(OSUB, TFLOAT64, Fpop):
                return AFSUBDP;
        case FCASE(OSUB, TFLOAT64, Fpop|Frev):
                return AFSUBRDP;

        case FCASE(OMUL, TFLOAT32, 0):
                return AFMULF;
        case FCASE(OMUL, TFLOAT64, 0):
                return AFMULD;
        case FCASE(OMUL, TFLOAT64, Fpop):
                return AFMULDP;

        case FCASE(ODIV, TFLOAT32, 0):
                return AFDIVF;
        case FCASE(ODIV, TFLOAT32, Frev):
                return AFDIVRF;

        case FCASE(ODIV, TFLOAT64, 0):
                return AFDIVD;
        case FCASE(ODIV, TFLOAT64, Frev):
                return AFDIVRD;
        case FCASE(ODIV, TFLOAT64, Fpop):
                return AFDIVDP;
        case FCASE(ODIV, TFLOAT64, Fpop|Frev):
                return AFDIVRDP;

        case FCASE(OCMP, TFLOAT32, 0):
                return AFCOMF;
        case FCASE(OCMP, TFLOAT32, Fpop):
                return AFCOMFP;
        case FCASE(OCMP, TFLOAT64, 0):
                return AFCOMD;
        case FCASE(OCMP, TFLOAT64, Fpop):
                return AFCOMDP;
        case FCASE(OCMP, TFLOAT64, Fpop2):
                return AFCOMDPP;
        
        case FCASE(OMINUS, TFLOAT32, 0):
                return AFCHS;
        case FCASE(OMINUS, TFLOAT64, 0):
                return AFCHS;
        }

        fatal("foptoas %O %T %#x", op, t, flg);
        return 0;

sse:
        switch(CASE(op, et)) {
        default:
                fatal("foptoas-sse: no entry %O-%T", op, t);
                break;

        case CASE(OCMP, TFLOAT32):
                a = AUCOMISS;
                break;

        case CASE(OCMP, TFLOAT64):
                a = AUCOMISD;
                break;

        case CASE(OAS, TFLOAT32):
                a = AMOVSS;
                break;

        case CASE(OAS, TFLOAT64):
                a = AMOVSD;
                break;

        case CASE(OADD, TFLOAT32):
                a = AADDSS;
                break;

        case CASE(OADD, TFLOAT64):
                a = AADDSD;
                break;

        case CASE(OSUB, TFLOAT32):
                a = ASUBSS;
                break;

        case CASE(OSUB, TFLOAT64):
                a = ASUBSD;
                break;

        case CASE(OMUL, TFLOAT32):
                a = AMULSS;
                break;

        case CASE(OMUL, TFLOAT64):
                a = AMULSD;
                break;

        case CASE(ODIV, TFLOAT32):
                a = ADIVSS;
                break;

        case CASE(ODIV, TFLOAT64):
                a = ADIVSD;
                break;
        }
        return a;
}


static  int     resvd[] =
{
//      D_DI,   // for movstring
//      D_SI,   // for movstring

        D_AX,   // for divide
        D_CX,   // for shift
        D_DX,   // for divide
        D_SP,   // for stack

        D_BL,   // because D_BX can be allocated
        D_BH,
};

void
ginit(void)
{
        int i;

        for(i=0; i<nelem(reg); i++)
                reg[i] = 1;
        for(i=D_AX; i<=D_DI; i++)
                reg[i] = 0;
        for(i=D_X0; i<=D_X7; i++)
                reg[i] = 0;
        for(i=0; i<nelem(resvd); i++)
                reg[resvd[i]]++;
}

uintptr regpc[D_NONE];

void
gclean(void)
{
        int i;

        for(i=0; i<nelem(resvd); i++)
                reg[resvd[i]]--;

        for(i=D_AX; i<=D_DI; i++)
                if(reg[i])
                        yyerror("reg %R left allocated at %ux", i, regpc[i]);
        for(i=D_X0; i<=D_X7; i++)
                if(reg[i])
                        yyerror("reg %R left allocated\n", i);
}

int32
anyregalloc(void)
{
        int i, j;

        for(i=D_AX; i<=D_DI; i++) {
                if(reg[i] == 0)
                        goto ok;
                for(j=0; j<nelem(resvd); j++)
                        if(resvd[j] == i)
                                goto ok;
                return 1;
        ok:;
        }
        for(i=D_X0; i<=D_X7; i++)
                if(reg[i])
                        return 1;
        return 0;
}

/*
 * allocate register of type t, leave in n.
 * if o != N, o is desired fixed register.
 * caller must regfree(n).
 */
void
regalloc(Node *n, Type *t, Node *o)
{
        int i, et;

        if(t == T)
                fatal("regalloc: t nil");
        et = simtype[t->etype];

        switch(et) {
        case TINT64:
        case TUINT64:
                fatal("regalloc64");

        case TINT8:
        case TUINT8:
        case TINT16:
        case TUINT16:
        case TINT32:
        case TUINT32:
        case TPTR32:
        case TPTR64:
        case TBOOL:
                if(o != N && o->op == OREGISTER) {
                        i = o->val.u.reg;
                        if(i >= D_AX && i <= D_DI)
                                goto out;
                }
                for(i=D_AX; i<=D_DI; i++)
                        if(reg[i] == 0)
                                goto out;

                fprint(2, "registers allocated at\n");
                for(i=D_AX; i<=D_DI; i++)
                        fprint(2, "\t%R\t%#lux\n", i, regpc[i]);
                yyerror("out of fixed registers");
                goto err;

        case TFLOAT32:
        case TFLOAT64:
                if(!use_sse) {
                        i = D_F0;
                        goto out;
                }
                if(o != N && o->op == OREGISTER) {
                        i = o->val.u.reg;
                        if(i >= D_X0 && i <= D_X7)
                                goto out;
                }
                for(i=D_X0; i<=D_X7; i++)
                        if(reg[i] == 0)
                                goto out;
                fprint(2, "registers allocated at\n");
                for(i=D_X0; i<=D_X7; i++)
                        fprint(2, "\t%R\t%#lux\n", i, regpc[i]);
                fatal("out of floating registers");
        }
        yyerror("regalloc: unknown type %T", t);

err:
        nodreg(n, t, 0);
        return;

out:
        if (i == D_SP)
                print("alloc SP\n");
        if(reg[i] == 0) {
                regpc[i] = (uintptr)getcallerpc(&n);
                if(i == D_AX || i == D_CX || i == D_DX || i == D_SP) {
                        dump("regalloc-o", o);
                        fatal("regalloc %R", i);
                }
        }
        reg[i]++;
        nodreg(n, t, i);
}

void
regfree(Node *n)
{
        int i;
        
        if(n->op == ONAME)
                return;
        if(n->op != OREGISTER && n->op != OINDREG)
                fatal("regfree: not a register");
        i = n->val.u.reg;
        if(i == D_SP)
                return;
        if(i < 0 || i >= nelem(reg))
                fatal("regfree: reg out of range");
        if(reg[i] <= 0)
                fatal("regfree: reg not allocated");
        reg[i]--;
        if(reg[i] == 0 && (i == D_AX || i == D_CX || i == D_DX || i == D_SP))
                fatal("regfree %R", i);
}

/*
 * initialize n to be register r of type t.
 */
void
nodreg(Node *n, Type *t, int r)
{
        if(t == T)
                fatal("nodreg: t nil");

        memset(n, 0, sizeof(*n));
        n->op = OREGISTER;
        n->addable = 1;
        ullmancalc(n);
        n->val.u.reg = r;
        n->type = t;
}

/*
 * initialize n to be indirect of register r; n is type t.
 */
void
nodindreg(Node *n, Type *t, int r)
{
        nodreg(n, t, r);
        n->op = OINDREG;
}

Node*
nodarg(Type *t, int fp)
{
        Node *n;
        NodeList *l;
        Type *first;
        Iter savet;

        // entire argument struct, not just one arg
        switch(t->etype) {
        default:
                fatal("nodarg %T", t);

        case TSTRUCT:
                if(!t->funarg)
                        fatal("nodarg: TSTRUCT but not funarg");
                n = nod(ONAME, N, N);
                n->sym = lookup(".args");
                n->type = t;
                first = structfirst(&savet, &t);
                if(first == nil)
                        fatal("nodarg: bad struct");
                if(first->width == BADWIDTH)
                        fatal("nodarg: offset not computed for %T", t);
                n->xoffset = first->width;
                n->addable = 1;
                break;

        case TFIELD:
                if(fp == 1 && t->sym != S && !isblanksym(t->sym)) {
                        for(l=curfn->dcl; l; l=l->next) {
                                n = l->n;
                                if((n->class == PPARAM || n->class == PPARAMOUT) && n->sym == t->sym)
                                        return n;
                        }
                }

                n = nod(ONAME, N, N);
                n->type = t->type;
                n->sym = t->sym;
                if(t->width == BADWIDTH)
                        fatal("nodarg: offset not computed for %T", t);
                n->xoffset = t->width;
                n->addable = 1;
                n->orig = t->nname;
                break;
        }
        
        // Rewrite argument named _ to __,
        // or else the assignment to _ will be
        // discarded during code generation.
        if(isblank(n))
                n->sym = lookup("__");

        switch(fp) {
        default:
                fatal("nodarg %T %d", t, fp);

        case 0:         // output arg
                n->op = OINDREG;
                n->val.u.reg = D_SP;
                break;

        case 1:         // input arg
                n->class = PPARAM;
                break;
        }

        n->typecheck = 1;
        return n;
}

/*
 * generate
 *      as $c, reg
 */
void
gconreg(int as, vlong c, int reg)
{
        Node n1, n2;

        nodconst(&n1, types[TINT64], c);
        nodreg(&n2, types[TINT64], reg);
        gins(as, &n1, &n2);
}

/*
 * swap node contents
 */
void
nswap(Node *a, Node *b)
{
        Node t;

        t = *a;
        *a = *b;
        *b = t;
}

/*
 * return constant i node.
 * overwritten by next call, but useful in calls to gins.
 */
Node*
ncon(uint32 i)
{
        static Node n;

        if(n.type == T)
                nodconst(&n, types[TUINT32], 0);
        mpmovecfix(n.val.u.xval, i);
        return &n;
}

/*
 * Is this node a memory operand?
 */
int
ismem(Node *n)
{
        switch(n->op) {
        case OITAB:
        case OSPTR:
        case OLEN:
        case OCAP:
        case OINDREG:
        case ONAME:
        case OPARAM:
        case OCLOSUREVAR:
                return 1;
        }
        return 0;
}

Node sclean[10];
int nsclean;

/*
 * n is a 64-bit value.  fill in lo and hi to refer to its 32-bit halves.
 */
void
split64(Node *n, Node *lo, Node *hi)
{
        Node n1;
        int64 i;

        if(!is64(n->type))
                fatal("split64 %T", n->type);

        if(nsclean >= nelem(sclean))
                fatal("split64 clean");
        sclean[nsclean].op = OEMPTY;
        nsclean++;
        switch(n->op) {
        default:
                if(!dotaddable(n, &n1)) {
                        igen(n, &n1, N);
                        sclean[nsclean-1] = n1;
                }
                n = &n1;
                goto common;
        case ONAME:
                if(n->class == PPARAMREF) {
                        cgen(n->heapaddr, &n1);
                        sclean[nsclean-1] = n1;
                        // fall through.
                        n = &n1;
                }
                goto common;
        case OINDREG:
        common:
                *lo = *n;
                *hi = *n;
                lo->type = types[TUINT32];
                if(n->type->etype == TINT64)
                        hi->type = types[TINT32];
                else
                        hi->type = types[TUINT32];
                hi->xoffset += 4;
                break;

        case OLITERAL:
                convconst(&n1, n->type, &n->val);
                i = mpgetfix(n1.val.u.xval);
                nodconst(lo, types[TUINT32], (uint32)i);
                i >>= 32;
                if(n->type->etype == TINT64)
                        nodconst(hi, types[TINT32], (int32)i);
                else
                        nodconst(hi, types[TUINT32], (uint32)i);
                break;
        }
}

void
splitclean(void)
{
        if(nsclean <= 0)
                fatal("splitclean");
        nsclean--;
        if(sclean[nsclean].op != OEMPTY)
                regfree(&sclean[nsclean]);
}

/*
 * set up nodes representing fp constants
 */
Node zerof;
Node two64f;
Node two63f;

void
bignodes(void)
{
        static int did;

        if(did)
                return;
        did = 1;

        two64f = *ncon(0);
        two64f.type = types[TFLOAT64];
        two64f.val.ctype = CTFLT;
        two64f.val.u.fval = mal(sizeof *two64f.val.u.fval);
        mpmovecflt(two64f.val.u.fval, 18446744073709551616.);

        two63f = two64f;
        two63f.val.u.fval = mal(sizeof *two63f.val.u.fval);
        mpmovecflt(two63f.val.u.fval, 9223372036854775808.);

        zerof = two64f;
        zerof.val.u.fval = mal(sizeof *zerof.val.u.fval);
        mpmovecflt(zerof.val.u.fval, 0);
}

void
memname(Node *n, Type *t)
{
        tempname(n, t);
        strcpy(namebuf, n->sym->name);
        namebuf[0] = '.';       // keep optimizer from registerizing
        n->sym = lookup(namebuf);
        n->orig->sym = n->sym;
}

static void floatmove(Node *f, Node *t);
static void floatmove_387(Node *f, Node *t);
static void floatmove_sse(Node *f, Node *t);

void
gmove(Node *f, Node *t)
{
        int a, ft, tt;
        Type *cvt;
        Node r1, r2, flo, fhi, tlo, thi, con;

        if(debug['M'])
                print("gmove %N -> %N\n", f, t);

        ft = simsimtype(f->type);
        tt = simsimtype(t->type);
        cvt = t->type;
        
        if(iscomplex[ft] || iscomplex[tt]) {
                complexmove(f, t);
                return;
        }
        if(isfloat[ft] || isfloat[tt]) {
                floatmove(f, t);
                return;
        }

        // cannot have two integer memory operands;
        // except 64-bit, which always copies via registers anyway.
        if(isint[ft] && isint[tt] && !is64(f->type) && !is64(t->type) && ismem(f) && ismem(t))
                goto hard;

        // convert constant to desired type
        if(f->op == OLITERAL) {
                convconst(&con, t->type, &f->val);
                f = &con;
                ft = simsimtype(con.type);
        }

        // value -> value copy, only one memory operand.
        // figure out the instruction to use.
        // break out of switch for one-instruction gins.
        // goto rdst for "destination must be register".
        // goto hard for "convert to cvt type first".
        // otherwise handle and return.

        switch(CASE(ft, tt)) {
        default:
                goto fatal;

        /*
         * integer copy and truncate
         */
        case CASE(TINT8, TINT8):        // same size
        case CASE(TINT8, TUINT8):
        case CASE(TUINT8, TINT8):
        case CASE(TUINT8, TUINT8):
                a = AMOVB;
                break;

        case CASE(TINT16, TINT8):       // truncate
        case CASE(TUINT16, TINT8):
        case CASE(TINT32, TINT8):
        case CASE(TUINT32, TINT8):
        case CASE(TINT16, TUINT8):
        case CASE(TUINT16, TUINT8):
        case CASE(TINT32, TUINT8):
        case CASE(TUINT32, TUINT8):
                a = AMOVB;
                goto rsrc;

        case CASE(TINT64, TINT8):       // truncate low word
        case CASE(TUINT64, TINT8):
        case CASE(TINT64, TUINT8):
        case CASE(TUINT64, TUINT8):
                split64(f, &flo, &fhi);
                nodreg(&r1, t->type, D_AX);
                gmove(&flo, &r1);
                gins(AMOVB, &r1, t);
                splitclean();
                return;

        case CASE(TINT16, TINT16):      // same size
        case CASE(TINT16, TUINT16):
        case CASE(TUINT16, TINT16):
        case CASE(TUINT16, TUINT16):
                a = AMOVW;
                break;

        case CASE(TINT32, TINT16):      // truncate
        case CASE(TUINT32, TINT16):
        case CASE(TINT32, TUINT16):
        case CASE(TUINT32, TUINT16):
                a = AMOVW;
                goto rsrc;

        case CASE(TINT64, TINT16):      // truncate low word
        case CASE(TUINT64, TINT16):
        case CASE(TINT64, TUINT16):
        case CASE(TUINT64, TUINT16):
                split64(f, &flo, &fhi);
                nodreg(&r1, t->type, D_AX);
                gmove(&flo, &r1);
                gins(AMOVW, &r1, t);
                splitclean();
                return;

        case CASE(TINT32, TINT32):      // same size
        case CASE(TINT32, TUINT32):
        case CASE(TUINT32, TINT32):
        case CASE(TUINT32, TUINT32):
                a = AMOVL;
                break;

        case CASE(TINT64, TINT32):      // truncate
        case CASE(TUINT64, TINT32):
        case CASE(TINT64, TUINT32):
        case CASE(TUINT64, TUINT32):
                split64(f, &flo, &fhi);
                nodreg(&r1, t->type, D_AX);
                gmove(&flo, &r1);
                gins(AMOVL, &r1, t);
                splitclean();
                return;

        case CASE(TINT64, TINT64):      // same size
        case CASE(TINT64, TUINT64):
        case CASE(TUINT64, TINT64):
        case CASE(TUINT64, TUINT64):
                split64(f, &flo, &fhi);
                split64(t, &tlo, &thi);
                if(f->op == OLITERAL) {
                        gins(AMOVL, &flo, &tlo);
                        gins(AMOVL, &fhi, &thi);
                } else {
                        nodreg(&r1, t->type, D_AX);
                        nodreg(&r2, t->type, D_DX);
                        gins(AMOVL, &flo, &r1);
                        gins(AMOVL, &fhi, &r2);
                        gins(AMOVL, &r1, &tlo);
                        gins(AMOVL, &r2, &thi);
                }
                splitclean();
                splitclean();
                return;

        /*
         * integer up-conversions
         */
        case CASE(TINT8, TINT16):       // sign extend int8
        case CASE(TINT8, TUINT16):
                a = AMOVBWSX;
                goto rdst;
        case CASE(TINT8, TINT32):
        case CASE(TINT8, TUINT32):
                a = AMOVBLSX;
                goto rdst;
        case CASE(TINT8, TINT64):       // convert via int32
        case CASE(TINT8, TUINT64):
                cvt = types[TINT32];
                goto hard;

        case CASE(TUINT8, TINT16):      // zero extend uint8
        case CASE(TUINT8, TUINT16):
                a = AMOVBWZX;
                goto rdst;
        case CASE(TUINT8, TINT32):
        case CASE(TUINT8, TUINT32):
                a = AMOVBLZX;
                goto rdst;
        case CASE(TUINT8, TINT64):      // convert via uint32
        case CASE(TUINT8, TUINT64):
                cvt = types[TUINT32];
                goto hard;

        case CASE(TINT16, TINT32):      // sign extend int16
        case CASE(TINT16, TUINT32):
                a = AMOVWLSX;
                goto rdst;
        case CASE(TINT16, TINT64):      // convert via int32
        case CASE(TINT16, TUINT64):
                cvt = types[TINT32];
                goto hard;

        case CASE(TUINT16, TINT32):     // zero extend uint16
        case CASE(TUINT16, TUINT32):
                a = AMOVWLZX;
                goto rdst;
        case CASE(TUINT16, TINT64):     // convert via uint32
        case CASE(TUINT16, TUINT64):
                cvt = types[TUINT32];
                goto hard;

        case CASE(TINT32, TINT64):      // sign extend int32
        case CASE(TINT32, TUINT64):
                split64(t, &tlo, &thi);
                nodreg(&flo, tlo.type, D_AX);
                nodreg(&fhi, thi.type, D_DX);
                gmove(f, &flo);
                gins(ACDQ, N, N);
                gins(AMOVL, &flo, &tlo);
                gins(AMOVL, &fhi, &thi);
                splitclean();
                return;

        case CASE(TUINT32, TINT64):     // zero extend uint32
        case CASE(TUINT32, TUINT64):
                split64(t, &tlo, &thi);
                gmove(f, &tlo);
                gins(AMOVL, ncon(0), &thi);
                splitclean();
                return;
        }

        gins(a, f, t);
        return;

rsrc:
        // requires register source
        regalloc(&r1, f->type, t);
        gmove(f, &r1);
        gins(a, &r1, t);
        regfree(&r1);
        return;

rdst:
        // requires register destination
        regalloc(&r1, t->type, t);
        gins(a, f, &r1);
        gmove(&r1, t);
        regfree(&r1);
        return;

hard:
        // requires register intermediate
        regalloc(&r1, cvt, t);
        gmove(f, &r1);
        gmove(&r1, t);
        regfree(&r1);
        return;

fatal:
        // should not happen
        fatal("gmove %N -> %N", f, t);
}

static void
floatmove(Node *f, Node *t)
{
        Node r1, r2, t1, t2, tlo, thi, con, f0, f1, ax, dx, cx;
        Type *cvt;
        int ft, tt;
        Prog *p1, *p2, *p3;

        ft = simsimtype(f->type);
        tt = simsimtype(t->type);
        cvt = t->type;

        // cannot have two floating point memory operands.
        if(isfloat[ft] && isfloat[tt] && ismem(f) && ismem(t))
                goto hard;

        // convert constant to desired type
        if(f->op == OLITERAL) {
                convconst(&con, t->type, &f->val);
                f = &con;
                ft = simsimtype(con.type);

                // some constants can't move directly to memory.
                if(ismem(t)) {
                        // float constants come from memory.
                        if(isfloat[tt])
                                goto hard;
                }
        }

        // value -> value copy, only one memory operand.
        // figure out the instruction to use.
        // break out of switch for one-instruction gins.
        // goto rdst for "destination must be register".
        // goto hard for "convert to cvt type first".
        // otherwise handle and return.

        switch(CASE(ft, tt)) {
        default:
                if(use_sse)
                        floatmove_sse(f, t);
                else
                        floatmove_387(f, t);
                return;

        // float to very long integer.
        case CASE(TFLOAT32, TINT64):
        case CASE(TFLOAT64, TINT64):
                if(f->op == OREGISTER) {
                        cvt = f->type;
                        goto hardmem;
                }
                nodreg(&r1, types[ft], D_F0);
                if(ft == TFLOAT32)
                        gins(AFMOVF, f, &r1);
                else
                        gins(AFMOVD, f, &r1);

                // set round to zero mode during conversion
                memname(&t1, types[TUINT16]);
                memname(&t2, types[TUINT16]);
                gins(AFSTCW, N, &t1);
                gins(AMOVW, ncon(0xf7f), &t2);
                gins(AFLDCW, &t2, N);
                if(tt == TINT16)
                        gins(AFMOVWP, &r1, t);
                else if(tt == TINT32)
                        gins(AFMOVLP, &r1, t);
                else
                        gins(AFMOVVP, &r1, t);
                gins(AFLDCW, &t1, N);
                return;

        case CASE(TFLOAT32, TUINT64):
        case CASE(TFLOAT64, TUINT64):
                if(!ismem(f)) {
                        cvt = f->type;
                        goto hardmem;
                }
                bignodes();
                nodreg(&f0, types[ft], D_F0);
                nodreg(&f1, types[ft], D_F0 + 1);
                nodreg(&ax, types[TUINT16], D_AX);

                if(ft == TFLOAT32)
                        gins(AFMOVF, f, &f0);
                else
                        gins(AFMOVD, f, &f0);

                // if 0 > v { answer = 0 }
                gins(AFMOVD, &zerof, &f0);
                gins(AFUCOMIP, &f0, &f1);
                p1 = gbranch(optoas(OGT, types[tt]), T, 0);
                // if 1<<64 <= v { answer = 0 too }
                gins(AFMOVD, &two64f, &f0);
                gins(AFUCOMIP, &f0, &f1);
                p2 = gbranch(optoas(OGT, types[tt]), T, 0);
                patch(p1, pc);
                gins(AFMOVVP, &f0, t);  // don't care about t, but will pop the stack
                split64(t, &tlo, &thi);
                gins(AMOVL, ncon(0), &tlo);
                gins(AMOVL, ncon(0), &thi);
                splitclean();
                p1 = gbranch(AJMP, T, 0);
                patch(p2, pc);

                // in range; algorithm is:
                //      if small enough, use native float64 -> int64 conversion.
                //      otherwise, subtract 2^63, convert, and add it back.

                // set round to zero mode during conversion
                memname(&t1, types[TUINT16]);
                memname(&t2, types[TUINT16]);
                gins(AFSTCW, N, &t1);
                gins(AMOVW, ncon(0xf7f), &t2);
                gins(AFLDCW, &t2, N);

                // actual work
                gins(AFMOVD, &two63f, &f0);
                gins(AFUCOMIP, &f0, &f1);
                p2 = gbranch(optoas(OLE, types[tt]), T, 0);
                gins(AFMOVVP, &f0, t);
                p3 = gbranch(AJMP, T, 0);
                patch(p2, pc);
                gins(AFMOVD, &two63f, &f0);
                gins(AFSUBDP, &f0, &f1);
                gins(AFMOVVP, &f0, t);
                split64(t, &tlo, &thi);
                gins(AXORL, ncon(0x80000000), &thi);    // + 2^63
                patch(p3, pc);
                splitclean();
                // restore rounding mode
                gins(AFLDCW, &t1, N);

                patch(p1, pc);
                return;

        /*
         * integer to float
         */
        case CASE(TINT64, TFLOAT32):
        case CASE(TINT64, TFLOAT64):
                if(t->op == OREGISTER)
                        goto hardmem;
                nodreg(&f0, t->type, D_F0);
                gins(AFMOVV, f, &f0);
                if(tt == TFLOAT32)
                        gins(AFMOVFP, &f0, t);
                else
                        gins(AFMOVDP, &f0, t);
                return;

        case CASE(TUINT64, TFLOAT32):
        case CASE(TUINT64, TFLOAT64):
                // algorithm is:
                //      if small enough, use native int64 -> float64 conversion.
                //      otherwise, halve (rounding to odd?), convert, and double.
                nodreg(&ax, types[TUINT32], D_AX);
                nodreg(&dx, types[TUINT32], D_DX);
                nodreg(&cx, types[TUINT32], D_CX);
                tempname(&t1, f->type);
                split64(&t1, &tlo, &thi);
                gmove(f, &t1);
                gins(ACMPL, &thi, ncon(0));
                p1 = gbranch(AJLT, T, 0);
                // native
                nodreg(&r1, types[tt], D_F0);
                gins(AFMOVV, &t1, &r1);
                if(tt == TFLOAT32)
                        gins(AFMOVFP, &r1, t);
                else
                        gins(AFMOVDP, &r1, t);
                p2 = gbranch(AJMP, T, 0);
                // simulated
                patch(p1, pc);
                gmove(&tlo, &ax);
                gmove(&thi, &dx);
                p1 = gins(ASHRL, ncon(1), &ax);
                p1->from.index = D_DX;  // double-width shift DX -> AX
                p1->from.scale = 0;
                gins(AMOVL, ncon(0), &cx);
                gins(ASETCC, N, &cx);
                gins(AORL, &cx, &ax);
                gins(ASHRL, ncon(1), &dx);
                gmove(&dx, &thi);
                gmove(&ax, &tlo);
                nodreg(&r1, types[tt], D_F0);
                nodreg(&r2, types[tt], D_F0 + 1);
                gins(AFMOVV, &t1, &r1);
                gins(AFMOVD, &r1, &r1);
                gins(AFADDDP, &r1, &r2);
                if(tt == TFLOAT32)
                        gins(AFMOVFP, &r1, t);
                else
                        gins(AFMOVDP, &r1, t);
                patch(p2, pc);
                splitclean();
                return;
        }

hard:
        // requires register intermediate
        regalloc(&r1, cvt, t);
        gmove(f, &r1);
        gmove(&r1, t);
        regfree(&r1);
        return;

hardmem:
        // requires memory intermediate
        tempname(&r1, cvt);
        gmove(f, &r1);
        gmove(&r1, t);
        return;
}

static void
floatmove_387(Node *f, Node *t)
{
        Node r1, t1, t2;
        Type *cvt;
        Prog *p1, *p2, *p3;
        int a, ft, tt;

        ft = simsimtype(f->type);
        tt = simsimtype(t->type);
        cvt = t->type;

        switch(CASE(ft, tt)) {
        default:
                goto fatal;

        /*
        * float to integer
        */
        case CASE(TFLOAT32, TINT16):
        case CASE(TFLOAT32, TINT32):
        case CASE(TFLOAT32, TINT64):
        case CASE(TFLOAT64, TINT16):
        case CASE(TFLOAT64, TINT32):
        case CASE(TFLOAT64, TINT64):
                if(t->op == OREGISTER)
                        goto hardmem;
                nodreg(&r1, types[ft], D_F0);
                if(f->op != OREGISTER) {
                        if(ft == TFLOAT32)
                                gins(AFMOVF, f, &r1);
                        else
                                gins(AFMOVD, f, &r1);
                }

                // set round to zero mode during conversion
                memname(&t1, types[TUINT16]);
                memname(&t2, types[TUINT16]);
                gins(AFSTCW, N, &t1);
                gins(AMOVW, ncon(0xf7f), &t2);
                gins(AFLDCW, &t2, N);
                if(tt == TINT16)
                        gins(AFMOVWP, &r1, t);
                else if(tt == TINT32)
                        gins(AFMOVLP, &r1, t);
                else
                        gins(AFMOVVP, &r1, t);
                gins(AFLDCW, &t1, N);
                return;

        case CASE(TFLOAT32, TINT8):
        case CASE(TFLOAT32, TUINT16):
        case CASE(TFLOAT32, TUINT8):
        case CASE(TFLOAT64, TINT8):
        case CASE(TFLOAT64, TUINT16):
        case CASE(TFLOAT64, TUINT8):
                // convert via int32.
                tempname(&t1, types[TINT32]);
                gmove(f, &t1);
                switch(tt) {
                default:
                        fatal("gmove %T", t);
                case TINT8:
                        gins(ACMPL, &t1, ncon(-0x80));
                        p1 = gbranch(optoas(OLT, types[TINT32]), T, -1);
                        gins(ACMPL, &t1, ncon(0x7f));
                        p2 = gbranch(optoas(OGT, types[TINT32]), T, -1);
                        p3 = gbranch(AJMP, T, 0);
                        patch(p1, pc);
                        patch(p2, pc);
                        gmove(ncon(-0x80), &t1);
                        patch(p3, pc);
                        gmove(&t1, t);
                        break;
                case TUINT8:
                        gins(ATESTL, ncon(0xffffff00), &t1);
                        p1 = gbranch(AJEQ, T, +1);
                        gins(AMOVL, ncon(0), &t1);
                        patch(p1, pc);
                        gmove(&t1, t);
                        break;
                case TUINT16:
                        gins(ATESTL, ncon(0xffff0000), &t1);
                        p1 = gbranch(AJEQ, T, +1);
                        gins(AMOVL, ncon(0), &t1);
                        patch(p1, pc);
                        gmove(&t1, t);
                        break;
                }
                return;

        case CASE(TFLOAT32, TUINT32):
        case CASE(TFLOAT64, TUINT32):
                // convert via int64.
                cvt = types[TINT64];
                goto hardmem;

        /*
         * integer to float
         */
        case CASE(TINT16, TFLOAT32):
        case CASE(TINT16, TFLOAT64):
        case CASE(TINT32, TFLOAT32):
        case CASE(TINT32, TFLOAT64):
        case CASE(TINT64, TFLOAT32):
        case CASE(TINT64, TFLOAT64):
                if(t->op != OREGISTER)
                        goto hard;
                if(f->op == OREGISTER) {
                        cvt = f->type;
                        goto hardmem;
                }
                switch(ft) {
                case TINT16:
                        a = AFMOVW;
                        break;
                case TINT32:
                        a = AFMOVL;
                        break;
                default:
                        a = AFMOVV;
                        break;
                }
                break;

        case CASE(TINT8, TFLOAT32):
        case CASE(TINT8, TFLOAT64):
        case CASE(TUINT16, TFLOAT32):
        case CASE(TUINT16, TFLOAT64):
        case CASE(TUINT8, TFLOAT32):
        case CASE(TUINT8, TFLOAT64):
                // convert via int32 memory
                cvt = types[TINT32];
                goto hardmem;

        case CASE(TUINT32, TFLOAT32):
        case CASE(TUINT32, TFLOAT64):
                // convert via int64 memory
                cvt = types[TINT64];
                goto hardmem;

        /*
         * float to float
         */
        case CASE(TFLOAT32, TFLOAT32):
        case CASE(TFLOAT64, TFLOAT64):
                // The way the code generator uses floating-point
                // registers, a move from F0 to F0 is intended as a no-op.
                // On the x86, it's not: it pushes a second copy of F0
                // on the floating point stack.  So toss it away here.
                // Also, F0 is the *only* register we ever evaluate
                // into, so we should only see register/register as F0/F0.
                if(ismem(f) && ismem(t))
                        goto hard;
                if(f->op == OREGISTER && t->op == OREGISTER) {
                        if(f->val.u.reg != D_F0 || t->val.u.reg != D_F0)
                                goto fatal;
                        return;
                }
                a = AFMOVF;
                if(ft == TFLOAT64)
                        a = AFMOVD;
                if(ismem(t)) {
                        if(f->op != OREGISTER || f->val.u.reg != D_F0)
                                fatal("gmove %N", f);
                        a = AFMOVFP;
                        if(ft == TFLOAT64)
                                a = AFMOVDP;
                }
                break;

        case CASE(TFLOAT32, TFLOAT64):
                if(ismem(f) && ismem(t))
                        goto hard;
                if(f->op == OREGISTER && t->op == OREGISTER) {
                        if(f->val.u.reg != D_F0 || t->val.u.reg != D_F0)
                                goto fatal;
                        return;
                }
                if(f->op == OREGISTER)
                        gins(AFMOVDP, f, t);
                else
                        gins(AFMOVF, f, t);
                return;

        case CASE(TFLOAT64, TFLOAT32):
                if(ismem(f) && ismem(t))
                        goto hard;
                if(f->op == OREGISTER && t->op == OREGISTER) {
                        tempname(&r1, types[TFLOAT32]);
                        gins(AFMOVFP, f, &r1);
                        gins(AFMOVF, &r1, t);
                        return;
                }
                if(f->op == OREGISTER)
                        gins(AFMOVFP, f, t);
                else
                        gins(AFMOVD, f, t);
                return;
        }

        gins(a, f, t);
        return;

hard:
        // requires register intermediate
        regalloc(&r1, cvt, t);
        gmove(f, &r1);
        gmove(&r1, t);
        regfree(&r1);
        return;

hardmem:
        // requires memory intermediate
        tempname(&r1, cvt);
        gmove(f, &r1);
        gmove(&r1, t);
        return;

fatal:
        // should not happen
        fatal("gmove %lN -> %lN", f, t);
        return;
}

static void
floatmove_sse(Node *f, Node *t)
{
        Node r1;
        Type *cvt;
        int a, ft, tt;

        ft = simsimtype(f->type);
        tt = simsimtype(t->type);

        switch(CASE(ft, tt)) {
        default:
                // should not happen
                fatal("gmove %N -> %N", f, t);
                return;
        /*
        * float to integer
        */
        case CASE(TFLOAT32, TINT16):
        case CASE(TFLOAT32, TINT8):
        case CASE(TFLOAT32, TUINT16):
        case CASE(TFLOAT32, TUINT8):
        case CASE(TFLOAT64, TINT16):
        case CASE(TFLOAT64, TINT8):
        case CASE(TFLOAT64, TUINT16):
        case CASE(TFLOAT64, TUINT8):
                // convert via int32.
                cvt = types[TINT32];
                goto hard;

        case CASE(TFLOAT32, TUINT32):
        case CASE(TFLOAT64, TUINT32):
                // convert via int64.
                cvt = types[TINT64];
                goto hardmem;

        case CASE(TFLOAT32, TINT32):
                a = ACVTTSS2SL;
                goto rdst;

        case CASE(TFLOAT64, TINT32):
                a = ACVTTSD2SL;
                goto rdst;

        /*
         * integer to float
         */
        case CASE(TINT8, TFLOAT32):
        case CASE(TINT8, TFLOAT64):
        case CASE(TINT16, TFLOAT32):
        case CASE(TINT16, TFLOAT64):
        case CASE(TUINT16, TFLOAT32):
        case CASE(TUINT16, TFLOAT64):
        case CASE(TUINT8, TFLOAT32):
        case CASE(TUINT8, TFLOAT64):
                // convert via int32 memory
                cvt = types[TINT32];
                goto hard;

        case CASE(TUINT32, TFLOAT32):
        case CASE(TUINT32, TFLOAT64):
                // convert via int64 memory
                cvt = types[TINT64];
                goto hardmem;

        case CASE(TINT32, TFLOAT32):
                a = ACVTSL2SS;
                goto rdst;

        case CASE(TINT32, TFLOAT64):
                a = ACVTSL2SD;
                goto rdst;

        /*
         * float to float
         */
        case CASE(TFLOAT32, TFLOAT32):
                a = AMOVSS;
                break;

        case CASE(TFLOAT64, TFLOAT64):
                a = AMOVSD;
                break;

        case CASE(TFLOAT32, TFLOAT64):
                a = ACVTSS2SD;
                goto rdst;

        case CASE(TFLOAT64, TFLOAT32):
                a = ACVTSD2SS;
                goto rdst;
        }

        gins(a, f, t);
        return;

hard:
        // requires register intermediate
        regalloc(&r1, cvt, t);
        gmove(f, &r1);
        gmove(&r1, t);
        regfree(&r1);
        return;

hardmem:
        // requires memory intermediate
        tempname(&r1, cvt);
        gmove(f, &r1);
        gmove(&r1, t);
        return;

rdst:
        // requires register destination
        regalloc(&r1, t->type, t);
        gins(a, f, &r1);
        gmove(&r1, t);
        regfree(&r1);
        return;
}

int
samaddr(Node *f, Node *t)
{

        if(f->op != t->op)
                return 0;

        switch(f->op) {
        case OREGISTER:
                if(f->val.u.reg != t->val.u.reg)
                        break;
                return 1;
        }
        return 0;
}
/*
 * generate one instruction:
 *      as f, t
 */
Prog*
gins(int as, Node *f, Node *t)
{
        Prog *p;
        Addr af, at;
        int w;

        if(as == AFMOVF && f && f->op == OREGISTER && t && t->op == OREGISTER)
                fatal("gins MOVF reg, reg");
        if(as == ACVTSD2SS && f && f->op == OLITERAL)
                fatal("gins CVTSD2SS const");
        if(as == AMOVSD && t && t->op == OREGISTER && t->val.u.reg == D_F0)
                fatal("gins MOVSD into F0");

        switch(as) {
        case AMOVB:
        case AMOVW:
        case AMOVL:
                if(f != N && t != N && samaddr(f, t))
                        return nil;
                break;
        
        case ALEAL:
                if(f != N && isconst(f, CTNIL))
                        fatal("gins LEAL nil %T", f->type);
                break;
        }

        memset(&af, 0, sizeof af);
        memset(&at, 0, sizeof at);
        if(f != N)
                naddr(f, &af, 1);
        if(t != N)
                naddr(t, &at, 1);
        p = prog(as);
        if(f != N)
                p->from = af;
        if(t != N)
                p->to = at;
        if(debug['g'])
                print("%P\n", p);

        w = 0;
        switch(as) {
        case AMOVB:
                w = 1;
                break;
        case AMOVW:
                w = 2;
                break;
        case AMOVL:
                w = 4;
                break;
        }

        if(1 && w != 0 && f != N && (af.width > w || at.width > w)) {
                dump("bad width from:", f);
                dump("bad width to:", t);
                fatal("bad width: %P (%d, %d)\n", p, af.width, at.width);
        }

        return p;
}

/*
 * generate code to compute n;
 * make a refer to result.
 */
void
naddr(Node *n, Addr *a, int canemitcode)
{
        Sym *s;

        a->scale = 0;
        a->index = D_NONE;
        a->type = D_NONE;
        a->gotype = nil;
        a->node = N;
        if(n == N)
                return;

        switch(n->op) {
        default:
                fatal("naddr: bad %O %D", n->op, a);
                break;

        case OREGISTER:
                a->type = n->val.u.reg;
                a->sym = nil;
                break;

        case OINDREG:
                a->type = n->val.u.reg+D_INDIR;
                a->sym = linksym(n->sym);
                a->offset = n->xoffset;
                break;

        case OPARAM:
                // n->left is PHEAP ONAME for stack parameter.
                // compute address of actual parameter on stack.
                a->etype = n->left->type->etype;
                a->width = n->left->type->width;
                a->offset = n->xoffset;
                a->sym = linksym(n->left->sym);
                a->type = D_PARAM;
                a->node = n->left->orig;
                break;

        case OCLOSUREVAR:
                if(!curfn->needctxt)
                        fatal("closurevar without needctxt");
                a->type = D_DX+D_INDIR;
                a->offset = n->xoffset;
                a->sym = nil;
                break;

        case OCFUNC:
                naddr(n->left, a, canemitcode);
                a->sym = linksym(n->left->sym);
                break;

        case ONAME:
                a->etype = 0;
                a->width = 0;
                if(n->type != T) {
                        a->etype = simtype[n->type->etype];
                        dowidth(n->type);
                        a->width = n->type->width;
                }
                a->offset = n->xoffset;
                s = n->sym;
                a->node = n->orig;
                //if(a->node >= (Node*)&n)
                //      fatal("stack node");
                if(s == S)
                        s = lookup(".noname");
                if(n->method) {
                        if(n->type != T)
                        if(n->type->sym != S)
                        if(n->type->sym->pkg != nil)
                                s = pkglookup(s->name, n->type->sym->pkg);
                }

                switch(n->class) {
                default:
                        fatal("naddr: ONAME class %S %d\n", n->sym, n->class);
                case PEXTERN:
                        a->type = D_EXTERN;
                        break;
                case PAUTO:
                        a->type = D_AUTO;
                        break;
                case PPARAM:
                case PPARAMOUT:
                        a->type = D_PARAM;
                        break;
                case PFUNC:
                        a->index = D_EXTERN;
                        a->type = D_ADDR;
                        s = funcsym(s);
                        break;
                }
                a->sym = linksym(s);
                break;

        case OLITERAL:
                switch(n->val.ctype) {
                default:
                        fatal("naddr: const %lT", n->type);
                        break;
                case CTFLT:
                        a->type = D_FCONST;
                        a->u.dval = mpgetflt(n->val.u.fval);
                        break;
                case CTINT:
                case CTRUNE:
                        a->sym = nil;
                        a->type = D_CONST;
                        a->offset = mpgetfix(n->val.u.xval);
                        break;
                case CTSTR:
                        datagostring(n->val.u.sval, a);
                        break;
                case CTBOOL:
                        a->sym = nil;
                        a->type = D_CONST;
                        a->offset = n->val.u.bval;
                        break;
                case CTNIL:
                        a->sym = nil;
                        a->type = D_CONST;
                        a->offset = 0;
                        break;
                }
                break;

        case OADDR:
                naddr(n->left, a, canemitcode);
                if(a->type >= D_INDIR) {
                        a->type -= D_INDIR;
                        break;
                }
                if(a->type == D_EXTERN || a->type == D_STATIC ||
                   a->type == D_AUTO || a->type == D_PARAM)
                        if(a->index == D_NONE) {
                                a->index = a->type;
                                a->type = D_ADDR;
                                break;
                        }
                fatal("naddr: OADDR\n");
        
        case OITAB:
                // itable of interface value
                naddr(n->left, a, canemitcode);
                if(a->type == D_CONST && a->offset == 0)
                        break;  // len(nil)
                a->etype = tptr;
                a->width = widthptr;
                break;

        case OSPTR:
                // pointer in a string or slice
                naddr(n->left, a, canemitcode);
                if(a->type == D_CONST && a->offset == 0)
                        break;  // ptr(nil)
                a->etype = simtype[tptr];
                a->offset += Array_array;
                a->width = widthptr;
                break;

        case OLEN:
                // len of string or slice
                naddr(n->left, a, canemitcode);
                if(a->type == D_CONST && a->offset == 0)
                        break;  // len(nil)
                a->etype = TUINT32;
                a->offset += Array_nel;
                a->width = 4;
                break;

        case OCAP:
                // cap of string or slice
                naddr(n->left, a, canemitcode);
                if(a->type == D_CONST && a->offset == 0)
                        break;  // cap(nil)
                a->etype = TUINT32;
                a->offset += Array_cap;
                a->width = 4;
                break;

//      case OADD:
//              if(n->right->op == OLITERAL) {
//                      v = n->right->vconst;
//                      naddr(n->left, a, canemitcode);
//              } else
//              if(n->left->op == OLITERAL) {
//                      v = n->left->vconst;
//                      naddr(n->right, a, canemitcode);
//              } else
//                      goto bad;
//              a->offset += v;
//              break;

        }
}

int
dotaddable(Node *n, Node *n1)
{
        int o;
        int64 oary[10];
        Node *nn;

        if(n->op != ODOT)
                return 0;

        o = dotoffset(n, oary, &nn);
        if(nn != N && nn->addable && o == 1 && oary[0] >= 0) {
                *n1 = *nn;
                n1->type = n->type;
                n1->xoffset += oary[0];
                return 1;
        }
        return 0;
}

void
sudoclean(void)
{
}

int
sudoaddable(int as, Node *n, Addr *a)
{
        USED(as);
        USED(n);
        USED(a);

        return 0;
}

/* [<][>][^][v][top][bottom][index][help] */