root/src/liblink/objfile.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. writeobj
  2. writesym
  3. wrint
  4. wrstring
  5. wrpath
  6. wrdata
  7. wrpathsym
  8. wrsym
  9. ldobjfile
  10. readsym
  11. rdint
  12. rdstring
  13. rddata
  14. rdsym

// Copyright 2013 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Writing and reading of Go object files.
//
// Originally, Go object files were Plan 9 object files, but no longer.
// Now they are more like standard object files, in that each symbol is defined
// by an associated memory image (bytes) and a list of relocations to apply
// during linking. We do not (yet?) use a standard file format, however.
// For now, the format is chosen to be as simple as possible to read and write.
// It may change for reasons of efficiency, or we may even switch to a
// standard file format if there are compelling benefits to doing so.
// See golang.org/s/go13linker for more background.
//
// The file format is:
//
//      - magic header: "\x00\x00go13ld"
//      - byte 1 - version number
//      - sequence of strings giving dependencies (imported packages)
//      - empty string (marks end of sequence)
//      - sequence of defined symbols
//      - byte 0xff (marks end of sequence; this is the first byte of
//        the magic footer below, not an additional byte)
//      - magic footer: "\xff\xffgo13ld"
//
// All integers are stored in a zigzag varint format.
// See golang.org/s/go12symtab for a definition.
//
// Data blocks and strings are both stored as an integer
// followed by that many bytes.
//
// A symbol reference is a string name followed by a version.
// An empty name corresponds to a nil LSym* pointer.
//
// Each symbol is laid out as the following fields (taken from LSym*):
//
//      - byte 0xfe (sanity check for synchronization)
//      - type [int]
//      - name [string]
//      - version [int]
//      - dupok [int]
//      - size [int]
//      - gotype [symbol reference]
//      - p [data block]
//      - nr [int]
//      - r [nr relocations, sorted by off]
//
// If type == STEXT, there are a few more fields:
//
//      - args [int]
//      - locals [int]
//      - nosplit [int]
//      - leaf [int]
//      - nlocal [int]
//      - local [nlocal automatics]
//      - pcln [pcln table]
//
// Each relocation has the encoding:
//
//      - off [int]
//      - siz [int]
//      - type [int]
//      - add [int]
//      - xadd [int]
//      - sym [symbol reference]
//      - xsym [symbol reference]
//
// Each local has the encoding:
//
//      - asym [symbol reference]
//      - offset [int]
//      - type [int]
//      - gotype [symbol reference]
//
// The pcln table has the encoding:
//
//      - pcsp [data block]
//      - pcfile [data block]
//      - pcline [data block]
//      - npcdata [int]
//      - pcdata [npcdata data blocks]
//      - nfuncdata [int]
//      - funcdata [nfuncdata symbol references]
//      - funcdataoff [nfuncdata ints]
//      - nfile [int]
//      - file [nfile symbol references]
//
// The file layout and meaning of type integers are architecture-independent.
//
// TODO(rsc): The file format is good for a first pass but needs work.
//      - There are SymID in the object file that should really just be strings.
//      - The actual symbol memory images are interlaced with the symbol
//        metadata. They should be separated, to reduce the I/O required to
//        load just the metadata.
//      - The symbol references should be shortened, either with a symbol
//        table or by using a simple backward index to an earlier mentioned symbol.

#include <u.h>
#include <libc.h>
#include <bio.h>
#include <link.h>
#include "../cmd/ld/textflag.h"

static void writesym(Link*, Biobuf*, LSym*);
static void wrint(Biobuf*, int64);
static void wrstring(Biobuf*, char*);
static void wrpath(Link *, Biobuf*, char*);
static void wrdata(Biobuf*, void*, int);
static void wrsym(Biobuf*, LSym*);
static void wrpathsym(Link *ctxt, Biobuf *b, LSym *s);

static void readsym(Link*, Biobuf*, char*, char*);
static int64 rdint(Biobuf*);
static char *rdstring(Biobuf*);
static void rddata(Biobuf*, uchar**, int*);
static LSym *rdsym(Link*, Biobuf*, char*);

// The Go and C compilers, and the assembler, call writeobj to write
// out a Go object file.  The linker does not call this; the linker
// does not write out object files.
//
// writeobj works in three phases:
//   1. walk every instruction on ctxt->plist, unlinking it from its
//      list and sorting it into per-symbol text and data lists;
//   2. run the architecture back end over each text symbol to produce
//      its machine code image and pc-line tables;
//   3. write the header, the dependency list, all text symbols, all
//      data symbols, and the footer to b.
void
writeobj(Link *ctxt, Biobuf *b)
{
        int flag;
        Hist *h;
        LSym *s, *text, *etext, *curtext, *data, *edata;
        Plist *pl;
        Prog *p, *plink;
        Auto *a;

        // Build list of symbols, and assign instructions to lists.
        // Ignore ctxt->plist boundaries. There are no guarantees there,
        // and the C compilers and assemblers just use one big list.
        // text/etext and data/edata are the head and tail of the two
        // symbol lists (chained through LSym.next); curtext is the
        // function that subsequent instructions belong to.
        text = nil;
        curtext = nil;
        data = nil;
        etext = nil;
        edata = nil;
        for(pl = ctxt->plist; pl != nil; pl = pl->link) {
                for(p = pl->firstpc; p != nil; p = plink) {
                        // Remember the successor and detach p: instructions
                        // are re-chained per function below.
                        plink = p->link;
                        p->link = nil;

                        // END instructions are dropped.
                        if(p->as == ctxt->arch->AEND)
                                continue;

                        if(p->as == ctxt->arch->ATYPE) {
                                // Assume each TYPE instruction describes
                                // a different local variable or parameter,
                                // so no dedup.
                                // Using only the TYPE instructions means
                                // that we discard location information about local variables
                                // in C and assembly functions; that information is inferred
                                // from ordinary references, because there are no TYPE
                                // instructions there. Without the type information, gdb can't
                                // use the locations, so we don't bother to save them.
                                // If something else could use them, we could arrange to
                                // preserve them.
                                if(curtext == nil)
                                        continue;
                                // Record the variable and push it on the front of
                                // the current function's automatics list.
                                a = emallocz(sizeof *a);
                                a->asym = p->from.sym;
                                a->aoffset = p->from.offset;
                                a->type = ctxt->arch->symtype(&p->from);
                                a->gotype = p->from.gotype;
                                a->link = curtext->autom;
                                curtext->autom = a;
                                continue;
                        }

                        if(p->as == ctxt->arch->AGLOBL) {
                                // GLOBL declares a data symbol: append it to the
                                // data list and record its size and type.
                                s = p->from.sym;
                                if(s->seenglobl++)
                                        print("duplicate %P\n", p);
                                if(s->onlist)
                                        sysfatal("symbol %s listed multiple times", s->name);
                                s->onlist = 1;
                                if(data == nil)
                                        data = s;
                                else
                                        edata->next = s;
                                s->next = nil;
                                s->size = p->to.offset;
                                // Default to BSS unless a type was already assigned.
                                if(s->type == 0 || s->type == SXREF)
                                        s->type = SBSS;

                                // The flag word is carried in p->reg when
                                // thechar is '5' and in p->from.scale otherwise.
                                if(ctxt->arch->thechar == '5')
                                        flag = p->reg;
                                else
                                        flag = p->from.scale;

                                if(flag & DUPOK)
                                        s->dupok = 1;
                                // RODATA takes precedence over NOPTR.
                                if(flag & RODATA)
                                        s->type = SRODATA;
                                else if(flag & NOPTR)
                                        s->type = SNOPTRBSS;
                                edata = s;
                                continue;
                        }

                        if(p->as == ctxt->arch->ADATA) {
                                // DATA instructions are handed to savedata for
                                // the symbol they initialize.
                                savedata(ctxt, p->from.sym, p, "<input>");
                                continue;
                        }

                        if(p->as == ctxt->arch->ATEXT) {
                                // TEXT starts a new function: append its symbol to
                                // the text list and make it the current function.
                                s = p->from.sym;
                                if(s == nil) {
                                        // func _() { }
                                        // No symbol: the instructions that follow are
                                        // discarded until the next TEXT.
                                        curtext = nil;
                                        continue;
                                }
                                if(s->text != nil)
                                        sysfatal("duplicate TEXT for %s", s->name);
                                if(s->onlist)
                                        sysfatal("symbol %s listed multiple times", s->name);
                                s->onlist = 1;
                                if(text == nil)
                                        text = s;
                                else
                                        etext->next = s;
                                etext = s;
                                // Same flag location rule as for GLOBL above.
                                if(ctxt->arch->thechar == '5')
                                        flag = p->reg;
                                else
                                        flag = p->from.scale;
                                if(flag & DUPOK)
                                        s->dupok = 1;
                                if(flag & NOSPLIT)
                                        s->nosplit = 1;
                                s->next = nil;
                                s->type = STEXT;
                                // The TEXT instruction is both the first and
                                // (so far) the last instruction of the function.
                                s->text = p;
                                s->etext = p;
                                curtext = s;
                                continue;
                        }

                        // Any other instruction is appended to the current
                        // function's instruction chain, or dropped if there is none.
                        if(curtext == nil)
                                continue;
                        s = curtext;
                        s->etext->link = p;
                        s->etext = p;
                }
        }

        // Turn functions into machine code images.
        for(s = text; s != nil; s = s->next) {
                mkfwd(s);
                linkpatch(ctxt, s);
                ctxt->arch->follow(ctxt, s);
                ctxt->arch->addstacksplit(ctxt, s);
                ctxt->arch->assemble(ctxt, s);
                linkpcln(ctxt, s);
        }

        // Emit header.
        Bputc(b, 0);
        Bputc(b, 0);
        Bprint(b, "go13ld");
        Bputc(b, 1); // version

        // Emit autolib.
        // Only history entries with a negative offset are written; the
        // empty string terminates the list.
        for(h = ctxt->hist; h != nil; h = h->link)
                if(h->offset < 0)
                        wrstring(b, h->name);
        wrstring(b, "");

        // Emit symbols.
        // All text symbols come first, then all data symbols.
        for(s = text; s != nil; s = s->next)
                writesym(ctxt, b, s);
        for(s = data; s != nil; s = s->next)
                writesym(ctxt, b, s);

        // Emit footer.
        // The leading 0xff is what ldobjfile looks for to detect the end
        // of the symbol sequence.
        Bputc(b, 0xff);
        Bputc(b, 0xff);
        Bprint(b, "go13ld");
}

// writesymdebug prints a human-readable description of s to ctxt->bso:
// a one-line summary, the instruction listing, a hex and ASCII dump of
// the data (16 bytes per row), and one line per relocation.
static void
writesymdebug(Link *ctxt, LSym *s)
{
        Biobuf *out;
        Prog *ins;
        Reloc *rel;
        char *target;
        int row, col, stop, ch, k;

        out = ctxt->bso;

        // Summary line: name followed by the non-zero attributes.
        Bprint(out, "%s ", s->name);
        if(s->version)
                Bprint(out, "v=%d ", s->version);
        if(s->type)
                Bprint(out, "t=%d ", s->type);
        if(s->dupok)
                Bprint(out, "dupok ");
        if(s->nosplit)
                Bprint(out, "nosplit ");
        Bprint(out, "size=%lld value=%lld", (vlong)s->size, (vlong)s->value);
        if(s->type == STEXT) {
                Bprint(out, " args=%#llux locals=%#llux", (uvlong)s->args, (uvlong)s->locals);
                if(s->leaf)
                        Bprint(out, " leaf");
        }
        Bprint(out, "\n");

        // Instruction listing.
        ins = s->text;
        while(ins != nil) {
                Bprint(out, "\t%#06ux %P\n", (int)ins->pc, ins);
                ins = ins->link;
        }

        // Data dump: offset, up to 16 hex bytes padded to a fixed
        // width, then the same bytes as printable ASCII or '.'.
        for(row = 0; row < s->np; row += 16) {
                stop = row + 16;
                Bprint(out, "\t%#06ux", row);
                for(col = row; col < stop && col < s->np; col++)
                        Bprint(out, " %02ux", s->p[col]);
                while(col < stop) {
                        Bprint(out, "   ");
                        col++;
                }
                Bprint(out, "  ");
                for(col = row; col < stop && col < s->np; col++) {
                        ch = s->p[col];
                        if(ch < ' ' || ch > 0x7e)
                                Bprint(out, ".");
                        else
                                Bprint(out, "%c", ch);
                }
                Bprint(out, "\n");
        }

        // Relocations; a nil target symbol prints as an empty name.
        for(k = 0; k < s->nr; k++) {
                rel = s->r + k;
                target = "";
                if(rel->sym != nil)
                        target = rel->sym->name;
                Bprint(out, "\trel %d+%d t=%d %s+%lld\n", (int)rel->off, rel->siz, rel->type, target, (vlong)rel->add);
        }
}

// writesym writes the object file record for s to b, in the field
// order given in the format description at the top of this file.
// When ctxt->debugasm is set it first prints a dump of the symbol
// to ctxt->bso.
static void
writesym(Link *ctxt, Biobuf *b, LSym *s)
{
        Reloc *rel;
        Pcln *tab;
        Auto *loc;
        int k, nlocal;

        if(ctxt->debugasm)
                writesymdebug(ctxt, s);

        // Fields common to every symbol.
        Bputc(b, 0xfe);
        wrint(b, s->type);
        wrstring(b, s->name);
        wrint(b, s->version);
        wrint(b, s->dupok);
        wrint(b, s->size);
        wrsym(b, s->gotype);
        wrdata(b, s->p, s->np);

        // Relocations, preceded by their count.
        wrint(b, s->nr);
        k = 0;
        while(k < s->nr) {
                rel = s->r + k;
                wrint(b, rel->off);
                wrint(b, rel->siz);
                wrint(b, rel->type);
                wrint(b, rel->add);
                wrint(b, rel->xadd);
                wrsym(b, rel->sym);
                wrsym(b, rel->xsym);
                k++;
        }

        // Everything below applies to functions only.
        if(s->type != STEXT)
                return;

        wrint(b, s->args);
        wrint(b, s->locals);
        wrint(b, s->nosplit);
        wrint(b, s->leaf);

        // Automatics: the count has to be written before the entries,
        // so the list is walked twice.
        nlocal = 0;
        loc = s->autom;
        while(loc != nil) {
                nlocal++;
                loc = loc->link;
        }
        wrint(b, nlocal);
        loc = s->autom;
        while(loc != nil) {
                wrsym(b, loc->asym);
                wrint(b, loc->aoffset);
                // Translate the architecture's operand type into the
                // A_AUTO / A_PARAM constant stored in the file.
                if(loc->type == ctxt->arch->D_AUTO)
                        wrint(b, A_AUTO);
                else if(loc->type == ctxt->arch->D_PARAM)
                        wrint(b, A_PARAM);
                else
                        sysfatal("%s: invalid local variable type %d", s->name, loc->type);
                wrsym(b, loc->gotype);
                loc = loc->link;
        }

        // pcln table.
        tab = s->pcln;
        wrdata(b, tab->pcsp.p, tab->pcsp.n);
        wrdata(b, tab->pcfile.p, tab->pcfile.n);
        wrdata(b, tab->pcline.p, tab->pcline.n);

        wrint(b, tab->npcdata);
        for(k = 0; k < tab->npcdata; k++)
                wrdata(b, tab->pcdata[k].p, tab->pcdata[k].n);

        // All funcdata symbols are written first, then all offsets.
        wrint(b, tab->nfuncdata);
        for(k = 0; k < tab->nfuncdata; k++)
                wrsym(b, tab->funcdata[k]);
        for(k = 0; k < tab->nfuncdata; k++)
                wrint(b, tab->funcdataoff[k]);

        // File names go through wrpathsym, which normalizes path
        // separators on Windows.
        wrint(b, tab->nfile);
        for(k = 0; k < tab->nfile; k++)
                wrpathsym(ctxt, b, tab->file[k]);
}

// wrint writes sval to b as a zigzag varint: the sign is folded into
// the low bit, then the value is emitted seven bits at a time, least
// significant group first, with the high bit of each byte set when
// more bytes follow.
static void
wrint(Biobuf *b, int64 sval)
{
        uchar out[10];
        uint64 sign, bits;
        int len;

        // Zigzag step: sign is all ones for negative values, zero otherwise.
        sign = (uint64)(int64)(sval>>63);
        bits = ((uint64)sval<<1) ^ sign;

        // A 64-bit value needs at most 10 groups of 7 bits.
        len = 0;
        while(bits >= 0x80) {
                out[len++] = bits | 0x80;
                bits >>= 7;
        }
        out[len++] = bits;

        Bwrite(b, out, len);
}

// wrstring writes the NUL-terminated string s to b as a data block:
// its length followed by its bytes, without the terminator.
static void
wrstring(Biobuf *b, char *s)
{
        int len;

        len = strlen(s);
        wrdata(b, s, len);
}

// wrpath writes the path p to b in the same encoding as wrstring.
// When ctxt->windows is set and the path contains a backslash, each
// '\\' is written as '/' instead; the length prefix is unchanged
// because the substitution is byte for byte.
static void
wrpath(Link *ctxt, Biobuf *b, char *p)
{
        char *q, *end;
        int len;

        if(ctxt->windows && strchr(p, '\\') != nil) {
                len = strlen(p);
                wrint(b, len);
                end = p + len;
                for(q = p; q < end; q++) {
                        if(*q == '\\')
                                Bputc(b, '/');
                        else
                                Bputc(b, *q);
                }
                return;
        }

        // Nothing to translate.
        wrstring(b, p);
}

// wrdata writes a data block to b: the byte count n as an integer,
// followed by the n bytes starting at v.
static void
wrdata(Biobuf *b, void *v, int n)
{
        int count;

        count = n;
        wrint(b, count);
        Bwrite(b, v, count);
}

// wrpathsym writes a symbol reference whose name is a file path:
// the name goes through wrpath, followed by the version.
// A nil symbol is written as an empty name and version 0.
static void
wrpathsym(Link *ctxt, Biobuf *b, LSym *s)
{
        if(s != nil) {
                wrpath(ctxt, b, s->name);
                wrint(b, s->version);
                return;
        }

        // nil reference: zero-length name, then version 0.
        wrint(b, 0);
        wrint(b, 0);
}

// wrsym writes a symbol reference to b: the symbol's name as a
// string, followed by its version.
// A nil symbol is written as an empty name and version 0.
static void
wrsym(Biobuf *b, LSym *s)
{
        if(s != nil) {
                wrstring(b, s->name);
                wrint(b, s->version);
                return;
        }

        // nil reference: zero-length name, then version 0.
        wrint(b, 0);
        wrint(b, 0);
}

// Magic byte sequences expected at the start and end of an object file.
// ldobjfile compares only the first 8 bytes of each against the file,
// so the terminating NUL of the C string is not part of the marker.
static char startmagic[] = "\x00\x00go13ld";
static char endmagic[] = "\xff\xffgo13ld";

// ldobjfile reads one object file from f, starting at the current
// offset and expected to occupy exactly len bytes.
// pkg is the package path passed on to addlib and readsym, and pn is
// the file name used in error messages.
// Every call increments ctxt->version; readsym and rdsym assign that
// value to symbols whose stored version is non-zero.
// Any format violation is reported with sysfatal.
void
ldobjfile(Link *ctxt, Biobuf *f, char *pkg, int64 len, char *pn)
{
        uchar magic[8];
        int64 base;
        char *dep;
        int ch;

        base = Boffset(f);
        ctxt->version++;

        // Header: magic string, then the version byte.
        // The buffer is cleared first so that a short read cannot
        // match by accident.
        memset(magic, 0, sizeof magic);
        Bread(f, magic, sizeof magic);
        if(memcmp(magic, startmagic, sizeof magic) != 0)
                sysfatal("%s: invalid file start %x %x %x %x %x %x %x %x", pn, magic[0], magic[1], magic[2], magic[3], magic[4], magic[5], magic[6], magic[7]);
        ch = Bgetc(f);
        if(ch != 1)
                sysfatal("%s: invalid file version number %d", pn, ch);

        // Dependencies, terminated by an empty string.
        for(dep = rdstring(f); dep[0] != 0; dep = rdstring(f))
                addlib(ctxt, pkg, pn, dep);

        // Symbols, until the next byte is 0xff.  The byte is only
        // peeked at: it is pushed back so that readsym sees its sync
        // byte and the footer check below sees the whole footer.
        for(;;) {
                ch = Bgetc(f);
                Bungetc(f);
                if(ch != 0xff) {
                        readsym(ctxt, f, pkg, pn);
                        continue;
                }
                break;
        }

        // Footer.
        memset(magic, 0, sizeof magic);
        Bread(f, magic, sizeof magic);
        if(memcmp(magic, endmagic, sizeof magic) != 0)
                sysfatal("%s: invalid file end", pn);

        // The file must end exactly where the caller said it would.
        if(Boffset(f) != base+len)
                sysfatal("%s: unexpected end at %lld, want %lld", pn, (vlong)Boffset(f), (vlong)(base+len));
}

// readsym reads one symbol record from f and installs it in ctxt.
// pkg is the package path used to expand names and recorded as the
// symbol's file; pn is the object file name used in error messages.
//
// The record layout is the one produced by writesym: sync byte 0xfe,
// common fields, relocations, and for STEXT symbols the function
// fields, automatics and pcln table.
//
// If a symbol of the same name already has a definition, the new one
// must be permitted as a duplicate (BSS types, or dupok on either
// side); otherwise readsym calls sysfatal.  When the existing symbol
// already carries data, the record is read into a scratch symbol so
// the rest of the record is still consumed from f.
static void
readsym(Link *ctxt, Biobuf *f, char *pkg, char *pn)
{
        int i, j, c, t, v, n, size, dupok;
        static int ndup;
        char *name, *rname;
        Reloc *r;
        LSym *s, *dup;
        Pcln *pc;
        Auto *a;

        if(Bgetc(f) != 0xfe)
                sysfatal("readsym out of sync");
        t = rdint(f);
        name = expandpkg(rdstring(f), pkg);
        v = rdint(f);
        if(v != 0 && v != 1)
                sysfatal("invalid symbol version %d", v);
        dupok = rdint(f);
        size = rdint(f);

        // A stored version of 1 means "local to this file": map it to
        // the version number assigned to the file being loaded.
        if(v != 0)
                v = ctxt->version;
        s = linklookup(ctxt, name, v);
        dup = nil;
        if(s->type != 0 && s->type != SXREF) {
                // The symbol is already defined.
                if(s->type != SBSS && s->type != SNOPTRBSS && !dupok && !s->dupok)
                        sysfatal("duplicate symbol %s (types %d and %d) in %s and %s", s->name, s->type, t, s->file, pn);
                if(s->np > 0) {
                        dup = s;
                        s = linknewsym(ctxt, ".dup", ndup++); // scratch
                }
        }
        s->file = pkg;
        s->dupok = dupok;
        if(t == SXREF)
                sysfatal("bad sxref");
        if(t == 0)
                sysfatal("missing type for %s in %s", name, pn);
        s->type = t;
        // Keep the larger of the existing and the recorded size.
        if(s->size < size)
                s->size = size;
        s->gotype = rdsym(ctxt, f, pkg);
        rddata(f, &s->p, &s->np);
        s->maxp = s->np;
        n = rdint(f);
        if(n > 0) {
                s->r = emallocz(n * sizeof s->r[0]);
                s->nr = n;
                s->maxr = n;
                for(i=0; i<n; i++) {
                        r = &s->r[i];
                        r->off = rdint(f);
                        r->siz = rdint(f);
                        r->type = rdint(f);
                        r->add = rdint(f);
                        r->xadd = rdint(f);
                        r->sym = rdsym(ctxt, f, pkg);
                        r->xsym = rdsym(ctxt, f, pkg);
                }
        }

        // NOTE(review): whenever dup != nil, s has just been replaced by
        // the scratch symbol created with the name ".dup" above, so this
        // prefix test on s->name presumably never matches and the
        // collision check below never runs.  Confirm against linknewsym
        // before changing it (testing `name` looks like the intent).
        if(s->np > 0 && dup != nil && dup->np > 0 && strncmp(s->name, "gclocals·", 10) == 0) {
                // content-addressed garbage collection liveness bitmap symbol.
                // double check for hash collisions.
                if(s->np != dup->np || memcmp(s->p, dup->p, s->np) != 0)
                        sysfatal("dupok hash collision for %s in %s and %s", s->name, s->file, pn);
        }

        if(s->type == STEXT) {
                s->args = rdint(f);
                s->locals = rdint(f);
                s->nosplit = rdint(f);
                s->leaf = rdint(f);
                // Automatics are pushed on the front of the list, so they
                // end up in the reverse of their order in the file.
                n = rdint(f);
                for(i=0; i<n; i++) {
                        a = emallocz(sizeof *a);
                        a->asym = rdsym(ctxt, f, pkg);
                        a->aoffset = rdint(f);
                        a->type = rdint(f);
                        a->gotype = rdsym(ctxt, f, pkg);
                        a->link = s->autom;
                        s->autom = a;
                }

                s->pcln = emallocz(sizeof *s->pcln);
                pc = s->pcln;
                rddata(f, &pc->pcsp.p, &pc->pcsp.n);
                rddata(f, &pc->pcfile.p, &pc->pcfile.n);
                rddata(f, &pc->pcline.p, &pc->pcline.n);
                n = rdint(f);
                pc->pcdata = emallocz(n * sizeof pc->pcdata[0]);
                pc->npcdata = n;
                for(i=0; i<n; i++)
                        rddata(f, &pc->pcdata[i].p, &pc->pcdata[i].n);
                // funcdata symbols are stored first, then all offsets.
                n = rdint(f);
                pc->funcdata = emallocz(n * sizeof pc->funcdata[0]);
                pc->funcdataoff = emallocz(n * sizeof pc->funcdataoff[0]);
                pc->nfuncdata = n;
                for(i=0; i<n; i++)
                        pc->funcdata[i] = rdsym(ctxt, f, pkg);
                for(i=0; i<n; i++)
                        pc->funcdataoff[i] = rdint(f);
                n = rdint(f);
                pc->file = emallocz(n * sizeof pc->file[0]);
                pc->nfile = n;
                for(i=0; i<n; i++)
                        pc->file[i] = rdsym(ctxt, f, pkg);

                // Only a first definition is appended to the context's
                // list of text symbols; a duplicate's scratch copy is not.
                if(dup == nil) {
                        if(s->onlist)
                                sysfatal("symbol %s listed multiple times", s->name);
                        s->onlist = 1;
                        if(ctxt->etextp)
                                ctxt->etextp->next = s;
                        else
                                ctxt->textp = s;
                        ctxt->etextp = s;
                }
        }

        if(ctxt->debugasm) {
                Bprint(ctxt->bso, "%s ", s->name);
                if(s->version)
                        Bprint(ctxt->bso, "v=%d ", s->version);
                if(s->type)
                        Bprint(ctxt->bso, "t=%d ", s->type);
                if(s->dupok)
                        Bprint(ctxt->bso, "dupok ");
                if(s->nosplit)
                        Bprint(ctxt->bso, "nosplit ");
                Bprint(ctxt->bso, "size=%lld value=%lld", (vlong)s->size, (vlong)s->value);
                if(s->type == STEXT)
                        Bprint(ctxt->bso, " args=%#llux locals=%#llux", (uvlong)s->args, (uvlong)s->locals);
                Bprint(ctxt->bso, "\n");
                // Hex and ASCII dump of the data, 16 bytes per row.
                for(i=0; i<s->np; ) {
                        Bprint(ctxt->bso, "\t%#06ux", i);
                        for(j=i; j<i+16 && j<s->np; j++)
                                Bprint(ctxt->bso, " %02ux", s->p[j]);
                        for(; j<i+16; j++)
                                Bprint(ctxt->bso, "   ");
                        Bprint(ctxt->bso, "  ");
                        for(j=i; j<i+16 && j<s->np; j++) {
                                c = s->p[j];
                                if(' ' <= c && c <= 0x7e)
                                        Bprint(ctxt->bso, "%c", c);
                                else
                                        Bprint(ctxt->bso, ".");
                        }
                        Bprint(ctxt->bso, "\n");
                        i += 16;
                }
                for(i=0; i<s->nr; i++) {
                        r = &s->r[i];
                        // rdsym returns nil for an empty symbol reference,
                        // so the target must not be dereferenced blindly.
                        rname = "";
                        if(r->sym != nil)
                                rname = r->sym->name;
                        Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, rname, (vlong)r->add);
                }
        }
}

// rdint reads one integer from f in the zigzag varint encoding written
// by wrint: groups of seven bits, least significant first, with the
// high bit of each byte set while more bytes follow.
// It calls sysfatal if the encoding runs past 64 bits.
static int64
rdint(Biobuf *f)
{
        uint64 bits;
        int64 magnitude, signmask;
        int ch, shift;

        bits = 0;
        shift = 0;
        do {
                if(shift >= 64)
                        sysfatal("corrupt input");
                ch = Bgetc(f);
                bits |= (uint64)(ch & 0x7F) << shift;
                shift += 7;
        } while(ch & 0x80);

        // Undo the zigzag step: the low bit selects the sign, the
        // remaining bits hold the value.
        magnitude = (int64)(bits>>1);
        signmask = (int64)((uint64)bits<<63)>>63;
        return magnitude ^ signmask;
}

// rdstring reads a string from f: an integer length followed by that
// many bytes.  It returns a newly allocated buffer holding the bytes
// plus one extra trailing byte (left as emallocz provides it, which
// callers rely on being NUL).
// A negative or oversized length, or input that ends before the
// string does, is reported with sysfatal.
static char*
rdstring(Biobuf *f)
{
        int64 len;
        int n;
        char *p;

        // Validate before narrowing to int: a corrupt length must not
        // wrap around or make n+1 overflow.
        len = rdint(f);
        if(len < 0 || len >= 0x7fffffff)
                sysfatal("corrupt input");
        n = len;
        p = emallocz(n+1);
        if(Bread(f, p, n) != n)
                sysfatal("corrupt input");
        return p;
}

// rddata reads a data block from f: an integer length followed by
// that many bytes.  The length is stored in *np and a newly allocated
// buffer holding the bytes in *pp.
// A negative or oversized length, or input that ends before the block
// does, is reported with sysfatal.
static void
rddata(Biobuf *f, uchar **pp, int *np)
{
        int64 len;

        // Validate before narrowing to int so a corrupt length cannot
        // turn into a bogus allocation size.
        len = rdint(f);
        if(len < 0 || len > 0x7fffffff)
                sysfatal("corrupt input");
        *np = len;
        *pp = emallocz(*np);
        if(Bread(f, *pp, *np) != *np)
                sysfatal("corrupt input");
}

// rdsym reads a symbol reference from f (a name string followed by a
// version) and returns the corresponding symbol from ctxt, looked up
// with linklookup after expanding the name with expandpkg.
// An empty name denotes a nil reference; its version is still
// consumed and nil is returned.
// A negative or oversized name length, or input that ends inside the
// name, is reported with sysfatal.
static LSym*
rdsym(Link *ctxt, Biobuf *f, char *pkg)
{
        int64 len;
        int n, v;
        char *p;
        LSym *s;

        len = rdint(f);
        if(len == 0) {
                rdint(f);
                return nil;
        }
        // Validate before narrowing to int: a corrupt length must not
        // wrap around or make n+1 overflow.
        if(len < 0 || len >= 0x7fffffff)
                sysfatal("corrupt input");
        n = len;
        p = emallocz(n+1);
        if(Bread(f, p, n) != n)
                sysfatal("corrupt input");
        v = rdint(f);
        // A non-zero stored version means "local to this file".
        if(v != 0)
                v = ctxt->version;
        s = linklookup(ctxt, expandpkg(p, pkg), v);

        // An undefined global symbol named $f32.<hex> or $f64.<hex> is
        // defined here as read-only data holding the value parsed from
        // the hexadecimal digits in its name.
        if(v == 0 && s->name[0] == '$' && s->type == 0) {
                if(strncmp(s->name, "$f32.", 5) == 0) {
                        int32 i32;
                        i32 = strtoul(s->name+5, nil, 16);
                        s->type = SRODATA;
                        adduint32(ctxt, s, i32);
                        s->reachable = 0;
                } else if(strncmp(s->name, "$f64.", 5) == 0) {
                        int64 i64;
                        i64 = strtoull(s->name+5, nil, 16);
                        s->type = SRODATA;
                        adduint64(ctxt, s, i64);
                        s->reachable = 0;
                }
        }

        return s;
}

/* [<][>][^][v][top][bottom][index][help] */