// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Memory allocator, based on tcmalloc. // http://goog-perftools.sourceforge.net/doc/tcmalloc.html // The main allocator works in runs of pages. // Small allocation sizes (up to and including 32 kB) are // rounded to one of about 100 size classes, each of which // has its own free list of objects of exactly that size. // Any free page of memory can be split into a set of objects // of one size class, which are then managed using free list // allocators. // // The allocator's data structures are: // // FixAlloc: a free-list allocator for fixed-size objects, // used to manage storage used by the allocator. // MHeap: the malloc heap, managed at page (4096-byte) granularity. // MSpan: a run of pages managed by the MHeap. // MCentral: a shared free list for a given size class. // MCache: a per-thread (in Go, per-P) cache for small objects. // MStats: allocation statistics. // // Allocating a small object proceeds up a hierarchy of caches: // // 1. Round the size up to one of the small size classes // and look in the corresponding MCache free list. // If the list is not empty, allocate an object from it. // This can all be done without acquiring a lock. // // 2. If the MCache free list is empty, replenish it by // taking a bunch of objects from the MCentral free list. // Moving a bunch amortizes the cost of acquiring the MCentral lock. // // 3. If the MCentral free list is empty, replenish it by // allocating a run of pages from the MHeap and then // chopping that memory into a objects of the given size. // Allocating many objects amortizes the cost of locking // the heap. // // 4. If the MHeap is empty or has no page runs large enough, // allocate a new group of pages (at least 1MB) from the // operating system. Allocating a large run of pages // amortizes the cost of talking to the operating system. // // Freeing a small object proceeds up the same hierarchy: // // 1. Look up the size class for the object and add it to // the MCache free list. // // 2. If the MCache free list is too long or the MCache has // too much memory, return some to the MCentral free lists. // // 3. If all the objects in a given span have returned to // the MCentral list, return that span to the page heap. // // 4. If the heap has too much memory, return some to the // operating system. // // TODO(rsc): Step 4 is not implemented. // // Allocating and freeing a large object uses the page heap // directly, bypassing the MCache and MCentral free lists. // // The small objects on the MCache and MCentral free lists // may or may not be zeroed. They are zeroed if and only if // the second word of the object is zero. A span in the // page heap is zeroed unless s->needzero is set. When a span // is allocated to break into small objects, it is zeroed if needed // and s->needzero is set. There are two main benefits to delaying the // zeroing this way: // // 1. stack frames allocated from the small object lists // or the page heap can avoid zeroing altogether. // 2. the cost of zeroing when reusing a small object is // charged to the mutator, not the garbage collector. // // This C code was written with an eye toward translating to Go // in the future. Methods have the form Type_Method(Type *t, ...). typedef struct MCentral MCentral; typedef struct MHeap MHeap; typedef struct MSpan MSpan; typedef struct MStats MStats; typedef struct MLink MLink; typedef struct MTypes MTypes; typedef struct GCStats GCStats; enum { PageShift = 13, PageSize = 1<<PageShift, PageMask = PageSize - 1, }; typedef uintptr PageID; // address >> PageShift enum { // Computed constant. The definition of MaxSmallSize and the // algorithm in msize.c produce some number of different allocation // size classes. NumSizeClasses is that number. It's needed here // because there are static arrays of this length; when msize runs its // size choosing algorithm it double-checks that NumSizeClasses agrees. NumSizeClasses = 67, // Tunable constants. MaxSmallSize = 32<<10, // Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc. TinySize = 16, TinySizeClass = 2, FixAllocChunk = 16<<10, // Chunk size for FixAlloc MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap. HeapAllocChunk = 1<<20, // Chunk size for heap growth // Number of bits in page to span calculations (4k pages). // On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason). // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits. // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. #ifdef _64BIT #ifdef GOOS_windows // Windows counts memory used by page table into committed memory // of the process, so we can't reserve too much memory. // See http://golang.org/issue/5402 and http://golang.org/issue/5236. MHeapMap_Bits = 35 - PageShift, #else MHeapMap_Bits = 37 - PageShift, #endif #else MHeapMap_Bits = 32 - PageShift, #endif // Max number of threads to run garbage collection. // 2, 3, and 4 are all plausible maximums depending // on the hardware details of the machine. The garbage // collector scales well to 8 cpus. MaxGcproc = 8, }; // Maximum memory allocation size, a hint for callers. // This must be a #define instead of an enum because it // is so large. #ifdef _64BIT #define MaxMem (1ULL<<(MHeapMap_Bits+PageShift)) /* 128 GB or 32 GB */ #else #define MaxMem ((uintptr)-1) #endif // A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) struct MLink { MLink *next; }; // SysAlloc obtains a large chunk of zeroed memory from the // operating system, typically on the order of a hundred kilobytes // or a megabyte. // NOTE: SysAlloc returns OS-aligned memory, but the heap allocator // may use larger alignment, so the caller must be careful to realign the // memory obtained by SysAlloc. // // SysUnused notifies the operating system that the contents // of the memory region are no longer needed and can be reused // for other purposes. // SysUsed notifies the operating system that the contents // of the memory region are needed again. // // SysFree returns it unconditionally; this is only used if // an out-of-memory error has been detected midway through // an allocation. It is okay if SysFree is a no-op. // // SysReserve reserves address space without allocating memory. // If the pointer passed to it is non-nil, the caller wants the // reservation there, but SysReserve can still choose another // location if that one is unavailable. On some systems and in some // cases SysReserve will simply check that the address space is // available and not actually reserve it. If SysReserve returns // non-nil, it sets *reserved to true if the address space is // reserved, false if it has merely been checked. // NOTE: SysReserve returns OS-aligned memory, but the heap allocator // may use larger alignment, so the caller must be careful to realign the // memory obtained by SysAlloc. // // SysMap maps previously reserved address space for use. // The reserved argument is true if the address space was really // reserved, not merely checked. // // SysFault marks a (already SysAlloc'd) region to fault // if accessed. Used only for debugging the runtime. void* runtime·SysAlloc(uintptr nbytes, uint64 *stat); void runtime·SysFree(void *v, uintptr nbytes, uint64 *stat); void runtime·SysUnused(void *v, uintptr nbytes); void runtime·SysUsed(void *v, uintptr nbytes); void runtime·SysMap(void *v, uintptr nbytes, bool reserved, uint64 *stat); void* runtime·SysReserve(void *v, uintptr nbytes, bool *reserved); void runtime·SysFault(void *v, uintptr nbytes); // FixAlloc is a simple free-list allocator for fixed size objects. // Malloc uses a FixAlloc wrapped around SysAlloc to manages its // MCache and MSpan objects. // // Memory returned by FixAlloc_Alloc is not zeroed. // The caller is responsible for locking around FixAlloc calls. // Callers can keep state in the object but the first word is // smashed by freeing and reallocating. struct FixAlloc { uintptr size; void (*first)(void *arg, byte *p); // called first time p is returned void* arg; MLink* list; byte* chunk; uint32 nchunk; uintptr inuse; // in-use bytes now uint64* stat; }; void runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat); void* runtime·FixAlloc_Alloc(FixAlloc *f); void runtime·FixAlloc_Free(FixAlloc *f, void *p); // Statistics. // Shared with Go: if you edit this structure, also edit type MemStats in mem.go. struct MStats { // General statistics. uint64 alloc; // bytes allocated and still in use uint64 total_alloc; // bytes allocated (even if freed) uint64 sys; // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate) uint64 nlookup; // number of pointer lookups uint64 nmalloc; // number of mallocs uint64 nfree; // number of frees // Statistics about malloc heap. // protected by mheap.Lock uint64 heap_alloc; // bytes allocated and still in use uint64 heap_sys; // bytes obtained from system uint64 heap_idle; // bytes in idle spans uint64 heap_inuse; // bytes in non-idle spans uint64 heap_released; // bytes released to the OS uint64 heap_objects; // total number of allocated objects // Statistics about allocation of low-level fixed-size structures. // Protected by FixAlloc locks. uint64 stacks_inuse; // bootstrap stacks uint64 stacks_sys; uint64 mspan_inuse; // MSpan structures uint64 mspan_sys; uint64 mcache_inuse; // MCache structures uint64 mcache_sys; uint64 buckhash_sys; // profiling bucket hash table uint64 gc_sys; uint64 other_sys; // Statistics about garbage collector. // Protected by mheap or stopping the world during GC. uint64 next_gc; // next GC (in heap_alloc time) uint64 last_gc; // last GC (in absolute time) uint64 pause_total_ns; uint64 pause_ns[256]; uint32 numgc; bool enablegc; bool debuggc; // Statistics about allocation size classes. struct { uint32 size; uint64 nmalloc; uint64 nfree; } by_size[NumSizeClasses]; }; #define mstats runtime·memStats extern MStats mstats; void runtime·updatememstats(GCStats *stats); // Size classes. Computed and initialized by InitSizes. // // SizeToClass(0 <= n <= MaxSmallSize) returns the size class, // 1 <= sizeclass < NumSizeClasses, for n. // Size class 0 is reserved to mean "not small". // // class_to_size[i] = largest size in class i // class_to_allocnpages[i] = number of pages to allocate when // making new objects in class i int32 runtime·SizeToClass(int32); uintptr runtime·roundupsize(uintptr); extern int32 runtime·class_to_size[NumSizeClasses]; extern int32 runtime·class_to_allocnpages[NumSizeClasses]; extern int8 runtime·size_to_class8[1024/8 + 1]; extern int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1]; extern void runtime·InitSizes(void); typedef struct MCacheList MCacheList; struct MCacheList { MLink *list; uint32 nlist; }; // Per-thread (in Go, per-P) cache for small objects. // No locking needed because it is per-thread (per-P). struct MCache { // The following members are accessed on every malloc, // so they are grouped here for better caching. int32 next_sample; // trigger heap sample after allocating this many bytes intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap // Allocator cache for tiny objects w/o pointers. // See "Tiny allocator" comment in malloc.goc. byte* tiny; uintptr tinysize; // The rest is not accessed on every malloc. MSpan* alloc[NumSizeClasses]; // spans to allocate from MCacheList free[NumSizeClasses];// lists of explicitly freed objects // Local allocator stats, flushed during GC. uintptr local_nlookup; // number of pointer lookups uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize) uintptr local_nlargefree; // number of frees for large objects (>MaxSmallSize) uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize) }; MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass); void runtime·MCache_Free(MCache *c, MLink *p, int32 sizeclass, uintptr size); void runtime·MCache_ReleaseAll(MCache *c); // MTypes describes the types of blocks allocated within a span. // The compression field describes the layout of the data. // // MTypes_Empty: // All blocks are free, or no type information is available for // allocated blocks. // The data field has no meaning. // MTypes_Single: // The span contains just one block. // The data field holds the type information. // The sysalloc field has no meaning. // MTypes_Words: // The span contains multiple blocks. // The data field points to an array of type [NumBlocks]uintptr, // and each element of the array holds the type of the corresponding // block. // MTypes_Bytes: // The span contains at most seven different types of blocks. // The data field points to the following structure: // struct { // type [8]uintptr // type[0] is always 0 // index [NumBlocks]byte // } // The type of the i-th block is: data.type[data.index[i]] enum { MTypes_Empty = 0, MTypes_Single = 1, MTypes_Words = 2, MTypes_Bytes = 3, }; struct MTypes { byte compression; // one of MTypes_* uintptr data; }; enum { KindSpecialFinalizer = 1, KindSpecialProfile = 2, // Note: The finalizer special must be first because if we're freeing // an object, a finalizer special will cause the freeing operation // to abort, and we want to keep the other special records around // if that happens. }; typedef struct Special Special; struct Special { Special* next; // linked list in span uint16 offset; // span offset of object byte kind; // kind of Special }; // The described object has a finalizer set for it. typedef struct SpecialFinalizer SpecialFinalizer; struct SpecialFinalizer { Special; FuncVal* fn; uintptr nret; Type* fint; PtrType* ot; }; // The described object is being heap profiled. typedef struct Bucket Bucket; // from mprof.goc typedef struct SpecialProfile SpecialProfile; struct SpecialProfile { Special; Bucket* b; }; // An MSpan is a run of pages. enum { MSpanInUse = 0, MSpanFree, MSpanListHead, MSpanDead, }; struct MSpan { MSpan *next; // in a span linked list MSpan *prev; // in a span linked list PageID start; // starting page number uintptr npages; // number of pages in span MLink *freelist; // list of free objects // sweep generation: // if sweepgen == h->sweepgen - 2, the span needs sweeping // if sweepgen == h->sweepgen - 1, the span is currently being swept // if sweepgen == h->sweepgen, the span is swept and ready to use // h->sweepgen is incremented by 2 after every GC uint32 sweepgen; uint16 ref; // capacity - number of objects in freelist uint8 sizeclass; // size class bool incache; // being used by an MCache uint8 state; // MSpanInUse etc uint8 needzero; // needs to be zeroed before allocation uintptr elemsize; // computed from sizeclass or from npages int64 unusedsince; // First time spotted by GC in MSpanFree state uintptr npreleased; // number of pages released to the OS byte *limit; // end of data in span MTypes types; // types of allocated objects in this span Lock specialLock; // guards specials list Special *specials; // linked list of special records sorted by offset. MLink *freebuf; // objects freed explicitly, not incorporated into freelist yet }; void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages); void runtime·MSpan_EnsureSwept(MSpan *span); bool runtime·MSpan_Sweep(MSpan *span); // Every MSpan is in one doubly-linked list, // either one of the MHeap's free lists or one of the // MCentral's span lists. We use empty MSpan structures as list heads. void runtime·MSpanList_Init(MSpan *list); bool runtime·MSpanList_IsEmpty(MSpan *list); void runtime·MSpanList_Insert(MSpan *list, MSpan *span); void runtime·MSpanList_InsertBack(MSpan *list, MSpan *span); void runtime·MSpanList_Remove(MSpan *span); // from whatever list it is in // Central list of free objects of a given size. struct MCentral { Lock; int32 sizeclass; MSpan nonempty; // list of spans with a free object MSpan empty; // list of spans with no free objects (or cached in an MCache) int32 nfree; // # of objects available in nonempty spans }; void runtime·MCentral_Init(MCentral *c, int32 sizeclass); MSpan* runtime·MCentral_CacheSpan(MCentral *c); void runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s); bool runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end); void runtime·MCentral_FreeList(MCentral *c, MLink *start); // TODO: need this? // Main malloc heap. // The heap itself is the "free[]" and "large" arrays, // but all the other global data is here too. struct MHeap { Lock; MSpan free[MaxMHeapList]; // free lists of given length MSpan freelarge; // free lists length >= MaxMHeapList MSpan busy[MaxMHeapList]; // busy lists of large objects of given length MSpan busylarge; // busy lists of large objects length >= MaxMHeapList MSpan **allspans; // all spans out there MSpan **sweepspans; // copy of allspans referenced by sweeper uint32 nspan; uint32 nspancap; uint32 sweepgen; // sweep generation, see comment in MSpan uint32 sweepdone; // all spans are swept // span lookup MSpan** spans; uintptr spans_mapped; // range of addresses we might see in the heap byte *bitmap; uintptr bitmap_mapped; byte *arena_start; byte *arena_used; byte *arena_end; bool arena_reserved; // central free lists for small size classes. // the padding makes sure that the MCentrals are // spaced CacheLineSize bytes apart, so that each MCentral.Lock // gets its own cache line. struct { MCentral; byte pad[CacheLineSize]; } central[NumSizeClasses]; FixAlloc spanalloc; // allocator for Span* FixAlloc cachealloc; // allocator for MCache* FixAlloc specialfinalizeralloc; // allocator for SpecialFinalizer* FixAlloc specialprofilealloc; // allocator for SpecialProfile* Lock speciallock; // lock for sepcial record allocators. // Malloc stats. uint64 largefree; // bytes freed for large objects (>MaxSmallSize) uint64 nlargefree; // number of frees for large objects (>MaxSmallSize) uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize) }; extern MHeap runtime·mheap; void runtime·MHeap_Init(MHeap *h); MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero); void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct); MSpan* runtime·MHeap_Lookup(MHeap *h, void *v); MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v); void runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj); void* runtime·MHeap_SysAlloc(MHeap *h, uintptr n); void runtime·MHeap_MapBits(MHeap *h); void runtime·MHeap_MapSpans(MHeap *h); void runtime·MHeap_Scavenger(void); void runtime·MHeap_SplitSpan(MHeap *h, MSpan *s); void* runtime·mallocgc(uintptr size, uintptr typ, uint32 flag); void* runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat); int32 runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s); void runtime·gc(int32 force); uintptr runtime·sweepone(void); void runtime·markscan(void *v); void runtime·marknogc(void *v); void runtime·checkallocated(void *v, uintptr n); void runtime·markfreed(void *v); void runtime·checkfreed(void *v, uintptr n); extern int32 runtime·checking; void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover); void runtime·unmarkspan(void *v, uintptr size); void runtime·purgecachedstats(MCache*); void* runtime·cnew(Type*); void* runtime·cnewarray(Type*, intgo); void runtime·tracealloc(void*, uintptr, uintptr); void runtime·tracefree(void*, uintptr); void runtime·tracegc(void); uintptr runtime·gettype(void*); enum { // flags to malloc FlagNoScan = 1<<0, // GC doesn't have to scan object FlagNoProfiling = 1<<1, // must not profile FlagNoGC = 1<<2, // must not free or scan for pointers FlagNoZero = 1<<3, // don't zero memory FlagNoInvokeGC = 1<<4, // don't invoke GC }; void runtime·MProf_Malloc(void*, uintptr); void runtime·MProf_Free(Bucket*, uintptr, bool); void runtime·MProf_GC(void); void runtime·iterate_memprof(void (*callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr)); int32 runtime·gcprocs(void); void runtime·helpgc(int32 nproc); void runtime·gchelper(void); void runtime·createfing(void); G* runtime·wakefing(void); extern bool runtime·fingwait; extern bool runtime·fingwake; void runtime·setprofilebucket(void *p, Bucket *b); bool runtime·addfinalizer(void*, FuncVal *fn, uintptr, Type*, PtrType*); void runtime·removefinalizer(void*); void runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot); void runtime·freeallspecials(MSpan *span, void *p, uintptr size); bool runtime·freespecial(Special *s, void *p, uintptr size, bool freed); enum { TypeInfo_SingleObject = 0, TypeInfo_Array = 1, TypeInfo_Chan = 2, // Enables type information at the end of blocks allocated from heap DebugTypeAtBlockEnd = 0, }; // Information from the compiler about the layout of stack frames. typedef struct BitVector BitVector; struct BitVector { int32 n; // # of bits uint32 *data; }; typedef struct StackMap StackMap; struct StackMap { int32 n; // number of bitmaps int32 nbit; // number of bits in each bitmap uint32 data[]; }; enum { // Pointer map BitsPerPointer = 2, BitsDead = 0, BitsScalar = 1, BitsPointer = 2, BitsMultiWord = 3, // BitsMultiWord will be set for the first word of a multi-word item. // When it is set, one of the following will be set for the second word. BitsString = 0, BitsSlice = 1, BitsIface = 2, BitsEface = 3, }; // Returns pointer map data for the given stackmap index // (the index is encoded in PCDATA_StackMapIndex). BitVector runtime·stackmapdata(StackMap *stackmap, int32 n); // defined in mgc0.go void runtime·gc_m_ptr(Eface*); void runtime·gc_g_ptr(Eface*); void runtime·gc_itab_ptr(Eface*); void runtime·memorydump(void); int32 runtime·setgcpercent(int32); // Value we use to mark dead pointers when GODEBUG=gcdead=1. #define PoisonGC ((uintptr)0xf969696969696969ULL) #define PoisonStack ((uintptr)0x6868686868686868ULL)