root/Source/wtf/PartitionAlloc.cpp


DEFINITIONS

This source file includes the following definitions.
  1. partitionBucketNumSystemPages
  2. partitionAllocBaseInit
  3. partitionBucketInitBase
  4. partitionAllocInit
  5. partitionAllocGenericInit
  6. partitionAllocShutdownBucket
  7. partitionAllocBaseShutdown
  8. partitionAllocShutdown
  9. partitionAllocGenericShutdown
  10. partitionAllocPartitionPages
  11. partitionUnusePage
  12. partitionBucketSlots
  13. partitionBucketPartitionPages
  14. partitionPageReset
  15. partitionPageAllocAndFillFreelist
  16. partitionSetNewActivePage
  17. partitionPageToDirectMapExtent
  18. partitionDirectMap
  19. partitionDirectUnmap
  20. partitionAllocSlowPath
  21. partitionFreePage
  22. partitionRegisterEmptyPage
  23. partitionFreeSlowPath
  24. partitionReallocDirectMappedInPlace
  25. partitionReallocGeneric
  26. partitionDumpStats

/*
 * Copyright (C) 2013 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "wtf/PartitionAlloc.h"

#include <string.h>

#ifndef NDEBUG
#include <stdio.h>
#endif

// Two partition pages are used as guard / metadata pages, so make sure the
// super page size is bigger than that.
COMPILE_ASSERT(WTF::kPartitionPageSize * 4 <= WTF::kSuperPageSize, ok_super_page_size);
COMPILE_ASSERT(!(WTF::kSuperPageSize % WTF::kPartitionPageSize), ok_super_page_multiple);
// Four system pages gives us room to hack out a still-guard-paged piece
// of metadata in the middle of a guard partition page.
COMPILE_ASSERT(WTF::kSystemPageSize * 4 <= WTF::kPartitionPageSize, ok_partition_page_size);
COMPILE_ASSERT(!(WTF::kPartitionPageSize % WTF::kSystemPageSize), ok_partition_page_multiple);
COMPILE_ASSERT(sizeof(WTF::PartitionPage) <= WTF::kPageMetadataSize, PartitionPage_not_too_big);
COMPILE_ASSERT(sizeof(WTF::PartitionBucket) <= WTF::kPageMetadataSize, PartitionBucket_not_too_big);
COMPILE_ASSERT(sizeof(WTF::PartitionSuperPageExtentEntry) <= WTF::kPageMetadataSize, PartitionSuperPageExtentEntry_not_too_big);
COMPILE_ASSERT(WTF::kPageMetadataSize * WTF::kNumPartitionPagesPerSuperPage <= WTF::kSystemPageSize, page_metadata_fits_in_hole);
// Check that some of our zanier calculations worked out as expected.
COMPILE_ASSERT(WTF::kGenericSmallestBucket == 8, generic_smallest_bucket);
COMPILE_ASSERT(WTF::kGenericMaxBucketed == 983040, generic_max_bucketed);

namespace WTF {

int PartitionRootBase::gInitializedLock = 0;
bool PartitionRootBase::gInitialized = false;
PartitionPage PartitionRootBase::gSeedPage;
PartitionBucket PartitionRootBase::gPagedBucket;

static size_t partitionBucketNumSystemPages(size_t size)
{
    // This works out reasonably for the current bucket sizes of the generic
    // allocator, and the current values of partition page size and constants.
    // Specifically, we have enough room to always pack the slots perfectly into
    // some number of system pages. The only waste is the waste associated with
    // unfaulted pages (i.e. wasted address space).
    // TODO: we end up using a lot of system pages for very small slot sizes. For
    // example, we'll use 12 system pages for a slot size of 24. The slot size is
    // so small that the waste would be tiny with just 4, or even 1, system pages.
    // Later, we can investigate whether there are anti-fragmentation benefits
    // to using fewer system pages.
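    // To illustrate the waste computation below with the current constants
    // (assuming 4kB system pages and 4 system pages per partition page): for
    // a slot size of 320, the candidate i == 5 gives a 20480 byte span holding
    // exactly 64 slots with zero leftover bytes; the only waste charged is
    // sizeof(void*) for each of the 3 system pages left unfaulted when the
    // span is rounded up to whole partition pages.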
    double bestWasteRatio = 1.0;
    size_t bestPages = 0;
    if (size > kMaxSystemPagesPerSlotSpan * kSystemPageSize) {
        ASSERT(!(size % kSystemPageSize));
        return size / kSystemPageSize;
    }
    ASSERT(size <= kMaxSystemPagesPerSlotSpan * kSystemPageSize);
    for (size_t i = kNumSystemPagesPerPartitionPage - 1; i <= kMaxSystemPagesPerSlotSpan; ++i) {
        size_t pageSize = kSystemPageSize * i;
        size_t numSlots = pageSize / size;
        size_t waste = pageSize - (numSlots * size);
        // Leaving a page unfaulted is not free; the page will occupy an empty page table entry.
        // Make a simple attempt to account for that.
        size_t numRemainderPages = i & (kNumSystemPagesPerPartitionPage - 1);
        size_t numUnfaultedPages = numRemainderPages ? (kNumSystemPagesPerPartitionPage - numRemainderPages) : 0;
        waste += sizeof(void*) * numUnfaultedPages;
        double wasteRatio = (double) waste / (double) pageSize;
        if (wasteRatio < bestWasteRatio) {
            bestWasteRatio = wasteRatio;
            bestPages = i;
        }
    }
    ASSERT(bestPages > 0);
    return bestPages;
}

static void partitionAllocBaseInit(PartitionRootBase* root)
{
    ASSERT(!root->initialized);

    spinLockLock(&PartitionRootBase::gInitializedLock);
    if (!PartitionRootBase::gInitialized) {
        PartitionRootBase::gInitialized = true;
        // We mark the seed page as free to make sure it is skipped by our
        // logic to find a new active page.
        PartitionRootBase::gPagedBucket.activePagesHead = &PartitionRootGeneric::gSeedPage;
    }
    spinLockUnlock(&PartitionRootBase::gInitializedLock);

    root->initialized = true;
    root->totalSizeOfSuperPages = 0;
    root->nextSuperPage = 0;
    root->nextPartitionPage = 0;
    root->nextPartitionPageEnd = 0;
    root->firstExtent = 0;
    root->currentExtent = 0;

    memset(&root->globalEmptyPageRing, '\0', sizeof(root->globalEmptyPageRing));
    root->globalEmptyPageRingIndex = 0;

    // This is a "magic" value so we can test if a root pointer is valid.
    root->invertedSelf = ~reinterpret_cast<uintptr_t>(root);
}

static void partitionBucketInitBase(PartitionBucket* bucket, PartitionRootBase* root)
{
    bucket->activePagesHead = &PartitionRootGeneric::gSeedPage;
    bucket->freePagesHead = 0;
    bucket->numFullPages = 0;
    bucket->numSystemPagesPerSlotSpan = partitionBucketNumSystemPages(bucket->slotSize);
}

void partitionAllocInit(PartitionRoot* root, size_t numBuckets, size_t maxAllocation)
{
    partitionAllocBaseInit(root);

    root->numBuckets = numBuckets;
    root->maxAllocation = maxAllocation;
    size_t i;
    for (i = 0; i < root->numBuckets; ++i) {
        PartitionBucket* bucket = &root->buckets()[i];
        if (!i)
            bucket->slotSize = kAllocationGranularity;
        else
            bucket->slotSize = i << kBucketShift;
        partitionBucketInitBase(bucket, root);
    }
}

void partitionAllocGenericInit(PartitionRootGeneric* root)
{
    partitionAllocBaseInit(root);

    root->lock = 0;

    // Precalculate some shift and mask constants used in the hot path.
    // Example: malloc(41) == 101001 in binary.
    // The order is 6: (1 << (6 - 1)) == 32 is the highest bit set.
    // orderIndex is the next three most significant bits == 010 == 2.
    // subOrderIndexMask is a mask for the remaining bits == 11 (masking to 01
    // for the subOrderIndex).
    size_t order;
    for (order = 0; order <= kBitsPerSizet; ++order) {
        size_t orderIndexShift;
        if (order < kGenericNumBucketsPerOrderBits + 1)
            orderIndexShift = 0;
        else
            orderIndexShift = order - (kGenericNumBucketsPerOrderBits + 1);
        root->orderIndexShifts[order] = orderIndexShift;
        size_t subOrderIndexMask;
        if (order == kBitsPerSizet) {
            // This avoids invoking undefined behavior for an excessive shift.
            subOrderIndexMask = static_cast<size_t>(-1) >> (kGenericNumBucketsPerOrderBits + 1);
        } else {
            subOrderIndexMask = ((static_cast<size_t>(1) << order) - 1) >> (kGenericNumBucketsPerOrderBits + 1);
        }
        root->orderSubIndexMasks[order] = subOrderIndexMask;
    }

    // Set up the actual usable buckets first.
    // Note that typical values (i.e. min allocation size of 8) will result in
    // invalid buckets (size==9 etc. or more generally, size is not a multiple
    // of the smallest allocation granularity).
    // We avoid them in the bucket lookup map, but we tolerate them to keep the
    // code simpler and the structures more generic.
    size_t i, j;
    size_t currentSize = kGenericSmallestBucket;
    size_t currentIncrement = kGenericSmallestBucket >> kGenericNumBucketsPerOrderBits;
    PartitionBucket* bucket = &root->buckets[0];
    for (i = 0; i < kGenericNumBucketedOrders; ++i) {
        for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
            bucket->slotSize = currentSize;
            partitionBucketInitBase(bucket, root);
            // Disable invalid buckets so that touching them faults.
            if (currentSize % kGenericSmallestBucket)
                bucket->activePagesHead = 0;
            currentSize += currentIncrement;
            ++bucket;
        }
        currentIncrement <<= 1;
    }
    ASSERT(currentSize == 1 << kGenericMaxBucketedOrder);
    ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));

    // Then set up the fast size -> bucket lookup table.
    bucket = &root->buckets[0];
    PartitionBucket** bucketPtr = &root->bucketLookups[0];
    for (order = 0; order <= kBitsPerSizet; ++order) {
        for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
            if (order < kGenericMinBucketedOrder) {
                // Use the bucket of finest granularity for malloc(0) etc.
                *bucketPtr++ = &root->buckets[0];
            } else if (order > kGenericMaxBucketedOrder) {
                *bucketPtr++ = &PartitionRootGeneric::gPagedBucket;
            } else {
                PartitionBucket* validBucket = bucket;
                // Skip over invalid buckets.
                while (validBucket->slotSize % kGenericSmallestBucket)
                    validBucket++;
                *bucketPtr++ = validBucket;
                bucket++;
            }
        }
    }
    ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));
    ASSERT(bucketPtr == &root->bucketLookups[0] + ((kBitsPerSizet + 1) * kGenericNumBucketsPerOrder));
    // And there's one last bucket lookup that will be hit for e.g. malloc(-1),
    // which tries to overflow to a non-existent order.
    *bucketPtr = &PartitionRootGeneric::gPagedBucket;
}

static bool partitionAllocShutdownBucket(PartitionBucket* bucket)
{
    // Failure here indicates a memory leak.
    bool noLeaks = !bucket->numFullPages;
    PartitionPage* page = bucket->activePagesHead;
    while (page) {
        if (page->numAllocatedSlots)
            noLeaks = false;
        page = page->nextPage;
    }

    return noLeaks;
}

static void partitionAllocBaseShutdown(PartitionRootBase* root)
{
    ASSERT(root->initialized);
    root->initialized = false;

    // Now that we've examined all partition pages in all buckets, it's safe
    // to free all our super pages. We first collect the super page pointers
    // on the stack because some of them are themselves stored in super pages.
    char* superPages[kMaxPartitionSize / kSuperPageSize];
    size_t numSuperPages = 0;
    PartitionSuperPageExtentEntry* entry = root->firstExtent;
    while (entry) {
        char* superPage = entry->superPageBase;
        while (superPage != entry->superPagesEnd) {
            superPages[numSuperPages] = superPage;
            numSuperPages++;
            superPage += kSuperPageSize;
        }
        entry = entry->next;
    }
    ASSERT(numSuperPages == root->totalSizeOfSuperPages / kSuperPageSize);
    for (size_t i = 0; i < numSuperPages; ++i)
        freePages(superPages[i], kSuperPageSize);
}

bool partitionAllocShutdown(PartitionRoot* root)
{
    bool noLeaks = true;
    size_t i;
    for (i = 0; i < root->numBuckets; ++i) {
        PartitionBucket* bucket = &root->buckets()[i];
        if (!partitionAllocShutdownBucket(bucket))
            noLeaks = false;
    }

    partitionAllocBaseShutdown(root);
    return noLeaks;
}

bool partitionAllocGenericShutdown(PartitionRootGeneric* root)
{
    bool noLeaks = true;
    size_t i;
    for (i = 0; i < kGenericNumBucketedOrders * kGenericNumBucketsPerOrder; ++i) {
        PartitionBucket* bucket = &root->buckets[i];
        if (!partitionAllocShutdownBucket(bucket))
            noLeaks = false;
    }
    partitionAllocBaseShutdown(root);
    return noLeaks;
}

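// Reserves a run of numPartitionPages contiguous partition pages. The pages
// come from the current super page when enough space remains; otherwise a new
// super page is mapped, its first and last partition pages become guard pages
// (with a metadata hole carved into the first one), and the super page extent
// bookkeeping is updated.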
static ALWAYS_INLINE void* partitionAllocPartitionPages(PartitionRootBase* root, size_t numPartitionPages)
{
    ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPage) % kPartitionPageSize));
    ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPageEnd) % kPartitionPageSize));
    RELEASE_ASSERT(numPartitionPages <= kNumPartitionPagesPerSuperPage);
    size_t totalSize = kPartitionPageSize * numPartitionPages;
    size_t numPartitionPagesLeft = (root->nextPartitionPageEnd - root->nextPartitionPage) >> kPartitionPageShift;
    if (LIKELY(numPartitionPagesLeft >= numPartitionPages)) {
        // In this case, we can still hand out pages from the current super page
        // allocation.
        char* ret = root->nextPartitionPage;
        root->nextPartitionPage += totalSize;
        return ret;
    }

    // Need a new super page.
    root->totalSizeOfSuperPages += kSuperPageSize;
    RELEASE_ASSERT(root->totalSizeOfSuperPages <= kMaxPartitionSize);
    char* requestedAddress = root->nextSuperPage;
    char* superPage = reinterpret_cast<char*>(allocPages(requestedAddress, kSuperPageSize, kSuperPageSize));
    // TODO: handle allocation failure here with PartitionAllocReturnNull.
    RELEASE_ASSERT(superPage);
    root->nextSuperPage = superPage + kSuperPageSize;
    char* ret = superPage + kPartitionPageSize;
    root->nextPartitionPage = ret + totalSize;
    root->nextPartitionPageEnd = root->nextSuperPage - kPartitionPageSize;
    // Make the first partition page in the super page a guard page, but leave a
    // hole in the middle.
    // This is where we put page metadata and also a tiny amount of extent
    // metadata.
    setSystemPagesInaccessible(superPage, kSystemPageSize);
    setSystemPagesInaccessible(superPage + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
    // Also make the last partition page a guard page.
    setSystemPagesInaccessible(superPage + (kSuperPageSize - kPartitionPageSize), kPartitionPageSize);

    // If we were after a specific address, but didn't get it, assume that
    // the system chose a lousy address and re-randomize the next
    // allocation.
    if (requestedAddress && requestedAddress != superPage)
        root->nextSuperPage = 0;

    // We allocated a new super page so update super page metadata.
    // First check if this is a new extent or not.
    PartitionSuperPageExtentEntry* latestExtent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(superPage));
    PartitionSuperPageExtentEntry* currentExtent = root->currentExtent;
    bool isNewExtent = (superPage != requestedAddress);
    if (UNLIKELY(isNewExtent)) {
        latestExtent->next = 0;
        if (UNLIKELY(!currentExtent)) {
            root->firstExtent = latestExtent;
        } else {
            ASSERT(currentExtent->superPageBase);
            currentExtent->next = latestExtent;
        }
        root->currentExtent = latestExtent;
        currentExtent = latestExtent;
        currentExtent->superPageBase = superPage;
        currentExtent->superPagesEnd = superPage + kSuperPageSize;
    } else {
        // We allocated next to an existing extent so just nudge the size up a little.
        currentExtent->superPagesEnd += kSuperPageSize;
        ASSERT(ret >= currentExtent->superPageBase && ret < currentExtent->superPagesEnd);
    }
    // By storing the root in every extent metadata object, we have a fast way
    // to go from a pointer within the partition to the root object.
    latestExtent->root = root;

    return ret;
}

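// Releases the physical memory backing a slot span while keeping its address
// space reserved, so the span can be reused later without remapping.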
static ALWAYS_INLINE void partitionUnusePage(PartitionPage* page)
{
    ASSERT(page->bucket->numSystemPagesPerSlotSpan);
    void* addr = partitionPageToPointer(page);
    decommitSystemPages(addr, page->bucket->numSystemPagesPerSlotSpan * kSystemPageSize);
}

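// Returns the number of slots in a fully provisioned slot span of this bucket.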
static ALWAYS_INLINE size_t partitionBucketSlots(const PartitionBucket* bucket)
{
    return (bucket->numSystemPagesPerSlotSpan * kSystemPageSize) / bucket->slotSize;
}

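// Returns the number of partition pages covered by one slot span of this
// bucket, rounded up.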
static ALWAYS_INLINE size_t partitionBucketPartitionPages(const PartitionBucket* bucket)
{
    return (bucket->numSystemPagesPerSlotSpan + (kNumSystemPagesPerPartitionPage - 1)) / kNumSystemPagesPerPartitionPage;
}

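// (Re)initializes the metadata of the slot span headed by |page| for |bucket|,
// and stamps the metadata entries of any follow-on partition pages in the span
// with their offset back to this head entry.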
static ALWAYS_INLINE void partitionPageReset(PartitionPage* page, PartitionBucket* bucket)
{
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    page->numAllocatedSlots = 0;
    page->numUnprovisionedSlots = partitionBucketSlots(bucket);
    ASSERT(page->numUnprovisionedSlots);
    page->bucket = bucket;
    page->nextPage = 0;
    // NULLing the freelist is not strictly necessary but it makes an ASSERT in partitionPageAllocAndFillFreelist simpler.
    page->freelistHead = 0;
    page->pageOffset = 0;
    page->freeCacheIndex = -1;
    size_t numPartitionPages = partitionBucketPartitionPages(bucket);
    size_t i;
    char* pageCharPtr = reinterpret_cast<char*>(page);
    for (i = 1; i < numPartitionPages; ++i) {
        pageCharPtr += kPageMetadataSize;
        PartitionPage* secondaryPage = reinterpret_cast<PartitionPage*>(pageCharPtr);
        secondaryPage->pageOffset = i;
    }
}

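// Provisions slots from a slot span that still has unprovisioned space: one
// slot is handed back to the caller, and as many of the following slots as
// can hold a freelist pointer without faulting an additional system page are
// linked onto the page's freelist.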
static ALWAYS_INLINE char* partitionPageAllocAndFillFreelist(PartitionPage* page)
{
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    size_t numSlots = page->numUnprovisionedSlots;
    ASSERT(numSlots);
    PartitionBucket* bucket = page->bucket;
    // We should only get here when _every_ slot is either used or unprovisioned.
    // (The third state is "on the freelist". If we have a non-empty freelist, we should not get here.)
    ASSERT(numSlots + page->numAllocatedSlots == partitionBucketSlots(bucket));
    // Similarly, make explicitly sure that the freelist is empty.
    ASSERT(!page->freelistHead);
    ASSERT(page->numAllocatedSlots >= 0);

    size_t size = bucket->slotSize;
    char* base = reinterpret_cast<char*>(partitionPageToPointer(page));
    char* returnObject = base + (size * page->numAllocatedSlots);
    char* firstFreelistPointer = returnObject + size;
    char* firstFreelistPointerExtent = firstFreelistPointer + sizeof(PartitionFreelistEntry*);
    // Our goal is to fault as few system pages as possible. We calculate the
    // page containing the "end" of the returned slot, and then allow freelist
    // pointers to be written up to the end of that page.
    char* subPageLimit = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(firstFreelistPointer) + kSystemPageOffsetMask) & kSystemPageBaseMask);
    char* slotsLimit = returnObject + (size * page->numUnprovisionedSlots);
    char* freelistLimit = subPageLimit;
    if (UNLIKELY(slotsLimit < freelistLimit))
        freelistLimit = slotsLimit;

    size_t numNewFreelistEntries = 0;
    if (LIKELY(firstFreelistPointerExtent <= freelistLimit)) {
        // Only consider used space in the slot span. If we consider wasted
        // space, we may get an off-by-one when a freelist pointer fits in the
        // wasted space, but a slot does not.
        // We know we can fit at least one freelist pointer.
        numNewFreelistEntries = 1;
        // Any further entries require space for the whole slot span.
        numNewFreelistEntries += (freelistLimit - firstFreelistPointerExtent) / size;
    }

    // We always return an object slot -- that's the +1 below.
    // We do not necessarily create any new freelist entries, because we cross sub page boundaries frequently for large bucket sizes.
    ASSERT(numNewFreelistEntries + 1 <= numSlots);
    numSlots -= (numNewFreelistEntries + 1);
    page->numUnprovisionedSlots = numSlots;
    page->numAllocatedSlots++;

    if (LIKELY(numNewFreelistEntries)) {
        char* freelistPointer = firstFreelistPointer;
        PartitionFreelistEntry* entry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
        page->freelistHead = entry;
        while (--numNewFreelistEntries) {
            freelistPointer += size;
            PartitionFreelistEntry* nextEntry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
            entry->next = partitionFreelistMask(nextEntry);
            entry = nextEntry;
        }
        entry->next = partitionFreelistMask(0);
    } else {
        page->freelistHead = 0;
    }
    return returnObject;
}

// This helper function scans the active page list for a suitable new active
// page, starting at the passed in page.
// When it finds a suitable new active page (one that has free slots), it is
// set as the new active page and true is returned. If there is no suitable new
// active page, false is returned and the current active page is set to null.
// As potential pages are scanned, they are tidied up according to their state.
// Freed pages are swept on to the free page list and full pages are unlinked
// from any list.
static ALWAYS_INLINE bool partitionSetNewActivePage(PartitionPage* page)
{
    if (page == &PartitionRootBase::gSeedPage) {
        ASSERT(!page->nextPage);
        return false;
    }

    PartitionPage* nextPage = 0;
    PartitionBucket* bucket = page->bucket;

    for (; page; page = nextPage) {
        nextPage = page->nextPage;
        ASSERT(page->bucket == bucket);
        ASSERT(page != bucket->freePagesHead);
        ASSERT(!bucket->freePagesHead || page != bucket->freePagesHead->nextPage);

        // Page is usable if it has something on the freelist, or unprovisioned
        // slots that can be turned into a freelist.
        if (LIKELY(page->freelistHead != 0) || LIKELY(page->numUnprovisionedSlots)) {
            bucket->activePagesHead = page;
            return true;
        }

        ASSERT(page->numAllocatedSlots >= 0);
        if (LIKELY(page->numAllocatedSlots == 0)) {
            ASSERT(page->freeCacheIndex == -1);
            // We hit a free page, so shepherd it on to the free page list.
            page->nextPage = bucket->freePagesHead;
            bucket->freePagesHead = page;
        } else {
            // If we get here, we found a full page. Skip over it too, and also
            // tag it as full (via a negative value). We need it tagged so that
            // a subsequent free can detect it and move it back onto the
            // active page list.
            ASSERT(page->numAllocatedSlots == static_cast<int>(partitionBucketSlots(bucket)));
            page->numAllocatedSlots = -page->numAllocatedSlots;
            ++bucket->numFullPages;
            // numFullPages is a uint16_t for efficient packing so guard against
            // overflow to be safe.
            RELEASE_ASSERT(bucket->numFullPages);
            // Not necessary but might help stop accidents.
            page->nextPage = 0;
        }
    }

    bucket->activePagesHead = 0;
    return false;
}

struct PartitionDirectMapExtent {
    size_t mapSize; // Mapped size, not including guard pages and meta-data.
};

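// The direct map extent record is stored two metadata entries past the page's
// own metadata (after the PartitionPage and PartitionBucket entries).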
static ALWAYS_INLINE PartitionDirectMapExtent* partitionPageToDirectMapExtent(PartitionPage* page)
{
    ASSERT(partitionBucketIsDirectMapped(page->bucket));
    return reinterpret_cast<PartitionDirectMapExtent*>(reinterpret_cast<char*>(page) + 2 * kPageMetadataSize);
}

static ALWAYS_INLINE void* partitionDirectMap(PartitionRootBase* root, int flags, size_t size)
{
    size = partitionDirectMapSize(size);

    // Because we need to fake looking like a super page, we need to allocate
    // a bunch of system pages more than "size":
    // - The first few system pages are the partition page in which the super
    // page metadata is stored. We fault just one system page out of a partition
    // page sized clump.
    // - We add a trailing guard page.
    size_t mapSize = size + kPartitionPageSize + kSystemPageSize;
    // Round up to the allocation granularity.
    mapSize += kPageAllocationGranularityOffsetMask;
    mapSize &= kPageAllocationGranularityBaseMask;

    // TODO: we may want to let the operating system place these allocations
    // where it pleases. On 32-bit, this might limit address space
    // fragmentation and on 64-bit, this might have useful savings for TLB
    // and page table overhead.
    // TODO: if upsizing realloc()s are common on large sizes, we could
    // consider over-allocating address space on 64-bit, "just in case".
    // TODO: consider pre-populating page tables (e.g. MAP_POPULATE on Linux,
    // MADV_WILLNEED on POSIX).
    // TODO: these pages will be zero-filled. Consider internalizing an
    // allocZeroed() API so we can avoid a memset() entirely in this case.
    char* ptr = reinterpret_cast<char*>(allocPages(0, mapSize, kSuperPageSize));
    if (!ptr) {
        if (flags & PartitionAllocReturnNull)
            return 0;
        RELEASE_ASSERT(false);
    }
    char* ret = ptr + kPartitionPageSize;
    // TODO: due to all the guard paging, this arrangement creates 4 mappings.
    // We could get it down to three by using read-only for the metadata page,
    // or perhaps two by leaving out the trailing guard page on 64-bit.
    setSystemPagesInaccessible(ptr, kSystemPageSize);
    setSystemPagesInaccessible(ptr + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
    setSystemPagesInaccessible(ret + size, kSystemPageSize);

    PartitionSuperPageExtentEntry* extent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(ptr));
    extent->root = root;
    PartitionPage* page = partitionPointerToPageNoAlignmentCheck(ret);
    PartitionBucket* bucket = reinterpret_cast<PartitionBucket*>(reinterpret_cast<char*>(page) + kPageMetadataSize);
    page->freelistHead = 0;
    page->nextPage = 0;
    page->bucket = bucket;
    page->numAllocatedSlots = 1;
    page->numUnprovisionedSlots = 0;
    page->pageOffset = 0;
    page->freeCacheIndex = 0;

    bucket->activePagesHead = 0;
    bucket->freePagesHead = 0;
    bucket->slotSize = size;
    bucket->numSystemPagesPerSlotSpan = 0;
    bucket->numFullPages = 0;

    PartitionDirectMapExtent* mapExtent = partitionPageToDirectMapExtent(page);
    mapExtent->mapSize = mapSize - kPartitionPageSize - kSystemPageSize;

    return ret;
}

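// Unmaps a direct-mapped allocation along with its leading guard / metadata
// partition page and its trailing guard system page.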
static ALWAYS_INLINE void partitionDirectUnmap(PartitionPage* page)
{
    size_t unmapSize = partitionPageToDirectMapExtent(page)->mapSize;

    // Add on the size of the trailing guard page and preceding partition
    // page.
    unmapSize += kPartitionPageSize + kSystemPageSize;

    ASSERT(!(unmapSize & kPageAllocationGranularityOffsetMask));

    char* ptr = reinterpret_cast<char*>(partitionPageToPointer(page));
    // Account for the mapping starting a partition page before the actual
    // allocation address.
    ptr -= kPartitionPageSize;

    freePages(ptr, unmapSize);
}

void* partitionAllocSlowPath(PartitionRootBase* root, int flags, size_t size, PartitionBucket* bucket)
{
    // The slow path is called when the freelist is empty.
    ASSERT(!bucket->activePagesHead->freelistHead);

    // For the partitionAllocGeneric API, we have a bunch of buckets marked
    // as special cases. We bounce them through to the slow path so that we
    // can still have a blazing fast hot path due to lack of corner-case
    // branches.
    bool returnNull = flags & PartitionAllocReturnNull;
    if (UNLIKELY(partitionBucketIsDirectMapped(bucket))) {
        ASSERT(size > kGenericMaxBucketed);
        ASSERT(bucket == &PartitionRootBase::gPagedBucket);
        if (size > kGenericMaxDirectMapped) {
            if (returnNull)
                return 0;
            RELEASE_ASSERT(false);
        }
        return partitionDirectMap(root, flags, size);
    }

    // First, look for a usable page in the existing active pages list.
    // Change active page, accepting the current page as a candidate.
    if (LIKELY(partitionSetNewActivePage(bucket->activePagesHead))) {
        PartitionPage* newPage = bucket->activePagesHead;
        if (LIKELY(newPage->freelistHead != 0)) {
            PartitionFreelistEntry* ret = newPage->freelistHead;
            newPage->freelistHead = partitionFreelistMask(ret->next);
            newPage->numAllocatedSlots++;
            return ret;
        }
        ASSERT(newPage->numUnprovisionedSlots);
        return partitionPageAllocAndFillFreelist(newPage);
    }

    // Second, look in our list of freed but reserved pages.
    PartitionPage* newPage = bucket->freePagesHead;
    if (LIKELY(newPage != 0)) {
        ASSERT(newPage != &PartitionRootGeneric::gSeedPage);
        ASSERT(!newPage->freelistHead);
        ASSERT(!newPage->numAllocatedSlots);
        ASSERT(!newPage->numUnprovisionedSlots);
        ASSERT(newPage->freeCacheIndex == -1);
        bucket->freePagesHead = newPage->nextPage;
    } else {
        // Third. If we get here, we need a brand new page.
        size_t numPartitionPages = partitionBucketPartitionPages(bucket);
        void* rawNewPage = partitionAllocPartitionPages(root, numPartitionPages);
        // Skip the alignment check because it depends on page->bucket, which is not yet set.
        newPage = partitionPointerToPageNoAlignmentCheck(rawNewPage);
    }

    partitionPageReset(newPage, bucket);
    bucket->activePagesHead = newPage;
    return partitionPageAllocAndFillFreelist(newPage);
}

static ALWAYS_INLINE void partitionFreePage(PartitionPage* page)
{
    ASSERT(page->freelistHead);
    ASSERT(!page->numAllocatedSlots);
    partitionUnusePage(page);
    // We actually leave the freed page in the active list. We'll sweep it on
    // to the free page list when we next walk the active page list. Pulling
    // this trick enables us to use a singly-linked page list for all cases,
    // which is critical in keeping the page metadata structure down to 32
    // bytes in size.
    page->freelistHead = 0;
    page->numUnprovisionedSlots = 0;
}

static ALWAYS_INLINE void partitionRegisterEmptyPage(PartitionPage* page)
{
    PartitionRootBase* root = partitionPageToRoot(page);
    // If the page is already registered as empty, give it another life.
    if (page->freeCacheIndex != -1) {
        ASSERT(page->freeCacheIndex >= 0);
        ASSERT(static_cast<unsigned>(page->freeCacheIndex) < kMaxFreeableSpans);
        ASSERT(root->globalEmptyPageRing[page->freeCacheIndex] == page);
        root->globalEmptyPageRing[page->freeCacheIndex] = 0;
    }

    size_t currentIndex = root->globalEmptyPageRingIndex;
    PartitionPage* pageToFree = root->globalEmptyPageRing[currentIndex];
    // The page might well have been re-activated, filled up, etc. before we get
    // around to looking at it here.
    if (pageToFree) {
        ASSERT(pageToFree != &PartitionRootBase::gSeedPage);
        ASSERT(pageToFree->freeCacheIndex >= 0);
        ASSERT(static_cast<unsigned>(pageToFree->freeCacheIndex) < kMaxFreeableSpans);
        ASSERT(pageToFree == root->globalEmptyPageRing[pageToFree->freeCacheIndex]);
        if (!pageToFree->numAllocatedSlots && pageToFree->freelistHead) {
            // The page is still empty, and not freed, so _really_ free it.
            partitionFreePage(pageToFree);
        }
        pageToFree->freeCacheIndex = -1;
    }

    // We put the empty slot span on our global list of "pages that were once
    // empty". thus providing it a bit of breathing room to get re-used before
    // we really free it. This improves performance, particularly on Mac OS X
    // which has subpar memory management performance.
    root->globalEmptyPageRing[currentIndex] = page;
    page->freeCacheIndex = currentIndex;
    ++currentIndex;
    if (currentIndex == kMaxFreeableSpans)
        currentIndex = 0;
    root->globalEmptyPageRingIndex = currentIndex;
}

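// Handles the uncommon free() cases: a slot span becoming completely empty
// (direct maps are unmapped immediately; bucketed pages are parked on the
// empty page ring), or a full page becoming partially used again (it is put
// back at the head of the active page list).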
void partitionFreeSlowPath(PartitionPage* page)
{
    PartitionBucket* bucket = page->bucket;
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    ASSERT(bucket->activePagesHead != &PartitionRootGeneric::gSeedPage);
    if (LIKELY(page->numAllocatedSlots == 0)) {
        // Page became fully unused.
        if (UNLIKELY(partitionBucketIsDirectMapped(bucket))) {
            partitionDirectUnmap(page);
            return;
        }
        // If it's the current page, attempt to change it. We'd prefer to leave
        // the page empty as a gentle force towards defragmentation.
        if (LIKELY(page == bucket->activePagesHead) && page->nextPage) {
            if (partitionSetNewActivePage(page->nextPage)) {
                ASSERT(bucket->activePagesHead != page);
                // Link the empty page back in after the new current page, to
                // avoid losing a reference to it.
                // TODO: consider walking the list to link the empty page after
                // all non-empty pages?
                PartitionPage* currentPage = bucket->activePagesHead;
                page->nextPage = currentPage->nextPage;
                currentPage->nextPage = page;
            } else {
                bucket->activePagesHead = page;
                page->nextPage = 0;
            }
        }
        partitionRegisterEmptyPage(page);
    } else {
        // Ensure that the page is full. That's the only valid case if we
        // arrive here.
        ASSERT(page->numAllocatedSlots < 0);
        // A transition of numAllocatedSlots from 0 to -1 is not legal, and
        // likely indicates a double-free.
        RELEASE_ASSERT(page->numAllocatedSlots != -1);
        page->numAllocatedSlots = -page->numAllocatedSlots - 2;
        ASSERT(page->numAllocatedSlots == static_cast<int>(partitionBucketSlots(bucket) - 1));
        // Fully used page became partially used. It must be put back on the
        // non-full page list. Also make it the current page to increase the
        // chances of it being filled up again. The old current page will be
        // the next page.
        page->nextPage = bucket->activePagesHead;
        bucket->activePagesHead = page;
        --bucket->numFullPages;
        // Special case: for a partition page with just a single slot, it may
        // now be empty and we want to run it through the empty logic.
        if (UNLIKELY(page->numAllocatedSlots == 0))
            partitionFreeSlowPath(page);
    }
}

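// Attempts to resize a direct-mapped allocation in place by decommitting
// trailing pages (shrink) or recommitting pages within the existing mapping
// (grow). Returns false if the resize cannot be satisfied without moving the
// allocation.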
bool partitionReallocDirectMappedInPlace(PartitionRootGeneric* root, PartitionPage* page, size_t newSize)
{
    ASSERT(partitionBucketIsDirectMapped(page->bucket));

    newSize = partitionCookieSizeAdjustAdd(newSize);

    // Note that the new size might be a bucketed size; this function is called
    // whenever we're reallocating a direct mapped allocation.
    newSize = partitionDirectMapSize(newSize);
    if (newSize < kGenericMinDirectMappedDownsize)
        return false;

    // bucket->slotSize is the current size of the allocation.
    size_t currentSize = page->bucket->slotSize;
    if (newSize == currentSize)
        return true;

    char* charPtr = static_cast<char*>(partitionPageToPointer(page));

    if (newSize < currentSize) {
        // Shrink by decommitting unneeded pages and making them inaccessible.
        size_t decommitSize = currentSize - newSize;
        decommitSystemPages(charPtr + newSize, decommitSize);
        setSystemPagesInaccessible(charPtr + newSize, decommitSize);
    } else if (newSize <= partitionPageToDirectMapExtent(page)->mapSize) {
        // Grow within the actually allocated memory. Just need to make the
        // pages accessible again.
        size_t recommitSize = newSize - currentSize;
        setSystemPagesAccessible(charPtr + currentSize, recommitSize);

#ifndef NDEBUG
        memset(charPtr + currentSize, kUninitializedByte, recommitSize);
#endif
    } else {
        // We can't perform the realloc in-place.
        // TODO: support this too when possible.
        return false;
    }

#ifndef NDEBUG
    // Write a new trailing cookie.
    partitionCookieWriteValue(charPtr + newSize - kCookieSize);
#endif

    page->bucket->slotSize = newSize;
    return true;
}

void* partitionReallocGeneric(PartitionRootGeneric* root, void* ptr, size_t newSize)
{
#if defined(MEMORY_TOOL_REPLACES_ALLOCATOR)
    return realloc(ptr, newSize);
#else
    if (UNLIKELY(!ptr))
        return partitionAllocGeneric(root, newSize);
    if (UNLIKELY(!newSize)) {
        partitionFreeGeneric(root, ptr);
        return 0;
    }

    RELEASE_ASSERT(newSize <= kGenericMaxDirectMapped);

    ASSERT(partitionPointerIsValid(partitionCookieFreePointerAdjust(ptr)));

    PartitionPage* page = partitionPointerToPage(partitionCookieFreePointerAdjust(ptr));

    if (UNLIKELY(partitionBucketIsDirectMapped(page->bucket))) {
        // We may be able to perform the realloc in place by changing the
        // accessibility of memory pages and, if reducing the size, decommitting
        // them.
        if (partitionReallocDirectMappedInPlace(root, page, newSize))
            return ptr;
    }

    size_t actualNewSize = partitionAllocActualSize(root, newSize);
    size_t actualOldSize = partitionAllocGetSize(ptr);

    // TODO: note that tcmalloc will "ignore" a downsizing realloc() unless the
    // new size is a significant percentage smaller. We could do the same if we
    // determine it is a win.
    if (actualNewSize == actualOldSize) {
        // Trying to allocate a block of size newSize would give us a block of
        // the same size as the one we've already got, so no point in doing
        // anything here.
        return ptr;
    }

    // This realloc cannot be resized in-place. Sadness.
    void* ret = partitionAllocGeneric(root, newSize);
    size_t copySize = actualOldSize;
    if (newSize < copySize)
        copySize = newSize;

    memcpy(ret, ptr, copySize);
    partitionFreeGeneric(root, ptr);
    return ret;
#endif
}

#ifndef NDEBUG

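// Debug-only helper: prints per-bucket slot span geometry together with live,
// resident and freeable byte counts for the whole partition to stdout.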
void partitionDumpStats(const PartitionRoot& root)
{
    size_t i;
    size_t totalLive = 0;
    size_t totalResident = 0;
    size_t totalFreeable = 0;
    for (i = 0; i < root.numBuckets; ++i) {
        const PartitionBucket& bucket = root.buckets()[i];
        if (bucket.activePagesHead == &PartitionRootGeneric::gSeedPage && !bucket.freePagesHead && !bucket.numFullPages) {
            // Empty bucket with no freelist or full pages. Skip reporting it.
            continue;
        }
        size_t numFreePages = 0;
        PartitionPage* freePages = bucket.freePagesHead;
        while (freePages) {
            ++numFreePages;
            freePages = freePages->nextPage;
        }
        size_t bucketSlotSize = bucket.slotSize;
        size_t bucketNumSlots = partitionBucketSlots(&bucket);
        size_t bucketUsefulStorage = bucketSlotSize * bucketNumSlots;
        size_t bucketPageSize = bucket.numSystemPagesPerSlotSpan * kSystemPageSize;
        size_t bucketWaste = bucketPageSize - bucketUsefulStorage;
        size_t numActiveBytes = bucket.numFullPages * bucketUsefulStorage;
        size_t numResidentBytes = bucket.numFullPages * bucketPageSize;
        size_t numFreeableBytes = 0;
        size_t numActivePages = 0;
        const PartitionPage* page = bucket.activePagesHead;
        do {
            if (page != &PartitionRootGeneric::gSeedPage) {
                ++numActivePages;
                numActiveBytes += (page->numAllocatedSlots * bucketSlotSize);
                size_t pageBytesResident = (bucketNumSlots - page->numUnprovisionedSlots) * bucketSlotSize;
                // Round up to system page size.
                pageBytesResident = (pageBytesResident + kSystemPageOffsetMask) & kSystemPageBaseMask;
                numResidentBytes += pageBytesResident;
                if (!page->numAllocatedSlots)
                    numFreeableBytes += pageBytesResident;
            }
            page = page->nextPage;
        } while (page != bucket.activePagesHead);
        totalLive += numActiveBytes;
        totalResident += numResidentBytes;
        totalFreeable += numFreeableBytes;
        printf("bucket size %zu (pageSize %zu waste %zu): %zu alloc/%zu commit/%zu freeable bytes, %zu/%zu/%zu full/active/free pages\n", bucketSlotSize, bucketPageSize, bucketWaste, numActiveBytes, numResidentBytes, numFreeableBytes, static_cast<size_t>(bucket.numFullPages), numActivePages, numFreePages);
    }
    printf("total live: %zu bytes\n", totalLive);
    printf("total resident: %zu bytes\n", totalResident);
    printf("total freeable: %zu bytes\n", totalFreeable);
    fflush(stdout);
}

#endif // !NDEBUG

} // namespace WTF

