// Copyright (c) 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // --- // Author: Sanjay Ghemawat <opensource@google.com> // // A data structure used by the caching malloc. It maps from page# to // a pointer that contains info about that page. We use two // representations: one for 32-bit addresses, and another for 64 bit // addresses. Both representations provide the same interface. The // first representation is implemented as a flat array, the seconds as // a three-level radix tree that strips away approximately 1/3rd of // the bits every time. // // The BITS parameter should be the number of bits required to hold // a page number. E.g., with 32 bit pointers and 4K pages (i.e., // page offset fits in lower 12 bits), BITS == 20. #ifndef TCMALLOC_PAGEMAP_H_ #define TCMALLOC_PAGEMAP_H_ #include "config.h" #include <stddef.h> // for NULL, size_t #include <string.h> // for memset #if defined HAVE_STDINT_H #include <stdint.h> #elif defined HAVE_INTTYPES_H #include <inttypes.h> #else #include <sys/types.h> #endif #ifdef WIN32 // TODO(jar): This is not needed when TCMalloc_PageMap1_LazyCommit has an API // supporting commit and reservation of memory. #include "common.h" #endif #include "internal_logging.h" // for ASSERT // Single-level array template <int BITS> class TCMalloc_PageMap1 { private: static const int LENGTH = 1 << BITS; void** array_; public: typedef uintptr_t Number; explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) { array_ = reinterpret_cast<void**>((*allocator)(sizeof(void*) << BITS)); memset(array_, 0, sizeof(void*) << BITS); } // Ensure that the map contains initialized entries "x .. x+n-1". // Returns true if successful, false if we could not allocate memory. bool Ensure(Number x, size_t n) { // Nothing to do since flat array was allocated at start. All // that's left is to check for overflow (that is, we don't want to // ensure a number y where array_[y] would be an out-of-bounds // access). return n <= LENGTH - x; // an overflow-free way to do "x + n <= LENGTH" } void PreallocateMoreMemory() {} // Return the current value for KEY. Returns NULL if not yet set, // or if k is out of range. void* get(Number k) const { if ((k >> BITS) > 0) { return NULL; } return array_[k]; } // REQUIRES "k" is in range "[0,2^BITS-1]". // REQUIRES "k" has been ensured before. // // Sets the value 'v' for key 'k'. void set(Number k, void* v) { array_[k] = v; } // Return the first non-NULL pointer found in this map for // a page number >= k. Returns NULL if no such number is found. void* Next(Number k) const { while (k < (1 << BITS)) { if (array_[k] != NULL) return array_[k]; k++; } return NULL; } }; #ifdef WIN32 // Lazy commit, single-level array. // Very similar to PageMap1, except the page map is only committed as needed. // Since we don't return memory to the OS, the committed portion of the map will // only grow, and we'll only be called to Ensure when we really grow the heap. // We maintain a bit map to help us deduce if we've already committed a range // in our map. template <int BITS> class TCMalloc_PageMap1_LazyCommit { private: // Dimension of our page map array_. static const int LENGTH = 1 << BITS; // The page map array that sits in reserved virtual space. Pages of this // array are committed as they are needed. For each page of virtual memory, // we potentially have a pointer to a span instance. void** array_; // A bit vector that allows us to deduce what pages in array_ are committed. // Note that 2^3 = 8 bits per char, and hence the use of the magical "3" in // the array range gives us the effective "divide by 8". char committed_[sizeof(void*) << (BITS - kPageShift - 3)]; // Given an |index| into |array_|, find the page number in |array_| that holds // that element. size_t ContainingPage(size_t index) const { return (index * sizeof(*array_)) >> kPageShift; } // Find out if the given page_num index in array_ is in committed memory. bool IsCommitted(size_t page_num) const { return committed_[page_num >> 3] & (1 << (page_num & 0x7)); } // Remember that the given page_num index in array_ is in committed memory. void SetCommitted(size_t page_num) { committed_[page_num >> 3] |= (1 << (page_num & 0x7)); } public: typedef uintptr_t Number; explicit TCMalloc_PageMap1_LazyCommit(void* (*allocator)(size_t)) { // TODO(jar): We need a reservation function, but current API to this class // only provides an allocator. // Get decommitted memory. We will commit as necessary. size_t size = sizeof(*array_) << BITS; array_ = reinterpret_cast<void**>(VirtualAlloc( NULL, size, MEM_RESERVE, PAGE_READWRITE)); tcmalloc::update_metadata_system_bytes(size); tcmalloc::update_metadata_unmapped_bytes(size); // Make sure we divided LENGTH evenly. ASSERT(sizeof(committed_) * 8 == (LENGTH * sizeof(*array_)) >> kPageShift); // Indicate that none of the pages of array_ have been committed yet. memset(committed_, 0, sizeof(committed_)); } // Ensure that the map contains initialized and committed entries in array_ to // describe pages "x .. x+n-1". // Returns true if successful, false if we could not ensure this. // If we have to commit more memory in array_ (which also clears said memory), // then we'll set some of the bits in committed_ to remember this fact. // Only the bits of committed_ near end-points for calls to Ensure() are ever // set, as the calls to Ensure() will never have overlapping ranges other than // at their end-points. // // Example: Suppose the OS allocates memory in pages including 40...50, and // later the OS allocates memory in pages 51...83. When the first allocation // of 40...50 is observed, then Ensure of (39,51) will be called. The range // shown in the arguments is extended so that tcmalloc can look to see if // adjacent pages are part of a span that can be coaleced. Later, when pages // 51...83 are allocated, Ensure() will be called with arguments (50,84), // broadened again for the same reason. // // After the above, we would NEVER get a call such as Ensure(45,60), as that // overlaps with the interior of prior ensured regions. We ONLY get an Ensure // call when the OS has allocated memory, and since we NEVER give memory back // to the OS, the OS can't possible allocate the same region to us twice, and // can't induce an Ensure() on an interior of previous Ensure call. // // Also note that OS allocations are NOT guaranteed to be consecutive (there // may be "holes" where code etc. uses the virtual addresses), or to appear in // any order, such as lowest to highest, or vice versa (as other independent // allocation systems in the process may be performing VirtualAllocations and // VirtualFrees asynchronously.) bool Ensure(Number x, size_t n) { if (n > LENGTH - x) return false; // We won't Ensure mapping for last pages in memory. ASSERT(n > 0); // For a given page number in memory, calculate what page in array_ needs to // be memory resident. Note that we really only need a few bytes in array_ // for each page of virtual space we have to map, but we can only commit // whole pages of array_. For instance, a 4K page of array_ has about 1k // entries, and hence can map about 1K pages, or a total of about 4MB // typically. As a result, it is possible that the first entry in array_, // and the n'th entry in array_, will sit in the same page of array_. size_t first_page = ContainingPage(x); size_t last_page = ContainingPage(x + n - 1); // Check at each boundary, to see if we need to commit at that end. Some // other neighbor may have already forced us to commit at either or both // boundaries. if (IsCommitted(first_page)) { if (first_page == last_page) return true; ++first_page; if (IsCommitted(first_page)) { if (first_page == last_page) return true; ++first_page; } } if (IsCommitted(last_page)) { if (first_page == last_page) return true; --last_page; if (IsCommitted(last_page)) { if (first_page == last_page) return true; --last_page; } } ASSERT(!IsCommitted(last_page)); ASSERT(!IsCommitted(first_page)); void* start = reinterpret_cast<char*>(array_) + (first_page << kPageShift); size_t length = (last_page - first_page + 1) << kPageShift; #ifndef NDEBUG // Validate we are committing new sections, and hence we're not clearing any // existing data. MEMORY_BASIC_INFORMATION info = {0}; size_t result = VirtualQuery(start, &info, sizeof(info)); ASSERT(result); ASSERT(0 == (info.State & MEM_COMMIT)); // It starts with uncommitted. ASSERT(info.RegionSize >= length); // Entire length is uncommitted. #endif TCMalloc_SystemCommit(start, length); tcmalloc::update_metadata_unmapped_bytes(-length); #ifndef NDEBUG result = VirtualQuery(start, &info, sizeof(info)); ASSERT(result); ASSERT(0 != (info.State & MEM_COMMIT)); // Now it is committed. ASSERT(info.RegionSize >= length); // Entire length is committed. #endif // As noted in the large comment/example describing this method, we will // never be called with a range of pages very much inside this |first_page| // to |last_page| range. // As a result, we only need to set bits for each end of that range, and one // page inside each end. SetCommitted(first_page); if (first_page < last_page) { SetCommitted(last_page); SetCommitted(first_page + 1); // These may be duplicates now. SetCommitted(last_page - 1); } return true; } // This is a premature call to get all the meta-memory allocated, so as to // avoid virtual space fragmentation. Since we pre-reserved all memory, we // don't need to do anything here (we won't fragment virtual space). void PreallocateMoreMemory() {} // Return the current value for KEY. Returns NULL if not yet set, // or if k is out of range. void* get(Number k) const { if ((k >> BITS) > 0) { return NULL; } return array_[k]; } // REQUIRES "k" is in range "[0,2^BITS-1]". // REQUIRES "k" has been ensured before. // // Sets the value for KEY. void set(Number k, void* v) { array_[k] = v; } // Return the first non-NULL pointer found in this map for // a page number >= k. Returns NULL if no such number is found. void* Next(Number k) const { while (k < (1 << BITS)) { if (array_[k] != NULL) return array_[k]; k++; } return NULL; } }; #endif // WIN32 // Two-level radix tree template <int BITS> class TCMalloc_PageMap2 { private: // Put 32 entries in the root and (2^BITS)/32 entries in each leaf. static const int ROOT_BITS = 5; static const int ROOT_LENGTH = 1 << ROOT_BITS; static const int LEAF_BITS = BITS - ROOT_BITS; static const int LEAF_LENGTH = 1 << LEAF_BITS; // Leaf node struct Leaf { void* values[LEAF_LENGTH]; }; Leaf* root_[ROOT_LENGTH]; // Pointers to 32 child nodes void* (*allocator_)(size_t); // Memory allocator public: typedef uintptr_t Number; explicit TCMalloc_PageMap2(void* (*allocator)(size_t)) { allocator_ = allocator; memset(root_, 0, sizeof(root_)); } void* get(Number k) const { const Number i1 = k >> LEAF_BITS; const Number i2 = k & (LEAF_LENGTH-1); if ((k >> BITS) > 0 || root_[i1] == NULL) { return NULL; } return root_[i1]->values[i2]; } void set(Number k, void* v) { ASSERT(k >> BITS == 0); const Number i1 = k >> LEAF_BITS; const Number i2 = k & (LEAF_LENGTH-1); root_[i1]->values[i2] = v; } bool Ensure(Number start, size_t n) { for (Number key = start; key <= start + n - 1; ) { const Number i1 = key >> LEAF_BITS; // Check for overflow if (i1 >= ROOT_LENGTH) return false; // Make 2nd level node if necessary if (root_[i1] == NULL) { Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); if (leaf == NULL) return false; memset(leaf, 0, sizeof(*leaf)); root_[i1] = leaf; } // Advance key past whatever is covered by this leaf node key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; } return true; } void PreallocateMoreMemory() { // Allocate enough to keep track of all possible pages Ensure(0, 1 << BITS); } void* Next(Number k) const { while (k < (1 << BITS)) { const Number i1 = k >> LEAF_BITS; Leaf* leaf = root_[i1]; if (leaf != NULL) { // Scan forward in leaf for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) { if (leaf->values[i2] != NULL) { return leaf->values[i2]; } } } // Skip to next top-level entry k = (i1 + 1) << LEAF_BITS; } return NULL; } }; // Three-level radix tree template <int BITS> class TCMalloc_PageMap3 { private: // How many bits should we consume at each interior level static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS; // How many bits should we consume at leaf level static const int LEAF_BITS = BITS - 2*INTERIOR_BITS; static const int LEAF_LENGTH = 1 << LEAF_BITS; // Interior node struct Node { Node* ptrs[INTERIOR_LENGTH]; }; // Leaf node struct Leaf { void* values[LEAF_LENGTH]; }; Node* root_; // Root of radix tree void* (*allocator_)(size_t); // Memory allocator Node* NewNode() { Node* result = reinterpret_cast<Node*>((*allocator_)(sizeof(Node))); if (result != NULL) { memset(result, 0, sizeof(*result)); } return result; } public: typedef uintptr_t Number; explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) { allocator_ = allocator; root_ = NewNode(); } void* get(Number k) const { const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); const Number i3 = k & (LEAF_LENGTH-1); if ((k >> BITS) > 0 || root_->ptrs[i1] == NULL || root_->ptrs[i1]->ptrs[i2] == NULL) { return NULL; } return reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3]; } void set(Number k, void* v) { ASSERT(k >> BITS == 0); const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); const Number i3 = k & (LEAF_LENGTH-1); reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3] = v; } bool Ensure(Number start, size_t n) { for (Number key = start; key <= start + n - 1; ) { const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS); const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH-1); // Check for overflow if (i1 >= INTERIOR_LENGTH || i2 >= INTERIOR_LENGTH) return false; // Make 2nd level node if necessary if (root_->ptrs[i1] == NULL) { Node* n = NewNode(); if (n == NULL) return false; root_->ptrs[i1] = n; } // Make leaf node if necessary if (root_->ptrs[i1]->ptrs[i2] == NULL) { Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); if (leaf == NULL) return false; memset(leaf, 0, sizeof(*leaf)); root_->ptrs[i1]->ptrs[i2] = reinterpret_cast<Node*>(leaf); } // Advance key past whatever is covered by this leaf node key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; } return true; } void PreallocateMoreMemory() { } void* Next(Number k) const { while (k < (Number(1) << BITS)) { const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); if (root_->ptrs[i1] == NULL) { // Advance to next top-level entry k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS); } else { Leaf* leaf = reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2]); if (leaf != NULL) { for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) { if (leaf->values[i3] != NULL) { return leaf->values[i3]; } } } // Advance to next interior entry k = ((k >> LEAF_BITS) + 1) << LEAF_BITS; } } return NULL; } }; #endif // TCMALLOC_PAGEMAP_H_