diff --git a/README.md b/README.md index b369efd..6b87936 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ -# Google-tcmalloc-simulation-implementation +# Google-tcmalloc-simulation-implementation(未完成) 谷歌开源项目tcmalloc高并发内存池学习和模拟实现 开题日期:20240504 -- [Google-tcmalloc-simulation-implementation](#google-tcmalloc-simulation-implementation) +- [Google-tcmalloc-simulation-implementation(未完成)](#google-tcmalloc-simulation-implementation未完成) + - [==bugs to fix (项目目前待解决的问题)==](#bugs-to-fix-项目目前待解决的问题) - [前言](#前言) - [threadCache整体框架](#threadcache整体框架) - [开始写threadCache代码](#开始写threadcache代码) @@ -22,9 +23,23 @@ - [page\_cache内存释放](#page_cache内存释放) - [大于256k的情况](#大于256k的情况) - [处理代码中`new`的问题](#处理代码中new的问题) + - [解决free,使其不用传大小](#解决free使其不用传大小) + - [多线程场景下深度测试](#多线程场景下深度测试) + - [分析性能瓶颈](#分析性能瓶颈) + - [用Radix Tree进行优化](#用radix-tree进行优化) *** +## ==bugs to fix (项目目前待解决的问题)== + +1. 在ubuntu_arm64环境下,如果调用多线程,出现段错误(原因未知,待解决) +2. 在ubuntu_arm64环境下,radix tree需要用第三棵,前两棵用不了,需要解决。 +3. 在window32位环境下,可以偶尔成功运行,出现偶发段错误,原因未知,待解决。 + +经过radixtree优化后,模拟实现的tcmalloc效率高于malloc。(win32下测试,会出现偶发段错误) + +![](./assets/5.png) + ## 前言 当前项目是实现一个高并发的内存池,他的原型是google的一个开源项目tcmalloc,tcmalloc全称 Thread-Caching Malloc,即线程缓存的malloc,实现了高效的多线程内存管理,用于替代系统的内存分配相关的函数(malloc、free)。 @@ -1196,4 +1211,92 @@ void page_cache::release_span_to_page(span* s) { ## 处理代码中`new`的问题 -代码中有些地方用了`new span`。这个就很不对。我们弄这个tcmalloc是用来替代malloc的,既然是替代,那我们的代码里面怎么能有`new`,`new`也是调用`malloc`的,所以我们要改一下。 \ No newline at end of file +代码中有些地方用了`new span`。这个就很不对。我们弄这个tcmalloc是用来替代malloc的,既然是替代,那我们的代码里面怎么能有`new`,`new`也是调用`malloc`的,所以我们要改一下。 + +然后之前是写了一个定长内存池的,可以用来代替new。 + +**博客地址:[内存池是什么原理?|内存池简易模拟实现|为学习高并发内存池tcmalloc做准备](https://blog.csdn.net/Yu_Cblog/article/details/131741601)** + +page_cache.hpp +```cpp +class page_cache { +private: + span_list __span_lists[PAGES_NUM]; + static page_cache __s_inst; + page_cache() = default; + page_cache(const page_cache&) = delete; + std::unordered_map __id_span_map; + object_pool __span_pool; +``` +多加一个`object_pool __span_pool;`对象。 + +然后,`new span`的地方都替换掉。`delete`的地方也换掉就行。 + +然后这里面也改一下。 + +tcmalloc.hpp +```cpp +static void* tcmalloc(size_t size) { + if (size > MAX_BYTES) { + // 处理申请大内存的情况 + size_t align_size = size_class::round_up(size); + size_t k_page = align_size >> PAGE_SHIFT; + page_cache::get_instance()->__page_mtx.lock(); + span* cur_span = page_cache::get_instance()->new_span(k_page); // 直接找pc + page_cache::get_instance()->__page_mtx.unlock(); + void* ptr = (void*)(cur_span->__page_id << PAGE_SHIFT); // span转化成地址 + return ptr; + } + if (p_tls_thread_cache == nullptr) { + // 相当于单例 + // p_tls_thread_cache = new thread_cache; + static object_pool tc_pool; + p_tls_thread_cache = tc_pool.new_(); + } +#ifdef PROJECT_DEBUG + LOG(DEBUG) << "tcmalloc find tc from mem" << std::endl; +#endif + return p_tls_thread_cache->allocate(size); +} +``` + +## 解决free,使其不用传大小 + +因为我们已经有页号到span的映射了。所以我们在span里面增加一个字段,obj_size就行。 + +## 多线程场景下深度测试 + +**首先要明确一点,我们不是去造一个轮子,我们要和malloc对比,不是说要比malloc快多少,因为我们在很多细节上,和tcmalloc差的还是很远的。** + +测试代码可以见bench\_mark.cc。 + +结果 +```bash +parallels@ubuntu-linux-22-04-desktop:~/Project/Google-tcmalloc-simulation-implementation$ ./out +========================================================== +4个线程并发执行10轮次,每轮次concurrent alloc 1000次: 花费:27877 ms +4个线程并发执行10轮次,每轮次concurrent dealloc 1000次: 花费:52190 ms +4个线程并发concurrent alloc&dealloc 40000次,总计花费:80067 ms + + +4个线程并发执行10次,每轮次malloc 1000次: 花费:2227ms +4个线程并发执行10轮次,每轮次free 1000次: 花费:1385 ms +4个线程并发malloc&free 40000次,总计花费:3612 ms +========================================================== +parallels@ubuntu-linux-22-04-desktop:~/Project/Google-tcmalloc-simulation-implementation$ +``` + +比malloc差。 + +## 分析性能瓶颈 + +linux和windows(VS STUDIO)下都有很多性能分析的工具,可以检测哪里调用的时间多。 + +在这里直接出结论:锁用了很多时间。 + +可以用基数树进行优化。 + +## 用Radix Tree进行优化 + +radix tree 我们可以直接用tcmalloc源码里面的。`page_map.hpp`。 + diff --git a/assets/5.png b/assets/5.png new file mode 100644 index 0000000..609f0ef Binary files /dev/null and b/assets/5.png differ diff --git a/bench_mark.cc b/bench_mark.cc new file mode 100644 index 0000000..89633dd --- /dev/null +++ b/bench_mark.cc @@ -0,0 +1,85 @@ + + +#include "./include/tcmalloc.hpp" +#include +#include + +// ntimes 一轮申请和释放内存的次数 +// rounds 轮次 +void BenchmarkMalloc(size_t ntimes, size_t nworks, size_t rounds) { + std::vector vthread(nworks); + std::atomic malloc_costtime(0); + std::atomic free_costtime(0); + for (size_t k = 0; k < nworks; ++k) { + vthread[k] = std::thread([&, k]() { + std::vector v; + v.reserve(ntimes); + for (size_t j = 0; j < rounds; ++j) { + size_t begin1 = clock(); + for (size_t i = 0; i < ntimes; i++) { + v.push_back(malloc(16)); + // v.push_back(malloc((16 + i) % 8192 + 1)); + } + size_t end1 = clock(); + size_t begin2 = clock(); + for (size_t i = 0; i < ntimes; i++) { + free(v[i]); + } + size_t end2 = clock(); + v.clear(); + malloc_costtime += (end1 - begin1); + free_costtime += (end2 - begin2); + } + }); + } + for (auto& t : vthread) { + t.join(); + } + std::cout << nworks << "threads run" << rounds << " times, each round malloc " << ntimes << " times, cost: " << malloc_costtime.load() << "ms\n"; + std::cout << nworks << "threads run" << rounds << " times, each round free " << ntimes << " times, cost: " << free_costtime.load() << " ms\n"; + std::cout << nworks << "threads run malloc and free " << nworks * rounds * ntimes << " time, total cost: " << malloc_costtime.load() + free_costtime.load() << " ms\n"; +} + +// 单轮次申请释放次数 线程数 轮次 +void BenchmarkConcurrentMalloc(size_t ntimes, size_t nworks, size_t rounds) { + std::vector vthread(nworks); + std::atomic malloc_costtime(0); + std::atomic free_costtime(0); + for (size_t k = 0; k < nworks; ++k) { + vthread[k] = std::thread([&]() { + std::vector v; + v.reserve(ntimes); + for (size_t j = 0; j < rounds; ++j) { + size_t begin1 = clock(); + for (size_t i = 0; i < ntimes; i++) { + v.push_back(tcmalloc(16)); + // v.push_back(ConcurrentAlloc((16 + i) % 8192 + 1)); + } + size_t end1 = clock(); + size_t begin2 = clock(); + for (size_t i = 0; i < ntimes; i++) { + tcfree(v[i]); + } + size_t end2 = clock(); + v.clear(); + malloc_costtime += (end1 - begin1); + free_costtime += (end2 - begin2); + } + }); + } + for (auto& t : vthread) { + t.join(); + } + std::cout << nworks << "threads run" << rounds << " times, each round malloc " << ntimes << " times, cost: " << malloc_costtime.load() << "ms\n"; + std::cout << nworks << "threads run" << rounds << " times, each round free " << ntimes << " times, cost: " << free_costtime.load() << " ms\n"; + std::cout << nworks << "threads run tcmalloc and tcfree " << nworks * rounds * ntimes << " time, total cost: " << malloc_costtime.load() + free_costtime.load() << " ms\n"; +} + +int main() { + size_t n = 1000; + BenchmarkConcurrentMalloc(n, 4, 10); + std::cout << std::endl + << std::endl; + BenchmarkMalloc(n, 4, 10); + return 0; +} \ No newline at end of file diff --git a/include/common.hpp b/include/common.hpp index 9b280f9..3c6f408 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -28,8 +28,10 @@ static const size_t PAGE_SHIFT = 13; #if defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) || defined(__aarch64__) typedef unsigned long long PAGE_ID; +#define SYS_BYTES 64 #else typedef size_t PAGE_ID; +#define SYS_BYTES 32 #endif inline static void* system_alloc(size_t kpage) { @@ -199,6 +201,7 @@ class span { size_t __use_count = 0; // 切成段小块内存,被分配给threadCache的计数器 void* __free_list = nullptr; // 切好的小块内存的自由链表 bool __is_use = false; // 是否在被使用 + size_t __obj_size; // 切好的小对象的大小 }; // 带头双向循环链表 diff --git a/include/object_pool.hpp b/include/object_pool.hpp new file mode 100644 index 0000000..08dd493 --- /dev/null +++ b/include/object_pool.hpp @@ -0,0 +1,53 @@ + + +#ifndef __YUFC_OBJECT_POOL_HPP__ +#define __YUFC_OBJECT_POOL_HPP__ + +#include +#include +#include "./common.hpp" + +#define __DEFAULT_KB__ 128 + + + +template +class object_pool { +private: + char* __memory = nullptr; // char 方便切 + size_t __remain_bytes = 0; // 大块内存在切的过程中剩余的字节数 + void* __free_list = nullptr; // 还回来的时候形成的自由链表 +public: + T* new_() { + T* obj = nullptr; + // 不够空间 首选是把还回来的内存块对象进行再次利用 + if (__free_list) { + // 头删 + void* next = *((void**)__free_list); + obj = (T*)__free_list; + __free_list = next; + return obj; + } + if (__remain_bytes < sizeof(T)) { + // 空间不够了,要重新开一个空间 + __remain_bytes = __DEFAULT_KB__ * 1024; + __memory = (char*)malloc(__remain_bytes); + if (__memory == nullptr) { + throw std::bad_alloc(); + } + } + obj = (T*)__memory; + size_t obj_size = sizeof(T) < sizeof(void*) ? sizeof(void*) : sizeof(T); + __memory += obj_size; + __remain_bytes -= obj_size; + new (obj) T; + return obj; + } + void delete_(T* obj) { + obj->~T(); + *(void**)obj = __free_list; + __free_list = obj; + } +}; + +#endif \ No newline at end of file diff --git a/include/page_cache.hpp b/include/page_cache.hpp index 092eaad..91e039d 100644 --- a/include/page_cache.hpp +++ b/include/page_cache.hpp @@ -4,6 +4,8 @@ #define __YUFC_PAGE_CACHE_HPP__ #include "./common.hpp" +#include "./object_pool.hpp" +#include "./page_map.hpp" class page_cache { private: @@ -11,7 +13,9 @@ class page_cache { static page_cache __s_inst; page_cache() = default; page_cache(const page_cache&) = delete; - std::unordered_map __id_span_map; + // std::unordered_map __id_span_map; + TCMalloc_PageMap3 __id_span_map; + object_pool __span_pool; public: std::mutex __page_mtx; @@ -21,6 +25,7 @@ class page_cache { span* map_obj_to_span(void* obj); // 释放空闲的span回到pc,并合并相邻的span void release_span_to_page(span* s, size_t size = 0); + public: // 获取一个K页的span span* new_span(size_t k); diff --git a/include/page_map.hpp b/include/page_map.hpp new file mode 100644 index 0000000..b3b8c17 --- /dev/null +++ b/include/page_map.hpp @@ -0,0 +1,223 @@ + +#ifndef __PAGE_MAP_FROM_GOOGLE_TCMALLOC__ +#define __PAGE_MAP_FROM_GOOGLE_TCMALLOC__ + +#include "common.hpp" +#include "object_pool.hpp" +#include + +#define ASSERT assert + +// Single-level array +template +class TCMalloc_PageMap1 { +private: + static const int LENGTH = 1 << BITS; + void** array_; + +public: + typedef uintptr_t Number; + + // explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) { + explicit TCMalloc_PageMap1() { + // array_ = reinterpret_cast((*allocator)(sizeof(void*) << BITS)); + size_t size = sizeof(void*) << BITS; + size_t alignSize = size_class::__round_up(size, 1 << PAGE_SHIFT); + array_ = (void**)system_alloc(alignSize >> PAGE_SHIFT); + memset(array_, 0, sizeof(void*) << BITS); + } + + // Return the current value for KEY. Returns NULL if not yet set, + // or if k is out of range. + void* get(Number k) const { + if ((k >> BITS) > 0) { + return NULL; + } + return array_[k]; + } + + // REQUIRES "k" is in range "[0,2^BITS-1]". + // REQUIRES "k" has been ensured before. + // + // Sets the value 'v' for key 'k'. + void set(Number k, void* v) { + array_[k] = v; + } +}; + +// Two-level radix tree +template +class TCMalloc_PageMap2 { +private: + // Put 32 entries in the root and (2^BITS)/32 entries in each leaf. + static const int ROOT_BITS = 5; + static const int ROOT_LENGTH = 1 << ROOT_BITS; + + static const int LEAF_BITS = BITS - ROOT_BITS; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Leaf* root_[ROOT_LENGTH]; // Pointers to 32 child nodes + void* (*allocator_)(size_t); // Memory allocator + +public: + typedef uintptr_t Number; + + // explicit TCMalloc_PageMap2(void* (*allocator)(size_t)) { + explicit TCMalloc_PageMap2() { + // allocator_ = allocator; + memset(root_, 0, sizeof(root_)); + + PreallocateMoreMemory(); + } + + void* get(Number k) const { + const Number i1 = k >> LEAF_BITS; + const Number i2 = k & (LEAF_LENGTH - 1); + if ((k >> BITS) > 0 || root_[i1] == NULL) { + return NULL; + } + return root_[i1]->values[i2]; + } + + void set(Number k, void* v) { + const Number i1 = k >> LEAF_BITS; + const Number i2 = k & (LEAF_LENGTH - 1); + ASSERT(i1 < ROOT_LENGTH); + root_[i1]->values[i2] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1;) { + const Number i1 = key >> LEAF_BITS; + + // Check for overflow + if (i1 >= ROOT_LENGTH) + return false; + + // Make 2nd level node if necessary + if (root_[i1] == NULL) { + // Leaf* leaf = reinterpret_cast((*allocator_)(sizeof(Leaf))); + // if (leaf == NULL) return false; + static object_pool leafPool; + Leaf* leaf = (Leaf*)leafPool.new_(); + + memset(leaf, 0, sizeof(*leaf)); + root_[i1] = leaf; + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } + + void PreallocateMoreMemory() { + // Allocate enough to keep track of all possible pages + Ensure(0, 1 << BITS); + } +}; + +// Three-level radix tree +template +class TCMalloc_PageMap3 { +private: + // How many bits should we consume at each interior level + static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up + static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS; + + // How many bits should we consume at leaf level + static const int LEAF_BITS = BITS - 2 * INTERIOR_BITS; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + // Interior node + struct Node { + Node* ptrs[INTERIOR_LENGTH]; + }; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Node* root_; // Root of radix tree + void* (*allocator_)(size_t); // Memory allocator + + Node* NewNode() { + // Node* result = reinterpret_cast((*allocator_)(sizeof(Node))); + static object_pool node_pool; + Node* result = (Node*) node_pool.new_(); + if (result != NULL) { + memset(result, 0, sizeof(*result)); + } + return result; + } + +public: + typedef uintptr_t Number; + + // explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) { + explicit TCMalloc_PageMap3() { + // allocator_ = allocator; + root_ = NewNode(); + } + + void* get(Number k) const { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH - 1); + const Number i3 = k & (LEAF_LENGTH - 1); + if ((k >> BITS) > 0 || root_->ptrs[i1] == NULL || root_->ptrs[i1]->ptrs[i2] == NULL) { + return NULL; + } + return reinterpret_cast(root_->ptrs[i1]->ptrs[i2])->values[i3]; + } + + void set(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH - 1); + const Number i3 = k & (LEAF_LENGTH - 1); + reinterpret_cast(root_->ptrs[i1]->ptrs[i2])->values[i3] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1;) { + const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH - 1); + + // Check for overflow + if (i1 >= INTERIOR_LENGTH || i2 >= INTERIOR_LENGTH) + return false; + + // Make 2nd level node if necessary + if (root_->ptrs[i1] == NULL) { + Node* n = NewNode(); + if (n == NULL) + return false; + root_->ptrs[i1] = n; + } + + // Make leaf node if necessary + if (root_->ptrs[i1]->ptrs[i2] == NULL) { + Leaf* leaf = reinterpret_cast((*allocator_)(sizeof(Leaf))); + if (leaf == NULL) + return false; + memset(leaf, 0, sizeof(*leaf)); + root_->ptrs[i1]->ptrs[i2] = reinterpret_cast(leaf); + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } + + void PreallocateMoreMemory() { + } +}; + +#endif \ No newline at end of file diff --git a/include/tcmalloc.hpp b/include/tcmalloc.hpp index 8d84bb0..2596f4c 100644 --- a/include/tcmalloc.hpp +++ b/include/tcmalloc.hpp @@ -4,6 +4,7 @@ #include "common.hpp" #include "log.hpp" +#include "object_pool.hpp" #include "page_cache.hpp" #include "thread_cache.hpp" @@ -14,20 +15,26 @@ static void* tcmalloc(size_t size) { size_t k_page = align_size >> PAGE_SHIFT; page_cache::get_instance()->__page_mtx.lock(); span* cur_span = page_cache::get_instance()->new_span(k_page); // 直接找pc + cur_span->__obj_size = size; page_cache::get_instance()->__page_mtx.unlock(); void* ptr = (void*)(cur_span->__page_id << PAGE_SHIFT); // span转化成地址 return ptr; } - if (p_tls_thread_cache == nullptr) + if (p_tls_thread_cache == nullptr) { // 相当于单例 - p_tls_thread_cache = new thread_cache; + // p_tls_thread_cache = new thread_cache; + static object_pool tc_pool; + p_tls_thread_cache = tc_pool.new_(); + } #ifdef PROJECT_DEBUG LOG(DEBUG) << "tcmalloc find tc from mem" << std::endl; #endif return p_tls_thread_cache->allocate(size); } -static void tcfree(void* ptr, size_t size) { +static void tcfree(void* ptr) { + span* s = page_cache::get_instance()->map_obj_to_span(ptr); // 找到这个span就能找到obj_size了 + size_t size = s->__obj_size; // 找到大小了 if (size > MAX_BYTES) { span* s = page_cache::get_instance()->map_obj_to_span(ptr); // 找到这个span page_cache::get_instance()->__page_mtx.lock(); diff --git a/makefile b/makefile index f1892c4..8155695 100644 --- a/makefile +++ b/makefile @@ -1,7 +1,19 @@ -out: *.cc ./src/*.cc - g++ -o $@ $^ -std=c++11 -lpthread -debug: *.cc ./src/*.cc - g++ -o $@ $^ -std=c++11 -lpthread -DPROJECT_DEBUG +out: bench_mark.cc ./src/*.cc + g++ -o $@ $^ -std=c++11 -lpthread -m32 +debug: bench_mark.cc ./src/*.cc + g++ -o $@ $^ -std=c++11 -lpthread -DPROJECT_DEBUG -g -m32 +unit: unit_test.cc ./src/*.cc + g++ -o $@ $^ -std=c++11 -lpthread -DPROJECT_DEBUG -g -m32 .PHONY:clean clean: - rm -f out debug \ No newline at end of file + rm -f out debug + +# out: bench_mark.cc ./src/*.cc +# arm-linux-gnueabihf-g++ -o $@ $^ -std=c++11 -lpthread +# debug: bench_mark.cc ./src/*.cc +# arm-linux-gnueabihf-g++ -o $@ $^ -std=c++11 -lpthread -DPROJECT_DEBUG -g +# unit: unit_test.cc ./src/*.cc +# arm-linux-gnueabihf-g++ -o $@ $^ -std=c++11 -lpthread -DPROJECT_DEBUG -g +# .PHONY:clean +# clean: +# rm -f out debug unit diff --git a/src/central_cache.cc b/src/central_cache.cc index 92230e1..c1b14f2 100644 --- a/src/central_cache.cc +++ b/src/central_cache.cc @@ -54,6 +54,7 @@ span* central_cache::get_non_empty_span(span_list& list, size_t size) { page_cache::get_instance()->__page_mtx.lock(); span* cur_span = page_cache::get_instance()->new_span(size_class::num_move_page(size)); cur_span->__is_use = true; // 表示已经被使用 + cur_span->__obj_size = size; page_cache::get_instance()->__page_mtx.unlock(); #ifdef PROJECT_DEBUG LOG(DEBUG) << "central_cache::get_non_empty_span() get new span success" << std::endl; @@ -70,11 +71,15 @@ span* central_cache::get_non_empty_span(span_list& list, size_t size) { #ifdef PROJECT_DEBUG LOG(DEBUG) << "central_cache::get_non_empty_span() cut span" << std::endl; #endif + int i = 1; while (addr_start < addr_end) { - free_list::__next_obj(tail) = addr_start; + ++i; + free_list::__next_obj(tail) = addr_start; // tail不是空指针 + // std::cerr << "here" << std::endl; tail = free_list::__next_obj(tail); addr_start += size; } + free_list::__next_obj(tail) = nullptr; // 恢复锁 list.__bucket_mtx.lock(); list.push_front(cur_span); diff --git a/src/page_cache.cc b/src/page_cache.cc index eafda69..805b622 100644 --- a/src/page_cache.cc +++ b/src/page_cache.cc @@ -10,11 +10,12 @@ span* page_cache::new_span(size_t k) { // 处理大内存情况 if (k > PAGES_NUM - 1) { void* ptr = system_alloc(k); - span* cur_span = new span; + span* cur_span = __span_pool.new_(); cur_span->__page_id = (PAGE_ID)ptr >> PAGE_SHIFT; cur_span->__n = k; // map记录一下 - __id_span_map[cur_span->__page_id] = cur_span; + // __id_span_map[cur_span->__page_id] = cur_span; + __id_span_map.set(cur_span->__page_id, cur_span); return cur_span; } // 先检查第k个桶是否有span @@ -25,7 +26,8 @@ span* page_cache::new_span(size_t k) { span* s = __span_lists[k].pop_front(); // ? __span_lists->pop_front(); // 建立id和span的映射,方便central cache回收小块内存时,查找对应的span for (PAGE_ID i = 0; i < s->__n; ++i) { - __id_span_map[s->__page_id + i] = s; + // __id_span_map[s->__page_id + i] = s; + __id_span_map.set(s->__page_id + i, s); } return s; } @@ -39,7 +41,7 @@ span* page_cache::new_span(size_t k) { // 假设这个页是n页的,需要的是k页的 // 1. 从__span_lists中拿下来 2. 切开 3. 一个返回给cc 4. 另一个挂到 n-k 号桶里面去 span* n_span = __span_lists[i].pop_front(); - span* k_span = new span; + span* k_span = __span_pool.new_(); // 在n_span头部切除k页下来 k_span->__page_id = n_span->__page_id; // <1> k_span->__n = k; // <2> @@ -56,11 +58,14 @@ span* page_cache::new_span(size_t k) { // 剩下的挂到相应位置 __span_lists[n_span->__n].push_front(n_span); // 存储n_span的首尾页号跟n_span的映射,方便pc回收内存时进行合并查找 - __id_span_map[n_span->__page_id] = n_span; - __id_span_map[n_span->__page_id + n_span->__n - 1] = n_span; + // __id_span_map[n_span->__page_id] = n_span; + __id_span_map.set(n_span->__page_id, n_span); + // __id_span_map[n_span->__page_id + n_span->__n - 1] = n_span; + __id_span_map.set(n_span->__page_id + n_span->__n - 1, n_span); // 这里记录映射(简历id和span的映射,方便cc回收小块内存时,查找对应的span) for (PAGE_ID j = 0; j < k_span->__n; j++) { - __id_span_map[k_span->__page_id + j] = k_span; + // __id_span_map[k_span->__page_id + j] = k_span; + __id_span_map.set(k_span->__page_id + j, k_span); } #ifdef PROJECT_DEBUG LOG(DEBUG) << "page_cache::new_span() have span, return" << std::endl; @@ -72,7 +77,7 @@ span* page_cache::new_span(size_t k) { LOG(DEBUG) << "page_cache::new_span() cannot find span, goto os for mem" << std::endl; #endif // 走到这里,说明找不到span了:找os要 - span* big_span = new span; + span* big_span = __span_pool.new_(); void* ptr = system_alloc(PAGES_NUM - 1); big_span->__page_id = (PAGE_ID)ptr >> PAGE_SHIFT; big_span->__n = PAGES_NUM - 1; @@ -84,12 +89,18 @@ span* page_cache::new_span(size_t k) { span* page_cache::map_obj_to_span(void* obj) { // 先把页号算出来 PAGE_ID id = (PAGE_ID)obj >> PAGE_SHIFT; // 这个理论推导可以自行推导一下 - auto ret = __id_span_map.find(id); - if (ret != __id_span_map.end()) - return ret->second; - LOG(FATAL) << std::endl; - assert(false); - return nullptr; + // std::unique_lock lock(__page_mtx); // 用一个RAII的锁 + // auto ret = __id_span_map.find(id); + // if (ret != __id_span_map.end()) + // return ret->second; + // LOG(FATAL) << std::endl; + // assert(false); + // return nullptr; + + // 换成radix树之后就不用加锁了 + auto ret = (span*)__id_span_map.get(id); + assert(ret != nullptr); // 表示没找到 + return ret; } void page_cache::release_span_to_page(span* s, size_t size) { @@ -99,7 +110,8 @@ void page_cache::release_span_to_page(span* s, size_t size) { // 处理大内存 void* ptr = (void*)(s->__page_id << PAGE_SHIFT); system_free(s, size); - delete s; + // delete s; + __span_pool.delete_(s); return; } // 对span前后对页尝试进行合并,缓解内存碎片问题 @@ -108,10 +120,14 @@ void page_cache::release_span_to_page(span* s, size_t size) { // 拿到id如何找span: 之前写好的map能拿到吗? // 找到了,如果isuse是false,就能合并了(向前合并+向后合并) // 如果遇到了合并大小超过了128页了,也要停止了 - auto ret = __id_span_map.find(prev_id); - if (ret == __id_span_map.end()) // 前面的页号没有了,不合并了 + // auto ret = __id_span_map.find(prev_id); + // if (ret == __id_span_map.end()) // 前面的页号没有了,不合并了 + // break; + auto ret = (span*)__id_span_map.get(prev_id); + if (ret == nullptr) break; - span* prev_span = ret->second; + // span* prev_span = ret->second; + span* prev_span = ret; if (prev_span->__is_use == true) // 前面相邻页的span在使用,不合并了 break; if (prev_span->__n + s->__n > PAGES_NUM - 1) // 合并出超过128页的span没办法管理,不合并了 @@ -119,14 +135,19 @@ void page_cache::release_span_to_page(span* s, size_t size) { s->__page_id = prev_span->__page_id; s->__n += prev_span->__n; __span_lists[prev_span->__n].erase(prev_span); // 防止野指针,删掉 - delete prev_span; // 删掉这个span + // delete prev_span; // 删掉这个span + __span_pool.delete_(prev_span); // 删掉这个span } // 向前合并的逻辑 while end; while (true) { PAGE_ID next_id = s->__page_id + s->__n; // 注意这里的页号是+n了 - auto ret = __id_span_map.find(next_id); - if (ret == __id_span_map.end()) // 后面的页号没有了 + // auto ret = __id_span_map.find(next_id); + // if (ret == __id_span_map.end()) // 后面的页号没有了 + // break; + auto ret = (span*)__id_span_map.get(next_id); + if (ret == nullptr) break; - span* next_span = ret->second; + // span* next_span = ret->second; + span* next_span = ret; if (next_span->__is_use == true) // 后面相邻页的span在使用,不合并了 break; if (next_span->__n + s->__n > PAGES_NUM - 1) // 合并出超过128页的span没办法管理,不合并了 @@ -134,12 +155,15 @@ void page_cache::release_span_to_page(span* s, size_t size) { s->__page_id; // 起始页号不用变了,因为是向后合并 s->__n += next_span->__n; __span_lists[next_span->__n].erase(next_span); // 防止野指针,删掉 - delete next_span; + // delete next_span; + __span_pool.delete_(next_span); } // 已经合并完成了,把东西挂起来 __span_lists[s->__n].push_front(s); s->__is_use = false; // 处理一下映射,方便别人找到我 - __id_span_map[s->__page_id] = s; - __id_span_map[s->__page_id + s->__n - 1] = s; + // __id_span_map[s->__page_id] = s; + // __id_span_map[s->__page_id + s->__n - 1] = s; + __id_span_map.set(s->__page_id, s); + __id_span_map.set(s->__page_id + s->__n - 1, s); } \ No newline at end of file diff --git a/unit_test.cc b/unit_test.cc index 52a9dbe..6f1d368 100644 --- a/unit_test.cc +++ b/unit_test.cc @@ -52,11 +52,14 @@ void test_dealloc(int alloc_times = 10) { // 生成并输出随机数 std::map s; for (int i = 0; i < alloc_times; i++) { - int sz = distrib(gen); - s.insert({ tcmalloc(sz), sz }); // 申请随机值 + size_t sz = distrib(gen); + std::cout << sz << std::endl; + void* ptr = tcmalloc(sz); + std::cout << "malloc successful" << std::endl; + s.insert({ ptr, sz }); // 申请随机值 } for (auto& e : s) { - tcfree(e.first, e.second); + tcfree(e.first /*, e.second*/); } } @@ -68,12 +71,26 @@ void test_multi_thread() { std::cout << "run successful" << std::endl; } +void run() { + void* ptr = tcmalloc(5949); + tcfree(ptr); + std::cout << "run successful" << std::endl; +} + void big_alloc() { - void* ptr = tcmalloc(8 * 127 * 1024); - tcfree(ptr, 8 * 127 * 1024); + std::thread t1(run); + t1.join(); } int main() { - big_alloc(); +// std::cout << "haha" << std::endl; +// big_alloc(); +#ifdef __aarch64__ + std::cout << "64" << std::endl; +#elif defined(__arm__) + std::cout << "32" << std::endl; +#else + std::cout << "unknown sys" << std::endl; +#endif return 0; } \ No newline at end of file