namespace art {
namespace gc {
namespace allocator {
// Heap hook: grows (or shrinks, with a negative increment) the memory backing
// this RosAlloc by `increment` bytes. Called under lock_ from AllocPages().
extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);
// If true, touch the data pages of a freshly allocated run that is likely to
// become a thread-local run (see AllocRun()).
static constexpr bool kUsePrefetchDuringAllocRun = true;
// If true, "prefetch" by zeroing the run data instead of __builtin_prefetch.
// Zeroing can be faster but dirties pages that may have been madvised away.
static constexpr bool kPrefetchNewRunDataByZeroing = false;
// Byte stride between prefetch touches when prefetching new run data.
static constexpr size_t kPrefetchStride = 64;
// Per-size-bracket tables; filled in lazily by Initialize().
size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
bool RosAlloc::initialized_ = false;
// One page of zeroed storage reinterpreted as the shared "dedicated full run":
// a run with no free slots, used as a sentinel current/thread-local run.
size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 };
RosAlloc::Run* RosAlloc::dedicated_full_run_ =
    reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_);
// Constructs a RosAlloc over the pre-mapped region [base, base + max_capacity),
// with an initial footprint of `capacity` bytes. All sizes must be page aligned.
RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
                   PageReleaseMode page_release_mode, size_t page_release_size_threshold)
    : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
      capacity_(capacity), max_capacity_(max_capacity),
      lock_("rosalloc global lock", kRosAllocGlobalLock),
      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock),
      page_release_mode_(page_release_mode),
      page_release_size_threshold_(page_release_size_threshold) {
  DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
  DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
  CHECK_LE(capacity, max_capacity);
  CHECK(IsAligned<kPageSize>(page_release_size_threshold_));
  if (!initialized_) {
    // Lazily fill in the static size-bracket tables (sizes, slot counts,
    // header sizes, bitmap offsets).
    Initialize();
  }
  VLOG(heap) << "RosAlloc base="
             << std::hex << (intptr_t)base_ << ", end="
             << std::hex << (intptr_t)(base_ + capacity_)
             << ", capacity=" << std::dec << capacity_
             << ", max_capacity=" << std::dec << max_capacity_;
  // Every size bracket starts out pointing at the dedicated full run so the
  // first allocation in a bracket falls through to RefillRun().
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    size_bracket_lock_names_[i] =
        StringPrintf("an rosalloc size bracket %d lock", static_cast<int>(i));
    size_bracket_locks_[i] = new Mutex(size_bracket_lock_names_[i].c_str(), kRosAllocBracketLock);
    current_runs_[i] = dedicated_full_run_;
  }
  DCHECK_EQ(footprint_, capacity_);
  size_t num_of_pages = footprint_ / kPageSize;
  size_t max_num_of_pages = max_capacity_ / kPageSize;
  std::string error_msg;
  page_map_mem_map_.reset(MemMap::MapAnonymous("rosalloc page map", NULL, RoundUp(max_num_of_pages, kPageSize),
                                               PROT_READ | PROT_WRITE, false, &error_msg));
  CHECK(page_map_mem_map_.get() != nullptr) << "Couldn't allocate the page map : " << error_msg;
  // This anonymously-mapped region backs the page map, which tracks the state
  // (empty / run / large object / part) of every page in the heap.
  page_map_ = page_map_mem_map_->Begin();
  page_map_size_ = num_of_pages;
  max_page_map_size_ = max_num_of_pages;
  free_page_run_size_map_.resize(num_of_pages);
  // The initial free page run covers the whole heap; its base is the heap base
  // handed in at construction (the same base the rosalloc space was created with).
  FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
  if (kIsDebugBuild) {
    free_pages->magic_num_ = kMagicNumFree;
  }
  free_pages->SetByteSize(this, capacity_);
  DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
  DCHECK(free_pages->IsFree());
  free_pages->ReleasePages(this);
  DCHECK(free_pages->IsFree());
  free_page_runs_.insert(free_pages);
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
              << reinterpret_cast<intptr_t>(free_pages)
              << " into free_page_runs_";
  }
}
// Releases the per-size-bracket locks created in the constructor. The heap
// memory itself is owned and unmapped by the enclosing space, not by RosAlloc.
RosAlloc::~RosAlloc() {
  for (Mutex* bracket_lock : size_bracket_locks_) {
    delete bracket_lock;
  }
}
// Allocates `num_pages` contiguous pages from the free page runs, growing the
// footprint via art_heap_rosalloc_morecore() if nothing fits. Used both for
// large objects and for backing new runs (page_map_type selects which).
// The caller must hold lock_. Returns the page-aligned base address of the
// allocated range, or null on failure.
void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
  lock_.AssertHeld(self);
  DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
  FreePageRun* res = NULL;
  const size_t req_byte_size = num_pages * kPageSize;
  // Find the lowest address free page run that's large enough.
  for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
    FreePageRun* fpr = *it;
    DCHECK(fpr->IsFree());
    size_t fpr_byte_size = fpr->ByteSize(this);
    DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
    if (req_byte_size <= fpr_byte_size) {
      // Found one.
      free_page_runs_.erase(it++);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
                  << std::hex << reinterpret_cast<intptr_t>(fpr)
                  << " from free_page_runs_";
      }
      // Split off the unused tail, if any.
      if (req_byte_size < fpr_byte_size) {
        // Split.
        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
        if (kIsDebugBuild) {
          remainder->magic_num_ = kMagicNumFree;
        }
        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        // Don't need to call madvise on remainder here.
        // Put the remainder back into the free page run set.
        free_page_runs_.insert(remainder);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
                    << reinterpret_cast<intptr_t>(remainder)
                    << " into free_page_runs_";
        }
        fpr->SetByteSize(this, req_byte_size);
        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
      }
      res = fpr;
      break;
    } else {
      ++it;
    }
  }
  // Failed to allocate pages. Grow the footprint, if possible.
  if (UNLIKELY(res == NULL && capacity_ > footprint_)) {
    FreePageRun* last_free_page_run = NULL;
    size_t last_free_page_run_size;
    // Reverse iterator to the highest-address free page run, if any.
    auto it = free_page_runs_.rbegin();
    // Is there a free page run flush against the end of the current footprint?
    if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
      // There is a free page run at the end.
      DCHECK(last_free_page_run->IsFree());
      DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
      last_free_page_run_size = last_free_page_run->ByteSize(this);
    } else {
      // There is no free page run at the end.
      last_free_page_run_size = 0;
    }
    DCHECK_LT(last_free_page_run_size, req_byte_size);
    // Can the remaining growth headroom, plus the trailing free run, satisfy
    // the request?
    if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
      // If we grow the heap, we can allocate it.
      // Grow by at least 2 MB at a time (but never past capacity_).
      size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
                                  capacity_ - footprint_);
      DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
      size_t new_footprint = footprint_ + increment;
      size_t new_num_of_pages = new_footprint / kPageSize;
      DCHECK_LT(page_map_size_, new_num_of_pages);
      DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
      page_map_size_ = new_num_of_pages;
      DCHECK_LE(page_map_size_, max_page_map_size_);
      free_page_run_size_map_.resize(new_num_of_pages);
      art_heap_rosalloc_morecore(this, increment);
      if (last_free_page_run_size > 0) {
        // There was a free page run at the end. Expand its size to absorb the
        // newly grown pages; otherwise create a fresh run over them.
        DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
        last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
        DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        DCHECK_EQ(last_free_page_run->End(this), base_ + new_footprint);
      } else {
        // Otherwise, insert a new free page run at the end.
        FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
        if (kIsDebugBuild) {
          new_free_page_run->magic_num_ = kMagicNumFree;
        }
        new_free_page_run->SetByteSize(this, increment);
        DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        free_page_runs_.insert(new_free_page_run);
        DCHECK_EQ(*free_page_runs_.rbegin(), new_free_page_run);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Grew the heap by inserting run 0x"
                    << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
                    << " into free_page_runs_";
        }
      }
      DCHECK_LE(footprint_ + increment, capacity_);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
                  << footprint_ << " to " << new_footprint;
      }
      footprint_ = new_footprint;
      // And retry the last free page run, which is now guaranteed to be large
      // enough for the request.
      it = free_page_runs_.rbegin();
      DCHECK(it != free_page_runs_.rend());
      FreePageRun* fpr = *it;
      if (kIsDebugBuild && last_free_page_run_size > 0) {
        DCHECK(last_free_page_run != NULL);
        DCHECK_EQ(last_free_page_run, fpr);
      }
      size_t fpr_byte_size = fpr->ByteSize(this);
      DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
      DCHECK_LE(req_byte_size, fpr_byte_size);
      free_page_runs_.erase(fpr);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
                  << " from free_page_runs_";
      }
      // Split if there's a remainder.
      if (req_byte_size < fpr_byte_size) {
        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
        if (kIsDebugBuild) {
          remainder->magic_num_ = kMagicNumFree;
        }
        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        free_page_runs_.insert(remainder);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
                    << reinterpret_cast<intptr_t>(remainder)
                    << " into free_page_runs_";
        }
        fpr->SetByteSize(this, req_byte_size);
        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
      }
      res = fpr;
    }
  }
  if (LIKELY(res != NULL)) {
    // Update the page map for the pages just allocated: the first page gets
    // the run/large-object marker, the rest get the matching "part" marker.
    size_t page_map_idx = ToPageMapIndex(res);
    for (size_t i = 0; i < num_pages; i++) {
      DCHECK(IsFreePage(page_map_idx + i));
    }
    switch (page_map_type) {
      case kPageMapRun:
        page_map_[page_map_idx] = kPageMapRun;
        for (size_t i = 1; i < num_pages; i++) {
          page_map_[page_map_idx + i] = kPageMapRunPart;
        }
        break;
      case kPageMapLargeObject:
        page_map_[page_map_idx] = kPageMapLargeObject;
        for (size_t i = 1; i < num_pages; i++) {
          page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
        }
        break;
      default:
        LOG(FATAL) << "Unreachable - page map type: " << page_map_type;
        break;
    }
    if (kIsDebugBuild) {
      // Clear the first page since it is not madvised due to the magic number.
      memset(res, 0, kPageSize);
    }
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
                << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
                << "(" << std::dec << (num_pages * kPageSize) << ")";
    }
    return res;
  }
  // Fail.
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::AllocPages() : NULL";
  }
  return nullptr;
}
// Returns the pages starting at `ptr` (a run or large-object base) to the free
// page runs, coalescing with adjacent free runs in both directions. The caller
// must hold lock_. If `already_zero` is set the memory is expected to be zero
// (checked in debug when kCheckZeroMemory); otherwise it is zeroed here unless
// all pages get released (madvised) anyway. Returns the number of bytes freed.
size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
  lock_.AssertHeld(self);
  size_t pm_idx = ToPageMapIndex(ptr);
  DCHECK_LT(pm_idx, page_map_size_);
  // Look up the page type for this index; it must be the first page of a run
  // or a large object.
  byte pm_type = page_map_[pm_idx];
  DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
  byte pm_part_type;
  switch (pm_type) {
    case kPageMapRun:
      pm_part_type = kPageMapRunPart;
      break;
    case kPageMapLargeObject:
      pm_part_type = kPageMapLargeObjectPart;
      break;
    default:
      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx << ", pm_type="
                 << static_cast<int>(pm_type) << ", ptr=" << std::hex
                 << reinterpret_cast<intptr_t>(ptr);
      return 0;
  }
  // Update the page map and count the number of pages.
  size_t num_pages = 1;
  page_map_[pm_idx] = kPageMapEmpty;
  size_t idx = pm_idx + 1;
  size_t end = page_map_size_;
  // Mark all following "part" pages of this allocation as empty.
  while (idx < end && page_map_[idx] == pm_part_type) {
    page_map_[idx] = kPageMapEmpty;
    num_pages++;
    idx++;
  }
  const size_t byte_size = num_pages * kPageSize;
  if (already_zero) {
    if (kCheckZeroMemory) {
      const uword* word_ptr = reinterpret_cast<uword*>(ptr);
      for (size_t i = 0; i < byte_size / sizeof(uword); ++i) {
        CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
      }
    }
  } else if (!DoesReleaseAllPages()) {
    memset(ptr, 0, byte_size);
  }
  if (kTraceRosAlloc) {
    LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
              << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
              << "(" << std::dec << (num_pages * kPageSize) << ")";
  }
  // Turn it into a free run.
  // Reinterpret the freed range as a FreePageRun header.
  FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
  if (kIsDebugBuild) {
    fpr->magic_num_ = kMagicNumFree;
  }
  fpr->SetByteSize(this, byte_size);
  DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this)));
  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
  if (!free_page_runs_.empty()) {
    // Try to coalesce in the higher address direction.
    if (kTraceRosAlloc) {
      LOG(INFO) << __PRETTY_FUNCTION__ << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
                << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
                << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
                << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this))) << "]";
    }
    auto higher_it = free_page_runs_.upper_bound(fpr);
    // Absorb contiguous higher-address free runs into fpr.
    if (higher_it != free_page_runs_.end()) {
      for (auto it = higher_it; it != free_page_runs_.end(); ) {
        FreePageRun* h = *it;
        DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
                    << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
                    << std::hex << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
                    << (h->End(this) == End() ? page_map_size_ : ToPageMapIndex(h->End(this))) << "]";
        }
        if (fpr->End(this) == h->Begin()) {
          if (kTraceRosAlloc) {
            LOG(INFO) << "Success";
          }
          // Clear magic num since this is no longer the start of a free page run.
          if (kIsDebugBuild) {
            h->magic_num_ = 0;
          }
          free_page_runs_.erase(it++);
          if (kTraceRosAlloc) {
            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
                      << reinterpret_cast<intptr_t>(h)
                      << " from free_page_runs_";
          }
          fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
          DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        } else {
          // Not adjacent. Stop.
          if (kTraceRosAlloc) {
            LOG(INFO) << "Fail";
          }
          break;
        }
      }
    }
    // Try to coalesce in the lower address direction.
    // Walk downward from the run just below fpr, merging while adjacent.
    auto lower_it = free_page_runs_.upper_bound(fpr);
    if (lower_it != free_page_runs_.begin()) {
      --lower_it;
      for (auto it = lower_it; ; ) {
        // We want to try to coalesce with the first element but
        // there's no "<=" operator for the iterator.
        bool to_exit_loop = it == free_page_runs_.begin();
        FreePageRun* l = *it;
        DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
                    << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
                    << std::hex << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
                    << (l->End(this) == End() ? page_map_size_ : ToPageMapIndex(l->End(this))) << "]";
        }
        if (l->End(this) == fpr->Begin()) {
          if (kTraceRosAlloc) {
            LOG(INFO) << "Success";
          }
          free_page_runs_.erase(it--);
          if (kTraceRosAlloc) {
            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
                      << reinterpret_cast<intptr_t>(l)
                      << " from free_page_runs_";
          }
          l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
          DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
          // Clear magic num since this is no longer the start of a free page run.
          if (kIsDebugBuild) {
            fpr->magic_num_ = 0;
          }
          // The lower run now covers fpr; continue growing from it.
          fpr = l;
        } else {
          // Not adjacent. Stop.
          if (kTraceRosAlloc) {
            LOG(INFO) << "Fail";
          }
          break;
        }
        if (to_exit_loop) {
          break;
        }
      }
    }
  }
  // Insert it.
  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
  DCHECK(fpr->IsFree());
  fpr->ReleasePages(this);
  DCHECK(fpr->IsFree());
  free_page_runs_.insert(fpr);
  DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
              << " into free_page_runs_";
  }
  return byte_size;
}
// Allocates a large object (> kLargeSizeThreshold) by grabbing whole pages
// via AllocPages(). Stores the page-rounded byte count in *bytes_allocated.
// Returns null if no pages could be obtained.
void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
  DCHECK_GT(size, kLargeSizeThreshold);
  const size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
  void* mem = nullptr;
  {
    MutexLock mu(self, lock_);
    mem = AllocPages(self, num_pages, kPageMapLargeObject);
  }
  if (UNLIKELY(mem == nullptr)) {
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocLargeObject() : NULL";
    }
    return nullptr;
  }
  const size_t total_bytes = num_pages * kPageSize;
  *bytes_allocated = total_bytes;
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(mem)
              << "-0x" << (reinterpret_cast<intptr_t>(mem) + total_bytes)
              << "(" << std::dec << total_bytes << ")";
  }
  // Check if the returned memory is really all zero.
  if (kCheckZeroMemory) {
    CHECK_EQ(total_bytes % sizeof(uword), 0U);
    const uword* words = reinterpret_cast<uword*>(mem);
    const size_t num_words = total_bytes / sizeof(uword);
    for (size_t i = 0; i < num_words; ++i) {
      CHECK_EQ(words[i], 0U);
    }
  }
  return mem;
}
// Frees the object at `ptr`, dispatching on the page map entry: large objects
// go straight back to the free page runs; run slots are handed to
// FreeFromRun() after locating the run's first page. Returns the number of
// bytes freed. Caller must hold bulk_free_lock_ (shared) via Free()/BulkFree().
size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
  DCHECK_LE(base_, ptr);
  DCHECK_LT(ptr, base_ + footprint_);
  size_t pm_idx = RoundDownToPageMapIndex(ptr);
  Run* run = nullptr;
  {
    MutexLock mu(self, lock_);
    DCHECK_LT(pm_idx, page_map_size_);
    byte page_map_entry = page_map_[pm_idx];
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec << pm_idx
                << ", page_map_entry=" << static_cast<int>(page_map_entry);
    }
    switch (page_map_[pm_idx]) {
      case kPageMapLargeObject:
        // A large object: return its pages to the free page runs directly.
        return FreePages(self, ptr, false);
      case kPageMapLargeObjectPart:
        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
        return 0;
      case kPageMapRunPart: {
        // Find the beginning of the run.
        do {
          --pm_idx;
          DCHECK_LT(pm_idx, capacity_ / kPageSize);
        } while (page_map_[pm_idx] != kPageMapRun);
      // Fall-through.
      case kPageMapRun:
        run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
        DCHECK_EQ(run->magic_num_, kMagicNum);
        break;
      case kPageMapReleased:
        // Fall-through.
      case kPageMapEmpty:
        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
        return 0;
      }
      default:
        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
        return 0;
    }
  }
  DCHECK(run != nullptr);
  // Free a slot that was allocated from a run.
  return FreeFromRun(self, ptr, run);
}
// Public free entry point. Holds the bulk-free lock in shared (reader) mode so
// concurrent BulkFree passes are excluded, then delegates to FreeInternal().
size_t RosAlloc::Free(Thread* self, void* ptr) {
  ReaderMutexLock reader_lock(self, bulk_free_lock_);
  return FreeInternal(self, ptr);
}
// Allocates and initializes a new run for size bracket `idx`, backed by pages
// taken from the free page runs. Returns null if the pages could not be
// obtained.
RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) {
  RosAlloc::Run* new_run = nullptr;
  {
    MutexLock mu(self, lock_);
    // Carve the run's pages out of the free page runs.
    new_run = reinterpret_cast<Run*>(AllocPages(self, numOfPages[idx], kPageMapRun));
  }
  if (LIKELY(new_run != nullptr)) {
    if (kIsDebugBuild) {
      new_run->magic_num_ = kMagicNum;
    }
    new_run->size_bracket_idx_ = idx;
    // Mark the trailing bitmap bits that don't correspond to valid slots as
    // allocated so they are never handed out.
    new_run->SetAllocBitMapBitsForInvalidSlots();
    DCHECK(!new_run->IsThreadLocal());
    DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
    DCHECK(!new_run->to_be_bulk_freed_);
    if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) {
      // Take ownership of the cache lines if we are likely to be thread local run.
      if (kPrefetchNewRunDataByZeroing) {
        // Zeroing the data is sometimes faster than prefetching but it increases memory usage
        // since we end up dirtying zero pages which may have been madvised.
        new_run->ZeroData();
      } else {
        const size_t num_of_slots = numOfSlots[idx];
        const size_t bracket_size = bracketSizes[idx];
        const size_t num_of_bytes = num_of_slots * bracket_size;
        byte* begin = reinterpret_cast<byte*>(new_run) + headerSizes[idx];
        // Touch the data area one cache-line stride at a time.
        for (size_t i = 0; i < num_of_bytes; i += kPrefetchStride) {
          __builtin_prefetch(begin + i);
        }
      }
    }
  }
  return new_run;
}
// Obtains a run to serve allocations for size bracket `idx`: reuses the
// lowest-address non-full run if one exists (removing it from the set),
// otherwise allocates a brand-new run.
RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
  // Get the lowest address non-full run from the binary tree.
  std::set<Run*>* const non_full_set = &non_full_runs_[idx];
  auto lowest = non_full_set->begin();
  if (lowest != non_full_set->end()) {
    // Found one; take it out of the set and use it as the current run.
    Run* reusable_run = *lowest;
    DCHECK(reusable_run != nullptr);
    DCHECK(!reusable_run->IsThreadLocal());
    non_full_set->erase(lowest);
    return reusable_run;
  }
  // No reusable run; carve a fresh one out of the free pages.
  return AllocRun(self, idx);
}
// Allocates a slot from the shared current run of bracket `idx`, refilling the
// current run when it is full. The caller must hold size_bracket_locks_[idx].
// Returns null only if a replacement run could not be allocated.
inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
  Run* current_run = current_runs_[idx];
  DCHECK(current_run != nullptr);
  void* slot_addr = current_run->AllocSlot();
  // Slot allocation failed: the current run is full (or is the dedicated
  // full-run sentinel).
  if (UNLIKELY(slot_addr == nullptr)) {
    // The current run got full. Try to refill it.
    DCHECK(current_run->IsFull());
    if (kIsDebugBuild && current_run != dedicated_full_run_) {
      full_runs_[idx].insert(current_run);
      if (kTraceRosAlloc) {
        LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
                  << reinterpret_cast<intptr_t>(current_run)
                  << " into full_runs_[" << std::dec << idx << "]";
      }
      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
    }
    // Get a replacement run (reused non-full run or a fresh one).
    current_run = RefillRun(self, idx);
    if (UNLIKELY(current_run == nullptr)) {
      // Failed to allocate a new run, make sure that it is the dedicated full run.
      current_runs_[idx] = dedicated_full_run_;
      return nullptr;
    }
    DCHECK(current_run != nullptr);
    DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
    DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
    current_run->SetIsThreadLocal(false);
    current_runs_[idx] = current_run;
    DCHECK(!current_run->IsFull());
    slot_addr = current_run->AllocSlot();
    // Must succeed now with a new run.
    DCHECK(slot_addr != nullptr);
  }
  return slot_addr;
}
// Unsynchronized variant of AllocFromRun(): requires the mutator lock held
// exclusively (e.g. during a suspended-world phase) and always allocates from
// the shared current run, never from a thread-local run.
void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) {
  DCHECK_LE(size, kLargeSizeThreshold);
  size_t bracket_size;
  const size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
  DCHECK_EQ(idx, SizeToIndex(size));
  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
  DCHECK_EQ(bracket_size, bracketSizes[idx]);
  DCHECK_LE(size, bracket_size);
  DCHECK(size > 512 || bracket_size - size < 16);
  Locks::mutator_lock_->AssertExclusiveHeld(self);
  // Allocate directly from the shared current run of this bracket.
  void* const slot = AllocFromCurrentRunUnlocked(self, idx);
  if (LIKELY(slot != nullptr)) {
    DCHECK(bytes_allocated != nullptr);
    *bytes_allocated = bracket_size;  // Caller verifies that it is all 0.
  }
  return slot;
}
// Allocates `size` bytes from a run. Small brackets use the calling thread's
// thread-local run (refilled under the bracket lock when exhausted); larger
// brackets use the shared current run under the bracket lock. Stores the
// bracket size in *bytes_allocated. Returns null on out-of-memory.
// NOTE(review, from original author): the backing memory is presumably mmapped
// before the RosAlloc is created — TODO confirm, and confirm when runs get
// assigned to current_runs_.
void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
  DCHECK_LE(size, kLargeSizeThreshold);
  size_t bracket_size;
  // Map the requested size to its size bracket index.
  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
  DCHECK_EQ(idx, SizeToIndex(size));
  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
  DCHECK_EQ(bracket_size, bracketSizes[idx]);
  DCHECK_LE(size, bracket_size);
  DCHECK(size > 512 || bracket_size - size < 16);
  void* slot_addr;
  if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
    // Use a thread-local run.
    Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
    // Allow invalid since this will always fail the allocation.
    if (kIsDebugBuild) {
      // Need the lock to prevent race conditions.
      MutexLock mu(self, *size_bracket_locks_[idx]);
      CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
      CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
    }
    DCHECK(thread_local_run != nullptr);
    DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
    // Allocate via the run's allocation bitmap.
    slot_addr = thread_local_run->AllocSlot();
    // The allocation must fail if the run is invalid.
    DCHECK(thread_local_run != dedicated_full_run_ || slot_addr == nullptr)
        << "allocated from an invalid run";
    // Allocation failed: no free slot in the thread-local run.
    if (UNLIKELY(slot_addr == nullptr)) {
      // The run got full. Try to free slots.
      DCHECK(thread_local_run->IsFull());
      MutexLock mu(self, *size_bracket_locks_[idx]);
      bool is_all_free_after_merge;
      // This is safe to do for the dedicated_full_run_ since the bitmaps are empty.
      // Merging frees slots whose thread-local free bits are set.
      if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
        DCHECK_NE(thread_local_run, dedicated_full_run_);
        // Some slot got freed. Keep it.
        DCHECK(!thread_local_run->IsFull());
        DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
        if (is_all_free_after_merge) {
          // Check that the bitmap idx is back at 0 if it's all free.
          DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U);
        }
      } else {
        // No slots got freed. Try to refill the thread-local run.
        DCHECK(thread_local_run->IsFull());
        if (thread_local_run != dedicated_full_run_) {
          thread_local_run->SetIsThreadLocal(false);
          if (kIsDebugBuild) {
            full_runs_[idx].insert(thread_local_run);
            if (kTraceRosAlloc) {
              LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
                        << reinterpret_cast<intptr_t>(thread_local_run)
                        << " into full_runs_[" << std::dec << idx << "]";
            }
          }
          DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
          DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
        }
        thread_local_run = RefillRun(self, idx);
        if (UNLIKELY(thread_local_run == nullptr)) {
          self->SetRosAllocRun(idx, dedicated_full_run_);
          return nullptr;
        }
        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
        DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
        thread_local_run->SetIsThreadLocal(true);
        self->SetRosAllocRun(idx, thread_local_run);
        DCHECK(!thread_local_run->IsFull());
      }
      DCHECK(thread_local_run != nullptr);
      DCHECK(!thread_local_run->IsFull());
      DCHECK(thread_local_run->IsThreadLocal());
      slot_addr = thread_local_run->AllocSlot();
      // Must succeed now with a new run.
      DCHECK(slot_addr != nullptr);
    }
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
                << "(" << std::dec << (bracket_size) << ")";
    }
  } else {
    // Use the (shared) current run.
    MutexLock mu(self, *size_bracket_locks_[idx]);
    slot_addr = AllocFromCurrentRunUnlocked(self, idx);
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
                << "(" << std::dec << (bracket_size) << ")";
    }
  }
  DCHECK(bytes_allocated != nullptr);
  *bytes_allocated = bracket_size;
  // Caller verifies that it is all 0.
  return slot_addr;
}
std::string RosAlloc::Run::Dump() {
size_t idx = size_bracket_idx_;
size_t num_slots = numOfSlots[idx];
size_t num_vec = RoundUp(num_slots, 32) / 32;
std::ostringstream stream;
stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
<< "{ magic_num=" << static_cast<int>(magic_num_)
<< " size_bracket_idx=" << idx
<< " is_thread_local=" << static_cast<int>(is_thread_local_)
<< " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
<< " first_search_vec_idx=" << first_search_vec_idx_
<< " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
<< " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
<< " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
<< " }" << std::endl;
return stream.str();
}
// Allocates one slot from this run by scanning the allocation bitmap for the
// first zero bit, starting at first_search_vec_idx_. Returns the slot address,
// or null when the run is full.
inline void* RosAlloc::Run::AllocSlot() {
  const size_t idx = size_bracket_idx_;
  while (true) {
    if (kIsDebugBuild) {
      // Make sure that no slots leaked, the bitmap should be full for all previous vectors.
      for (size_t i = 0; i < first_search_vec_idx_; ++i) {
        CHECK_EQ(~alloc_bit_map_[i], 0U);
      }
    }
    // The alloc bitmap is an array of 32-bit words since a run can have many
    // slots; scan one word at a time.
    uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_];
    // Allocated slots are marked 1, so ffs(~word) gives the 1-based position
    // of the first free slot in this word; 0 means the word is all ones (full).
    uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr);
    if (LIKELY(ffz1 != 0)) {
      const uint32_t ffz = ffz1 - 1;
      // Overall slot index: bit position plus 32 slots per preceding word
      // (kBitsPerByte is 8 bits per byte).
      const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte;
      const uint32_t mask = 1U << ffz;
      DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range";
      // Found an empty slot. Set the bit.
      DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U);
      // Mark the slot as allocated by setting its bit.
      *alloc_bitmap_ptr |= mask;
      DCHECK_NE(*alloc_bitmap_ptr & mask, 0U);
      // Slot address = run base + header size + slot_idx * bracket (slot) size.
      byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
                  << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
      }
      return slot_addr;
    }
    const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32;
    if (first_search_vec_idx_ + 1 >= num_words) {
      DCHECK(IsFull());
      // Already at the last word, return null.
      return nullptr;
    }
    // Increase the index to the next word and try again.
    ++first_search_vec_idx_;
  }
}
// Frees a slot in this (non-thread-local) run in two steps:
// 1) clear the slot's allocation bit, 2) zero the slot's memory.
void RosAlloc::Run::FreeSlot(void* ptr) {
  DCHECK(!IsThreadLocal());
  const byte idx = size_bracket_idx_;
  const size_t bracket_size = bracketSizes[idx];
  // Byte offset of the slot from the first slot of the run.
  const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
  size_t slot_idx = offset_from_slot_base / bracket_size;
  DCHECK_LT(slot_idx, numOfSlots[idx]);
  // Locate the 32-bit bitmap word and bit for this slot.
  size_t vec_idx = slot_idx / 32;
  if (kIsDebugBuild) {
    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
    DCHECK_LT(vec_idx, num_vec);
  }
  size_t vec_off = slot_idx % 32;
  uint32_t* vec = &alloc_bit_map_[vec_idx];
  // The freed slot may now be the earliest free one; rewind the search hint.
  first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx));
  const uint32_t mask = 1U << vec_off;
  DCHECK_NE(*vec & mask, 0U);
  *vec &= ~mask;
  DCHECK_EQ(*vec & mask, 0U);
  // Zero out the memory.
  // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16.
  memset(ptr, 0, bracket_size);
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
  }
}
// Merges the thread-local free bitmap into the allocation bitmap, clearing the
// alloc bits for slots that were freed thread-locally (and resetting the
// thread-local bitmap). Returns true if at least one alloc bit changed;
// *is_all_free_after_out reports whether the run is entirely free afterwards.
inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
  DCHECK(IsThreadLocal());
  // Free slots in the alloc bit map based on the thread local free bit map.
  const size_t idx = size_bracket_idx_;
  // Number of slots in a run of this size bracket.
  const size_t num_of_slots = numOfSlots[idx];
  const size_t num_vec = RoundUp(num_of_slots, 32) / 32;
  bool changed = false;
  // Allocation bitmap words.
  uint32_t* vecp = &alloc_bit_map_[0];
  // Thread-local free bitmap words.
  uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
  bool is_all_free_after = true;
  for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
    uint32_t tl_free_vec = *tl_free_vecp;
    uint32_t vec_before = *vecp;
    uint32_t vec_after;
    // A non-zero thread-local word clears the corresponding alloc bits; the
    // word changes, so record it and rewind the search hint.
    if (tl_free_vec != 0) {
      first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
      vec_after = vec_before & ~tl_free_vec;
      *vecp = vec_after;
      changed = true;
      *tl_free_vecp = 0;  // clear the thread local free bit map.
    } else {
      vec_after = vec_before;
    }
    // Some slots in this word are still in use after the merge.
    if (vec_after != 0) {
      if (v == num_vec - 1) {
        // The last word also carries the invalid-slot marker bits, so "all
        // free" means exactly those marker bits remain set.
        // Only not all free if a bit other than the mask bits are set.
        is_all_free_after =
            is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after;
      } else {
        is_all_free_after = false;
      }
    }
    DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
  }
  *is_all_free_after_out = is_all_free_after;
  // Return true if there was at least a bit set in the thread-local
  // free bit map and at least a bit in the alloc bit map changed.
  return changed;
}
inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() {
const size_t idx = size_bracket_idx_;
const size_t num_slots = numOfSlots[idx];
const size_t num_vec = RoundUp(num_slots, 32) / 32;
DCHECK_NE(num_vec, 0U);
// Make sure to set the bits at the end of the bitmap so that we don't allocate there since they
// don't represent valid slots.
// 空余slot的bitmap位填1,填于高位
alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec);
}
//初始化header置0
inline void RosAlloc::Run::ZeroHeader() {
const byte idx = size_bracket_idx_;
memset(this, 0, headerSizes[idx]);
}
//初始化data置0
inline void RosAlloc::Run::ZeroData() {
const byte idx = size_bracket_idx_;
byte* slot_begin = reinterpret_cast<byte*>(this) + headerSizes[idx];
memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]);
}
// rosalloc 初始化
void RosAlloc::Initialize() {
// bracketSizes.
// kNumOfSizeBrackets = 34
for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
if (i < kNumOfSizeBrackets - 2) {
bracketSizes[i] = 16 * (i + 1);
} else if (i == kNumOfSizeBrackets - 2) {
bracketSizes[i] = 1 * KB;
} else {
DCHECK_EQ(i, kNumOfSizeBrackets - 1);
bracketSizes[i] = 2 * KB;
}
if (kTraceRosAlloc) {
LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
}
}
// numOfPages.
// 每个bracket可用于被分配的page大小
for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
if (i < 4) {
numOfPages[i] = 1;
} else if (i < 8) {
numOfPages[i] = 2;
} else if (i < 16) {
numOfPages[i] = 4;
} else if (i < 32) {
numOfPages[i] = 8;
} else if (i == 32) {
DCHECK_EQ(i, kNumOfSizeBrackets - 2);
numOfPages[i] = 16;
} else {
DCHECK_EQ(i, kNumOfSizeBrackets - 1);
numOfPages[i] = 32;
}
if (kTraceRosAlloc) {
LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
}
}
// Compute numOfSlots and slotOffsets.
for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
size_t bracket_size = bracketSizes[i];
size_t run_size = kPageSize * numOfPages[i];
// 计算slots个数
size_t max_num_of_slots = run_size / bracket_size;
// Compute the actual number of slots by taking the header and
// alignment into account.
// 固定头大小
size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
size_t header_size = 0;
size_t bulk_free_bit_map_offset = 0;
size_t thread_local_free_bit_map_offset = 0;
size_t num_of_slots = 0;
// Search for the maximum number of slots that allows enough space
// for the header (including the bit maps.)
// 由于run结尾应用了一个0长度数组,该处为寻找合适的head和body的分界点
for (int s = max_num_of_slots; s >= 0; s--) {
// slot所占大小
size_t tmp_slots_size = bracket_size * s;
// s向上对齐为sizeof(uint32_t) * kBitsPerByte
size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
// 一个run的构成
// alloc_bit_map 某个slot被分配出去就在alloc_bit_map上标记出来
//fixed header size + alloc_bit_map + bulk_free_bit_map(tmp_bit_map_size)
// + thread_local_free_bit_map(tmp_bit_map_size)
size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
// Align up the unaligned header size. bracket_size may not be a power of two.
// 凑成bracket_size的整数倍,对齐
size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
tmp_unaligned_header_size :
tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
if (tmp_slots_size + tmp_header_size <= run_size) {
// Found the right number of slots, that is, there was enough
// space for the header (including the bit maps.)
num_of_slots = s;
header_size = tmp_header_size;
bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
break;
}
}
DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
// Add the padding for the alignment remainder.
// slot与head直接的间隙
header_size += run_size % bracket_size;
DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
numOfSlots[i] = num_of_slots;
headerSizes[i] = header_size;
bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
if (kTraceRosAlloc) {
LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
<< ", headerSizes[" << i << "]=" << headerSizes[i]
<< ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
<< ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
}
}
// Fill the alloc bitmap so nobody can successfully allocate from it.
if (kIsDebugBuild) {
dedicated_full_run_->magic_num_ = kMagicNum;
}
// It doesn't matter which size bracket we use since the main goal is to have the allocation
// fail 100% of the time you attempt to allocate into the dedicated full run.
// 牺牲run目前只做初始化,只有分配过程100%失败,才尝试在牺牲run分配
// 该run只是作为一个占位符?
dedicated_full_run_->size_bracket_idx_ = 0;
dedicated_full_run_->FillAllocBitMap();
dedicated_full_run_->SetIsThreadLocal(true);
}
} // namespace allocator
} // namespace gc
} // namespace art