【操作系统入门】虚拟内存与页面置换

最新推荐文章于 2026-01-07 09:37:02 发布

原创最新推荐文章于 2026-01-07 09:37:02 发布 · 488 阅读

9 ·

CC 4.0 BY-SA版权

文章标签：

#系统架构

操作系统专栏收录该内容

11 篇文章

订阅专栏

【操作系统入门】第八章：虚拟内存与页面置换——超越物理限制的魔法

本系列共10篇，这是第8/10篇。在第七章，我们探讨了分页、分段等内存管理基础。今天，我们将深入虚拟内存这一革命性技术，探索操作系统如何让进程使用比物理内存大得多的地址空间。

开篇：虚拟内存的奇迹

想象一个魔术师的手提箱：从外面看只是一个普通箱子，但打开后却能不断取出远大于箱子容量的物品。虚拟内存就是计算机世界的这种"魔术"——让每个进程都相信自己拥有完整且私有的巨大内存空间，而实际上物理内存可能只有其中的一小部分。

第一部分：虚拟内存概念与按需分页

1.1 虚拟内存的核心思想

虚拟内存通过结合主内存和二级存储（磁盘），为进程提供一个远大于物理内存的虚拟地址空间。

关键优势：

进程隔离：每个进程拥有独立的地址空间
简化编程：程序员无需关心物理内存布局
内存超售：所有进程的虚拟内存总和可以远超物理内存
高效文件映射：文件可以直接映射到地址空间

1.2 按需分页

虚拟内存的核心实现机制是按需分页——只有在进程实际访问页面时，才将其加载到物理内存。

/*
 * Core page-fault handler.
 *
 * Looks up the page-table entry (PTE) for fault_addr in the current
 * process's page table and dispatches on the reported fault reason.
 *
 * fault_addr: virtual address whose access raised the fault.
 * reason:     classification of the fault supplied by the caller.
 *
 * Any fault that cannot be serviced (no PTE, invalid address, unknown
 * reason) ends with SIGSEGV being sent to the current process, so the
 * faulting access is never silently ignored.
 */
void handle_page_fault(virt_addr_t fault_addr, fault_reason_t reason) {
    pte_t *pte = get_pte(current_process->page_table, fault_addr);

    // Assumes get_pte yields a null pointer when no entry exists for the
    // address (TODO confirm); without this guard the cases below would
    // dereference it.
    if (!pte) {
        send_signal(current_process, SIGSEGV);
        return;
    }

    switch (reason) {
    case PAGE_NOT_PRESENT:
        if (!pte->present) {
            // The page is not resident: bring it in.
            handle_page_not_present(pte, fault_addr);
        } else {
            // The PTE says the page is resident, so only the TLB entry
            // needs to be reloaded.
            reload_tlb_entry(fault_addr, pte);
        }
        break;

    case PAGE_PROTECTION_VIOLATION:
        // Permission problem, e.g. a write to a read-only page or a user
        // access to a kernel page.
        handle_protection_fault(pte, fault_addr);
        break;

    case INVALID_ADDRESS:
    default:
        // Unallocated address, or a reason this handler does not know:
        // the access cannot be completed, so signal the process.
        send_signal(current_process, SIGSEGV);
        break;
    }
}

1.3 页表项的扩展

支持虚拟内存的页表项需要额外信息：

// Page-table entry extended with the bookkeeping fields used for virtual
// memory (residency, permissions, replacement hints, swap location).
// NOTE(review): the declared bit-field widths add up to 52 bits, so one entry
// does not fit in a single 32-bit word — confirm the intended layout.
typedef struct {
    uint32_t frame_number   : 20;   // physical frame number
    uint32_t present        : 1;    // set when the page is resident in memory
    uint32_t writable       : 1;    // set when the page may be written
    uint32_t user_accessible: 1;    // set when user mode may access the page
    uint32_t accessed       : 1;    // recently-referenced flag (used by replacement algorithms)
    uint32_t dirty          : 1;    // modified flag (decides whether a write-back is needed)
    uint32_t paging_file    : 1;    // set when the page lives in the paging file
    uint32_t swap_offset    : 24;   // location of the page inside swap space
    uint32_t reserved       : 2;    // reserved bits
} virtual_pte_t;

第二部分：页面置换算法理论

当物理内存不足时，操作系统必须选择牺牲页面换出到磁盘。不同的置换算法对系统性能有巨大影响。

2.1 评估指标

缺页率：缺页次数与内存访问次数的比例
Belady异常：增加物理帧数反而导致缺页率上升的现象

2.2 最优置换算法（OPT）

理论上的最优算法：置换在未来最长时间不会被访问的页面。

/*
 * Optimal (OPT) page replacement: theoretically the best choice, but it
 * needs the future access sequence.
 *
 * mem:    resident frames to choose from (mem->frames[0 .. frame_count-1]).
 * future: upcoming access sequence handed to find_next_access().
 *
 * Returns the resident page with the largest find_next_access() value, or
 * NULL only when mem holds no frames. When several pages tie, the first one
 * encountered is kept.
 */
page_t *optimal_replacement(memory_t *mem, access_sequence_t *future) {
    page_t *victim = NULL;
    uint32_t farthest_use = 0;

    for (int i = 0; i < mem->frame_count; i++) {
        page_t *page = &mem->frames[i];

        // Position of this page's next access in the future sequence.
        uint32_t next_use = find_next_access(page, future);

        // Seed the choice with the first frame: a plain "> 0" comparison
        // would leave victim NULL whenever every page reports next_use == 0.
        if (victim == NULL || next_use > farthest_use) {
            farthest_use = next_use;
            victim = page;
        }
    }

    return victim;
}

局限性：需要预知未来的页面访问序列，实际中不可实现。

2.3 先进先出算法（FIFO）

置换在内存中驻留时间最长的页面。

// FIFO page replacement: the frame table is used as a circular queue.
typedef struct {
    page_t **frames;
    uint32_t count;
    uint32_t head;  // index of the page that entered memory first
} fifo_memory_t;

/*
 * Picks the page at the head of the queue as the victim and advances the
 * head to the next slot, wrapping at fifo->count.
 *
 * Returns NULL when fifo is NULL, has no frame table, or holds zero frames;
 * the head is left untouched in that case. (With count == 0 the wrap-around
 * modulo would otherwise divide by zero.)
 */
page_t *fifo_replace(fifo_memory_t *fifo) {
    if (fifo == NULL || fifo->frames == NULL || fifo->count == 0) {
        return NULL;
    }

    page_t *victim = fifo->frames[fifo->head];

    // Circular queue: step the head forward by one slot.
    fifo->head = (fifo->head + 1) % fifo->count;

    return victim;
}

问题：可能置换掉经常访问的页面，存在Belady异常。

2.4 最近最少使用算法（LRU）

基于局部性原理：最近被访问的页面很可能在不久的将来再次被访问，因此LRU选择置换最长时间未被访问的页面。

实现方案1：计数器法

// Counter-based LRU: every page-table entry carries a timestamp.
typedef struct {
    uint64_t last_accessed;  // timestamp of the most recent access
    // ... other fields
} lru_pte_t;

/*
 * Counter-based LRU victim selection.
 *
 * Scans mem->frames[0 .. frame_count-1] and returns the frame whose
 * last_accessed timestamp is the smallest. Returns NULL when no frame has a
 * timestamp below UINT64_MAX (which includes the case of zero frames).
 */
page_t *lru_counter_replace(memory_t *mem) {
    page_t *lru_page = NULL;
    uint64_t min_stamp = UINT64_MAX;
    int idx = 0;

    while (idx < mem->frame_count) {
        uint64_t stamp = mem->frames[idx].last_accessed;

        // Strictly older than anything seen so far: new candidate.
        if (stamp < min_stamp) {
            lru_page = &mem->frames[idx];
            min_stamp = stamp;
        }
        idx++;
    }

    return lru_page;
}

实现方案2：栈法

// Node of the access-order stack used by the stack-based LRU variant.
typedef struct lru_stack_node {
    page_t *page;
    struct lru_stack_node *prev;
    struct lru_stack_node *next;
} lru_stack_node_t;

/*
 * Records an access to page in the LRU stack.
 *
 * A page that already has a node is unlinked first; a page without one gets
 * a node from create_node(). Either way the node ends up on top of the stack.
 */
void lru_stack_access(lru_stack_t *stack, page_t *page) {
    lru_stack_node_t *entry = find_node(stack, page);

    if (!entry) {
        // First time this page is seen: make a node for it.
        entry = create_node(page);
    } else {
        // Already tracked: detach it so it can be re-inserted on top.
        remove_node(stack, entry);
    }

    push_top(stack, entry);
}

/*
 * Returns the page held by the bottom node of the LRU stack, i.e. the least
 * recently used page, without removing it.
 *
 * Returns NULL when stack is NULL or the stack has no bottom node (empty).
 */
page_t *lru_stack_replace(lru_stack_t *stack) {
    if (stack == NULL || stack->bottom == NULL) {
        return NULL;
    }
    return stack->bottom->page;
}

第三部分：实用的近似LRU算法

精确LRU实现开销大，实际系统使用近似算法。

3.1 时钟算法（第二次机会算法）

// Frame table plus the position of the clock hand.
typedef struct {
    page_t **frames;
    uint32_t count;
    uint32_t clock_hand;  // index the clock hand currently points at
} clock_memory_t;

/*
 * Clock (second-chance) victim selection.
 *
 * Starting at the clock hand, returns the first page whose accessed flag is
 * 0. Every page passed over on the way has its accessed flag cleared, and
 * the hand always ends one slot past the last page examined.
 *
 * Returns NULL when clock_mem is NULL, has no frame table, or holds zero
 * frames (the wrap-around modulo would otherwise divide by zero).
 */
page_t *clock_replace(clock_memory_t *clock_mem) {
    if (clock_mem == NULL || clock_mem->frames == NULL || clock_mem->count == 0) {
        return NULL;
    }

    for (;;) {
        page_t *candidate = clock_mem->frames[clock_mem->clock_hand];

        // The hand moves forward whether or not the candidate is chosen.
        clock_mem->clock_hand = (clock_mem->clock_hand + 1) % clock_mem->count;

        if (candidate->accessed == 0) {
            return candidate;
        }

        // Second chance: clear the flag and keep looking.
        candidate->accessed = 0;
    }
}

3.2 改进型时钟算法

考虑页面的修改状态（脏位），置换代价不同：

未修改页面：直接丢弃
已修改页面：需要写回磁盘

// The four page classes, in order of preference as a replacement victim.
typedef enum {
    CLASS_0 = 0,  // (referenced=0, dirty=0) - best victim
    CLASS_1 = 1,  // (referenced=0, dirty=1) - needs a write-back
    CLASS_2 = 2,  // (referenced=1, dirty=0) - becomes CLASS_0 once the referenced bit is cleared
    CLASS_3 = 3   // (referenced=1, dirty=1) - becomes CLASS_1 once the referenced bit is cleared
} page_class_t;

/*
 * Enhanced clock victim selection using both the referenced and dirty state.
 *
 * Each round makes two sweeps over all frames, starting at the clock hand:
 *   sweep 1: return the first CLASS_0 page; no page is modified, so the
 *            reference information is still intact for sweep 2;
 *   sweep 2: return the first CLASS_1 page; every page passed over has its
 *            accessed flag cleared.
 * If both sweeps fail, the round is repeated. Assuming get_page_class()
 * classifies pages from their accessed/dirty state as the page_class_t
 * comments describe (TODO confirm), the second round must succeed because
 * all accessed flags were cleared by the first round's sweep 2.
 *
 * The hand always ends one slot past the returned page.
 * Returns NULL when clock_mem is NULL, has no frame table, or holds zero
 * frames.
 */
page_t *enhanced_clock_replace(clock_memory_t *clock_mem) {
    if (clock_mem == NULL || clock_mem->frames == NULL || clock_mem->count == 0) {
        return NULL;
    }

    for (;;) {
        // Sweep 1: look for an unreferenced, clean page; change nothing.
        for (uint32_t i = 0; i < clock_mem->count; i++) {
            page_t *candidate = clock_mem->frames[clock_mem->clock_hand];
            clock_mem->clock_hand = (clock_mem->clock_hand + 1) % clock_mem->count;

            if (get_page_class(candidate) == CLASS_0) {
                return candidate;
            }
        }

        // Sweep 2: look for an unreferenced, dirty page; clear the accessed
        // flag of everything that is skipped.
        for (uint32_t i = 0; i < clock_mem->count; i++) {
            page_t *candidate = clock_mem->frames[clock_mem->clock_hand];
            clock_mem->clock_hand = (clock_mem->clock_hand + 1) % clock_mem->count;

            if (get_page_class(candidate) == CLASS_1) {
                return candidate;
            }
            candidate->accessed = 0;
        }
    }
}

3.3 老化算法

使用移位寄存器近似LRU：

// Per-page state for the aging algorithm: an 8-bit access-history register
// plus the reference flag collected since the last tick.
typedef struct {
    uint8_t age_counter;    // access history, most recent tick in the top bit
    uint8_t referenced;     // non-zero when the page was referenced this tick
} aging_pte_t;

/*
 * Performs one aging tick over pages[0 .. count-1].
 *
 * For every page the history register is shifted right by one and its top
 * bit is set when the page was referenced since the previous tick; the
 * referenced flag is then cleared.
 *
 * Any non-zero referenced value counts as "referenced". (Shifting the raw
 * value left by 7 would drop the bit for even values such as 2.)
 * A NULL pages pointer is ignored.
 */
void aging_update(aging_pte_t *pages, int count) {
    if (pages == NULL) {
        return;
    }

    for (int i = 0; i < count; i++) {
        uint8_t top_bit = pages[i].referenced ? 0x80u : 0u;

        pages[i].age_counter = (uint8_t)((pages[i].age_counter >> 1) | top_bit);
        pages[i].referenced = 0;
    }
}

/*
 * Aging victim selection.
 *
 * Scans pages[0 .. count-1] and returns the page (via get_page_from_pte())
 * whose age_counter is the smallest; ties keep the first entry found.
 *
 * Returns NULL only when count <= 0. Seeding the search with the first
 * entry matters: when every counter is 0xFF a strict "< UINT8_MAX"
 * comparison would never select anything.
 */
page_t *aging_replace(aging_pte_t *pages, int count) {
    int best = -1;
    uint8_t min_age = UINT8_MAX;

    for (int i = 0; i < count; i++) {
        if (best < 0 || pages[i].age_counter < min_age) {
            min_age = pages[i].age_counter;
            best = i;
        }
    }

    if (best < 0) {
        return NULL;
    }
    return get_page_from_pte(&pages[best]);
}

第四部分：工作集模型与颠簸

4.1 工作集模型

Denning的工作集理论：进程在最近的时间窗口Δ（对应下面代码中的参数tau）内访问过的页面集合称为工作集。

// Bookkeeping for one working set.
typedef struct {
    page_t **pages;         // pages that belong to the working set
    uint64_t last_accessed; // time of the most recent access
    uint32_t size;          // number of pages in the working set
} working_set_t;

/*
 * Reports whether page was accessed within the last tau time units.
 *
 * ws:           not consulted by this check.
 * page:         page whose last_accessed timestamp is examined.
 * current_time: the reference "now".
 * tau:          width of the working-set window.
 *
 * A page whose timestamp is at or after current_time is treated as inside
 * the window; subtracting in that case would wrap around as an unsigned
 * value and wrongly report a just-touched page as stale.
 */
bool is_in_working_set(working_set_t *ws, page_t *page, uint64_t current_time, uint64_t tau) {
    (void)ws;

    if (page->last_accessed >= current_time) {
        return true;
    }
    return (current_time - page->last_accessed) <= tau;
}

/*
 * Stamps page with current_time as its most recent access.
 *
 * ws is accepted for future working-set statistics; nothing is updated on
 * it here.
 */
void update_working_set(working_set_t *ws, page_t *page, uint64_t current_time) {
    (void)ws;
    page->last_accessed = current_time;
}

4.2 页面错误频率算法

动态调整分配给进程的帧数来控制缺页率：

// Control block for page-fault-frequency (PFF) based frame allocation.
typedef struct {
    uint32_t page_fault_count;      // page faults counted since the last check
    uint64_t last_check_time;       // time of the last check
    uint32_t allocated_frames;      // frames currently allocated
    double page_fault_rate;         // page-fault rate
} pff_control_t;

/*
 * Adjusts the number of frames given to proc from its recent fault rate.
 *
 * The rate is page_fault_count divided by the time elapsed since
 * last_check_time, and it is compared with ctrl->page_fault_rate:
 *   - above 110% of it: one more frame, capped at MAX_FRAMES;
 *   - below  90% of it: one frame fewer, floored at MIN_FRAMES.
 * After a measurement the fault counter is reset and last_check_time is set
 * to the current time.
 *
 * When no time has elapsed, nothing is changed and the counter keeps
 * accumulating, since a rate cannot be computed over a zero interval.
 */
void pff_adjust_allocation(pff_control_t *ctrl, process_t *proc) {
    uint64_t current_time = get_current_time();
    uint64_t interval = current_time - ctrl->last_check_time;

    if (interval == 0) {
        return;
    }

    double current_rate = (double)ctrl->page_fault_count / (double)interval;

    if (current_rate > ctrl->page_fault_rate * 1.1) {
        // Too many faults: grow by one. Compare before adding so the
        // unsigned counter cannot wrap past its maximum.
        ctrl->allocated_frames = (ctrl->allocated_frames < MAX_FRAMES)
                                     ? ctrl->allocated_frames + 1
                                     : MAX_FRAMES;
        adjust_process_frames(proc, ctrl->allocated_frames);
    }
    else if (current_rate < ctrl->page_fault_rate * 0.9) {
        // Few faults: shrink by one. Compare before subtracting so a count
        // of 0 cannot wrap around to a huge value.
        ctrl->allocated_frames = (ctrl->allocated_frames > MIN_FRAMES)
                                     ? ctrl->allocated_frames - 1
                                     : MIN_FRAMES;
        adjust_process_frames(proc, ctrl->allocated_frames);
    }

    ctrl->page_fault_count = 0;
    ctrl->last_check_time = current_time;
}

4.3 系统颠簸

当系统过度分页时，CPU大部分时间用于页面置换而非有用工作：

/*
 * Detects thrashing and relieves it by suspending one process.
 *
 * Thrashing is assumed when get_cpu_utilization() is below 10.0 while
 * get_system_page_fault_rate() is above 1000.0. In that case a diagnostic
 * line is printed and the process chosen by select_process_to_suspend(),
 * if any, is suspended and reported.
 *
 * mm is not consulted by this routine.
 */
void handle_thrashing(memory_manager_t *mm) {
    const double cpu_pct = get_cpu_utilization();
    const double faults_per_sec = get_system_page_fault_rate();

    // Low CPU use combined with a high fault rate is the thrashing signature.
    const int thrashing = (cpu_pct < 10.0) && (faults_per_sec > 1000.0);
    if (!thrashing) {
        return;
    }

    printf("System thrashing detected! CPU util: %.1f%%, Page faults/sec: %.0f\n",
           cpu_pct, faults_per_sec);

    // Mitigation: take one process out of the competition for memory.
    process_t *to_suspend = select_process_to_suspend();
    if (!to_suspend) {
        return;
    }

    suspend_process(to_suspend);
    printf("Suspended process %d to alleviate thrashing\n", to_suspend->pid);
}

第五部分：实际系统实现

5.1 Linux页面置换

Linux使用基于LRU的复杂策略：

// Two-list LRU structure in the style of Linux: one list for active pages,
// one for inactive pages, each with its own page count.
struct lru_list {
    struct list_head active_list;   // list of active pages
    struct list_head inactive_list; // list of inactive pages
    unsigned long active_count;     // number of active pages
    unsigned long inactive_count;   // number of inactive pages
};

/*
 * Core page-reclaim pass over page_list.
 *
 * For every page on the list:
 *   - dirty and not yet under writeback: writeback is started and the page
 *     is left alone for this pass;
 *   - active: moved from the active list to the inactive list;
 *   - inactive with writeback in flight: skipped, so a page is never handed
 *     to reclaim/free while its I/O is still outstanding;
 *   - otherwise: reclaim is attempted and the page is freed on success.
 *
 * The "safe" list iterator is used because pages may leave the list while
 * it is being walked.
 */
static void shrink_page_list(struct list_head *page_list) {
    struct page *page;
    struct page *next;

    list_for_each_entry_safe(page, next, page_list, lru) {
        if (PageDirty(page) && !PageWriteback(page)) {
            // Dirty page: start writing it back and revisit it later.
            set_page_writeback(page);
            submit_page_for_writeback(page);
            continue;
        }

        if (PageActive(page)) {
            // Active page: demote it to the inactive list.
            del_page_from_active_list(page);
            add_page_to_inactive_list(page);
        } else if (!PageWriteback(page)) {
            // Inactive page with no I/O in flight: try to reclaim it.
            if (try_to_reclaim_page(page)) {
                free_page(page);
            }
        }
    }
}

5.2 Windows工作集管理器

Windows使用复杂的工作集管理（以下为简化的示意性伪代码，并非可直接编译的C代码）：

// Core loop of the working-set manager.
// NOTE(review): this reads as illustrative pseudocode rather than compilable
// C — `for (process in all_processes)` is not C syntax, and interval,
// working_set_size, page_fault_count, threshold_high and threshold_low are
// not declared inside this block.
VOID MmWorkingSetManager(VOID) {
    // Runs periodically, forever: wait, then examine every process.
    while (TRUE) {
        KeDelayExecutionThread(KernelMode, FALSE, &interval);
        
        // Inspect the working set of each process
        for (process in all_processes) {
            working_set_size = process->WorkingSetSize;
            page_fault_count = process->PageFaultCount;
            
            // Resize the working set according to the fault count
            if (page_fault_count > threshold_high) {
                // Grow the working set by 10%, capped at MAX_WORKING_SET
                process->WorkingSetSize = min(working_set_size * 1.1, MAX_WORKING_SET);
            } else if (page_fault_count < threshold_low) {
                // Shrink the working set by 10%, floored at MIN_WORKING_SET
                process->WorkingSetSize = max(working_set_size * 0.9, MIN_WORKING_SET);
            }
            
            // Trim the working set; done for every process on every pass
            MmTrimWorkingSet(process, TRIM_AMOUNT);
        }
    }
}

第六部分：高级优化技术

6.1 预取优化

基于访问模式预测未来需要的页面：

/*
 * Sequential prefetch: detects a sequential access pattern.
 *
 * The address from the previous call is remembered in a function-local
 * static. When current_addr is exactly one PAGE_SIZE beyond it, the next
 * PREFETCH_DEGREE pages are prefetched, skipping any address that
 * is_valid_address() rejects. The remembered address is updated on every
 * call.
 */
void sequential_prefetch(virt_addr_t current_addr) {
    static virt_addr_t previous_addr = 0;

    if (current_addr != previous_addr + PAGE_SIZE) {
        // Not the page right after the previous one: just remember it.
        previous_addr = current_addr;
        return;
    }

    int ahead = 1;
    while (ahead <= PREFETCH_DEGREE) {
        virt_addr_t target = current_addr + ahead * PAGE_SIZE;

        if (is_valid_address(target)) {
            prefetch_page(target);
        }
        ahead++;
    }

    previous_addr = current_addr;
}

6.2 交换空间管理

磁盘交换空间的高效管理：

// Swap-space allocator state: a free list of disk blocks plus counters.
typedef struct {
    disk_block_t *free_blocks;      // list of free blocks
    uint32_t block_size;            // swap block size (usually equal to the page size)
    uint32_t total_blocks;          // total number of blocks
    uint32_t used_blocks;           // number of blocks in use
} swap_space_t;

/*
 * Takes one block off the head of the swap free list.
 *
 * Returns the offset stored in that block and increments used_blocks.
 * When the free list is empty, handle_swap_space_exhaustion() is invoked
 * and INVALID_SWAP_OFFSET is returned.
 */
swap_offset_t allocate_swap_block(swap_space_t *swap) {
    disk_block_t *first_free = swap->free_blocks;

    if (!first_free) {
        // Swap space is exhausted.
        handle_swap_space_exhaustion();
        return INVALID_SWAP_OFFSET;
    }

    // Unlink the head of the free list and account for it.
    swap->free_blocks = first_free->next;
    swap->used_blocks += 1;

    return first_free->offset;
}

/*
 * Returns the swap block at offset to the head of the free list and
 * decrements used_blocks.
 *
 * The call is a no-op when swap is NULL or when get_block_from_offset()
 * yields no block (assumed to be reported as NULL — TODO confirm).
 * used_blocks is never decremented below zero, so a stray extra free cannot
 * wrap the unsigned counter.
 */
void free_swap_block(swap_space_t *swap, swap_offset_t offset) {
    if (swap == NULL) {
        return;
    }

    disk_block_t *block = get_block_from_offset(offset);
    if (block == NULL) {
        return;
    }

    // Push the block onto the front of the free list.
    block->next = swap->free_blocks;
    swap->free_blocks = block;

    if (swap->used_blocks > 0) {
        swap->used_blocks--;
    }
}

6.3 内存压缩

现代系统的内存压缩技术：

// Result of compressing one memory page.
typedef struct {
    uint8_t *compressed_data;   // compressed bytes
    size_t compressed_size;     // size after compression
    uint32_t original_crc;      // checksum of the original data
} compressed_page_t;

/*
 * Attempts to compress one page into compressed->compressed_data.
 *
 * Returns true, and fills in compressed_size and original_crc (a CRC32 of
 * the uncompressed page), only when compression_algorithm() reports
 * COMPRESSION_SUCCESS and the output is smaller than
 * PAGE_SIZE * COMPRESSION_THRESHOLD. Otherwise returns false and leaves
 * those two fields untouched.
 */
bool try_compress_page(page_t *page, compressed_page_t *compressed) {
    // In: capacity offered to the compressor. Out: bytes actually produced.
    size_t out_len = PAGE_SIZE;

    int status = compression_algorithm(page->data, PAGE_SIZE,
                                       compressed->compressed_data,
                                       &out_len);
    if (status != COMPRESSION_SUCCESS) {
        return false;
    }

    // Not enough saving to justify keeping the compressed copy.
    if (!(out_len < PAGE_SIZE * COMPRESSION_THRESHOLD)) {
        return false;
    }

    compressed->compressed_size = out_len;
    compressed->original_crc = calculate_crc32(page->data, PAGE_SIZE);
    return true;
}

第七部分：性能监控与调优

7.1 性能计数器

利用硬件性能计数器监控内存行为：

// Memory performance statistics.
// The last two fields are the ones adaptive_memory_tuning() reads; they are
// declared here so that the routine and this struct agree.
typedef struct {
    uint64_t page_faults;           // number of page faults
    uint64_t tlb_misses;            // number of TLB misses
    uint64_t cache_misses;          // number of cache misses
    uint64_t memory_accesses;       // number of memory accesses
    uint64_t working_set_size;      // working-set size; presumably in the same unit as the
                                    // system's physical_memory — TODO confirm
    double tlb_miss_rate;           // TLB miss rate as a fraction (compared against 0.02)
} memory_stats_t;

/*
 * Refreshes the fault/miss counters in stats from the hardware performance
 * counters and requests more aggressive prefetching when the page-fault
 * rate (page_faults / memory_accesses) exceeds 1%.
 *
 * stats->memory_accesses is read but not refreshed here; the caller is
 * expected to have set it. When it is zero no rate can be computed, so the
 * counters are still refreshed but no tuning decision is made (dividing by
 * zero would yield an infinite rate and trigger the tuning spuriously).
 */
void update_memory_stats(memory_stats_t *stats) {
    // Read the hardware performance counters.
    stats->page_faults = read_pmc(PMC_PAGE_FAULTS);
    stats->tlb_misses = read_pmc(PMC_TLB_MISSES);
    stats->cache_misses = read_pmc(PMC_CACHE_MISSES);

    if (stats->memory_accesses == 0) {
        return;
    }

    double page_fault_rate = (double)stats->page_faults / (double)stats->memory_accesses;

    // Tune according to the measured rate.
    if (page_fault_rate > 0.01) {
        adjust_memory_parameters(MORE_AGGRESSIVE_PREFETCH);
    }
}

7.2 自动调优参数

/*
 * Adaptive tuning of memory-management parameters.
 *
 * Using the statistics from collect_memory_stats():
 *   - working set above 80% of sys->physical_memory: replacement
 *     aggressiveness is set to HIGH_AGGRESSIVENESS and prefetching is
 *     enabled; otherwise LOW_AGGRESSIVENESS with prefetching disabled;
 *   - TLB miss rate above 0.02: large pages are enabled.
 */
void adaptive_memory_tuning(system_t *sys) {
    memory_stats_t stats = collect_memory_stats();

    // Is the working set close to the size of physical memory?
    const bool under_pressure = stats.working_set_size > sys->physical_memory * 0.8;

    if (!under_pressure) {
        // Plenty of memory: stay conservative.
        sys->page_replacement_aggressiveness = LOW_AGGRESSIVENESS;
        sys->prefetch_enabled = false;
    } else {
        // Memory is tight: replace more aggressively and prefetch.
        sys->page_replacement_aggressiveness = HIGH_AGGRESSIVENESS;
        sys->prefetch_enabled = true;
    }

    // Large pages reduce pressure on the TLB.
    if (stats.tlb_miss_rate > 0.02) {
        enable_large_pages();
    }
}