v8字节码的编译过程

最新推荐文章于 2025-10-08 04:31:47 发布

原创最新推荐文章于 2025-10-08 04:31:47 发布 · 2k 阅读

2 ·

CC 4.0 BY-SA版权

文章标签：

#javascript #开发语言 #ecmascript

v8 专栏收录该内容

6 篇文章

订阅专栏

v8字节码的编译过程

前面的文章中我们学习了调用V8 API的方法。本文我们讲解一下v8编译成字节码的主要过程。

我们来看一张编译的全局地图：

API调用部分

我们知道，v8中编译代码的方法是v8::Script::Compile:

      v8::Local<v8::Script> script =
          v8::Script::Compile(context, source).ToLocalChecked();

这将会调用api.cc中的Compile:

MaybeLocal<Script> Script::Compile(Local<Context> context, Local<String> source,
                                   ScriptOrigin* origin) {
  if (origin) {
    ScriptCompiler::Source script_source(source, *origin);
    return ScriptCompiler::Compile(context, &script_source);
  }
  ScriptCompiler::Source script_source(source);
  return ScriptCompiler::Compile(context, &script_source);
}

Script::Compile会调用同属于api.cc中的ScriptCompiler::Compile:

MaybeLocal<Script> ScriptCompiler::Compile(Local<Context> context,
                                           Source* source,
                                           CompileOptions options,
                                           NoCacheReason no_cache_reason) {
  Utils::ApiCheck(
      !source->GetResourceOptions().IsModule(), "v8::ScriptCompiler::Compile",
      "v8::ScriptCompiler::CompileModule must be used to compile modules");
  auto isolate = context->GetIsolate();
  MaybeLocal<UnboundScript> maybe =
      CompileUnboundInternal(isolate, source, options, no_cache_reason);
  Local<UnboundScript> result;
  if (!maybe.ToLocal(&result)) return MaybeLocal<Script>();
  v8::Context::Scope scope(context);
  return result->BindToCurrentContext();
}

然后会调用ScriptCompiler的CompileUnboundInternal函数，我们删节一下code cache部分：

MaybeLocal<UnboundScript> ScriptCompiler::CompileUnboundInternal(
    Isolate* v8_isolate, Source* source, CompileOptions options,
    NoCacheReason no_cache_reason) {
  auto isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
  TRACE_EVENT_CALL_STATS_SCOPED(isolate, "v8", "V8.ScriptCompiler");
  ENTER_V8_NO_SCRIPT(isolate, v8_isolate->GetCurrentContext(), ScriptCompiler,
                     CompileUnbound, MaybeLocal<UnboundScript>(),
                     InternalEscapableScope);

  i::Handle<i::String> str = Utils::OpenHandle(*(source->source_string));

  i::Handle<i::SharedFunctionInfo> result;
  TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.compile"), "V8.CompileScript");
  i::ScriptDetails script_details = GetScriptDetails(
      isolate, source->resource_name, source->resource_line_offset,
      source->resource_column_offset, source->source_map_url,
      source->host_defined_options, source->resource_options);

  i::MaybeHandle<i::SharedFunctionInfo> maybe_function_info;
  if (options == kConsumeCodeCache) {
...
  } else {
    // Compile without any cache.
    maybe_function_info = i::Compiler::GetSharedFunctionInfoForScript(
        isolate, str, script_details, options, no_cache_reason,
        i::NOT_NATIVES_CODE);
  }

  has_pending_exception = !maybe_function_info.ToHandle(&result);
  RETURN_ON_FAILED_EXECUTION(UnboundScript);
  RETURN_ESCAPED(ToApiHandle<UnboundScript>(result));
}

Compiler部分

此时调用internal::Compiler::GetSharedFunctionInfoForScript:

MaybeHandle<SharedFunctionInfo> Compiler::GetSharedFunctionInfoForScript(
    Isolate* isolate, Handle<String> source,
    const ScriptDetails& script_details,
    ScriptCompiler::CompileOptions compile_options,
    ScriptCompiler::NoCacheReason no_cache_reason, NativesFlag natives) {
  return GetSharedFunctionInfoForScriptImpl(
      isolate, source, script_details, nullptr, nullptr, nullptr,
      compile_options, no_cache_reason, natives);
}

这是一层皮，最终调用到GetSharedFunctionInfoForScriptImpl：

MaybeHandle<SharedFunctionInfo> GetSharedFunctionInfoForScriptImpl(
    Isolate* isolate, Handle<String> source,
    const ScriptDetails& script_details, v8::Extension* extension,
    AlignedCachedData* cached_data, BackgroundDeserializeTask* deserialize_task,
    ScriptCompiler::CompileOptions compile_options,
    ScriptCompiler::NoCacheReason no_cache_reason, NativesFlag natives) {
...
      maybe_result =
          CompileScriptOnMainThread(flags, source, script_details, natives,
                                    extension, isolate, &is_compiled_scope);
    }
...

对于大多数情况，我们是在主线程中编译的，调用CompileScriptOnMainThread：

MaybeHandle<SharedFunctionInfo> CompileScriptOnMainThread(
    const UnoptimizedCompileFlags flags, Handle<String> source,
    const ScriptDetails& script_details, NativesFlag natives,
    v8::Extension* extension, Isolate* isolate,
    IsCompiledScope* is_compiled_scope) {
  UnoptimizedCompileState compile_state(isolate);
  ParseInfo parse_info(isolate, flags, &compile_state);
  parse_info.set_extension(extension);

  Handle<Script> script =
      NewScript(isolate, &parse_info, source, script_details, natives);
  DCHECK_IMPLIES(parse_info.flags().collect_type_profile(),
                 script->IsUserJavaScript());
  DCHECK_EQ(parse_info.flags().is_repl_mode(), script->is_repl_mode());

  return Compiler::CompileToplevel(&parse_info, script, isolate,
                                   is_compiled_scope);
}

然后从全局函数回到Compiler类的CompileToplevel函数：

MaybeHandle<SharedFunctionInfo> Compiler::CompileToplevel(
    ParseInfo* parse_info, Handle<Script> script, Isolate* isolate,
    IsCompiledScope* is_compiled_scope) {
  return v8::internal::CompileToplevel(parse_info, script, kNullMaybeHandle,
                                       isolate, is_compiled_scope);
}

结果类中的又是个皮，实现的CompileToplevel会调用parsing::ParseProgram去解析代码为AST，然后调用IterativelyExecuteAndFinalizeUnoptimizedCompilationJobs去进行编译操作：

MaybeHandle<SharedFunctionInfo> CompileToplevel(
    ParseInfo* parse_info, Handle<Script> script,
    MaybeHandle<ScopeInfo> maybe_outer_scope_info, Isolate* isolate,
    IsCompiledScope* is_compiled_scope) {

...
  if (parse_info->literal() == nullptr &&
      !parsing::ParseProgram(parse_info, script, maybe_outer_scope_info,
                             isolate, parsing::ReportStatisticsMode::kYes)) {
    FailWithPendingException(isolate, script, parse_info,
                             Compiler::ClearExceptionFlag::KEEP_EXCEPTION);
    return MaybeHandle<SharedFunctionInfo>();
  }
...

  if (!IterativelyExecuteAndFinalizeUnoptimizedCompilationJobs(
          isolate, shared_info, script, parse_info, isolate->allocator(),
          is_compiled_scope, &finalize_unoptimized_compilation_data_list,
          nullptr)) {
    FailWithPendingException(isolate, script, parse_info,
                             Compiler::ClearExceptionFlag::KEEP_EXCEPTION);
    return MaybeHandle<SharedFunctionInfo>();
  }
...  
}

最终会落到 IterativelyExecuteAndFinalizeUnoptimizedCompilationJobs 中。

bool IterativelyExecuteAndFinalizeUnoptimizedCompilationJobs(
    IsolateT* isolate, Handle<SharedFunctionInfo> outer_shared_info,
    Handle<Script> script, ParseInfo* parse_info,
    AccountingAllocator* allocator, IsCompiledScope* is_compiled_scope,
    FinalizeUnoptimizedCompilationDataList*
        finalize_unoptimized_compilation_data_list,
    DeferredFinalizationJobDataList*
        jobs_to_retry_finalization_on_main_thread) {
  DeclarationScope::AllocateScopeInfos(parse_info, isolate);

  std::vector<FunctionLiteral*> functions_to_compile;
  functions_to_compile.push_back(parse_info->literal());

  while (!functions_to_compile.empty()) {
    FunctionLiteral* literal = functions_to_compile.back();
    functions_to_compile.pop_back();
    Handle<SharedFunctionInfo> shared_info =
        Compiler::GetSharedFunctionInfo(literal, script, isolate);
    if (shared_info->is_compiled()) continue;

    std::unique_ptr<UnoptimizedCompilationJob> job =
        ExecuteSingleUnoptimizedCompilationJob(parse_info, literal, allocator,
                                               &functions_to_compile,
                                               isolate->AsLocalIsolate());

    if (!job) return false;
...

针对于每一个要编译的函数，将调用ExecuteSingleUnoptimizedCompilationJob去进行编译：

std::unique_ptr<UnoptimizedCompilationJob>
ExecuteSingleUnoptimizedCompilationJob(
    ParseInfo* parse_info, FunctionLiteral* literal,
    AccountingAllocator* allocator,
    std::vector<FunctionLiteral*>* eager_inner_literals,
    LocalIsolate* local_isolate) {
...    
  std::unique_ptr<UnoptimizedCompilationJob> job(
      interpreter::Interpreter::NewCompilationJob(
          parse_info, literal, allocator, eager_inner_literals, local_isolate));

  if (job->ExecuteJob() != CompilationJob::SUCCEEDED) {
    // Compilation failed, return null.
    return std::unique_ptr<UnoptimizedCompilationJob>();
  }

  return job;
}

创建了Job之后，再调用Job的ExecuteJob 函数去具体执行。

CompilationJob::Status UnoptimizedCompilationJob::ExecuteJob() {
  // Delegate to the underlying implementation.
  DCHECK_EQ(state(), State::kReadyToExecute);
  ScopedTimer t(&time_taken_to_execute_);
  return UpdateState(ExecuteJobImpl(), State::kReadyToFinalize);
}

解释器部分

ExecuteJobImpl是一个虚函数。目前的实现有两种，一种是asm.js的实现，另一种是解释器的实现。

正常我们用的都是解释器的实现方法：

InterpreterCompilationJob::Status InterpreterCompilationJob::ExecuteJobImpl() {
  RCS_SCOPE(parse_info()->runtime_call_stats(),
            RuntimeCallCounterId::kCompileIgnition,
            RuntimeCallStats::kThreadSpecific);
...

  base::Optional<ParkedScope> parked_scope;
  if (local_isolate_) parked_scope.emplace(local_isolate_);

  generator()->GenerateBytecode(stack_limit());

  if (generator()->HasStackOverflow()) {
    return FAILED;
  }
  return SUCCEEDED;
}

最终将调用BytecodeGenerator的GenerateBytecode方法去生成字节码。

void BytecodeGenerator::GenerateBytecode(uintptr_t stack_limit) {
...

  if (NeedsContextInitialization(closure_scope())) {
    // Push a new inner context scope for the function.
    BuildNewLocalActivationContext();
    ContextScope local_function_context(this, closure_scope());
    BuildLocalActivationContextInitialization();
    GenerateBytecodeBody();
  } else {
    GenerateBytecodeBody();
  }

  // Check that we are not falling off the end.
  DCHECK(builder()->RemainderOfBlockIsDead());
}

字节码生成的核心部分在GenerateBytecodeBody中，其主要的工作是遍历AST，所以主要的操作全在Visit*:

void BytecodeGenerator::GenerateBytecodeBody() {
  // Build the arguments object if it is used.
  VisitArgumentsObject(closure_scope()->arguments());

  // Build rest arguments array if it is used.
  Variable* rest_parameter = closure_scope()->rest_parameter();
  VisitRestArgumentsArray(rest_parameter);

  // Build assignment to the function name or {.this_function}
  // variables if used.
  VisitThisFunctionVariable(closure_scope()->function_var());
  VisitThisFunctionVariable(closure_scope()->this_function_var());

  // Build assignment to {new.target} variable if it is used.
  VisitNewTargetVariable(closure_scope()->new_target_var());

...

  // Visit declarations within the function scope.
  if (closure_scope()->is_script_scope()) {
    VisitGlobalDeclarations(closure_scope()->declarations());
  } else if (closure_scope()->is_module_scope()) {
    VisitModuleDeclarations(closure_scope()->declarations());
  } else {
    VisitDeclarations(closure_scope()->declarations());
  }

  // Emit initializing assignments for module namespace imports (if any).
  VisitModuleNamespaceImports();

...

  // Visit statements in the function body.
  VisitStatements(literal->body());

...
}

字节码生成器

在Visit*函数中，除了遍历AST，然后就是调用字节码生成器去生成字节码。

我们来看个简单的例子，void运算符的实现：

void BytecodeGenerator::VisitVoid(UnaryOperation* expr) {
  VisitForEffect(expr->expression());
  builder()->LoadUndefined();
}

这个builder()获取到的是BytecodeArrayBuilder类的对象：

BytecodeArrayBuilder& BytecodeArrayBuilder::LoadUndefined() {
  OutputLdaUndefined();
  return *this;
}

不过当我们试图寻找OutputLdaUndefined函数的时候，会发现在代码中搜不到。
这是因为，这些Output函数的名字，是用宏拼出来的：

#define DEFINE_BYTECODE_OUTPUT(name, ...)                             \
  template <typename... Operands>                                     \
  BytecodeNode BytecodeArrayBuilder::Create##name##Node(              \
      Operands... operands) {                                         \
    return BytecodeNodeBuilder<Bytecode::k##name, __VA_ARGS__>::Make( \
        this, operands...);                                           \
  }                                                                   \
                                                                      \
  template <typename... Operands>                                     \
  void BytecodeArrayBuilder::Output##name(Operands... operands) {     \
    BytecodeNode node(Create##name##Node(operands...));               \
    Write(&node);                                                     \
  }                                                                   \
                                                                      \
  template <typename... Operands>                                     \
  void BytecodeArrayBuilder::Output##name(BytecodeLabel* label,       \
                                          Operands... operands) {     \
    DCHECK(Bytecodes::IsForwardJump(Bytecode::k##name));              \
    BytecodeNode node(Create##name##Node(operands...));               \
    WriteJump(&node, label);                                          \
  }
BYTECODE_LIST(DEFINE_BYTECODE_OUTPUT)
#undef DEFINE_BYTECODE_OUTPUT

通过这个宏，会帮我们生成BytecodeArrayBuilder::OutputLdaUndefined 方法，然后它会调用到BytecodeArrayBuilder的Write方法。

  void BytecodeArrayBuilder::OutputLdaUndefined(Operands... operands) {     
    BytecodeNode node(CreateLdaUndefinedNode(operands...));               
    Write(&node);                                                     
  }

然后BytecodeArrayBuilder的Write方法再调用BytecodeArrayWriter的Write方法。

void BytecodeArrayBuilder::Write(BytecodeNode* node) {
  AttachOrEmitDeferredSourceInfo(node);
  bytecode_array_writer_.Write(node);
}

最后通过BytecodeArrayWriter的EmitBytecode函数去最终生成字节码。

void BytecodeArrayWriter::Write(BytecodeNode* node) {
  DCHECK(!Bytecodes::IsJump(node->bytecode()));

  if (exit_seen_in_block_) return;  // Don't emit dead code.
  UpdateExitSeenInBlock(node->bytecode());
  MaybeElideLastBytecode(node->bytecode(), node->source_info().is_valid());

  UpdateSourcePositionTable(node);
  EmitBytecode(node);
}

EmitBytecode的作用，除了将字节码节点所对应的字节码压到生成的字节码栈中之外，还需要根据字节码的特性，将其操作数也压栈：

void BytecodeArrayWriter::EmitBytecode(const BytecodeNode* const node) {
  DCHECK_NE(node->bytecode(), Bytecode::kIllegal);

  Bytecode bytecode = node->bytecode();
  OperandScale operand_scale = node->operand_scale();

  if (operand_scale != OperandScale::kSingle) {
    Bytecode prefix = Bytecodes::OperandScaleToPrefixBytecode(operand_scale);
    bytecodes()->push_back(Bytecodes::ToByte(prefix));
  }
  bytecodes()->push_back(Bytecodes::ToByte(bytecode));

  const uint32_t* const operands = node->operands();
  const int operand_count = node->operand_count();
  const OperandSize* operand_sizes =
      Bytecodes::GetOperandSizes(bytecode, operand_scale);
  for (int i = 0; i < operand_count; ++i) {
    switch (operand_sizes[i]) {
      case OperandSize::kNone:
        UNREACHABLE();
      case OperandSize::kByte:
        bytecodes()->push_back(static_cast<uint8_t>(operands[i]));
        break;
      case OperandSize::kShort: {
        uint16_t operand = static_cast<uint16_t>(operands[i]);
        const uint8_t* raw_operand = reinterpret_cast<const uint8_t*>(&operand);
        bytecodes()->push_back(raw_operand[0]);
        bytecodes()->push_back(raw_operand[1]);
        break;
      }
      case OperandSize::kQuad: {
        const uint8_t* raw_operand =
            reinterpret_cast<const uint8_t*>(&operands[i]);
        bytecodes()->push_back(raw_operand[0]);
        bytecodes()->push_back(raw_operand[1]);
        bytecodes()->push_back(raw_operand[2]);
        bytecodes()->push_back(raw_operand[3]);
        break;
      }
    }
  }
}