// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/module-compiler.h"

#include <algorithm>
#include <atomic>
#include <memory>
#include <queue>

#include "src/api/api-inl.h"
#include "src/base/enum-set.h"
#include "src/base/optional.h"
#include "src/base/platform/mutex.h"
#include "src/base/platform/semaphore.h"
#include "src/base/platform/time.h"
#include "src/codegen/compiler.h"
#include "src/compiler/wasm-compiler.h"
#include "src/debug/debug.h"
#include "src/handles/global-handles-inl.h"
#include "src/logging/counters-scopes.h"
#include "src/logging/metrics.h"
#include "src/tracing/trace-event.h"
#include "src/wasm/code-space-access.h"
#include "src/wasm/module-decoder.h"
#include "src/wasm/pgo.h"
#include "src/wasm/std-object-sizes.h"
#include "src/wasm/streaming-decoder.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-engine.h"
#include "src/wasm/wasm-import-wrapper-cache.h"
#include "src/wasm/wasm-js.h"
#include "src/wasm/wasm-limits.h"
#include "src/wasm/wasm-objects-inl.h"
#include "src/wasm/wasm-result.h"
#include "src/wasm/wasm-serialization.h"

#define TRACE_COMPILE(...)                                 \
  do {                                                     \
    if (v8_flags.trace_wasm_compiler) PrintF(__VA_ARGS__); \
  } while (false)

#define TRACE_STREAMING(...)                                \
  do {                                                      \
    if (v8_flags.trace_wasm_streaming) PrintF(__VA_ARGS__); \
  } while (false)

#define TRACE_LAZY(...)                                            \
  do {                                                             \
    if (v8_flags.trace_wasm_lazy_compilation) PrintF(__VA_ARGS__); \
  } while (false)

namespace v8 {
namespace internal {
namespace wasm {

namespace {

enum class CompileStrategy : uint8_t {
  // Compiles functions on first use. In this case, execution will block until
  // the function's baseline is reached and top tier compilation starts in
  // background (if applicable).
  // Lazy compilation can help to reduce startup time and code size at the risk
  // of blocking execution.
  kLazy,
  // Compiles baseline ahead of execution and starts top tier compilation in
  // background (if applicable).
  kEager,
  // Triggers baseline compilation on first use (just like {kLazy}) with the
  // difference that top tier compilation is started eagerly.
  // This strategy can help to reduce startup time at the risk of blocking
  // execution, but only in its early phase (until top tier compilation
  // finishes).
  kLazyBaselineEagerTopTier,
  // Marker for default strategy.
  kDefault = kEager,
};

class CompilationStateImpl;
class CompilationUnitBuilder;

class V8_NODISCARD BackgroundCompileScope {
 public:
  explicit BackgroundCompileScope(std::weak_ptr<NativeModule> native_module)
      : native_module_(native_module.lock()) {}

  NativeModule* native_module() const {
    DCHECK(native_module_);
    return native_module_.get();
  }
  inline CompilationStateImpl* compilation_state() const;

  bool cancelled() const;

 private:
  // Keep the native module alive while in this scope.
  std::shared_ptr<NativeModule> native_module_;
};

enum CompilationTier { kBaseline = 0, kTopTier = 1, kNumTiers = kTopTier + 1 };

// A set of work-stealing queues (vectors of units). Each background compile
// task owns one of the queues and steals from all others once its own queue
// runs empty.
class CompilationUnitQueues {
 public:
  // Public API for QueueImpl.
  struct Queue {
    bool ShouldPublish(int num_processed_units) const;
  };

  explicit CompilationUnitQueues(int num_declared_functions)
      : num_declared_functions_(num_declared_functions) {
    // Add one first queue, to add units to.
    queues_.emplace_back(std::make_unique<QueueImpl>(0));

#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    for (auto& atomic_counter : num_units_) {
      std::atomic_init(&atomic_counter, size_t{0});
    }
#endif

    top_tier_compiled_ =
        std::make_unique<std::atomic<bool>[]>(num_declared_functions);

#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    for (int i = 0; i < num_declared_functions; i++) {
      std::atomic_init(&top_tier_compiled_.get()[i], false);
    }
#endif
  }

  Queue* GetQueueForTask(int task_id) {
    int required_queues = task_id + 1;
    {
      base::SharedMutexGuard<base::kShared> queues_guard{&queues_mutex_};
      if (V8_LIKELY(static_cast<int>(queues_.size()) >= required_queues)) {
        return queues_[task_id].get();
      }
    }

    // Otherwise increase the number of queues.
    base::SharedMutexGuard<base::kExclusive> queues_guard{&queues_mutex_};
    int num_queues = static_cast<int>(queues_.size());
    while (num_queues < required_queues) {
      int steal_from = num_queues + 1;
      queues_.emplace_back(std::make_unique<QueueImpl>(steal_from));
      ++num_queues;
    }

    // Update the {publish_limit}s of all queues.
    // We want background threads to publish regularly (to avoid contention
    // when they are all publishing at the end). On the other side, each
    // publishing has some overhead (part of it for synchronizing between
    // threads), so it should not happen *too* often. Thus aim for 4-8
    // publishes per thread, but distribute it such that publishing is likely
    // to happen at different times.
    int units_per_thread = num_declared_functions_ / num_queues;
    int min = std::max(10, units_per_thread / 8);
    int queue_id = 0;
    for (auto& queue : queues_) {
      // Set a limit between {min} and {2*min}, but not smaller than {10}.
      int limit = min + (min * queue_id / num_queues);
      queue->publish_limit.store(limit, std::memory_order_relaxed);
      ++queue_id;
    }

    return queues_[task_id].get();
  }
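
  // Illustrative sketch (disabled code, standalone, simplified from the
  // publish-limit logic in {GetQueueForTask} above; the function name is
  // hypothetical). For example, 8 queues over 3200 declared functions give
  // units_per_thread = 400, min = 50, and staggered limits
  // 50, 56, 62, 68, 75, 81, 87, 93, so threads tend to publish at different
  // times instead of contending on the same lock simultaneously.
#if 0
#include <algorithm>
#include <vector>

std::vector<int> ComputePublishLimits(int num_declared_functions,
                                      int num_queues) {
  int units_per_thread = num_declared_functions / num_queues;
  int min = std::max(10, units_per_thread / 8);
  std::vector<int> limits;
  limits.reserve(num_queues);
  for (int queue_id = 0; queue_id < num_queues; ++queue_id) {
    // Each limit lies in [min, 2*min), staggered by queue id.
    limits.push_back(min + (min * queue_id / num_queues));
  }
  return limits;
}
#endif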
  base::Optional<WasmCompilationUnit> GetNextUnit(Queue* queue,
                                                  CompilationTier tier) {
    DCHECK_LT(tier, CompilationTier::kNumTiers);
    if (auto unit = GetNextUnitOfTier(queue, tier)) {
      size_t old_units_count =
          num_units_[tier].fetch_sub(1, std::memory_order_relaxed);
      DCHECK_LE(1, old_units_count);
      USE(old_units_count);
      return unit;
    }
    return {};
  }

  void AddUnits(base::Vector<WasmCompilationUnit> baseline_units,
                base::Vector<WasmCompilationUnit> top_tier_units,
                const WasmModule* module) {
    DCHECK_LT(0, baseline_units.size() + top_tier_units.size());
    // Add to the individual queues in a round-robin fashion. No special care
    // is taken to balance them; they will be balanced by work stealing.
    QueueImpl* queue;
    {
      int queue_to_add = next_queue_to_add.load(std::memory_order_relaxed);
      base::SharedMutexGuard<base::kShared> queues_guard{&queues_mutex_};
      while (!next_queue_to_add.compare_exchange_weak(
          queue_to_add, next_task_id(queue_to_add, queues_.size()),
          std::memory_order_relaxed)) {
        // Retry with updated {queue_to_add}.
      }
      queue = queues_[queue_to_add].get();
    }

    base::MutexGuard guard(&queue->mutex);
    base::Optional<base::MutexGuard> big_units_guard;
    for (auto pair :
         {std::make_pair(CompilationTier::kBaseline, baseline_units),
          std::make_pair(CompilationTier::kTopTier, top_tier_units)}) {
      int tier = pair.first;
      base::Vector<WasmCompilationUnit> units = pair.second;
      if (units.empty()) continue;
      num_units_[tier].fetch_add(units.size(), std::memory_order_relaxed);
      for (WasmCompilationUnit unit : units) {
        size_t func_size = module->functions[unit.func_index()].code.length();
        if (func_size <= kBigUnitsLimit) {
          queue->units[tier].push_back(unit);
        } else {
          if (!big_units_guard) {
            big_units_guard.emplace(&big_units_queue_.mutex);
          }
          big_units_queue_.has_units[tier].store(true,
                                                 std::memory_order_relaxed);
          big_units_queue_.units[tier].emplace(func_size, unit);
        }
      }
    }
  }

  void AddTopTierPriorityUnit(WasmCompilationUnit unit, size_t priority) {
    base::SharedMutexGuard<base::kShared> queues_guard{&queues_mutex_};
    // Add to the individual queues in a round-robin fashion. No special care
    // is taken to balance them; they will be balanced by work stealing.
    // Priorities should only be seen as a hint here; without balancing, we
    // might pop a unit with lower priority from one queue while other queues
    // still hold higher-priority units.
    // Since updating priorities in a std::priority_queue is difficult, we
    // just add new units with higher priorities, and use the
    // {CompilationUnitQueues::top_tier_compiled_} array to discard units for
    // functions which are already being compiled.
    int queue_to_add = next_queue_to_add.load(std::memory_order_relaxed);
    while (!next_queue_to_add.compare_exchange_weak(
        queue_to_add, next_task_id(queue_to_add, queues_.size()),
        std::memory_order_relaxed)) {
      // Retry with updated {queue_to_add}.
    }

    {
      auto* queue = queues_[queue_to_add].get();
      base::MutexGuard guard(&queue->mutex);
      queue->top_tier_priority_units.emplace(priority, unit);
      num_priority_units_.fetch_add(1, std::memory_order_relaxed);
      num_units_[CompilationTier::kTopTier].fetch_add(
          1, std::memory_order_relaxed);
    }
  }

  // Get the current number of units in the queue for |tier|. This is only a
  // momentary snapshot, it's not guaranteed that {GetNextUnit} returns a unit
  // if this method returns non-zero.
  size_t GetSizeForTier(CompilationTier tier) const {
    DCHECK_LT(tier, CompilationTier::kNumTiers);
    return num_units_[tier].load(std::memory_order_relaxed);
  }

  void AllowAnotherTopTierJob(uint32_t func_index) {
    top_tier_compiled_[func_index].store(false, std::memory_order_relaxed);
  }

  void AllowAnotherTopTierJobForAllFunctions() {
    for (int i = 0; i < num_declared_functions_; i++) {
      AllowAnotherTopTierJob(i);
    }
  }

  size_t EstimateCurrentMemoryConsumption() const;
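
  // Illustrative sketch (disabled code, standalone, hypothetical names): the
  // discard-on-pop scheme described in {AddTopTierPriorityUnit} above.
  // Instead of updating priorities inside the std::priority_queue, stale
  // duplicates are filtered out when popped: the first pop for a function
  // wins the atomic exchange, later pops observe {true} and drop the unit.
#if 0
#include <atomic>

bool TryClaimFunctionForTopTier(std::atomic<bool>* top_tier_compiled,
                                int func_index) {
  // Returns true exactly once per function (until the flag is reset again
  // via something like {AllowAnotherTopTierJob}).
  return !top_tier_compiled[func_index].exchange(true,
                                                 std::memory_order_relaxed);
}
#endif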
 private:
  // Functions bigger than {kBigUnitsLimit} will be compiled first, in
  // descending order of their function body size.
  static constexpr size_t kBigUnitsLimit = 4096;

  struct BigUnit {
    BigUnit(size_t func_size, WasmCompilationUnit unit)
        : func_size{func_size}, unit(unit) {}

    size_t func_size;
    WasmCompilationUnit unit;

    bool operator<(const BigUnit& other) const {
      return func_size < other.func_size;
    }
  };

  struct TopTierPriorityUnit {
    TopTierPriorityUnit(int priority, WasmCompilationUnit unit)
        : priority(priority), unit(unit) {}

    size_t priority;
    WasmCompilationUnit unit;

    bool operator<(const TopTierPriorityUnit& other) const {
      return priority < other.priority;
    }
  };

  struct BigUnitsQueue {
    BigUnitsQueue() {
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
      for (auto& atomic : has_units) std::atomic_init(&atomic, false);
#endif
    }

    mutable base::Mutex mutex;

    // Can be read concurrently to check whether any elements are in the
    // queue.
    std::atomic<bool> has_units[CompilationTier::kNumTiers];

    // Protected by {mutex}:
    std::priority_queue<BigUnit> units[CompilationTier::kNumTiers];
  };

  struct QueueImpl : public Queue {
    explicit QueueImpl(int next_steal_task_id)
        : next_steal_task_id(next_steal_task_id) {}

    // Number of units after which the task processing this queue should
    // publish compilation results. Updated (reduced, using relaxed ordering)
    // when new queues are allocated. If there is only one thread running, we
    // can delay publishing arbitrarily.
    std::atomic<int> publish_limit{kMaxInt};

    base::Mutex mutex;

    // All fields below are protected by {mutex}.
    std::vector<WasmCompilationUnit> units[CompilationTier::kNumTiers];
    std::priority_queue<TopTierPriorityUnit> top_tier_priority_units;
    int next_steal_task_id;
  };
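
  // Illustrative sketch (disabled code, standalone): why {BigUnit::operator<}
  // yields largest-function-first order. std::priority_queue is a max-heap,
  // so {top()} returns the element that compares greatest, i.e. the biggest
  // function body.
#if 0
#include <cassert>
#include <queue>

struct Sized {
  size_t size;
  bool operator<(const Sized& other) const { return size < other.size; }
};

void MaxHeapDemo() {
  std::priority_queue<Sized> q;
  q.push({100});
  q.push({5000});
  q.push({4097});
  assert(q.top().size == 5000);  // The largest unit is compiled first.
}
#endif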
  int next_task_id(int task_id, size_t num_queues) const {
    int next = task_id + 1;
    return next == static_cast<int>(num_queues) ? 0 : next;
  }

  base::Optional<WasmCompilationUnit> GetNextUnitOfTier(Queue* public_queue,
                                                        int tier) {
    QueueImpl* queue = static_cast<QueueImpl*>(public_queue);

    // First check whether there is a priority unit. Execute that first.
    if (tier == CompilationTier::kTopTier) {
      if (auto unit = GetTopTierPriorityUnit(queue)) {
        return unit;
      }
    }

    // Then check whether there is a big unit of that tier.
    if (auto unit = GetBigUnitOfTier(tier)) return unit;

    // Finally check whether our own queue has a unit of the wanted tier. If
    // so, return it, otherwise get the task id to steal from.
    int steal_task_id;
    {
      base::MutexGuard mutex_guard(&queue->mutex);
      if (!queue->units[tier].empty()) {
        auto unit = queue->units[tier].back();
        queue->units[tier].pop_back();
        return unit;
      }
      steal_task_id = queue->next_steal_task_id;
    }

    // Try to steal from all other queues. If this succeeds, return one of the
    // stolen units.
    {
      base::SharedMutexGuard<base::kShared> guard{&queues_mutex_};
      for (size_t steal_trials = 0; steal_trials < queues_.size();
           ++steal_trials, ++steal_task_id) {
        if (steal_task_id >= static_cast<int>(queues_.size())) {
          steal_task_id = 0;
        }
        if (auto unit = StealUnitsAndGetFirst(queue, steal_task_id, tier)) {
          return unit;
        }
      }
    }

    // If we reach here, we didn't find any unit of the requested tier.
    return {};
  }

  base::Optional<WasmCompilationUnit> GetBigUnitOfTier(int tier) {
    // Fast path without locking.
    if (!big_units_queue_.has_units[tier].load(std::memory_order_relaxed)) {
      return {};
    }
    base::MutexGuard guard(&big_units_queue_.mutex);
    if (big_units_queue_.units[tier].empty()) return {};
    WasmCompilationUnit unit = big_units_queue_.units[tier].top().unit;
    big_units_queue_.units[tier].pop();
    if (big_units_queue_.units[tier].empty()) {
      big_units_queue_.has_units[tier].store(false, std::memory_order_relaxed);
    }
    return unit;
  }

  base::Optional<WasmCompilationUnit> GetTopTierPriorityUnit(QueueImpl* queue) {
    // Fast path without locking.
    if (num_priority_units_.load(std::memory_order_relaxed) == 0) {
      return {};
    }

    int steal_task_id;
    {
      base::MutexGuard mutex_guard(&queue->mutex);
      while (!queue->top_tier_priority_units.empty()) {
        auto unit = queue->top_tier_priority_units.top().unit;
        queue->top_tier_priority_units.pop();
        num_priority_units_.fetch_sub(1, std::memory_order_relaxed);

        if (!top_tier_compiled_[unit.func_index()].exchange(
                true, std::memory_order_relaxed)) {
          return unit;
        }
        num_units_[CompilationTier::kTopTier].fetch_sub(
            1, std::memory_order_relaxed);
      }
      steal_task_id = queue->next_steal_task_id;
    }

    // Try to steal from all other queues. If this succeeds, return one of the
    // stolen units.
    {
      base::SharedMutexGuard<base::kShared> guard{&queues_mutex_};
      for (size_t steal_trials = 0; steal_trials < queues_.size();
           ++steal_trials, ++steal_task_id) {
        if (steal_task_id >= static_cast<int>(queues_.size())) {
          steal_task_id = 0;
        }
        if (auto unit = StealTopTierPriorityUnit(queue, steal_task_id)) {
          return unit;
        }
      }
    }
    return {};
  }

  // Steal units of {wanted_tier} from {steal_from_task_id} to {queue}. Return
  // first stolen unit (rest put in queue of {task_id}), or {nullopt} if
  // {steal_from_task_id} had no units of {wanted_tier}.
  // Hold a shared lock on {queues_mutex_} when calling this method.
  base::Optional<WasmCompilationUnit> StealUnitsAndGetFirst(
      QueueImpl* queue, int steal_from_task_id, int wanted_tier) {
    auto* steal_queue = queues_[steal_from_task_id].get();
    // Cannot steal from own queue.
    if (steal_queue == queue) return {};
    std::vector<WasmCompilationUnit> stolen;
    base::Optional<WasmCompilationUnit> returned_unit;
    {
      base::MutexGuard guard(&steal_queue->mutex);
      auto* steal_from_vector = &steal_queue->units[wanted_tier];
      if (steal_from_vector->empty()) return {};
      size_t remaining = steal_from_vector->size() / 2;
      auto steal_begin = steal_from_vector->begin() + remaining;
      returned_unit = *steal_begin;
      stolen.assign(steal_begin + 1, steal_from_vector->end());
      steal_from_vector->erase(steal_begin, steal_from_vector->end());
    }
    base::MutexGuard guard(&queue->mutex);
    auto* target_queue = &queue->units[wanted_tier];
    target_queue->insert(target_queue->end(), stolen.begin(), stolen.end());
    queue->next_steal_task_id = steal_from_task_id + 1;
    return returned_unit;
  }

  // Steal one priority unit from {steal_from_task_id} to {task_id}. Return
  // stolen unit, or {nullopt} if {steal_from_task_id} had no priority units.
  // Hold a shared lock on {queues_mutex_} when calling this method.
  base::Optional<WasmCompilationUnit> StealTopTierPriorityUnit(
      QueueImpl* queue, int steal_from_task_id) {
    auto* steal_queue = queues_[steal_from_task_id].get();
    // Cannot steal from own queue.
    if (steal_queue == queue) return {};
    base::Optional<WasmCompilationUnit> returned_unit;
    {
      base::MutexGuard guard(&steal_queue->mutex);
      while (true) {
        if (steal_queue->top_tier_priority_units.empty()) return {};

        auto unit = steal_queue->top_tier_priority_units.top().unit;
        steal_queue->top_tier_priority_units.pop();
        num_priority_units_.fetch_sub(1, std::memory_order_relaxed);

        if (!top_tier_compiled_[unit.func_index()].exchange(
                true, std::memory_order_relaxed)) {
          returned_unit = unit;
          break;
        }
        num_units_[CompilationTier::kTopTier].fetch_sub(
            1, std::memory_order_relaxed);
      }
    }
    base::MutexGuard guard(&queue->mutex);
    queue->next_steal_task_id = steal_from_task_id + 1;
    return returned_unit;
  }

  // {queues_mutex_} protects {queues_}.
  mutable base::SharedMutex queues_mutex_;
  std::vector<std::unique_ptr<QueueImpl>> queues_;

  const int num_declared_functions_;

  BigUnitsQueue big_units_queue_;

  std::atomic<size_t> num_units_[CompilationTier::kNumTiers];
  std::atomic<size_t> num_priority_units_{0};
  std::unique_ptr<std::atomic<bool>[]> top_tier_compiled_;
  std::atomic<int> next_queue_to_add{0};
};
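
// Illustrative sketch (disabled code, standalone, hypothetical names): the
// steal-half policy of {StealUnitsAndGetFirst} above. A thief takes the upper
// half of the victim's vector, returns the first stolen element for immediate
// compilation, and keeps the rest in its own queue; the victim keeps the
// lower half.
#if 0
#include <optional>
#include <vector>

template <typename Unit>
std::optional<Unit> StealHalf(std::vector<Unit>& victim,
                              std::vector<Unit>& thief) {
  if (victim.empty()) return std::nullopt;
  size_t remaining = victim.size() / 2;
  auto steal_begin = victim.begin() + remaining;
  Unit first = *steal_begin;
  thief.insert(thief.end(), steal_begin + 1, victim.end());
  victim.erase(steal_begin, victim.end());
  return first;
}
#endif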
size_t CompilationUnitQueues::EstimateCurrentMemoryConsumption() const {
  UPDATE_WHEN_CLASS_CHANGES(CompilationUnitQueues, 248);
  UPDATE_WHEN_CLASS_CHANGES(QueueImpl, 144);
  UPDATE_WHEN_CLASS_CHANGES(BigUnitsQueue, 120);
  // Not including sizeof(CompilationUnitQueues) because that's included in
  // sizeof(CompilationStateImpl).
  size_t result = 0;
  {
    base::SharedMutexGuard<base::kShared> lock(&queues_mutex_);
    result += ContentSize(queues_) + queues_.size() * sizeof(QueueImpl);
    for (const auto& q : queues_) {
      result += ContentSize(*q->units);
      result += q->top_tier_priority_units.size() * sizeof(TopTierPriorityUnit);
    }
  }
  {
    base::MutexGuard lock(&big_units_queue_.mutex);
    result += big_units_queue_.units[0].size() * sizeof(BigUnit);
    result += big_units_queue_.units[1].size() * sizeof(BigUnit);
  }
  // For {top_tier_compiled_}.
  result += sizeof(std::atomic<bool>) * num_declared_functions_;
  return result;
}

bool CompilationUnitQueues::Queue::ShouldPublish(
    int num_processed_units) const {
  auto* queue = static_cast<const QueueImpl*>(this);
  return num_processed_units >=
         queue->publish_limit.load(std::memory_order_relaxed);
}

// The {CompilationStateImpl} keeps track of the compilation state of the
// owning NativeModule, i.e. which functions are left to be compiled.
// It contains a task manager to allow parallel and asynchronous background
// compilation of functions.
// Its public interface {CompilationState} lives in compilation-environment.h.
class CompilationStateImpl {
 public:
  CompilationStateImpl(const std::shared_ptr<NativeModule>& native_module,
                       std::shared_ptr<Counters> async_counters,
                       DynamicTiering dynamic_tiering);
  ~CompilationStateImpl() {
    if (js_to_wasm_wrapper_job_ && js_to_wasm_wrapper_job_->IsValid())
      js_to_wasm_wrapper_job_->CancelAndDetach();
    if (baseline_compile_job_->IsValid())
      baseline_compile_job_->CancelAndDetach();
    if (top_tier_compile_job_->IsValid())
      top_tier_compile_job_->CancelAndDetach();
  }

  // Call right after the constructor, after the {compilation_state_} field in
  // the {NativeModule} has been initialized.
  void InitCompileJob();

  // {kCancelUnconditionally}: Cancel all compilation.
  // {kCancelInitialCompilation}: Cancel all compilation if initial (baseline)
  // compilation is not finished yet.
  enum CancellationPolicy { kCancelUnconditionally, kCancelInitialCompilation };
  void CancelCompilation(CancellationPolicy);

  bool cancelled() const;

  // Apply a compilation hint to the initial compilation progress, updating
  // all internal fields accordingly.
  void ApplyCompilationHintToInitialProgress(const WasmCompilationHint& hint,
                                             size_t hint_idx);

  // Use PGO information to choose a better initial compilation progress
  // (tiering decisions).
  void ApplyPgoInfoToInitialProgress(ProfileInformation* pgo_info);

  // Apply PGO information to a fully initialized compilation state. Also
  // trigger compilation as needed.
  void ApplyPgoInfoLate(ProfileInformation* pgo_info);

  // Initialize compilation progress. Set compilation tiers to expect for
  // baseline and top tier compilation. Must be set before
  // {CommitCompilationUnits} is invoked which triggers background compilation.
  void InitializeCompilationProgress(int num_import_wrappers,
                                     int num_export_wrappers,
                                     ProfileInformation* pgo_info);

  void InitializeCompilationProgressAfterDeserialization(
      base::Vector<const int> lazy_functions,
      base::Vector<const int> eager_functions);

  // Initializes compilation units based on the information encoded in the
  // {compilation_progress_}.
  void InitializeCompilationUnits(
      std::unique_ptr<CompilationUnitBuilder> builder);

  // Adds compilation units for another function to the
  // {CompilationUnitBuilder}. This function is the streaming compilation
  // equivalent to {InitializeCompilationUnits}.
  void AddCompilationUnit(CompilationUnitBuilder* builder, int func_index);

  // Add the callback to be called on compilation events. Needs to be
  // set before {CommitCompilationUnits} is run to ensure that it receives all
  // events. The callback object must support being deleted from any thread.
  void AddCallback(std::unique_ptr<CompilationEventCallback> callback);

  // Inserts new functions to compile and kicks off compilation.
  void CommitCompilationUnits(
      base::Vector<WasmCompilationUnit> baseline_units,
      base::Vector<WasmCompilationUnit> top_tier_units,
      base::Vector<std::shared_ptr<JSToWasmWrapperCompilationUnit>>
          js_to_wasm_wrapper_units);
  void CommitTopTierCompilationUnit(WasmCompilationUnit);
  void AddTopTierPriorityCompilationUnit(WasmCompilationUnit, size_t);

  CompilationUnitQueues::Queue* GetQueueForCompileTask(int task_id);

  base::Optional<WasmCompilationUnit> GetNextCompilationUnit(
      CompilationUnitQueues::Queue*, CompilationTier tier);

  std::shared_ptr<JSToWasmWrapperCompilationUnit>
  GetJSToWasmWrapperCompilationUnit(size_t index);
  void FinalizeJSToWasmWrappers(Isolate* isolate, const WasmModule* module);

  void OnFinishedUnits(base::Vector<WasmCode*>);
  void OnFinishedJSToWasmWrapperUnits();

  void OnCompilationStopped(WasmFeatures detected);
  void PublishDetectedFeatures(Isolate*);
  void SchedulePublishCompilationResults(
      std::vector<std::unique_ptr<WasmCode>> unpublished_code,
      CompilationTier tier);

  size_t NumOutstandingCompilations(CompilationTier tier) const;

  void SetError();

  void WaitForCompilationEvent(CompilationEvent event);

  void TierUpAllFunctions();

  void AllowAnotherTopTierJob(uint32_t func_index) {
    compilation_unit_queues_.AllowAnotherTopTierJob(func_index);
  }

  void AllowAnotherTopTierJobForAllFunctions() {
    compilation_unit_queues_.AllowAnotherTopTierJobForAllFunctions();
  }

  bool failed() const {
    return compile_failed_.load(std::memory_order_relaxed);
  }

  bool baseline_compilation_finished() const {
    base::MutexGuard guard(&callbacks_mutex_);
    return outstanding_baseline_units_ == 0 &&
           !has_outstanding_export_wrappers_;
  }

  DynamicTiering dynamic_tiering() const { return dynamic_tiering_; }

  Counters* counters() const { return async_counters_.get(); }

  void SetWireBytesStorage(
      std::shared_ptr<WireBytesStorage> wire_bytes_storage) {
    base::MutexGuard guard(&mutex_);
    wire_bytes_storage_ = std::move(wire_bytes_storage);
  }

  std::shared_ptr<WireBytesStorage> GetWireBytesStorage() const {
    base::MutexGuard guard(&mutex_);
    DCHECK_NOT_NULL(wire_bytes_storage_);
    return wire_bytes_storage_;
  }

  void set_compilation_id(int compilation_id) {
    DCHECK_EQ(compilation_id_, kInvalidCompilationID);
    compilation_id_ = compilation_id;
  }

  std::weak_ptr<NativeModule> const native_module_weak() const {
    return native_module_weak_;
  }

  size_t EstimateCurrentMemoryConsumption() const;

 private:
  void AddCompilationUnitInternal(CompilationUnitBuilder* builder,
                                  int function_index,
                                  uint8_t function_progress);

  // Trigger callbacks according to the internal counters below
  // (outstanding_...).
  // Hold the {callbacks_mutex_} when calling this method.
  void TriggerCallbacks();

  void PublishCompilationResults(
      std::vector<std::unique_ptr<WasmCode>> unpublished_code);
  void PublishCode(base::Vector<std::unique_ptr<WasmCode>> codes);

  NativeModule* const native_module_;
  std::weak_ptr<NativeModule> const native_module_weak_;
  const std::shared_ptr<Counters> async_counters_;

  // Compilation error, atomically updated. This flag can be updated and read
  // using relaxed semantics.
  std::atomic<bool> compile_failed_{false};

  // True if compilation was cancelled and worker threads should return. This
  // flag can be updated and read using relaxed semantics.
  std::atomic<bool> compile_cancelled_{false};

  CompilationUnitQueues compilation_unit_queues_;

  // Wrapper compilation units are stored in shared_ptrs so that they are kept
  // alive by the tasks even if the NativeModule dies.
  std::vector<std::shared_ptr<JSToWasmWrapperCompilationUnit>>
      js_to_wasm_wrapper_units_;

  // Cache the dynamic tiering configuration to be consistent for the whole
  // compilation.
  const DynamicTiering dynamic_tiering_;

  // This mutex protects all information of this {CompilationStateImpl} which
  // is being accessed concurrently.
  mutable base::Mutex mutex_;

  // The compile job handles, initialized right after construction of
  // {CompilationStateImpl}.
  std::unique_ptr<JobHandle> js_to_wasm_wrapper_job_;
  std::unique_ptr<JobHandle> baseline_compile_job_;
  std::unique_ptr<JobHandle> top_tier_compile_job_;

  // The compilation id to identify trace events linked to this compilation.
  static constexpr int kInvalidCompilationID = -1;
  int compilation_id_ = kInvalidCompilationID;

  /////////////////////////////////////////////////////////////////////////////
  // Protected by {mutex_}:

  // Features detected to be used in this module. Features can be detected
  // as a module is being compiled.
  WasmFeatures detected_features_ = WasmFeatures::None();

  // Abstraction over the storage of the wire bytes. Held in a shared_ptr so
  // that background compilation jobs can keep the storage alive while
  // compiling.
  std::shared_ptr<WireBytesStorage> wire_bytes_storage_;

  // End of fields protected by {mutex_}.
  /////////////////////////////////////////////////////////////////////////////

  // This mutex protects the callbacks vector, and the counters used to
  // determine which callbacks to call. The counters plus the callbacks
  // themselves need to be synchronized to ensure correct order of events.
  mutable base::Mutex callbacks_mutex_;

  /////////////////////////////////////////////////////////////////////////////
  // Protected by {callbacks_mutex_}:

  // Callbacks to be called on compilation events.
  std::vector<std::unique_ptr<CompilationEventCallback>> callbacks_;

  // Events that already happened.
  base::EnumSet<CompilationEvent> finished_events_;

  int outstanding_baseline_units_ = 0;
  bool has_outstanding_export_wrappers_ = false;
  // The amount of generated top tier code since the last
  // {kFinishedCompilationChunk} event.
  size_t bytes_since_last_chunk_ = 0;
  std::vector<uint8_t> compilation_progress_;

  // End of fields protected by {callbacks_mutex_}.
  /////////////////////////////////////////////////////////////////////////////

  struct PublishState {
    // {mutex_} protects {publish_queue_} and {publisher_running_}.
    base::Mutex mutex_;
    std::vector<std::unique_ptr<WasmCode>> publish_queue_;
    bool publisher_running_ = false;
  };
  PublishState publish_state_[CompilationTier::kNumTiers];

  // Encoding of fields in the {compilation_progress_} vector.
  using RequiredBaselineTierField = base::BitField8<ExecutionTier, 0, 2>;
  using RequiredTopTierField = base::BitField8<ExecutionTier, 2, 2>;
  using ReachedTierField = base::BitField8<ExecutionTier, 4, 2>;
};
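
// Illustrative sketch (disabled code, standalone, hypothetical names and
// enum values): how the three 2-bit fields above pack one byte of
// {compilation_progress_} per function, modeled with plain shifts instead of
// base::BitField8.
#if 0
#include <cassert>
#include <cstdint>

enum Tier : uint8_t { kNone = 0, kLiftoff = 1, kTurbofan = 2 };

uint8_t EncodeProgress(Tier required_baseline, Tier required_top,
                       Tier reached) {
  return static_cast<uint8_t>(static_cast<uint8_t>(required_baseline) |  // 0-1
                              static_cast<uint8_t>(required_top) << 2 |  // 2-3
                              static_cast<uint8_t>(reached) << 4);       // 4-5
}

void ProgressDemo() {
  uint8_t progress = EncodeProgress(kLiftoff, kTurbofan, kNone);
  assert(((progress >> 2) & 3) == kTurbofan);  // RequiredTopTierField.
}
#endif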
CompilationStateImpl* Impl(CompilationState* compilation_state) {
  return reinterpret_cast<CompilationStateImpl*>(compilation_state);
}
const CompilationStateImpl* Impl(const CompilationState* compilation_state) {
  return reinterpret_cast<const CompilationStateImpl*>(compilation_state);
}

CompilationStateImpl* BackgroundCompileScope::compilation_state() const {
  DCHECK(native_module_);
  return Impl(native_module_->compilation_state());
}

size_t CompilationStateImpl::EstimateCurrentMemoryConsumption() const {
  UPDATE_WHEN_CLASS_CHANGES(CompilationStateImpl, 704);
  UPDATE_WHEN_CLASS_CHANGES(JSToWasmWrapperCompilationUnit, 40);
  size_t result = sizeof(CompilationStateImpl);
  result += compilation_unit_queues_.EstimateCurrentMemoryConsumption();

  result += ContentSize(js_to_wasm_wrapper_units_);
  result += js_to_wasm_wrapper_units_.size() *
            (sizeof(JSToWasmWrapperCompilationUnit) +
             sizeof(TurbofanCompilationJob));

  {
    base::MutexGuard lock(&callbacks_mutex_);
    result += ContentSize(callbacks_);
    // Concrete subclasses of CompilationEventCallback will be bigger, but we
    // can't know that here.
    result += callbacks_.size() * sizeof(CompilationEventCallback);
    result += ContentSize(compilation_progress_);
  }
  if (v8_flags.trace_wasm_offheap_memory) {
    PrintF("CompilationStateImpl: %zu\n", result);
  }
  return result;
}

bool BackgroundCompileScope::cancelled() const {
  return native_module_ == nullptr ||
         Impl(native_module_->compilation_state())->cancelled();
}

void UpdateFeatureUseCounts(Isolate* isolate, WasmFeatures detected) {
  using Feature = v8::Isolate::UseCounterFeature;
  constexpr static std::pair<WasmFeature, Feature> kUseCounters[] = {
      {kFeature_reftypes, Feature::kWasmRefTypes},
      {kFeature_simd, Feature::kWasmSimdOpcodes},
      {kFeature_threads, Feature::kWasmThreadOpcodes},
      {kFeature_eh, Feature::kWasmExceptionHandling},
      {kFeature_memory64, Feature::kWasmMemory64},
      {kFeature_multi_memory, Feature::kWasmMultiMemory},
      {kFeature_gc, Feature::kWasmGC},
      {kFeature_imported_strings, Feature::kWasmImportedStrings},
  };

  for (auto& feature : kUseCounters) {
    if (detected.contains(feature.first)) isolate->CountUsage(feature.second);
  }
}

}  // namespace

//////////////////////////////////////////////////////
// PIMPL implementation of {CompilationState}.

CompilationState::~CompilationState() { Impl(this)->~CompilationStateImpl(); }

void CompilationState::InitCompileJob() { Impl(this)->InitCompileJob(); }

void CompilationState::CancelCompilation() {
  Impl(this)->CancelCompilation(CompilationStateImpl::kCancelUnconditionally);
}

void CompilationState::CancelInitialCompilation() {
  Impl(this)->CancelCompilation(
      CompilationStateImpl::kCancelInitialCompilation);
}

void CompilationState::SetError() { Impl(this)->SetError(); }

void CompilationState::SetWireBytesStorage(
    std::shared_ptr<WireBytesStorage> wire_bytes_storage) {
  Impl(this)->SetWireBytesStorage(std::move(wire_bytes_storage));
}

std::shared_ptr<WireBytesStorage> CompilationState::GetWireBytesStorage()
    const {
  return Impl(this)->GetWireBytesStorage();
}

void CompilationState::AddCallback(
    std::unique_ptr<CompilationEventCallback> callback) {
  return Impl(this)->AddCallback(std::move(callback));
}

void CompilationState::TierUpAllFunctions() {
  Impl(this)->TierUpAllFunctions();
}

void CompilationState::AllowAnotherTopTierJob(uint32_t func_index) {
  Impl(this)->AllowAnotherTopTierJob(func_index);
}

void CompilationState::AllowAnotherTopTierJobForAllFunctions() {
  Impl(this)->AllowAnotherTopTierJobForAllFunctions();
}

void CompilationState::InitializeAfterDeserialization(
    base::Vector<const int> lazy_functions,
    base::Vector<const int> eager_functions) {
  Impl(this)->InitializeCompilationProgressAfterDeserialization(
      lazy_functions, eager_functions);
}

bool CompilationState::failed() const { return Impl(this)->failed(); }

bool CompilationState::baseline_compilation_finished() const {
  return Impl(this)->baseline_compilation_finished();
}

void CompilationState::set_compilation_id(int compilation_id) {
  Impl(this)->set_compilation_id(compilation_id);
}

DynamicTiering CompilationState::dynamic_tiering() const {
  return Impl(this)->dynamic_tiering();
}

size_t CompilationState::EstimateCurrentMemoryConsumption() const {
  return Impl(this)->EstimateCurrentMemoryConsumption();
}

// static
std::unique_ptr<CompilationState> CompilationState::New(
    const std::shared_ptr<NativeModule>& native_module,
    std::shared_ptr<Counters> async_counters,
    DynamicTiering dynamic_tiering) {
  return std::unique_ptr<CompilationState>(reinterpret_cast<CompilationState*>(
      new CompilationStateImpl(std::move(native_module),
                               std::move(async_counters), dynamic_tiering)));
}

// End of PIMPL implementation of {CompilationState}.
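
// Illustrative sketch (disabled code, standalone, hypothetical names): the
// PIMPL variant used in the section above. The public object is never
// constructed as such; pointers to it are reinterpret_cast back and forth to
// the private implementation, keeping the public class an opaque facade in
// the header.
#if 0
class Public;

class PrivateImpl {
 public:
  int value() const { return 42; }
};

Public* Wrap(PrivateImpl* impl) { return reinterpret_cast<Public*>(impl); }
PrivateImpl* Unwrap(Public* pub) {
  return reinterpret_cast<PrivateImpl*>(pub);
}
#endif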
//////////////////////////////////////////////////////

namespace {

ExecutionTier ApplyHintToExecutionTier(WasmCompilationHintTier hint,
                                       ExecutionTier default_tier) {
  switch (hint) {
    case WasmCompilationHintTier::kDefault:
      return default_tier;
    case WasmCompilationHintTier::kBaseline:
      return ExecutionTier::kLiftoff;
    case WasmCompilationHintTier::kOptimized:
      return ExecutionTier::kTurbofan;
  }
  UNREACHABLE();
}

const WasmCompilationHint* GetCompilationHint(const WasmModule* module,
                                              uint32_t func_index) {
  DCHECK_LE(module->num_imported_functions, func_index);
  uint32_t hint_index = declared_function_index(module, func_index);
  const std::vector<WasmCompilationHint>& compilation_hints =
      module->compilation_hints;
  if (hint_index < compilation_hints.size()) {
    return &compilation_hints[hint_index];
  }
  return nullptr;
}

CompileStrategy GetCompileStrategy(const WasmModule* module,
                                   WasmFeatures enabled_features,
                                   uint32_t func_index, bool lazy_module) {
  if (lazy_module) return CompileStrategy::kLazy;
  if (!enabled_features.has_compilation_hints()) {
    return CompileStrategy::kDefault;
  }
  auto* hint = GetCompilationHint(module, func_index);
  if (hint == nullptr) return CompileStrategy::kDefault;
  switch (hint->strategy) {
    case WasmCompilationHintStrategy::kLazy:
      return CompileStrategy::kLazy;
    case WasmCompilationHintStrategy::kEager:
      return CompileStrategy::kEager;
    case WasmCompilationHintStrategy::kLazyBaselineEagerTopTier:
      return CompileStrategy::kLazyBaselineEagerTopTier;
    case WasmCompilationHintStrategy::kDefault:
      return CompileStrategy::kDefault;
  }
}

struct ExecutionTierPair {
  ExecutionTier baseline_tier;
  ExecutionTier top_tier;
};

// Pass the debug state as a separate parameter to avoid data races: the debug
// state may change between its use here and its use at the call site. To have
// a consistent view on the debug state, the caller reads the debug state once
// and then passes it to this function.
ExecutionTierPair GetDefaultTiersPerModule(NativeModule* native_module,
                                           DynamicTiering dynamic_tiering,
                                           DebugState is_in_debug_state,
                                           bool lazy_module) {
  const WasmModule* module = native_module->module();
  if (is_asmjs_module(module)) {
    return {ExecutionTier::kTurbofan, ExecutionTier::kTurbofan};
  }
  if (lazy_module) {
    return {ExecutionTier::kNone, ExecutionTier::kNone};
  }
  if (is_in_debug_state) {
    return {ExecutionTier::kLiftoff, ExecutionTier::kLiftoff};
  }
  ExecutionTier baseline_tier =
      v8_flags.liftoff ? ExecutionTier::kLiftoff : ExecutionTier::kTurbofan;
  bool eager_tier_up = !dynamic_tiering && v8_flags.wasm_tier_up;
  ExecutionTier top_tier =
      eager_tier_up ? ExecutionTier::kTurbofan : baseline_tier;
  return {baseline_tier, top_tier};
}

ExecutionTierPair GetLazyCompilationTiers(NativeModule* native_module,
                                          uint32_t func_index,
                                          DebugState is_in_debug_state) {
  DynamicTiering dynamic_tiering =
      Impl(native_module->compilation_state())->dynamic_tiering();
  // For lazy compilation, get the tiers we would use if lazy compilation is
  // disabled.
  constexpr bool kNotLazy = false;
  ExecutionTierPair tiers = GetDefaultTiersPerModule(
      native_module, dynamic_tiering, is_in_debug_state, kNotLazy);
  // If we are in debug mode, we ignore compilation hints.
  if (is_in_debug_state) return tiers;

  // Check if compilation hints override default tiering behaviour.
  if (native_module->enabled_features().has_compilation_hints()) {
    if (auto* hint = GetCompilationHint(native_module->module(), func_index)) {
      tiers.baseline_tier =
          ApplyHintToExecutionTier(hint->baseline_tier, tiers.baseline_tier);
      tiers.top_tier =
          ApplyHintToExecutionTier(hint->top_tier, tiers.top_tier);
    }
  }

  if (V8_UNLIKELY(v8_flags.wasm_tier_up_filter >= 0 &&
                  func_index !=
                      static_cast<uint32_t>(v8_flags.wasm_tier_up_filter))) {
    tiers.top_tier = tiers.baseline_tier;
  }

  // Correct top tier if necessary.
  static_assert(ExecutionTier::kLiftoff < ExecutionTier::kTurbofan,
                "Assume an order on execution tiers");
  if (tiers.baseline_tier > tiers.top_tier) {
    tiers.top_tier = tiers.baseline_tier;
  }
  return tiers;
}
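
// Illustrative sketch (disabled code, standalone, hypothetical enum values):
// the clamping step at the end of {GetLazyCompilationTiers}. A hint may
// request a baseline tier above the top tier; the top tier is then raised so
// that baseline <= top always holds.
#if 0
#include <cassert>

enum Tier { kLiftoff = 1, kTurbofan = 2 };

void ClampDemo() {
  Tier baseline = kTurbofan;  // Hint requested an optimized baseline.
  Tier top = kLiftoff;        // Hint requested a baseline-only top tier.
  if (baseline > top) top = baseline;
  assert(top == kTurbofan);
}
#endif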
// The {CompilationUnitBuilder} builds compilation units and stores them in an
// internal buffer. The buffer is moved into the working queue of the
// {CompilationStateImpl} when {Commit} is called.
class CompilationUnitBuilder {
 public:
  explicit CompilationUnitBuilder(NativeModule* native_module)
      : native_module_(native_module) {}

  void AddImportUnit(uint32_t func_index) {
    DCHECK_GT(native_module_->module()->num_imported_functions, func_index);
    baseline_units_.emplace_back(func_index, ExecutionTier::kNone,
                                 kNotForDebugging);
  }

  void AddJSToWasmWrapperUnit(
      std::shared_ptr<JSToWasmWrapperCompilationUnit> unit) {
    js_to_wasm_wrapper_units_.emplace_back(std::move(unit));
  }

  void AddBaselineUnit(int func_index, ExecutionTier tier) {
    baseline_units_.emplace_back(func_index, tier, kNotForDebugging);
  }

  void AddTopTierUnit(int func_index, ExecutionTier tier) {
    tiering_units_.emplace_back(func_index, tier, kNotForDebugging);
  }

  void Commit() {
    if (baseline_units_.empty() && tiering_units_.empty() &&
        js_to_wasm_wrapper_units_.empty()) {
      return;
    }
    compilation_state()->CommitCompilationUnits(
        base::VectorOf(baseline_units_), base::VectorOf(tiering_units_),
        base::VectorOf(js_to_wasm_wrapper_units_));
    Clear();
  }

  void Clear() {
    baseline_units_.clear();
    tiering_units_.clear();
    js_to_wasm_wrapper_units_.clear();
  }

  const WasmModule* module() { return native_module_->module(); }

 private:
  CompilationStateImpl* compilation_state() const {
    return Impl(native_module_->compilation_state());
  }

  NativeModule* const native_module_;
  std::vector<WasmCompilationUnit> baseline_units_;
  std::vector<WasmCompilationUnit> tiering_units_;
  std::vector<std::shared_ptr<JSToWasmWrapperCompilationUnit>>
      js_to_wasm_wrapper_units_;
};

DecodeResult ValidateSingleFunction(const WasmModule* module, int func_index,
                                    base::Vector<const uint8_t> code,
                                    WasmFeatures enabled_features) {
  // Sometimes functions get validated unpredictably in the background, for
  // debugging or when inlining one function into another. We check here if
  // that is the case, and exit early if so.
  if (module->function_was_validated(func_index)) return {};
  const WasmFunction* func = &module->functions[func_index];
  FunctionBody body{func->sig, func->code.offset(), code.begin(), code.end()};
  WasmFeatures detected_features;
  DecodeResult result =
      ValidateFunctionBody(enabled_features, module, &detected_features, body);
  if (result.ok()) module->set_function_validated(func_index);
  return result;
}

enum OnlyLazyFunctions : bool {
  kAllFunctions = false,
  kOnlyLazyFunctions = true,
};

bool IsLazyModule(const WasmModule* module) {
  return v8_flags.wasm_lazy_compilation ||
         (v8_flags.asm_wasm_lazy_compilation && is_asmjs_module(module));
}

class CompileLazyTimingScope {
 public:
  CompileLazyTimingScope(Counters* counters, NativeModule* native_module)
      : counters_(counters), native_module_(native_module) {
    timer_.Start();
  }

  ~CompileLazyTimingScope() {
    base::TimeDelta elapsed = timer_.Elapsed();
    native_module_->AddLazyCompilationTimeSample(elapsed.InMicroseconds());
    counters_->wasm_lazy_compile_time()->AddTimedSample(elapsed);
  }

 private:
  Counters* counters_;
  NativeModule* native_module_;
  base::ElapsedTimer timer_;
};

}  // namespace
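
// Illustrative sketch (disabled code, standalone, using std::chrono instead
// of base::ElapsedTimer): the RAII pattern behind {CompileLazyTimingScope}.
// The sample is recorded in the destructor, so every exit path of the lazy
// compile, including early returns, is measured.
#if 0
#include <chrono>
#include <cstdio>

class ScopedTimer {
 public:
  ScopedTimer() : start_(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start_)
                  .count();
    std::printf("lazy compile took %lld us\n", static_cast<long long>(us));
  }

 private:
  std::chrono::steady_clock::time_point start_;
};
#endif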
bool CompileLazy(Isolate* isolate, Tagged<WasmInstanceObject> instance,
                 int func_index) {
  DisallowGarbageCollection no_gc;
  Tagged<WasmModuleObject> module_object = instance->module_object();
  NativeModule* native_module = module_object->native_module();
  Counters* counters = isolate->counters();

  // Put the timer scope around everything, including the
  // {CodeSpaceWriteScope} and its destruction, to measure complete overhead
  // (apart from the runtime function itself, which has constant overhead).
  base::Optional<CompileLazyTimingScope> lazy_compile_time_scope;
  if (base::TimeTicks::IsHighResolution()) {
    lazy_compile_time_scope.emplace(counters, native_module);
  }

  DCHECK(!native_module->lazy_compile_frozen());

  TRACE_LAZY("Compiling wasm-function#%d.\n", func_index);

  CompilationStateImpl* compilation_state =
      Impl(native_module->compilation_state());
  DebugState is_in_debug_state = native_module->IsInDebugState();
  ExecutionTierPair tiers =
      GetLazyCompilationTiers(native_module, func_index, is_in_debug_state);

  DCHECK_LE(native_module->num_imported_functions(), func_index);
  DCHECK_LT(func_index, native_module->num_functions());
  WasmCompilationUnit baseline_unit{
      func_index, tiers.baseline_tier,
      is_in_debug_state ? kForDebugging : kNotForDebugging};
  CompilationEnv env = native_module->CreateCompilationEnv();
  WasmFeatures detected_features;
  WasmCompilationResult result = baseline_unit.ExecuteCompilation(
      &env, compilation_state->GetWireBytesStorage().get(), counters,
      &detected_features);
  compilation_state->OnCompilationStopped(detected_features);

  // During lazy compilation, we can only get compilation errors when
  // {--wasm-lazy-validation} is enabled. Otherwise, the module was fully
  // verified before starting its execution.
  CHECK_IMPLIES(result.failed(), v8_flags.wasm_lazy_validation);
  if (result.failed()) {
    return false;
  }

  WasmCodeRefScope code_ref_scope;
  WasmCode* code =
      native_module->PublishCode(native_module->AddCompiledCode(result));
  DCHECK_EQ(func_index, code->index());

  if (V8_UNLIKELY(native_module->log_code())) {
    GetWasmEngine()->LogCode(base::VectorOf(&code, 1));
    // Log the code immediately in the current isolate.
    GetWasmEngine()->LogOutstandingCodesForIsolate(isolate);
  }

  counters->wasm_lazily_compiled_functions()->Increment();

  const WasmModule* module = native_module->module();
  const bool lazy_module = IsLazyModule(module);
  if (GetCompileStrategy(module, native_module->enabled_features(), func_index,
                         lazy_module) == CompileStrategy::kLazy &&
      tiers.baseline_tier < tiers.top_tier) {
    WasmCompilationUnit tiering_unit{func_index, tiers.top_tier,
                                     kNotForDebugging};
    compilation_state->CommitTopTierCompilationUnit(tiering_unit);
  }
  return true;
}

void ThrowLazyCompilationError(Isolate* isolate,
                               const NativeModule* native_module,
                               int func_index) {
  const WasmModule* module = native_module->module();

  CompilationStateImpl* compilation_state =
      Impl(native_module->compilation_state());
  const WasmFunction* func = &module->functions[func_index];
  base::Vector<const uint8_t> code =
      compilation_state->GetWireBytesStorage()->GetCode(func->code);

  auto enabled_features = native_module->enabled_features();
  DecodeResult decode_result =
      ValidateSingleFunction(module, func_index, code, enabled_features);

  CHECK(decode_result.failed());
  wasm::ErrorThrower thrower(isolate, nullptr);
  thrower.CompileFailed(GetWasmErrorWithName(native_module->wire_bytes(),
                                             func_index, module,
                                             std::move(decode_result).error()));
}

class TransitiveTypeFeedbackProcessor {
 public:
  static void Process(Tagged<WasmInstanceObject> instance, int func_index) {
    TransitiveTypeFeedbackProcessor{instance, func_index}.ProcessQueue();
  }

 private:
  TransitiveTypeFeedbackProcessor(Tagged<WasmInstanceObject> instance,
                                  int func_index)
      : instance_(instance),
        module_(instance->module()),
        mutex_guard(&module_->type_feedback.mutex),
        feedback_for_function_(module_->type_feedback.feedback_for_function) {
    queue_.insert(func_index);
  }

  ~TransitiveTypeFeedbackProcessor() { DCHECK(queue_.empty()); }

  void ProcessQueue() {
    while (!queue_.empty()) {
      auto next = queue_.cbegin();
      ProcessFunction(*next);
      queue_.erase(next);
    }
  }

  void ProcessFunction(int func_index);

  void EnqueueCallees(const std::vector<CallSiteFeedback>& feedback) {
    for (size_t i = 0; i < feedback.size(); i++) {
      const CallSiteFeedback& csf = feedback[i];
      for (int j = 0; j < csf.num_cases(); j++) {
        int func = csf.function_index(j);
        // Don't spend time on calls that have never been executed.
        if (csf.call_count(j) == 0) continue;
        // Don't recompute feedback that has already been processed.
        auto existing = feedback_for_function_.find(func);
        if (existing != feedback_for_function_.end() &&
            existing->second.feedback_vector.size() > 0) {
          continue;
        }
        queue_.insert(func);
      }
    }
  }

  DisallowGarbageCollection no_gc_scope_;
  Tagged<WasmInstanceObject> instance_;
  const WasmModule* const module_;
  // TODO(jkummerow): Check if it makes a difference to apply any updates
  // as a single batch at the end.
  base::SharedMutexGuard<base::kExclusive> mutex_guard;
  std::unordered_map<uint32_t, FunctionTypeFeedback>& feedback_for_function_;
  std::set<int> queue_;
};
class FeedbackMaker {
 public:
  FeedbackMaker(Tagged<WasmInstanceObject> instance, int func_index,
                int num_calls)
      : instance_(instance),
        num_imported_functions_(
            static_cast<int>(instance->module()->num_imported_functions)),
        func_index_(func_index) {
    result_.reserve(num_calls);
  }

  void AddCandidate(Tagged<Object> maybe_function, int count) {
    if (!IsWasmInternalFunction(maybe_function)) return;
    Tagged<WasmInternalFunction> function =
        WasmInternalFunction::cast(maybe_function);
    if (function->ref() != instance_) {
      // Not a wasm function, or not a function declared in this instance.
      return;
    }
    if (function->function_index() < num_imported_functions_) return;
    AddCall(function->function_index(), count);
  }

  void AddCall(int target, int count) {
    // Keep the cache sorted (using insertion-sort), highest count first.
    int insertion_index = 0;
    while (insertion_index < cache_usage_ &&
           counts_cache_[insertion_index] >= count) {
      insertion_index++;
    }
    for (int shifted_index = cache_usage_ - 1;
         shifted_index >= insertion_index; shifted_index--) {
      targets_cache_[shifted_index + 1] = targets_cache_[shifted_index];
      counts_cache_[shifted_index + 1] = counts_cache_[shifted_index];
    }
    targets_cache_[insertion_index] = target;
    counts_cache_[insertion_index] = count;
    cache_usage_++;
  }

  void FinalizeCall() {
    if (cache_usage_ == 0) {
      result_.emplace_back();
    } else if (cache_usage_ == 1) {
      if (v8_flags.trace_wasm_inlining) {
        PrintF("[function %d: call_ref #%zu inlineable (monomorphic)]\n",
               func_index_, result_.size());
      }
      result_.emplace_back(targets_cache_[0], counts_cache_[0]);
    } else {
      if (v8_flags.trace_wasm_inlining) {
        PrintF("[function %d: call_ref #%zu inlineable (polymorphic %d)]\n",
               func_index_, result_.size(), cache_usage_);
      }
      CallSiteFeedback::PolymorphicCase* polymorphic =
          new CallSiteFeedback::PolymorphicCase[cache_usage_];
      for (int i = 0; i < cache_usage_; i++) {
        polymorphic[i].function_index = targets_cache_[i];
        polymorphic[i].absolute_call_frequency = counts_cache_[i];
      }
      result_.emplace_back(polymorphic, cache_usage_);
    }
    cache_usage_ = 0;
  }

  // {GetResult} can only be called on a r-value reference to make it more
  // obvious at call sites that {this} should not be used after this
  // operation.
  std::vector<CallSiteFeedback>&& GetResult() && { return std::move(result_); }

 private:
  const Tagged<WasmInstanceObject> instance_;
  std::vector<CallSiteFeedback> result_;
  const int num_imported_functions_;
  const int func_index_;
  int cache_usage_{0};
  int targets_cache_[kMaxPolymorphism];
  int counts_cache_[kMaxPolymorphism];
};
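
// Illustrative sketch (disabled code, standalone, hypothetical names): the
// insertion sort in {FeedbackMaker::AddCall} above, which keeps the two
// parallel caches ordered by call count, highest first, so the hottest call
// targets end up at the front.
#if 0
constexpr int kMaxEntries = 4;  // Stands in for kMaxPolymorphism.

void InsertSorted(int (&targets)[kMaxEntries], int (&counts)[kMaxEntries],
                  int& usage, int target, int count) {
  // Assumes usage < kMaxEntries, as the caller guarantees in the original.
  int pos = 0;
  while (pos < usage && counts[pos] >= count) pos++;
  // Shift lower-count entries one slot to the right.
  for (int i = usage - 1; i >= pos; i--) {
    targets[i + 1] = targets[i];
    counts[i + 1] = counts[i];
  }
  targets[pos] = target;
  counts[pos] = count;
  usage++;
}
#endif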
void TransitiveTypeFeedbackProcessor::ProcessFunction(int func_index) {
  int which_vector = declared_function_index(module_, func_index);
  Tagged<Object> maybe_feedback =
      instance_->feedback_vectors()->get(which_vector);
  if (!IsFixedArray(maybe_feedback)) return;
  Tagged<FixedArray> feedback = FixedArray::cast(maybe_feedback);
  base::Vector<uint32_t> call_direct_targets =
      module_->type_feedback.feedback_for_function[func_index]
          .call_targets.as_vector();
  DCHECK_EQ(feedback->length(), call_direct_targets.size() * 2);
  FeedbackMaker fm(instance_, func_index, feedback->length() / 2);
  for (int i = 0; i < feedback->length(); i += 2) {
    Tagged<Object> value = feedback->get(i);
    if (IsWasmInternalFunction(value)) {
      // Monomorphic.
      int count = Smi::cast(feedback->get(i + 1)).value();
      fm.AddCandidate(value, count);
    } else if (IsFixedArray(value)) {
      // Polymorphic.
      Tagged<FixedArray> polymorphic = FixedArray::cast(value);
      for (int j = 0; j < polymorphic->length(); j += 2) {
        Tagged<Object> function = polymorphic->get(j);
        int count = Smi::cast(polymorphic->get(j + 1)).value();
        fm.AddCandidate(function, count);
      }
    } else if (IsSmi(value)) {
      // Uninitialized, or a direct call collecting call count.
      uint32_t target = call_direct_targets[i / 2];
      if (target != FunctionTypeFeedback::kNonDirectCall) {
        int count = Smi::cast(value).value();
        fm.AddCall(static_cast<int>(target), count);
      } else if (v8_flags.trace_wasm_inlining) {
        PrintF("[function %d: call #%d: uninitialized]\n", func_index, i / 2);
      }
    } else if (v8_flags.trace_wasm_inlining) {
      if (value ==
          ReadOnlyRoots(instance_->GetIsolate()).megamorphic_symbol()) {
        PrintF("[function %d: call #%d: megamorphic]\n", func_index, i / 2);
      }
    }
    fm.FinalizeCall();
  }
  std::vector<CallSiteFeedback> result = std::move(fm).GetResult();
  EnqueueCallees(result);
  feedback_for_function_[func_index].feedback_vector = std::move(result);
}

void TriggerTierUp(Tagged<WasmInstanceObject> instance, int func_index) {
  NativeModule* native_module = instance->module_object()->native_module();
  CompilationStateImpl* compilation_state =
      Impl(native_module->compilation_state());
  WasmCompilationUnit tiering_unit{func_index, ExecutionTier::kTurbofan,
                                   kNotForDebugging};

  const WasmModule* module = native_module->module();
  int priority;
  {
    base::SharedMutexGuard<base::kExclusive> mutex_guard(
        &module->type_feedback.mutex);
    int array_index =
        wasm::declared_function_index(instance->module(), func_index);
    instance->tiering_budget_array()[array_index] =
        v8_flags.wasm_tiering_budget;
    int& stored_priority =
        module->type_feedback.feedback_for_function[func_index]
            .tierup_priority;
    if (stored_priority < kMaxInt) ++stored_priority;
    priority = stored_priority;
  }
  // Only create a compilation unit if this is the first time we detect this
  // function as hot (priority == 1), or if the priority increased
  // significantly. The latter is assumed to be the case if the priority
  // increased at least to four, and is a power of two.
  if (priority == 2 || !base::bits::IsPowerOfTwo(priority)) return;

  // Before adding the tier-up unit or increasing priority, do process type
  // feedback for best code generation.
  if (native_module->enabled_features().has_inlining() ||
      native_module->module()->is_wasm_gc) {
    // TODO(jkummerow): we could have collisions here if different instances
    // of the same module have collected different feedback. If that ever
    // becomes a problem, figure out a solution.
    TransitiveTypeFeedbackProcessor::Process(instance, func_index);
  }

  compilation_state->AddTopTierPriorityCompilationUnit(tiering_unit, priority);
}
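
// Illustrative sketch (disabled code, standalone, hypothetical names): which
// priorities actually enqueue a tier-up unit in {TriggerTierUp} above. The
// filter {priority == 2 || !IsPowerOfTwo(priority)} lets 1, 4, 8, 16, ...
// through, i.e. recompilation is re-triggered with exponential back-off.
#if 0
#include <bit>
#include <cassert>

bool ShouldEnqueueTierUp(unsigned priority) {
  return priority != 2 && std::has_single_bit(priority);
}

void TierUpFilterDemo() {
  assert(ShouldEnqueueTierUp(1));   // First time the function is seen as hot.
  assert(!ShouldEnqueueTierUp(2));
  assert(!ShouldEnqueueTierUp(3));
  assert(ShouldEnqueueTierUp(4));
  assert(ShouldEnqueueTierUp(8));
}
#endif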
void TierUpNowForTesting(Isolate* isolate, Tagged<WasmInstanceObject> instance,
                         int func_index) {
  NativeModule* native_module = instance->module_object()->native_module();
  if (native_module->enabled_features().has_inlining() ||
      native_module->module()->is_wasm_gc) {
    TransitiveTypeFeedbackProcessor::Process(instance, func_index);
  }
  wasm::GetWasmEngine()->CompileFunction(isolate->counters(), native_module,
                                         func_index,
                                         wasm::ExecutionTier::kTurbofan);
  CHECK(!native_module->compilation_state()->failed());
}

namespace {

void RecordStats(Tagged<Code> code, Counters* counters) {
  if (!code->has_instruction_stream()) return;
  counters->wasm_generated_code_size()->Increment(code->body_size());
  counters->wasm_reloc_size()->Increment(code->relocation_size());
}

enum CompilationExecutionResult : int8_t { kNoMoreUnits, kYield };

namespace {
const char* GetCompilationEventName(const WasmCompilationUnit& unit,
                                    const CompilationEnv& env) {
  ExecutionTier tier = unit.tier();
  if (tier == ExecutionTier::kLiftoff) {
    return "wasm.BaselineCompilation";
  }
  if (tier == ExecutionTier::kTurbofan) {
    return "wasm.TopTierCompilation";
  }
  if (unit.func_index() <
      static_cast<int>(env.module->num_imported_functions)) {
    return "wasm.WasmToJSWrapperCompilation";
  }
  return "wasm.OtherCompilation";
}
}  // namespace

constexpr uint8_t kMainTaskId = 0;

// Run by the {BackgroundCompileJob} (on any thread).
CompilationExecutionResult ExecuteCompilationUnits(
    std::weak_ptr<NativeModule> native_module, Counters* counters,
    JobDelegate* delegate, CompilationTier tier) {
  TRACE_EVENT0("v8.wasm", "wasm.ExecuteCompilationUnits");

  // These fields are initialized in a {BackgroundCompileScope} before
  // starting compilation.
  base::Optional<CompilationEnv> env;
  std::shared_ptr<WireBytesStorage> wire_bytes;
  std::shared_ptr<const WasmModule> module;

  // Task 0 is any main thread (there might be multiple from multiple
  // isolates), worker threads start at 1 (thus the "+ 1").
  static_assert(kMainTaskId == 0);
  int task_id = delegate ? (int{delegate->GetTaskId()} + 1) : kMainTaskId;
  DCHECK_LE(0, task_id);
  CompilationUnitQueues::Queue* queue;
  base::Optional<WasmCompilationUnit> unit;

  WasmFeatures global_detected_features = WasmFeatures::None();

  // Preparation (synchronized): Initialize the fields above and get the first
  // compilation unit.
  {
    BackgroundCompileScope compile_scope(native_module);
    if (compile_scope.cancelled()) return kYield;
    env.emplace(compile_scope.native_module()->CreateCompilationEnv());
    wire_bytes = compile_scope.compilation_state()->GetWireBytesStorage();
    module = compile_scope.native_module()->shared_module();
    queue = compile_scope.compilation_state()->GetQueueForCompileTask(task_id);
    unit =
        compile_scope.compilation_state()->GetNextCompilationUnit(queue, tier);
    if (!unit) return kNoMoreUnits;
  }
  TRACE_COMPILE("ExecuteCompilationUnits (task id %d)\n", task_id);

  std::vector<WasmCompilationResult> results_to_publish;
  while (true) {
    ExecutionTier current_tier = unit->tier();
    const char* event_name =
        GetCompilationEventName(unit.value(), env.value());
    TRACE_EVENT0("v8.wasm", event_name);
    while (unit->tier() == current_tier) {
      // Track detected features on a per-function basis before collecting
      // them into {global_detected_features}.
      WasmFeatures per_function_detected_features = WasmFeatures::None();
      // (asynchronous): Execute the compilation.
      WasmCompilationResult result = unit->ExecuteCompilation(
          &env.value(), wire_bytes.get(), counters,
          &per_function_detected_features);
      global_detected_features.Add(per_function_detected_features);
      results_to_publish.emplace_back(std::move(result));

      bool yield = delegate && delegate->ShouldYield();

      // (synchronized): Publish the compilation result and get the next unit.
      BackgroundCompileScope compile_scope(native_module);
      if (compile_scope.cancelled()) return kYield;

      if (!results_to_publish.back().succeeded()) {
        compile_scope.compilation_state()->SetError();
        return kNoMoreUnits;
      }

      if (!unit->for_debugging() && result.result_tier != current_tier) {
        compile_scope.native_module()->AddLiftoffBailout();
      }

      // Yield or get next unit.
      if (yield ||
          !(unit = compile_scope.compilation_state()->GetNextCompilationUnit(
                queue, tier))) {
        std::vector<std::unique_ptr<WasmCode>> unpublished_code =
            compile_scope.native_module()->AddCompiledCode(
                base::VectorOf(results_to_publish));
        results_to_publish.clear();
        compile_scope.compilation_state()->SchedulePublishCompilationResults(
            std::move(unpublished_code), tier);
        compile_scope.compilation_state()->OnCompilationStopped(
            global_detected_features);
        return yield ? kYield : kNoMoreUnits;
      }

      // Publish after finishing a certain amount of units, to avoid
      // contention when all threads publish at the end.
      bool batch_full =
          queue->ShouldPublish(static_cast<int>(results_to_publish.size()));
      // Also publish each time the compilation tier changes from Liftoff to
      // TurboFan, such that we immediately publish the baseline compilation
      // results to start execution, and do not wait for a batch to fill up.
      bool liftoff_finished = unit->tier() != current_tier &&
                              unit->tier() == ExecutionTier::kTurbofan;
      if (batch_full || liftoff_finished) {
        std::vector<std::unique_ptr<WasmCode>> unpublished_code =
            compile_scope.native_module()->AddCompiledCode(
                base::VectorOf(results_to_publish));
        results_to_publish.clear();
        compile_scope.compilation_state()->SchedulePublishCompilationResults(
            std::move(unpublished_code), tier);
      }
    }
  }
  UNREACHABLE();
}
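
// Illustrative sketch (disabled code, standalone, hypothetical names): the
// two publish triggers inside the compile loop above. Results are committed
// when a batch fills up, or as soon as the next unit switches from Liftoff to
// Turbofan, so freshly compiled baseline code becomes executable without
// waiting for a full batch.
#if 0
bool ShouldPublishNow(int results_in_batch, int publish_limit,
                      bool was_liftoff, bool next_is_turbofan) {
  bool batch_full = results_in_batch >= publish_limit;
  bool liftoff_finished = was_liftoff && next_is_turbofan;
  return batch_full || liftoff_finished;
}
#endif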
continue; } } JSToWasmWrapperKey key(function.imported, canonical_type_index); if (keys.insert(key).second) { auto unit = std::make_shared<JSToWasmWrapperCompilationUnit>( isolate, function.sig, canonical_type_index, native_module->module(), function.imported, native_module->enabled_features(), JSToWasmWrapperCompilationUnit::kAllowGeneric); builder->AddJSToWasmWrapperUnit(std::move(unit)); } } return static_cast<int>(keys.size()); } // Returns the number of units added. int AddImportWrapperUnits(NativeModule* native_module, CompilationUnitBuilder* builder) { std::unordered_set<WasmImportWrapperCache::CacheKey, WasmImportWrapperCache::CacheKeyHash> keys; int num_imported_functions = native_module->num_imported_functions(); for (int func_index = 0; func_index < num_imported_functions; func_index++) { const WasmFunction& function = native_module->module()->functions[func_index]; if (!IsJSCompatibleSignature(function.sig)) continue; if (UseGenericWasmToJSWrapper(kDefaultImportCallKind, function.sig, kNoSuspend)) { continue; } uint32_t canonical_type_index = native_module->module() ->isorecursive_canonical_type_ids[function.sig_index]; WasmImportWrapperCache::CacheKey key( kDefaultImportCallKind, canonical_type_index, static_cast<int>(function.sig->parameter_count()), kNoSuspend); auto it = keys.insert(key); if (it.second) { // Ensure that all keys exist in the cache, so that we can populate the // cache later without locking. (*native_module->import_wrapper_cache())[key] = nullptr; builder->AddImportUnit(func_index); } } return static_cast<int>(keys.size()); } std::unique_ptr<CompilationUnitBuilder> InitializeCompilation( Isolate* isolate, NativeModule* native_module, ProfileInformation* pgo_info) { CompilationStateImpl* compilation_state = Impl(native_module->compilation_state()); auto builder = std::make_unique<CompilationUnitBuilder>(native_module); int num_import_wrappers = AddImportWrapperUnits(native_module, builder.get()); int num_export_wrappers = AddExportWrapperUnits(isolate, native_module, builder.get()); compilation_state->InitializeCompilationProgress( num_import_wrappers, num_export_wrappers, pgo_info); return builder; } bool MayCompriseLazyFunctions(const WasmModule* module, WasmFeatures enabled_features) { if (IsLazyModule(module)) return true; if (enabled_features.has_compilation_hints()) return true; #ifdef ENABLE_SLOW_DCHECKS int start = module->num_imported_functions; int end = start + module->num_declared_functions; for (int func_index = start; func_index < end; func_index++) { SLOW_DCHECK(GetCompileStrategy(module, enabled_features, func_index, false) != CompileStrategy::kLazy); } #endif return false; } class CompilationTimeCallback : public CompilationEventCallback { public: enum CompileMode { kSynchronous, kAsync, kStreaming }; explicit CompilationTimeCallback( std::shared_ptr<Counters> async_counters, std::shared_ptr<metrics::Recorder> metrics_recorder, v8::metrics::Recorder::ContextId context_id, std::weak_ptr<NativeModule> native_module, CompileMode compile_mode) : start_time_(base::TimeTicks::Now()), async_counters_(std::move(async_counters)), metrics_recorder_(std::move(metrics_recorder)), context_id_(context_id), native_module_(std::move(native_module)), compile_mode_(compile_mode) {} void call(CompilationEvent compilation_event) override { DCHECK(base::TimeTicks::IsHighResolution()); std::shared_ptr<NativeModule> native_module = native_module_.lock(); if (!native_module) return; auto now = base::TimeTicks::Now(); auto duration = now - start_time_; if 
(compilation_event == CompilationEvent::kFinishedBaselineCompilation) { // Reset {start_time_} to measure tier-up time. start_time_ = now; if (compile_mode_ != kSynchronous) { TimedHistogram* histogram = compile_mode_ == kAsync ? async_counters_->wasm_async_compile_wasm_module_time() : async_counters_->wasm_streaming_compile_wasm_module_time(); histogram->AddSample(static_cast<int>(duration.InMicroseconds())); } v8::metrics::WasmModuleCompiled event{ (compile_mode_ != kSynchronous), // async (compile_mode_ == kStreaming), // streamed false, // cached false, // deserialized v8_flags.wasm_lazy_compilation, // lazy true, // success native_module->liftoff_code_size(), // code_size_in_bytes native_module->liftoff_bailout_count(), // liftoff_bailout_count duration.InMicroseconds()}; // wall_clock_duration_in_us metrics_recorder_->DelayMainThreadEvent(event, context_id_); } if (compilation_event == CompilationEvent::kFailedCompilation) { v8::metrics::WasmModuleCompiled event{ (compile_mode_ != kSynchronous), // async (compile_mode_ == kStreaming), // streamed false, // cached false, // deserialized v8_flags.wasm_lazy_compilation, // lazy false, // success native_module->liftoff_code_size(), // code_size_in_bytes native_module->liftoff_bailout_count(), // liftoff_bailout_count duration.InMicroseconds()}; // wall_clock_duration_in_us metrics_recorder_->DelayMainThreadEvent(event, context_id_); } } private: base::TimeTicks start_time_; const std::shared_ptr<Counters> async_counters_; std::shared_ptr<metrics::Recorder> metrics_recorder_; v8::metrics::Recorder::ContextId context_id_; std::weak_ptr<NativeModule> native_module_; const CompileMode compile_mode_; }; WasmError ValidateFunctions(const WasmModule* module, base::Vector<const uint8_t> wire_bytes, WasmFeatures enabled_features, OnlyLazyFunctions only_lazy_functions) { DCHECK_EQ(module->origin, kWasmOrigin); if (only_lazy_functions && !MayCompriseLazyFunctions(module, enabled_features)) { return {}; } std::function<bool(int)> filter; // Initially empty for "all functions". if (only_lazy_functions) { const bool is_lazy_module = IsLazyModule(module); filter = [module, enabled_features, is_lazy_module](int func_index) { CompileStrategy strategy = GetCompileStrategy(module, enabled_features, func_index, is_lazy_module); return strategy == CompileStrategy::kLazy || strategy == CompileStrategy::kLazyBaselineEagerTopTier; }; } // Call {ValidateFunctions} in the module decoder. return ValidateFunctions(module, enabled_features, wire_bytes, filter); } WasmError ValidateFunctions(const NativeModule& native_module, OnlyLazyFunctions only_lazy_functions) { return ValidateFunctions(native_module.module(), native_module.wire_bytes(), native_module.enabled_features(), only_lazy_functions); } void CompileNativeModule(Isolate* isolate, v8::metrics::Recorder::ContextId context_id, ErrorThrower* thrower, std::shared_ptr<NativeModule> native_module, ProfileInformation* pgo_info) { CHECK(!v8_flags.jitless); const WasmModule* module = native_module->module(); // The callback captures a shared ptr to the semaphore. auto* compilation_state = Impl(native_module->compilation_state()); if (base::TimeTicks::IsHighResolution()) { compilation_state->AddCallback(std::make_unique<CompilationTimeCallback>( isolate->async_counters(), isolate->metrics_recorder(), context_id, native_module, CompilationTimeCallback::kSynchronous)); } // Initialize the compilation units and kick off background compile tasks. 
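// A small sketch of the predicate-filtered validation used by
// {ValidateFunctions} above: an empty std::function means "validate
// everything", otherwise only functions selected by the filter (e.g. the
// lazily compiled ones) are checked. Types and names are illustrative.
#include <functional>
#include <vector>

struct FakeFunction { int index; bool lazy; bool valid; };

bool ValidateFiltered(const std::vector<FakeFunction>& functions,
                      const std::function<bool(int)>& filter) {
  for (const FakeFunction& f : functions) {
    if (filter && !filter(f.index)) continue;  // not selected: skip
    if (!f.valid) return false;                // validation error found
  }
  return true;
}

// Usage mirroring {kOnlyLazyFunctions}:
//   ValidateFiltered(functions, [&](int i) { return functions[i].lazy; });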
std::unique_ptr<CompilationUnitBuilder> builder = InitializeCompilation(isolate, native_module.get(), pgo_info); compilation_state->InitializeCompilationUnits(std::move(builder)); // Validate wasm modules for lazy compilation if requested. Never validate // asm.js modules as these are valid by construction (additionally a CHECK // will catch this during lazy compilation). if (!v8_flags.wasm_lazy_validation && module->origin == kWasmOrigin) { DCHECK(!thrower->error()); if (WasmError validation_error = ValidateFunctions(*native_module, kOnlyLazyFunctions)) { thrower->CompileFailed(std::move(validation_error)); return; } } compilation_state->WaitForCompilationEvent( CompilationEvent::kFinishedExportWrappers); if (!compilation_state->failed()) { compilation_state->FinalizeJSToWasmWrappers(isolate, module); compilation_state->WaitForCompilationEvent( CompilationEvent::kFinishedBaselineCompilation); compilation_state->PublishDetectedFeatures(isolate); } if (compilation_state->failed()) { DCHECK_IMPLIES(IsLazyModule(module), !v8_flags.wasm_lazy_validation); WasmError validation_error = ValidateFunctions(*native_module, kAllFunctions); CHECK(validation_error.has_error()); thrower->CompileFailed(std::move(validation_error)); } } class BaseCompileJSToWasmWrapperJob : public JobTask { public: explicit BaseCompileJSToWasmWrapperJob(size_t compilation_units) : outstanding_units_(compilation_units), total_units_(compilation_units) {} size_t GetMaxConcurrency(size_t worker_count) const override { size_t flag_limit = static_cast<size_t>( std::max(1, v8_flags.wasm_num_compilation_tasks.value())); // {outstanding_units_} includes the units that other workers are currently // working on, so we can safely ignore the {worker_count} and just return // the current number of outstanding units. return std::min(flag_limit, outstanding_units_.load(std::memory_order_relaxed)); } protected: // Returns {true} and places the index of the next unit to process in // {index_out} if there are still units to be processed. Returns {false} // otherwise. bool GetNextUnitIndex(size_t* index_out) { size_t next_index = unit_index_.fetch_add(1, std::memory_order_relaxed); if (next_index >= total_units_) { // {unit_index_} may exceed {total_units_}, but only by the number of // workers at worst, thus it can't exceed 2 * {total_units_} and overflow // shouldn't happen. DCHECK_GE(2 * total_units_, next_index); return false; } *index_out = next_index; return true; } // Returns true if the last unit was completed. bool CompleteUnit() { size_t outstanding_units = outstanding_units_.fetch_sub(1, std::memory_order_relaxed); DCHECK_GE(outstanding_units, 1); return outstanding_units == 1; } // When external cancellation is detected, call this method to bump // {unit_index_} and reset {outstanding_units_} such that no more tasks are // being scheduled for this job and all tasks exit as soon as possible. void FlushRemainingUnits() { // After being cancelled, make sure to reduce outstanding_units_ to // *basically* zero, but leave the count positive if other workers are still // running, to avoid underflow in {CompleteUnit}. size_t next_undone_unit = unit_index_.exchange(total_units_, std::memory_order_relaxed); size_t undone_units = next_undone_unit >= total_units_ ? 0 : total_units_ - next_undone_unit; // Note that the caller requested one unit that we also still need to remove // from {outstanding_units_}.
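// Minimal model of the unit-claiming protocol of
// {BaseCompileJSToWasmWrapperJob}: workers claim indices with fetch_add and
// a separate counter tracks completion, so the worker that sees fetch_sub
// return 1 knows it completed the final unit. Illustrative only.
#include <atomic>
#include <cstddef>

class WorkCounter {
 public:
  explicit WorkCounter(size_t total) : outstanding_(total), total_(total) {}

  // Claims the next index, or returns false once all units are taken.
  bool Claim(size_t* index_out) {
    size_t next = next_.fetch_add(1, std::memory_order_relaxed);
    if (next >= total_) return false;
    *index_out = next;
    return true;
  }

  // Marks one unit done; returns true exactly once, for the last unit.
  bool Complete() {
    return outstanding_.fetch_sub(1, std::memory_order_relaxed) == 1;
  }

 private:
  std::atomic<size_t> next_{0};
  std::atomic<size_t> outstanding_;
  const size_t total_;
};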
++undone_units; size_t previous_outstanding_units = outstanding_units_.fetch_sub(undone_units, std::memory_order_relaxed); CHECK_LE(undone_units, previous_outstanding_units); } private: std::atomic<size_t> unit_index_{0}; std::atomic<size_t> outstanding_units_; const size_t total_units_; }; class AsyncCompileJSToWasmWrapperJob final : public BaseCompileJSToWasmWrapperJob { public: explicit AsyncCompileJSToWasmWrapperJob( std::weak_ptr<NativeModule> native_module, size_t compilation_units) : BaseCompileJSToWasmWrapperJob(compilation_units), native_module_(std::move(native_module)), engine_barrier_(GetWasmEngine()->GetBarrierForBackgroundCompile()) {} void Run(JobDelegate* delegate) override { auto engine_scope = engine_barrier_->TryLock(); if (!engine_scope) return; std::shared_ptr<JSToWasmWrapperCompilationUnit> wrapper_unit = nullptr; OperationsBarrier::Token wrapper_compilation_token; Isolate* isolate; size_t index; if (!GetNextUnitIndex(&index)) return; { BackgroundCompileScope compile_scope(native_module_); if (compile_scope.cancelled()) return FlushRemainingUnits(); wrapper_unit = compile_scope.compilation_state()->GetJSToWasmWrapperCompilationUnit( index); isolate = wrapper_unit->isolate(); wrapper_compilation_token = wasm::GetWasmEngine()->StartWrapperCompilation(isolate); if (!wrapper_compilation_token) return FlushRemainingUnits(); } TRACE_EVENT0("v8.wasm", "wasm.JSToWasmWrapperCompilation"); // In case multi-cage pointer compression mode is enabled ensure that // current thread's cage base values are properly initialized. PtrComprCageAccessScope ptr_compr_cage_access_scope(isolate); while (true) { DCHECK_EQ(isolate, wrapper_unit->isolate()); wrapper_unit->Execute(); bool complete_last_unit = CompleteUnit(); bool yield = delegate && delegate->ShouldYield(); if (yield && !complete_last_unit) return; BackgroundCompileScope compile_scope(native_module_); if (compile_scope.cancelled()) return; if (complete_last_unit) { compile_scope.compilation_state()->OnFinishedJSToWasmWrapperUnits(); } if (yield) return; if (!GetNextUnitIndex(&index)) return; wrapper_unit = compile_scope.compilation_state()->GetJSToWasmWrapperCompilationUnit( index); } } private: std::weak_ptr<NativeModule> native_module_; std::shared_ptr<OperationsBarrier> engine_barrier_; }; class BackgroundCompileJob final : public JobTask { public: explicit BackgroundCompileJob(std::weak_ptr<NativeModule> native_module, std::shared_ptr<Counters> async_counters, CompilationTier tier) : native_module_(std::move(native_module)), engine_barrier_(GetWasmEngine()->GetBarrierForBackgroundCompile()), async_counters_(std::move(async_counters)), tier_(tier) {} void Run(JobDelegate* delegate) override { auto engine_scope = engine_barrier_->TryLock(); if (!engine_scope) return; ExecuteCompilationUnits(native_module_, async_counters_.get(), delegate, tier_); } size_t GetMaxConcurrency(size_t worker_count) const override { BackgroundCompileScope compile_scope(native_module_); if (compile_scope.cancelled()) return 0; size_t flag_limit = static_cast<size_t>( std::max(1, v8_flags.wasm_num_compilation_tasks.value())); // NumOutstandingCompilations() does not reflect the units that running // workers are processing, thus add the current worker count to that number. 
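// Sketch of the {GetMaxConcurrency} heuristic above: the returned value is
// capped by the --wasm-num-compilation-tasks limit, and the current
// {worker_count} is added because queued units alone under-report work that
// running workers have already taken. Illustrative, not V8 API.
#include <algorithm>
#include <cstddef>

size_t MaxConcurrency(size_t worker_count, size_t outstanding_units,
                      size_t flag_limit) {
  return std::min(flag_limit, worker_count + outstanding_units);
}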
return std::min(flag_limit, worker_count + compile_scope.compilation_state() ->NumOutstandingCompilations(tier_)); } private: std::weak_ptr<NativeModule> native_module_; std::shared_ptr<OperationsBarrier> engine_barrier_; const std::shared_ptr<Counters> async_counters_; const CompilationTier tier_; }; } // namespace std::shared_ptr<NativeModule> CompileToNativeModule( Isolate* isolate, WasmFeatures enabled_features, ErrorThrower* thrower, std::shared_ptr<const WasmModule> module, ModuleWireBytes wire_bytes, int compilation_id, v8::metrics::Recorder::ContextId context_id, ProfileInformation* pgo_info) { WasmEngine* engine = GetWasmEngine(); base::OwnedVector<uint8_t> wire_bytes_copy = base::OwnedVector<uint8_t>::Of(wire_bytes.module_bytes()); // Prefer {wire_bytes_copy} to {wire_bytes.module_bytes()} for the temporary // cache key. When we eventually install the module in the cache, the wire // bytes of the temporary key and the new key have the same base pointer and // we can skip the full bytes comparison. std::shared_ptr<NativeModule> native_module = engine->MaybeGetNativeModule( module->origin, wire_bytes_copy.as_vector(), isolate); if (native_module) { CompileJsToWasmWrappers(isolate, module.get()); return native_module; } base::Optional<TimedHistogramScope> wasm_compile_module_time_scope; if (base::TimeTicks::IsHighResolution()) { wasm_compile_module_time_scope.emplace(SELECT_WASM_COUNTER( isolate->counters(), module->origin, wasm_compile, module_time)); } // Embedder usage count for declared shared memories. const bool has_shared_memory = std::any_of(module->memories.begin(), module->memories.end(), [](auto& memory) { return memory.is_shared; }); if (has_shared_memory) { isolate->CountUsage(v8::Isolate::UseCounterFeature::kWasmSharedMemory); } // Create a new {NativeModule} first. const bool include_liftoff = module->origin == kWasmOrigin && v8_flags.liftoff; size_t code_size_estimate = wasm::WasmCodeManager::EstimateNativeModuleCodeSize( module.get(), include_liftoff, DynamicTiering{v8_flags.wasm_dynamic_tiering.value()}); native_module = engine->NewNativeModule(isolate, enabled_features, module, code_size_estimate); native_module->SetWireBytes(std::move(wire_bytes_copy)); native_module->compilation_state()->set_compilation_id(compilation_id); CompileNativeModule(isolate, context_id, thrower, native_module, pgo_info); if (thrower->error()) { engine->UpdateNativeModuleCache(true, std::move(native_module), isolate); return {}; } std::shared_ptr<NativeModule> cached_native_module = engine->UpdateNativeModuleCache(false, native_module, isolate); if (cached_native_module != native_module) { // Do not use {module} or {native_module} any more; use // {cached_native_module} instead. module.reset(); native_module.reset(); return cached_native_module; } // Ensure that the code objects are logged before returning. 
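// Hedged sketch of the cache discipline in {CompileToNativeModule} above:
// look up first, compile on a miss, then re-check when publishing, because
// another isolate may have compiled the same bytes concurrently; whichever
// copy reached the cache first wins. All names are illustrative.
#include <map>
#include <memory>
#include <mutex>
#include <string>

class MockModuleCache {
 public:
  std::shared_ptr<int> Get(const std::string& key) {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = cache_.find(key);
    return it == cache_.end() ? nullptr : it->second;
  }

  // Returns the winning module: ours if we published first, else the copy
  // that was already cached (the caller should then drop its own copy).
  std::shared_ptr<int> Publish(const std::string& key,
                               std::shared_ptr<int> module) {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = cache_.emplace(key, std::move(module)).first;
    return it->second;
  }

 private:
  std::mutex mu_;
  std::map<std::string, std::shared_ptr<int>> cache_;
};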
engine->LogOutstandingCodesForIsolate(isolate); return native_module; } AsyncCompileJob::AsyncCompileJob( Isolate* isolate, WasmFeatures enabled_features, base::OwnedVector<const uint8_t> bytes, Handle<Context> context, Handle<NativeContext> incumbent_context, const char* api_method_name, std::shared_ptr<CompilationResultResolver> resolver, int compilation_id) : isolate_(isolate), api_method_name_(api_method_name), enabled_features_(enabled_features), dynamic_tiering_(DynamicTiering{v8_flags.wasm_dynamic_tiering.value()}), start_time_(base::TimeTicks::Now()), bytes_copy_(std::move(bytes)), wire_bytes_(bytes_copy_.as_vector()), resolver_(std::move(resolver)), compilation_id_(compilation_id) { TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), "wasm.AsyncCompileJob"); CHECK(v8_flags.wasm_async_compilation); CHECK(!v8_flags.jitless); v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate); v8::Platform* platform = V8::GetCurrentPlatform(); foreground_task_runner_ = platform->GetForegroundTaskRunner(v8_isolate); native_context_ = isolate->global_handles()->Create(context->native_context()); incumbent_context_ = isolate->global_handles()->Create(*incumbent_context); DCHECK(IsNativeContext(*native_context_)); context_id_ = isolate->GetOrRegisterRecorderContextId(native_context_); metrics_event_.async = true; } void AsyncCompileJob::Start() { DoAsync<DecodeModule>(isolate_->counters(), isolate_->metrics_recorder()); // -- } void AsyncCompileJob::Abort() { // Removing this job will trigger the destructor, which will cancel all // compilation. GetWasmEngine()->RemoveCompileJob(this); } // {ValidateFunctionsStreamingJobData} holds information that is shared between // the {AsyncStreamingProcessor} and the {ValidateFunctionsStreamingJob}. It // lives in the {AsyncStreamingProcessor} and is updated from both classes. struct ValidateFunctionsStreamingJobData { struct Unit { // {func_index == -1} represents an "invalid" unit. int func_index = -1; base::Vector<const uint8_t> code; // Check whether the unit is valid. operator bool() const { DCHECK_LE(-1, func_index); return func_index >= 0; } }; void Initialize(int num_declared_functions) { DCHECK_NULL(units); units = base::OwnedVector<Unit>::NewForOverwrite(num_declared_functions); // Initially {next == end}. next_available_unit.store(units.begin(), std::memory_order_relaxed); end_of_available_units.store(units.begin(), std::memory_order_relaxed); } void AddUnit(int declared_func_index, base::Vector<const uint8_t> code, JobHandle* job_handle) { DCHECK_NOT_NULL(units); // Write new unit to {*end}, then increment {end}. There is only one thread // adding new units, so no further synchronization needed. Unit* ptr = end_of_available_units.load(std::memory_order_relaxed); // Check invariant: {next <= end}. DCHECK_LE(next_available_unit.load(std::memory_order_relaxed), ptr); *ptr++ = {declared_func_index, code}; // Use release semantics, so whoever loads this pointer (using acquire // semantics) sees all our previous stores. end_of_available_units.store(ptr, std::memory_order_release); size_t total_units_added = ptr - units.begin(); // Periodically notify concurrency increase. This has overhead, so avoid // calling it too often. As long as threads are still running they will // continue processing new units anyway, and if background threads validate // faster than we can add units, then only notifying after increasingly long // delays is the right thing to do to avoid too many small validation tasks. 
// We notify on each power of two after 16 units, and every 16k units (just // to have *some* upper limit and to avoid piling up too many units). // Additionally, notify after receiving the last unit of the module. if ((total_units_added >= 16 && base::bits::IsPowerOfTwo(total_units_added)) || (total_units_added % (16 * 1024)) == 0 || ptr == units.end()) { job_handle->NotifyConcurrencyIncrease(); } } size_t NumOutstandingUnits() const { Unit* next = next_available_unit.load(std::memory_order_relaxed); Unit* end = end_of_available_units.load(std::memory_order_relaxed); DCHECK_LE(next, end); return end - next; } // Retrieve one unit to validate; returns an "invalid" unit if nothing is in // the queue. Unit GetUnit() { // Use an acquire load to synchronize with the store in {AddUnit}. All units // before this {end} are fully initialized and ready to execute. Unit* end = end_of_available_units.load(std::memory_order_acquire); Unit* next = next_available_unit.load(std::memory_order_relaxed); while (next < end) { if (next_available_unit.compare_exchange_weak( next, next + 1, std::memory_order_relaxed)) { return *next; } // Otherwise retry with updated {next} pointer. } return {}; } base::OwnedVector<Unit> units; std::atomic<Unit*> next_available_unit; std::atomic<Unit*> end_of_available_units; std::atomic<bool> found_error{false}; }; class ValidateFunctionsStreamingJob final : public JobTask { public: ValidateFunctionsStreamingJob(const WasmModule* module, WasmFeatures enabled_features, ValidateFunctionsStreamingJobData* data) : module_(module), enabled_features_(enabled_features), data_(data) {} void Run(JobDelegate* delegate) override { TRACE_EVENT0("v8.wasm", "wasm.ValidateFunctionsStreaming"); using Unit = ValidateFunctionsStreamingJobData::Unit; while (Unit unit = data_->GetUnit()) { DecodeResult result = ValidateSingleFunction( module_, unit.func_index, unit.code, enabled_features_); if (result.failed()) { data_->found_error.store(true, std::memory_order_relaxed); break; } // After validating one function, check if we should yield.
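// Standalone sketch of the single-producer/multi-consumer hand-off in
// {ValidateFunctionsStreamingJobData}: the producer publishes units with a
// release store of {end}; consumers pair an acquire load of {end} with a CAS
// on {next} to claim one unit each. Fixed capacity, illustrative names.
#include <atomic>
#include <cstddef>
#include <vector>

struct SpmcQueue {
  explicit SpmcQueue(size_t capacity) : units(capacity) {}

  std::vector<int> units;       // written by the single producer only
  std::atomic<size_t> next{0};  // next index to claim
  std::atomic<size_t> end{0};   // units [0, end) are fully initialized

  void Add(int unit) {  // single producer
    size_t e = end.load(std::memory_order_relaxed);
    units[e] = unit;
    // Release: consumers that see the new {end} also see the write above.
    end.store(e + 1, std::memory_order_release);
  }

  bool Get(int* out) {  // any number of consumers
    size_t e = end.load(std::memory_order_acquire);
    size_t n = next.load(std::memory_order_relaxed);
    while (n < e) {
      if (next.compare_exchange_weak(n, n + 1, std::memory_order_relaxed)) {
        *out = units[n];
        return true;
      }
      // CAS failure reloaded {n}; retry against the same snapshot of {e}.
    }
    return false;  // currently empty
  }
};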
if (delegate->ShouldYield()) break; } } size_t GetMaxConcurrency(size_t worker_count) const override { return worker_count + data_->NumOutstandingUnits(); } private: const WasmModule* const module_; const WasmFeatures enabled_features_; ValidateFunctionsStreamingJobData* data_; }; class AsyncStreamingProcessor final : public StreamingProcessor { public: explicit AsyncStreamingProcessor(AsyncCompileJob* job); bool ProcessModuleHeader(base::Vector<const uint8_t> bytes) override; bool ProcessSection(SectionCode section_code, base::Vector<const uint8_t> bytes, uint32_t offset) override; bool ProcessCodeSectionHeader(int num_functions, uint32_t functions_mismatch_error_offset, std::shared_ptr<WireBytesStorage>, int code_section_start, int code_section_length) override; bool ProcessFunctionBody(base::Vector<const uint8_t> bytes, uint32_t offset) override; void OnFinishedChunk() override; void OnFinishedStream(base::OwnedVector<const uint8_t> bytes, bool after_error) override; void OnAbort() override; bool Deserialize(base::Vector<const uint8_t> wire_bytes, base::Vector<const uint8_t> module_bytes) override; private: void CommitCompilationUnits(); ModuleDecoder decoder_; AsyncCompileJob* job_; std::unique_ptr<CompilationUnitBuilder> compilation_unit_builder_; int num_functions_ = 0; bool prefix_cache_hit_ = false; bool before_code_section_ = true; ValidateFunctionsStreamingJobData validate_functions_job_data_; std::unique_ptr<JobHandle> validate_functions_job_handle_; // Running hash of the wire bytes up to code section size, but excluding the // code section itself. Used by the {NativeModuleCache} to detect potential // duplicate modules. size_t prefix_hash_ = 0; }; std::shared_ptr<StreamingDecoder> AsyncCompileJob::CreateStreamingDecoder() { DCHECK_NULL(stream_); stream_ = StreamingDecoder::CreateAsyncStreamingDecoder( std::make_unique<AsyncStreamingProcessor>(this)); return stream_; } AsyncCompileJob::~AsyncCompileJob() { // Note: This destructor always runs on the foreground thread of the isolate. background_task_manager_.CancelAndWait(); // If initial compilation did not finish yet we can abort it. if (native_module_) { Impl(native_module_->compilation_state()) ->CancelCompilation(CompilationStateImpl::kCancelInitialCompilation); } // Tell the streaming decoder that the AsyncCompileJob is not available // anymore. if (stream_) stream_->NotifyCompilationDiscarded(); CancelPendingForegroundTask(); isolate_->global_handles()->Destroy(native_context_.location()); isolate_->global_handles()->Destroy(incumbent_context_.location()); if (!module_object_.is_null()) { isolate_->global_handles()->Destroy(module_object_.location()); } } void AsyncCompileJob::CreateNativeModule( std::shared_ptr<const WasmModule> module, size_t code_size_estimate) { // Embedder usage count for declared shared memories. const bool has_shared_memory = std::any_of(module->memories.begin(), module->memories.end(), [](auto& memory) { return memory.is_shared; }); if (has_shared_memory) { isolate_->CountUsage(v8::Isolate::UseCounterFeature::kWasmSharedMemory); } // Create the module object and populate with compiled functions and // information needed at instantiation time. 
native_module_ = GetWasmEngine()->NewNativeModule( isolate_, enabled_features_, std::move(module), code_size_estimate); native_module_->SetWireBytes(std::move(bytes_copy_)); native_module_->compilation_state()->set_compilation_id(compilation_id_); } bool AsyncCompileJob::GetOrCreateNativeModule( std::shared_ptr<const WasmModule> module, size_t code_size_estimate) { native_module_ = GetWasmEngine()->MaybeGetNativeModule( module->origin, wire_bytes_.module_bytes(), isolate_); if (native_module_ == nullptr) { CreateNativeModule(std::move(module), code_size_estimate); return false; } return true; } void AsyncCompileJob::PrepareRuntimeObjects() { // Create heap objects for script and module bytes to be stored in the // module object. Asm.js is not compiled asynchronously. DCHECK(module_object_.is_null()); auto source_url = stream_ ? base::VectorOf(stream_->url()) : base::Vector<const char>(); auto script = GetWasmEngine()->GetOrCreateScript(isolate_, native_module_, source_url); Handle<WasmModuleObject> module_object = WasmModuleObject::New(isolate_, native_module_, script); module_object_ = isolate_->global_handles()->Create(*module_object); } // This function assumes that it is executed in a HandleScope, and that a // context is set on the isolate. void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) { TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), "wasm.FinishAsyncCompile"); if (stream_) { stream_->NotifyNativeModuleCreated(native_module_); } const WasmModule* module = native_module_->module(); auto compilation_state = Impl(native_module_->compilation_state()); // If experimental PGO via files is enabled, load profile information now that // we have all wire bytes and know that the module is valid. if (V8_UNLIKELY(v8_flags.experimental_wasm_pgo_from_file)) { std::unique_ptr<ProfileInformation> pgo_info = LoadProfileFromFile(module, native_module_->wire_bytes()); if (pgo_info) { compilation_state->ApplyPgoInfoLate(pgo_info.get()); } } bool is_after_deserialization = !module_object_.is_null(); if (!is_after_deserialization) { PrepareRuntimeObjects(); } // Measure duration of baseline compilation or deserialization from cache. if (base::TimeTicks::IsHighResolution()) { base::TimeDelta duration = base::TimeTicks::Now() - start_time_; int duration_usecs = static_cast<int>(duration.InMicroseconds()); isolate_->counters()->wasm_streaming_finish_wasm_module_time()->AddSample( duration_usecs); if (is_after_cache_hit || is_after_deserialization) { v8::metrics::WasmModuleCompiled event{ true, // async true, // streamed is_after_cache_hit, // cached is_after_deserialization, // deserialized v8_flags.wasm_lazy_compilation, // lazy !compilation_state->failed(), // success native_module_->turbofan_code_size(), // code_size_in_bytes native_module_->liftoff_bailout_count(), // liftoff_bailout_count duration.InMicroseconds()}; // wall_clock_duration_in_us isolate_->metrics_recorder()->DelayMainThreadEvent(event, context_id_); } } DCHECK(!isolate_->context().is_null()); // Finish the wasm script now and make it public to the debugger. 
Handle<Script> script(module_object_->script(), isolate_); if (script->type() == Script::Type::kWasm && module->debug_symbols.type == WasmDebugSymbols::Type::SourceMap && !module->debug_symbols.external_url.is_empty()) { ModuleWireBytes wire_bytes(native_module_->wire_bytes()); MaybeHandle<String> src_map_str = isolate_->factory()->NewStringFromUtf8( wire_bytes.GetNameOrNull(module->debug_symbols.external_url), AllocationType::kOld); script->set_source_mapping_url(*src_map_str.ToHandleChecked()); } { TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), "wasm.Debug.OnAfterCompile"); isolate_->debug()->OnAfterCompile(script); } // TODO(bbudge) Allow deserialization without wrapper compilation, so we can // just compile wrappers here. if (!is_after_deserialization) { if (is_after_cache_hit) { // TODO(thibaudm): Look into sharing wrappers. CompileJsToWasmWrappers(isolate_, module); } else { compilation_state->FinalizeJSToWasmWrappers(isolate_, module); } } // We can only update the feature counts once the entire compile is done. compilation_state->PublishDetectedFeatures(isolate_); // We might need debug code for the module, if the debugger was enabled while // streaming compilation was running. Since handling this while compiling via // streaming is tricky, we just remove all code which may have been generated, // and compile debug code lazily. if (native_module_->IsInDebugState()) { native_module_->RemoveCompiledCode( NativeModule::RemoveFilter::kRemoveNonDebugCode); } // Finally, log all generated code (it does not matter if this happens // repeatedly in case the script is shared). native_module_->LogWasmCodes(isolate_, module_object_->script()); FinishSuccessfully(); } void AsyncCompileJob::Failed() { // {job} keeps the {this} pointer alive. std::unique_ptr<AsyncCompileJob> job = GetWasmEngine()->RemoveCompileJob(this); // Revalidate the whole module to produce a deterministic error message. constexpr bool kValidate = true; ModuleResult result = DecodeWasmModule( enabled_features_, wire_bytes_.module_bytes(), kValidate, kWasmOrigin); CHECK(result.failed()); ErrorThrower thrower(isolate_, api_method_name_); thrower.CompileFailed(std::move(result).error()); resolver_->OnCompilationFailed(thrower.Reify()); } class AsyncCompileJob::CompilationStateCallback : public CompilationEventCallback { public: explicit CompilationStateCallback(AsyncCompileJob* job) : job_(job) {} void call(CompilationEvent event) override { // This callback is only being called from a foreground task. switch (event) { case CompilationEvent::kFinishedExportWrappers: // Even if baseline compilation units finish first, we trigger the // "kFinishedExportWrappers" event first. DCHECK(!last_event_.has_value()); break; case CompilationEvent::kFinishedBaselineCompilation: DCHECK_EQ(CompilationEvent::kFinishedExportWrappers, last_event_); if (job_->DecrementAndCheckFinisherCount(kCompilation)) { // Install the native module in the cache, or reuse a conflicting one. // If we get a conflicting module, wait until we are back in the // main thread to update {job_->native_module_} to avoid a data race. std::shared_ptr<NativeModule> cached_native_module = GetWasmEngine()->UpdateNativeModuleCache( false, job_->native_module_, job_->isolate_); if (cached_native_module == job_->native_module_) { // There was no cached module. 
cached_native_module = nullptr; } job_->DoSync<FinishCompilation>(std::move(cached_native_module)); } break; case CompilationEvent::kFinishedCompilationChunk: DCHECK(CompilationEvent::kFinishedBaselineCompilation == last_event_ || CompilationEvent::kFinishedCompilationChunk == last_event_); break; case CompilationEvent::kFailedCompilation: DCHECK(!last_event_.has_value() || last_event_ == CompilationEvent::kFinishedExportWrappers); if (job_->DecrementAndCheckFinisherCount(kCompilation)) { // Don't update {job_->native_module_} to avoid data races with other // compilation threads. Use a copy of the shared pointer instead. GetWasmEngine()->UpdateNativeModuleCache(true, job_->native_module_, job_->isolate_); job_->DoSync<Fail>(); } break; } #ifdef DEBUG last_event_ = event; #endif } private: AsyncCompileJob* job_; #ifdef DEBUG // This will be modified by different threads, but they externally // synchronize, so no explicit synchronization (currently) needed here. base::Optional<CompilationEvent> last_event_; #endif }; // A closure to run a compilation step (either as foreground or background // task) and schedule the next step(s), if any. class AsyncCompileJob::CompileStep { public: virtual ~CompileStep() = default; void Run(AsyncCompileJob* job, bool on_foreground) { if (on_foreground) { HandleScope scope(job->isolate_); SaveAndSwitchContext saved_context(job->isolate_, *job->native_context_); RunInForeground(job); } else { RunInBackground(job); } } virtual void RunInForeground(AsyncCompileJob*) { UNREACHABLE(); } virtual void RunInBackground(AsyncCompileJob*) { UNREACHABLE(); } }; class AsyncCompileJob::CompileTask : public CancelableTask { public: CompileTask(AsyncCompileJob* job, bool on_foreground) // We only manage the background tasks with the {CancelableTaskManager} of // the {AsyncCompileJob}. Foreground tasks are managed by the system's // {CancelableTaskManager}. Background tasks cannot spawn tasks managed by // their own task manager. : CancelableTask(on_foreground ? job->isolate_->cancelable_task_manager() : &job->background_task_manager_), job_(job), on_foreground_(on_foreground) {} ~CompileTask() override { if (job_ != nullptr && on_foreground_) ResetPendingForegroundTask(); } void RunInternal() final { if (!job_) return; if (on_foreground_) ResetPendingForegroundTask(); job_->step_->Run(job_, on_foreground_); // After execution, reset {job_} such that we don't try to reset the pending // foreground task when the task is deleted. job_ = nullptr; } void Cancel() { DCHECK_NOT_NULL(job_); job_ = nullptr; } private: // {job_} will be cleared to cancel a pending task. 
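// A compressed sketch of the cancellation pattern used by {CompileTask}
// above: the task holds a raw pointer back to its job, and cancelling simply
// nulls that pointer, so a task that still gets scheduled afterwards becomes
// a no-op. Simplified and single-threaded; names are illustrative.
class MockJob;  // the owning job, cancelled before destruction

class MockTask {
 public:
  explicit MockTask(MockJob* job) : job_(job) {}
  void Cancel() { job_ = nullptr; }  // called by the job before it dies
  void Run() {
    if (!job_) return;  // cancelled: nothing to do
    // ... run one compile step against *job_ ...
    job_ = nullptr;  // never touch the job after the step finished
  }

 private:
  MockJob* job_;
};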
AsyncCompileJob* job_; bool on_foreground_; void ResetPendingForegroundTask() const { DCHECK_EQ(this, job_->pending_foreground_task_); job_->pending_foreground_task_ = nullptr; } }; void AsyncCompileJob::StartForegroundTask() { DCHECK_NULL(pending_foreground_task_); auto new_task = std::make_unique<CompileTask>(this, true); pending_foreground_task_ = new_task.get(); foreground_task_runner_->PostTask(std::move(new_task)); } void AsyncCompileJob::ExecuteForegroundTaskImmediately() { DCHECK_NULL(pending_foreground_task_); auto new_task = std::make_unique<CompileTask>(this, true); pending_foreground_task_ = new_task.get(); new_task->Run(); } void AsyncCompileJob::CancelPendingForegroundTask() { if (!pending_foreground_task_) return; pending_foreground_task_->Cancel(); pending_foreground_task_ = nullptr; } void AsyncCompileJob::StartBackgroundTask() { auto task = std::make_unique<CompileTask>(this, false); // If --wasm-num-compilation-tasks=0 is passed, do only spawn foreground // tasks. This is used to make timing deterministic. if (v8_flags.wasm_num_compilation_tasks > 0) { V8::GetCurrentPlatform()->CallOnWorkerThread(std::move(task)); } else { foreground_task_runner_->PostTask(std::move(task)); } } template <typename Step, AsyncCompileJob::UseExistingForegroundTask use_existing_fg_task, typename... Args> void AsyncCompileJob::DoSync(Args&&... args) { NextStep<Step>(std::forward<Args>(args)...); if (use_existing_fg_task && pending_foreground_task_ != nullptr) return; StartForegroundTask(); } template <typename Step, typename... Args> void AsyncCompileJob::DoImmediately(Args&&... args) { NextStep<Step>(std::forward<Args>(args)...); ExecuteForegroundTaskImmediately(); } template <typename Step, typename... Args> void AsyncCompileJob::DoAsync(Args&&... args) { NextStep<Step>(std::forward<Args>(args)...); StartBackgroundTask(); } template <typename Step, typename... Args> void AsyncCompileJob::NextStep(Args&&... args) { step_.reset(new Step(std::forward<Args>(args)...)); } //========================================================================== // Step 1: (async) Decode the module. //========================================================================== class AsyncCompileJob::DecodeModule : public AsyncCompileJob::CompileStep { public: explicit DecodeModule(Counters* counters, std::shared_ptr<metrics::Recorder> metrics_recorder) : counters_(counters), metrics_recorder_(std::move(metrics_recorder)) {} void RunInBackground(AsyncCompileJob* job) override { ModuleResult result; { DisallowHandleAllocation no_handle; DisallowGarbageCollection no_gc; // Decode the module bytes. TRACE_COMPILE("(1) Decoding module...\n"); TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), "wasm.DecodeModule"); auto enabled_features = job->enabled_features_; result = DecodeWasmModule(enabled_features, job->wire_bytes_.module_bytes(), false, kWasmOrigin, counters_, metrics_recorder_, job->context_id(), DecodingMethod::kAsync); // Validate lazy functions here if requested. if (result.ok() && !v8_flags.wasm_lazy_validation) { const WasmModule* module = result.value().get(); if (WasmError validation_error = ValidateFunctions(module, job->wire_bytes_.module_bytes(), job->enabled_features_, kOnlyLazyFunctions)) result = ModuleResult{std::move(validation_error)}; } } if (result.failed()) { // Decoding failure; reject the promise and clean up. job->DoSync<Fail>(); } else { // Decode passed. 
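// Sketch of the {DoSync}/{DoAsync}/{DoImmediately} step dispatch above: each
// call stores the next {CompileStep} and then chooses where to run it (same
// thread here; the real code posts foreground or background tasks).
// Simplified, synchronous model with illustrative names.
#include <memory>
#include <utility>

struct MockStep {
  virtual ~MockStep() = default;
  virtual void Run() = 0;
};

class MockPipeline {
 public:
  template <typename Step, typename... Args>
  void DoImmediately(Args&&... args) {
    step_ = std::make_unique<Step>(std::forward<Args>(args)...);
    step_->Run();  // a real pipeline would post to a task runner instead
  }

 private:
  std::unique_ptr<MockStep> step_;
};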
std::shared_ptr<WasmModule> module = std::move(result).value(); const bool include_liftoff = v8_flags.liftoff; size_t code_size_estimate = wasm::WasmCodeManager::EstimateNativeModuleCodeSize( module.get(), include_liftoff, job->dynamic_tiering_); job->DoSync<PrepareAndStartCompile>(std::move(module), true, code_size_estimate); } } private: Counters* const counters_; std::shared_ptr<metrics::Recorder> metrics_recorder_; }; //========================================================================== // Step 2 (sync): Create heap-allocated data and start compilation. //========================================================================== class AsyncCompileJob::PrepareAndStartCompile : public CompileStep { public: PrepareAndStartCompile(std::shared_ptr<const WasmModule> module, bool start_compilation, size_t code_size_estimate) : module_(std::move(module)), start_compilation_(start_compilation), code_size_estimate_(code_size_estimate) {} private: void RunInForeground(AsyncCompileJob* job) override { TRACE_COMPILE("(2) Prepare and start compile...\n"); const bool streaming = job->wire_bytes_.length() == 0; if (streaming) { // Streaming compilation already checked for cache hits. job->CreateNativeModule(module_, code_size_estimate_); } else if (job->GetOrCreateNativeModule(std::move(module_), code_size_estimate_)) { job->FinishCompile(true); return; } else { // If we are not streaming and did not get a cache hit, we might have hit // the path where the streaming decoder got a prefix cache hit, but the // module then turned out to be invalid, and we are running it through // non-streaming decoding again. In this case, function bodies have not // been validated yet (this would have happened in the {DecodeModule} // phase had we not come via the streaming path). Thus do this now. // Note that we only need to validate lazily compiled functions, others // will be validated during eager compilation. DCHECK(start_compilation_); if (!v8_flags.wasm_lazy_validation && ValidateFunctions(*job->native_module_, kOnlyLazyFunctions) .has_error()) { job->Failed(); return; } } // Make sure all compilation tasks stopped running. Decoding (async step) // is done. job->background_task_manager_.CancelAndWait(); CompilationStateImpl* compilation_state = Impl(job->native_module_->compilation_state()); compilation_state->AddCallback( std::make_unique<CompilationStateCallback>(job)); if (base::TimeTicks::IsHighResolution()) { auto compile_mode = job->stream_ == nullptr ? CompilationTimeCallback::kAsync : CompilationTimeCallback::kStreaming; compilation_state->AddCallback(std::make_unique<CompilationTimeCallback>( job->isolate_->async_counters(), job->isolate_->metrics_recorder(), job->context_id_, job->native_module_, compile_mode)); } if (start_compilation_) { // TODO(13209): Use PGO for async compilation, if available. constexpr ProfileInformation* kNoProfileInformation = nullptr; std::unique_ptr<CompilationUnitBuilder> builder = InitializeCompilation( job->isolate(), job->native_module_.get(), kNoProfileInformation); compilation_state->InitializeCompilationUnits(std::move(builder)); // In single-threaded mode there are no worker tasks that will do the // compilation. We call {WaitForCompilationEvent} here so that the main // thread participates and finishes the compilation.
if (v8_flags.wasm_num_compilation_tasks == 0) { compilation_state->WaitForCompilationEvent( CompilationEvent::kFinishedBaselineCompilation); } } } const std::shared_ptr<const WasmModule> module_; const bool start_compilation_; const size_t code_size_estimate_; }; //========================================================================== // Step 3 (sync): Compilation finished. //========================================================================== class AsyncCompileJob::FinishCompilation : public CompileStep { public: explicit FinishCompilation(std::shared_ptr<NativeModule> cached_native_module) : cached_native_module_(std::move(cached_native_module)) {} private: void RunInForeground(AsyncCompileJob* job) override { TRACE_COMPILE("(3) Compilation finished\n"); if (cached_native_module_) { job->native_module_ = cached_native_module_; } // Then finalize and publish the generated module. job->FinishCompile(cached_native_module_ != nullptr); } std::shared_ptr<NativeModule> cached_native_module_; }; //========================================================================== // Step 4 (sync): Decoding or compilation failed. //========================================================================== class AsyncCompileJob::Fail : public CompileStep { private: void RunInForeground(AsyncCompileJob* job) override { TRACE_COMPILE("(4) Async compilation failed.\n"); // {job_} is deleted in {Failed}, therefore the {return}. return job->Failed(); } }; void AsyncCompileJob::FinishSuccessfully() { TRACE_COMPILE("(4) Finish module...\n"); { TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), "wasm.OnCompilationSucceeded"); // We have to make sure that an "incumbent context" is available in case // the module's start function calls out to Blink. Local<v8::Context> backup_incumbent_context = Utils::ToLocal(incumbent_context_); v8::Context::BackupIncumbentScope incumbent(backup_incumbent_context); resolver_->OnCompilationSucceeded(module_object_); } GetWasmEngine()->RemoveCompileJob(this); } AsyncStreamingProcessor::AsyncStreamingProcessor(AsyncCompileJob* job) : decoder_(job->enabled_features_), job_(job), compilation_unit_builder_(nullptr) {} // Process the module header. bool AsyncStreamingProcessor::ProcessModuleHeader( base::Vector<const uint8_t> bytes) { TRACE_STREAMING("Process module header...\n"); decoder_.DecodeModuleHeader(bytes); if (!decoder_.ok()) return false; prefix_hash_ = GetWireBytesHash(bytes); return true; } // Process all sections except for the code section. bool AsyncStreamingProcessor::ProcessSection(SectionCode section_code, base::Vector<const uint8_t> bytes, uint32_t offset) { TRACE_STREAMING("Process section %d ...\n", section_code); if (compilation_unit_builder_) { // We reached a section after the code section, we do not need the // compilation_unit_builder_ anymore. CommitCompilationUnits(); compilation_unit_builder_.reset(); } if (before_code_section_) { // Combine section hashes until code section. prefix_hash_ = base::hash_combine(prefix_hash_, GetWireBytesHash(bytes)); } if (section_code == SectionCode::kUnknownSectionCode) { size_t bytes_consumed = ModuleDecoder::IdentifyUnknownSection( &decoder_, bytes, offset, &section_code); if (!decoder_.ok()) return false; if (section_code == SectionCode::kUnknownSectionCode) { // Skip unknown sections that we do not know how to handle. return true; } // Remove the unknown section tag from the payload bytes.
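// Sketch of the streaming prefix hash maintained above: the module header
// hash is combined with the hash of every section before the code section,
// so two streams with identical prefixes produce the same cache key. The
// byte hash and mixer below are placeholders, not V8's {GetWireBytesHash}
// or base::hash_combine.
#include <cstddef>
#include <cstdint>
#include <vector>

size_t HashBytes(const std::vector<uint8_t>& bytes) {
  size_t h = 0;
  for (uint8_t b : bytes) h = h * 31 + b;  // simple placeholder hash
  return h;
}

size_t HashCombine(size_t seed, size_t value) {
  // Boost-style mixing step, standing in for base::hash_combine.
  return seed ^ static_cast<size_t>(value + 0x9e3779b97f4a7c15ULL +
                                    (seed << 6) + (seed >> 2));
}

size_t PrefixHash(const std::vector<std::vector<uint8_t>>& prefix_sections) {
  size_t prefix = 0;
  for (const auto& section : prefix_sections) {
    prefix = HashCombine(prefix, HashBytes(section));
  }
  return prefix;
}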
offset += bytes_consumed; bytes = bytes.SubVector(bytes_consumed, bytes.size()); } decoder_.DecodeSection(section_code, bytes, offset); return decoder_.ok(); } // Start the code section. bool AsyncStreamingProcessor::ProcessCodeSectionHeader( int num_functions, uint32_t functions_mismatch_error_offset, std::shared_ptr<WireBytesStorage> wire_bytes_storage, int code_section_start, int code_section_length) { DCHECK_LE(0, code_section_length); before_code_section_ = false; TRACE_STREAMING("Start the code section with %d functions...\n", num_functions); prefix_hash_ = base::hash_combine(prefix_hash_, static_cast<uint32_t>(code_section_length)); if (!decoder_.CheckFunctionsCount(static_cast<uint32_t>(num_functions), functions_mismatch_error_offset)) { return false; } decoder_.StartCodeSection({static_cast<uint32_t>(code_section_start), static_cast<uint32_t>(code_section_length)}); if (!GetWasmEngine()->GetStreamingCompilationOwnership(prefix_hash_)) { // Known prefix, wait until the end of the stream and check the cache. prefix_cache_hit_ = true; return true; } // Execute the PrepareAndStartCompile step immediately and not in a separate // task. int num_imported_functions = static_cast<int>(decoder_.module()->num_imported_functions); DCHECK_EQ(kWasmOrigin, decoder_.module()->origin); const bool include_liftoff = v8_flags.liftoff; size_t code_size_estimate = wasm::WasmCodeManager::EstimateNativeModuleCodeSize( num_functions, num_imported_functions, code_section_length, include_liftoff, job_->dynamic_tiering_); job_->DoImmediately<AsyncCompileJob::PrepareAndStartCompile>( decoder_.shared_module(), false, code_size_estimate); auto* compilation_state = Impl(job_->native_module_->compilation_state()); compilation_state->SetWireBytesStorage(std::move(wire_bytes_storage)); DCHECK_EQ(job_->native_module_->module()->origin, kWasmOrigin); // Set outstanding_finishers_ to 2, because both the AsyncCompileJob and the // AsyncStreamingProcessor have to finish. job_->outstanding_finishers_ = 2; // TODO(13209): Use PGO for streaming compilation, if available. constexpr ProfileInformation* kNoProfileInformation = nullptr; compilation_unit_builder_ = InitializeCompilation( job_->isolate(), job_->native_module_.get(), kNoProfileInformation); return true; } // Process a function body. bool AsyncStreamingProcessor::ProcessFunctionBody( base::Vector<const uint8_t> bytes, uint32_t offset) { TRACE_STREAMING("Process function body %d ...\n", num_functions_); uint32_t func_index = decoder_.module()->num_imported_functions + num_functions_; ++num_functions_; // In case of {prefix_cache_hit} we still need the function body to be // decoded. Otherwise a later cache miss cannot be handled. decoder_.DecodeFunctionBody(func_index, static_cast<uint32_t>(bytes.length()), offset); if (prefix_cache_hit_) { // Don't compile yet if we might have a cache hit. return true; } const WasmModule* module = decoder_.module(); auto enabled_features = job_->enabled_features_; DCHECK_EQ(module->origin, kWasmOrigin); const bool lazy_module = v8_flags.wasm_lazy_compilation; CompileStrategy strategy = GetCompileStrategy(module, enabled_features, func_index, lazy_module); bool validate_lazily_compiled_function = !v8_flags.wasm_lazy_validation && (strategy == CompileStrategy::kLazy || strategy == CompileStrategy::kLazyBaselineEagerTopTier); if (validate_lazily_compiled_function) { // {bytes} is part of a section buffer owned by the streaming decoder. 
The // streaming decoder is held alive by the {AsyncCompileJob}, so we can just // use the {bytes} vector as long as the {AsyncCompileJob} is still running. if (!validate_functions_job_handle_) { validate_functions_job_data_.Initialize(module->num_declared_functions); validate_functions_job_handle_ = V8::GetCurrentPlatform()->CreateJob( TaskPriority::kUserVisible, std::make_unique<ValidateFunctionsStreamingJob>( module, enabled_features, &validate_functions_job_data_)); } validate_functions_job_data_.AddUnit(func_index, bytes, validate_functions_job_handle_.get()); } auto* compilation_state = Impl(job_->native_module_->compilation_state()); compilation_state->AddCompilationUnit(compilation_unit_builder_.get(), func_index); return true; } void AsyncStreamingProcessor::CommitCompilationUnits() { DCHECK(compilation_unit_builder_); compilation_unit_builder_->Commit(); } void AsyncStreamingProcessor::OnFinishedChunk() { TRACE_STREAMING("FinishChunk...\n"); if (compilation_unit_builder_) CommitCompilationUnits(); } // Finish the processing of the stream. void AsyncStreamingProcessor::OnFinishedStream( base::OwnedVector<const uint8_t> bytes, bool after_error) { TRACE_STREAMING("Finish stream...\n"); ModuleResult module_result = decoder_.FinishDecoding(); if (module_result.failed()) after_error = true; if (validate_functions_job_handle_) { // Wait for background validation to finish, then check if a validation // error was found. // TODO(13447): Do not block here; register validation as another finisher // instead. validate_functions_job_handle_->Join(); validate_functions_job_handle_.reset(); if (validate_functions_job_data_.found_error) after_error = true; } job_->wire_bytes_ = ModuleWireBytes(bytes.as_vector()); job_->bytes_copy_ = std::move(bytes); // Record event metrics. auto duration = base::TimeTicks::Now() - job_->start_time_; job_->metrics_event_.success = !after_error; job_->metrics_event_.streamed = true; job_->metrics_event_.module_size_in_bytes = job_->wire_bytes_.length(); job_->metrics_event_.function_count = num_functions_; job_->metrics_event_.wall_clock_duration_in_us = duration.InMicroseconds(); job_->isolate_->metrics_recorder()->DelayMainThreadEvent(job_->metrics_event_, job_->context_id_); if (after_error) { if (job_->native_module_ && job_->native_module_->wire_bytes().empty()) { // Clean up the temporary cache entry. GetWasmEngine()->StreamingCompilationFailed(prefix_hash_); } // Calling {Failed} will invalidate the {AsyncCompileJob} and delete {this}. job_->Failed(); return; } std::shared_ptr<WasmModule> module = std::move(module_result).value(); // At this point we identified the module as valid (except maybe for function // bodies, if lazy validation is enabled). // This DCHECK could be considered slow, but it only happens once per async // module compilation, and we only re-decode the module structure, without // validating function bodies. Overall this does not add a lot of overhead. DCHECK(DecodeWasmModule(job_->enabled_features_, job_->bytes_copy_.as_vector(), /* validate functions */ false, kWasmOrigin) .ok()); DCHECK_EQ(NativeModuleCache::PrefixHash(job_->wire_bytes_.module_bytes()), prefix_hash_); if (prefix_cache_hit_) { // Restart as an asynchronous, non-streaming compilation. Most likely // {PrepareAndStartCompile} will get the native module from the cache.
const bool include_liftoff = v8_flags.liftoff; size_t code_size_estimate = wasm::WasmCodeManager::EstimateNativeModuleCodeSize( module.get(), include_liftoff, job_->dynamic_tiering_); job_->DoSync<AsyncCompileJob::PrepareAndStartCompile>( std::move(module), true, code_size_estimate); return; } // We have to open a HandleScope and prepare the Context for // CreateNativeModule, PrepareRuntimeObjects and FinishCompile as this is a // callback from the embedder. HandleScope scope(job_->isolate_); SaveAndSwitchContext saved_context(job_->isolate_, *job_->native_context_); // Record the size of the wire bytes and the number of functions. In // synchronous and asynchronous (non-streaming) compilation, this happens in // {DecodeWasmModule}. auto* module_size_histogram = job_->isolate_->counters()->wasm_wasm_module_size_bytes(); module_size_histogram->AddSample(job_->wire_bytes_.module_bytes().length()); auto* num_functions_histogram = job_->isolate_->counters()->wasm_functions_per_wasm_module(); num_functions_histogram->AddSample(static_cast<int>(num_functions_)); const bool has_code_section = job_->native_module_ != nullptr; bool cache_hit = false; if (!has_code_section) { // We are processing a WebAssembly module without code section. Create the // native module now (would otherwise happen in {PrepareAndStartCompile} or // {ProcessCodeSectionHeader}). constexpr size_t kCodeSizeEstimate = 0; cache_hit = job_->GetOrCreateNativeModule(std::move(module), kCodeSizeEstimate); } else { job_->native_module_->SetWireBytes(std::move(job_->bytes_copy_)); } const bool needs_finish = job_->DecrementAndCheckFinisherCount(AsyncCompileJob::kStreamingDecoder); DCHECK_IMPLIES(!has_code_section, needs_finish); if (needs_finish) { const bool failed = job_->native_module_->compilation_state()->failed(); if (!cache_hit) { auto* prev_native_module = job_->native_module_.get(); job_->native_module_ = GetWasmEngine()->UpdateNativeModuleCache( failed, std::move(job_->native_module_), job_->isolate_); cache_hit = prev_native_module != job_->native_module_.get(); } // We finally call {Failed} or {FinishCompile}, which will invalidate the // {AsyncCompileJob} and delete {this}. if (failed) { job_->Failed(); } else { job_->FinishCompile(cache_hit); } } } void AsyncStreamingProcessor::OnAbort() { TRACE_STREAMING("Abort stream...\n"); if (validate_functions_job_handle_) { validate_functions_job_handle_->Cancel(); validate_functions_job_handle_.reset(); } if (job_->native_module_ && job_->native_module_->wire_bytes().empty()) { // Clean up the temporary cache entry. GetWasmEngine()->StreamingCompilationFailed(prefix_hash_); } // {Abort} invalidates the {AsyncCompileJob}, which in turn deletes {this}. job_->Abort(); } bool AsyncStreamingProcessor::Deserialize( base::Vector<const uint8_t> module_bytes, base::Vector<const uint8_t> wire_bytes) { TRACE_EVENT0("v8.wasm", "wasm.Deserialize"); base::Optional<TimedHistogramScope> time_scope; if (base::TimeTicks::IsHighResolution()) { time_scope.emplace(job_->isolate()->counters()->wasm_deserialization_time(), job_->isolate()); } // DeserializeNativeModule and FinishCompile assume that they are executed in // a HandleScope, and that a context is set on the isolate. 
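// Sketch of the two-finisher handshake used above: both the streaming
// decoder and the compilation itself must signal completion, and whichever
// party decrements the counter to zero performs the final step
// ({FinishCompile} or {Failed}). Illustrative; V8's counter lives on the
// {AsyncCompileJob}.
#include <atomic>

class FinisherCount {
 public:
  explicit FinisherCount(int finishers) : outstanding_(finishers) {}

  // Returns true exactly once, for the party that finishes last.
  bool DecrementAndCheck() {
    return outstanding_.fetch_sub(1, std::memory_order_acq_rel) == 1;
  }

 private:
  std::atomic<int> outstanding_;
};

// Usage: FinisherCount finishers(2);  // streaming decoder + compilation
// if (finishers.DecrementAndCheck()) { /* finish the module */ }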
HandleScope scope(job_->isolate_); SaveAndSwitchContext saved_context(job_->isolate_, *job_->native_context_); MaybeHandle<WasmModuleObject> result = DeserializeNativeModule(job_->isolate_, module_bytes, wire_bytes, base::VectorOf(job_->stream_->url())); if (result.is_null()) return false; job_->module_object_ = job_->isolate_->global_handles()->Create(*result.ToHandleChecked()); job_->native_module_ = job_->module_object_->shared_native_module(); job_->wire_bytes_ = ModuleWireBytes(job_->native_module_->wire_bytes()); // Calling {FinishCompile} deletes the {AsyncCompileJob} and {this}. job_->FinishCompile(false); return true; } CompilationStateImpl::CompilationStateImpl( const std::shared_ptr<NativeModule>& native_module, std::shared_ptr<Counters> async_counters, DynamicTiering dynamic_tiering) : native_module_(native_module.get()), native_module_weak_(std::move(native_module)), async_counters_(std::move(async_counters)), compilation_unit_queues_(native_module->num_functions()), dynamic_tiering_(dynamic_tiering) { if (native_module->module()->memories.size() > 1) { detected_features_.Add(kFeature_multi_memory); } } void CompilationStateImpl::InitCompileJob() { DCHECK_NULL(baseline_compile_job_); DCHECK_NULL(top_tier_compile_job_); // Create the job, but don't spawn workers yet. This will happen on // {NotifyConcurrencyIncrease}. baseline_compile_job_ = V8::GetCurrentPlatform()->CreateJob( TaskPriority::kUserVisible, std::make_unique<BackgroundCompileJob>( native_module_weak_, async_counters_, CompilationTier::kBaseline)); top_tier_compile_job_ = V8::GetCurrentPlatform()->CreateJob( TaskPriority::kUserVisible, std::make_unique<BackgroundCompileJob>( native_module_weak_, async_counters_, CompilationTier::kTopTier)); } void CompilationStateImpl::CancelCompilation( CompilationStateImpl::CancellationPolicy cancellation_policy) { base::MutexGuard callbacks_guard(&callbacks_mutex_); if (cancellation_policy == kCancelInitialCompilation && finished_events_.contains( CompilationEvent::kFinishedBaselineCompilation)) { // Initial compilation already finished; cannot be cancelled. return; } // std::memory_order_relaxed is sufficient because no other state is // synchronized with |compile_cancelled_|. compile_cancelled_.store(true, std::memory_order_relaxed); // No more callbacks after abort. callbacks_.clear(); } bool CompilationStateImpl::cancelled() const { return compile_cancelled_.load(std::memory_order_relaxed); } void CompilationStateImpl::ApplyCompilationHintToInitialProgress( const WasmCompilationHint& hint, size_t hint_idx) { // Get old information. uint8_t& progress = compilation_progress_[hint_idx]; ExecutionTier old_baseline_tier = RequiredBaselineTierField::decode(progress); ExecutionTier old_top_tier = RequiredTopTierField::decode(progress); // Compute new information. ExecutionTier new_baseline_tier = ApplyHintToExecutionTier(hint.baseline_tier, old_baseline_tier); ExecutionTier new_top_tier = ApplyHintToExecutionTier(hint.top_tier, old_top_tier); switch (hint.strategy) { case WasmCompilationHintStrategy::kDefault: // Be careful not to switch from lazy to non-lazy. if (old_baseline_tier == ExecutionTier::kNone) { new_baseline_tier = ExecutionTier::kNone; } if (old_top_tier == ExecutionTier::kNone) { new_top_tier = ExecutionTier::kNone; } break; case WasmCompilationHintStrategy::kLazy: new_baseline_tier = ExecutionTier::kNone; new_top_tier = ExecutionTier::kNone; break; case WasmCompilationHintStrategy::kEager: // Nothing to do, use the encoded (new) tiers. 
void CompilationStateImpl::ApplyCompilationHintToInitialProgress(
    const WasmCompilationHint& hint, size_t hint_idx) {
  // Get old information.
  uint8_t& progress = compilation_progress_[hint_idx];
  ExecutionTier old_baseline_tier =
      RequiredBaselineTierField::decode(progress);
  ExecutionTier old_top_tier = RequiredTopTierField::decode(progress);

  // Compute new information.
  ExecutionTier new_baseline_tier =
      ApplyHintToExecutionTier(hint.baseline_tier, old_baseline_tier);
  ExecutionTier new_top_tier =
      ApplyHintToExecutionTier(hint.top_tier, old_top_tier);
  switch (hint.strategy) {
    case WasmCompilationHintStrategy::kDefault:
      // Be careful not to switch from lazy to non-lazy.
      if (old_baseline_tier == ExecutionTier::kNone) {
        new_baseline_tier = ExecutionTier::kNone;
      }
      if (old_top_tier == ExecutionTier::kNone) {
        new_top_tier = ExecutionTier::kNone;
      }
      break;
    case WasmCompilationHintStrategy::kLazy:
      new_baseline_tier = ExecutionTier::kNone;
      new_top_tier = ExecutionTier::kNone;
      break;
    case WasmCompilationHintStrategy::kEager:
      // Nothing to do, use the encoded (new) tiers.
      break;
    case WasmCompilationHintStrategy::kLazyBaselineEagerTopTier:
      new_baseline_tier = ExecutionTier::kNone;
      break;
  }

  progress = RequiredBaselineTierField::update(progress, new_baseline_tier);
  progress = RequiredTopTierField::update(progress, new_top_tier);

  // Update counter for outstanding baseline units.
  outstanding_baseline_units_ += (new_baseline_tier != ExecutionTier::kNone) -
                                 (old_baseline_tier != ExecutionTier::kNone);
}

void CompilationStateImpl::ApplyPgoInfoToInitialProgress(
    ProfileInformation* pgo_info) {
  // Functions that were executed in the profiling run are eagerly compiled to
  // Liftoff.
  const WasmModule* module = native_module_->module();
  for (int func_index : pgo_info->executed_functions()) {
    uint8_t& progress =
        compilation_progress_[declared_function_index(module, func_index)];
    ExecutionTier old_baseline_tier =
        RequiredBaselineTierField::decode(progress);
    // If the function is already marked for eager compilation, we are good.
    if (old_baseline_tier != ExecutionTier::kNone) continue;

    // Set the baseline tier to Liftoff, so we eagerly compile to Liftoff.
    // TODO(13288): Compile Liftoff code in the background, if lazy compilation
    // is enabled.
    progress =
        RequiredBaselineTierField::update(progress, ExecutionTier::kLiftoff);
    ++outstanding_baseline_units_;
  }

  // Functions that were tiered up during PGO generation are eagerly compiled
  // to TurboFan (in the background, not blocking instantiation).
  for (int func_index : pgo_info->tiered_up_functions()) {
    uint8_t& progress =
        compilation_progress_[declared_function_index(module, func_index)];
    ExecutionTier old_baseline_tier =
        RequiredBaselineTierField::decode(progress);
    ExecutionTier old_top_tier = RequiredTopTierField::decode(progress);
    // If the function is already marked for eager or background compilation to
    // TurboFan, we are good.
    if (old_baseline_tier == ExecutionTier::kTurbofan) continue;
    if (old_top_tier == ExecutionTier::kTurbofan) continue;

    // Set top tier to TurboFan, so we eagerly trigger compilation in the
    // background.
    progress = RequiredTopTierField::update(progress, ExecutionTier::kTurbofan);
  }
}
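// NOTE (editorial sketch, not part of the original file): the per-function
// progress byte above packs three tier fields ({RequiredBaselineTierField},
// {RequiredTopTierField}, {ReachedTierField}) into a single uint8_t via
// BitField-style encode/decode/update helpers. A minimal stand-alone version
// with a hypothetical 2-bit layout (the real layout is defined elsewhere in
// this file):
//
//   #include <cstdint>
//
//   enum class Tier : uint8_t { kNone = 0, kBaseline = 1, kOptimized = 2 };
//
//   template <int kShift>
//   struct TierField {
//     static constexpr uint8_t kMask = 0b11 << kShift;
//     static Tier decode(uint8_t byte) {
//       return static_cast<Tier>((byte & kMask) >> kShift);
//     }
//     static uint8_t update(uint8_t byte, Tier tier) {
//       return (byte & ~kMask) | (static_cast<uint8_t>(tier) << kShift);
//     }
//   };
//
//   using RequiredBaseline = TierField<0>;
//   using RequiredTop = TierField<2>;
//   using Reached = TierField<4>;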
void CompilationStateImpl::ApplyPgoInfoLate(ProfileInformation* pgo_info) {
  TRACE_EVENT0("v8.wasm", "wasm.ApplyPgoInfo");
  const WasmModule* module = native_module_->module();
  CompilationUnitBuilder builder{native_module_};

  base::MutexGuard guard(&callbacks_mutex_);
  // Functions that were executed in the profiling run are eagerly compiled to
  // Liftoff (in the background).
  for (int func_index : pgo_info->executed_functions()) {
    uint8_t& progress =
        compilation_progress_[declared_function_index(module, func_index)];
    ExecutionTier old_baseline_tier =
        RequiredBaselineTierField::decode(progress);
    // If the function is already marked for eager compilation, we are good.
    if (old_baseline_tier != ExecutionTier::kNone) continue;
    // If we already compiled Liftoff or TurboFan code, we are also good.
    ExecutionTier reached_tier = ReachedTierField::decode(progress);
    if (reached_tier >= ExecutionTier::kLiftoff) continue;

    // Set the baseline tier to Liftoff and schedule a compilation unit.
    progress =
        RequiredBaselineTierField::update(progress, ExecutionTier::kLiftoff);
    // Add this as a "top tier unit" since it does not contribute to initial
    // compilation ("baseline finished" might already be triggered).
    // TODO(clemensb): Rename "baseline finished" to "initial compile
    // finished".
    // TODO(clemensb): Avoid scheduling both a Liftoff and a TurboFan unit, or
    // prioritize Liftoff when executing the units.
    builder.AddTopTierUnit(func_index, ExecutionTier::kLiftoff);
  }

  // Functions that were tiered up during PGO generation are eagerly compiled
  // to TurboFan in the background.
  for (int func_index : pgo_info->tiered_up_functions()) {
    uint8_t& progress =
        compilation_progress_[declared_function_index(module, func_index)];
    ExecutionTier old_baseline_tier =
        RequiredBaselineTierField::decode(progress);
    ExecutionTier old_top_tier = RequiredTopTierField::decode(progress);
    // If the function is already marked for eager or background compilation to
    // TurboFan, we are good.
    if (old_baseline_tier == ExecutionTier::kTurbofan) continue;
    if (old_top_tier == ExecutionTier::kTurbofan) continue;
    // If we already compiled TurboFan code, we are also good.
    ExecutionTier reached_tier = ReachedTierField::decode(progress);
    if (reached_tier == ExecutionTier::kTurbofan) continue;

    // Set top tier to TurboFan and schedule a compilation unit.
    progress = RequiredTopTierField::update(progress, ExecutionTier::kTurbofan);
    builder.AddTopTierUnit(func_index, ExecutionTier::kTurbofan);
  }
  builder.Commit();
}
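// NOTE (editorial sketch, not part of the original file): the initialization
// below asks {GetDefaultTiersPerModule} for a {baseline, top} tier pair based
// on tiering mode, debug state, and laziness. A simplified decision sketch,
// an assumption about the shape of that logic rather than a copy of it (the
// real function lives elsewhere in this file; names here are hypothetical):
//
//   struct TierPair { ExecutionTier baseline; ExecutionTier top; };
//
//   TierPair DefaultTiersSketch(bool debug, bool lazy, bool dynamic_tiering) {
//     // Lazy modules compile nothing up front.
//     if (lazy) return {ExecutionTier::kNone, ExecutionTier::kNone};
//     // Debugging pins functions to Liftoff.
//     if (debug) return {ExecutionTier::kLiftoff, ExecutionTier::kLiftoff};
//     // With dynamic tiering, TurboFan units are scheduled later, based on
//     // execution feedback, instead of eagerly.
//     return dynamic_tiering
//                ? TierPair{ExecutionTier::kLiftoff, ExecutionTier::kLiftoff}
//                : TierPair{ExecutionTier::kLiftoff,
//                           ExecutionTier::kTurbofan};
//   }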
void CompilationStateImpl::InitializeCompilationProgress(
    int num_import_wrappers, int num_export_wrappers,
    ProfileInformation* pgo_info) {
  DCHECK(!failed());
  auto* module = native_module_->module();

  base::MutexGuard guard(&callbacks_mutex_);
  DCHECK_EQ(0, outstanding_baseline_units_);
  DCHECK(!has_outstanding_export_wrappers_);

  // Compute the default compilation progress for all functions, and set it.
  const ExecutionTierPair default_tiers = GetDefaultTiersPerModule(
      native_module_, dynamic_tiering_, native_module_->IsInDebugState(),
      IsLazyModule(module));
  const uint8_t default_progress =
      RequiredBaselineTierField::encode(default_tiers.baseline_tier) |
      RequiredTopTierField::encode(default_tiers.top_tier) |
      ReachedTierField::encode(ExecutionTier::kNone);
  compilation_progress_.assign(module->num_declared_functions,
                               default_progress);
  if (default_tiers.baseline_tier != ExecutionTier::kNone) {
    outstanding_baseline_units_ += module->num_declared_functions;
  }

  // Apply compilation hints, if enabled.
  if (native_module_->enabled_features().has_compilation_hints()) {
    size_t num_hints = std::min(module->compilation_hints.size(),
                                size_t{module->num_declared_functions});
    for (size_t hint_idx = 0; hint_idx < num_hints; ++hint_idx) {
      const auto& hint = module->compilation_hints[hint_idx];
      ApplyCompilationHintToInitialProgress(hint, hint_idx);
    }
  }

  // Apply PGO information, if available.
  if (pgo_info) ApplyPgoInfoToInitialProgress(pgo_info);

  // Account for outstanding wrapper compilation.
  outstanding_baseline_units_ += num_import_wrappers;
  has_outstanding_export_wrappers_ = (num_export_wrappers > 0);

  // Trigger callbacks if the module needs no baseline or top tier
  // compilation. This can be the case for an empty or fully lazy module.
  TriggerCallbacks();
}

void CompilationStateImpl::AddCompilationUnitInternal(
    CompilationUnitBuilder* builder, int function_index,
    uint8_t function_progress) {
  ExecutionTier required_baseline_tier =
      CompilationStateImpl::RequiredBaselineTierField::decode(
          function_progress);
  ExecutionTier required_top_tier =
      CompilationStateImpl::RequiredTopTierField::decode(function_progress);
  ExecutionTier reached_tier =
      CompilationStateImpl::ReachedTierField::decode(function_progress);

  if (reached_tier < required_baseline_tier) {
    builder->AddBaselineUnit(function_index, required_baseline_tier);
  }
  if (reached_tier < required_top_tier &&
      required_baseline_tier != required_top_tier) {
    builder->AddTopTierUnit(function_index, required_top_tier);
  }
}

void CompilationStateImpl::InitializeCompilationUnits(
    std::unique_ptr<CompilationUnitBuilder> builder) {
  int offset = native_module_->module()->num_imported_functions;
  {
    base::MutexGuard guard(&callbacks_mutex_);
    for (size_t i = 0, e = compilation_progress_.size(); i < e; ++i) {
      uint8_t function_progress = compilation_progress_[i];
      int func_index = offset + static_cast<int>(i);
      AddCompilationUnitInternal(builder.get(), func_index, function_progress);
    }
  }
  builder->Commit();
}

void CompilationStateImpl::AddCompilationUnit(CompilationUnitBuilder* builder,
                                              int func_index) {
  int offset = native_module_->module()->num_imported_functions;
  int progress_index = func_index - offset;
  uint8_t function_progress;
  {
    // TODO(ahaas): This lock may cause overhead. If so, we could get rid of
    // the lock as follows:
    // 1) Make compilation_progress_ an array of atomic<uint8_t>, and access it
    //    lock-free.
    // 2) Have a copy of compilation_progress_ that we use for initialization.
    // 3) Just re-calculate the content of compilation_progress_.
    base::MutexGuard guard(&callbacks_mutex_);
    function_progress = compilation_progress_[progress_index];
  }
  AddCompilationUnitInternal(builder, func_index, function_progress);
}
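// NOTE (editorial sketch, not part of the original file): option 1) of the
// TODO above would replace the mutex-guarded byte vector with atomics. A
// minimal sketch of that direction (hypothetical names):
//
//   #include <atomic>
//   #include <cstdint>
//   #include <vector>
//
//   class AtomicProgress {
//    public:
//     explicit AtomicProgress(size_t n) : bytes_(n) {}
//     uint8_t Load(size_t i) const {
//       return bytes_[i].load(std::memory_order_relaxed);
//     }
//     void Store(size_t i, uint8_t new_byte) {
//       bytes_[i].store(new_byte, std::memory_order_relaxed);
//     }
//    private:
//     std::vector<std::atomic<uint8_t>> bytes_;
//   };
//
// Note that read-modify-write sequences (decode, change one field, write
// back) would then need compare_exchange loops to stay race-free.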
void CompilationStateImpl::InitializeCompilationProgressAfterDeserialization(
    base::Vector<const int> lazy_functions,
    base::Vector<const int> eager_functions) {
  TRACE_EVENT2("v8.wasm", "wasm.CompilationAfterDeserialization",
               "num_lazy_functions", lazy_functions.size(),
               "num_eager_functions", eager_functions.size());
  base::Optional<TimedHistogramScope> lazy_compile_time_scope;
  if (base::TimeTicks::IsHighResolution()) {
    lazy_compile_time_scope.emplace(
        counters()->wasm_compile_after_deserialize());
  }

  auto* module = native_module_->module();
  {
    base::MutexGuard guard(&callbacks_mutex_);
    DCHECK(compilation_progress_.empty());

    // Initialize the compilation progress as if everything was
    // TurboFan-compiled.
    constexpr uint8_t kProgressAfterTurbofanDeserialization =
        RequiredBaselineTierField::encode(ExecutionTier::kTurbofan) |
        RequiredTopTierField::encode(ExecutionTier::kTurbofan) |
        ReachedTierField::encode(ExecutionTier::kTurbofan);
    compilation_progress_.assign(module->num_declared_functions,
                                 kProgressAfterTurbofanDeserialization);

    // Update compilation state for lazy functions.
    constexpr uint8_t kProgressForLazyFunctions =
        RequiredBaselineTierField::encode(ExecutionTier::kNone) |
        RequiredTopTierField::encode(ExecutionTier::kNone) |
        ReachedTierField::encode(ExecutionTier::kNone);
    for (auto func_index : lazy_functions) {
      compilation_progress_[declared_function_index(module, func_index)] =
          kProgressForLazyFunctions;
    }

    // Update compilation state for eagerly compiled functions.
    constexpr bool kNotLazy = false;
    ExecutionTierPair default_tiers =
        GetDefaultTiersPerModule(native_module_, dynamic_tiering_,
                                 native_module_->IsInDebugState(), kNotLazy);
    uint8_t progress_for_eager_functions =
        RequiredBaselineTierField::encode(default_tiers.baseline_tier) |
        RequiredTopTierField::encode(default_tiers.top_tier) |
        ReachedTierField::encode(ExecutionTier::kNone);
    for (auto func_index : eager_functions) {
      // Check that {func_index} is not contained in {lazy_functions}.
      DCHECK_EQ(
          compilation_progress_[declared_function_index(module, func_index)],
          kProgressAfterTurbofanDeserialization);
      compilation_progress_[declared_function_index(module, func_index)] =
          progress_for_eager_functions;
    }
    DCHECK_NE(ExecutionTier::kNone, default_tiers.baseline_tier);
    outstanding_baseline_units_ += eager_functions.size();

    // Export wrappers are compiled synchronously after deserialization, so
    // set that as finished already. Baseline compilation is done if we do not
    // have any Liftoff functions to compile.
    finished_events_.Add(CompilationEvent::kFinishedExportWrappers);
    if (eager_functions.empty() || v8_flags.wasm_lazy_compilation) {
      finished_events_.Add(CompilationEvent::kFinishedBaselineCompilation);
    }
  }
  auto builder = std::make_unique<CompilationUnitBuilder>(native_module_);
  InitializeCompilationUnits(std::move(builder));
  if (!v8_flags.wasm_lazy_compilation) {
    WaitForCompilationEvent(CompilationEvent::kFinishedBaselineCompilation);
  }
}

void CompilationStateImpl::AddCallback(
    std::unique_ptr<CompilationEventCallback> callback) {
  base::MutexGuard callbacks_guard(&callbacks_mutex_);
  // Immediately trigger events that already happened.
  for (auto event : {CompilationEvent::kFinishedExportWrappers,
                     CompilationEvent::kFinishedBaselineCompilation,
                     CompilationEvent::kFailedCompilation}) {
    if (finished_events_.contains(event)) {
      callback->call(event);
    }
  }
  constexpr base::EnumSet<CompilationEvent> kFinalEvents{
      CompilationEvent::kFailedCompilation};
  if (!finished_events_.contains_any(kFinalEvents)) {
    callbacks_.emplace_back(std::move(callback));
  }
}
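// NOTE (editorial sketch, not part of the original file): {AddCallback} above
// replays already-finished events to late subscribers, so registration order
// does not matter to observers. A stand-alone sketch of that pattern
// (hypothetical names, no locking for brevity):
//
//   #include <functional>
//   #include <set>
//   #include <vector>
//
//   enum class Event { kWrappersDone, kBaselineDone, kFailed };
//
//   class EventHub {
//    public:
//     void Subscribe(std::function<void(Event)> cb) {
//       for (Event e : finished_) cb(e);  // Replay past events.
//       callbacks_.push_back(std::move(cb));
//     }
//     void Fire(Event e) {
//       if (!finished_.insert(e).second) return;  // Trigger each event once.
//       for (auto& cb : callbacks_) cb(e);
//     }
//    private:
//     std::set<Event> finished_;
//     std::vector<std::function<void(Event)>> callbacks_;
//   };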
void CompilationStateImpl::CommitCompilationUnits(
    base::Vector<WasmCompilationUnit> baseline_units,
    base::Vector<WasmCompilationUnit> top_tier_units,
    base::Vector<std::shared_ptr<JSToWasmWrapperCompilationUnit>>
        js_to_wasm_wrapper_units) {
  if (!js_to_wasm_wrapper_units.empty()) {
    // |js_to_wasm_wrapper_units_| will only be initialized once.
    DCHECK_NULL(js_to_wasm_wrapper_job_);
    js_to_wasm_wrapper_units_.insert(js_to_wasm_wrapper_units_.end(),
                                     js_to_wasm_wrapper_units.begin(),
                                     js_to_wasm_wrapper_units.end());
    js_to_wasm_wrapper_job_ = V8::GetCurrentPlatform()->PostJob(
        TaskPriority::kUserBlocking,
        std::make_unique<AsyncCompileJSToWasmWrapperJob>(
            native_module_weak_, js_to_wasm_wrapper_units_.size()));
  }
  if (!baseline_units.empty() || !top_tier_units.empty()) {
    compilation_unit_queues_.AddUnits(baseline_units, top_tier_units,
                                      native_module_->module());
  }
  if (!baseline_units.empty()) {
    DCHECK(baseline_compile_job_->IsValid());
    baseline_compile_job_->NotifyConcurrencyIncrease();
  }
  if (!top_tier_units.empty()) {
    DCHECK(top_tier_compile_job_->IsValid());
    top_tier_compile_job_->NotifyConcurrencyIncrease();
  }
}

void CompilationStateImpl::CommitTopTierCompilationUnit(
    WasmCompilationUnit unit) {
  CommitCompilationUnits({}, {&unit, 1}, {});
}

void CompilationStateImpl::AddTopTierPriorityCompilationUnit(
    WasmCompilationUnit unit, size_t priority) {
  compilation_unit_queues_.AddTopTierPriorityUnit(unit, priority);
  // We should not have a {CodeSpaceWriteScope} open at this point, as
  // {NotifyConcurrencyIncrease} can spawn new threads which could inherit PKU
  // permissions (which would be a security issue).
  top_tier_compile_job_->NotifyConcurrencyIncrease();
}

std::shared_ptr<JSToWasmWrapperCompilationUnit>
CompilationStateImpl::GetJSToWasmWrapperCompilationUnit(size_t index) {
  DCHECK_LT(index, js_to_wasm_wrapper_units_.size());
  return js_to_wasm_wrapper_units_[index];
}

void CompilationStateImpl::FinalizeJSToWasmWrappers(Isolate* isolate,
                                                    const WasmModule* module) {
  // TODO(6792): Wrappers below are allocated with {Factory::NewCode}. As an
  // optimization we create a code memory modification scope that avoids
  // changing the page permissions back-and-forth between RWX and RX, because
  // many such wrappers are allocated in sequence below.
  TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"),
               "wasm.FinalizeJSToWasmWrappers", "wrappers",
               js_to_wasm_wrapper_units_.size());

  isolate->heap()->EnsureWasmCanonicalRttsSize(
      module->MaxCanonicalTypeIndex() + 1);
  for (auto& unit : js_to_wasm_wrapper_units_) {
    DCHECK_EQ(isolate, unit->isolate());
    // Note: The code is either the compiled signature-specific wrapper or the
    // generic wrapper built-in.
    Handle<Code> code = unit->Finalize();
    if (!code->is_builtin()) {
      uint32_t index = GetExportWrapperIndex(unit->canonical_sig_index(),
                                             unit->is_import());
      isolate->heap()->js_to_wasm_wrappers()->Set(
          index, MaybeObject::FromObject(*code));
      // Do not increase code stats for non-jitted wrappers.
      RecordStats(*code, isolate->counters());
      isolate->counters()->wasm_compiled_export_wrapper()->Increment(1);
    }
  }
}

CompilationUnitQueues::Queue* CompilationStateImpl::GetQueueForCompileTask(
    int task_id) {
  return compilation_unit_queues_.GetQueueForTask(task_id);
}

base::Optional<WasmCompilationUnit>
CompilationStateImpl::GetNextCompilationUnit(
    CompilationUnitQueues::Queue* queue, CompilationTier tier) {
  return compilation_unit_queues_.GetNextUnit(queue, tier);
}
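// NOTE (editorial sketch, not part of the original file):
// {CommitCompilationUnits} above enqueues work first and only then calls
// {NotifyConcurrencyIncrease}, so a worker that wakes up always finds its
// unit. The same enqueue-then-wake discipline with standard primitives
// (hypothetical names):
//
//   #include <condition_variable>
//   #include <deque>
//   #include <mutex>
//
//   class UnitQueue {
//    public:
//     void Add(int unit) {
//       {
//         std::lock_guard<std::mutex> lock(mutex_);
//         units_.push_back(unit);  // Publish work before waking anyone.
//       }
//       cv_.notify_one();  // Analogous to NotifyConcurrencyIncrease.
//     }
//     int Take() {
//       std::unique_lock<std::mutex> lock(mutex_);
//       cv_.wait(lock, [&] { return !units_.empty(); });
//       int unit = units_.front();
//       units_.pop_front();
//       return unit;
//     }
//    private:
//     std::mutex mutex_;
//     std::condition_variable cv_;
//     std::deque<int> units_;
//   };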
void CompilationStateImpl::OnFinishedUnits(
    base::Vector<WasmCode*> code_vector) {
  TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"),
               "wasm.OnFinishedUnits", "units", code_vector.size());

  base::MutexGuard guard(&callbacks_mutex_);

  // Assume an order of execution tiers that represents the quality of their
  // generated code.
  static_assert(ExecutionTier::kNone < ExecutionTier::kLiftoff &&
                    ExecutionTier::kLiftoff < ExecutionTier::kTurbofan,
                "Assume an order on execution tiers");

  DCHECK_EQ(compilation_progress_.size(),
            native_module_->module()->num_declared_functions);

  for (size_t i = 0; i < code_vector.size(); i++) {
    WasmCode* code = code_vector[i];
    DCHECK_NOT_NULL(code);
    DCHECK_LT(code->index(), native_module_->num_functions());

    if (code->index() <
        static_cast<int>(native_module_->num_imported_functions())) {
      // Import wrapper.
      DCHECK_EQ(code->tier(), ExecutionTier::kTurbofan);
      outstanding_baseline_units_--;
    } else {
      // Function.
      DCHECK_NE(code->tier(), ExecutionTier::kNone);

      // Read the function's compilation progress. This view of the
      // compilation progress may differ from the actually compiled code: any
      // lazily compiled function does not contribute to the compilation
      // progress but may publish code to the code manager.
      int slot_index =
          declared_function_index(native_module_->module(), code->index());
      uint8_t function_progress = compilation_progress_[slot_index];
      ExecutionTier required_baseline_tier =
          RequiredBaselineTierField::decode(function_progress);
      ExecutionTier reached_tier = ReachedTierField::decode(function_progress);

      // Check whether the required baseline tier is reached.
      if (reached_tier < required_baseline_tier &&
          required_baseline_tier <= code->tier()) {
        DCHECK_GT(outstanding_baseline_units_, 0);
        outstanding_baseline_units_--;
      }
      if (code->tier() == ExecutionTier::kTurbofan) {
        bytes_since_last_chunk_ += code->instructions().size();
      }

      // Update the function's compilation progress.
      if (code->tier() > reached_tier) {
        compilation_progress_[slot_index] = ReachedTierField::update(
            compilation_progress_[slot_index], code->tier());
      }
      DCHECK_LE(0, outstanding_baseline_units_);
    }
  }

  TriggerCallbacks();
}

void CompilationStateImpl::OnFinishedJSToWasmWrapperUnits() {
  base::MutexGuard guard(&callbacks_mutex_);
  has_outstanding_export_wrappers_ = false;
  TriggerCallbacks();
}
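// NOTE (editorial sketch, not part of the original file): {TriggerCallbacks}
// below manipulates {base::EnumSet}, a bitmask set over an enum (add, union,
// difference, containment). A minimal stand-alone equivalent of the
// operations used there (hypothetical names):
//
//   #include <cstdint>
//
//   template <typename E>
//   class EnumSetSketch {
//    public:
//     void Add(E e) { bits_ |= Bit(e); }
//     bool contains(E e) const { return (bits_ & Bit(e)) != 0; }
//     bool empty() const { return bits_ == 0; }
//     EnumSetSketch operator-(E e) const {
//       EnumSetSketch r = *this;
//       r.bits_ &= ~Bit(e);
//       return r;
//     }
//     EnumSetSketch& operator|=(EnumSetSketch other) {
//       bits_ |= other.bits_;
//       return *this;
//     }
//     EnumSetSketch& operator-=(EnumSetSketch other) {
//       bits_ &= ~other.bits_;
//       return *this;
//     }
//    private:
//     static uint64_t Bit(E e) { return uint64_t{1} << static_cast<int>(e); }
//     uint64_t bits_ = 0;
//   };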
void CompilationStateImpl::TriggerCallbacks() {
  DCHECK(!callbacks_mutex_.TryLock());

  base::EnumSet<CompilationEvent> triggered_events;
  if (!has_outstanding_export_wrappers_) {
    triggered_events.Add(CompilationEvent::kFinishedExportWrappers);
    if (outstanding_baseline_units_ == 0) {
      triggered_events.Add(CompilationEvent::kFinishedBaselineCompilation);
    }
  }

  // For dynamic tiering, trigger "compilation chunk finished" after a new
  // chunk of size {v8_flags.wasm_caching_threshold}.
  if (dynamic_tiering_ &&
      static_cast<size_t>(v8_flags.wasm_caching_threshold) <
          bytes_since_last_chunk_) {
    triggered_events.Add(CompilationEvent::kFinishedCompilationChunk);
    bytes_since_last_chunk_ = 0;
  }

  if (compile_failed_.load(std::memory_order_relaxed)) {
    // *Only* trigger the "failed" event.
    triggered_events =
        base::EnumSet<CompilationEvent>({CompilationEvent::kFailedCompilation});
  }

  if (triggered_events.empty()) return;

  // Don't trigger past events again.
  triggered_events -= finished_events_;
  // There can be multiple compilation chunks, thus do not store this.
  finished_events_ |=
      triggered_events - CompilationEvent::kFinishedCompilationChunk;

  for (auto event :
       {std::make_pair(CompilationEvent::kFailedCompilation,
                       "wasm.CompilationFailed"),
        std::make_pair(CompilationEvent::kFinishedExportWrappers,
                       "wasm.ExportWrappersFinished"),
        std::make_pair(CompilationEvent::kFinishedBaselineCompilation,
                       "wasm.BaselineFinished"),
        std::make_pair(CompilationEvent::kFinishedCompilationChunk,
                       "wasm.CompilationChunkFinished")}) {
    if (!triggered_events.contains(event.first)) continue;
    DCHECK_NE(compilation_id_, kInvalidCompilationID);
    TRACE_EVENT1("v8.wasm", event.second, "id", compilation_id_);
    for (auto& callback : callbacks_) {
      callback->call(event.first);
    }
  }

  if (outstanding_baseline_units_ == 0 && !has_outstanding_export_wrappers_) {
    auto new_end = std::remove_if(
        callbacks_.begin(), callbacks_.end(), [](const auto& callback) {
          return callback->release_after_final_event();
        });
    callbacks_.erase(new_end, callbacks_.end());
  }
}

void CompilationStateImpl::OnCompilationStopped(WasmFeatures detected) {
  base::MutexGuard guard(&mutex_);
  detected_features_.Add(detected);
}

void CompilationStateImpl::PublishDetectedFeatures(Isolate* isolate) {
  // Notifying the isolate of the feature counts must take place under the
  // mutex, because even if we have finished baseline compilation, tiering
  // compilations may still occur in the background.
  base::MutexGuard guard(&mutex_);
  UpdateFeatureUseCounts(isolate, detected_features_);
}

void CompilationStateImpl::PublishCompilationResults(
    std::vector<std::unique_ptr<WasmCode>> unpublished_code) {
  if (unpublished_code.empty()) return;

  // For import wrapper compilation units, add the result to the cache.
  int num_imported_functions = native_module_->num_imported_functions();
  WasmImportWrapperCache* cache = native_module_->import_wrapper_cache();
  for (const auto& code : unpublished_code) {
    int func_index = code->index();
    DCHECK_LE(0, func_index);
    DCHECK_LT(func_index, native_module_->num_functions());
    if (func_index < num_imported_functions) {
      const WasmFunction& function =
          native_module_->module()->functions[func_index];
      uint32_t canonical_type_index =
          native_module_->module()
              ->isorecursive_canonical_type_ids[function.sig_index];
      WasmImportWrapperCache::CacheKey key(
          kDefaultImportCallKind, canonical_type_index,
          static_cast<int>(function.sig->parameter_count()), kNoSuspend);
      // If two imported functions have the same key, only one of them should
      // have been added as a compilation unit. So it is always the first time
      // we compile a wrapper for this key here.
      DCHECK_NULL((*cache)[key]);
      (*cache)[key] = code.get();
      code->IncRef();
    }
  }
  PublishCode(base::VectorOf(unpublished_code));
}

void CompilationStateImpl::PublishCode(
    base::Vector<std::unique_ptr<WasmCode>> code) {
  WasmCodeRefScope code_ref_scope;
  std::vector<WasmCode*> published_code =
      native_module_->PublishCode(std::move(code));
  // Defer logging code in case wire bytes were not fully received yet.
  if (native_module_->log_code() && native_module_->HasWireBytes()) {
    GetWasmEngine()->LogCode(base::VectorOf(published_code));
  }

  OnFinishedUnits(base::VectorOf(std::move(published_code)));
}
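// NOTE (editorial sketch, not part of the original file):
// {SchedulePublishCompilationResults} below uses a "single publisher" scheme:
// whichever thread finds {publisher_running_} false claims the role and keeps
// draining the queue; all other threads just append and return. The core
// pattern in stand-alone form (hypothetical names):
//
//   #include <mutex>
//   #include <vector>
//
//   class SinglePublisher {
//    public:
//     void Submit(std::vector<int> batch) {
//       {
//         std::lock_guard<std::mutex> lock(mutex_);
//         if (running_) {  // Someone else is publishing; just enqueue.
//           queue_.insert(queue_.end(), batch.begin(), batch.end());
//           return;
//         }
//         running_ = true;
//       }
//       while (true) {
//         Publish(batch);  // Outside the lock: may be slow.
//         batch.clear();
//         std::lock_guard<std::mutex> lock(mutex_);
//         if (queue_.empty()) {
//           running_ = false;
//           return;
//         }
//         batch.swap(queue_);
//       }
//     }
//    private:
//     void Publish(const std::vector<int>&) { /* ... */ }
//     std::mutex mutex_;
//     bool running_ = false;
//     std::vector<int> queue_;
//   };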
void CompilationStateImpl::SchedulePublishCompilationResults(
    std::vector<std::unique_ptr<WasmCode>> unpublished_code,
    CompilationTier tier) {
  PublishState& state = publish_state_[tier];
  {
    base::MutexGuard guard(&state.mutex_);
    if (state.publisher_running_) {
      // Add new code to the queue and return.
      state.publish_queue_.reserve(state.publish_queue_.size() +
                                   unpublished_code.size());
      for (auto& c : unpublished_code) {
        state.publish_queue_.emplace_back(std::move(c));
      }
      return;
    }
    state.publisher_running_ = true;
  }
  while (true) {
    PublishCompilationResults(std::move(unpublished_code));
    unpublished_code.clear();

    // Keep publishing new code that came in.
    base::MutexGuard guard(&state.mutex_);
    DCHECK(state.publisher_running_);
    if (state.publish_queue_.empty()) {
      state.publisher_running_ = false;
      return;
    }
    unpublished_code.swap(state.publish_queue_);
  }
}

size_t CompilationStateImpl::NumOutstandingCompilations(
    CompilationTier tier) const {
  return compilation_unit_queues_.GetSizeForTier(tier);
}

void CompilationStateImpl::SetError() {
  compile_cancelled_.store(true, std::memory_order_relaxed);
  if (compile_failed_.exchange(true, std::memory_order_relaxed)) {
    return;  // Already failed before.
  }

  base::MutexGuard callbacks_guard(&callbacks_mutex_);
  TriggerCallbacks();
  callbacks_.clear();
}

void CompilationStateImpl::WaitForCompilationEvent(
    CompilationEvent expect_event) {
  switch (expect_event) {
    case CompilationEvent::kFinishedExportWrappers:
      break;
    case CompilationEvent::kFinishedBaselineCompilation:
      if (baseline_compile_job_->IsValid()) baseline_compile_job_->Join();
      break;
    default:
      // Waiting on any other CompilationEvent doesn't make sense.
      UNREACHABLE();
  }
  if (js_to_wasm_wrapper_job_ && js_to_wasm_wrapper_job_->IsValid()) {
    js_to_wasm_wrapper_job_->Join();
  }
#ifdef DEBUG
  base::EnumSet<CompilationEvent> events{expect_event,
                                         CompilationEvent::kFailedCompilation};
  base::MutexGuard guard(&callbacks_mutex_);
  DCHECK(finished_events_.contains_any(events));
#endif
}
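// NOTE (editorial sketch, not part of the original file):
// {TierUpAllFunctions} below runs the background worker loop directly on the
// calling thread by passing a no-op {JobDelegate} ({DummyDelegate}), then
// compiles any stragglers synchronously. A stand-alone sketch of that
// "contribute on the joining thread" shape (hypothetical names):
//
//   #include <optional>
//   #include <queue>
//
//   struct InlineDelegate {
//     bool ShouldYield() const { return false; }  // Never yield; drain all.
//   };
//
//   struct WorkQueue {
//     std::queue<int> units;
//     std::optional<int> TakeNext() {
//       if (units.empty()) return std::nullopt;
//       int u = units.front();
//       units.pop();
//       return u;
//     }
//   };
//
//   template <typename Delegate>
//   void RunWorkerLoop(Delegate* delegate, WorkQueue* queue) {
//     while (auto unit = queue->TakeNext()) {
//       // Compile(*unit);  // Placeholder for per-unit work.
//       if (delegate->ShouldYield()) return;
//     }
//   }
//
// Because {ShouldYield} always returns false, the inline loop only stops when
// the queue is empty; concurrent workers may still hold units they already
// took, hence the synchronous fallback pass afterwards.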
void CompilationStateImpl::TierUpAllFunctions() {
  const WasmModule* module = native_module_->module();
  uint32_t num_wasm_functions = module->num_declared_functions;
  WasmCodeRefScope code_ref_scope;
  CompilationUnitBuilder builder(native_module_);
  for (uint32_t i = 0; i < num_wasm_functions; ++i) {
    int func_index = module->num_imported_functions + i;
    WasmCode* code = native_module_->GetCode(func_index);
    if (!code || !code->is_turbofan()) {
      builder.AddTopTierUnit(func_index, ExecutionTier::kTurbofan);
    }
  }
  builder.Commit();

  // Join the compilation until no compilation units are left anymore.
  class DummyDelegate final : public JobDelegate {
    bool ShouldYield() override { return false; }
    bool IsJoiningThread() const override { return true; }
    void NotifyConcurrencyIncrease() override { UNIMPLEMENTED(); }
    uint8_t GetTaskId() override { return kMainTaskId; }
  };
  DummyDelegate delegate;
  ExecuteCompilationUnits(native_module_weak_, async_counters_.get(),
                          &delegate, CompilationTier::kTopTier);

  // We cannot wait for other compilation threads to finish, so we explicitly
  // compile all functions which are not yet available as TurboFan code.
  for (uint32_t i = 0; i < num_wasm_functions; ++i) {
    uint32_t func_index = module->num_imported_functions + i;
    WasmCode* code = native_module_->GetCode(func_index);
    if (!code || !code->is_turbofan()) {
      wasm::GetWasmEngine()->CompileFunction(async_counters_.get(),
                                             native_module_, func_index,
                                             wasm::ExecutionTier::kTurbofan);
    }
  }
}

namespace {
using JSToWasmWrapperSet =
    std::unordered_set<JSToWasmWrapperKey, base::hash<JSToWasmWrapperKey>>;
using JSToWasmWrapperUnitVector =
    std::vector<std::pair<JSToWasmWrapperKey,
                          std::unique_ptr<JSToWasmWrapperCompilationUnit>>>;

class CompileJSToWasmWrapperJob final : public BaseCompileJSToWasmWrapperJob {
 public:
  explicit CompileJSToWasmWrapperJob(
      JSToWasmWrapperUnitVector* compilation_units)
      : BaseCompileJSToWasmWrapperJob(compilation_units->size()),
        compilation_units_(compilation_units) {}

  void Run(JobDelegate* delegate) override {
    size_t index;
    while (GetNextUnitIndex(&index)) {
      JSToWasmWrapperCompilationUnit* unit =
          (*compilation_units_)[index].second.get();
      unit->Execute();
      CompleteUnit();
      if (delegate && delegate->ShouldYield()) return;
    }
  }

 private:
  JSToWasmWrapperUnitVector* const compilation_units_;
};
}  // namespace
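// NOTE (editorial sketch, not part of the original file): the
// {GetNextUnitIndex}/{CompleteUnit} protocol used by the job above is
// typically an atomic claim counter: each worker reserves the next unit index
// with fetch_add, so no unit is compiled twice. A stand-alone sketch of that
// assumption (hypothetical names):
//
//   #include <atomic>
//   #include <cstddef>
//
//   class UnitClaimer {
//    public:
//     explicit UnitClaimer(size_t num_units) : num_units_(num_units) {}
//     // Returns false once all units have been handed out.
//     bool GetNextUnitIndex(size_t* index) {
//       *index = next_.fetch_add(1, std::memory_order_relaxed);
//       return *index < num_units_;
//     }
//     void CompleteUnit() {
//       completed_.fetch_add(1, std::memory_order_acq_rel);
//     }
//     bool AllDone() const {
//       return completed_.load(std::memory_order_acquire) == num_units_;
//     }
//    private:
//     const size_t num_units_;
//     std::atomic<size_t> next_{0};
//     std::atomic<size_t> completed_{0};
//   };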
void CompileJsToWasmWrappers(Isolate* isolate, const WasmModule* module) {
  TRACE_EVENT0("v8.wasm", "wasm.CompileJsToWasmWrappers");

  isolate->heap()->EnsureWasmCanonicalRttsSize(
      module->MaxCanonicalTypeIndex() + 1);

  JSToWasmWrapperSet set;
  JSToWasmWrapperUnitVector compilation_units;
  WasmFeatures enabled_features = WasmFeatures::FromIsolate(isolate);

  // Prepare compilation units in the main thread.
  for (auto exp : module->export_table) {
    if (exp.kind != kExternalFunction) continue;

    auto& function = module->functions[exp.index];
    uint32_t canonical_type_index =
        module->isorecursive_canonical_type_ids[function.sig_index];
    int wrapper_index =
        GetExportWrapperIndex(canonical_type_index, function.imported);
    MaybeObject existing_wrapper =
        isolate->heap()->js_to_wasm_wrappers()->Get(wrapper_index);
    if (existing_wrapper.IsStrongOrWeak() &&
        !IsUndefined(existing_wrapper.GetHeapObject())) {
      continue;
    }

    JSToWasmWrapperKey key(function.imported, canonical_type_index);
    const auto [it, inserted] = set.insert(key);
    if (inserted) {
      auto unit = std::make_unique<JSToWasmWrapperCompilationUnit>(
          isolate, function.sig, canonical_type_index, module,
          function.imported, enabled_features,
          JSToWasmWrapperCompilationUnit::kAllowGeneric);
      compilation_units.emplace_back(key, std::move(unit));
    }
  }

  {
    // This is nested inside the event above, so the name can be less
    // descriptive. It's mainly to log the number of wrappers.
    TRACE_EVENT1("v8.wasm", "wasm.JsToWasmWrapperCompilation", "num_wrappers",
                 compilation_units.size());
    auto job = std::make_unique<CompileJSToWasmWrapperJob>(&compilation_units);
    if (v8_flags.wasm_num_compilation_tasks > 0) {
      auto job_handle = V8::GetCurrentPlatform()->CreateJob(
          TaskPriority::kUserVisible, std::move(job));
      // Wait for completion, while contributing to the work.
      job_handle->Join();
    } else {
      job->Run(nullptr);
    }
  }

  // Finalize compilation jobs in the main thread.
  // TODO(6792): Wrappers below are allocated with {Factory::NewCode}. As an
  // optimization we create a code memory modification scope that avoids
  // changing the page permissions back-and-forth between RWX and RX, because
  // many such wrappers are allocated in sequence below.
  for (auto& pair : compilation_units) {
    JSToWasmWrapperKey key = pair.first;
    JSToWasmWrapperCompilationUnit* unit = pair.second.get();
    DCHECK_EQ(isolate, unit->isolate());
    Handle<Code> code = unit->Finalize();
    if (!code->is_builtin()) {
      int wrapper_index = GetExportWrapperIndex(key.second, key.first);
      isolate->heap()->js_to_wasm_wrappers()->Set(
          wrapper_index, HeapObjectReference::Strong(*code));
      // Do not increase code stats for non-jitted wrappers.
      RecordStats(*code, isolate->counters());
      isolate->counters()->wasm_compiled_export_wrapper()->Increment(1);
    }
  }
}

WasmCode* CompileImportWrapper(
    NativeModule* native_module, Counters* counters, ImportCallKind kind,
    const FunctionSig* sig, uint32_t canonical_type_index, int expected_arity,
    Suspend suspend, WasmImportWrapperCache::ModificationScope* cache_scope) {
  // The entry should exist, so that we don't insert a new one and invalidate
  // other threads' iterators/references, but it should not have been compiled
  // yet.
  WasmImportWrapperCache::CacheKey key(kind, canonical_type_index,
                                       expected_arity, suspend);
  DCHECK_NULL((*cache_scope)[key]);
  bool source_positions = is_asmjs_module(native_module->module());
  // Keep the {WasmCode} alive until we explicitly call {IncRef}.
  WasmCodeRefScope code_ref_scope;
  CompilationEnv env = native_module->CreateCompilationEnv();
  WasmCompilationResult result = compiler::CompileWasmImportCallWrapper(
      &env, kind, sig, source_positions, expected_arity, suspend);
  std::unique_ptr<WasmCode> wasm_code = native_module->AddCode(
      result.func_index, result.code_desc, result.frame_slot_count,
      result.tagged_parameter_slots,
      result.protected_instructions_data.as_vector(),
      result.source_positions.as_vector(), GetCodeKind(result),
      ExecutionTier::kNone, kNotForDebugging);
  WasmCode* published_code = native_module->PublishCode(std::move(wasm_code));
  (*cache_scope)[key] = published_code;
  published_code->IncRef();
  counters->wasm_generated_code_size()->Increment(
      published_code->instructions().length());
  counters->wasm_reloc_size()->Increment(
      published_code->reloc_info().length());
  return published_code;
}

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#undef TRACE_COMPILE
#undef TRACE_STREAMING
#undef TRACE_LAZY