%PDF- %PDF-
Direktori : /home/vacivi36/vittasync.vacivitta.com.br/vittasync/node/deps/v8/src/compiler/ |
Current File : /home/vacivi36/vittasync.vacivitta.com.br/vittasync/node/deps/v8/src/compiler/revectorizer.h |
// Copyright 2022 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_COMPILER_REVECTORIZER_H_ #define V8_COMPILER_REVECTORIZER_H_ // Revectorizer is an optimization to promote pairs of simd128 nodes to new // simd256 nodes accelerated by wider vector available from hardware e.g. the // YMM registers from AVX2 instruction set when possible and beneficial. The // main algorithm is based on the Superword Level Parallel (SLP) vectorization // technique. #include <vector> #include "src/base/small-vector.h" #include "src/compiler/graph.h" #include "src/compiler/linear-scheduler.h" #include "src/compiler/machine-graph.h" #include "src/compiler/machine-operator.h" #include "src/compiler/node-marker.h" #include "src/compiler/node-properties.h" #include "src/compiler/node.h" #include "src/compiler/schedule.h" #include "src/zone/zone-containers.h" namespace v8 { namespace internal { namespace compiler { struct V8_EXPORT_PRIVATE MemoryOffsetComparer { bool operator()(const Node* lhs, const Node* rhs) const; }; using StoreNodeSet = ZoneSet<Node*, MemoryOffsetComparer>; // A PackNode consists of a fixed number of isomorphic simd128 nodes which can // execute in parallel and convert to a 256-bit simd node later. The nodes in a // PackNode must satisfy that they can be scheduled in the same basic block and // are mutually independent. class PackNode final : public NON_EXPORTED_BASE(ZoneObject) { public: explicit PackNode(Zone* zone, const ZoneVector<Node*>& node_group) : nodes_(node_group.cbegin(), node_group.cend(), zone), operands_(zone), revectorized_node_(nullptr) {} const ZoneVector<Node*>& Nodes() const { return nodes_; } bool IsSame(const ZoneVector<Node*>& node_group) const { return nodes_ == node_group; } Node* RevectorizedNode() const { return revectorized_node_; } void SetRevectorizedNode(Node* node) { revectorized_node_ = node; } // returns the index operand of this PackNode. PackNode* GetOperand(size_t index) { DCHECK_LT(index, operands_.size()); return operands_[index]; } ZoneVector<PackNode*>::size_type GetOperandsSize() const { return operands_.size(); } void SetOperand(size_t index, PackNode* pnode) { if (operands_.size() < index + 1) operands_.resize(index + 1); operands_[index] = pnode; } void Print() const; private: ZoneVector<Node*> nodes_; ZoneVector<PackNode*> operands_; Node* revectorized_node_; }; // An auxillary tree structure with a set of PackNodes based on the Superword // Level Parallelism (SLP) vectorization technique. The BuildTree method will // start from a selected root, e.g. a group of consecutive stores, and extend // through value inputs to create new PackNodes if the inputs are valid, or // conclude that the current PackNode is a leaf and terminate the tree. // Below is an example of SLPTree where loads and stores in each PackNode are // all consecutive. // [Load0, Load1] [Load2, Load3] // \ / // [Add0, Add1] // | // [Store0, Store1] class SLPTree : public NON_EXPORTED_BASE(ZoneObject) { public: explicit SLPTree(Zone* zone, Graph* graph) : zone_(zone), graph_(graph), root_(nullptr), on_stack_(zone), stack_(zone), node_to_packnode_(zone) { scheduler_ = zone->New<LinearScheduler>(zone, graph); } PackNode* BuildTree(const ZoneVector<Node*>& roots); void DeleteTree(); PackNode* GetPackNode(Node* node); void Print(const char* info); template <typename FunctionType> void ForEach(FunctionType callback); Node* GetEarlySchedulePosition(Node* node) { return scheduler_->GetEarlySchedulePosition(node); } private: friend class LinearScheduler; // This is the recursive part of BuildTree. PackNode* BuildTreeRec(const ZoneVector<Node*>& node_group, unsigned depth); // Baseline: create a new PackNode, and return. PackNode* NewPackNode(const ZoneVector<Node*>& node_group); // Recursion: create a new PackNode and call BuildTreeRec recursively PackNode* NewPackNodeAndRecurs(const ZoneVector<Node*>& node_group, int start_index, int count, unsigned depth); bool CanBePacked(const ZoneVector<Node*>& node_group); Graph* graph() const { return graph_; } Zone* zone() const { return zone_; } // Node stack operations. void PopStack(); void PushStack(const ZoneVector<Node*>& node_group); void ClearStack(); bool OnStack(Node* node); bool AllOnStack(const ZoneVector<Node*>& node_group); bool StackTopIsPhi(); void TryReduceLoadChain(const ZoneVector<Node*>& loads); bool IsSideEffectFreeLoad(const ZoneVector<Node*>& node_group); bool SameBasicBlock(Node* node0, Node* node1) { return scheduler_->SameBasicBlock(node0, node1); } Zone* const zone_; Graph* const graph_; PackNode* root_; LinearScheduler* scheduler_; ZoneSet<Node*> on_stack_; ZoneStack<ZoneVector<Node*>> stack_; // Maps a specific node to PackNode. ZoneUnorderedMap<Node*, PackNode*> node_to_packnode_; static constexpr size_t RecursionMaxDepth = 1000; }; // The Revectorizer pass will firstly collect seeds with valid group of // consecutive stores as the root to build the SLPTree. If the SLPTree is built // successfully, it will estimate the cost of the 256-bit transformation for // each PackNode and conduct the final revectorization if benefitial. class V8_EXPORT_PRIVATE Revectorizer final : public NON_EXPORTED_BASE(ZoneObject) { public: Revectorizer(Zone* zone, Graph* graph, MachineGraph* mcgraph); void DetectCPUFeatures(); bool TryRevectorize(const char* name); private: void CollectSeeds(); bool ReduceStoreChains(ZoneMap<Node*, StoreNodeSet>* store_chains); bool ReduceStoreChain(const ZoneVector<Node*>& Stores); void PrintStores(ZoneMap<Node*, StoreNodeSet>* store_chains); Zone* zone() const { return zone_; } Graph* graph() const { return graph_; } MachineGraph* mcgraph() const { return mcgraph_; } PackNode* GetPackNode(Node* node) const { return slp_tree_->GetPackNode(node); } bool DecideVectorize(); void SetEffectInput(PackNode* pnode, int index, Node*& nput); void SetMemoryOpInputs(base::SmallVector<Node*, 2>& inputs, PackNode* pnode, int index); Node* VectorizeTree(PackNode* pnode); void UpdateSources(); Zone* const zone_; Graph* const graph_; MachineGraph* const mcgraph_; ZoneMap<Node*, ZoneMap<Node*, StoreNodeSet>*> group_of_stores_; std::unordered_set<Node*> sources_; SLPTree* slp_tree_; bool support_simd256_; compiler::NodeObserver* node_observer_for_test_; }; } // namespace compiler } // namespace internal } // namespace v8 #endif // V8_COMPILER_REVECTORIZER_H_