From 77dbabb0f824df80ecc091ebb6efbdeccf0a34d2 Mon Sep 17 00:00:00 2001 From: PaddlePaddle-Gardener Date: Thu, 13 Jan 2022 14:24:39 +0800 Subject: [PATCH] mirgate_38871 --- paddle/fluid/eager/api/utils/tensor_utils.cc | 62 ++++ paddle/fluid/eager/backward.cc | 232 ++++++++++++ paddle/fluid/eager/eager_tensor.h | 8 +- paddle/fluid/eager/grad_node_info.cc | 270 ++++++++++++++ paddle/fluid/eager/grad_node_info.h | 231 ++++++++++++ .../grad_node_info_test.cc | 161 +++++++++ .../eager/tests/task_tests/backward_test.cc | 332 ++++++++++++++++++ .../cross_batch_accumulation_test.cc | 88 +++++ .../fluid/eager/tests/task_tests/hook_test.cc | 218 ++++++++++++ paddle/fluid/pybind/eager_method.cc | 44 +++ .../tests/unittests/test_egr_python_api.py | 27 ++ .../unittests/test_imperative_auto_prune.py | 15 +- 12 files changed, 1681 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/eager/api/utils/tensor_utils.cc b/paddle/fluid/eager/api/utils/tensor_utils.cc index e69de29bb2..115c9144df 100644 --- a/paddle/fluid/eager/api/utils/tensor_utils.cc +++ b/paddle/fluid/eager/api/utils/tensor_utils.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/eager/api/utils/tensor_utils.h" +#include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/utils/global_utils.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/utils.h" + +#include "paddle/pten/api/all.h" + +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/framework/pten_utils.h" +#include "paddle/fluid/framework/variable.h" + +namespace egr { +namespace egr_utils_api { + +bool IsLeafTensor(const egr::EagerTensor& target) { + std::shared_ptr grad_node = EagerUtils::grad_node(target); + if (std::dynamic_pointer_cast(grad_node)) { + return true; + } + + return false; +} + +egr::EagerTensor CreateTensorWithValue(const pten::DDim& ddim, + const paddle::platform::Place& place, + const pten::DataType& dtype, + const pten::DataLayout& layout, + float value, bool is_leaf) { + paddle::experimental::Tensor tensor = paddle::experimental::full( + paddle::framework::vectorize(ddim), paddle::experimental::Scalar(value), + dtype, pten::TransToPtenBackend(place), layout); + + egr::EagerTensor out = egr::EagerTensor(); + out.set_tensor(std::make_shared(tensor)); + auto meta = EagerUtils::autograd_meta(&out); + if (is_leaf) { + auto accumulation_node = std::make_shared(); + meta->SetGradNode(accumulation_node); + meta->SetStopGradient(false); + } + + return out; +} + +} // namespace egr_utils_api +} // namespace egr diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index e69de29bb2..01cb1b81e3 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -0,0 +1,232 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/eager/backward.h" +#include + +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/grad_tensor_holder.h" +#include "paddle/fluid/eager/utils.h" + +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/errors.h" + +#include "glog/logging.h" + +namespace egr { + +std::unordered_map getInDegreeMap( + const std::queue& init_queue) { + // Calculate in_degree for each node + // We can completely remove this pass, if in_degree were set during forward + // pass + std::unordered_map node_in_degree_map; + + // Copy nodes + std::queue queue = init_queue; + std::unordered_set visited; + + // Visit each node exactly once in any order + while (!queue.empty()) { + GradNodeBase* node = queue.front(); + queue.pop(); + + if (visited.count(node)) { + continue; + } + visited.insert(node); + + // Find and append next nodes + const std::vector>& edges = node->GetEdges(); + for (const auto& edge_list : edges) { + for (const Edge& edge : edge_list) { + GradNodeBase* next_node = edge.GetMutableGradNode().get(); + + // Next node could be nullptr if it is leaf tensor with no + // AccumulationNode attached + // Or it could also originated from dispensable inputs + if (!next_node) continue; + + // Update in_degree + if (!node_in_degree_map.count(next_node)) + node_in_degree_map[next_node] = 0; + node_in_degree_map[next_node]++; + queue.push(next_node); + } + } + } + + return node_in_degree_map; +} + +void RunBackwardHooks( + const std::vector>& grad_tensors, 
+ egr::GradNodeBase* grad_node) { + grad_node->ApplyGradientHooks(grad_tensors); + VLOG(6) << "Apply Reduce Hooks for node"; + grad_node->ApplyReduceHooks(); +} + +void RunBackward(const std::vector& tensors, + const std::vector& grad_tensors, + bool retain_graph) { + VLOG(6) << "Start Backward"; + // *Gradient Hook should happen at node-level + // *Inplace version check should perform at node-level + // *Cross-batch accumulation happens at forward pass + + /* --- Initialization --- */ + // 1. Init queue with starting nodes + // 2. Prepare initial input buffers + std::queue queue; + std::unordered_map> + node_input_buffers_dict; + for (size_t i = 0; i < tensors.size(); i++) { + const egr::EagerTensor& tensor = tensors[i]; + + AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(tensor); + // Get grad input info from target tensors + auto input_info = auto_grad_meta->OutRankInfo(); + + VLOG(2) << "Out Rank of Tensor is slot: " << input_info.first + << ", rank: " << input_info.second; + // Get target GradNodeBase from target tensors + GradNodeBase* grad_node = auto_grad_meta->GetMutableGradNode().get(); + + // Prepare GradTensorHolder + if (!node_input_buffers_dict.count(grad_node)) { + VLOG(6) << "Create Value for grad input tensor " << i; + node_input_buffers_dict[grad_node] = + std::make_unique(grad_node->InputMeta()); + } + + if (grad_tensors.size() > 0) { + PADDLE_ENFORCE( + grad_tensors.size() == tensors.size(), + paddle::platform::errors::Fatal( + "Detected size mismatch between tensors and grad_tensors" + "grad_tensors should either have " + "size = 0 or same size as tensors")); + // Feed given tensor if it's provided + VLOG(6) << "Fill grad input tensor " << i << "with give grad tensor"; + node_input_buffers_dict[grad_node]->add( + input_info.first, input_info.second, grad_tensors[i]); + + } else { + VLOG(6) << "Fill grad input tensor " << i << " with 1.0"; + // Initialize tensor with 1.0 + // Forward Tensor "tensor" is passed to indicate 
tensortype, datatype and + // dims + // GradTensorHolder will initialize another tensor with same tensortype, + // datatype and dims but filled with 1.0 + node_input_buffers_dict[grad_node]->add( + input_info.first, input_info.second, tensor, true /*fill_one=true*/); + } + + // Prepare queue + queue.push(grad_node); + } + + VLOG(6) << "Update In degree Map for backward"; + // 3. Compute in_degree for each node + std::unordered_map node_in_degree_map = + getInDegreeMap(queue); + + /* --- Topological Visit --- */ + // 1. Pop queue + // 2. Run node + // |- node(grads) + // |- Prepare for next node + // 3. Update queue + VLOG(6) << "Run Backward"; + while (!queue.empty()) { + GradNodeBase* node = queue.front(); + queue.pop(); + + // Run node: This is where Hook happens + PADDLE_ENFORCE( + node_input_buffers_dict.count(node), + paddle::platform::errors::Fatal( + "Unable to find next node in the InputBuufer" + "Trying to run Node without configuring its GradTensorHolder")); + + std::unique_ptr node_input_buffer = + std::move(node_input_buffers_dict[node]); + VLOG(6) << "Run Backward Kernel with input_buffer"; + + RunBackwardHooks(node_input_buffer->Buffers(), node); + // TODO(jiabin): Support post hook here and make hook run in seperate + // operator + // Run Pre Backward Node and get outputs + std::vector> grad_output_tensors = + (*node)(node_input_buffer->Buffers()); + // TODO(jiabin): Should we erase it or find a more efficient way. 
+ node_input_buffers_dict.erase(node); + + // Prepare GradTensorHolder for next node + const std::vector>& edges = node->GetEdges(); + + PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(), + paddle::platform::errors::Fatal( + "Number of edges should be either empty ( for leaf node " + ") or the same as number of output grad tensors, but we " + "got edges size is: %d, grad_output size is: %d", + edges.size(), grad_output_tensors.size())); + + for (size_t i = 0; i < edges.size(); i++) { + for (size_t j = 0; j < edges[i].size(); j++) { + const Edge& edge = edges[i][j]; + auto edge_rank = edge.GetEdgeRankInfo(); + // Since we make edge has as same rank as bwd outputs, we indexing them + // with + // the same rank(i, j) + VLOG(6) << "Get Edge with slot: " << i << ", rank: " << j; + egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j]; + if (!grad_output_tensor.defined() || + !grad_output_tensor.initialized()) { + VLOG(6) << "We get grad_output_tensor with slot: " << i + << ", rank: " << j << " as uninitialized or undefined tensor"; + } + GradNodeBase* next_node = edge.GetMutableGradNode().get(); + + // Next node could be nullptr if it is leaf tensor with no + // AccumulationNode attached + // Or it could also originated from dispensable inputs + if (!next_node) continue; + + if (!node_input_buffers_dict.count(next_node)) { + node_input_buffers_dict[next_node] = + std::make_unique(next_node->InputMeta()); + } + VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first + << ", rank: " << edge_rank.second; + node_input_buffers_dict[next_node]->add( + edge_rank.first, edge_rank.second, grad_output_tensor); + + // Update queue + node_in_degree_map[next_node]--; + PADDLE_ENFORCE(node_in_degree_map[next_node] >= 0, + paddle::platform::errors::Fatal( + "Detected in-degree value smaller than zero." 
+ "Node's in-degree cannot be negative")); + if (node_in_degree_map[next_node] == 0) { + queue.emplace(std::move(next_node)); + } + } + } + } +} + +} // namespace egr diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h index 72fe5732e9..80faad9080 100644 --- a/paddle/fluid/eager/eager_tensor.h +++ b/paddle/fluid/eager/eager_tensor.h @@ -195,7 +195,6 @@ class EagerTensor final { } tensor_->copy_(*(src.tensor_.get()), blocking); } - /* Part 6: Operator overloading */ EagerTensor& operator=(const EagerTensor& x) & { tensor_ = x.tensor_; @@ -238,7 +237,7 @@ class EagerTensor final { // Contruct framework::Tensor from egr::EagerTensor auto tensor_dense = std::dynamic_pointer_cast(tensor_->impl()); - if (tensor_dense) { + if (tensor_dense && tensor_dense.get()) { paddle::experimental::SharesStorage(tensor_dense.get(), framework_tensor); } else { @@ -292,11 +291,10 @@ class EagerTensor final { template void SetImplWithLegacyTensor() { const auto& framework_tensor = var_.Get(); - if (this->initialized()) { + if (defined()) { VLOG(8) << "Sync Var to initialized tensor for: " << name(); paddle::experimental::ReMakePtenDenseTensor( - framework_tensor, - static_cast(this->impl().get())); + framework_tensor, static_cast(impl().get())); } else { VLOG(8) << "Sync Var to uninitialized tensor for: " << name(); this->set_impl(std::move( diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index e69de29bb2..49bd416d46 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -0,0 +1,270 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/core/dense_tensor.h" + +#include "paddle/fluid/framework/var_type.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/errors.h" + +#include "glog/logging.h" + +/** + * Implementation of GradNodeBase, Edge and InputBuffer. +**/ +namespace egr { + +GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) { + bwd_in_meta_.resize(bwd_in_slot_num); + bwd_out_meta_.resize(bwd_out_slot_num); + // adj_edges has the same num as backward outputs + adj_edges_.resize(bwd_out_slot_num); +} + +void GradNodeBase::AddEdges(std::vector* metas, size_t slot_id) { + PADDLE_ENFORCE_LT( + slot_id, adj_edges_.size(), + paddle::platform::errors::InvalidArgument( + "Given slot id is out of range of adj_edges outter size, " + "adj_edges is designed to has the same size of grad " + "inputs's slot num.")); + for (const auto& meta : *metas) { + // adj_edges has as same rank as fwd inputs, and record it's output rank + // from + // its pre-ops + if (meta && !meta->StopGradient()) { + auto node = meta->GetMutableGradNode(); + if (node) { + adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), + meta->OutRankInfo()); + } else { + meta->SetGradNode(std::make_shared()); + adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), + meta->OutRankInfo()); + } + } + } +} + +void 
GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) { + PADDLE_ENFORCE_LT( + slot_id, adj_edges_.size(), + paddle::platform::errors::InvalidArgument( + "Given slot id is out of range of adj_edges outter size, " + "adj_edges is designed to has the same size of grad " + "inputs's slot num.")); + if (meta && !meta->StopGradient()) { + VLOG(6) << "Add Edges for slot: " << slot_id; + auto node = meta->GetMutableGradNode(); + if (node) { + adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), + meta->OutRankInfo()); + } else { + meta->SetGradNode(std::make_shared()); + adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), + meta->OutRankInfo()); + } + } +} + +const std::vector& GradNodeBase::InputMeta() const { + return bwd_in_meta_; +} + +const std::vector& GradNodeBase::OutputMeta() const { + return bwd_out_meta_; +} + +void GradNodeBase::SetGradInMeta(const std::vector& fwd_out, + size_t slot_rank) { + size_t slot_size = fwd_out.size(); + PADDLE_ENFORCE_LE( + slot_rank, (bwd_in_meta_.size() - 1), + paddle::platform::errors::InvalidArgument( + "Slot Rank should less equal than bwd_in_meta_ size, since " + "bwd_in_meta_ is designed to hold as same num as backward " + "inputs.")); + auto& meta = bwd_in_meta_.at(slot_rank); + PADDLE_ENFORCE_EQ(meta.IsInitialized(), false, + paddle::platform::errors::PreconditionNotMet( + "Bwd_in_meta should only be init once, addition " + "initialization for it is forbidden. If you got this " + "error, it indicates bugs in framework.")); + // Init stop gradient vector before use to avoid push back + meta.Init(slot_size); + for (size_t i = 0; i < slot_size; i++) { + PADDLE_ENFORCE_NOT_NULL(fwd_out[i], + paddle::platform::errors::PreconditionNotMet( + "Bwd_in_meta should only be called while " + "autograd_meta is not null. 
If you got this " + "error, it indicates bugs in framework.")); + if (fwd_out[i]->StopGradient()) { + // Set Stop Gradient only when its true or non-initialized autograd_meta, + // since all default value is false. + meta.SetStopGradient(i, fwd_out[i]->StopGradient()); + } + } +} + +void GradNodeBase::SetGradInMeta(const AutogradMeta& fwd_out, + size_t slot_rank) { + PADDLE_ENFORCE_LE( + slot_rank, (bwd_in_meta_.size() - 1), + paddle::platform::errors::InvalidArgument( + "Slot Rank should less equal than bwd_in_meta_ size, since " + "bwd_in_meta_ is designed to hold as same num as backward " + "inputs.")); + auto& meta = bwd_in_meta_.at(slot_rank); + PADDLE_ENFORCE_EQ(meta.IsInitialized(), false, + paddle::platform::errors::PreconditionNotMet( + "Bwd_in_meta should only be init once, Additional " + "initialization for it is forbidden. If you got this " + "error, it indicates bugs in framework.")); + // Init stop gradient vector before use to avoid push back + VLOG(7) << "Init bwd_in_meta_ with slot rank: " << slot_rank; + meta.Init(1); + meta.SetStopGradient(0, fwd_out.StopGradient()); +} + +void GradNodeBase::SetGradOutMeta(const std::vector& fwd_in, + size_t slot_rank) { + size_t slot_size = fwd_in.size(); + PADDLE_ENFORCE_LE( + slot_rank, (bwd_out_meta_.size() - 1), + paddle::platform::errors::InvalidArgument( + "Slot Rank should less equal than bwd_out_meta_ size, " + "since bwd_out_meta_ is designed to hold as same num as " + "backward outputs.")); + auto& meta = bwd_out_meta_.at(slot_rank); + PADDLE_ENFORCE_EQ(meta.IsInitialized(), false, + paddle::platform::errors::PreconditionNotMet( + "Bwd_out_meta should only be init once. Additional " + "initialization for it is forbidden. 
If you got this " + "error, it indicates bugs in framework.")); + // Init stop gradient vector before use to avoid push back + meta.Init(slot_size); + for (size_t i = 0; i < slot_size; i++) { + if (!fwd_in[i]) { + meta.SetStopGradient(i, true); + continue; + } + if (fwd_in[i]->StopGradient()) { + // Set Stop Gradient only when its true or non-initialized autograd_meta, + // since all default value is false. + meta.SetStopGradient(i, fwd_in[i]->StopGradient()); + } + } +} + +void GradNodeBase::SetGradOutMeta(const AutogradMeta& fwd_in, + size_t slot_rank) { + PADDLE_ENFORCE_LE( + (slot_rank + 1), bwd_out_meta_.size(), + paddle::platform::errors::InvalidArgument( + "Slot Rank should less equal than bwd_out_meta_ size, " + "since bwd_out_meta_ is designed to hold as same num as " + "backward outputs.")); + auto& meta = bwd_out_meta_.at(slot_rank); + PADDLE_ENFORCE_EQ(meta.IsInitialized(), false, + paddle::platform::errors::PreconditionNotMet( + "Bwd_out_meta should only be init once. Additional " + "initialization for it is forbidden. 
If you got this " + "error, it indicates bugs in framework.")); + // Init stop gradient vector before use to avoid push back + meta.Init(1); + meta.SetStopGradient(0, fwd_in.StopGradient()); +} + +void GradNodeBase::SetDefaultGradInOutMeta() { + PADDLE_ENFORCE((bwd_out_meta_.size() == 1) && (bwd_in_meta_.size() == 1), + paddle::platform::errors::PreconditionNotMet( + "We can only support 1 input and 1 output in default grad " + "meta setter, other size of inputs and outputs should " + "create with Setter and Getters")); + // Default stop_gradient is false and slot id is 0, slot size is 1; + bwd_out_meta_[0].Init(1); + bwd_in_meta_[0].Init(1); +} + +const std::vector>& GradNodeBase::GetEdges() const { + return adj_edges_; +} + +void GradNodeBase::RegisterGradientHook( + size_t slot_id, size_t rank, + const std::function& hook) { + gradient_hooks_.emplace_back(std::make_tuple(slot_id, rank, hook)); +} + +void GradNodeBase::RegisterReduceHook(const std::function& hook) { + reduce_hooks_.emplace_back(hook); +} + +std::vector> GradNodeBase::ApplyGradientHooks( + const std::vector>& tensors) { + std::vector> outs(tensors.size()); + for (auto& tuple : gradient_hooks_) { + size_t slot_id = std::get<0>(tuple); + size_t rank = std::get<1>(tuple); + std::function& hook = + std::get<2>(tuple); + + PADDLE_ENFORCE(slot_id < tensors.size(), + paddle::platform::errors::Fatal( + "Slot_id from registered hook should be smaller than " + "slot size of grad_tensors")); + + PADDLE_ENFORCE(rank < tensors[slot_id].size(), + paddle::platform::errors::Fatal( + "rank of slot %d from registered hook should be smaller " + "than rank size of grad_tensors", + slot_id)); + + std::vector& slot_out = outs[slot_id]; + slot_out.resize(tensors[slot_id].size()); + egr::EagerTensor& out = slot_out[rank]; + if (!out.defined() || !out.initialized()) { + VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name(); + out = hook(tensors[slot_id][rank]); + } else { + // TODO(jiabin): Why this? 
+ out = hook(out); + } + } + + for (size_t i = 0; i < outs.size(); i++) { + if (outs[i].empty() && (!tensors[i].empty())) { + outs[i].resize(tensors[i].size()); + } + // TODO(Jiabin): Optimize this if we only add hook slot by slot + for (size_t j = 0; j < outs[i].size(); j++) { + if (!outs[i][j].defined() || !outs[i][j].initialized()) { + outs[i][j] = tensors[i][j]; + } + } + } + + return outs; +} + +void GradNodeBase::ApplyReduceHooks() { + for (auto& hook : reduce_hooks_) { + hook(); + } +} +} // namespace egr diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index e69de29bb2..f15c50ef75 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -0,0 +1,231 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/pten/api/all.h" +#include "paddle/pten/include/core.h" + +namespace egr { +/** + * GradNodeBase is base class of all grad node, which is what should be used by + * eager execution, we define most of backward autograd members here, and for + * each Operator, they should hold their onw forward Inputs as TensorWrapper. + * + * The GradNodeBase will be held in autograd_meta, and it is also a member of + * Edge, which indicates the edge of backward graph. 
+ * + * TODO:(yangzhanlue) GradNodeBase will also in charge of get the correct input + * from GradOpDescMaker to GradNodeBase. + * + * NOTE:GradNodeBase has a method named run, this method should be overrided by + * the + * specific derived class, it will prepare backward inputs and double backward's + * depends. Then, it will call C++ API of backward kernel functions to finish + * backward computation. + * + * NOTE:GradNodeBase holds its own inputs and Outputs + * + * Edge is defined to descripe depend of backward, an Edge is what linked + * between two + * node, it should contain a Node and rank of this Node (this is used to + * indicate which + * input of grad this edge belong). + * */ +class Edge; +class AutogradMeta; + +/** + * GradSlotMeta is used to Record Forward Tensor info to backward, since paddle + * has lots of operators + * whose backward logic is depends on if it has some specific inputs or outputs. + * So, we need a meta info + * to record it's needs. + * **/ +class GradSlotMeta { + public: + GradSlotMeta() = default; + void Init(size_t size) { + size_ = static_cast(size); + stop_gradient_.resize(size, false); + } + + bool IsInitialized() const { return size_ != -1; } + bool IsStopGradient(size_t rank) const { return stop_gradient_[rank]; } + int Size() const { return size_; } + void SetStopGradient(size_t rank, bool stop_gradient = true) { + stop_gradient_.at(rank) = stop_gradient; + } + + private: + int size_{-1}; + std::vector stop_gradient_{false}; +}; + +class GradNodeBase { + public: + GradNodeBase() = default; + GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num); + // TODO(jiabin): Should we have other constructor here? + virtual ~GradNodeBase() = default; + + /** + * operator() designed to contian the real backward execution logic, it should + * be + * overrided by derived class defined for each operator. 
It accepts a vector + * of + * Tensor which contains grads input of current operator + * + * Note: why we need backward inputs and outputs construct as vector of vector + * of egr::EagerTensor? + * Since all of paddle op composite in form of {"Slot name ", vector}, + * so, vector of vector + * is better choice to fit this format. + * **/ + virtual std::vector> operator()( + const std::vector>& grads) = 0; + + /** + * AddEdges is designed to set input tensors' backward Node as current + * node's Edges. + * This method should be call in forward code and for double backward depends + * computation. + * + * This one is called slot by slot + * **/ + void AddEdges(std::vector* metas, size_t slot_id); + void AddEdges(AutogradMeta* meta, size_t slot_id); + + /** + * GetEdges is designed to get all edges of current node**/ + const std::vector>& GetEdges() const; + + /** + * Get Input Meta of current Grad node**/ + const std::vector& InputMeta() const; + /** + * Get Output Meta of current Grad node**/ + const std::vector& OutputMeta() const; + /** + * Set bwd ins and outs info with forward vars + * **/ + + void SetGradInMeta(const std::vector& fwd_out, + size_t slot_rank); + void SetGradInMeta(const AutogradMeta& fwd_out, size_t slot_rank); + + void SetGradOutMeta(const std::vector& fwd_in, + size_t slot_rank); + void SetGradOutMeta(const AutogradMeta& fwd_in, size_t slot_rank); + + /** + * Default setters for Grad in/out meta this should be used for same special + * Node which will not create by user + * **/ + void SetDefaultGradInOutMeta(); + /** + * Register GradientHook or ReduceHook + * **/ + void RegisterGradientHook( + size_t slot_id, size_t rank, + const std::function& hook); + void RegisterReduceHook(const std::function& hook); + + /** + * Apply GradientHook or ReduceHook + * **/ + inline bool GradientHooksRegistered() { return gradient_hooks_.size() != 0; } + inline bool ReduceHooksRegistered() { return reduce_hooks_.size() != 0; } + + std::vector> 
ApplyGradientHooks( + const std::vector>& tensors); + void ApplyReduceHooks(); + + private: + // TODO(jiabin): Use SmallVector instead after merge PR from develop + + // Edges recorded the backward related node info, which indicate all edges + // linked + // by this Grad Node. + // Why we need vector>: Edges is as same rank as bwd output. + std::vector> adj_edges_; + + // bwd_out_meta_ is used to record Grad output info for backward + std::vector bwd_out_meta_; + + // bwd_in_meta_ used to record Grad input info for backward + std::vector bwd_in_meta_; + // Gradient Hooks + // Customer may register a list of hooks which will be called in order during + // backward + // Each entry consists one pair of + std::vector>> + gradient_hooks_; + std::vector> reduce_hooks_; +}; + +class Edge { + public: + // Default constructor for Edges in order to construct it for AutogradMeta + Edge() : in_slot_id_(0), in_rank_(0), grad_node_(nullptr) {} + + // In real use cases we should create Edge from grad node and input rank which + // indicate which edge it is. + // Since we have slot design in operators we will have to locate an edge with + // slot + // and rank. 
+ Edge(const std::shared_ptr& grad_node, size_t in_slot_id, + size_t in_rank) + : in_slot_id_(in_slot_id), in_rank_(in_rank), grad_node_(grad_node) {} + + Edge(const std::shared_ptr& grad_node, + const std::pair& rank_info) + : in_slot_id_(rank_info.first), + in_rank_(rank_info.second), + grad_node_(grad_node) {} + + GradNodeBase* GetGradNode() const { return grad_node_.get(); } + + std::shared_ptr GetMutableGradNode() const { + return grad_node_; + } + + std::pair GetEdgeRankInfo() const { + return std::make_pair(in_slot_id_, in_rank_); + } + + void SetEdgeRankInfo(size_t slot_id, size_t in_rank) { + in_slot_id_ = slot_id; + in_rank_ = in_rank; + } + + void SetEdgeRankInfo( + const std::pair& edge_rank) { + in_slot_id_ = edge_rank.first; + in_rank_ = edge_rank.second; + } + + // Currently we use grad_node_ to identify if a edge is initialized. + bool IsInitialized() const { return grad_node_.get(); } + + private: + size_t in_slot_id_; + size_t in_rank_; + std::shared_ptr grad_node_; +}; + +} // namespace egr diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index e69de29bb2..a89fb019d5 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -0,0 +1,161 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h" +#include "paddle/pten/api/lib/utils/allocator.h" + +TEST(GradNodeInfo, GradSlotMeta) { + auto grad_slot = egr::GradSlotMeta(); + CHECK(grad_slot.IsInitialized() == false); + VLOG(6) << "Init GradSlotMeta"; + grad_slot.Init(2); + CHECK(grad_slot.IsInitialized() == true); + VLOG(6) << "Set SetStopGradient"; + grad_slot.SetStopGradient(0); + CHECK(grad_slot.IsStopGradient(0) == true); + CHECK_EQ(grad_slot.Size(), 2); +} + +TEST(GradNodeInfo, GradNodeBase) { + VLOG(6) << "Construct Grad Node"; + auto grad_test_node0 = std::make_shared( + /* val */ 5.0, /* in_num */ 2, /* out_num */ 2); + auto grad_test_node1 = std::make_shared(); + std::vector> grads; + pten::DenseTensorMeta meta = pten::DenseTensorMeta( + pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1})); + std::shared_ptr dt = std::make_shared( + std::make_shared( + paddle::platform::CPUPlace()), + meta); + auto* dt_ptr = dt->mutable_data(); + dt_ptr[0] = 5.0f; + egr::EagerTensor et1(dt); + grads = {{et1}}; + VLOG(6) << "Test Grad Node Call"; + auto res = (*grad_test_node0)(grads); + CHECK_EQ(std::dynamic_pointer_cast(res[0][0].impl()) + ->data()[0], + 6.0f); + VLOG(6) << "Test Add Edges"; + egr::Edge edge0(grad_test_node1, 1, 2); + auto auto_grad0 = std::make_shared(edge0); + auto_grad0->SetStopGradient(false); + egr::Edge edge1(grad_test_node1, 3, 4); + auto auto_grad1 = std::make_shared(edge1); + auto_grad1->SetStopGradient(false); + grad_test_node0->AddEdges(auto_grad0.get(), 0); + CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first, + size_t(1)); + 
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second, + size_t(2)); + std::vector metas = {auto_grad1.get()}; + grad_test_node0->AddEdges(&metas, 1); + CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first, + size_t(3)); + CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().second, + size_t(4)); + + VLOG(6) << "Test Set Meta and Get Meta"; + auto_grad1->SetStopGradient(true); + grad_test_node0->SetGradInMeta(metas, 0); + grad_test_node0->SetGradInMeta(*auto_grad1.get(), 1); + grad_test_node0->SetGradOutMeta(metas, 0); + grad_test_node0->SetGradOutMeta(*auto_grad1.get(), 1); + CHECK_EQ(grad_test_node0->InputMeta()[0].Size(), 1); + CHECK_EQ(grad_test_node0->InputMeta()[1].Size(), 1); + CHECK(grad_test_node0->OutputMeta()[0].IsStopGradient(0)); + CHECK(grad_test_node0->OutputMeta()[1].IsStopGradient(0)); + + VLOG(6) << "Test Default Set Meta and Get Meta"; + auto grad_test_node2 = std::make_shared( + /* val */ 5.0, /* in_num */ 1, /* out_num */ 1); + grad_test_node2->SetDefaultGradInOutMeta(); + CHECK(grad_test_node2->OutputMeta()[0].IsInitialized()); + CHECK(grad_test_node2->OutputMeta()[0].IsStopGradient(0) == false); + CHECK_EQ(grad_test_node2->OutputMeta()[0].Size(), 1); + + VLOG(6) << "Test Gradient Hook"; + auto gradient_hook = [](const egr::EagerTensor& et) -> egr::EagerTensor { + egr::EagerTensor res; + pten::DenseTensorMeta meta = pten::DenseTensorMeta( + pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1})); + std::shared_ptr dt = std::make_shared( + std::make_shared( + paddle::platform::CPUPlace()), + meta); + auto* dt_ptr = dt->mutable_data(); + dt_ptr[0] = 6.0f; + auto* et_ptr = + std::dynamic_pointer_cast(et.impl())->data(); + dt_ptr[0] += et_ptr[0]; + res.set_impl(dt); + VLOG(6) << "Running Gradient Hook"; + return res; + }; + grad_test_node0->RegisterGradientHook(0, 0, gradient_hook); + // 5 + 6 + auto grad_hook_res = grad_test_node0->ApplyGradientHooks(grads); + CHECK_EQ( + 
std::dynamic_pointer_cast(grad_hook_res[0][0].impl()) + ->data()[0], + 11.0); + + VLOG(6) << "Test Reduce Hook"; + auto reduce_hook = [&](void) -> void { + auto* et_ptr = std::dynamic_pointer_cast(et1.impl()) + ->mutable_data(); + et_ptr[0] = 100.0; + VLOG(6) << "Running Reduce Hook"; + }; + grad_test_node0->RegisterReduceHook(reduce_hook); + grad_test_node0->ApplyReduceHooks(); + CHECK_EQ(std::dynamic_pointer_cast(et1.impl()) + ->data()[0], + 100.0); +} + +TEST(GradNodeInfo, Edge) { + auto grad_test_node0 = std::make_shared(5, 2, 2); + VLOG(6) << "Test Construct Edge"; + egr::Edge edge0 = egr::Edge(); + CHECK(edge0.IsInitialized() == false); + egr::Edge edge1 = egr::Edge(grad_test_node0, size_t(0), size_t(0)); + CHECK(edge1.IsInitialized() == true); + egr::Edge edge2 = + egr::Edge(grad_test_node0, std::make_pair(size_t(1), size_t(0))); + VLOG(6) << "Test Set Edge's Grad Node"; + auto* grad_node = edge1.GetGradNode(); + CHECK_EQ(grad_node->InputMeta().size(), size_t(2)); + auto mt_grad_node = edge1.GetMutableGradNode(); + auto auto_grad1 = std::make_shared(); + std::vector metas = {auto_grad1.get()}; + // Uninitialized AutogradMeta indicates + mt_grad_node->SetGradInMeta(metas, 0); + CHECK(grad_node->InputMeta()[0].IsStopGradient(0) == true); + VLOG(6) << "Test Get/Set Edge Rank Info"; + CHECK_EQ(edge2.GetEdgeRankInfo().first, size_t(1)); + CHECK_EQ(edge2.GetEdgeRankInfo().second, size_t(0)); + edge2.SetEdgeRankInfo(2, 3); + CHECK_EQ(edge2.GetEdgeRankInfo().first, size_t(2)); + CHECK_EQ(edge2.GetEdgeRankInfo().second, size_t(3)); + edge2.SetEdgeRankInfo(std::make_pair(size_t(4), size_t(5))); + CHECK_EQ(edge2.GetEdgeRankInfo().first, size_t(4)); + CHECK_EQ(edge2.GetEdgeRankInfo().second, size_t(5)); +} diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc index e69de29bb2..3737fd95ad 100644 --- a/paddle/fluid/eager/tests/task_tests/backward_test.cc +++ 
b/paddle/fluid/eager/tests/task_tests/backward_test.cc @@ -0,0 +1,332 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" +#include "paddle/fluid/eager/api/utils/tensor_utils.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/backward.h" +#include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/tests/test_utils.h" + +#include "paddle/fluid/eager/api/all.h" + +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/tensor_meta.h" + +namespace egr { + +TEST(Backward, SingleNodeEmptyGrad) { + // Prepare Device Contexts + eager_test::InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + // Create Target Tensor + egr::EagerTensor target_tensor = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + + egr::EagerTensor leaf_tensor; + { + // Create Scale Node + auto node0_ptr = std::make_shared(1, 1); + node0_ptr->SetAttributes_scale(5.0 /*scale*/); + + // Set grad in/out meta + node0_ptr->SetDefaultGradInOutMeta(); + 
AutogradMeta* auto_grad_meta = EagerUtils::autograd_meta(&target_tensor); + auto_grad_meta->SetGradNode( + std::dynamic_pointer_cast(node0_ptr)); + auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + + // Connect Tensor and AccumulationNode via AutoGradMeta + auto acc_node_ptr = std::make_shared(); + + AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor); + auto_grad_meta1->SetGradNode( + std::dynamic_pointer_cast(acc_node_ptr)); + auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); + + egr_utils_api::RetainGradForTensor(leaf_tensor); + + // Connect Node0 -> AccumulationNode via Edge + auto meta = egr::AutogradMeta(); + meta.SetStopGradient(false); + meta.SetSingleOutRankWithSlot(0, 0); + meta.SetGradNode(acc_node_ptr); + std::vector res = {&meta}; + node0_ptr->AddEdges(&res, 0); + } + std::vector outs = {target_tensor}; + // Run Backward + RunBackward(outs, {}); + + // Check Output Value + eager_test::CompareGradTensorWithValue(leaf_tensor, 5.0); +} + +TEST(Backward, SingleNodeCustomGrad) { + // Prepare Device Contexts + eager_test::InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + std::vector target_tensors; + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + // Create Target Tensor + egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(tensor)); + + std::vector grad_tensors; + // Create Grad Tensor + egr::EagerTensor grad_tensor = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); + grad_tensors.emplace_back(std::move(grad_tensor)); + + egr::EagerTensor leaf_tensor; + { + // Create Scale Node + auto node0_ptr = std::make_shared(1, 1); + node0_ptr->SetAttributes_scale(5.0 /*scale*/); + + // Set grad in/out meta + 
node0_ptr->SetDefaultGradInOutMeta(); + + // Connect Tensor and Node via AutoGradMeta + AutogradMeta* auto_grad_meta = + EagerUtils::autograd_meta(&(target_tensors[0])); + auto_grad_meta->SetGradNode( + std::dynamic_pointer_cast(node0_ptr)); + auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + + // Connect Tensor and AccumulationNode via AutoGradMeta + auto acc_node_ptr = std::make_shared(); + + AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor); + auto_grad_meta1->SetGradNode( + std::dynamic_pointer_cast(acc_node_ptr)); + auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); + + egr_utils_api::RetainGradForTensor(leaf_tensor); + + // Connect Node0 -> AccumulationNode via Edge + auto meta = egr::AutogradMeta(); + meta.SetStopGradient(false); + meta.SetSingleOutRankWithSlot(0, 0); + meta.SetGradNode(acc_node_ptr); + std::vector res = {&meta}; + node0_ptr->AddEdges(&res, 0); + } + + // Run Backward + RunBackward(target_tensors, grad_tensors); + + // Check Output Value + eager_test::CompareGradTensorWithValue(leaf_tensor, 50.0); +} + +/* +Node1 + | +Node0 + | + inp0 +*/ +TEST(Backward, LinearNodes) { + // Prepare Device Contexts + eager_test::InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + std::vector target_tensors; + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + // Create Target Tensor + egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(tensor)); + + egr::EagerTensor leaf_tensor; + { + // Create Node0 + auto node0_ptr = std::make_shared(1, 1); + node0_ptr->SetAttributes_scale(5.0 /*scale*/); + + // Set grad in/out meta for node0 + node0_ptr->SetDefaultGradInOutMeta(); + + // Create Node1 + auto node1_ptr = std::make_shared(1, 1); + node1_ptr->SetAttributes_scale(10.0 /*scale*/); + + // Set grad in/out meta for node1 + 
node1_ptr->SetDefaultGradInOutMeta(); + + // Connect Input Tensor and Node0 via AutoGradMeta + AutogradMeta* auto_grad_meta = + EagerUtils::autograd_meta(&(target_tensors[0])); + auto_grad_meta->SetGradNode( + std::dynamic_pointer_cast(node0_ptr)); + auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + + // Connect Node0 -> Node1 via Edge + auto meta0 = egr::AutogradMeta(); + meta0.SetStopGradient(false); + meta0.SetSingleOutRankWithSlot(0, 0); + meta0.SetGradNode(node1_ptr); + std::vector res0 = {&meta0}; + node0_ptr->AddEdges(&res0, 0); + + // Connect Tensor and AccumulationNode via AutoGradMeta + auto acc_node_ptr = std::make_shared(); + + AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor); + auto_grad_meta1->SetGradNode( + std::dynamic_pointer_cast(acc_node_ptr)); + auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); + + egr_utils_api::RetainGradForTensor(leaf_tensor); + + // Connect Node1 -> AccumulationNode via Edge + auto meta1 = egr::AutogradMeta(); + meta1.SetStopGradient(false); + meta1.SetSingleOutRankWithSlot(0, 0); + meta1.SetGradNode(acc_node_ptr); + std::vector res1 = {&meta1}; + node1_ptr->AddEdges(&res1, 0); + } + + // Use Empty Grad Tensor + RunBackward(target_tensors, {}); + + // Check Output Value + eager_test::CompareGradTensorWithValue(leaf_tensor, 50.0); +} + +/* + Node2 + | | +Node0 Node1 + | | + inp0 inp1 +*/ +TEST(Backward, WithAccumulation) { + // Prepare Device Contexts + eager_test::InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + // Create Target Tensor + std::vector target_tensors; + egr::EagerTensor tensor0 = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + egr::EagerTensor tensor1 = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 
/*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(tensor0)); + target_tensors.emplace_back(std::move(tensor1)); + + // Create Grad Tensor + std::vector grad_tensors; + egr::EagerTensor grad_tensor0 = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); + egr::EagerTensor grad_tensor1 = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); + grad_tensors.emplace_back(std::move(grad_tensor0)); + grad_tensors.emplace_back(std::move(grad_tensor1)); + + egr::EagerTensor leaf_tensor; + { + // Create Node0 + auto node0_ptr = std::make_shared(1, 1); + node0_ptr->SetAttributes_scale(5.0 /*scale*/); + node0_ptr->SetDefaultGradInOutMeta(); + + // Create Node1 + auto node1_ptr = std::make_shared(1, 1); + node1_ptr->SetAttributes_scale(10.0 /*scale*/); + node1_ptr->SetDefaultGradInOutMeta(); + // Create Node2 + auto node2_ptr = std::make_shared(1, 1); + node2_ptr->SetAttributes_scale(20.0 /*scale*/); + node2_ptr->SetDefaultGradInOutMeta(); + // Connect Inp0 and Node0 via AutoGradMeta + AutogradMeta* auto_grad_meta0 = + EagerUtils::autograd_meta(&(target_tensors[0])); + auto_grad_meta0->SetGradNode( + std::dynamic_pointer_cast(node0_ptr)); + auto_grad_meta0->SetSingleOutRankWithSlot(0, 0); + + // Connect Inp1 and Node1 via AutoGradMeta + AutogradMeta* auto_grad_meta1 = + EagerUtils::autograd_meta(&(target_tensors[1])); + auto_grad_meta1->SetGradNode( + std::dynamic_pointer_cast(node1_ptr)); + auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); + + // Connect Node0 -> Node2 via Edge + auto meta0 = egr::AutogradMeta(); + meta0.SetStopGradient(false); + meta0.SetSingleOutRankWithSlot(0, 0); + meta0.SetGradNode(node2_ptr); + std::vector res0 = {&meta0}; + node0_ptr->AddEdges(&res0, 0); + + // Connect Node1 -> Node2 via Edge + auto meta1 = 
egr::AutogradMeta(); + meta1.SetStopGradient(false); + meta1.SetSingleOutRankWithSlot(0, 0); + meta1.SetGradNode(node2_ptr); + std::vector res1 = {&meta1}; + node1_ptr->AddEdges(&res1, 0); + + // Connect Tensor and AccumulationNode via AutoGradMeta + auto acc_node_ptr = std::make_shared(); + + AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor); + auto_grad_meta2->SetGradNode( + std::dynamic_pointer_cast(acc_node_ptr)); + auto_grad_meta2->SetSingleOutRankWithSlot(0, 0); + + egr_utils_api::RetainGradForTensor(leaf_tensor); + + // Connect Node2 -> AccumulationNode via Edge + auto meta2 = egr::AutogradMeta(); + meta2.SetStopGradient(false); + meta2.SetSingleOutRankWithSlot(0, 0); + meta2.SetGradNode(acc_node_ptr); + std::vector res2 = {&meta2}; + node2_ptr->AddEdges(&res2, 0); + } + + RunBackward(target_tensors, grad_tensors); + + eager_test::CompareGradTensorWithValue(leaf_tensor, 2500.0); +} + +} // namespace egr diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc index e69de29bb2..7f180fa107 100644 --- a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc +++ b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" +#include "paddle/fluid/eager/api/utils/tensor_utils.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/backward.h" +#include "paddle/fluid/eager/grad_node_info.h" + +#include "paddle/fluid/eager/api/all.h" + +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/tensor_meta.h" + +#include "paddle/fluid/eager/tests/test_utils.h" + +namespace egr { + +TEST(CrossBatchAccumulation, SingleScaleNode) { + eager_test::InitEnv(paddle::platform::CPUPlace()); + + std::vector target_tensors; + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(tensor)); + egr::EagerTensor& target_tensor = target_tensors[0]; + + egr::EagerTensor leaf_tensor = egr::EagerTensor(); + { + auto scale_node_ptr = std::make_shared(1, 1); + scale_node_ptr->SetAttributes_scale(5.0 /*scale*/); + + scale_node_ptr->SetDefaultGradInOutMeta(); + + auto acc_node_ptr = std::make_shared(); + + AutogradMeta* auto_grad_meta = EagerUtils::autograd_meta(&target_tensor); + auto_grad_meta->SetGradNode( + std::dynamic_pointer_cast(scale_node_ptr)); + auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + egr_utils_api::RetainGradForTensor(target_tensor); // result: 1.0 + + auto meta = AutogradMeta(); + meta.SetSingleOutRankWithSlot(0, 0); + meta.SetStopGradient(false); + meta.SetGradNode(acc_node_ptr); + std::vector res = {&meta}; + scale_node_ptr->AddEdges(&res, 0); + + AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor); + auto_grad_meta1->SetGradNode( + 
std::dynamic_pointer_cast(acc_node_ptr)); + auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); + egr_utils_api::RetainGradForTensor(leaf_tensor); + } + + RunBackward(target_tensors, {}); + + eager_test::CompareGradTensorWithValue(target_tensor, 1.0); + eager_test::CompareGradTensorWithValue(leaf_tensor, 5.0); + + RunBackward(target_tensors, {}); + + eager_test::CompareGradTensorWithValue(target_tensor, 1.0); + eager_test::CompareGradTensorWithValue(leaf_tensor, 10.0); +} + +} // namespace egr diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc index e69de29bb2..0f8039dade 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc @@ -0,0 +1,218 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/backward.h" +#include "paddle/fluid/eager/grad_node_info.h" + +#include "paddle/fluid/eager/api/all.h" + +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/tensor_meta.h" + +#include "paddle/fluid/eager/tests/test_utils.h" + +namespace egr { + +egr::EagerTensor hook_function(const egr::EagerTensor& t) { + auto t_dense = std::dynamic_pointer_cast(t.impl()); + + auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(), + t_dense->layout()); + auto place = t_dense->place(); + size_t bytes_size = + paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype()); + auto ret_dense = std::make_shared( + pten::make_intrusive( + paddle::memory::Alloc(place, bytes_size)), + std::move(ret_meta)); + + float* t_ptr = t_dense->mutable_data(); + float* ret_ptr = ret_dense->mutable_data(); + for (int i = 0; i < ret_dense->numel(); i++) { + ret_ptr[i] = t_ptr[i] + 3.0; + } + + auto ret_impl = std::dynamic_pointer_cast(ret_dense); + egr::EagerTensor ret = egr::EagerTensor(); + ret.set_impl(ret_impl); + + return ret; +} + +TEST(RetainGrad, HookBeforeRetainGrad) { + eager_test::InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + std::vector target_tensors; + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + // Create Target Tensor + egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(tensor)); + egr::EagerTensor& target_tensor = target_tensors[0]; + + // Create ScaleNode + auto scale_node_ptr = std::make_shared(1, 1); + 
scale_node_ptr->SetAttributes_scale(5.0 /*scale*/); + + // Set grad in/out meta for node0 + scale_node_ptr->SetDefaultGradInOutMeta(); + + // Create AccumulationNode + auto acc_node_ptr = std::make_shared(); + + // Connect Input Tensor and ScaleNode via AutoGradMeta + // Apply RetainGrad + { + // ScaleNode Hook: +3 + std::function hook = + &hook_function; + + auto auto_grad_meta = std::make_shared(); + auto_grad_meta->SetGradNode( + std::dynamic_pointer_cast(scale_node_ptr)); + auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + target_tensor.set_autograd_meta( + std::dynamic_pointer_cast( + auto_grad_meta)); + + egr_utils_api::RegisterGradientHookForTensor(target_tensor, hook); + egr_utils_api::RetainGradForTensor( + target_tensor); // result: 1.0 + 3.0 = 4.0 + } + + // Connect ScaleNode -> AccumulationNode via Edge + { + auto meta = AutogradMeta(); + meta.SetStopGradient(false); + meta.SetSingleOutRankWithSlot(0, 0); + meta.SetGradNode(acc_node_ptr); + std::vector res = {&meta}; + scale_node_ptr->AddEdges(&res, 0); + } + + // Retain Grad for leaf tensor1 + egr::EagerTensor leaf_tensor = egr::EagerTensor(); + { + // AccumulationNode Hook: +3 + std::function hook = + &hook_function; + + auto auto_grad_meta = std::make_shared(); + auto_grad_meta->SetGradNode( + std::dynamic_pointer_cast(acc_node_ptr)); + auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + leaf_tensor.set_autograd_meta( + std::dynamic_pointer_cast( + auto_grad_meta)); + + egr_utils_api::RegisterGradientHookForTensor(leaf_tensor, hook); + egr_utils_api::RetainGradForTensor( + leaf_tensor); // result: 4.0*5.0 + 3.0 = 23.0 + } + + RunBackward(target_tensors, {}); + + eager_test::CompareGradTensorWithValue(target_tensor, 4.0); + eager_test::CompareGradTensorWithValue(leaf_tensor, 23.0); +} + +TEST(RetainGrad, HookAfterRetainGrad) { + eager_test::InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + std::vector target_tensors; + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 
32}); + + // Create Target Tensor + egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(tensor)); + egr::EagerTensor& target_tensor = target_tensors[0]; + + // Create ScaleNode + auto scale_node_ptr = std::make_shared(1, 1); + scale_node_ptr->SetAttributes_scale(5.0 /*scale*/); + // Set grad in/out meta for node0 + scale_node_ptr->SetDefaultGradInOutMeta(); + // Create AccumulationNode + auto acc_node_ptr = std::make_shared(); + + // Connect Input Tensor and ScaleNode via AutoGradMeta + // Apply RetainGrad + { + // ScaleNode Hook: +3 + std::function hook = + &hook_function; + + auto auto_grad_meta = std::make_shared(); + auto_grad_meta->SetGradNode( + std::dynamic_pointer_cast(scale_node_ptr)); + auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + target_tensor.set_autograd_meta( + std::dynamic_pointer_cast( + auto_grad_meta)); + + egr_utils_api::RetainGradForTensor(target_tensor); // result: 1.0 + egr_utils_api::RegisterGradientHookForTensor(target_tensor, hook); + } + + // Connect ScaleNode -> AccumulationNode via Edge + { + auto meta = AutogradMeta(); + meta.SetStopGradient(false); + meta.SetSingleOutRankWithSlot(0, 0); + meta.SetGradNode(acc_node_ptr); + std::vector res = {&meta}; + scale_node_ptr->AddEdges(&res, 0); + } + + // Retain Grad for leaf tensor1 + egr::EagerTensor leaf_tensor = egr::EagerTensor(); + { + // AccumulationNode Hook: +3 + std::function hook = + &hook_function; + + auto auto_grad_meta = std::make_shared(); + auto_grad_meta->SetGradNode( + std::dynamic_pointer_cast(acc_node_ptr)); + auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + leaf_tensor.set_autograd_meta( + std::dynamic_pointer_cast( + auto_grad_meta)); + + egr_utils_api::RetainGradForTensor( + leaf_tensor); // RetainGrad for leaf tensor gets + // postponed, result: 4.0*5.0 + 3.0 = + // 23.0 + 
egr_utils_api::RegisterGradientHookForTensor(leaf_tensor, hook); + } + + RunBackward(target_tensors, {}); + eager_test::CompareGradTensorWithValue(target_tensor, 1.0); + eager_test::CompareGradTensorWithValue(leaf_tensor, 23.0); +} +} // namespace egr diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index c56fe5be4d..a0067f9c64 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -234,6 +234,44 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } +static PyObject* eager_tensor__share_buffer_to(EagerTensorObject* self, + PyObject* args, + PyObject* kwargs) { + EAGER_SYNC_TRY + egr::EagerTensor* src_ptr = + &(reinterpret_cast(PyTuple_GET_ITEM(args, 0)) + ->eager_tensor); + PADDLE_ENFORCE_EQ(self->eager_tensor.initialized(), true, + platform::errors::InvalidArgument( + "Tensor %s has not been initialized! please initialize " + "src tensor before share_buffer_with to other.", + self->eager_tensor.name())); + src_ptr->set_impl(self->eager_tensor.impl()); + Py_INCREF(Py_None); + return Py_None; + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +static PyObject* eager_tensor__is_shared_buffer_with(EagerTensorObject* self, + PyObject* args, + PyObject* kwargs) { + EAGER_SYNC_TRY + egr::EagerTensor src_tensor = + CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0); + PADDLE_ENFORCE_EQ(src_tensor.initialized(), true, + platform::errors::InvalidArgument( + "Tensor %s has not been initialized! 
please initialize " + "src tensor before share_buffer_with to other.", + src_tensor.name())); + bool res = false; + if (!self->eager_tensor.defined() || !src_tensor.defined()) { + return ToPyObject(res); + } + res = (self->eager_tensor.impl().get() == src_tensor.impl().get()); + return ToPyObject(res); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + static PyObject* eager_tensor_method_detach(EagerTensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY @@ -278,6 +316,12 @@ PyMethodDef variable_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads, METH_VARARGS | METH_KEYWORDS, NULL}, + {"_is_shared_buffer_to", + (PyCFunction)(void (*)(void))eager_tensor__share_buffer_to, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"_share_buffer_with", + (PyCFunction)(void (*)(void))eager_tensor__is_shared_buffer_with, + METH_VARARGS | METH_KEYWORDS, NULL}, {"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach, METH_VARARGS | METH_KEYWORDS, NULL}, {NULL, NULL, 0, NULL}}; diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index e4576fe2ea..3ab7981cdb 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -645,6 +645,33 @@ class EagerTensorPropertiesTestCase(unittest.TestCase): self.assertTrue(tensor3.stop_gradient, True) self.assertTrue(tensor3.place.is_cpu_place()) + def test_share_buffer_to(): + arr = np.ones([4, 16, 16, 32]).astype('float32') + arr1 = np.zeros([4, 16]).astype('float32') + arr2 = np.ones([4, 16, 16, 32]).astype('float32') + np.ones( + [4, 16, 16, 32]).astype('float32') + tensor = None + tensor2 = None + tensor = paddle.to_tensor(arr, core.VarDesc.VarType.FP32, + core.CPUPlace()) + tensor3 = core.eager.EagerTensor() + if core.is_compiled_with_cuda(): + tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32, + 
core.CUDAPlace(0)) + else: + tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32, + core.CPUPlace()) + self.assertTrue(np.array_equal(tensor.numpy(), arr)) + self.assertTrue(np.array_equal(tensor2.numpy(), arr2)) + tensor2._share_buffer_to(tensor) + self.assertTrue(np.array_equal(tensor.numpy(), arr2)) + self.assertTrue(np.array_equal(tensor2.numpy(), arr2)) + self.assertTrue(tensor._is_shared_buffer_with(tensor2)) + self.assertTrue(tensor2._is_shared_buffer_with(tensor)) + tensor._share_buffer_to(tensor3) + self.assertTrue(np.array_equal(tensor3.numpy(), arr2)) + self.assertTrue(tensor3._is_shared_buffer_with(tensor)) + def test_properties(self): print("Test_properties") with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index b82a058ae4..d2e1a4fbb1 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -15,6 +15,7 @@ import unittest import paddle.fluid as fluid import numpy as np +from paddle.fluid.framework import _test_eager_guard class AutoPruneLayer0(fluid.Layer): @@ -145,7 +146,7 @@ class MyLayer2(fluid.Layer): class TestImperativeAutoPrune(unittest.TestCase): - def test_auto_prune(self): + def func_auto_prune(self): with fluid.dygraph.guard(): case1 = AutoPruneLayer0(input_size=5) value1 = np.arange(25).reshape(5, 5).astype("float32") @@ -157,7 +158,12 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue(case1.linear2.weight._grad_ivar() is not None) self.assertTrue(case1.linear1.weight._grad_ivar() is not None) - def test_auto_prune2(self): + def test_auto_prune(self): + with _test_eager_guard(): + self.func_auto_prune() + self.func_auto_prune() + + def func_auto_prune2(self): with fluid.dygraph.guard(): case2 = AutoPruneLayer1(input_size=5) value1 = np.arange(25).reshape(5, 5).astype("float32") @@ -170,6 +176,11 @@ 
class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue(case2.linear2.weight._grad_ivar() is None) self.assertTrue(case2.linear1.weight._grad_ivar() is not None) + def test_auto_prune2(self): + with _test_eager_guard(): + self.func_auto_prune2() + self.func_auto_prune2() + def test_auto_prune3(self): with fluid.dygraph.guard(): case3 = AutoPruneLayer3(input_size=784) -- Gitee