Unverified commit 9e3dcd0d, authored by Jerry Jiarui XU, committed by GitHub

refactor ops: unify cuda ops api (#2277)

parent e03c96dc
Showing 396 additions and 241 deletions
@@ -3,7 +3,7 @@ import torch.nn.functional as F
 from torch.autograd import Function
 from torch.autograd.function import once_differentiable
 
-from . import affine_grid_cuda
+from . import affine_grid_ext
 
 
 class _AffineGridGenerator(Function):
@@ -15,7 +15,7 @@ class _AffineGridGenerator(Function):
         ctx.size = size
         ctx.align_corners = align_corners
 
-        func = affine_grid_cuda.affine_grid_generator_forward
+        func = affine_grid_ext.affine_grid_generator_forward
         output = func(theta, size, align_corners)
@@ -28,7 +28,7 @@ class _AffineGridGenerator(Function):
         size = ctx.size
         align_corners = ctx.align_corners
 
-        func = affine_grid_cuda.affine_grid_generator_backward
+        func = affine_grid_ext.affine_grid_generator_backward
         grad_input = func(grad_output, theta, size, align_corners)
...
// Modified from https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/AffineGridGenerator.cpp
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <torch/extension.h>
namespace mmdetection {
using namespace at;
Tensor affine_grid_generator_forward(const Tensor &theta, IntArrayRef size,
bool align_corners);
Tensor affine_grid_generator_backward(const Tensor &grad, IntArrayRef size,
bool align_corners);
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("affine_grid_generator_forward", &affine_grid_generator_forward,
"affine_grid_generator_forward");
m.def("affine_grid_generator_backward", &affine_grid_generator_backward,
"affine_grid_generator_backward");
}
} // namespace mmdetection
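
For a quick smoke test, a binding file like the one above can be JIT-compiled with torch.utils.cpp_extension.load. A minimal sketch, assuming the binding above is saved as affine_grid_ext.cpp next to a matching CUDA implementation file (both file names here are illustrative, not taken from this commit):

import torch
from torch.utils.cpp_extension import load

# JIT-compile the binding shown above together with its CUDA kernels.
affine_grid_ext = load(
    name='affine_grid_ext',
    sources=['affine_grid_ext.cpp', 'affine_grid_cuda.cu'])

theta = torch.randn(1, 2, 3, device='cuda')  # one 2x3 affine matrix
size = [1, 1, 4, 4]                          # (N, C, H, W) of the target grid
grid = affine_grid_ext.affine_grid_generator_forward(theta, size, True)
print(grid.shape)  # torch.Size([1, 4, 4, 2]): one (x, y) sample per pixel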
@@ -105,11 +105,4 @@ Tensor affine_grid_generator_backward(const Tensor& grad, IntArrayRef size,
                                  size[3], size[4], align_corners);
 }
 }
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("affine_grid_generator_forward", &affine_grid_generator_forward,
-        "affine_grid_generator_forward");
-  m.def("affine_grid_generator_backward", &affine_grid_generator_backward,
-        "affine_grid_generator_backward");
-}
 }  // namespace mmdetection
@@ -5,7 +5,7 @@ from mmcv.cnn import normal_init, xavier_init
 from torch.autograd import Function
 from torch.nn.modules.module import Module
 
-from . import carafe_cuda, carafe_naive_cuda
+from . import carafe_ext, carafe_naive_ext
 
 
 class CARAFENaiveFunction(Function):
@@ -27,8 +27,8 @@ class CARAFENaiveFunction(Function):
         n, c, h, w = features.size()
         output = features.new_zeros((n, c, h * scale_factor, w * scale_factor))
         if features.is_cuda:
-            carafe_naive_cuda.forward(features, masks, kernel_size, group_size,
-                                      scale_factor, output)
+            carafe_naive_ext.forward(features, masks, kernel_size, group_size,
+                                     scale_factor, output)
         else:
             raise NotImplementedError
@@ -47,9 +47,9 @@ class CARAFENaiveFunction(Function):
         grad_input = torch.zeros_like(features)
         grad_masks = torch.zeros_like(masks)
 
-        carafe_naive_cuda.backward(grad_output.contiguous(), features, masks,
-                                   kernel_size, group_size, scale_factor,
-                                   grad_input, grad_masks)
+        carafe_naive_ext.backward(grad_output.contiguous(), features, masks,
+                                  kernel_size, group_size, scale_factor,
+                                  grad_input, grad_masks)
 
         return grad_input, grad_masks, None, None, None
@@ -95,9 +95,8 @@ class CARAFEFunction(Function):
         rfeatures = features.new_zeros(features.size(), requires_grad=False)
         rmasks = masks.new_zeros(masks.size(), requires_grad=False)
         if features.is_cuda:
-            carafe_cuda.forward(features, rfeatures, masks, rmasks,
-                                kernel_size, group_size, scale_factor, routput,
-                                output)
+            carafe_ext.forward(features, rfeatures, masks, rmasks, kernel_size,
+                               group_size, scale_factor, routput, output)
         else:
             raise NotImplementedError
@@ -120,10 +119,10 @@ class CARAFEFunction(Function):
         rgrad_masks = torch.zeros_like(masks, requires_grad=False)
         grad_input = torch.zeros_like(features, requires_grad=False)
         grad_masks = torch.zeros_like(masks, requires_grad=False)
-        carafe_cuda.backward(grad_output.contiguous(), rfeatures, masks,
-                             kernel_size, group_size, scale_factor,
-                             rgrad_output, rgrad_input_hs, rgrad_input,
-                             rgrad_masks, grad_input, grad_masks)
+        carafe_ext.backward(grad_output.contiguous(), rfeatures, masks,
+                            kernel_size, group_size, scale_factor,
+                            rgrad_output, rgrad_input_hs, rgrad_input,
+                            rgrad_masks, grad_input, grad_masks)
 
         return grad_input, grad_masks, None, None, None, None
...
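
Only the extension module names change here; the calling convention is untouched. A minimal shape sketch of the naive path after the rename (the mmdet.ops.carafe import path is illustrative, and the random masks are only for shape checking; CARAFE normally expects them normalized over the kernel window):

import torch
from mmdet.ops.carafe import CARAFENaiveFunction  # illustrative import path

n, c, h, w = 2, 16, 8, 8
kernel_size, group_size, scale_factor = 5, 1, 2
features = torch.randn(n, c, h, w, device='cuda')
# One reassembly kernel per output pixel, softmax-normalized per location.
masks = torch.randn(n, group_size * kernel_size ** 2,
                    h * scale_factor, w * scale_factor,
                    device='cuda').softmax(dim=1)
out = CARAFENaiveFunction.apply(features, masks, kernel_size, group_size,
                                scale_factor)
print(out.shape)  # torch.Size([2, 16, 16, 16])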
#include <ATen/ATen.h>
#include <torch/extension.h>
#include <cmath>
#include <vector>
#ifdef WITH_CUDA
int carafe_forward_cuda(at::Tensor features, at::Tensor rfeatures,
at::Tensor masks, at::Tensor rmasks, int kernel_size,
int group_size, int scale_factor, at::Tensor routput,
at::Tensor output);
int carafe_backward_cuda(at::Tensor top_grad, at::Tensor rfeatures,
at::Tensor masks, int kernel_size, int group_size,
int scale_factor, at::Tensor rtop_grad,
at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad,
at::Tensor rmask_grad, at::Tensor bottom_grad,
at::Tensor mask_grad);
#endif
int carafe_forward(at::Tensor features, at::Tensor rfeatures,
at::Tensor masks, at::Tensor rmasks, int kernel_size,
int group_size, int scale_factor, at::Tensor routput,
at::Tensor output) {
if (features.type().is_cuda()) {
#ifdef WITH_CUDA
return carafe_forward_cuda(features, rfeatures, masks, rmasks, kernel_size,
group_size, scale_factor, routput, output);
#else
AT_ERROR("carafe is not compiled with GPU support");
#endif
}
AT_ERROR("carafe is not implemented on CPU");
}
int carafe_backward(at::Tensor top_grad, at::Tensor rfeatures,
at::Tensor masks, int kernel_size, int group_size,
int scale_factor, at::Tensor rtop_grad,
at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad,
at::Tensor rmask_grad, at::Tensor bottom_grad,
at::Tensor mask_grad) {
if (top_grad.type().is_cuda()) {
#ifdef WITH_CUDA
return carafe_backward_cuda(top_grad, rfeatures, masks, kernel_size,
group_size, scale_factor, rtop_grad, rbottom_grad_hs, rbottom_grad,
rmask_grad, bottom_grad, mask_grad);
#else
AT_ERROR("carafe is not compiled with GPU support");
#endif
}
AT_ERROR("carafe is not implemented on CPU");
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &carafe_forward, "carafe forward");
m.def("backward", &carafe_backward, "carafe backward");
}
#include <ATen/ATen.h>
#include <torch/torch.h>
#include <cmath>
#include <vector>
#ifdef WITH_CUDA
int carafe_naive_forward_cuda(at::Tensor features, at::Tensor masks,
int kernel_size, int group_size, int scale_factor,
at::Tensor output);
int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features,
at::Tensor masks, int kernel_size,
int group_size, int scale_factor,
at::Tensor bottom_grad, at::Tensor mask_grad);
#endif
int carafe_naive_forward(at::Tensor features, at::Tensor masks,
int kernel_size, int group_size, int scale_factor,
at::Tensor output) {
if (features.type().is_cuda()) {
#ifdef WITH_CUDA
return carafe_naive_forward_cuda(features, masks, kernel_size,
group_size, scale_factor, output);
#else
AT_ERROR("carafe naive is not compiled with GPU support");
#endif
}
AT_ERROR("carafe naive is not implemented on CPU");
}
int carafe_naive_backward(at::Tensor top_grad, at::Tensor features,
at::Tensor masks, int kernel_size,
int group_size, int scale_factor,
at::Tensor bottom_grad, at::Tensor mask_grad) {
if (top_grad.type().is_cuda()) {
#ifdef WITH_CUDA
return carafe_naive_backward_cuda(top_grad, features, masks, kernel_size,
group_size, scale_factor, bottom_grad, mask_grad);
#else
AT_ERROR("carafe naive is not compiled with GPU support");
#endif
}
AT_ERROR("carafe naive is not implemented on CPU");
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &carafe_naive_forward, "carafe_naive forward");
m.def("backward", &carafe_naive_backward, "carafe_naive backward");
}
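
These wrappers only reach their *_cuda implementations when the build defines the WITH_CUDA macro and compiles the .cu sources. A minimal setup.py sketch of that arrangement (extension and source file names are illustrative, not taken from this commit):

import torch
from setuptools import setup
from torch.utils.cpp_extension import (BuildExtension, CppExtension,
                                       CUDAExtension)

def make_ext(name, cpp_sources, cuda_sources):
    # Define WITH_CUDA and compile the kernels only when CUDA is usable;
    # otherwise the wrappers above fall through to their AT_ERROR branches.
    if torch.cuda.is_available():
        return CUDAExtension(
            name,
            cpp_sources + cuda_sources,
            define_macros=[('WITH_CUDA', None)])
    return CppExtension(name, cpp_sources)

setup(
    name='carafe_ops_demo',
    ext_modules=[
        make_ext('carafe_naive_ext', ['carafe_naive_ext.cpp'],
                 ['carafe_naive_cuda.cu']),
    ],
    cmdclass={'build_ext': BuildExtension})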
@@ -106,8 +106,3 @@ int carafe_backward_cuda(at::Tensor top_grad, at::Tensor rfeatures,
 
   return 1;
 }
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("forward", &carafe_forward_cuda, "carafe forward (CUDA)");
-  m.def("backward", &carafe_backward_cuda, "carafe backward (CUDA)");
-}
@@ -67,9 +67,3 @@ int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features,
 
   return 1;
 }
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("forward", &carafe_naive_forward_cuda, "carafe_naive forward (CUDA)");
-  m.def("backward", &carafe_naive_backward_cuda,
-        "carafe_naive backward (CUDA)");
-}
@@ -8,7 +8,7 @@ from torch.autograd.function import once_differentiable
 from torch.nn.modules.utils import _pair, _single
 
 from mmdet.utils import print_log
-from . import deform_conv_cuda
+from . import deform_conv_ext
 
 
 class DeformConvFunction(Function):
@@ -49,7 +49,7 @@ class DeformConvFunction(Function):
             cur_im2col_step = min(ctx.im2col_step, input.shape[0])
             assert (input.shape[0] %
                     cur_im2col_step) == 0, 'im2col step must divide batchsize'
-            deform_conv_cuda.deform_conv_forward_cuda(
+            deform_conv_ext.deform_conv_forward(
                 input, weight, offset, output, ctx.bufs_[0], ctx.bufs_[1],
                 weight.size(3), weight.size(2), ctx.stride[1], ctx.stride[0],
                 ctx.padding[1], ctx.padding[0], ctx.dilation[1],
@@ -74,7 +74,7 @@ class DeformConvFunction(Function):
             if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
                 grad_input = torch.zeros_like(input)
                 grad_offset = torch.zeros_like(offset)
-                deform_conv_cuda.deform_conv_backward_input_cuda(
+                deform_conv_ext.deform_conv_backward_input(
                     input, offset, grad_output, grad_input,
                     grad_offset, weight, ctx.bufs_[0], weight.size(3),
                     weight.size(2), ctx.stride[1], ctx.stride[0],
@@ -84,7 +84,7 @@ class DeformConvFunction(Function):
             if ctx.needs_input_grad[2]:
                 grad_weight = torch.zeros_like(weight)
-                deform_conv_cuda.deform_conv_backward_parameters_cuda(
+                deform_conv_ext.deform_conv_backward_parameters(
                     input, offset, grad_output,
                     grad_weight, ctx.bufs_[0], ctx.bufs_[1], weight.size(3),
                     weight.size(2), ctx.stride[1], ctx.stride[0],
@@ -142,7 +142,7 @@ class ModulatedDeformConvFunction(Function):
         output = input.new_empty(
             ModulatedDeformConvFunction._infer_shape(ctx, input, weight))
         ctx._bufs = [input.new_empty(0), input.new_empty(0)]
-        deform_conv_cuda.modulated_deform_conv_cuda_forward(
+        deform_conv_ext.modulated_deform_conv_forward(
             input, weight, bias, ctx._bufs[0], offset, mask, output,
             ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride,
             ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
@@ -160,7 +160,7 @@ class ModulatedDeformConvFunction(Function):
         grad_mask = torch.zeros_like(mask)
         grad_weight = torch.zeros_like(weight)
         grad_bias = torch.zeros_like(bias)
-        deform_conv_cuda.modulated_deform_conv_cuda_backward(
+        deform_conv_ext.modulated_deform_conv_backward(
             input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1],
             grad_input, grad_weight, grad_bias, grad_offset, grad_mask,
             grad_output, weight.shape[2], weight.shape[3], ctx.stride,
...
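
Since only the extension-facing names change, the public module API is untouched. A minimal usage sketch, assuming the DeformConv module exported by this package (import path and channel sizes are illustrative):

import torch
from mmdet.ops.dcn import DeformConv  # illustrative import path

conv = DeformConv(16, 32, kernel_size=3, padding=1).cuda()
x = torch.randn(2, 16, 28, 28, device='cuda')
# One (dy, dx) pair per kernel sampling location: 2 * 3 * 3 = 18 channels.
offset = torch.zeros(2, 18, 28, 28, device='cuda')  # zero offsets = plain conv
y = conv(x, offset)
print(y.shape)  # torch.Size([2, 32, 28, 28])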
@@ -4,7 +4,7 @@ from torch.autograd import Function
 from torch.autograd.function import once_differentiable
 from torch.nn.modules.utils import _pair
 
-from . import deform_pool_cuda
+from . import deform_pool_ext
 
 
 class DeformRoIPoolingFunction(Function):
@@ -44,7 +44,7 @@ class DeformRoIPoolingFunction(Function):
         n = rois.shape[0]
         output = data.new_empty(n, out_channels, out_size, out_size)
         output_count = data.new_empty(n, out_channels, out_size, out_size)
-        deform_pool_cuda.deform_psroi_pooling_cuda_forward(
+        deform_pool_ext.deform_psroi_pooling_forward(
             data, rois, offset, output, output_count, ctx.no_trans,
             ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size,
             ctx.part_size, ctx.sample_per_part, ctx.trans_std)
@@ -67,7 +67,7 @@ class DeformRoIPoolingFunction(Function):
         grad_rois = None
         grad_offset = torch.zeros_like(offset)
-        deform_pool_cuda.deform_psroi_pooling_cuda_backward(
+        deform_pool_ext.deform_psroi_pooling_backward(
             grad_output, data, rois, offset, output_count, grad_input,
             grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels,
             ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part,
...
@@ -683,19 +683,3 @@ void modulated_deform_conv_cuda_backward(
                               grad_output.size(2), grad_output.size(3),
                               grad_output.size(4)});
 }
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("deform_conv_forward_cuda", &deform_conv_forward_cuda,
-        "deform forward (CUDA)");
-  m.def("deform_conv_backward_input_cuda", &deform_conv_backward_input_cuda,
-        "deform_conv_backward_input (CUDA)");
-  m.def("deform_conv_backward_parameters_cuda",
-        &deform_conv_backward_parameters_cuda,
-        "deform_conv_backward_parameters (CUDA)");
-  m.def("modulated_deform_conv_cuda_forward",
-        &modulated_deform_conv_cuda_forward,
-        "modulated deform conv forward (CUDA)");
-  m.def("modulated_deform_conv_cuda_backward",
-        &modulated_deform_conv_cuda_backward,
-        "modulated deform conv backward (CUDA)");
-}
@@ -80,11 +80,3 @@ void deform_psroi_pooling_cuda_backward(
                         spatial_scale, output_dim, group_size, pooled_size, part_size,
                         sample_per_part, trans_std);
 }
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
-        "deform psroi pooling forward(CUDA)");
-  m.def("deform_psroi_pooling_cuda_backward",
-        &deform_psroi_pooling_cuda_backward,
-        "deform psroi pooling backward(CUDA)");
-}
// modify from
// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
#include <torch/extension.h>
#include <ATen/DeviceGuard.h>
#include <cmath>
#include <vector>
#ifdef WITH_CUDA
int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
at::Tensor offset, at::Tensor output,
at::Tensor columns, at::Tensor ones, int kW,
int kH, int dW, int dH, int padW, int padH,
int dilationW, int dilationH, int group,
int deformable_group, int im2col_step);
int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
at::Tensor gradOutput, at::Tensor gradInput,
at::Tensor gradOffset, at::Tensor weight,
at::Tensor columns, int kW, int kH, int dW,
int dH, int padW, int padH, int dilationW,
int dilationH, int group,
int deformable_group, int im2col_step);
int deform_conv_backward_parameters_cuda(
at::Tensor input, at::Tensor offset, at::Tensor gradOutput,
at::Tensor gradWeight, // at::Tensor gradBias,
at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH,
int padW, int padH, int dilationW, int dilationH, int group,
int deformable_group, float scale, int im2col_step);
void modulated_deform_conv_cuda_forward(
at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns,
int kernel_h, int kernel_w, const int stride_h, const int stride_w,
const int pad_h, const int pad_w, const int dilation_h,
const int dilation_w, const int group, const int deformable_group,
const bool with_bias);
void modulated_deform_conv_cuda_backward(
at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
at::Tensor offset, at::Tensor mask, at::Tensor columns,
at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias,
at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output,
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
const bool with_bias);
#endif
int deform_conv_forward(at::Tensor input, at::Tensor weight,
at::Tensor offset, at::Tensor output,
at::Tensor columns, at::Tensor ones, int kW,
int kH, int dW, int dH, int padW, int padH,
int dilationW, int dilationH, int group,
int deformable_group, int im2col_step) {
if (input.type().is_cuda()) {
#ifdef WITH_CUDA
return deform_conv_forward_cuda(input, weight, offset, output, columns,
ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group,
deformable_group, im2col_step);
#else
AT_ERROR("deform conv is not compiled with GPU support");
#endif
}
AT_ERROR("deform conv is not implemented on CPU");
}
int deform_conv_backward_input(at::Tensor input, at::Tensor offset,
at::Tensor gradOutput, at::Tensor gradInput,
at::Tensor gradOffset, at::Tensor weight,
at::Tensor columns, int kW, int kH, int dW,
int dH, int padW, int padH, int dilationW,
int dilationH, int group,
int deformable_group, int im2col_step) {
if (input.type().is_cuda()) {
#ifdef WITH_CUDA
return deform_conv_backward_input_cuda(input, offset, gradOutput,
gradInput, gradOffset, weight, columns, kW, kH, dW, dH, padW, padH,
dilationW, dilationH, group, deformable_group, im2col_step);
#else
AT_ERROR("deform conv is not compiled with GPU support");
#endif
}
AT_ERROR("deform conv is not implemented on CPU");
}
int deform_conv_backward_parameters(
at::Tensor input, at::Tensor offset, at::Tensor gradOutput,
at::Tensor gradWeight, // at::Tensor gradBias,
at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH,
int padW, int padH, int dilationW, int dilationH, int group,
int deformable_group, float scale, int im2col_step) {
if (input.type().is_cuda()) {
#ifdef WITH_CUDA
return deform_conv_backward_parameters_cuda(input, offset, gradOutput,
gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW,
dilationH, group, deformable_group, scale, im2col_step);
#else
AT_ERROR("deform conv is not compiled with GPU support");
#endif
}
AT_ERROR("deform conv is not implemented on CPU");
}
void modulated_deform_conv_forward(
at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns,
int kernel_h, int kernel_w, const int stride_h, const int stride_w,
const int pad_h, const int pad_w, const int dilation_h,
const int dilation_w, const int group, const int deformable_group,
const bool with_bias) {
if (input.type().is_cuda()) {
#ifdef WITH_CUDA
return modulated_deform_conv_cuda_forward(input, weight, bias, ones,
offset, mask, output, columns, kernel_h, kernel_w, stride_h,
stride_w, pad_h, pad_w, dilation_h, dilation_w, group,
deformable_group, with_bias);
#else
AT_ERROR("modulated deform conv is not compiled with GPU support");
#endif
}
AT_ERROR("modulated deform conv is not implemented on CPU");
}
void modulated_deform_conv_backward(
at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
at::Tensor offset, at::Tensor mask, at::Tensor columns,
at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias,
at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output,
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
const bool with_bias) {
if (input.type().is_cuda()) {
#ifdef WITH_CUDA
return modulated_deform_conv_cuda_backward(input, weight, bias, ones,
offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset,
grad_mask, grad_output, kernel_h, kernel_w, stride_h, stride_w,
pad_h, pad_w, dilation_h, dilation_w, group, deformable_group,
with_bias);
#else
AT_ERROR("modulated deform conv is not compiled with GPU support");
#endif
}
AT_ERROR("modulated deform conv is not implemented on CPU");
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("deform_conv_forward", &deform_conv_forward,
"deform forward");
m.def("deform_conv_backward_input", &deform_conv_backward_input,
"deform_conv_backward_input");
m.def("deform_conv_backward_parameters",
&deform_conv_backward_parameters,
"deform_conv_backward_parameters");
m.def("modulated_deform_conv_forward",
&modulated_deform_conv_forward,
"modulated deform conv forward");
m.def("modulated_deform_conv_backward",
&modulated_deform_conv_backward,
"modulated deform conv backward");
}
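
The unified wrapper above checks the input's device before anything else, so a CPU tensor fails fast with a clear error instead of reaching a CUDA kernel. A minimal sketch of that behavior (argument values are illustrative and only need to be well-formed enough to reach the device check):

import torch
from mmdet.ops.dcn import deform_conv_ext  # the compiled extension, as imported above

x = torch.randn(1, 3, 8, 8)            # deliberately a CPU tensor
w = torch.randn(4, 3, 3, 3)
offset = torch.zeros(1, 18, 8, 8)
out = x.new_empty(1, 4, 8, 8)
buf0, buf1 = x.new_empty(0), x.new_empty(0)
try:
    deform_conv_ext.deform_conv_forward(
        x, w, offset, out, buf0, buf1,
        3, 3,      # kW, kH
        1, 1,      # dW, dH (stride)
        1, 1,      # padW, padH
        1, 1,      # dilationW, dilationH
        1, 1, 64)  # group, deformable_group, im2col_step
except RuntimeError as e:
    print(e)  # AT_ERROR surfaces as: "deform conv is not implemented on CPU"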
// modify from
// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
// based on
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
#include <torch/extension.h>
#include <ATen/DeviceGuard.h>
#include <cmath>
#include <vector>
#ifdef WITH_CUDA
void deform_psroi_pooling_cuda_forward(
at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std);
void deform_psroi_pooling_cuda_backward(
at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std);
#endif
void deform_psroi_pooling_forward(
at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std) {
if (input.type().is_cuda()) {
#ifdef WITH_CUDA
return deform_psroi_pooling_cuda_forward(input, bbox, trans, out, top_count,
no_trans, spatial_scale, output_dim, group_size, pooled_size,
part_size, sample_per_part, trans_std);
#else
AT_ERROR("deform psroi pooling is not compiled with GPU support");
#endif
}
AT_ERROR("deform psroi pooling is not implemented on CPU");
}
void deform_psroi_pooling_backward(
at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std) {
if (input.type().is_cuda()) {
#ifdef WITH_CUDA
return deform_psroi_pooling_cuda_backward(out_grad, input, bbox, trans,
top_count, input_grad, trans_grad, no_trans, spatial_scale,
output_dim, group_size, pooled_size, part_size, sample_per_part,
trans_std);
#else
AT_ERROR("deform psroi pooling is not compiled with GPU support");
#endif
}
AT_ERROR("deform psroi pooling is not implemented on CPU");
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward,
"deform psroi pooling forward");
m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward,
"deform psroi pooling backward");
}
@@ -3,7 +3,7 @@ import torch.nn.functional as F
 from torch.autograd import Function
 from torch.autograd.function import once_differentiable
 
-from . import grid_sampler_cuda
+from . import grid_sampler_ext
 
 
 class _GridSampler(Function):
@@ -16,18 +16,9 @@ class _GridSampler(Function):
         ctx.padding_mode_enum = padding_mode_enum
         ctx.align_corners = align_corners
 
-        if input.is_cuda:
-            if input.dim() == 4:
-                func = grid_sampler_cuda.grid_sampler_2d_forward_cuda
-            else:
-                func = grid_sampler_cuda.grid_sampler_3d_forward_cuda
-        else:
-            if input.dim() == 4:
-                func = grid_sampler_cuda.grid_sampler_2d_forward_cpu
-            else:
-                func = grid_sampler_cuda.grid_sampler_3d_forward_cpu
-
-        output = func(input, grid, mode_enum, padding_mode_enum, align_corners)
+        output = grid_sampler_ext.grid_sampler_forward(input, grid, mode_enum,
+                                                       padding_mode_enum,
+                                                       align_corners)
 
         return output
@@ -39,19 +30,9 @@ class _GridSampler(Function):
         padding_mode_enum = ctx.padding_mode_enum
         align_corners = ctx.align_corners
 
-        if input.is_cuda:
-            if input.dim() == 4:
-                func = grid_sampler_cuda.grid_sampler_2d_backward_cuda
-            else:
-                func = grid_sampler_cuda.grid_sampler_3d_backward_cuda
-        else:
-            if input.dim() == 4:
-                func = grid_sampler_cuda.grid_sampler_2d_backward_cpu
-            else:
-                func = grid_sampler_cuda.grid_sampler_3d_backward_cpu
-
-        grad_input, grad_grid = func(grad_output, input, grid, mode_enum,
-                                     padding_mode_enum, align_corners)
+        grad_input, grad_grid = grid_sampler_ext.grid_sampler_backward(
+            grad_output, input, grid, mode_enum, padding_mode_enum,
+            align_corners)
 
         return grad_input, grad_grid, None, None, None
...
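
The 2-D/3-D and CPU/CUDA dispatch now lives inside grid_sampler_ext, so the Function is invoked the same way for 4-D and 5-D inputs. A minimal sketch using ATen's enum conventions (0 = bilinear interpolation, 0 = zeros padding; the module path is illustrative):

import torch
from mmdet.ops.grid_sampler.grid_sample import _GridSampler  # illustrative path

x = torch.randn(1, 3, 8, 8)
grid = torch.rand(1, 4, 4, 2) * 2 - 1  # sampling locations, normalized to [-1, 1]
# args: input, grid, mode_enum, padding_mode_enum, align_corners
y = _GridSampler.apply(x, grid, 0, 0, False)
print(y.shape)  # torch.Size([1, 3, 4, 4])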
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Config.h>
#include <ATen/cuda/CUDAConfig.h>
#if !AT_CUDNN_ENABLED()
namespace at { namespace native {
// See Note [ATen preprocessor philosophy]
Tensor cudnn_grid_sampler_forward(
const Tensor& input_t, const Tensor& grid_t) {
AT_ERROR("cudnn_grid_sampler_forward: ATen not compiled with cuDNN support");
}
std::tuple<Tensor, Tensor> cudnn_grid_sampler_backward(
const Tensor& input_t, const Tensor& grid_t,
const Tensor& grad_output_t) {
AT_ERROR("cudnn_grid_sampler_backward: ATen not compiled with cuDNN support");
}
}}
#else // AT_CUDNN_ENABLED
#include <ATen/cudnn/Descriptors.h>
#include <ATen/cudnn/Types.h>
#include <ATen/cudnn/Utils.h>
#include <ATen/cuda/Exceptions.h>
#include <ATen/TensorUtils.h>
// TODO: descriptor checking
namespace mmdetection {
using namespace at;
namespace {
void setSamplerDescriptor(SpatialTransformerDescriptor& desc, cudnnDataType_t dataType, const at::Tensor& tensor)
{
int inputSize[4] = {0};
for (int i = 0; i < tensor.dim(); ++i) {
inputSize[i] = (int) tensor.size(i);
}
desc.set(dataType, 4, inputSize);
}
void checkGridSize(CheckedFrom c, TensorArg grid, TensorArg input)
{
// assert size of grid is n*h*w*2
// FYI: grid is between [-1, 1], where -1 left most pixel,
// 1 represents right most pixel (and hence 0 is the center pixel)
// if grid has values >1 or <-1, those values are ignored
checkContiguous(c, grid);
checkDim(c, grid, 4);
// TODO: Maybe more user friendly to report where the expected size
// came from
checkSize(c, grid, 0, input->size(0));
checkSize(c, grid, 3, 2);
}
} // namespace
Tensor cudnn_grid_sampler_forward(
const Tensor& input_t, const Tensor& grid_t)
{
TensorArg input{ contiguousIfZeroInStrides(input_t), "input", 1 },
grid{ grid_t.contiguous(), "grid", 2 };
CheckedFrom c = "cudnn_grid_sampler_forward";
checkAllSameGPU(c, {input, grid});
checkAllSameType(c, {input, grid});
checkGridSize(c, grid, input);
checkDim(c, input, 4);
auto output_t = at::empty({0}, input->options());
output_t.resize_({input->size(0), input->size(1), grid->size(1), grid->size(2)});
TensorDescriptor idesc{ *input }; // input descriptor
TensorDescriptor odesc{ output_t }; // output descriptor
SpatialTransformerDescriptor desc; // sampler descriptor
auto handle = getCudnnHandle();
auto dataType = getCudnnDataType(*input);
setSamplerDescriptor(desc, dataType, output_t);
Constant one(dataType, 1);
Constant zero(dataType, 0);
AT_CUDNN_CHECK(cudnnSpatialTfSamplerForward(
handle, desc.desc(),
&one, idesc.desc(), input->data_ptr(),
grid->data_ptr(),
&zero, odesc.desc(), output_t.data_ptr()
));
return output_t;
}
// NB: CuDNN does not support output mask; you always get both
// gradients.
std::tuple<Tensor, Tensor> cudnn_grid_sampler_backward(
const Tensor& input_t, const Tensor& grid_t,
const Tensor& grad_output_t)
{
TensorArg input{ contiguousIfZeroInStrides(input_t), "input", 1 },
grid{ grid_t.contiguous(), "grid", 2 },
grad_output{ contiguousIfZeroInStrides(grad_output_t), "grad_output", 3 };
CheckedFrom c = "cudnn_grid_sampler_backward";
checkAllSameGPU(c, {input, grad_output, grid});
checkGridSize(c, grid, input);
checkDim(c, input, 4);
checkDim(c, grad_output, 4);
auto grad_input_t = at::empty({0}, input->options());
grad_input_t.resize_(input->sizes());
auto grad_grid_t = at::empty({0}, grid->options());
grad_grid_t.resize_(grid->sizes());
TensorDescriptor idesc{ *input }; // input descriptor
TensorDescriptor odesc{ *grad_output }; // grad_output descriptor
TensorDescriptor gdesc{ grad_input_t }; // grad_input descriptor
SpatialTransformerDescriptor desc; // sampler descriptor
auto handle = getCudnnHandle();
auto dataType = getCudnnDataType(*input);
setSamplerDescriptor(desc, dataType, *grad_output);
Constant one(dataType, 1);
Constant zero(dataType, 0);
AT_CUDNN_CHECK(cudnnSpatialTfSamplerBackward(
handle, desc.desc(),
&one, idesc.desc(), input->data_ptr(),
&zero, gdesc.desc(), grad_input_t.data_ptr(),
&one, odesc.desc(), grad_output->data_ptr(),
      // intriguingly, the outputs don't need descriptors
grid->data_ptr(),
&zero, grad_grid_t.data_ptr()
));
return std::tuple<Tensor, Tensor>{ grad_input_t, grad_grid_t };
}
} // namespace mmdetection
#endif
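
checkGridSize above pins down the cuDNN sampler's shape contract; restated as a small, illustrative Python-side check (not part of this commit):

import torch

def check_grid(input, grid):
    # Mirrors checkGridSize: grid must be contiguous, 4-D, batch-matched,
    # and hold (x, y) pairs in its last dimension; values live in [-1, 1],
    # where -1/+1 map to the left-most/right-most pixels.
    assert grid.is_contiguous()
    assert input.dim() == 4 and grid.dim() == 4
    assert grid.size(0) == input.size(0) and grid.size(3) == 2

check_grid(torch.randn(2, 3, 8, 8), torch.rand(2, 4, 4, 2) * 2 - 1)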