Skip to content
GitLab
菜单
项目
群组
代码片段
帮助
帮助
支持
社区论坛
快捷键
?
提交反馈
登录/注册
切换导航
菜单
打开侧边栏
wanggh
apex
提交
bdac244e
未验证
提交
bdac244e
编辑于
9月 30, 2021
作者:
X Wang
提交者:
GitHub
9月 30, 2021
浏览文件
use cuda caching allocator from pytorch (#1180)
上级
2a559c51
变更
4
Hide whitespace changes
Inline
Side-by-side
.gitignore
浏览文件 @
bdac244e
...
...
@@ -4,3 +4,5 @@ build
docs/build
*~
__pycache__
*.so
.vscode
\ No newline at end of file
apex/contrib/csrc/groupbn/batch_norm.cu
浏览文件 @
bdac244e
#include
<ATen/ATen.h>
#include
<ATen/cuda/CUDAContext.h>
#include
<THC/THCNumerics.cuh>
#include
<c10/cuda/CUDACachingAllocator.h>
#include
"THC/THC.h"
...
...
// Round x up to the smallest multiple of `multiple` that is >= x.
// Assumes multiple > 0; overflow behavior matches the classic
// ((x + m - 1) / m) * m idiom.
static size_t round_up_to_multiple(size_t x, int multiple) {
  const size_t bumped = x + multiple - 1;
  return (bumped / multiple) * multiple;
}
// TODO: Stop manually allocating CUDA memory; allocate an ATen byte
// tensor instead.
//
// RAII scratch buffer obtained from PyTorch's CUDA caching allocator,
// so the workspace participates in the framework's memory pool instead
// of going through a raw cudaMalloc/cudaFree pair.
// NOTE(review): the scraped diff interleaved the old THCudaMalloc path
// and a second destructor with the new code; this is the post-commit
// version (commit "use cuda caching allocator from pytorch").
struct Workspace {
  // Allocate `size` bytes; `dataPtr` owns the buffer, `data` is a
  // non-owning convenience view of the same memory.
  Workspace(size_t size) : size(size), data(NULL) {
    auto& allocator = *::c10::cuda::CUDACachingAllocator::get();
    dataPtr = allocator.allocate(size);
    data = dataPtr.get();
  }
  // Non-copyable: dataPtr uniquely owns the allocation.
  Workspace(const Workspace&) = delete;
  Workspace(Workspace&&) = default;
  Workspace& operator=(Workspace&&) = default;
  // No manual free needed: dataPtr's deleter returns the buffer to the
  // caching allocator when the Workspace is destroyed.
  ~Workspace() = default;

  size_t size;       // buffer size in bytes
  void* data;        // non-owning pointer into the buffer owned by dataPtr
  c10::DataPtr dataPtr;
};
// Return {y}
...
...
apex/contrib/csrc/groupbn/batch_norm_add_relu.cu
浏览文件 @
bdac244e
#include
<ATen/ATen.h>
#include
<ATen/cuda/CUDAContext.h>
#include
<THC/THCNumerics.cuh>
#include
<c10/cuda/CUDACachingAllocator.h>
#include
"THC/THC.h"
...
...
// Round x up to the smallest multiple of `multiple` that is >= x.
// Assumes multiple > 0; overflow behavior matches the classic
// ((x + m - 1) / m) * m idiom.
static size_t round_up_to_multiple(size_t x, int multiple) {
  const size_t bumped = x + multiple - 1;
  return (bumped / multiple) * multiple;
}
// TODO: Stop manually allocating CUDA memory; allocate an ATen byte
// tensor instead.
//
// RAII scratch buffer obtained from PyTorch's CUDA caching allocator,
// so the workspace participates in the framework's memory pool instead
// of going through a raw cudaMalloc/cudaFree pair.
// NOTE(review): the scraped diff interleaved the old THCudaMalloc path
// and a second destructor with the new code; this is the post-commit
// version (commit "use cuda caching allocator from pytorch").
struct Workspace {
  // Allocate `size` bytes; `dataPtr` owns the buffer, `data` is a
  // non-owning convenience view of the same memory.
  Workspace(size_t size) : size(size), data(NULL) {
    auto& allocator = *::c10::cuda::CUDACachingAllocator::get();
    dataPtr = allocator.allocate(size);
    data = dataPtr.get();
  }
  // Non-copyable: dataPtr uniquely owns the allocation.
  Workspace(const Workspace&) = delete;
  Workspace(Workspace&&) = default;
  Workspace& operator=(Workspace&&) = default;
  // No manual free needed: dataPtr's deleter returns the buffer to the
  // caching allocator when the Workspace is destroyed.
  ~Workspace() = default;

  size_t size;       // buffer size in bytes
  void* data;        // non-owning pointer into the buffer owned by dataPtr
  c10::DataPtr dataPtr;
};
// Return {y}
...
...
apex/contrib/csrc/xentropy/xentropy_kernel.cu
浏览文件 @
bdac244e
...
...
@@ -78,7 +78,6 @@
#include
<THC/THC.h>
#include
<THC/THCGeneral.h>
#include
<THC/THCThrustAllocator.cuh>
#include
"type_shim.h"
#include
"compat.h"
...
...
编辑
预览
Supports
Markdown
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录