CUDA C 学习笔记_2.0.4

2023-02-10 17:13 作者:上岸的小浣熊 0人读过 | 我要投稿

CUDA 纹理对象实现二维卷积

在笔记 2.0.2 中我们用普通的方法实现了二维卷积运算，本次主要通过纹理对象调用纹理内存实现二维卷积，并对笔记 1.0.4-3 中纹理内存的使用细则做更详细的说明。

1 申请数组

在 2.0.3 的方法中，我们对卷积前后的数据分别申请了 host 端和 device 端数据，采用纹理内存时，不再分配卷积前数据的 device 端内存，而是将 host 端数据传入 CUDA 数组。申请数组的方式如下：

cudaChannelFormatDesc channelDesc

= cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);

//这个函数中注意两个参数：第一个参数32是指第一个通道 (x 分量) 的位数，常用的int和float都是32位的；最后一个参数有3种：cudaChannelFormatKindUnsigned→无符号整型；cudaChannelFormatKindSigned→带符号整型；cudaChannelFormatKindFloat→浮点型。该类型应与源数据以及核函数中 tex2D 所用的数据类型保持一致。

cudaArray_t cuArray;

cudaMallocArray(&cuArray, &channelDesc, width, height); //width为列数，height为行数

2 拷贝数据

数据拷贝有许多函数，同时不同的函数也可以实现相同的拷贝需求，参考下方网址：(https://developer.download.nvidia.cn/compute/DevZone/docs/html/C/doc/html/group__CUDART__MEMORY.html)，本次拷贝方式如下：

const size_t spitch = width * sizeof(int);

cudaMemcpy2DToArray

(cuArray, o1, o2, data, spitch, width * sizeof(int), height, cudaMemcpyHostToDevice);

cuArray为目标数组，o1为width方向起始偏移量，o2为height方向起始偏移量，data为源主机端数据，spitch为源数据每一行所占的字节数，width * sizeof(int) 为每行要拷贝的字节数，height为要拷贝的行数，最后一个参数为拷贝类型。

3 给出纹理对象的属性参数并创建纹理对象

具体说明参考学习笔记 1.0.4_3。但需要注意以下事项：如果希望从纹理内存中读到未经插值的源数据，texDesc.filterMode 要使用 cudaFilterModePoint，而不能使用线性滤波；想保持源数据数值，texDesc.readMode 要使用 cudaReadModeElementType，否则可以选择进行归一化。索引坐标是否归一化由 texDesc.normalizedCoords 决定：值为 1 时表示归一化，在核函数中访问时要使用归一化之后的坐标；值为 0 时表示不归一化，直接使用原始的像素坐标访问。

另外，对于二维卷积而言，使用纹理可以省去扩边操作，纹理可以对超出索引范围的数据进行内部处理，我们只需要选择超出范围后赋值 0 即可。

4 核函数访问并将计算结果传回 host 端

(注意：在VS中tex2D函数会出现红色波浪线报错，直接忽略即可，如果其余部分正确，是可以成功编译并运行的，不要被某些网络回答误导)

在核函数中使用tex2D等函数拾取纹理时，要通过模板参数指明数据类型，同时避免出现赋值两边的数据类型矛盾，如：tex2D <int> ( ) 和 tex2D <float> ( ) 。函数中共包含3个参数，第一个为纹理对象，后面依次为x和y坐标，其中x对应width (列) 方向，y对应height (行) 方向 (三维纹理改用tex3D，再多加一个z坐标即可)。

5 销毁纹理释放CUDA数组及其余内存

完整代码

#include <stdio.h>

#include <stdlib.h>

#include <math.h>

#include <string.h>

#include "cuda_runtime.h"

#include "device_launch_parameters.h"

// Source image extent: m rows by n columns (stored row-major as a[i * n + j]).
const int m = 32;
const int n = 32;

// Convolution kernel extent: p rows by q columns.
const int p = 3;
const int q = 3;

// Flipped convolution kernel coefficients, row-major (N[i2 * q + j2]).
// Lives in constant memory; the host fills it with cudaMemcpyToSymbol.
__constant__ int N[p * q];

/*
 * 2D convolution of the source image bound to `texObj` with the p x q
 * coefficient table `N` held in constant memory.
 *
 * Launch layout: a 2D grid where the x thread index selects the output row i
 * (0 <= i < m) and the y thread index selects the output column j
 * (0 <= j < n). Threads outside the m x n image do nothing.
 *
 * Parameters:
 *   s       device buffer of m * n ints; element (i, j) is written to
 *           s[i * n + j].
 *   texObj  texture object over the source image. Expected to be a 2D array
 *           of width n and height m holding the image row-major, so that
 *           texture x is the column and texture y is the row, with
 *           unnormalized coordinates and point filtering.
 *   bdy_x   kernel half-height; row offsets run from -bdy_x upward.
 *   bdy_y   kernel half-width; column offsets run from -bdy_y upward.
 *
 * No explicit padding is done here: fetches that fall outside the image rely
 * on the texture's address mode (border mode yields 0 for those texels).
 */
__global__ void conv2(int* s, cudaTextureObject_t texObj, int bdy_x, int bdy_y)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;  // output row
    int j = blockDim.y * blockIdx.y + threadIdx.y;  // output column

    if (i >= m || j >= n) {
        return;
    }

    // The accumulator must start at zero; it was previously left uninitialised.
    int mids = 0;

    for (int i2 = 0; i2 < p; i2++) {
        int r1 = i2 - bdy_x;  // row offset of this kernel tap
        for (int j2 = 0; j2 < q; j2++) {
            int r2 = j2 - bdy_y;  // column offset of this kernel tap
            // tex2D takes (x, y) = (column, row), so the column index goes first.
            mids += N[i2 * q + j2] * tex2D<int>(texObj, j + r2, i + r1);
        }
    }

    s[i * n + j] = mids;
}

// Abort with a readable message when a CUDA runtime call fails, instead of
// silently continuing with a sticky error.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Host driver: builds an m x n test image and a p x q kernel, uploads the
 * flipped kernel to constant memory, binds the image to a texture object
 * backed by a CUDA array, runs conv2, and copies the result back to the host.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    int i, j;
    int* a;
    int* d_s;
    int B1[p][q], B2[p][q];

    // Half extents of the kernel; integer division already truncates.
    int bdy_x = p / 2, bdy_y = q / 2;

    CUDA_CHECK(cudaMallocHost(&a, m * n * sizeof(int)));
    CUDA_CHECK(cudaMalloc(&d_s, m * n * sizeof(int)));

    // Build the convolution kernel.
    for (i = 0; i < p; i++) {
        for (j = 0; j < q; j++) {
            B1[i][j] = i + j;
        }
    }

    // Build the source data.
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++) {
            a[i * n + j] = i + j;
        }
    }

    // Flip the kernel and copy it to constant memory.
    for (i = 0; i < p; i++) {
        for (j = 0; j < q; j++) {
            B2[i][j] = B1[p - i - 1][q - j - 1];
        }
    }
    CUDA_CHECK(cudaMemcpyToSymbol(N, B2, p * q * sizeof(int)));

    // Allocate the CUDA array (width n, height m) and copy the image into it.
    // The data is signed int and the kernel fetches tex2D<int>, so the channel
    // kind must be signed.
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindSigned);
    cudaArray_t cuArray;
    CUDA_CHECK(cudaMallocArray(&cuArray, &channelDesc, n, m));

    const size_t spitch = n * sizeof(int);  // bytes per source row
    CUDA_CHECK(cudaMemcpy2DToArray(cuArray, 0, 0, a, spitch, n * sizeof(int), m,
                                   cudaMemcpyHostToDevice));

    // Describe the resource backing the texture.
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    // Describe how the texture is sampled: out-of-range reads use border mode,
    // no filtering, raw element values, unnormalized coordinates.
    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0] = cudaAddressModeBorder;
    texDesc.addressMode[1] = cudaAddressModeBorder;
    texDesc.filterMode = cudaFilterModePoint;
    texDesc.readMode = cudaReadModeElementType;
    texDesc.normalizedCoords = 0;

    // Create the texture object.
    cudaTextureObject_t texObj = 0;
    CUDA_CHECK(cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL));

    // conv2 maps the x thread index to rows (m) and y to columns (n).
    dim3 threads(8, 8);
    dim3 blocks((m + threads.x - 1) / threads.x, (n + threads.y - 1) / threads.y);

    conv2<<<blocks, threads>>>(d_s, texObj, bdy_x, bdy_y);
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

    // Copy the result back; this overwrites the source data held in `a`.
    CUDA_CHECK(cudaMemcpy(a, d_s, m * n * sizeof(int), cudaMemcpyDeviceToHost));

    // Destroy the texture, then release the CUDA array and remaining buffers.
    CUDA_CHECK(cudaDestroyTextureObject(texObj));
    CUDA_CHECK(cudaFreeArray(cuArray));
    CUDA_CHECK(cudaFreeHost(a));
    CUDA_CHECK(cudaFree(d_s));

    return 0;
}

标签：CUDA C/C++二维卷积纹理内存纹理对象

CUDA C 学习笔记_2.0.4

CUDA C 学习笔记_2.0.4的评论 (共条)

你可能也喜欢这些文章

最新发布的文章

CUDA C 学习笔记_2.0.4

本文作者的其他文章

CUDA C 学习笔记_2.0.4的评论 (共 条)

你可能也喜欢这些文章

最新发布的文章

CUDA C 学习笔记_2.0.4的评论 (共条)