许可优化
产品
解决方案
服务支持
关于
软件库
当前位置:服务支持 >  软件文章 >  使用纯C语言通过Direct3D 11的Compute Shader做通用目的计算

使用纯C语言通过Direct3D 11的Compute Shader做通用目的计算

阅读数 5
点赞 0
article_banner
从2010年起,基于GPGPU的通用目的计算随着OpenCL以及CUDA的大热而变得异常火热。而基于GPU的通用目的计算,其实从其本质上来说就是通过GPU内部的Compute Shader来完成的。而OpenCL以及CUDA则是将主机端与GPU端的通信接口做了更为标准化的统一。而在最近这几年中,除了OpenCL与CUDA之外,还有像微软发布的C++ AMP,还有最近被融合到OpenMP的OpenACC等工具,这些都是利用GPU的大规模数据级并行计算来做数据级密集通用目的计算的。
而现在在高性能计算领域,用得比较多的仍然是CUDA与OpenCL。但是对应用开发者来说,如果我们要将一个应用上传到Windows Store,那么我们只能使用微软官方出的API;同理,我们如果要将应用上传到App Store,那么也只能使用Apple推出的Metal API。由于Metal API在使用上来说非常简便,并且Apple在编程指南上都有详细的描述以及demo提供,所以各位要参考基于Metal API的通用目的计算,可以直接上Apple开发者官网即可。而基于Direct3D的Compute shader构建起来比较繁琐,而且完整使用的例子也较少,这里将提供一份完整的,基于纯C语言的demo。

以下代码部分都用到了一些C99标准中所引入的语法特性以及库文件,所以各位应该至少在Visual Studio 2013上,最好是Visual Studio 2015上编写以下代码。笔者用的开发环境是Visual Studio 2015 Express Edition for Desktop,这是微软免费的IDE,尽管自带的工具不多,但够用。
我们首先创建一个名为SimpleCS的Windows Console Application,然后在Application Settings中将复选框里的勾全都去掉,然后勾选上Empty Project。然后我们添加main.c文件。根据这篇博文设置项目选项: http://blog.csdn.net/zenny_chen/article/details/52938512
然后在链接库选项中,把所有的12改成11即可。因为我们这里要用的是Direct3D 11,而不是12。12用起来非常繁琐,而且有几个C API的实现还有bug,等它稳定了之后我会再介绍Direct3D 12中使用Compute Shader的例子。然后,仍然选择x64进行构建。
以下是main.c的内容:
  1. // compute shader简单示例
  2. #include <windows.h>
  3. #include <crtdbg.h>
  4. #include <d3dcommon.h>
  5. #include <d3d11.h>
  6. #include <d3dcompiler.h>
  7. #include <stdio.h>
  8. #include <stdint.h>
  9. #include <stdbool.h>
  10. #define NUM_ELEMENTS 2048
  /* One element of the structured buffers exchanged with the compute shader:
     an integer and a float, mirroring the BufType struct in compute.hlsl. */
  struct BufType
  {
      int i;
      float f;
  };

  /* Host-side source data for the two input structured buffers. */
  static struct BufType s_vBuf0[NUM_ELEMENTS];
  static struct BufType s_vBuf1[NUM_ELEMENTS];
  16. static bool CreateComputeDevice(ID3D11Device** ppDeviceOut, ID3D11DeviceContext** ppContextOut)
  17. {
  18. *ppDeviceOut = NULL;
  19. *ppContextOut = NULL;
  20. const uint32_t uCreationFlags = D3D11_CREATE_DEVICE_SINGLETHREADED | D3D11_CREATE_DEVICE_DEBUG;
  21. D3D_FEATURE_LEVEL flOut;
  22. const D3D_FEATURE_LEVEL flvl[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };
  23. bool result = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, uCreationFlags, flvl,
  24. sizeof(flvl) / sizeof(D3D_FEATURE_LEVEL), D3D11_SDK_VERSION, ppDeviceOut, &flOut, ppContextOut) >= 0;
  25. if (result)
  26. printf("Currently use Direct3D level: %d.%d\n", flOut >> 12, (flOut >> 8) & 0xf);
  27. return result;
  28. }
  29. static bool CreateStructureBuffer(ID3D11Device* pDevice, uint32_t elementSize, uint32_t uCount,
  30. void* pInitData, ID3D11Buffer** ppBufferOut)
  31. {
  32. *ppBufferOut = NULL;
  33. D3D11_BUFFER_DESC desc;
  34. ZeroMemory(&desc, sizeof(desc));
  35. desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
  36. desc.ByteWidth = elementSize*uCount;
  37. desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
  38. desc.StructureByteStride = elementSize;
  39. if (pInitData != NULL)
  40. {
  41. D3D11_SUBRESOURCE_DATA InitData = { 0 };
  42. InitData.pSysMem = pInitData;
  43. return pDevice->lpVtbl->CreateBuffer(pDevice, &desc, &InitData, ppBufferOut) >= 0;
  44. }
  45. else
  46. return pDevice->lpVtbl->CreateBuffer(pDevice, &desc, NULL, ppBufferOut) >= 0;
  47. }
  48. static bool CreateConstantBuffer(ID3D11Device* pDevice, uint32_t nBytes, void* pInitData, ID3D11Buffer** ppBufferOut)
  49. {
  50. *ppBufferOut = NULL;
  51. D3D11_BUFFER_DESC desc;
  52. ZeroMemory(&desc, sizeof(desc));
  53. desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
  54. desc.ByteWidth = nBytes;
  55. desc.Usage = D3D11_USAGE_DYNAMIC;
  56. desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
  57. D3D11_SUBRESOURCE_DATA initData;
  58. initData.pSysMem = pInitData;
  59. initData.SysMemPitch = 0;
  60. initData.SysMemSlicePitch = 0;
  61. return pDevice->lpVtbl->CreateBuffer(pDevice, &desc, &initData, ppBufferOut) >= 0;
  62. }
  63. static bool CreateComputeShader(LPCWSTR pSrcFile, LPCSTR pFunctionName,
  64. ID3D11Device* pDevice, ID3D11ComputeShader** ppShaderOut)
  65. {
  66. uint32_t dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
  67. // Set the D3DCOMPILE_DEBUG flag to embed debug information in the shaders.
  68. // Setting this flag improves the shader debugging experience, but still allows 
  69. // the shaders to be optimized and to run exactly the way they will run in 
  70. // the release configuration of this program.
  71. dwShaderFlags |= D3DCOMPILE_DEBUG;
  72. const D3D_SHADER_MACRO defines[] =
  73. {
  74. "USE_STRUCTURED_BUFFERS", "1",
  75. NULL, NULL
  76. };
  77. // We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware
  78. ID3DBlob* pErrorBlob = NULL;
  79. ID3DBlob* computeShader = NULL;
  80. if (D3DCompileFromFile(pSrcFile, defines, NULL, pFunctionName, "cs_5_0", dwShaderFlags, 0,
  81. &computeShader, &pErrorBlob) < 0)
  82. {
  83. if (pErrorBlob != NULL)
  84. OutputDebugStringA((char*)pErrorBlob->lpVtbl->GetBufferPointer(pErrorBlob));
  85. if(pErrorBlob != NULL)
  86. pErrorBlob->lpVtbl->Release(pErrorBlob);
  87. if(computeShader != NULL)
  88. computeShader->lpVtbl->Release(computeShader);
  89. return false;
  90. }
  91. bool result = true;
  92. if (pDevice->lpVtbl->CreateComputeShader(pDevice, computeShader->lpVtbl->GetBufferPointer(computeShader),
  93. computeShader->lpVtbl->GetBufferSize(computeShader), NULL, ppShaderOut))
  94. result = false;
  95. if (pErrorBlob != NULL)
  96. pErrorBlob->lpVtbl->Release(pErrorBlob);
  97. if (computeShader != NULL)
  98. computeShader->lpVtbl->Release(computeShader);
  99. return result;
  100. }
  101. /**
  102. 利用ID3D11Device::CreateShaderResouceView()来创建GPU中Buffer的resourceView
  103. */
  104. static bool CreateBufferSRV(ID3D11Device* pDevice, ID3D11Buffer* pBuffer, ID3D11ShaderResourceView** ppSRVOut)
  105. {
  106. D3D11_BUFFER_DESC descBuf;
  107. ZeroMemory(&descBuf, sizeof(descBuf));
  108. pBuffer->lpVtbl->GetDesc(pBuffer, &descBuf);
  109. D3D11_SHADER_RESOURCE_VIEW_DESC desc;
  110. ZeroMemory(&desc, sizeof(desc));
  111. desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;
  112. desc.BufferEx.FirstElement = 0;
  113. //假定这是个structure buffer
  114. desc.Format = DXGI_FORMAT_UNKNOWN;
  115. desc.BufferEx.NumElements = descBuf.ByteWidth / descBuf.StructureByteStride;
  116. return pDevice->lpVtbl->CreateShaderResourceView(pDevice, (ID3D11Resource*)pBuffer, &desc, ppSRVOut) >= 0;
  117. }
  118. static bool CreateBufferUAV(ID3D11Device* pDevice, ID3D11Buffer* pBuffer, ID3D11UnorderedAccessView** ppUAVOut)
  119. {
  120. D3D11_BUFFER_DESC descBuf;
  121. ZeroMemory(&descBuf, sizeof(descBuf));
  122. pBuffer->lpVtbl->GetDesc(pBuffer, &descBuf);
  123. D3D11_UNORDERED_ACCESS_VIEW_DESC desc;
  124. ZeroMemory(&desc, sizeof(desc));
  125. desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
  126. desc.Buffer.FirstElement = 0;
  127. //假设这是一个structure buffer
  128. desc.Format = DXGI_FORMAT_UNKNOWN;
  129. desc.Buffer.NumElements = descBuf.ByteWidth / descBuf.StructureByteStride;
  130. return pDevice->lpVtbl->CreateUnorderedAccessView(pDevice, (ID3D11Resource*)pBuffer, &desc, ppUAVOut) >= 0;
  131. }
  132. static void RunComputeShader(ID3D11DeviceContext* pImmediateContext, ID3D11ComputeShader* pComputeShader,
  133. uint32_t nSRVs, uint32_t nUAVs, ID3D11ShaderResourceView* pShaderResourceViews[],
  134. ID3D11UnorderedAccessView* pUnorderedViews[], uint32_t X, uint32_t Y, uint32_t Z)
  135. {
  136. pImmediateContext->lpVtbl->CSSetShader(pImmediateContext, pComputeShader, NULL, 0);
  137. pImmediateContext->lpVtbl->CSSetShaderResources(pImmediateContext, 0, nSRVs, pShaderResourceViews);
  138. pImmediateContext->lpVtbl->CSSetUnorderedAccessViews(pImmediateContext, 0, nUAVs, pUnorderedViews, NULL);
  139. pImmediateContext->lpVtbl->Dispatch(pImmediateContext, NUM_ELEMENTS, 1, 1);
  140. //清空Shader和各个Shader Resource View、Unordered Access View以及一些Constant Buffer
  141. pImmediateContext->lpVtbl->CSSetShader(pImmediateContext, NULL, NULL, 0);
  142. ID3D11UnorderedAccessView* ppUAViewNULL[] = { NULL, NULL };
  143. pImmediateContext->lpVtbl->CSSetUnorderedAccessViews(pImmediateContext, 0, 2, ppUAViewNULL, NULL);
  144. ID3D11ShaderResourceView* ppSRVNULL[2] = { NULL,NULL };
  145. pImmediateContext->lpVtbl->CSSetShaderResources(pImmediateContext, 0, 2, ppSRVNULL);
  146. ID3D11Buffer* ppCBNULL[1] = { NULL };
  147. pImmediateContext->lpVtbl->CSSetConstantBuffers(pImmediateContext, 0, 1, ppCBNULL);
  148. }
  149. static ID3D11Buffer* CreateAndCopyToDebugBuf(ID3D11Device* pDevice, ID3D11DeviceContext* pd3dImmediateContext,
cpp
运行
  1. ID3D11Buffer* pBuffer)
  2. {
  3. ID3D11Buffer* debugBuf = NULL;
  4. D3D11_BUFFER_DESC desc;
  5. ZeroMemory(&desc, sizeof(desc));
  6. pBuffer->lpVtbl->GetDesc(pBuffer, &desc);
  7. desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
  8. desc.Usage = D3D11_USAGE_STAGING;
  9. desc.BindFlags = 0;
  10. desc.MiscFlags = 0;
  11. if (pDevice->lpVtbl->CreateBuffer(pDevice, &desc, NULL, &debugBuf) >= 0)
  12. {
  13. pd3dImmediateContext->lpVtbl->CopyResource(pd3dImmediateContext, (ID3D11Resource*)debugBuf,
  14. (ID3D11Resource*)pBuffer);
  15. }
  16. return debugBuf;
  17. }
  18. int main(void)
  19. {
  20. _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
  21. ID3D11Device *device = NULL;
  22. ID3D11DeviceContext *context = NULL;
  23. ID3D11ComputeShader *computeShader = NULL;
  24. //各个Buffer指针变量
  25. ID3D11Buffer *srcBuffer0 = NULL;
  26. ID3D11Buffer *srcBuffer1 = NULL;
  27. ID3D11Buffer *resultBuffer = NULL;
  28. ID3D11Buffer *srcDstBuffer = NULL;
  29. ID3D11Buffer *constBuffer = NULL;
  30. //读写上面buffer的ID3D11ShaderResourceView和UnorderedAccessView接口
  31. ID3D11ShaderResourceView *srcBuf0SRV = NULL;
  32. ID3D11ShaderResourceView *srcBuf1SRV = NULL;
  33. ID3D11UnorderedAccessView *resBufUAV = NULL;
  34. ID3D11UnorderedAccessView *srcdstBufUAV = NULL;
  35. int localBuffer[NUM_ELEMENTS];
  36. for (int i = 0; i < NUM_ELEMENTS; i++)
  37. localBuffer[i] = i + 1;
  38. do
  39. {
  40. if (!CreateComputeDevice(&device, &context))
  41. {
  42. puts("CreateComputeDevice failed!");
  43. break;
  44. }
  45. if (!CreateComputeShader(L"compute.hlsl", "CSMain", device, &computeShader))
  46. {
  47. puts("CreateComputeShader failed!");
  48. break;
  49. }
  50. //初始化计算数据
  51. for (int i = 0; i<NUM_ELEMENTS; i++)
  52. {
  53. s_vBuf0[i].i = i;
  54. s_vBuf0[i].f = (float)i;
  55. s_vBuf1[i].i = i;
  56. s_vBuf1[i].f = (float)i;
  57. }
  58. //为CPU中的数组创建GPU中相应Buffer
  59. if (!CreateStructureBuffer(device, sizeof(struct BufType), NUM_ELEMENTS, s_vBuf0, &srcBuffer0))
  60. {
  61. puts("create srcBuffer0 failed");
  62. break;
  63. }
  64. if (!CreateStructureBuffer(device, sizeof(struct BufType), NUM_ELEMENTS, s_vBuf1, &srcBuffer1))
  65. {
  66. puts("create srcBuffer1 failed");
  67. break;
  68. }
  69. if (!CreateStructureBuffer(device, sizeof(struct BufType), NUM_ELEMENTS, NULL, &resultBuffer))
  70. {
  71. puts("Create resultBuffer failed");
  72. break;
  73. }
  74. if (!CreateStructureBuffer(device, sizeof(localBuffer[0]), _countof(localBuffer), localBuffer, &srcDstBuffer))
  75. {
  76. puts("create srcDstBuffer failed");
  77. break;
  78. }
  79. // 在D3D11中,常量缓存至少需要4个int元素
  80. int value[4] = { 10, 20 };
  81. if (!CreateConstantBuffer(device, sizeof(value), value, &constBuffer))
  82. {
  83. puts("Create constBuffer failed!");
  84. break;
  85. }
  86. // 绑定常量缓存
  87. context->lpVtbl->CSSetConstantBuffers(context, 0, 1, &constBuffer);
  88. //为buffer创建相应的shader resource view与unordered access view
  89. if (!CreateBufferSRV(device, srcBuffer0, &srcBuf0SRV))
  90. {
  91. puts("create srcBuf0SRV failed");
  92. break;
  93. }
  94. if (!CreateBufferSRV(device, srcBuffer1, &srcBuf1SRV))
  95. {
  96. puts("create srcBuf1SRV failed");
  97. break;
  98. }
  99. if (!CreateBufferUAV(device, resultBuffer, &resBufUAV))
  100. {
  101. puts("create resBufUAV failed");
  102. break;
  103. }
  104. if (!CreateBufferUAV(device, srcDstBuffer, &srcdstBufUAV))
  105. {
  106. puts("create srcdstBufUAV failed!");
  107. break;
  108. }
  109. ID3D11ShaderResourceView* shaderResourceViews[] = { srcBuf0SRV, srcBuf1SRV };
  110. ID3D11UnorderedAccessView* unorderedAccessViews[] = { resBufUAV, srcdstBufUAV };
  111. //运行Shader Compute程序
  112. RunComputeShader(context, computeShader, _countof(shaderResourceViews), _countof(unorderedAccessViews),
  113. shaderResourceViews, unorderedAccessViews, NUM_ELEMENTS, 1, 1);
  114. //将GPU计算的结果写回CPU
  115. ID3D11Buffer* debugBuf = NULL;
  116. // 先查看resultBuffer中的内容
  117. debugBuf = CreateAndCopyToDebugBuf(device, context, resultBuffer);
  118. if (debugBuf == NULL)
  119. {
  120. puts("debugBuf create failed!");
  121. break;
  122. }
  123. D3D11_MAPPED_SUBRESOURCE mappedResource;
  124. context->lpVtbl->Map(context, (ID3D11Resource*)debugBuf, 0, D3D11_MAP_READ, 0, &mappedResource);
  125. struct BufType *p = mappedResource.pData;
  126. puts("Output GPU resultBuffer results, first ten:");
  127. for (int i = 0; i < 10; i++)
  128. printf("i: %d, f: %.1f\n", p[i].i, p[i].f);
  129. puts("last ten:");
  130. for(int i = NUM_ELEMENTS - 10; i < NUM_ELEMENTS; i++)
  131. printf("i: %d, f: %.1f\n", p[i].i, p[i].f);
  132. context->lpVtbl->Unmap(context, (ID3D11Resource*)debugBuf, 0);
  133. debugBuf->lpVtbl->Release(debugBuf);
  134. // 再查看srcdstBuffer中的内容
  135. debugBuf = CreateAndCopyToDebugBuf(device, context, srcDstBuffer);
  136. if (debugBuf == NULL)
  137. {
  138. puts("debugBuf create failed!");
  139. break;
  140. }
  141. context->lpVtbl->Map(context, (ID3D11Resource*)debugBuf, 0, D3D11_MAP_READ, 0, &mappedResource);
  142. int *q = mappedResource.pData;
  143. puts("Output GPU srcDstBuffer results, first ten:");
  144. for (int i = 0; i < 10; i++)
  145. printf("[%d] = %d\n", i, q[i]);
  146. puts("last ten:");
  147. for (int i = NUM_ELEMENTS - 10; i < NUM_ELEMENTS; i++)
  148. printf("[%d] = %d\n", i, q[i]);
  149. context->lpVtbl->Unmap(context, (ID3D11Resource*)debugBuf, 0);
  150. debugBuf->lpVtbl->Release(debugBuf);
  151. }
  152. while (false);
  153. //释放资源
  154. if (srcBuf0SRV != NULL)
  155. srcBuf0SRV->lpVtbl->Release(srcBuf0SRV);
  156. if (srcBuf1SRV != NULL)
  157. srcBuf1SRV->lpVtbl->Release(srcBuf1SRV);
  158. if (resBufUAV != NULL)
  159. resBufUAV->lpVtbl->Release(resBufUAV);
  160. if (srcdstBufUAV != NULL)
  161. srcdstBufUAV->lpVtbl->Release(srcdstBufUAV);
  162. if (srcBuffer0 != NULL)
  163. srcBuffer0->lpVtbl->Release(srcBuffer0);
  164. if (srcBuffer1 != NULL)
  165. srcBuffer1->lpVtbl->Release(srcBuffer1);
  166. if (resultBuffer != NULL)
  167. resultBuffer->lpVtbl->Release(resultBuffer);
  168. if (srcDstBuffer != NULL)
  169. srcDstBuffer->lpVtbl->Release(srcDstBuffer);
  170. if (computeShader != NULL)
  171. computeShader->lpVtbl->Release(computeShader);
  172. if (context != NULL)
  173. context->lpVtbl->Release(context);
  174. if (device != NULL)
  175. device->lpVtbl->Release(device);
  176. puts("\nInput enter to exit...");
  177. getchar();
  178. }
cpp
运行

完成之后,我们再创建一个名为compute.hlsl的shader文件,将它存放在与main.c相同的目录下。
  // Compute shader: combines two structured input buffers with a constant factor
  // and a read/write integer buffer, one element per thread.
  struct BufType
  {
      int i;
      float f;
  };

  // Matches the constant buffer bound by the host at slot b0
  cbuffer cbNeverChanges : register(b0)
  {
      int cValue0;
      int cValue1;
  };

  // Match the shader resource views bound by the host
  StructuredBuffer<BufType> buffer0 : register(t0);
  StructuredBuffer<BufType> buffer1 : register(t1);

  // Match the unordered access views bound by the host
  RWStructuredBuffer<BufType> bufferOut : register(u0);
  RWStructuredBuffer<int> srcdstBuffer : register(u1);

  // In Direct3D 11 a thread group may contain at most 1024 threads
  [numthreads(1024, 1, 1)]
  void CSMain(uint3 groupID : SV_GroupID, uint3 tid : SV_DispatchThreadID,
      uint3 localTID : SV_GroupThreadID, uint gIdx : SV_GroupIndex)
  {
      // The host may dispatch more threads than there are elements; threads past
      // the end of the output buffer have nothing to do.
      uint elementCount;
      uint elementStride;
      bufferOut.GetDimensions(elementCount, elementStride);

      const uint index = tid.x;
      if (index >= elementCount)
          return;

      // Guard the integer division against a zero divisor in the constant buffer.
      const int cValue = (cValue0 != 0) ? (cValue1 / cValue0) : 0;

      int resValue = (buffer0[index].i + buffer1[index].i) * cValue - srcdstBuffer[index];
      bufferOut[index].i = resValue;
      bufferOut[index].f = (buffer0[index].f + buffer1[index].f) * float(cValue);
      srcdstBuffer[index] = resValue;
  }
cpp
运行


我们在保存这两个文件的时候,可以在菜单栏File下面找到Advanced Save Options...,可以将Encoding改为Unicode(UTF-8 without Signature),这样我们就可以在所有操作系统以及语言环境上看到正常的中文汉字了。否则,若系统不支持GBK或GB2312,就会导致汉字部分出现乱码。完成之后我们就可以编译运行了。


免责声明:本文系网络转载或改编,未找到原创作者,版权归原作者所有。如涉及版权,请联系删
相关文章
QR Code
微信扫一扫,欢迎咨询~

联系我们
武汉格发信息技术有限公司
湖北省武汉市经开区科技园西路6号103孵化器
电话:155-2731-8020 座机:027-59821821
邮件:tanzw@gofarlic.com
Copyright © 2023 Gofarsoft Co.,Ltd. 保留所有权利
遇到许可问题?该如何解决!?
评估许可证实际采购量? 
不清楚软件许可证使用数据? 
收到软件厂商律师函!?  
想要少购买点许可证,节省费用? 
收到软件厂商侵权通告!?  
有正版license,但许可证不够用,需要新购? 
联系方式 155-2731-8020
预留信息,一起解决您的问题
* 姓名:
* 手机:

* 公司名称:

姓名不为空

手机不正确

公司不为空