Skip to content

Commit d834291

Browse files
sneakyHulkSS-JIA
authored and committed
Fix for Raspberry Pi 5 GPU
1 parent 495eec7 commit d834291

3 files changed

Lines changed: 42 additions & 34 deletions

File tree

backends/vulkan/runtime/vk_api/Adapter.cpp

Lines changed: 17 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -123,12 +123,6 @@ VkDevice create_logical_device(
123123
defined(ETVK_INSPECT_PIPELINES)
124124
VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME,
125125
#endif /* VK_KHR_pipeline_executable_properties && ETVK_INSPECT_PIPELINES */
126-
#ifdef VK_KHR_cooperative_matrix
127-
VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME,
128-
#endif /* VK_KHR_cooperative_matrix */
129-
#ifdef VK_NV_cooperative_matrix2
130-
VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME,
131-
#endif /* VK_NV_cooperative_matrix2 */
132126
};
133127

134128
std::vector<const char*> enabled_device_extensions;
@@ -185,20 +179,6 @@ VkDevice create_logical_device(
185179
extension_list_top = &shader_int_dot_product_features;
186180
#endif /* VK_KHR_shader_integer_dot_product */
187181

188-
#ifdef VK_KHR_cooperative_matrix
189-
VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperative_matrix_features{
190-
physical_device.cooperative_matrix_features};
191-
cooperative_matrix_features.pNext = extension_list_top;
192-
extension_list_top = &cooperative_matrix_features;
193-
#endif /* VK_KHR_cooperative_matrix */
194-
195-
#ifdef VK_NV_cooperative_matrix2
196-
VkPhysicalDeviceCooperativeMatrix2FeaturesNV cooperative_matrix2_features{
197-
physical_device.cooperative_matrix2_features};
198-
cooperative_matrix2_features.pNext = extension_list_top;
199-
extension_list_top = &cooperative_matrix2_features;
200-
#endif /* VK_NV_cooperative_matrix2 */
201-
202182
device_create_info.pNext = extension_list_top;
203183

204184
VkDevice handle = nullptr;
@@ -215,7 +195,9 @@ VkDevice create_logical_device(
215195
return handle;
216196
}
217197

218-
bool test_linear_tiling_3d_image_support(VkDevice device) {
198+
bool test_linear_tiling_3d_image_support(
199+
VkDevice device,
200+
VkPhysicalDevice physical_device) {
219201
// Test creating a 3D image with linear tiling to see if it is supported.
220202
// According to the Vulkan spec, linear tiling may not be supported for 3D
221203
// images.
@@ -242,9 +224,15 @@ bool test_linear_tiling_3d_image_support(VkDevice device) {
242224

243225
if (res == VK_SUCCESS) {
244226
vkDestroyImage(device, image, nullptr);
227+
228+
VkFormatProperties props;
229+
vkGetPhysicalDeviceFormatProperties(
230+
physical_device, VK_FORMAT_R32G32B32A32_SFLOAT, &props);
231+
232+
return props.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
245233
}
246234

247-
return res == VK_SUCCESS;
235+
return false;
248236
}
249237

250238
} // namespace
@@ -275,8 +263,9 @@ Adapter::Adapter(
275263
compute_pipeline_cache_(device_.handle, cache_data_path),
276264
sampler_cache_(device_.handle),
277265
vma_(instance_, physical_device_.handle, device_.handle),
278-
linear_tiling_3d_enabled_{
279-
test_linear_tiling_3d_image_support(device_.handle)},
266+
linear_tiling_3d_enabled_{test_linear_tiling_3d_image_support(
267+
device_.handle,
268+
physical_device_.handle)},
280269
owns_device_{true} {}
281270

282271
Adapter::Adapter(
@@ -286,7 +275,7 @@ Adapter::Adapter(
286275
const uint32_t num_queues,
287276
const std::string& cache_data_path)
288277
: queue_usage_mutex_{},
289-
physical_device_(instance, physical_device),
278+
physical_device_(physical_device),
290279
queues_{},
291280
queue_usage_{},
292281
queue_mutexes_{},
@@ -298,8 +287,9 @@ Adapter::Adapter(
298287
compute_pipeline_cache_(device_.handle, cache_data_path),
299288
sampler_cache_(device_.handle),
300289
vma_(instance_, physical_device_.handle, device_.handle),
301-
linear_tiling_3d_enabled_{
302-
test_linear_tiling_3d_image_support(device_.handle)},
290+
linear_tiling_3d_enabled_{test_linear_tiling_3d_image_support(
291+
device_.handle,
292+
physical_device_.handle)},
303293
owns_device_{false} {
304294
std::vector<VkDeviceQueueCreateInfo> queue_create_infos;
305295
std::vector<std::pair<uint32_t, uint32_t>> queues_to_get;
@@ -375,10 +365,6 @@ void Adapter::submit_cmd(
375365
VK_CHECK(vkQueueSubmit(device_queue.handle, 1u, &submit_info, fence));
376366
}
377367

378-
void Adapter::override_device_name(const std::string& new_name) {
379-
physical_device_.override_device_name(new_name);
380-
}
381-
382368
std::string Adapter::stringize() const {
383369
std::stringstream ss;
384370

backends/vulkan/runtime/vk_api/memory/Allocator.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,32 @@
1111
namespace vkcompute {
1212
namespace vkapi {
1313

14+
bool test_host_cached_available(VkPhysicalDevice physical_device) {
15+
VkPhysicalDeviceMemoryProperties mem_props;
16+
vkGetPhysicalDeviceMemoryProperties(physical_device, &mem_props);
17+
18+
VkMemoryPropertyFlags const flags = mem_props.memoryTypes->propertyFlags;
19+
20+
bool const host_visible = flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
21+
bool const host_cached = flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
22+
23+
if (host_visible && host_cached) {
24+
return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
25+
}
26+
27+
return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
28+
}
29+
1430
Allocator::Allocator(
1531
VkInstance instance,
1632
VkPhysicalDevice physical_device,
1733
VkDevice device)
1834
: instance_{},
1935
physical_device_(physical_device),
2036
device_(device),
21-
allocator_{VK_NULL_HANDLE} {
37+
allocator_{VK_NULL_HANDLE},
38+
allocation_strategy_device_to_host_{
39+
test_host_cached_available(physical_device_)} {
2240
VmaVulkanFunctions vk_functions{};
2341
vk_functions.vkGetInstanceProcAddr = vkGetInstanceProcAddr;
2442
vk_functions.vkGetDeviceProcAddr = vkGetDeviceProcAddr;
@@ -44,7 +62,9 @@ Allocator::Allocator(Allocator&& other) noexcept
4462
: instance_(other.instance_),
4563
physical_device_(other.physical_device_),
4664
device_(other.device_),
47-
allocator_(other.allocator_) {
65+
allocator_(other.allocator_),
66+
allocation_strategy_device_to_host_(
67+
other.allocation_strategy_device_to_host_) {
4868
other.allocator_ = VK_NULL_HANDLE;
4969
other.device_ = VK_NULL_HANDLE;
5070
other.physical_device_ = VK_NULL_HANDLE;
@@ -158,7 +178,7 @@ VulkanBuffer Allocator::create_staging_buffer(
158178
alloc_create_info.flags |=
159179
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
160180
} else {
161-
alloc_create_info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
181+
alloc_create_info.flags |= allocation_strategy_device_to_host_;
162182
}
163183
alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
164184
alloc_create_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;

backends/vulkan/runtime/vk_api/memory/Allocator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ class Allocator final {
5757
VkPhysicalDevice physical_device_;
5858
VkDevice device_;
5959
VmaAllocator allocator_;
60+
VmaAllocationCreateFlags allocation_strategy_device_to_host_;
61+
6062

6163
public:
6264
VmaAllocationCreateInfo gpuonly_resource_create_info();

0 commit comments

Comments
 (0)