wgpu_hal/vulkan/
mod.rs

/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement by the combination of the following ways:
  - temporarily allocating `Vec` on heap, where overhead is permitted
  - growing temporary local storage
  - using `inplace_it` on iterators

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views (they have) gets removed.
If Vulkan supports image-less framebuffers,
then the actual views are excluded from the framebuffer key.

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/
27
28mod adapter;
29mod command;
30mod conv;
31mod device;
32mod instance;
33
34use std::{borrow::Borrow, ffi::CStr, fmt, num::NonZeroU32, sync::Arc};
35
36use arrayvec::ArrayVec;
37use ash::{
38    extensions::{ext, khr},
39    vk,
40};
41use parking_lot::Mutex;
42
43const MILLIS_TO_NANOS: u64 = 1_000_000;
44const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
45
/// Marker type tying together all of the Vulkan backend's resource types
/// via the [`crate::Api`] trait.
#[derive(Clone)]
pub struct Api;

impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
74
/// Loaded `VK_EXT_debug_utils` function table together with the live
/// messenger registered through it.
struct DebugUtils {
    extension: ext::DebugUtils,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// Validation layer description, from `vk::LayerProperties`.
    validation_layer_description: std::ffi::CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    validation_layer_spec_version: u32,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
104
/// Instance-level state shared (via `Arc`) between the [`Instance`],
/// its [`Surface`]s, and everything created from it.
pub struct InstanceShared {
    raw: ash::Instance,
    /// Instance extensions that were enabled at creation.
    extensions: Vec<&'static CStr>,
    drop_guard: Option<crate::DropGuard>,
    flags: crate::InstanceFlags,
    /// Debug messenger state; `None` when the extension is unavailable
    /// or debug output was not requested.
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::GetPhysicalDeviceProperties2>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// Vulkan API version reported by the driver for this instance.
    driver_api_version: u32,
}

/// The Vulkan backend's instance: a shareable handle to [`InstanceShared`].
pub struct Instance {
    shared: Arc<InstanceShared>,
}
121
/// A configured swapchain: the raw handle, its creation parameters,
/// and the images it exposes.
struct Swapchain {
    raw: vk::SwapchainKHR,
    raw_flags: vk::SwapchainCreateFlagsKHR,
    functor: khr::Swapchain,
    device: Arc<DeviceShared>,
    fence: vk::Fence,
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,
    view_formats: Vec<wgt::TextureFormat>,
}

/// A presentable Vulkan surface; holds the currently configured
/// swapchain, if any.
pub struct Surface {
    raw: vk::SurfaceKHR,
    functor: khr::Surface,
    instance: Arc<InstanceShared>,
    swapchain: Option<Swapchain>,
}
139
/// A swapchain image handed out for rendering and later presentation.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// Index of `texture` within the swapchain's image array
    /// (passed back to `vkQueuePresentKHR`).
    index: u32,
    texture: Texture,
}

/// Lets a surface texture be used anywhere a plain [`Texture`] is expected.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}
151
/// A physical device together with the capability data queried from it.
pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceCapabilities,
    //phd_features: adapter::PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// How an extension's entry points are reached at runtime.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

/// Function tables for the optional device extensions we may load.
/// `None` means the corresponding extension is not in use on this device.
struct DeviceExtensionFunctions {
    draw_indirect_count: Option<khr::DrawIndirectCount>,
    timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>,
}
176
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height` or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport height does not require a Y shift.
    ///
    /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`).
    flip_y_requires_shift: bool,
    /// When set, framebuffer keys omit the actual image views
    /// (see the module docs on framebuffer caching).
    imageless_framebuffers: bool,
    image_view_usage: bool,
    /// Whether timeline semaphores can back `Fence` 1:1
    /// (see the module docs on fences).
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    robust_buffer_access: bool,
    robust_image_access: bool,
    zero_initialize_workgroup_memory: bool,
}
198
bitflags::bitflags!(
    /// Workaround flags for driver/vendor-specific bugs,
    /// detected per-adapter and consulted on the affected code paths.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
232
/// Hashable description of a single render-pass attachment;
/// a component of [`RenderPassKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}
239
240impl AttachmentKey {
241    /// Returns an attachment key for a compatible attachment.
242    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
243        Self {
244            format,
245            layout,
246            ops: crate::AttachmentOps::all(),
247        }
248    }
249}
250
/// Key component for one color attachment and its optional resolve target.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

/// Key component for the depth-stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    /// Load/store operations for the stencil aspect.
    stencil_ops: crate::AttachmentOps,
}

/// Lookup key for the device's render-pass cache
/// (see the module docs: render passes are cached forever).
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}
270
/// Everything the framebuffer cache needs to know about one attachment.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct FramebufferAttachment {
    /// Can be NULL if the framebuffer is image-less
    raw: vk::ImageView,
    raw_image_flags: vk::ImageCreateFlags,
    view_usage: crate::TextureUses,
    view_format: wgt::TextureFormat,
    raw_view_formats: Vec<vk::Format>,
}

/// Lookup key for the device's framebuffer cache (see the module docs:
/// entries are evicted when a referenced image view is destroyed).
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
    sample_count: u32,
}
287
/// Device-level state shared (via `Arc`) among the [`Queue`],
/// command encoders, swapchains, and the [`Device`] itself.
struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: ash::vk::Queue,
    /// Whether `raw` is owned by us (and must be destroyed on drop) —
    /// NOTE(review): inferred from the name; confirm against the Drop impl.
    handle_is_owned: bool,
    instance: Arc<InstanceShared>,
    physical_device: ash::vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    /// Timestamp-query tick duration (per Vulkan, nanoseconds per tick).
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    /// Render-pass cache, keyed by attachment layout; entries live forever.
    render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>,
    /// Framebuffer cache; entries are removed when a referenced view dies.
    framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>,
}

/// The Vulkan `Device`: shared device state plus the memory and
/// descriptor allocators and shader-translation options.
pub struct Device {
    shared: Arc<DeviceShared>,
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
}
316
/// The single hardware queue we submit to, plus the semaphore chain
/// linking submissions to presentation.
pub struct Queue {
    raw: vk::Queue,
    swapchain_fn: khr::Swapchain,
    device: Arc<DeviceShared>,
    family_index: u32,
    /// We use a redundant chain of semaphores to pass on the signal
    /// from submissions to the last present, since it's required by the
    /// specification.
    /// It would be correct to use a single semaphore there, but
    /// [Intel hangs in `anv_queue_finish`](https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508).
    relay_semaphores: [vk::Semaphore; 2],
    /// Index into `relay_semaphores` of the semaphore signaled by the
    /// most recent submission; `None` until the first submit, and
    /// cleared again by `present`.
    relay_index: Option<usize>,
}
330
/// A `VkBuffer` and (when we allocated it) its backing memory block.
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
}

/// A `VkImage` plus the allocation and metadata needed to use,
/// copy, and eventually destroy it.
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    drop_guard: Option<crate::DropGuard>,
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    usage: crate::TextureUses,
    format: wgt::TextureFormat,
    raw_flags: vk::ImageCreateFlags,
    copy_size: crate::CopyExtent,
    view_formats: Vec<wgt::TextureFormat>,
}
348
impl Texture {
    /// Returns the underlying raw `VkImage` handle.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
357
/// A `VkImageView` plus pre-computed framebuffer-attachment data.
#[derive(Debug)]
pub struct TextureView {
    raw: vk::ImageView,
    layers: NonZeroU32,
    /// Cached data for using this view in a [`FramebufferKey`].
    attachment: FramebufferAttachment,
}

/// Wrapper around a raw `VkSampler`.
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
}

/// A `VkDescriptorSetLayout` plus the bookkeeping needed to allocate
/// and write descriptor sets against it.
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    /// Total descriptor counts, for the descriptor allocator.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Descriptor type and count per binding.
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

/// A `VkPipelineLayout` plus the binding map naga needs to emit SPIR-V
/// for binding arrays.
#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_arrays: naga::back::spv::BindingMap,
}

/// An allocated descriptor set.
#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}
389
/// Scratch buffers reused across command recording, so per-call
/// temporary slices don't need fresh heap allocations
/// (see the module docs on stack memory).
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier>,
    image_barriers: Vec<vk::ImageMemoryBarrier>,
}

// SAFETY: the barrier structs are only `!Send`/`!Sync` because of their raw
// `p_next` pointers. NOTE(review): this relies on no live `p_next` chains
// ever being stored in these vectors — confirm at the recording call sites.
unsafe impl Send for Temp {}
unsafe impl Sync for Temp {}
399
400impl Temp {
401    fn clear(&mut self) {
402        self.marker.clear();
403        self.buffer_barriers.clear();
404        self.image_barriers.clear();
405        //see also - https://github.com/NotIntMan/inplace_it/issues/8
406    }
407
408    fn make_c_str(&mut self, name: &str) -> &CStr {
409        self.marker.clear();
410        self.marker.extend_from_slice(name.as_bytes());
411        self.marker.push(0);
412        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
413    }
414}
415
/// Owns a command pool and recycles the command buffers allocated from it.
pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,
    /// The command buffer currently being recorded into.
    active: vk::CommandBuffer,
    bind_point: vk::PipelineBindPoint,
    /// Scratch storage reused across recording calls.
    temp: Temp,
    /// Command buffers available for reuse.
    free: Vec<vk::CommandBuffer>,
    /// Command buffers abandoned mid-recording, pending recycling.
    discarded: Vec<vk::CommandBuffer>,
    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,
}
428
429impl fmt::Debug for CommandEncoder {
430    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
431        f.debug_struct("CommandEncoder")
432            .field("raw", &self.raw)
433            .finish()
434    }
435}
436
/// A finished command buffer, ready for submission.
#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

/// Either an already-compiled SPIR-V module, or naga IR to be compiled
/// when the pipeline using it is created.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: bool,
    },
}

/// Wrapper around a raw graphics `VkPipeline`.
#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

/// Wrapper around a raw compute `VkPipeline`.
#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

/// Wrapper around a raw `VkQueryPool`.
#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

/// See the module docs: a timeline semaphore when the device supports
/// them, otherwise a pool of binary `VkFence`s emulating one.
#[derive(Debug)]
pub enum Fence {
    TimelineSemaphore(vk::Semaphore),
    FencePool {
        /// Highest fence value known to have completed.
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        /// Signaled-and-reset fences, ready to be reused by `submit`.
        free: Vec<vk::Fence>,
    },
}
477
impl Fence {
    /// Scans `active` and returns the largest fence value, starting from
    /// `max_value`, whose `VkFence` reports as signaled.
    ///
    /// # Errors
    /// Propagates device errors from `vkGetFenceStatus`.
    fn check_active(
        device: &ash::Device,
        mut max_value: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                // Only query fences that could raise the current maximum.
                if value > max_value && device.get_fence_status(raw)? {
                    max_value = value;
                }
            }
        }
        Ok(max_value)
    }

    /// Returns the latest completed fence value without mutating state.
    ///
    /// For the timeline path, `extension` selects between the extension
    /// function table and the promoted core entry point; it must be
    /// `Some` for `TimelineSemaphore` fences.
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::TimelineSemaphore>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?,
                    ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Advances `last_completed` and recycles signaled pool fences from
    /// `active` into `free`. No-op for timeline semaphores.
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                // Move every completed fence over to the free list...
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                // ...and, if any moved, drop them from `active` and reset
                // just the newly freed tail so they can be reused.
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe {
                        device.reset_fences(&free[base_free..])?;
                    }
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
541
impl crate::Queue<Api> for Queue {
    /// Submits `command_buffers`, optionally signaling `signal_fence` at
    /// `value`, and chains a relay semaphore so a later `present` can wait
    /// on this submission (see the `relay_semaphores` field docs).
    unsafe fn submit(
        &mut self,
        command_buffers: &[&CommandBuffer],
        signal_fence: Option<(&mut Fence, crate::FenceValue)>,
    ) -> Result<(), crate::DeviceError> {
        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers);

        let mut fence_raw = vk::Fence::null();
        let mut vk_timeline_info;
        // Slot 0 holds the relay semaphore, slot 1 the optional timeline one.
        let mut signal_semaphores = [vk::Semaphore::null(), vk::Semaphore::null()];
        let signal_values;

        if let Some((fence, value)) = signal_fence {
            // Recycle any already-signaled pool fences before reusing one.
            fence.maintain(&self.device.raw)?;
            match *fence {
                Fence::TimelineSemaphore(raw) => {
                    // The value for the binary relay semaphore (index 0)
                    // is ignored by the driver; `!0` is just a placeholder.
                    signal_values = [!0, value];
                    signal_semaphores[1] = raw;
                    vk_timeline_info = vk::TimelineSemaphoreSubmitInfo::builder()
                        .signal_semaphore_values(&signal_values);
                    vk_info = vk_info.push_next(&mut vk_timeline_info);
                }
                Fence::FencePool {
                    ref mut active,
                    ref mut free,
                    ..
                } => {
                    // Reuse a free fence if available, else create one.
                    fence_raw = match free.pop() {
                        Some(raw) => raw,
                        None => unsafe {
                            self.device
                                .raw
                                .create_fence(&vk::FenceCreateInfo::builder(), None)?
                        },
                    };
                    active.push((value, fence_raw));
                }
            }
        }

        let wait_stage_mask = [vk::PipelineStageFlags::TOP_OF_PIPE];
        // Wait on the semaphore signaled by the previous submission (if
        // any) and pick the other one of the pair to signal this time.
        let sem_index = match self.relay_index {
            Some(old_index) => {
                vk_info = vk_info
                    .wait_semaphores(&self.relay_semaphores[old_index..old_index + 1])
                    .wait_dst_stage_mask(&wait_stage_mask);
                (old_index + 1) % self.relay_semaphores.len()
            }
            None => 0,
        };
        self.relay_index = Some(sem_index);
        signal_semaphores[0] = self.relay_semaphores[sem_index];

        // Include the timeline semaphore only if one was set above.
        let signal_count = if signal_semaphores[1] == vk::Semaphore::null() {
            1
        } else {
            2
        };
        vk_info = vk_info.signal_semaphores(&signal_semaphores[..signal_count]);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info.build()], fence_raw)?
        };
        Ok(())
    }

    /// Presents the acquired swapchain image `texture`, waiting on the
    /// last submission via the relay semaphore when one exists.
    ///
    /// # Panics
    /// Panics if `surface` has no configured swapchain.
    unsafe fn present(
        &mut self,
        surface: &mut Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let ssc = surface.swapchain.as_ref().unwrap();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let mut vk_info = vk::PresentInfoKHR::builder()
            .swapchains(&swapchains)
            .image_indices(&image_indices);

        // Consume the relay chain: the next submit starts a fresh one.
        if let Some(old_index) = self.relay_index.take() {
            vk_info = vk_info.wait_semaphores(&self.relay_semaphores[old_index..old_index + 1]);
        }

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    _ => crate::DeviceError::from(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    /// Returns the device's timestamp-query tick duration.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}
659
660impl From<vk::Result> for crate::DeviceError {
661    fn from(result: vk::Result) -> Self {
662        match result {
663            vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
664                Self::OutOfMemory
665            }
666            vk::Result::ERROR_DEVICE_LOST => Self::Lost,
667            _ => {
668                log::warn!("Unrecognized device error {:?}", result);
669                Self::Lost
670            }
671        }
672    }
673}