wgpu_core/device/
mod.rs

1use crate::{
2    binding_model,
3    hal_api::HalApi,
4    hub::Hub,
5    id,
6    identity::{GlobalIdentityHandlerFactory, Input},
7    resource::{Buffer, BufferAccessResult},
8    resource::{BufferAccessError, BufferMapOperation},
9    Label, DOWNLEVEL_ERROR_MESSAGE,
10};
11
12use arrayvec::ArrayVec;
13use hal::Device as _;
14use smallvec::SmallVec;
15use thiserror::Error;
16use wgt::{BufferAddress, TextureFormat};
17
18use std::{iter, num::NonZeroU32, ptr};
19
20pub mod global;
21mod life;
22pub mod queue;
23pub mod resource;
24#[cfg(any(feature = "trace", feature = "replay"))]
25pub mod trace;
26pub use {life::WaitIdleError, resource::Device};
27
/// Number of distinct shader stages tracked per pipeline
/// (presumably vertex/fragment/compute — TODO confirm against users).
pub const SHADER_STAGE_COUNT: usize = 3;
// Should be large enough for the largest possible texture row. This
// value is enough for a 16k texture with float4 format.
pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;

/// Timeout, in milliseconds, used when waiting for the GPU during cleanup
/// (unit inferred from the name — usage is outside this chunk; verify).
const CLEANUP_WAIT_MS: u32 = 5000;

// Error-label strings for implicit pipeline layout creation and
// entry-point validation (exact consumers are outside this chunk).
const IMPLICIT_FAILURE: &str = "failed implicit";
const EP_FAILURE: &str = "EP is invalid";

/// Device descriptor specialized to wgpu-core's borrowed `Label` type.
pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
39
/// Direction of a host (CPU-side) mapping of a buffer.
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "trace", derive(serde::Serialize))]
#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
pub enum HostMap {
    /// The host will read from the mapped range.
    Read,
    /// The host will write to the mapped range.
    Write,
}
48
/// Per-attachment data for a render pass, generic over the payload stored
/// for each attachment (e.g. a `TextureFormat` in `RenderPassContext`).
#[derive(Clone, Debug, Hash, PartialEq)]
#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))]
pub(crate) struct AttachmentData<T> {
    /// Color attachments; `None` marks an unused slot.
    pub colors: ArrayVec<Option<T>, { hal::MAX_COLOR_ATTACHMENTS }>,
    /// Resolve targets for multisampled color attachments.
    pub resolves: ArrayVec<T, { hal::MAX_COLOR_ATTACHMENTS }>,
    /// Optional depth/stencil attachment.
    pub depth_stencil: Option<T>,
}
// Note: `Eq` is implemented with only a `PartialEq` bound (rather than the
// conventional `T: Eq`), so `AttachmentData<f32>`-like types would also be
// `Eq`. Deliberate relaxation here; callers rely on `==` semantics only.
impl<T: PartialEq> Eq for AttachmentData<T> {}
57impl<T> AttachmentData<T> {
58    pub(crate) fn map<U, F: Fn(&T) -> U>(&self, fun: F) -> AttachmentData<U> {
59        AttachmentData {
60            colors: self.colors.iter().map(|c| c.as_ref().map(&fun)).collect(),
61            resolves: self.resolves.iter().map(&fun).collect(),
62            depth_stencil: self.depth_stencil.as_ref().map(&fun),
63        }
64    }
65}
66
/// What kind of object is being checked for render-pass compatibility;
/// used only to make error messages name the offending object.
#[derive(Debug, Copy, Clone)]
pub enum RenderPassCompatibilityCheckType {
    RenderPipeline,
    RenderBundle,
}
72
/// The attachment configuration of a render pass, used to check that
/// pipelines/bundles are compatible with the pass they are used in.
#[derive(Clone, Debug, Hash, PartialEq)]
#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))]
pub(crate) struct RenderPassContext {
    /// Formats of the pass's color/resolve/depth-stencil attachments.
    pub attachments: AttachmentData<TextureFormat>,
    /// MSAA sample count shared by all attachments.
    pub sample_count: u32,
    /// Multiview layer count, if multiview rendering is enabled.
    pub multiview: Option<NonZeroU32>,
}
80#[derive(Clone, Debug, Error)]
81#[non_exhaustive]
82pub enum RenderPassCompatibilityError {
83    #[error(
84        "Incompatible color attachments at indices {indices:?}: the RenderPass uses textures with formats {expected:?} but the {ty:?} uses attachments with formats {actual:?}",
85    )]
86    IncompatibleColorAttachment {
87        indices: Vec<usize>,
88        expected: Vec<Option<TextureFormat>>,
89        actual: Vec<Option<TextureFormat>>,
90        ty: RenderPassCompatibilityCheckType,
91    },
92    #[error(
93        "Incompatible depth-stencil attachment format: the RenderPass uses a texture with format {expected:?} but the {ty:?} uses an attachment with format {actual:?}",
94    )]
95    IncompatibleDepthStencilAttachment {
96        expected: Option<TextureFormat>,
97        actual: Option<TextureFormat>,
98        ty: RenderPassCompatibilityCheckType,
99    },
100    #[error(
101        "Incompatible sample count: the RenderPass uses textures with sample count {expected:?} but the {ty:?} uses attachments with format {actual:?}",
102    )]
103    IncompatibleSampleCount {
104        expected: u32,
105        actual: u32,
106        ty: RenderPassCompatibilityCheckType,
107    },
108    #[error("Incompatible multiview setting: the RenderPass uses setting {expected:?} but the {ty:?} uses setting {actual:?}")]
109    IncompatibleMultiview {
110        expected: Option<NonZeroU32>,
111        actual: Option<NonZeroU32>,
112        ty: RenderPassCompatibilityCheckType,
113    },
114}
115
116impl RenderPassContext {
117    // Assumes the renderpass only contains one subpass
118    pub(crate) fn check_compatible(
119        &self,
120        other: &Self,
121        ty: RenderPassCompatibilityCheckType,
122    ) -> Result<(), RenderPassCompatibilityError> {
123        if self.attachments.colors != other.attachments.colors {
124            let indices = self
125                .attachments
126                .colors
127                .iter()
128                .zip(&other.attachments.colors)
129                .enumerate()
130                .filter_map(|(idx, (left, right))| (left != right).then_some(idx))
131                .collect();
132            return Err(RenderPassCompatibilityError::IncompatibleColorAttachment {
133                indices,
134                expected: self.attachments.colors.iter().cloned().collect(),
135                actual: other.attachments.colors.iter().cloned().collect(),
136                ty,
137            });
138        }
139        if self.attachments.depth_stencil != other.attachments.depth_stencil {
140            return Err(
141                RenderPassCompatibilityError::IncompatibleDepthStencilAttachment {
142                    expected: self.attachments.depth_stencil,
143                    actual: other.attachments.depth_stencil,
144                    ty,
145                },
146            );
147        }
148        if self.sample_count != other.sample_count {
149            return Err(RenderPassCompatibilityError::IncompatibleSampleCount {
150                expected: self.sample_count,
151                actual: other.sample_count,
152                ty,
153            });
154        }
155        if self.multiview != other.multiview {
156            return Err(RenderPassCompatibilityError::IncompatibleMultiview {
157                expected: self.multiview,
158                actual: other.multiview,
159                ty,
160            });
161        }
162        Ok(())
163    }
164}
165
/// A buffer-map callback paired with the result it should be invoked with.
pub type BufferMapPendingClosure = (BufferMapOperation, BufferAccessResult);

/// User callbacks accumulated during device maintenance, to be fired after
/// all internal locks are released.
#[derive(Default)]
pub struct UserClosures {
    /// Completed (or failed) buffer-mapping callbacks.
    pub mappings: Vec<BufferMapPendingClosure>,
    /// `on_submitted_work_done` callbacks; usually at most one pending.
    pub submissions: SmallVec<[queue::SubmittedWorkDoneClosure; 1]>,
}
173
174impl UserClosures {
175    fn extend(&mut self, other: Self) {
176        self.mappings.extend(other.mappings);
177        self.submissions.extend(other.submissions);
178    }
179
180    fn fire(self) {
181        // Note: this logic is specifically moved out of `handle_mapping()` in order to
182        // have nothing locked by the time we execute users callback code.
183        for (operation, status) in self.mappings {
184            operation.callback.call(status);
185        }
186        for closure in self.submissions {
187            closure.call();
188        }
189    }
190}
191
/// Maps `size` bytes of `buffer` starting at `offset` into host memory and
/// returns a pointer to the mapped range.
///
/// Besides the raw hal mapping this also:
/// - records in `buffer.sync_mapped_writes` whether the range needs a flush
///   on unmap (non-coherent write mappings), and issues an invalidate now
///   for non-coherent read mappings;
/// - zero-fills any regions of the range that have never been initialized,
///   flushing them to the GPU when no later flush will happen anyway.
///
/// Both `offset` and `size` must be `COPY_BUFFER_ALIGNMENT`-aligned
/// (asserted below).
fn map_buffer<A: hal::Api>(
    raw: &A::Device,
    buffer: &mut Buffer<A>,
    offset: BufferAddress,
    size: BufferAddress,
    kind: HostMap,
) -> Result<ptr::NonNull<u8>, BufferAccessError> {
    // SAFETY: raw hal call. `buffer.raw` is unwrapped — assumes the caller
    // only maps live (non-destroyed) buffers; TODO confirm at call sites.
    let mapping = unsafe {
        raw.map_buffer(buffer.raw.as_ref().unwrap(), offset..offset + size)
            .map_err(DeviceError::from)?
    };

    // Non-coherent memory: reads require an invalidate before the host looks
    // at the data; writes require a flush later, so remember the range.
    buffer.sync_mapped_writes = match kind {
        HostMap::Read if !mapping.is_coherent => unsafe {
            raw.invalidate_mapped_ranges(
                buffer.raw.as_ref().unwrap(),
                iter::once(offset..offset + size),
            );
            None
        },
        HostMap::Write if !mapping.is_coherent => Some(offset..offset + size),
        _ => None,
    };

    assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
    assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0);
    // Zero out uninitialized parts of the mapping. (Spec dictates all resources
    // behave as if they were initialized with zero)
    //
    // If this is a read mapping, ideally we would use a `clear_buffer` command
    // before reading the data from GPU (i.e. `invalidate_range`). However, this
    // would require us to kick off and wait for a command buffer or piggy back
    // on an existing one (the later is likely the only worthwhile option). As
    // reading uninitialized memory isn't a particular important path to
    // support, we instead just initialize the memory here and make sure it is
    // GPU visible, so this happens at max only once for every buffer region.
    //
    // If this is a write mapping zeroing out the memory here is the only
    // reasonable way as all data is pushed to GPU anyways.

    // No need to flush if it is flushed later anyways.
    let zero_init_needs_flush_now = mapping.is_coherent && buffer.sync_mapped_writes.is_none();
    // SAFETY: hal guarantees `mapping.ptr` points at `size` mapped bytes.
    let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) };

    for uninitialized in buffer.initialization_status.drain(offset..(size + offset)) {
        // The mapping's pointer is already offset, however we track the
        // uninitialized range relative to the buffer's start.
        let fill_range =
            (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
        mapped[fill_range].fill(0);

        if zero_init_needs_flush_now {
            // SAFETY: flushing a range we just wrote within the live mapping.
            unsafe {
                raw.flush_mapped_ranges(buffer.raw.as_ref().unwrap(), iter::once(uninitialized))
            };
        }
    }

    Ok(mapping.ptr)
}
252
/// Pool of reusable hal command encoders, so submissions don't have to
/// create a fresh encoder every time.
struct CommandAllocator<A: hal::Api> {
    // Encoders returned via `release_encoder`, available for reuse.
    free_encoders: Vec<A::CommandEncoder>,
}
256
257impl<A: hal::Api> CommandAllocator<A> {
258    fn acquire_encoder(
259        &mut self,
260        device: &A::Device,
261        queue: &A::Queue,
262    ) -> Result<A::CommandEncoder, hal::DeviceError> {
263        match self.free_encoders.pop() {
264            Some(encoder) => Ok(encoder),
265            None => unsafe {
266                let hal_desc = hal::CommandEncoderDescriptor { label: None, queue };
267                device.create_command_encoder(&hal_desc)
268            },
269        }
270    }
271
272    fn release_encoder(&mut self, encoder: A::CommandEncoder) {
273        self.free_encoders.push(encoder);
274    }
275
276    fn dispose(self, device: &A::Device) {
277        log::info!("Destroying {} command encoders", self.free_encoders.len());
278        for cmd_encoder in self.free_encoders {
279            unsafe {
280                device.destroy_command_encoder(cmd_encoder);
281            }
282        }
283    }
284}
285
/// Error for operations performed against a device id that is not valid.
#[derive(Clone, Debug, Error)]
#[error("Device is invalid")]
pub struct InvalidDevice;
289
/// Errors attributable to the parent device of a resource or operation.
#[derive(Clone, Debug, Error)]
pub enum DeviceError {
    /// The device id does not refer to a valid device.
    #[error("Parent device is invalid")]
    Invalid,
    /// The device has been lost (see `hal::DeviceError::Lost`).
    #[error("Parent device is lost")]
    Lost,
    /// An allocation failed (see `hal::DeviceError::OutOfMemory`).
    #[error("Not enough memory left")]
    OutOfMemory,
}
299
300impl From<hal::DeviceError> for DeviceError {
301    fn from(error: hal::DeviceError) -> Self {
302        match error {
303            hal::DeviceError::Lost => DeviceError::Lost,
304            hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory,
305        }
306    }
307}
308
/// Error raised when an operation needs features that were not enabled at
/// device creation; carries the set of missing features.
#[derive(Clone, Debug, Error)]
#[error("Features {0:?} are required but not enabled on the device")]
pub struct MissingFeatures(pub wgt::Features);
312
/// Error raised when an operation needs downlevel capabilities the adapter
/// does not support; the message appends the shared downlevel explanation.
#[derive(Clone, Debug, Error)]
#[error(
    "Downlevel flags {0:?} are required but not supported on the device.\n{}",
    DOWNLEVEL_ERROR_MESSAGE
)]
pub struct MissingDownlevelFlags(pub wgt::DownlevelFlags);
319
/// Resolved ids for an implicitly created pipeline layout and its bind
/// group layouts (serialized in trace/replay captures).
#[derive(Clone, Debug)]
#[cfg_attr(feature = "trace", derive(serde::Serialize))]
#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
pub struct ImplicitPipelineContext {
    pub root_id: id::PipelineLayoutId,
    pub group_ids: ArrayVec<id::BindGroupLayoutId, { hal::MAX_BIND_GROUPS }>,
}
327
/// Caller-supplied identity inputs for an implicit pipeline layout and its
/// bind group layouts, prior to registration with the hub.
pub struct ImplicitPipelineIds<'a, G: GlobalIdentityHandlerFactory> {
    pub root_id: Input<G, id::PipelineLayoutId>,
    pub group_ids: &'a [Input<G, id::BindGroupLayoutId>],
}
332
333impl<G: GlobalIdentityHandlerFactory> ImplicitPipelineIds<'_, G> {
334    fn prepare<A: HalApi>(self, hub: &Hub<A, G>) -> ImplicitPipelineContext {
335        ImplicitPipelineContext {
336            root_id: hub.pipeline_layouts.prepare(self.root_id).into_id(),
337            group_ids: self
338                .group_ids
339                .iter()
340                .map(|id_in| hub.bind_group_layouts.prepare(id_in.clone()).into_id())
341                .collect(),
342        }
343    }
344}