From 61e677e39f7f4bf6e48cc46a68a3f86e96794d2f Mon Sep 17 00:00:00 2001 From: Victoria Brekenfeld Date: Thu, 7 May 2026 19:34:22 +0200 Subject: [PATCH] kms: Try to re-open the device on permission errors after resume --- src/backend/kms/device.rs | 548 ++++++++++++++++++++++++++++---------- src/backend/kms/mod.rs | 26 +- src/backend/kms/socket.rs | 37 ++- 3 files changed, 453 insertions(+), 158 deletions(-) diff --git a/src/backend/kms/device.rs b/src/backend/kms/device.rs index b2507bc3..c7a0a160 100644 --- a/src/backend/kms/device.rs +++ b/src/backend/kms/device.rs @@ -28,7 +28,7 @@ use smithay::{ }, egl::{EGLContext, EGLDevice, EGLDisplay, context::ContextPriority}, renderer::glow::GlowRenderer, - session::Session, + session::{Session, libseat::LibSeatSession}, }, desktop::utils::OutputPresentationFeedback, output::{Mode as OutputMode, Output, PhysicalProperties, Scale, Subpixel}, @@ -40,7 +40,10 @@ use smithay::{ wayland_server::DisplayHandle, }, utils::{Clock, DevPath, DeviceFd, Monotonic, Point, Transform}, - wayland::drm_lease::{DrmLease, DrmLeaseState}, + wayland::{ + drm_lease::{DrmLease, DrmLeaseState}, + drm_syncobj::supports_syncobj_eventfd, + }, }; use tracing::{error, info, warn}; use wayland_backend::server::ClientId; @@ -99,6 +102,20 @@ pub struct Device { pub socket: Option, } +#[derive(Debug)] +struct ReusableDevice { + leasing_global: Option, + active_clients: HashSet, + socket: Option, +} + +#[derive(Debug)] +struct OldDeviceState { + fd: DeviceFd, + outputs: HashMap, + leased_connectors: HashSet, +} + #[derive(Debug)] pub struct LockedDevice<'a> { pub inner: &'a mut InnerDevice, @@ -222,138 +239,14 @@ impl State { } } - let fd = DrmDeviceFd::new(DeviceFd::from( - self.backend - .kms() - .session - .open( - path, - OFlags::RDWR | OFlags::CLOEXEC | OFlags::NOCTTY | OFlags::NONBLOCK, - ) - .with_context(|| { - format!( - "Failed to optain file descriptor for drm device: {}", - path.display() - ) - })?, - )); - let (drm, notifier) = DrmDevice::new(fd.clone(), false) - .with_context(|| format!("Failed to initialize drm device for: {}", path.display()))?; - let drm_node = DrmNode::from_dev_id(dev)?; - let supports_atomic = drm.is_atomic(); - - let gbm = GbmDevice::new(fd) - .with_context(|| format!("Failed to initialize GBM device for {}", path.display()))?; - let (render_node, render_formats, texture_formats, is_software) = { - let egl = init_egl(&gbm)?; - - let render_node = egl - .device - .try_get_render_node() - .ok() - .and_then(std::convert::identity) - .unwrap_or(drm_node); - let render_formats = egl.context.dmabuf_render_formats().clone(); - let texture_formats = egl.context.dmabuf_texture_formats().clone(); - - ( - render_node, - render_formats, - texture_formats, - egl.device.is_software(), - ) - }; - - let token = self - .common - .event_loop_handle - .insert_source( - notifier, - move |event, metadata, state: &mut State| match event { - DrmEvent::VBlank(crtc) => { - if let Some(device) = state.backend.kms().drm_devices.get_mut(&drm_node) - && let Some(surface) = device.inner.surfaces.get_mut(&crtc) - { - surface.on_vblank(metadata.take()); - } - } - DrmEvent::Error(err) => { - warn!(?err, "Failed to read events of device {:?}.", dev); - } - }, - ) - .with_context(|| format!("Failed to add drm device to event loop: {}", dev))?; - - let socket = match (!is_software) - .then(|| self.create_socket(dh, render_node, texture_formats.clone())) - .transpose() - { - Ok(socket) => socket, - Err(err) => { - warn!( - ?err, - "Failed to initialize hardware-acceleration for clients on {}.", render_node, - ); - None - } - }; - - let leasing_global = match (!is_software) - .then(|| DrmLeaseState::new::(dh, &drm_node)) - .transpose() - { - Ok(global) => global, - Err(err) => { - // TODO: replace with inspect_err, once stable - warn!( - ?err, - "Failed to initialize drm lease global for: {}", drm_node - ); - None - } - }; - - let drm = GbmDrmOutputManager::new( - drm, - GbmAllocator::new( - gbm.clone(), - GbmBufferFlags::RENDERING | GbmBufferFlags::SCANOUT, - ), - GbmFramebufferExporter::new(gbm.clone(), render_node.into()), - Some(gbm.clone()), - [ - Fourcc::Abgr2101010, - Fourcc::Argb2101010, - Fourcc::Abgr8888, - Fourcc::Argb8888, - ], - render_formats, - ); - - let mut device = Device { - drm, - inner: InnerDevice { - dev_node: drm_node, - render_node, - is_software, - egl: None, - - outputs: HashMap::new(), - surfaces: HashMap::new(), - gbm, - - leased_connectors: Vec::new(), - leasing_global, - active_leases: Vec::new(), - active_clients: HashSet::new(), - }, - - supports_atomic, - texture_formats, - event_token: Some(token), - socket, - }; - + let mut device = Device::new( + dev, + path, + &mut self.backend.kms().session, + &mut self.common, + dh, + None, + )?; let connectors = device.enumerate_surfaces()?.added; // There are no removed outputs on newly added devices let mut wl_outputs = Vec::new(); let mut w = self.common.shell.read().global_space().size.w as u32; @@ -387,7 +280,7 @@ impl State { // TODO atomic commit all surfaces together and drop surfaces, if it fails due to bandwidth let kms = self.backend.kms(); - kms.drm_devices.insert(drm_node, device); + kms.drm_devices.insert(device.inner.dev_node, device); } self.common @@ -489,6 +382,218 @@ impl State { Ok(outputs_added) } + pub fn reopen_device( + &mut self, + dev: dev_t, + path: &Path, + dh: &DisplayHandle, + ) -> Result> { + let backend = self.backend.kms(); + let drm_node = backend + .drm_devices + .values() + .find_map(|device| { + (device.inner.dev_node.dev_id() == dev).then_some(device.inner.dev_node) + }) + .with_context(|| format!("Couldn't find drm node for {}", dev))?; + + if let Some(mut device) = backend.drm_devices.shift_remove(&drm_node) { + let is_primary = backend + .primary_node + .read() + .unwrap() + .is_some_and(|node| node == device.inner.render_node); + + // all drm resources become inert + if let Some(token) = device.event_token.take() { + self.common.event_loop_handle.remove(token); + } + for (_, surface) in device.inner.surfaces.drain() { + surface.drop_and_join(); + } + if let Some(leasing_global) = device.inner.leasing_global.as_mut() { + for (connector, _) in &device.inner.leased_connectors { + leasing_global.withdraw_connector(*connector); + } + device.inner.active_leases.clear(); + } + + // These contain a reference to the file descriptor + backend.api.as_mut().remove_node(&device.inner.render_node); + // trigger enumeration + let _ = backend.api.devices(); + + for surface in backend + .drm_devices + .values_mut() + .flat_map(|device| device.inner.surfaces.values_mut()) + { + surface.remove_node(device.inner.render_node); + } + let syncobj_guard = + if is_primary && let Some(syncobj_state) = backend.syncobj_state.as_mut() { + Some(syncobj_state.close_device()) + } else { + None + }; + + let (reusable, mut old_state) = device.reuse(); + + match TryInto::::try_into(old_state.fd) { + Ok(fd) => { + if let Err(err) = backend.session.close(fd) { + warn!("Failed to close drm device fd: {}", err); + } + } + Err(_) => { + warn!(?drm_node, "Unable to close drm device fd cleanly."); + } + }; + + let mut new_device = Device::new( + dev, + path, + &mut backend.session, + &mut self.common, + dh, + Some(reusable), + )?; + + if let Some(guard) = syncobj_guard { + let import_device = new_device.drm.device().device_fd().clone(); + if supports_syncobj_eventfd(&import_device) { + guard.update_device(import_device); + } else { + drop(guard); + dh.remove_global::(backend.syncobj_state.take().unwrap().into_global()); + } + } + + let connectors = new_device.enumerate_surfaces()?.added; + let mut outputs_added = Vec::new(); + let mut w = self.common.shell.read().global_space().size.w as u32; + + { + for (conn, maybe_crtc) in connectors { + if let Some(output) = old_state.outputs.remove(&conn) { + let has_surface = if old_state.leased_connectors.contains(&conn) { + if let Some(crtc) = maybe_crtc { + new_device.inner.leased_connectors.push((conn, crtc)); + info!( + "Connector {} is non-desktop, setting up for leasing", + output.name() + ); + if let Some(lease_state) = new_device.inner.leasing_global.as_mut() + { + let physical = output.physical_properties(); + lease_state.add_connector::( + conn, + output.name(), + format!("{} {}", physical.make, physical.model), + ); + } + } else { + warn!( + "Connector {} is non-desktop, but we don't have a free crtc: not leasing", + output.name() + ); + } + false + } else { + let loc = output.current_location(); + populate_modes( + new_device.drm.device_mut(), + &output, + conn, + true, + (loc.x as u32, loc.y as u32), + ) + .with_context(|| "Failed to enumerate connector modes")?; + + if let Some(crtc) = maybe_crtc { + match Surface::new( + &output, + crtc, + conn, + backend.primary_node.clone(), + new_device.inner.dev_node, + new_device.inner.render_node, + &self.common.event_loop_handle, + self.common.config.dynamic_conf.screen_filter().clone(), + self.common.shell.clone(), + self.common.startup_done.clone(), + ) { + Ok(data) => { + new_device.inner.surfaces.insert(crtc, data); + true + } + Err(err) => { + error!(?crtc, "Failed to initialize surface: {}", err); + false + } + } + } else { + false + } + }; + + if !has_surface { + output + .user_data() + .get::>() + .unwrap() + .borrow_mut() + .enabled = OutputState::Disabled; + } + new_device.inner.outputs.insert(conn, output); + } else { + match new_device.inner.connector_added( + new_device.drm.device_mut(), + backend.primary_node.clone(), + conn, + maybe_crtc, + (w, 0), + &self.common.event_loop_handle, + self.common.config.dynamic_conf.screen_filter().clone(), + self.common.shell.clone(), + self.common.startup_done.clone(), + ) { + Ok((output, should_expose)) => { + if should_expose { + w += output.geometry().size.w as u32; + outputs_added.push(output.clone()); + } + + new_device.inner.outputs.insert(conn, output); + } + Err(err) => { + warn!(?err, "Failed to initialize output, skipping"); + } + } + } + } + } + + self.common + .output_configuration_state + .remove_heads(old_state.outputs.values()); + self.common + .output_configuration_state + .add_heads(outputs_added.iter()); + + for output in old_state.outputs.values() { + self.common.remove_output(output); + } + + backend.drm_devices.insert(drm_node, new_device); + backend.refresh_used_devices()?; + + Ok(outputs_added) + } else { + Ok(Vec::new()) + } + } + pub fn device_removed(&mut self, dev: dev_t, dh: &DisplayHandle) -> Result<()> { let backend = self.backend.kms(); // we can't use DrmNode::from_node_id, because that assumes the node is still on sysfs @@ -581,6 +686,158 @@ pub struct OutputChanges { } impl Device { + fn new( + dev: dev_t, + path: impl AsRef, + session: &mut LibSeatSession, + common: &mut Common, + dh: &DisplayHandle, + reuse: Option, + ) -> Result { + let path = path.as_ref(); + let fd = DrmDeviceFd::new(DeviceFd::from( + session + .open( + path, + OFlags::RDWR | OFlags::CLOEXEC | OFlags::NOCTTY | OFlags::NONBLOCK, + ) + .with_context(|| { + format!( + "Failed to optain file descriptor for drm device: {}", + path.display() + ) + })?, + )); + let (drm, notifier) = DrmDevice::new(fd.clone(), false) + .with_context(|| format!("Failed to initialize drm device for: {}", path.display()))?; + let dev_node = DrmNode::from_dev_id(dev)?; + let supports_atomic = drm.is_atomic(); + + let gbm = GbmDevice::new(fd) + .with_context(|| format!("Failed to initialize GBM device for {}", path.display()))?; + let (render_node, render_formats, texture_formats, is_software) = { + let egl = init_egl(&gbm)?; + + let render_node = egl + .device + .try_get_render_node() + .ok() + .and_then(std::convert::identity) + .unwrap_or(dev_node); + let render_formats = egl.context.dmabuf_render_formats().clone(); + let texture_formats = egl.context.dmabuf_texture_formats().clone(); + + ( + render_node, + render_formats, + texture_formats, + egl.device.is_software(), + ) + }; + + let token = common + .event_loop_handle + .insert_source( + notifier, + move |event, metadata, state: &mut State| match event { + DrmEvent::VBlank(crtc) => { + if let Some(device) = state.backend.kms().drm_devices.get_mut(&dev_node) + && let Some(surface) = device.inner.surfaces.get_mut(&crtc) + { + surface.on_vblank(metadata.take()); + } + } + DrmEvent::Error(err) => { + warn!(?err, "Failed to read events of device {:?}.", dev); + } + }, + ) + .with_context(|| format!("Failed to add drm device to event loop: {}", dev))?; + + let ReusableDevice { + leasing_global, + active_clients, + socket, + } = reuse.unwrap_or_else(|| { + let socket = match (!is_software) + .then(|| common.create_socket(dh, render_node, texture_formats.clone())) + .transpose() + { + Ok(socket) => socket, + Err(err) => { + warn!( + ?err, + "Failed to initialize hardware-acceleration for clients on {}.", + render_node, + ); + None + } + }; + + let leasing_global = match (!is_software) + .then(|| DrmLeaseState::new::(dh, &dev_node)) + .transpose() + { + Ok(global) => global, + Err(err) => { + // TODO: replace with inspect_err, once stable + warn!( + ?err, + "Failed to initialize drm lease global for: {}", dev_node + ); + None + } + }; + + ReusableDevice { + leasing_global, + active_clients: HashSet::new(), + socket, + } + }); + + let drm = GbmDrmOutputManager::new( + drm, + GbmAllocator::new( + gbm.clone(), + GbmBufferFlags::RENDERING | GbmBufferFlags::SCANOUT, + ), + GbmFramebufferExporter::new(gbm.clone(), render_node.into()), + Some(gbm.clone()), + [ + Fourcc::Abgr2101010, + Fourcc::Argb2101010, + Fourcc::Abgr8888, + Fourcc::Argb8888, + ], + render_formats, + ); + + Ok(Device { + drm, + inner: InnerDevice { + dev_node, + render_node, + is_software, + egl: None, + + outputs: HashMap::new(), + surfaces: HashMap::new(), + gbm, + + leased_connectors: Vec::new(), + leasing_global, + active_leases: Vec::new(), + active_clients, + }, + + supports_atomic, + texture_formats, + event_token: Some(token), + socket, + }) + } + pub fn enumerate_surfaces(&mut self) -> Result { // enumerate our outputs let config = @@ -634,6 +891,27 @@ impl Device { drm: self.drm.lock(), } } + + fn reuse(self) -> (ReusableDevice, OldDeviceState) { + let device = ReusableDevice { + leasing_global: self.inner.leasing_global, + active_clients: self.inner.active_clients, + socket: self.socket, + }; + + let state = OldDeviceState { + fd: self.drm.device().device_fd().device_fd(), + outputs: self.inner.outputs, + leased_connectors: self + .inner + .leased_connectors + .iter() + .map(|(conn, _)| *conn) + .collect(), + }; + + (device, state) + } } impl LockedDevice<'_> { diff --git a/src/backend/kms/mod.rs b/src/backend/kms/mod.rs index 4c00b1dd..3a0d8c8c 100644 --- a/src/backend/kms/mod.rs +++ b/src/backend/kms/mod.rs @@ -394,15 +394,37 @@ impl State { continue; } }; + let dh = state.common.display_handle.clone(); + if state.backend.kms().drm_devices.contains_key(&drm_node) { match state.device_changed(dev) { Ok(outputs) => added.extend(outputs), Err(err) => { - error!(?err, "Failed to update drm device {}.", path.display(),) + error!( + ?err, + "Failed to update drm device {}. Re-opening", + path.display(), + ); + match state.reopen_device(dev, path, &dh) { + Ok(outputs) => added.extend(outputs), + Err(err) => { + error!( + ?err, + "Failed to re-open drm device {}. Device lost", + path.display(), + ); + if let Err(err) = state.device_removed(dev, &dh) { + error!( + ?err, + "Failed to close drm device {}.", + path.display(), + ); + }; + } + } } } } else { - let dh = state.common.display_handle.clone(); match state.device_added(dev, path, &dh) { Ok(outputs) => added.extend(outputs), Err(err) => error!(?err, "Failed to add drm device {}.", path.display(),), diff --git a/src/backend/kms/socket.rs b/src/backend/kms/socket.rs index 6148f41f..3babd35a 100644 --- a/src/backend/kms/socket.rs +++ b/src/backend/kms/socket.rs @@ -18,7 +18,7 @@ use smithay::{ use std::sync::Arc; use tracing::{info, warn}; -use crate::state::{ClientState, State, advertised_node_for_client}; +use crate::state::{ClientState, Common, State, advertised_node_for_client}; #[derive(Debug)] pub struct Socket { @@ -27,7 +27,7 @@ pub struct Socket { pub dmabuf_global: DmabufGlobal, } -impl State { +impl Common { pub(super) fn create_socket( &mut self, dh: &DisplayHandle, @@ -36,7 +36,7 @@ impl State { ) -> Result { let socket_name = format!( "{}-{}", - &self.common.socket.to_string_lossy(), + &self.socket.to_string_lossy(), render_node .dev_path() .unwrap() @@ -53,33 +53,28 @@ impl State { .with_context(|| "Failed to create drm format shared memory table")?; let dmabuf_global = self - .common .dmabuf_state .create_global_with_filter_and_default_feedback::(dh, &feedback, filter); - let drm_global_id = self - .common - .wl_drm_state - .create_global_with_filter::( - dh, - render_node - .dev_path_with_type(NodeType::Render) - .or_else(|| render_node.dev_path()) - .ok_or(anyhow!( - "Could not determine path for gpu node: {}", - render_node - ))?, - formats, - &dmabuf_global, - filter, - ); + let drm_global_id = self.wl_drm_state.create_global_with_filter::( + dh, + render_node + .dev_path_with_type(NodeType::Render) + .or_else(|| render_node.dev_path()) + .ok_or(anyhow!( + "Could not determine path for gpu node: {}", + render_node + ))?, + formats, + &dmabuf_global, + filter, + ); // add a special socket for the gpu let listener = ListeningSocketSource::with_name(&socket_name) .with_context(|| format!("Failed to bind socket to {}", socket_name))?; let socket_name_clone = socket_name.clone(); let token = self - .common .event_loop_handle .insert_source(listener, move |client_stream, _, state: &mut State| { if let Err(err) = state.common.display_handle.insert_client(