kms: Try to re-open the device on permission errors after resume

This commit is contained in:
Victoria Brekenfeld 2026-05-07 19:34:22 +02:00 committed by Victoria Brekenfeld
parent 23b198f30b
commit 61e677e39f
3 changed files with 453 additions and 158 deletions

View file

@ -28,7 +28,7 @@ use smithay::{
}, },
egl::{EGLContext, EGLDevice, EGLDisplay, context::ContextPriority}, egl::{EGLContext, EGLDevice, EGLDisplay, context::ContextPriority},
renderer::glow::GlowRenderer, renderer::glow::GlowRenderer,
session::Session, session::{Session, libseat::LibSeatSession},
}, },
desktop::utils::OutputPresentationFeedback, desktop::utils::OutputPresentationFeedback,
output::{Mode as OutputMode, Output, PhysicalProperties, Scale, Subpixel}, output::{Mode as OutputMode, Output, PhysicalProperties, Scale, Subpixel},
@ -40,7 +40,10 @@ use smithay::{
wayland_server::DisplayHandle, wayland_server::DisplayHandle,
}, },
utils::{Clock, DevPath, DeviceFd, Monotonic, Point, Transform}, utils::{Clock, DevPath, DeviceFd, Monotonic, Point, Transform},
wayland::drm_lease::{DrmLease, DrmLeaseState}, wayland::{
drm_lease::{DrmLease, DrmLeaseState},
drm_syncobj::supports_syncobj_eventfd,
},
}; };
use tracing::{error, info, warn}; use tracing::{error, info, warn};
use wayland_backend::server::ClientId; use wayland_backend::server::ClientId;
@ -99,6 +102,20 @@ pub struct Device {
pub socket: Option<Socket>, pub socket: Option<Socket>,
} }
#[derive(Debug)]
struct ReusableDevice {
leasing_global: Option<DrmLeaseState>,
active_clients: HashSet<ClientId>,
socket: Option<Socket>,
}
#[derive(Debug)]
struct OldDeviceState {
fd: DeviceFd,
outputs: HashMap<connector::Handle, Output>,
leased_connectors: HashSet<connector::Handle>,
}
#[derive(Debug)] #[derive(Debug)]
pub struct LockedDevice<'a> { pub struct LockedDevice<'a> {
pub inner: &'a mut InnerDevice, pub inner: &'a mut InnerDevice,
@ -222,138 +239,14 @@ impl State {
} }
} }
let fd = DrmDeviceFd::new(DeviceFd::from( let mut device = Device::new(
self.backend dev,
.kms() path,
.session &mut self.backend.kms().session,
.open( &mut self.common,
path, dh,
OFlags::RDWR | OFlags::CLOEXEC | OFlags::NOCTTY | OFlags::NONBLOCK, None,
) )?;
.with_context(|| {
format!(
"Failed to optain file descriptor for drm device: {}",
path.display()
)
})?,
));
let (drm, notifier) = DrmDevice::new(fd.clone(), false)
.with_context(|| format!("Failed to initialize drm device for: {}", path.display()))?;
let drm_node = DrmNode::from_dev_id(dev)?;
let supports_atomic = drm.is_atomic();
let gbm = GbmDevice::new(fd)
.with_context(|| format!("Failed to initialize GBM device for {}", path.display()))?;
let (render_node, render_formats, texture_formats, is_software) = {
let egl = init_egl(&gbm)?;
let render_node = egl
.device
.try_get_render_node()
.ok()
.and_then(std::convert::identity)
.unwrap_or(drm_node);
let render_formats = egl.context.dmabuf_render_formats().clone();
let texture_formats = egl.context.dmabuf_texture_formats().clone();
(
render_node,
render_formats,
texture_formats,
egl.device.is_software(),
)
};
let token = self
.common
.event_loop_handle
.insert_source(
notifier,
move |event, metadata, state: &mut State| match event {
DrmEvent::VBlank(crtc) => {
if let Some(device) = state.backend.kms().drm_devices.get_mut(&drm_node)
&& let Some(surface) = device.inner.surfaces.get_mut(&crtc)
{
surface.on_vblank(metadata.take());
}
}
DrmEvent::Error(err) => {
warn!(?err, "Failed to read events of device {:?}.", dev);
}
},
)
.with_context(|| format!("Failed to add drm device to event loop: {}", dev))?;
let socket = match (!is_software)
.then(|| self.create_socket(dh, render_node, texture_formats.clone()))
.transpose()
{
Ok(socket) => socket,
Err(err) => {
warn!(
?err,
"Failed to initialize hardware-acceleration for clients on {}.", render_node,
);
None
}
};
let leasing_global = match (!is_software)
.then(|| DrmLeaseState::new::<State>(dh, &drm_node))
.transpose()
{
Ok(global) => global,
Err(err) => {
// TODO: replace with inspect_err, once stable
warn!(
?err,
"Failed to initialize drm lease global for: {}", drm_node
);
None
}
};
let drm = GbmDrmOutputManager::new(
drm,
GbmAllocator::new(
gbm.clone(),
GbmBufferFlags::RENDERING | GbmBufferFlags::SCANOUT,
),
GbmFramebufferExporter::new(gbm.clone(), render_node.into()),
Some(gbm.clone()),
[
Fourcc::Abgr2101010,
Fourcc::Argb2101010,
Fourcc::Abgr8888,
Fourcc::Argb8888,
],
render_formats,
);
let mut device = Device {
drm,
inner: InnerDevice {
dev_node: drm_node,
render_node,
is_software,
egl: None,
outputs: HashMap::new(),
surfaces: HashMap::new(),
gbm,
leased_connectors: Vec::new(),
leasing_global,
active_leases: Vec::new(),
active_clients: HashSet::new(),
},
supports_atomic,
texture_formats,
event_token: Some(token),
socket,
};
let connectors = device.enumerate_surfaces()?.added; // There are no removed outputs on newly added devices let connectors = device.enumerate_surfaces()?.added; // There are no removed outputs on newly added devices
let mut wl_outputs = Vec::new(); let mut wl_outputs = Vec::new();
let mut w = self.common.shell.read().global_space().size.w as u32; let mut w = self.common.shell.read().global_space().size.w as u32;
@ -387,7 +280,7 @@ impl State {
// TODO atomic commit all surfaces together and drop surfaces, if it fails due to bandwidth // TODO atomic commit all surfaces together and drop surfaces, if it fails due to bandwidth
let kms = self.backend.kms(); let kms = self.backend.kms();
kms.drm_devices.insert(drm_node, device); kms.drm_devices.insert(device.inner.dev_node, device);
} }
self.common self.common
@ -489,6 +382,218 @@ impl State {
Ok(outputs_added) Ok(outputs_added)
} }
pub fn reopen_device(
&mut self,
dev: dev_t,
path: &Path,
dh: &DisplayHandle,
) -> Result<Vec<Output>> {
let backend = self.backend.kms();
let drm_node = backend
.drm_devices
.values()
.find_map(|device| {
(device.inner.dev_node.dev_id() == dev).then_some(device.inner.dev_node)
})
.with_context(|| format!("Couldn't find drm node for {}", dev))?;
if let Some(mut device) = backend.drm_devices.shift_remove(&drm_node) {
let is_primary = backend
.primary_node
.read()
.unwrap()
.is_some_and(|node| node == device.inner.render_node);
// all drm resources become inert
if let Some(token) = device.event_token.take() {
self.common.event_loop_handle.remove(token);
}
for (_, surface) in device.inner.surfaces.drain() {
surface.drop_and_join();
}
if let Some(leasing_global) = device.inner.leasing_global.as_mut() {
for (connector, _) in &device.inner.leased_connectors {
leasing_global.withdraw_connector(*connector);
}
device.inner.active_leases.clear();
}
// These contain a reference to the file descriptor
backend.api.as_mut().remove_node(&device.inner.render_node);
// trigger enumeration
let _ = backend.api.devices();
for surface in backend
.drm_devices
.values_mut()
.flat_map(|device| device.inner.surfaces.values_mut())
{
surface.remove_node(device.inner.render_node);
}
let syncobj_guard =
if is_primary && let Some(syncobj_state) = backend.syncobj_state.as_mut() {
Some(syncobj_state.close_device())
} else {
None
};
let (reusable, mut old_state) = device.reuse();
match TryInto::<OwnedFd>::try_into(old_state.fd) {
Ok(fd) => {
if let Err(err) = backend.session.close(fd) {
warn!("Failed to close drm device fd: {}", err);
}
}
Err(_) => {
warn!(?drm_node, "Unable to close drm device fd cleanly.");
}
};
let mut new_device = Device::new(
dev,
path,
&mut backend.session,
&mut self.common,
dh,
Some(reusable),
)?;
if let Some(guard) = syncobj_guard {
let import_device = new_device.drm.device().device_fd().clone();
if supports_syncobj_eventfd(&import_device) {
guard.update_device(import_device);
} else {
drop(guard);
dh.remove_global::<State>(backend.syncobj_state.take().unwrap().into_global());
}
}
let connectors = new_device.enumerate_surfaces()?.added;
let mut outputs_added = Vec::new();
let mut w = self.common.shell.read().global_space().size.w as u32;
{
for (conn, maybe_crtc) in connectors {
if let Some(output) = old_state.outputs.remove(&conn) {
let has_surface = if old_state.leased_connectors.contains(&conn) {
if let Some(crtc) = maybe_crtc {
new_device.inner.leased_connectors.push((conn, crtc));
info!(
"Connector {} is non-desktop, setting up for leasing",
output.name()
);
if let Some(lease_state) = new_device.inner.leasing_global.as_mut()
{
let physical = output.physical_properties();
lease_state.add_connector::<State>(
conn,
output.name(),
format!("{} {}", physical.make, physical.model),
);
}
} else {
warn!(
"Connector {} is non-desktop, but we don't have a free crtc: not leasing",
output.name()
);
}
false
} else {
let loc = output.current_location();
populate_modes(
new_device.drm.device_mut(),
&output,
conn,
true,
(loc.x as u32, loc.y as u32),
)
.with_context(|| "Failed to enumerate connector modes")?;
if let Some(crtc) = maybe_crtc {
match Surface::new(
&output,
crtc,
conn,
backend.primary_node.clone(),
new_device.inner.dev_node,
new_device.inner.render_node,
&self.common.event_loop_handle,
self.common.config.dynamic_conf.screen_filter().clone(),
self.common.shell.clone(),
self.common.startup_done.clone(),
) {
Ok(data) => {
new_device.inner.surfaces.insert(crtc, data);
true
}
Err(err) => {
error!(?crtc, "Failed to initialize surface: {}", err);
false
}
}
} else {
false
}
};
if !has_surface {
output
.user_data()
.get::<RefCell<OutputConfig>>()
.unwrap()
.borrow_mut()
.enabled = OutputState::Disabled;
}
new_device.inner.outputs.insert(conn, output);
} else {
match new_device.inner.connector_added(
new_device.drm.device_mut(),
backend.primary_node.clone(),
conn,
maybe_crtc,
(w, 0),
&self.common.event_loop_handle,
self.common.config.dynamic_conf.screen_filter().clone(),
self.common.shell.clone(),
self.common.startup_done.clone(),
) {
Ok((output, should_expose)) => {
if should_expose {
w += output.geometry().size.w as u32;
outputs_added.push(output.clone());
}
new_device.inner.outputs.insert(conn, output);
}
Err(err) => {
warn!(?err, "Failed to initialize output, skipping");
}
}
}
}
}
self.common
.output_configuration_state
.remove_heads(old_state.outputs.values());
self.common
.output_configuration_state
.add_heads(outputs_added.iter());
for output in old_state.outputs.values() {
self.common.remove_output(output);
}
backend.drm_devices.insert(drm_node, new_device);
backend.refresh_used_devices()?;
Ok(outputs_added)
} else {
Ok(Vec::new())
}
}
pub fn device_removed(&mut self, dev: dev_t, dh: &DisplayHandle) -> Result<()> { pub fn device_removed(&mut self, dev: dev_t, dh: &DisplayHandle) -> Result<()> {
let backend = self.backend.kms(); let backend = self.backend.kms();
// we can't use DrmNode::from_node_id, because that assumes the node is still on sysfs // we can't use DrmNode::from_node_id, because that assumes the node is still on sysfs
@ -581,6 +686,158 @@ pub struct OutputChanges {
} }
impl Device { impl Device {
fn new(
dev: dev_t,
path: impl AsRef<Path>,
session: &mut LibSeatSession,
common: &mut Common,
dh: &DisplayHandle,
reuse: Option<ReusableDevice>,
) -> Result<Self> {
let path = path.as_ref();
let fd = DrmDeviceFd::new(DeviceFd::from(
session
.open(
path,
OFlags::RDWR | OFlags::CLOEXEC | OFlags::NOCTTY | OFlags::NONBLOCK,
)
.with_context(|| {
format!(
"Failed to optain file descriptor for drm device: {}",
path.display()
)
})?,
));
let (drm, notifier) = DrmDevice::new(fd.clone(), false)
.with_context(|| format!("Failed to initialize drm device for: {}", path.display()))?;
let dev_node = DrmNode::from_dev_id(dev)?;
let supports_atomic = drm.is_atomic();
let gbm = GbmDevice::new(fd)
.with_context(|| format!("Failed to initialize GBM device for {}", path.display()))?;
let (render_node, render_formats, texture_formats, is_software) = {
let egl = init_egl(&gbm)?;
let render_node = egl
.device
.try_get_render_node()
.ok()
.and_then(std::convert::identity)
.unwrap_or(dev_node);
let render_formats = egl.context.dmabuf_render_formats().clone();
let texture_formats = egl.context.dmabuf_texture_formats().clone();
(
render_node,
render_formats,
texture_formats,
egl.device.is_software(),
)
};
let token = common
.event_loop_handle
.insert_source(
notifier,
move |event, metadata, state: &mut State| match event {
DrmEvent::VBlank(crtc) => {
if let Some(device) = state.backend.kms().drm_devices.get_mut(&dev_node)
&& let Some(surface) = device.inner.surfaces.get_mut(&crtc)
{
surface.on_vblank(metadata.take());
}
}
DrmEvent::Error(err) => {
warn!(?err, "Failed to read events of device {:?}.", dev);
}
},
)
.with_context(|| format!("Failed to add drm device to event loop: {}", dev))?;
let ReusableDevice {
leasing_global,
active_clients,
socket,
} = reuse.unwrap_or_else(|| {
let socket = match (!is_software)
.then(|| common.create_socket(dh, render_node, texture_formats.clone()))
.transpose()
{
Ok(socket) => socket,
Err(err) => {
warn!(
?err,
"Failed to initialize hardware-acceleration for clients on {}.",
render_node,
);
None
}
};
let leasing_global = match (!is_software)
.then(|| DrmLeaseState::new::<State>(dh, &dev_node))
.transpose()
{
Ok(global) => global,
Err(err) => {
// TODO: replace with inspect_err, once stable
warn!(
?err,
"Failed to initialize drm lease global for: {}", dev_node
);
None
}
};
ReusableDevice {
leasing_global,
active_clients: HashSet::new(),
socket,
}
});
let drm = GbmDrmOutputManager::new(
drm,
GbmAllocator::new(
gbm.clone(),
GbmBufferFlags::RENDERING | GbmBufferFlags::SCANOUT,
),
GbmFramebufferExporter::new(gbm.clone(), render_node.into()),
Some(gbm.clone()),
[
Fourcc::Abgr2101010,
Fourcc::Argb2101010,
Fourcc::Abgr8888,
Fourcc::Argb8888,
],
render_formats,
);
Ok(Device {
drm,
inner: InnerDevice {
dev_node,
render_node,
is_software,
egl: None,
outputs: HashMap::new(),
surfaces: HashMap::new(),
gbm,
leased_connectors: Vec::new(),
leasing_global,
active_leases: Vec::new(),
active_clients,
},
supports_atomic,
texture_formats,
event_token: Some(token),
socket,
})
}
pub fn enumerate_surfaces(&mut self) -> Result<OutputChanges> { pub fn enumerate_surfaces(&mut self) -> Result<OutputChanges> {
// enumerate our outputs // enumerate our outputs
let config = let config =
@ -634,6 +891,27 @@ impl Device {
drm: self.drm.lock(), drm: self.drm.lock(),
} }
} }
fn reuse(self) -> (ReusableDevice, OldDeviceState) {
let device = ReusableDevice {
leasing_global: self.inner.leasing_global,
active_clients: self.inner.active_clients,
socket: self.socket,
};
let state = OldDeviceState {
fd: self.drm.device().device_fd().device_fd(),
outputs: self.inner.outputs,
leased_connectors: self
.inner
.leased_connectors
.iter()
.map(|(conn, _)| *conn)
.collect(),
};
(device, state)
}
} }
impl LockedDevice<'_> { impl LockedDevice<'_> {

View file

@ -394,15 +394,37 @@ impl State {
continue; continue;
} }
}; };
let dh = state.common.display_handle.clone();
if state.backend.kms().drm_devices.contains_key(&drm_node) { if state.backend.kms().drm_devices.contains_key(&drm_node) {
match state.device_changed(dev) { match state.device_changed(dev) {
Ok(outputs) => added.extend(outputs), Ok(outputs) => added.extend(outputs),
Err(err) => { Err(err) => {
error!(?err, "Failed to update drm device {}.", path.display(),) error!(
?err,
"Failed to update drm device {}. Re-opening",
path.display(),
);
match state.reopen_device(dev, path, &dh) {
Ok(outputs) => added.extend(outputs),
Err(err) => {
error!(
?err,
"Failed to re-open drm device {}. Device lost",
path.display(),
);
if let Err(err) = state.device_removed(dev, &dh) {
error!(
?err,
"Failed to close drm device {}.",
path.display(),
);
};
}
}
} }
} }
} else { } else {
let dh = state.common.display_handle.clone();
match state.device_added(dev, path, &dh) { match state.device_added(dev, path, &dh) {
Ok(outputs) => added.extend(outputs), Ok(outputs) => added.extend(outputs),
Err(err) => error!(?err, "Failed to add drm device {}.", path.display(),), Err(err) => error!(?err, "Failed to add drm device {}.", path.display(),),

View file

@ -18,7 +18,7 @@ use smithay::{
use std::sync::Arc; use std::sync::Arc;
use tracing::{info, warn}; use tracing::{info, warn};
use crate::state::{ClientState, State, advertised_node_for_client}; use crate::state::{ClientState, Common, State, advertised_node_for_client};
#[derive(Debug)] #[derive(Debug)]
pub struct Socket { pub struct Socket {
@ -27,7 +27,7 @@ pub struct Socket {
pub dmabuf_global: DmabufGlobal, pub dmabuf_global: DmabufGlobal,
} }
impl State { impl Common {
pub(super) fn create_socket( pub(super) fn create_socket(
&mut self, &mut self,
dh: &DisplayHandle, dh: &DisplayHandle,
@ -36,7 +36,7 @@ impl State {
) -> Result<Socket> { ) -> Result<Socket> {
let socket_name = format!( let socket_name = format!(
"{}-{}", "{}-{}",
&self.common.socket.to_string_lossy(), &self.socket.to_string_lossy(),
render_node render_node
.dev_path() .dev_path()
.unwrap() .unwrap()
@ -53,33 +53,28 @@ impl State {
.with_context(|| "Failed to create drm format shared memory table")?; .with_context(|| "Failed to create drm format shared memory table")?;
let dmabuf_global = self let dmabuf_global = self
.common
.dmabuf_state .dmabuf_state
.create_global_with_filter_and_default_feedback::<State, _>(dh, &feedback, filter); .create_global_with_filter_and_default_feedback::<State, _>(dh, &feedback, filter);
let drm_global_id = self let drm_global_id = self.wl_drm_state.create_global_with_filter::<State, _>(
.common dh,
.wl_drm_state render_node
.create_global_with_filter::<State, _>( .dev_path_with_type(NodeType::Render)
dh, .or_else(|| render_node.dev_path())
render_node .ok_or(anyhow!(
.dev_path_with_type(NodeType::Render) "Could not determine path for gpu node: {}",
.or_else(|| render_node.dev_path()) render_node
.ok_or(anyhow!( ))?,
"Could not determine path for gpu node: {}", formats,
render_node &dmabuf_global,
))?, filter,
formats, );
&dmabuf_global,
filter,
);
// add a special socket for the gpu // add a special socket for the gpu
let listener = ListeningSocketSource::with_name(&socket_name) let listener = ListeningSocketSource::with_name(&socket_name)
.with_context(|| format!("Failed to bind socket to {}", socket_name))?; .with_context(|| format!("Failed to bind socket to {}", socket_name))?;
let socket_name_clone = socket_name.clone(); let socket_name_clone = socket_name.clone();
let token = self let token = self
.common
.event_loop_handle .event_loop_handle
.insert_source(listener, move |client_stream, _, state: &mut State| { .insert_source(listener, move |client_stream, _, state: &mut State| {
if let Err(err) = state.common.display_handle.insert_client( if let Err(err) = state.common.display_handle.insert_client(