summaryrefslogtreecommitdiff
path: root/Kernel/FileSystem
diff options
context:
space:
mode:
authorAndreas Kling <kling@serenityos.org>2021-12-25 23:44:04 +0100
committerAndreas Kling <kling@serenityos.org>2021-12-26 00:42:51 +0100
commit16850423cf16183f4304ded1dd135109e4625fe0 (patch)
tree20d250823654f9c0ddce8c145593febc2e745b2d /Kernel/FileSystem
parent4d585cdb825d23743f1d677ee0a904a16724eef9 (diff)
downloadserenity-16850423cf16183f4304ded1dd135109e4625fe0.zip
Kernel: Fix deadlock caused by page faults while holding disk cache lock
If the data passed to sys$write() is backed by a not-yet-paged-in inode mapping, we could end up in a situation where we get a page fault when trying to copy data from userspace. If that page fault handler tried reading from an inode that someone else had locked while waiting for the disk cache lock, we'd deadlock. This patch fixes the issue by copying the userspace data into a local buffer before acquiring the disk cache lock. This is not ideal since it incurs an extra copy, and I'm sure we can think of a better solution eventually. This was a frequent cause of startup deadlocks on x86_64 for me. :^)
Diffstat (limited to 'Kernel/FileSystem')
-rw-r--r--Kernel/FileSystem/BlockBasedFileSystem.cpp12
1 files changed, 11 insertions, 1 deletions
diff --git a/Kernel/FileSystem/BlockBasedFileSystem.cpp b/Kernel/FileSystem/BlockBasedFileSystem.cpp
index 2b329a7b9b..cf2916d3d1 100644
--- a/Kernel/FileSystem/BlockBasedFileSystem.cpp
+++ b/Kernel/FileSystem/BlockBasedFileSystem.cpp
@@ -133,6 +133,16 @@ ErrorOr<void> BlockBasedFileSystem::write_block(BlockIndex index, const UserOrKe
VERIFY(offset + count <= block_size());
dbgln_if(BBFS_DEBUG, "BlockBasedFileSystem::write_block {}, size={}", index, count);
+ // NOTE: We copy the `data` to write into a local buffer before taking the cache lock.
+ // This makes sure any page faults caused by accessing the data will occur before
+ // we tie down the cache.
+ auto buffered_data_or_error = ByteBuffer::create_uninitialized(count);
+ if (!buffered_data_or_error.has_value())
+ return ENOMEM;
+ auto buffered_data = buffered_data_or_error.release_value();
+
+ TRY(data.read(buffered_data.bytes()));
+
return m_cache.with_exclusive([&](auto& cache) -> ErrorOr<void> {
if (!allow_cache) {
flush_specific_block_if_needed(index);
@@ -147,7 +157,7 @@ ErrorOr<void> BlockBasedFileSystem::write_block(BlockIndex index, const UserOrKe
// Fill the cache first.
TRY(read_block(index, nullptr, block_size()));
}
- TRY(data.read(entry.data + offset, count));
+ memcpy(entry.data + offset, buffered_data.data(), count);
cache->mark_dirty(entry);
entry.has_data = true;