summaryrefslogtreecommitdiff
path: root/Kernel
diff options
context:
space:
mode:
authorSergey Bugaev <bugaevc@serenityos.org>2020-05-28 17:56:25 +0300
committerAndreas Kling <kling@serenityos.org>2020-05-29 07:53:30 +0200
commitfdb71cdf8fa6c48b226e2242fbfdd75216e2f442 (patch)
treea6ab0f769c534ff8c5ce88be595e994ad4193d6c /Kernel
parentb9051263658c405a69bd8bd8030420157d3ca0e0 (diff)
downloadserenity-fdb71cdf8fa6c48b226e2242fbfdd75216e2f442.zip
Kernel: Support read-only filesystem mounts
This adds support for MS_RDONLY, a mount flag that tells the kernel to disallow any attempts to write to the newly mounted filesystem. As this flag is per-mount, and different mounts of the same filesystems (such as in case of bind mounts) can have different mutability settings, you have to go though a custody to find out if the filesystem is mounted read-only, instead of just asking the filesystem itself whether it's inherently read-only. This also adds a lot of checks we were previously missing; and moves some of them to happen after more specific checks (such as regular permission checks). One outstanding hole in this system is sys$mprotect(PROT_WRITE), as there's no way we can know if the original file description this region has been mounted from had been opened through a readonly mount point. Currently, we always allow such sys$mprotect() calls to succeed, which effectively allows anyone to circumvent the effect of MS_RDONLY. We should solve this one way or another.
Diffstat (limited to 'Kernel')
-rw-r--r--Kernel/FileSystem/Custody.cpp8
-rw-r--r--Kernel/FileSystem/Custody.h1
-rw-r--r--Kernel/FileSystem/VirtualFileSystem.cpp62
-rw-r--r--Kernel/Process.cpp3
-rw-r--r--Kernel/UnixTypes.h1
5 files changed, 59 insertions, 16 deletions
diff --git a/Kernel/FileSystem/Custody.cpp b/Kernel/FileSystem/Custody.cpp
index 7699c3ea8d..c58e9207d7 100644
--- a/Kernel/FileSystem/Custody.cpp
+++ b/Kernel/FileSystem/Custody.cpp
@@ -59,4 +59,12 @@ String Custody::absolute_path() const
return builder.to_string();
}
+bool Custody::is_readonly() const
+{
+ if (m_mount_flags & MS_RDONLY)
+ return true;
+
+ return m_inode->fs().is_readonly();
+}
+
}
diff --git a/Kernel/FileSystem/Custody.h b/Kernel/FileSystem/Custody.h
index 45ccc403bf..f39c68eb10 100644
--- a/Kernel/FileSystem/Custody.h
+++ b/Kernel/FileSystem/Custody.h
@@ -54,6 +54,7 @@ public:
String absolute_path() const;
int mount_flags() const { return m_mount_flags; }
+ bool is_readonly() const;
private:
Custody(Custody* parent, const StringView& name, Inode&, int mount_flags);
diff --git a/Kernel/FileSystem/VirtualFileSystem.cpp b/Kernel/FileSystem/VirtualFileSystem.cpp
index 112dece57e..54a6a9f4d8 100644
--- a/Kernel/FileSystem/VirtualFileSystem.cpp
+++ b/Kernel/FileSystem/VirtualFileSystem.cpp
@@ -177,14 +177,15 @@ void VFS::traverse_directory_inode(Inode& dir_inode, Function<bool(const FS::Dir
KResult VFS::utime(StringView path, Custody& base, time_t atime, time_t mtime)
{
- auto descriptor_or_error = VFS::the().open(move(path), 0, 0, base);
- if (descriptor_or_error.is_error())
- return descriptor_or_error.error();
- auto& inode = *descriptor_or_error.value()->inode();
- if (inode.fs().is_readonly())
- return KResult(-EROFS);
+ auto custody_or_error = VFS::the().resolve_path(move(path), base);
+ if (custody_or_error.is_error())
+ return custody_or_error.error();
+ auto& custody = *custody_or_error.value();
+ auto& inode = custody.inode();
if (!Process::current->is_superuser() && inode.metadata().uid != Process::current->euid())
return KResult(-EACCES);
+ if (custody.is_readonly())
+ return KResult(-EROFS);
int error = inode.set_atime(atime);
if (error)
@@ -264,6 +265,12 @@ KResultOr<NonnullRefPtr<FileDescription>> VFS::open(StringView path, int options
descriptor_or_error.value()->set_original_inode({}, inode);
return descriptor_or_error;
}
+
+ // Check for read-only FS. Do this after handling preopen FD and devices,
+ // but before modifying the inode in any way.
+ if ((options & O_WRONLY) && custody.is_readonly())
+ return KResult(-EROFS);
+
if (should_truncate_file) {
inode.truncate(0);
inode.set_mtime(kgettimeofday().tv_sec);
@@ -290,6 +297,8 @@ KResult VFS::mknod(StringView path, mode_t mode, dev_t dev, Custody& base)
auto& parent_inode = parent_custody->inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
+ if (parent_custody->is_readonly())
+ return KResult(-EROFS);
LexicalPath p(path);
dbg() << "VFS::mknod: '" << p.basename() << "' mode=" << mode << " dev=" << dev << " in " << parent_inode.identifier();
@@ -310,6 +319,9 @@ KResultOr<NonnullRefPtr<FileDescription>> VFS::create(StringView path, int optio
auto& parent_inode = parent_custody.inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
+ if (parent_custody.is_readonly())
+ return KResult(-EROFS);
+
LexicalPath p(path);
#ifdef VFS_DEBUG
dbg() << "VFS::create: '" << p.basename() << "' in " << parent_inode.identifier();
@@ -348,6 +360,8 @@ KResult VFS::mkdir(StringView path, mode_t mode, Custody& base)
auto& parent_inode = parent_custody->inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
+ if (parent_custody->is_readonly())
+ return KResult(-EROFS);
LexicalPath p(path);
#ifdef VFS_DEBUG
@@ -371,6 +385,8 @@ KResult VFS::access(StringView path, int mode, Custody& base)
if (mode & W_OK) {
if (!metadata.may_write(*Process::current))
return KResult(-EACCES);
+ if (custody.is_readonly())
+ return KResult(-EROFS);
}
if (mode & X_OK) {
if (!metadata.may_execute(*Process::current))
@@ -396,11 +412,11 @@ KResultOr<NonnullRefPtr<Custody>> VFS::open_directory(StringView path, Custody&
KResult VFS::chmod(Custody& custody, mode_t mode)
{
auto& inode = custody.inode();
- if (inode.fs().is_readonly())
- return KResult(-EROFS);
if (Process::current->euid() != inode.metadata().uid && !Process::current->is_superuser())
return KResult(-EPERM);
+ if (custody.is_readonly())
+ return KResult(-EROFS);
// Only change the permission bits.
mode = (inode.mode() & ~04777u) | (mode & 04777u);
@@ -449,6 +465,9 @@ KResult VFS::rename(StringView old_path, StringView new_path, Custody& base)
return KResult(-EACCES);
}
+ if (old_parent_custody->is_readonly() || new_parent_custody->is_readonly())
+ return KResult(-EROFS);
+
auto new_basename = LexicalPath(new_path).basename();
if (!new_custody_or_error.is_error()) {
@@ -482,9 +501,6 @@ KResult VFS::rename(StringView old_path, StringView new_path, Custody& base)
KResult VFS::chown(Custody& custody, uid_t a_uid, gid_t a_gid)
{
auto& inode = custody.inode();
- if (inode.fs().is_readonly())
- return KResult(-EROFS);
-
auto metadata = inode.metadata();
if (Process::current->euid() != metadata.uid && !Process::current->is_superuser())
@@ -504,6 +520,9 @@ KResult VFS::chown(Custody& custody, uid_t a_uid, gid_t a_gid)
new_gid = a_gid;
}
+ if (custody.is_readonly())
+ return KResult(-EROFS);
+
dbg() << "VFS::chown(): inode " << inode.identifier() << " <- uid:" << new_uid << " gid:" << new_gid;
if (metadata.is_setuid() || metadata.is_setgid()) {
@@ -546,15 +565,15 @@ KResult VFS::link(StringView old_path, StringView new_path, Custody& base)
if (parent_inode.fsid() != old_inode.fsid())
return KResult(-EXDEV);
- if (parent_inode.fs().is_readonly())
- return KResult(-EROFS);
-
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
if (old_inode.is_directory())
return KResult(-EPERM);
+ if (parent_custody->is_readonly())
+ return KResult(-EROFS);
+
return parent_inode.add_child(old_inode.identifier(), LexicalPath(new_path).basename(), old_inode.mode());
}
@@ -570,6 +589,11 @@ KResult VFS::unlink(StringView path, Custody& base)
if (inode.is_directory())
return KResult(-EISDIR);
+ // We have just checked that the inode is not a directory, and thus it's not
+ // the root. So it should have a parent. Note that this would be invalidated
+ // if we were to support bind-mounting regular files on top of the root.
+ ASSERT(parent_custody);
+
auto& parent_inode = parent_custody->inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
@@ -579,6 +603,9 @@ KResult VFS::unlink(StringView path, Custody& base)
return KResult(-EACCES);
}
+ if (parent_custody->is_readonly())
+ return KResult(-EROFS);
+
auto result = parent_inode.remove_child(LexicalPath(path).basename());
if (result.is_error())
return result;
@@ -599,6 +626,8 @@ KResult VFS::symlink(StringView target, StringView linkpath, Custody& base)
auto& parent_inode = parent_custody->inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
+ if (parent_custody->is_readonly())
+ return KResult(-EROFS);
LexicalPath p(linkpath);
dbg() << "VFS::symlink: '" << p.basename() << "' (-> '" << target << "') in " << parent_inode.identifier();
@@ -621,8 +650,6 @@ KResult VFS::rmdir(StringView path, Custody& base)
auto& custody = *custody_or_error.value();
auto& inode = custody.inode();
- if (inode.fs().is_readonly())
- return KResult(-EROFS);
// FIXME: We should return EINVAL if the last component of the path is "."
// FIXME: We should return ENOTEMPTY if the last component of the path is ".."
@@ -641,6 +668,9 @@ KResult VFS::rmdir(StringView path, Custody& base)
if (inode.directory_entry_count() != 2)
return KResult(-ENOTEMPTY);
+ if (custody.is_readonly())
+ return KResult(-EROFS);
+
auto result = inode.remove_child(".");
if (result.is_error())
return result;
diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp
index 68aadab1e6..f7c4b5a9cd 100644
--- a/Kernel/Process.cpp
+++ b/Kernel/Process.cpp
@@ -318,6 +318,9 @@ static bool validate_inode_mmap_prot(const Process& process, int prot, const Ino
return false;
if (map_shared) {
+ // FIXME: What about readonly filesystem mounts? We cannot make a
+ // decision here without knowing the mount flags, so we would need to
+ // keep a Custody or something from mmap time.
if ((prot & PROT_WRITE) && !metadata.may_write(process))
return false;
InterruptDisabler disabler;
diff --git a/Kernel/UnixTypes.h b/Kernel/UnixTypes.h
index 9191c69762..6f57ec83b8 100644
--- a/Kernel/UnixTypes.h
+++ b/Kernel/UnixTypes.h
@@ -52,6 +52,7 @@
#define MS_NOEXEC (1 << 1)
#define MS_NOSUID (1 << 2)
#define MS_BIND (1 << 3)
+#define MS_RDONLY (1 << 4)
#define PERF_EVENT_MALLOC 1
#define PERF_EVENT_FREE 2