diff options
author | Sergey Bugaev <bugaevc@serenityos.org> | 2020-05-28 17:56:25 +0300 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-05-29 07:53:30 +0200 |
commit | fdb71cdf8fa6c48b226e2242fbfdd75216e2f442 (patch) | |
tree | a6ab0f769c534ff8c5ce88be595e994ad4193d6c /Kernel | |
parent | b9051263658c405a69bd8bd8030420157d3ca0e0 (diff) | |
download | serenity-fdb71cdf8fa6c48b226e2242fbfdd75216e2f442.zip |
Kernel: Support read-only filesystem mounts
This adds support for MS_RDONLY, a mount flag that tells the kernel to disallow
any attempts to write to the newly mounted filesystem. As this flag is
per-mount, and different mounts of the same filesystems (such as in case of bind
mounts) can have different mutability settings, you have to go though a custody
to find out if the filesystem is mounted read-only, instead of just asking the
filesystem itself whether it's inherently read-only.
This also adds a lot of checks we were previously missing; and moves some of
them to happen after more specific checks (such as regular permission checks).
One outstanding hole in this system is sys$mprotect(PROT_WRITE), as there's no
way we can know if the original file description this region has been mounted
from had been opened through a readonly mount point. Currently, we always allow
such sys$mprotect() calls to succeed, which effectively allows anyone to
circumvent the effect of MS_RDONLY. We should solve this one way or another.
Diffstat (limited to 'Kernel')
-rw-r--r-- | Kernel/FileSystem/Custody.cpp | 8 | ||||
-rw-r--r-- | Kernel/FileSystem/Custody.h | 1 | ||||
-rw-r--r-- | Kernel/FileSystem/VirtualFileSystem.cpp | 62 | ||||
-rw-r--r-- | Kernel/Process.cpp | 3 | ||||
-rw-r--r-- | Kernel/UnixTypes.h | 1 |
5 files changed, 59 insertions, 16 deletions
diff --git a/Kernel/FileSystem/Custody.cpp b/Kernel/FileSystem/Custody.cpp index 7699c3ea8d..c58e9207d7 100644 --- a/Kernel/FileSystem/Custody.cpp +++ b/Kernel/FileSystem/Custody.cpp @@ -59,4 +59,12 @@ String Custody::absolute_path() const return builder.to_string(); } +bool Custody::is_readonly() const +{ + if (m_mount_flags & MS_RDONLY) + return true; + + return m_inode->fs().is_readonly(); +} + } diff --git a/Kernel/FileSystem/Custody.h b/Kernel/FileSystem/Custody.h index 45ccc403bf..f39c68eb10 100644 --- a/Kernel/FileSystem/Custody.h +++ b/Kernel/FileSystem/Custody.h @@ -54,6 +54,7 @@ public: String absolute_path() const; int mount_flags() const { return m_mount_flags; } + bool is_readonly() const; private: Custody(Custody* parent, const StringView& name, Inode&, int mount_flags); diff --git a/Kernel/FileSystem/VirtualFileSystem.cpp b/Kernel/FileSystem/VirtualFileSystem.cpp index 112dece57e..54a6a9f4d8 100644 --- a/Kernel/FileSystem/VirtualFileSystem.cpp +++ b/Kernel/FileSystem/VirtualFileSystem.cpp @@ -177,14 +177,15 @@ void VFS::traverse_directory_inode(Inode& dir_inode, Function<bool(const FS::Dir KResult VFS::utime(StringView path, Custody& base, time_t atime, time_t mtime) { - auto descriptor_or_error = VFS::the().open(move(path), 0, 0, base); - if (descriptor_or_error.is_error()) - return descriptor_or_error.error(); - auto& inode = *descriptor_or_error.value()->inode(); - if (inode.fs().is_readonly()) - return KResult(-EROFS); + auto custody_or_error = VFS::the().resolve_path(move(path), base); + if (custody_or_error.is_error()) + return custody_or_error.error(); + auto& custody = *custody_or_error.value(); + auto& inode = custody.inode(); if (!Process::current->is_superuser() && inode.metadata().uid != Process::current->euid()) return KResult(-EACCES); + if (custody.is_readonly()) + return KResult(-EROFS); int error = inode.set_atime(atime); if (error) @@ -264,6 +265,12 @@ KResultOr<NonnullRefPtr<FileDescription>> VFS::open(StringView path, int options descriptor_or_error.value()->set_original_inode({}, inode); return descriptor_or_error; } + + // Check for read-only FS. Do this after handling preopen FD and devices, + // but before modifying the inode in any way. + if ((options & O_WRONLY) && custody.is_readonly()) + return KResult(-EROFS); + if (should_truncate_file) { inode.truncate(0); inode.set_mtime(kgettimeofday().tv_sec); @@ -290,6 +297,8 @@ KResult VFS::mknod(StringView path, mode_t mode, dev_t dev, Custody& base) auto& parent_inode = parent_custody->inode(); if (!parent_inode.metadata().may_write(*Process::current)) return KResult(-EACCES); + if (parent_custody->is_readonly()) + return KResult(-EROFS); LexicalPath p(path); dbg() << "VFS::mknod: '" << p.basename() << "' mode=" << mode << " dev=" << dev << " in " << parent_inode.identifier(); @@ -310,6 +319,9 @@ KResultOr<NonnullRefPtr<FileDescription>> VFS::create(StringView path, int optio auto& parent_inode = parent_custody.inode(); if (!parent_inode.metadata().may_write(*Process::current)) return KResult(-EACCES); + if (parent_custody.is_readonly()) + return KResult(-EROFS); + LexicalPath p(path); #ifdef VFS_DEBUG dbg() << "VFS::create: '" << p.basename() << "' in " << parent_inode.identifier(); @@ -348,6 +360,8 @@ KResult VFS::mkdir(StringView path, mode_t mode, Custody& base) auto& parent_inode = parent_custody->inode(); if (!parent_inode.metadata().may_write(*Process::current)) return KResult(-EACCES); + if (parent_custody->is_readonly()) + return KResult(-EROFS); LexicalPath p(path); #ifdef VFS_DEBUG @@ -371,6 +385,8 @@ KResult VFS::access(StringView path, int mode, Custody& base) if (mode & W_OK) { if (!metadata.may_write(*Process::current)) return KResult(-EACCES); + if (custody.is_readonly()) + return KResult(-EROFS); } if (mode & X_OK) { if (!metadata.may_execute(*Process::current)) @@ -396,11 +412,11 @@ KResultOr<NonnullRefPtr<Custody>> VFS::open_directory(StringView path, Custody& KResult VFS::chmod(Custody& custody, mode_t mode) { auto& inode = custody.inode(); - if (inode.fs().is_readonly()) - return KResult(-EROFS); if (Process::current->euid() != inode.metadata().uid && !Process::current->is_superuser()) return KResult(-EPERM); + if (custody.is_readonly()) + return KResult(-EROFS); // Only change the permission bits. mode = (inode.mode() & ~04777u) | (mode & 04777u); @@ -449,6 +465,9 @@ KResult VFS::rename(StringView old_path, StringView new_path, Custody& base) return KResult(-EACCES); } + if (old_parent_custody->is_readonly() || new_parent_custody->is_readonly()) + return KResult(-EROFS); + auto new_basename = LexicalPath(new_path).basename(); if (!new_custody_or_error.is_error()) { @@ -482,9 +501,6 @@ KResult VFS::rename(StringView old_path, StringView new_path, Custody& base) KResult VFS::chown(Custody& custody, uid_t a_uid, gid_t a_gid) { auto& inode = custody.inode(); - if (inode.fs().is_readonly()) - return KResult(-EROFS); - auto metadata = inode.metadata(); if (Process::current->euid() != metadata.uid && !Process::current->is_superuser()) @@ -504,6 +520,9 @@ KResult VFS::chown(Custody& custody, uid_t a_uid, gid_t a_gid) new_gid = a_gid; } + if (custody.is_readonly()) + return KResult(-EROFS); + dbg() << "VFS::chown(): inode " << inode.identifier() << " <- uid:" << new_uid << " gid:" << new_gid; if (metadata.is_setuid() || metadata.is_setgid()) { @@ -546,15 +565,15 @@ KResult VFS::link(StringView old_path, StringView new_path, Custody& base) if (parent_inode.fsid() != old_inode.fsid()) return KResult(-EXDEV); - if (parent_inode.fs().is_readonly()) - return KResult(-EROFS); - if (!parent_inode.metadata().may_write(*Process::current)) return KResult(-EACCES); if (old_inode.is_directory()) return KResult(-EPERM); + if (parent_custody->is_readonly()) + return KResult(-EROFS); + return parent_inode.add_child(old_inode.identifier(), LexicalPath(new_path).basename(), old_inode.mode()); } @@ -570,6 +589,11 @@ KResult VFS::unlink(StringView path, Custody& base) if (inode.is_directory()) return KResult(-EISDIR); + // We have just checked that the inode is not a directory, and thus it's not + // the root. So it should have a parent. Note that this would be invalidated + // if we were to support bind-mounting regular files on top of the root. + ASSERT(parent_custody); + auto& parent_inode = parent_custody->inode(); if (!parent_inode.metadata().may_write(*Process::current)) return KResult(-EACCES); @@ -579,6 +603,9 @@ KResult VFS::unlink(StringView path, Custody& base) return KResult(-EACCES); } + if (parent_custody->is_readonly()) + return KResult(-EROFS); + auto result = parent_inode.remove_child(LexicalPath(path).basename()); if (result.is_error()) return result; @@ -599,6 +626,8 @@ KResult VFS::symlink(StringView target, StringView linkpath, Custody& base) auto& parent_inode = parent_custody->inode(); if (!parent_inode.metadata().may_write(*Process::current)) return KResult(-EACCES); + if (parent_custody->is_readonly()) + return KResult(-EROFS); LexicalPath p(linkpath); dbg() << "VFS::symlink: '" << p.basename() << "' (-> '" << target << "') in " << parent_inode.identifier(); @@ -621,8 +650,6 @@ KResult VFS::rmdir(StringView path, Custody& base) auto& custody = *custody_or_error.value(); auto& inode = custody.inode(); - if (inode.fs().is_readonly()) - return KResult(-EROFS); // FIXME: We should return EINVAL if the last component of the path is "." // FIXME: We should return ENOTEMPTY if the last component of the path is ".." @@ -641,6 +668,9 @@ KResult VFS::rmdir(StringView path, Custody& base) if (inode.directory_entry_count() != 2) return KResult(-ENOTEMPTY); + if (custody.is_readonly()) + return KResult(-EROFS); + auto result = inode.remove_child("."); if (result.is_error()) return result; diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp index 68aadab1e6..f7c4b5a9cd 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -318,6 +318,9 @@ static bool validate_inode_mmap_prot(const Process& process, int prot, const Ino return false; if (map_shared) { + // FIXME: What about readonly filesystem mounts? We cannot make a + // decision here without knowing the mount flags, so we would need to + // keep a Custody or something from mmap time. if ((prot & PROT_WRITE) && !metadata.may_write(process)) return false; InterruptDisabler disabler; diff --git a/Kernel/UnixTypes.h b/Kernel/UnixTypes.h index 9191c69762..6f57ec83b8 100644 --- a/Kernel/UnixTypes.h +++ b/Kernel/UnixTypes.h @@ -52,6 +52,7 @@ #define MS_NOEXEC (1 << 1) #define MS_NOSUID (1 << 2) #define MS_BIND (1 << 3) +#define MS_RDONLY (1 << 4) #define PERF_EVENT_MALLOC 1 #define PERF_EVENT_FREE 2 |