diff options
author | Alec Murphy <alec@checksum.fail> | 2022-11-10 15:36:07 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2022-12-04 12:08:48 +0000 |
commit | 8677dbfc7ffb4119b5049fdd7eb10527ee4a1f21 (patch) | |
tree | 1ce3a1092f46f12ff704096acd9ae444742c613d | |
parent | e3112a3d2e7ec5c6958ed075a52b380f7d80557b (diff) | |
download | serenity-8677dbfc7ffb4119b5049fdd7eb10527ee4a1f21.zip |
Utilities: Add strings
-rw-r--r-- | Base/usr/share/man/man1/strings.md | 39 | ||||
-rw-r--r-- | Userland/Utilities/CMakeLists.txt | 2 | ||||
-rw-r--r-- | Userland/Utilities/strings.cpp | 140 |
3 files changed, 180 insertions, 1 deletions
diff --git a/Base/usr/share/man/man1/strings.md b/Base/usr/share/man/man1/strings.md new file mode 100644 index 0000000000..abcaf5284b --- /dev/null +++ b/Base/usr/share/man/man1/strings.md @@ -0,0 +1,39 @@ +## Name + +strings - find printable strings in files + +## Synopsis + +```**sh +$ strings [-n NUMBER] [-p] [-t FORMAT] [PATHS...] +``` + +## Description + +`strings` looks for printable strings in each file specified in `PATHS` and writes them to standard output. If `PATHS` is not specified, input is read from standard input. + +## Options + +* `-n NUMBER`: Specify the minimum string length (4 is default). +* `-p`: Write the pathname for each file specified in `PATHS` to standard output. +* `-t FORMAT`: Write each string preceded by its byte offset from the start of the file in the specified `FORMAT`, where `FORMAT` matches one of the following: `d` (decimal), `o` (octal), or `x` (hexadecimal). + +## Examples + +Display the printable strings in /bin/strings with a minimum length of 8 characters: + +```sh +$ strings -n 8 /bin/strings +``` + +Display the printable strings in a binary file, preceded by their byte offset in hexadecimal format: + +```sh +$ strings -t x ~/Videos/test.webm +``` + +Display the printable strings in all .txt files in the current directory, preceded by their pathname: + +```sh +$ strings -p *.txt +``` diff --git a/Userland/Utilities/CMakeLists.txt b/Userland/Utilities/CMakeLists.txt index c9f1226d9a..f342ad292b 100644 --- a/Userland/Utilities/CMakeLists.txt +++ b/Userland/Utilities/CMakeLists.txt @@ -8,7 +8,7 @@ list(APPEND REQUIRED_TARGETS ) list(APPEND RECOMMENDED_TARGETS adjtime aplay abench asctl bt checksum chres cksum copy fortune gunzip gzip init install keymap lsirq lsof lspci man mknod mktemp - nc netstat notify ntpquery open passwd pls printf pro shot tar tt unzip wallpaper zip + nc netstat notify ntpquery open passwd pls printf pro shot strings tar tt unzip wallpaper zip ) # FIXME: Support specifying component dependencies for 
utilities (e.g. WebSocket for telws) diff --git a/Userland/Utilities/strings.cpp b/Userland/Utilities/strings.cpp new file mode 100644 index 0000000000..531db31838 --- /dev/null +++ b/Userland/Utilities/strings.cpp @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2022, the SerenityOS developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/CharacterTypes.h> +#include <AK/Forward.h> +#include <LibCore/ArgsParser.h> +#include <LibCore/Stream.h> +#include <LibCore/System.h> +#include <LibMain/Main.h> +#include <unistd.h> + +enum class StringOffsetFormat { + None = 0, + Decimal, + Octal, + Hexadecimal +}; + +// NOTE: This is similar to how the cat utility works in the sense of aggregating +// data in 32K buffer. +static constexpr size_t buffer_read_size = 32768; + +static bool should_print_characters(Vector<u8> const& characters) +{ + for (u8 ch : characters) { + if (is_ascii_printable(ch) && !is_ascii_space(ch)) + return true; + } + return false; +} + +static void print_characters(Vector<u8> const& characters, StringOffsetFormat string_offset_format, size_t string_offset_position) +{ + switch (string_offset_format) { + case StringOffsetFormat::Decimal: + out("{:>7d} ", string_offset_position); + break; + case StringOffsetFormat::Octal: + out("{:>7o} ", string_offset_position); + break; + case StringOffsetFormat::Hexadecimal: + out("{:>7x} ", string_offset_position); + break; + default: + break; + } + outln("{:s}", characters.span()); +} + +static int process_characters_in_span(Vector<u8>& characters, ReadonlyBytes span) +{ + int processed_characters = 0; + for (u8 ch : span) { + ++processed_characters; + if (is_ascii_printable(ch) || ch == '\t') + characters.append(ch); + else + break; + } + return processed_characters; +} + +static ErrorOr<void> process_strings_in_file(StringView path, bool show_paths, StringOffsetFormat string_offset_format, size_t minimum_string_length) +{ + Array<u8, buffer_read_size> buffer; + Vector<u8> output_characters; + auto 
file = TRY(Core::Stream::File::open_file_or_standard_stream(path, Core::Stream::OpenMode::Read)); + size_t processed_characters = 0; + size_t string_offset_position = 0; + bool did_show_path = false; + while (!file->is_eof()) { + auto buffer_span = TRY(file->read(buffer)); + while (!buffer_span.is_empty()) { + string_offset_position += processed_characters; + processed_characters = process_characters_in_span(output_characters, buffer_span); + if (show_paths && !did_show_path) { + outln("path {}:", path); + did_show_path = true; + } + if (output_characters.size() >= minimum_string_length && should_print_characters(output_characters)) { + print_characters(output_characters, string_offset_format, string_offset_position); + } + buffer_span = buffer_span.slice(processed_characters); + output_characters.clear(); + } + } + return {}; +} + +ErrorOr<int> serenity_main(Main::Arguments arguments) +{ + TRY(Core::System::pledge("stdio rpath")); + + Vector<StringView> paths; + size_t minimum_string_length = 4; + bool show_paths = false; + + StringOffsetFormat string_offset_format { StringOffsetFormat::None }; + + Core::ArgsParser args_parser; + args_parser.add_option(minimum_string_length, "Specify the minimum string length.", nullptr, 'n', "number"); + args_parser.add_option(show_paths, "Display the path for each matched file.", nullptr, 'p'); + args_parser.add_option({ Core::ArgsParser::OptionArgumentMode::Required, + "Write offset relative to start of each file in (d)ec, (o)ct, or he(x) format.", + nullptr, + 't', + "format", + [&string_offset_format](char const* s) { + StringView value = { s, strlen(s) }; + if (value == "d") { + string_offset_format = StringOffsetFormat::Decimal; + } else if (value == "o") { + string_offset_format = StringOffsetFormat::Octal; + } else if (value == "x") { + string_offset_format = StringOffsetFormat::Hexadecimal; + } else { + return false; + } + return true; + } }); + args_parser.set_general_help("Write the sequences of printable characters in 
files or pipes to stdout."); + args_parser.add_positional_argument(paths, "File path", "path", Core::ArgsParser::Required::No); + args_parser.parse(arguments); + + if (minimum_string_length < 1) { + warnln("Invalid minimum string length {}", minimum_string_length); + return 1; + } + + if (paths.is_empty()) + paths.append("-"sv); + + for (auto const& path : paths) + TRY(process_strings_in_file(path, show_paths, string_offset_format, minimum_string_length)); + + return 0; +} |