summaryrefslogtreecommitdiff
path: root/Userland/Utilities/strings.cpp
blob: 8f1f0ab7d2329e5c125e87fec345585fa9146671 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
 * Copyright (c) 2022, the SerenityOS developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/CharacterTypes.h>
#include <AK/Forward.h>
#include <LibCore/ArgsParser.h>
#include <LibCore/File.h>
#include <LibCore/System.h>
#include <LibMain/Main.h>
#include <unistd.h>

// Selects how the offset printed in front of each string is formatted
// (chosen with the -t option); None suppresses the offset entirely.
enum class StringOffsetFormat {
    None = 0, // No offset prefix is printed.
    Decimal, // Offset printed in base 10.
    Octal, // Offset printed in base 8.
    Hexadecimal // Offset printed in base 16.
};

// NOTE: Size in bytes of the buffer used for each read from the input. This is
// similar to how the cat utility works, in the sense of aggregating data in a
// 32K buffer.
static constexpr size_t buffer_read_size = 32768;

// Returns true when the candidate string contains at least one printable ASCII
// character that is not whitespace, i.e. it is more than just blanks.
static bool should_print_characters(Vector<u8> const& characters)
{
    for (size_t index = 0; index < characters.size(); ++index) {
        u8 const candidate = characters[index];
        // Skip anything that is unprintable or merely whitespace.
        if (!is_ascii_printable(candidate) || is_ascii_space(candidate))
            continue;
        return true;
    }
    return false;
}

// Writes one found string to standard output on its own line. When an offset
// format other than None is selected, the line is prefixed with the offset,
// right-aligned to a width of 7 and followed by a single space.
static void print_characters(Vector<u8> const& characters, StringOffsetFormat string_offset_format, size_t string_offset_position)
{
    if (string_offset_format == StringOffsetFormat::Decimal) {
        out("{:>7d} ", string_offset_position);
    } else if (string_offset_format == StringOffsetFormat::Octal) {
        out("{:>7o} ", string_offset_position);
    } else if (string_offset_format == StringOffsetFormat::Hexadecimal) {
        out("{:>7x} ", string_offset_position);
    }
    // Any other format (None) prints no prefix at all.

    outln("{:s}", characters.span());
}

// Appends the leading run of printable ASCII characters (and tabs) from `span`
// to `characters`, stopping at the first byte that is neither.
//
// Returns the number of bytes consumed from `span`. The byte that ended the run
// is counted as consumed but is not appended; if the span ends before such a
// byte is seen, the whole span is consumed.
static int process_characters_in_span(Vector<u8>& characters, ReadonlyBytes span)
{
    size_t consumed = 0;
    while (consumed < span.size()) {
        u8 const byte = span[consumed];
        // The byte is consumed regardless of whether it extends the run.
        ++consumed;
        if (!is_ascii_printable(byte) && byte != '\t')
            break;
        characters.append(byte);
    }
    return static_cast<int>(consumed);
}

// Scans the file at `path` and prints every run of printable characters that
// is at least `minimum_string_length` bytes long and is not made up solely of
// whitespace.
//
// - path: file to scan; opened via Core::File::open_file_or_standard_stream.
// - show_paths: when true, a "path <path>:" header line is printed once, as
//   soon as the first data has been read from the file.
// - string_offset_format: how (and whether) each string's starting offset,
//   relative to the start of the file, is printed in front of it.
// - minimum_string_length: shortest run that is reported.
//
// Returns an error if opening or reading the file fails.
//
// The input is read in chunks of buffer_read_size bytes (or less, when a read
// comes up short). A run that is still open when a chunk ends is carried over
// into the next chunk instead of being cut in two, so the reported strings and
// offsets do not depend on where the read boundaries happen to fall. The
// pending run is held in memory until it terminates.
static ErrorOr<void> process_strings_in_file(StringView path, bool show_paths, StringOffsetFormat string_offset_format, size_t minimum_string_length)
{
    Array<u8, buffer_read_size> buffer;
    Vector<u8> output_characters;
    auto file = TRY(Core::File::open_file_or_standard_stream(path, Core::File::OpenMode::Read));
    // Offset of the next unprocessed byte, counted from the start of the file.
    size_t file_position = 0;
    // Offset at which the run currently held in output_characters started.
    size_t string_offset_position = 0;
    bool did_show_path = false;

    // Reports the pending run if it qualifies, then discards it.
    auto flush_pending_string = [&] {
        if (output_characters.size() >= minimum_string_length && should_print_characters(output_characters))
            print_characters(output_characters, string_offset_format, string_offset_position);
        output_characters.clear();
    };

    while (!file->is_eof()) {
        auto buffer_span = TRY(file->read(buffer));
        while (!buffer_span.is_empty()) {
            if (show_paths && !did_show_path) {
                outln("path {}:", path);
                did_show_path = true;
            }

            // A fresh run starts here; a run carried over from the previous
            // chunk keeps the offset it was started with.
            if (output_characters.is_empty())
                string_offset_position = file_position;

            size_t const pending_before = output_characters.size();
            auto const consumed = static_cast<size_t>(process_characters_in_span(output_characters, buffer_span));
            // The helper consumes the byte that ends a run without appending
            // it, so appending fewer bytes than were consumed means the run was
            // terminated. Otherwise the chunk simply ran out and the run may
            // continue in the next one.
            bool const run_terminated = (output_characters.size() - pending_before) < consumed;

            file_position += consumed;
            buffer_span = buffer_span.slice(consumed);

            if (run_terminated)
                flush_pending_string();
        }
    }

    // The file may end in the middle of a run without a terminating byte.
    flush_pending_string();
    return {};
}

// Program entry point: parses the command line and then scans each given path
// in order. When no path is given, the single path "-" is used instead.
// Returns 1 when the requested minimum string length is zero, 0 on success, and
// propagates the error of the first file that fails to be processed.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    TRY(Core::System::pledge("stdio rpath"));

    // Defaults, overridden by the options parsed below.
    size_t minimum_string_length = 4;
    bool show_paths = false;
    StringOffsetFormat offset_format { StringOffsetFormat::None };
    Vector<StringView> paths;

    Core::ArgsParser parser;
    parser.add_option(minimum_string_length, "Specify the minimum string length.", nullptr, 'n', "number");
    parser.add_option(show_paths, "Display the path for each matched file.", nullptr, 'p');
    parser.add_option({ Core::ArgsParser::OptionArgumentMode::Required,
        "Write offset relative to start of each file in (d)ec, (o)ct, or he(x) format.",
        nullptr,
        't',
        "format",
        [&offset_format](StringView value) {
            // Only the exact one-letter selectors d, o and x are accepted.
            if (value.length() != 1)
                return false;
            switch (value[0]) {
            case 'd':
                offset_format = StringOffsetFormat::Decimal;
                return true;
            case 'o':
                offset_format = StringOffsetFormat::Octal;
                return true;
            case 'x':
                offset_format = StringOffsetFormat::Hexadecimal;
                return true;
            default:
                return false;
            }
        } });
    parser.set_general_help("Write the sequences of printable characters in files or pipes to stdout.");
    parser.add_positional_argument(paths, "File path", "path", Core::ArgsParser::Required::No);
    parser.parse(arguments);

    // A minimum length of zero is rejected.
    if (minimum_string_length == 0) {
        warnln("Invalid minimum string length {}", minimum_string_length);
        return 1;
    }

    // NOTE(review): "-" presumably selects standard input in
    // open_file_or_standard_stream; confirm against LibCore.
    if (paths.is_empty())
        paths.append("-"sv);

    for (auto const& path : paths) {
        TRY(process_strings_in_file(path, show_paths, offset_format, minimum_string_length));
    }

    return 0;
}