1 files changed, 256 insertions, 0 deletions
diff --git a/Userland/Libraries/LibRegex/C/Regex.cpp b/Userland/Libraries/LibRegex/C/Regex.cpp
new file mode 100644
index 0000000000..ad39b18af5
--- /dev/null
+++ b/Userland/Libraries/LibRegex/C/Regex.cpp
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <AK/String.h>
+#include <AK/StringBuilder.h>
+#include <LibRegex/Regex.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifdef __serenity__
+#    include <regex.h>
+#else
+#    include <LibC/regex.h>
+#endif
+
+// Private state that regcomp() allocates and stores behind regex_t::__data; regfree() deletes it.
+// NOTE: regcomp() brace-initializes this struct positionally, so the member order matters.
+struct internal_regex_t {
+    u8 cflags; // Compile flags passed to regcomp(), narrowed to 8 bits on assignment.
+    u8 eflags; // Zero-initialized by regcomp(); not written anywhere else in this file.
+    OwnPtr<Regex<PosixExtended>> re; // The compiled expression; stays null if regcomp() bailed out before constructing it.
+    size_t re_pat_errpos; // Position of the token the parser reported an error on.
+    ReError re_pat_err; // Parser error recorded by regcomp(); REG_NOERR when parsing succeeded.
+    String re_pat; // Copy of the pattern text; only filled in when parsing failed.
+    size_t re_nsub; // Capture group count reported by the parser.
+};
+
+// Returns the internal state attached to `re`, or nullptr when `re` itself is null.
+// The result is also null when `re->__data` is null.
+static internal_regex_t* impl_from(regex_t* re)
+{
+    return re ? reinterpret_cast<internal_regex_t*>(re->__data) : nullptr;
+}
+
+// Const overload: reuses the mutable lookup and hands the state back as read-only.
+static const internal_regex_t* impl_from(const regex_t* re)
+{
+    auto* mutable_re = const_cast<regex_t*>(re);
+    return impl_from(mutable_re);
+}
+
+extern "C" {
+
+// Compiles `pattern` and attaches the resulting state to `reg`.
+//
+// Returns:
+//   REG_NOERR   on success; the capture group count is recorded in the internal state.
+//   REG_ESPACE  when `reg` is null.
+//   REG_ENOSYS  when `cflags` lacks REG_EXTENDED (only extended syntax is handled here).
+//   otherwise   the parser's error code, converted to ReError.
+//
+// The internal state is allocated before any of the checks that can fail and is kept on
+// every failure path, so regfree() is what releases it, after a failed compile as well.
+int regcomp(regex_t* reg, const char* pattern, int cflags)
+{
+    if (!reg)
+        return REG_ESPACE;
+
+    // Note that subsequent uses of regcomp() without regfree() _will_ leak memory
+    // This could've been prevented if libc provided a reginit() or similar, but it does not.
+    reg->__data = new internal_regex_t { 0, 0, {}, 0, ReError::REG_NOERR, {}, 0 };
+
+    auto preg = impl_from(reg);
+
+    // The state stays allocated here, but without a compiled expression (`re` remains null).
+    if (!(cflags & REG_EXTENDED))
+        return REG_ENOSYS;
+
+    preg->cflags = cflags;
+
+    String pattern_str(pattern);
+    preg->re = make<Regex<PosixExtended>>(pattern_str, PosixOptions {} | (PosixFlags)cflags | PosixFlags::SkipTrimEmptyMatches);
+
+    // Bind by reference: the parser result is only read below, so copying it is wasted work.
+    auto& parser_result = preg->re->parser_result;
+    if (parser_result.error != regex::Error::NoError) {
+        auto error = (ReError)parser_result.error;
+
+        // Remember where and why parsing failed, plus the pattern text, for later calls.
+        preg->re_pat_errpos = parser_result.error_token.position();
+        preg->re_pat_err = error;
+        preg->re_pat = pattern;
+
+        dbg() << "Have Error: " << error;
+
+        return error;
+    }
+
+    preg->re_nsub = parser_result.capture_groups_count;
+
+    return REG_NOERR;
+}
+
+// Runs the expression compiled into `reg` against `string`.
+//
+// With REG_SEARCH set in `eflags` the regex object's search() is used, otherwise match().
+//
+// On success up to `nmatch` entries of `pmatch` are filled in this order: the first match,
+// one slot per capture group of that match, then the next match and its groups, and so on.
+// Capture groups that are missing or empty are stored as { -1, -1, 0 }, as are any
+// remaining slots once all matches have been written.
+//
+// Returns:
+//   REG_NOERR    when the expression matched.
+//   REG_NOMATCH  when it did not; pmatch[0] is then set to { -1, -1, 0 } if available.
+//   REG_BADPAT   when `reg` carries no state or no compiled expression.
+//   otherwise    the parse error recorded by regcomp().
+int regexec(const regex_t* reg, const char* string, size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+    auto preg = impl_from(reg);
+
+    // A null `reg`, or one whose state is null (e.g. after regfree()), has nothing to match with.
+    if (!preg)
+        return REG_BADPAT;
+
+    if (preg->re_pat_err)
+        return preg->re_pat_err;
+    if (!preg->re)
+        return REG_BADPAT;
+
+    auto mark_unset = [](regmatch_t& slot) {
+        slot.rm_so = -1;
+        slot.rm_eo = -1;
+        slot.rm_cnt = 0;
+    };
+
+    RegexResult result;
+    if (eflags & REG_SEARCH)
+        result = preg->re->search(string, PosixOptions {} | (PosixFlags)eflags);
+    else
+        result = preg->re->match(string, PosixOptions {} | (PosixFlags)eflags);
+
+    if (!result.success) {
+        if (nmatch && pmatch)
+            mark_unset(pmatch[0]);
+        return REG_NOMATCH;
+    }
+
+    auto size = result.matches.size();
+    if (!size || !nmatch || !pmatch)
+        return REG_NOERR;
+
+    // The first slot carries the total number of matches.
+    pmatch[0].rm_cnt = size;
+
+    size_t match_index { 0 };
+    for (size_t i = 0; i < size; ++i) {
+        const auto& match = result.matches.at(i);
+        pmatch[match_index].rm_so = match.global_offset;
+        pmatch[match_index].rm_eo = pmatch[match_index].rm_so + match.view.length();
+        if (match_index > 0)
+            pmatch[match_index].rm_cnt = result.capture_group_matches.size();
+
+        // Stop as soon as the caller's array is full.
+        ++match_index;
+        if (match_index >= nmatch)
+            return REG_NOERR;
+
+        if (i >= result.capture_group_matches.size())
+            continue;
+
+        // Always emit one slot per capture group in the pattern, even if this match has fewer.
+        const auto& groups = result.capture_group_matches.at(i);
+        auto capture_groups_size = groups.size();
+        for (size_t j = 0; j < preg->re->parser_result.capture_groups_count; ++j) {
+            if (j >= capture_groups_size || !groups.at(j).view.length()) {
+                mark_unset(pmatch[match_index]);
+            } else {
+                const auto& group = groups.at(j);
+                pmatch[match_index].rm_so = group.global_offset;
+                pmatch[match_index].rm_eo = pmatch[match_index].rm_so + group.view.length();
+                pmatch[match_index].rm_cnt = 1;
+            }
+
+            ++match_index;
+            if (match_index >= nmatch)
+                return REG_NOERR;
+        }
+    }
+
+    // Mark every slot that was not written as unused.
+    for (size_t i = match_index; i < nmatch; ++i)
+        mark_unset(pmatch[i]);
+
+    return REG_NOERR;
+}
+
+inline static String get_error(ReError errcode)
+{
+    String error;
+    switch ((ReError)errcode) {
+    case REG_NOERR:
+        error = "No error";
+        break;
+    case REG_NOMATCH:
+        error = "regexec() failed to match.";
+        break;
+    case REG_BADPAT:
+        error = "Invalid regular expression.";
+        break;
+    case REG_ECOLLATE:
+        error = "Invalid collating element referenced.";
+        break;
+    case REG_ECTYPE:
+        error = "Invalid character class type referenced.";
+        break;
+    case REG_EESCAPE:
+        error = "Trailing \\ in pattern.";
+        break;
+    case REG_ESUBREG:
+        error = "Number in \\digit invalid or in error.";
+        break;
+    case REG_EBRACK:
+        error = "[ ] imbalance.";
+        break;
+    case REG_EPAREN:
+        error = "\\( \\) or ( ) imbalance.";
+        break;
+    case REG_EBRACE:
+        error = "\\{ \\} imbalance.";
+        break;
+    case REG_BADBR:
+        error = "Content of \\{ \\} invalid: not a number, number too large, more than two numbers, first larger than second.";
+        break;
+    case REG_ERANGE:
+        error = "Invalid endpoint in range expression.";
+        break;
+    case REG_ESPACE:
+        error = "Out of memory.";
+        break;
+    case REG_BADRPT:
+        error = "?, * or + not preceded by valid regular expression.";
+        break;
+    case REG_ENOSYS:
+        error = "The implementation does not support the function.";
+        break;
+    case REG_EMPTY_EXPR:
+        error = "Empty expression provided";
+        break;
+    }
+
+    return error;
+}
+
+// Writes a description of `errcode` into `errbuf` (at most `errbuf_size` bytes).
+//
+// When `reg` holds a compiled expression and `errcode` is the parse status recorded for it,
+// the message comes from the regex object's error_string(); in every other case the generic
+// text for `errcode` is used. That covers a null `reg`, state without a compiled expression
+// (regcomp() returned REG_ENOSYS), and codes such as REG_NOMATCH that did not come from parsing.
+//
+// Returns the message length, or 0 when copy_characters_to_buffer() reports failure.
+// Nothing is written when `errbuf` is null or `errbuf_size` is 0.
+//
+// NOTE(review): POSIX specifies the return value as the buffer size needed for the whole
+// message including the terminating NUL, also when the message was truncated. This function
+// returns the length without the NUL and 0 on a failed copy; left as-is because callers may
+// depend on it. Confirm before changing.
+size_t regerror(int errcode, const regex_t* reg, char* errbuf, size_t errbuf_size)
+{
+    auto preg = impl_from(reg);
+
+    String error;
+    if (!preg || !preg->re || errcode != preg->re_pat_err)
+        error = get_error((ReError)errcode);
+    else
+        error = preg->re->error_string(get_error(preg->re_pat_err));
+
+    // Size query only, or no buffer to write into.
+    if (!errbuf || !errbuf_size)
+        return error.length();
+
+    if (!error.copy_characters_to_buffer(errbuf, errbuf_size))
+        return 0;
+
+    return error.length();
+}
+
+// Releases the state that regcomp() attached to `reg` and clears the pointer.
+// Does nothing when `reg` is null or carries no state.
+void regfree(regex_t* reg)
+{
+    auto* state = impl_from(reg);
+    if (!state)
+        return;
+
+    delete state;
+    reg->__data = nullptr;
+}
+}