summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
author ailin-nemui <ailin-nemui@users.noreply.github.com> 2017-07-03 09:53:09 +0200
committer GitHub <noreply@github.com> 2017-07-03 09:53:09 +0200
commit 1656dc1e549cfbbe330aafcd8b92177aa9a5555f (patch)
tree 3e0c555e3a15ee58e50c7db7f06dd6262d01b8de /src/core
parent 2b209348bd2a90afbe1782b0b321d99892b7002b (diff)
parent 1fc170ee11c308fae14a06aa29f2d8e3882cf9ce (diff)
download irssi-1656dc1e549cfbbe330aafcd8b92177aa9a5555f.zip
Merge pull request #653 from ailin-nemui/regexex
Enable UTF8 in GRegex
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/Makefile.am 8
-rw-r--r-- src/core/ignore.c 40
-rw-r--r-- src/core/ignore.h 11
-rw-r--r-- src/core/iregex-gregex.c 165
-rw-r--r-- src/core/iregex-regexh.c 99
-rw-r--r-- src/core/iregex.h 47
-rw-r--r-- src/core/misc.c 4
7 files changed, 326 insertions, 48 deletions
diff --git a/src/core/Makefile.am b/src/core/Makefile.am
index 10bd035a..91daba3f 100644
--- a/src/core/Makefile.am
+++ b/src/core/Makefile.am
@@ -7,6 +7,12 @@ AM_CPPFLAGS = \
-DSYSCONFDIR=\""$(sysconfdir)"\" \
-DMODULEDIR=\""$(libdir)/irssi/modules"\"
+if USE_GREGEX
+regex_impl=iregex-gregex.c
+else
+regex_impl=iregex-regexh.c
+endif
+
libcore_a_SOURCES = \
args.c \
channels.c \
@@ -45,6 +51,7 @@ libcore_a_SOURCES = \
signals.c \
special-vars.c \
utf8.c \
+ $(regex_impl) \
wcwidth.c \
tls.c \
write-buffer.c
@@ -97,6 +104,7 @@ pkginc_core_HEADERS = \
signals.h \
special-vars.h \
utf8.h \
+ iregex.h \
window-item-def.h \
tls.h \
write-buffer.h \
diff --git a/src/core/ignore.c b/src/core/ignore.c
index d4a92e3c..cec91e6b 100644
--- a/src/core/ignore.c
+++ b/src/core/ignore.c
@@ -24,6 +24,7 @@
#include "levels.h"
#include "lib-config/iconfig.h"
#include "settings.h"
+#include "iregex.h"
#include "masks.h"
#include "servers.h"
@@ -67,13 +68,8 @@ static int ignore_match_pattern(IGNORE_REC *rec, const char *text)
return FALSE;
if (rec->regexp) {
-#ifdef USE_GREGEX
return rec->preg != NULL &&
- g_regex_match(rec->preg, text, 0, NULL);
-#else
- return rec->regexp_compiled &&
- regexec(&rec->preg, text, 0, NULL, 0) == 0;
-#endif
+ i_regex_match(rec->preg, text, 0, NULL);
}
return rec->fullword ?
@@ -327,41 +323,19 @@ static void ignore_remove_config(IGNORE_REC *rec)
static void ignore_init_rec(IGNORE_REC *rec)
{
-#ifdef USE_GREGEX
if (rec->preg != NULL)
- g_regex_unref(rec->preg);
+ i_regex_unref(rec->preg);
if (rec->regexp && rec->pattern != NULL) {
GError *re_error = NULL;
- rec->preg = g_regex_new(rec->pattern, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_CASELESS, 0, &re_error);
+ rec->preg = i_regex_new(rec->pattern, G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &re_error);
if (rec->preg == NULL) {
g_warning("Failed to compile regexp '%s': %s", rec->pattern, re_error->message);
g_error_free(re_error);
}
}
-#else
- char *errbuf;
- int errcode, errbuf_len;
-
- if (rec->regexp_compiled) regfree(&rec->preg);
- rec->regexp_compiled = FALSE;
-
- if (rec->regexp && rec->pattern != NULL) {
- errcode = regcomp(&rec->preg, rec->pattern,
- REG_EXTENDED|REG_ICASE|REG_NOSUB);
- if (errcode != 0) {
- errbuf_len = regerror(errcode, &rec->preg, 0, 0);
- errbuf = g_malloc(errbuf_len);
- regerror(errcode, &rec->preg, errbuf, errbuf_len);
- g_warning("Failed to compile regexp '%s': %s", rec->pattern, errbuf);
- g_free(errbuf);
- } else {
- rec->regexp_compiled = TRUE;
- }
- }
-#endif
}
void ignore_add_rec(IGNORE_REC *rec)
@@ -381,11 +355,7 @@ static void ignore_destroy(IGNORE_REC *rec, int send_signal)
if (send_signal)
signal_emit("ignore destroyed", 1, rec);
-#ifdef USE_GREGEX
- if (rec->preg != NULL) g_regex_unref(rec->preg);
-#else
- if (rec->regexp_compiled) regfree(&rec->preg);
-#endif
+ if (rec->preg != NULL) i_regex_unref(rec->preg);
if (rec->channels != NULL) g_strfreev(rec->channels);
g_free_not_null(rec->mask);
g_free_not_null(rec->servertag);
diff --git a/src/core/ignore.h b/src/core/ignore.h
index 80ae1d12..e18be3c4 100644
--- a/src/core/ignore.h
+++ b/src/core/ignore.h
@@ -1,9 +1,7 @@
#ifndef __IGNORE_H
#define __IGNORE_H
-#ifndef USE_GREGEX
-# include <regex.h>
-#endif
+#include "iregex.h"
typedef struct _IGNORE_REC IGNORE_REC;
@@ -20,12 +18,7 @@ struct _IGNORE_REC {
unsigned int regexp:1;
unsigned int fullword:1;
unsigned int replies:1; /* ignore replies to nick in channel */
-#ifdef USE_GREGEX
- GRegex *preg;
-#else
- unsigned int regexp_compiled:1; /* should always be TRUE, unless regexp is invalid */
- regex_t preg;
-#endif
+ Regex *preg;
};
extern GSList *ignores;
diff --git a/src/core/iregex-gregex.c b/src/core/iregex-gregex.c
new file mode 100644
index 00000000..36b4faa4
--- /dev/null
+++ b/src/core/iregex-gregex.c
@@ -0,0 +1,165 @@
+#include <string.h>
+
+#include "iregex.h"
+
+struct _MatchInfo { /* GRegex-backend match state filled in by i_regex_match() */
+ const char *valid_string; /* sanitized copy of the subject; stays NULL when the subject was already valid UTF-8 */
+ GMatchInfo *g_match_info; /* underlying GLib match result */
+};
+
+/* Return TEXT as valid UTF-8.
+   When TEXT already validates it is returned unchanged and *FREE_RET
+   (if given) is set to FALSE.  Otherwise a newly allocated copy is
+   returned in which every undecodable byte is replaced by the code
+   point 0xfff00 | byte, and *FREE_RET (if given) is set to TRUE so the
+   caller knows it has to free the result. */
+static const gchar *
+make_valid_utf8(const gchar *text, gboolean *free_ret)
+{
+	GString *fixed;
+	const gchar *cur;
+	gboolean needs_copy = !g_utf8_validate(text, -1, NULL);
+
+	if (free_ret != NULL)
+		*free_ret = needs_copy;
+	if (!needs_copy)
+		return text;
+
+	fixed = g_string_sized_new(strlen(text) + 12);
+
+	for (cur = text; *cur != '\0'; ) {
+		gunichar ch = g_utf8_get_char_validated(cur, -1);
+
+		if (ch != (gunichar)-1 && ch != (gunichar)-2) {
+			/* well-formed sequence: copy it through */
+			g_string_append_unichar(fixed, ch);
+			cur = g_utf8_next_char(cur);
+			continue;
+		}
+
+		/* the unicode is invalid: encode the byte into PUA-A
+		   and resume decoding at the very next byte */
+		g_string_append_unichar(fixed, (gunichar) (0xfff00 | (*cur & 0xff)));
+		cur++;
+	}
+
+	return g_string_free(fixed, FALSE);
+}
+
+/* Compile PATTERN with g_regex_new().  The pattern is first passed
+   through make_valid_utf8(); a temporary copy created by that step is
+   released again before returning.  Returns the compiled regex, or
+   whatever g_regex_new() returned on failure (with *ERROR set by it). */
+Regex *
+i_regex_new (const gchar *pattern,
+             GRegexCompileFlags compile_options,
+             GRegexMatchFlags match_options,
+             GError **error)
+{
+	gboolean owned;
+	const gchar *utf8_pattern = make_valid_utf8(pattern, &owned);
+	Regex *compiled = g_regex_new(utf8_pattern, compile_options, match_options, error);
+
+	if (owned)
+		g_free_not_null((gchar *)utf8_pattern);
+
+	return compiled;
+}
+
+void
+i_regex_unref (Regex *regex) /* GRegex backend: simply forwards to g_regex_unref() */
+{
+ g_regex_unref(regex);
+}
+
+/* Match STRING against REGEX.  STRING is sanitized with
+   make_valid_utf8() before it is handed to g_regex_match().
+   If MATCH_INFO is non-NULL a new MatchInfo is stored in *MATCH_INFO;
+   when sanitizing had to allocate a copy, that copy is kept in the
+   MatchInfo's valid_string, otherwise it is freed here.
+   Returns the result of g_regex_match(). */
+gboolean
+i_regex_match (const Regex *regex,
+               const gchar *string,
+               GRegexMatchFlags match_options,
+               MatchInfo **match_info)
+{
+	gboolean copied;
+	gboolean matched;
+	GMatchInfo **g_info = NULL;
+	const gchar *subject = make_valid_utf8(string, &copied);
+
+	if (match_info != NULL) {
+		*match_info = g_new0(MatchInfo, 1);
+		g_info = &(*match_info)->g_match_info;
+	}
+
+	matched = g_regex_match(regex, subject, match_options, g_info);
+
+	if (copied && match_info != NULL) {
+		/* the match result refers to the copy, so keep it around */
+		(*match_info)->valid_string = subject;
+	} else if (copied) {
+		g_free_not_null((gchar *)subject);
+	}
+
+	return matched;
+}
+
+/* Length of STR in bytes, except that every code point carrying the
+   0xfff00 marker bits (the encoding make_valid_utf8() uses for a
+   single undecodable byte) is counted as one byte only. */
+static gsize
+strlen_pua_oddly(const char *str)
+{
+	gsize total = 0;
+	const gchar *cur, *next;
+
+	for (cur = str; *cur != '\0'; cur = next) {
+		gunichar ch = g_utf8_get_char(cur);
+
+		next = g_utf8_next_char(cur);
+
+		if ((ch & 0xfff00) == 0xfff00)
+			total++;		/* it is our PUA encoded byte */
+		else
+			total += next - cur;	/* real character: its encoded size */
+	}
+
+	return total;
+}
+
+/* Fetch the byte offsets of capture group MATCH_NUM.
+   When i_regex_match() had to sanitize the subject (valid_string is
+   set), the offsets reported by GLib refer to the sanitized copy; they
+   are converted here so that START_POS / END_POS are expressed as if
+   they were on the original string.
+   Returns FALSE (leaving the outputs untouched) if MATCH_INFO is NULL
+   or GLib cannot provide the group.  A group that did not take part in
+   the match yields TRUE with both positions set to -1, just like
+   g_match_info_fetch_pos(). */
+gboolean
+i_match_info_fetch_pos (const MatchInfo *match_info,
+                        gint match_num,
+                        gint *start_pos,
+                        gint *end_pos)
+{
+	gint tmp_start, tmp_end, new_start_pos;
+	const gchar *str;
+	gchar *to_start;
+
+	g_return_val_if_fail(match_info != NULL, FALSE);
+
+	/* nothing to translate: subject was valid, or caller wants no offsets */
+	if (!match_info->valid_string || (!start_pos && !end_pos))
+		return g_match_info_fetch_pos(match_info->g_match_info,
+					      match_num, start_pos, end_pos);
+
+	/* on failure GLib does not write the offsets, so do not read them */
+	if (!g_match_info_fetch_pos(match_info->g_match_info,
+				    match_num, &tmp_start, &tmp_end))
+		return FALSE;
+
+	if (tmp_start < 0 || tmp_end < 0) {
+		/* the group exists but matched nothing: pass -1 through
+		   instead of using it as a length below */
+		if (start_pos)
+			*start_pos = -1;
+		if (end_pos)
+			*end_pos = -1;
+		return TRUE;
+	}
+
+	str = match_info->valid_string;
+
+	to_start = g_strndup(str, tmp_start);
+	new_start_pos = strlen_pua_oddly(to_start);
+	g_free(to_start);
+
+	if (start_pos)
+		*start_pos = new_start_pos;
+
+	if (end_pos) {
+		gchar *to_end = g_strndup(str + tmp_start, tmp_end - tmp_start);
+		*end_pos = new_start_pos + strlen_pua_oddly(to_end);
+		g_free(to_end);
+	}
+
+	return TRUE;
+}
+
+gboolean
+i_match_info_matches (const MatchInfo *match_info) /* GRegex backend: reports what g_match_info_matches() says */
+{
+ g_return_val_if_fail(match_info != NULL, FALSE); /* a NULL match_info yields FALSE */
+
+ return g_match_info_matches(match_info->g_match_info);
+}
+
+/* Release a MatchInfo obtained from i_regex_match(), including the
+   sanitized subject copy it may own.  Passing NULL is a no-op. */
+void
+i_match_info_free (MatchInfo *match_info)
+{
+	if (match_info == NULL)
+		return;
+
+	/* free the GLib result first: it still points into valid_string */
+	g_match_info_free(match_info->g_match_info);
+	/* valid_string is only set when i_regex_match() allocated a copy,
+	   and nothing else frees it; g_free() ignores NULL */
+	g_free((gchar *)match_info->valid_string);
+	g_free(match_info);
+}
diff --git a/src/core/iregex-regexh.c b/src/core/iregex-regexh.c
new file mode 100644
index 00000000..897eb7e2
--- /dev/null
+++ b/src/core/iregex-regexh.c
@@ -0,0 +1,99 @@
+#include "iregex.h"
+
+/* Compile PATTERN as a POSIX extended regular expression.
+   Only G_REGEX_CASELESS (REG_ICASE) and G_REGEX_MULTILINE (REG_NEWLINE)
+   of COMPILE_OPTIONS are honoured.  MATCH_OPTIONS is ignored: a regex_t
+   cannot store default match flags, so they must be given to
+   i_regex_match() instead.
+   Returns a newly allocated Regex, or NULL with *ERROR set to the
+   regerror() message when compilation fails. */
+Regex *
+i_regex_new (const gchar *pattern,
+             GRegexCompileFlags compile_options,
+             GRegexMatchFlags match_options,
+             GError **error)
+{
+	Regex *regex;
+	char *errbuf;
+	size_t errbuf_len;
+	int cflags, errcode;
+
+	/* REG_NOTBOL / REG_NOTEOL are regexec() flags.  Passing them to
+	   regcomp() is wrong: their values overlap with unrelated compile
+	   flags on common libcs, silently changing how the pattern is
+	   compiled. */
+	(void)match_options;
+
+	cflags = REG_EXTENDED;
+	if (compile_options & G_REGEX_CASELESS)
+		cflags |= REG_ICASE;
+	if (compile_options & G_REGEX_MULTILINE)
+		cflags |= REG_NEWLINE;
+
+	regex = g_new0(Regex, 1);
+	errcode = regcomp(regex, pattern, cflags);
+	if (errcode == 0)
+		return regex;
+
+	/* first call only asks for the size of the message */
+	errbuf_len = regerror(errcode, regex, NULL, 0);
+	errbuf = g_malloc(errbuf_len);
+	regerror(errcode, regex, errbuf, errbuf_len);
+	g_set_error(error, G_REGEX_ERROR, errcode, "%s", errbuf);
+	g_free(errbuf);
+	g_free(regex);
+	return NULL;
+}
+
+/* Free a Regex created by i_regex_new().  NULL is ignored so that
+   callers need not guard the call; regfree() itself must not be given
+   a NULL pointer. */
+void
+i_regex_unref (Regex *regex)
+{
+	if (regex == NULL)
+		return;
+
+	regfree(regex);
+	g_free(regex);
+}
+
+/* Match STRING against REGEX with regexec().
+   If MATCH_INFO is non-NULL, *MATCH_INFO receives a newly allocated
+   array with one entry per group (entry 0 is the whole match), to be
+   released with i_match_info_free().  When nothing matched every entry
+   is set to -1 so that i_match_info_matches() reports FALSE.
+   *MATCH_INFO is set to NULL if REGEX is NULL.
+   Only G_REGEX_MATCH_NOTBOL / G_REGEX_MATCH_NOTEOL of MATCH_OPTIONS are
+   honoured.  Returns TRUE on a match. */
+gboolean
+i_regex_match (const Regex *regex,
+               const gchar *string,
+               GRegexMatchFlags match_options,
+               MatchInfo **match_info)
+{
+	size_t groups = 0;
+	size_t i;
+	int eflags = 0;
+	gboolean matched;
+
+	/* never leave the caller with an uninitialised pointer */
+	if (match_info != NULL)
+		*match_info = NULL;
+
+	g_return_val_if_fail(regex != NULL, FALSE);
+
+	if (match_info != NULL) {
+		groups = 1 + regex->re_nsub;
+		*match_info = g_new0(MatchInfo, groups);
+	}
+
+	if (match_options & G_REGEX_MATCH_NOTBOL)
+		eflags |= REG_NOTBOL;
+	if (match_options & G_REGEX_MATCH_NOTEOL)
+		eflags |= REG_NOTEOL;
+
+	matched = regexec(regex, string, groups,
+			  groups ? *match_info : NULL, eflags) == 0;
+
+	if (!matched) {
+		/* regexec() leaves the array unspecified on failure and the
+		   zero fill above would look like a match at offset 0 */
+		for (i = 0; i < groups; i++) {
+			(*match_info)[i].rm_so = -1;
+			(*match_info)[i].rm_eo = -1;
+		}
+	}
+
+	return matched;
+}
+
+/* Copy the start/end offsets of group MATCH_NUM out of the regmatch_t
+   array MATCH_INFO.  Either output pointer may be NULL.  MATCH_NUM is
+   not range-checked here.  Always returns TRUE. */
+gboolean
+i_match_info_fetch_pos (const MatchInfo *match_info,
+                        gint match_num,
+                        gint *start_pos,
+                        gint *end_pos)
+{
+	const MatchInfo *group = &match_info[match_num];
+
+	if (start_pos != NULL)
+		*start_pos = group->rm_so;
+
+	if (end_pos != NULL)
+		*end_pos = group->rm_eo;
+
+	return TRUE;
+}
+
+gboolean
+i_match_info_matches (const MatchInfo *match_info) /* regex.h backend: match_info is the array filled by i_regex_match() */
+{
+ g_return_val_if_fail(match_info != NULL, FALSE); /* a NULL match_info yields FALSE */
+
+ return match_info[0].rm_so != -1; /* entry 0 is the whole match; a start offset of -1 is treated as "no match" */
+}
+
+void
+i_match_info_free (MatchInfo *match_info) /* regex.h backend: releases the array allocated by i_regex_match() */
+{
+ g_free(match_info);
+}
diff --git a/src/core/iregex.h b/src/core/iregex.h
new file mode 100644
index 00000000..e67378d7
--- /dev/null
+++ b/src/core/iregex.h
@@ -0,0 +1,47 @@
+#ifndef __IREGEX_H
+#define __IREGEX_H
+
+/* Thin regular-expression abstraction.  The same API is implemented
+   either on top of GRegex (USE_GREGEX) or on top of POSIX <regex.h>.
+   The guard is named after this file so that it cannot clash with the
+   guard of the system <regex.h> included below. */
+
+#include "common.h"
+
+#ifdef USE_GREGEX
+
+#include <glib.h>
+typedef GRegex Regex;
+typedef struct _MatchInfo MatchInfo;
+
+#else
+
+#include <regex.h>
+typedef regex_t Regex;
+typedef regmatch_t MatchInfo;
+
+#endif
+
+/* Whether the match stored in match_info succeeded. */
+gboolean
+i_match_info_matches (const MatchInfo *match_info);
+
+/* Release a MatchInfo returned through i_regex_match(). */
+void
+i_match_info_free (MatchInfo *match_info);
+
+/* Compile pattern; returns NULL and sets *error on failure. */
+Regex *
+i_regex_new (const gchar *pattern,
+             GRegexCompileFlags compile_options,
+             GRegexMatchFlags match_options,
+             GError **error);
+
+/* Release a Regex returned by i_regex_new(). */
+void
+i_regex_unref (Regex *regex);
+
+/* Match string against regex.  If match_info is non-NULL, a MatchInfo
+   is stored in *match_info and must be released with
+   i_match_info_free(). */
+gboolean
+i_regex_match (const Regex *regex,
+               const gchar *string,
+               GRegexMatchFlags match_options,
+               MatchInfo **match_info);
+
+/* Fetch the start/end offsets of group match_num; either output
+   pointer may be NULL. */
+gboolean
+i_match_info_fetch_pos (const MatchInfo *match_info,
+                        gint match_num,
+                        gint *start_pos,
+                        gint *end_pos);
+
+#endif
diff --git a/src/core/misc.c b/src/core/misc.c
index ce49925b..7249b1a7 100644
--- a/src/core/misc.c
+++ b/src/core/misc.c
@@ -22,10 +22,6 @@
#include "misc.h"
#include "commands.h"
-#ifndef USE_GREGEX
-# include <regex.h>
-#endif
-
typedef struct {
int condition;
GInputFunction function;