diff options
author | ailin-nemui <ailin-nemui@users.noreply.github.com> | 2017-07-03 09:53:09 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-03 09:53:09 +0200 |
commit | 1656dc1e549cfbbe330aafcd8b92177aa9a5555f (patch) | |
tree | 3e0c555e3a15ee58e50c7db7f06dd6262d01b8de /src/core | |
parent | 2b209348bd2a90afbe1782b0b321d99892b7002b (diff) | |
parent | 1fc170ee11c308fae14a06aa29f2d8e3882cf9ce (diff) | |
download | irssi-1656dc1e549cfbbe330aafcd8b92177aa9a5555f.zip |
Merge pull request #653 from ailin-nemui/regexex
Enable UTF8 in GRegex
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/Makefile.am | 8 | ||||
-rw-r--r-- | src/core/ignore.c | 40 | ||||
-rw-r--r-- | src/core/ignore.h | 11 | ||||
-rw-r--r-- | src/core/iregex-gregex.c | 165 | ||||
-rw-r--r-- | src/core/iregex-regexh.c | 99 | ||||
-rw-r--r-- | src/core/iregex.h | 47 | ||||
-rw-r--r-- | src/core/misc.c | 4 |
7 files changed, 326 insertions, 48 deletions
diff --git a/src/core/Makefile.am b/src/core/Makefile.am index 10bd035a..91daba3f 100644 --- a/src/core/Makefile.am +++ b/src/core/Makefile.am @@ -7,6 +7,12 @@ AM_CPPFLAGS = \ -DSYSCONFDIR=\""$(sysconfdir)"\" \ -DMODULEDIR=\""$(libdir)/irssi/modules"\" +if USE_GREGEX +regex_impl=iregex-gregex.c +else +regex_impl=iregex-regexh.c +endif + libcore_a_SOURCES = \ args.c \ channels.c \ @@ -45,6 +51,7 @@ libcore_a_SOURCES = \ signals.c \ special-vars.c \ utf8.c \ + $(regex_impl) \ wcwidth.c \ tls.c \ write-buffer.c @@ -97,6 +104,7 @@ pkginc_core_HEADERS = \ signals.h \ special-vars.h \ utf8.h \ + iregex.h \ window-item-def.h \ tls.h \ write-buffer.h \ diff --git a/src/core/ignore.c b/src/core/ignore.c index d4a92e3c..cec91e6b 100644 --- a/src/core/ignore.c +++ b/src/core/ignore.c @@ -24,6 +24,7 @@ #include "levels.h" #include "lib-config/iconfig.h" #include "settings.h" +#include "iregex.h" #include "masks.h" #include "servers.h" @@ -67,13 +68,8 @@ static int ignore_match_pattern(IGNORE_REC *rec, const char *text) return FALSE; if (rec->regexp) { -#ifdef USE_GREGEX return rec->preg != NULL && - g_regex_match(rec->preg, text, 0, NULL); -#else - return rec->regexp_compiled && - regexec(&rec->preg, text, 0, NULL, 0) == 0; -#endif + i_regex_match(rec->preg, text, 0, NULL); } return rec->fullword ? @@ -327,41 +323,19 @@ static void ignore_remove_config(IGNORE_REC *rec) static void ignore_init_rec(IGNORE_REC *rec) { -#ifdef USE_GREGEX if (rec->preg != NULL) - g_regex_unref(rec->preg); + i_regex_unref(rec->preg); if (rec->regexp && rec->pattern != NULL) { GError *re_error = NULL; - rec->preg = g_regex_new(rec->pattern, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_CASELESS, 0, &re_error); + rec->preg = i_regex_new(rec->pattern, G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &re_error); if (rec->preg == NULL) { g_warning("Failed to compile regexp '%s': %s", rec->pattern, re_error->message); g_error_free(re_error); } } -#else - char *errbuf; - int errcode, errbuf_len; - - if (rec->regexp_compiled) regfree(&rec->preg); - rec->regexp_compiled = FALSE; - - if (rec->regexp && rec->pattern != NULL) { - errcode = regcomp(&rec->preg, rec->pattern, - REG_EXTENDED|REG_ICASE|REG_NOSUB); - if (errcode != 0) { - errbuf_len = regerror(errcode, &rec->preg, 0, 0); - errbuf = g_malloc(errbuf_len); - regerror(errcode, &rec->preg, errbuf, errbuf_len); - g_warning("Failed to compile regexp '%s': %s", rec->pattern, errbuf); - g_free(errbuf); - } else { - rec->regexp_compiled = TRUE; - } - } -#endif } void ignore_add_rec(IGNORE_REC *rec) @@ -381,11 +355,7 @@ static void ignore_destroy(IGNORE_REC *rec, int send_signal) if (send_signal) signal_emit("ignore destroyed", 1, rec); -#ifdef USE_GREGEX - if (rec->preg != NULL) g_regex_unref(rec->preg); -#else - if (rec->regexp_compiled) regfree(&rec->preg); -#endif + if (rec->preg != NULL) i_regex_unref(rec->preg); if (rec->channels != NULL) g_strfreev(rec->channels); g_free_not_null(rec->mask); g_free_not_null(rec->servertag); diff --git a/src/core/ignore.h b/src/core/ignore.h index 80ae1d12..e18be3c4 100644 --- a/src/core/ignore.h +++ b/src/core/ignore.h @@ -1,9 +1,7 @@ #ifndef __IGNORE_H #define __IGNORE_H -#ifndef USE_GREGEX -# include <regex.h> -#endif +#include "iregex.h" typedef struct _IGNORE_REC IGNORE_REC; @@ -20,12 +18,7 @@ struct _IGNORE_REC { unsigned int regexp:1; unsigned int fullword:1; unsigned int replies:1; /* ignore replies to nick in channel */ -#ifdef USE_GREGEX - GRegex *preg; -#else - unsigned int regexp_compiled:1; /* should always be TRUE, unless regexp is invalid */ - regex_t preg; -#endif + Regex *preg; }; extern GSList *ignores; diff --git a/src/core/iregex-gregex.c b/src/core/iregex-gregex.c new file mode 100644 index 00000000..36b4faa4 --- /dev/null +++ b/src/core/iregex-gregex.c @@ -0,0 +1,165 @@ +#include <string.h> + +#include "iregex.h" + +struct _MatchInfo { + const char *valid_string; + GMatchInfo *g_match_info; +}; + +static const gchar * +make_valid_utf8(const gchar *text, gboolean *free_ret) +{ + GString *str; + const gchar *ptr; + if (g_utf8_validate(text, -1, NULL)) { + if (free_ret) + *free_ret = FALSE; + return text; + } + + str = g_string_sized_new(strlen(text) + 12); + + ptr = text; + while (*ptr) { + gunichar c = g_utf8_get_char_validated(ptr, -1); + /* the unicode is invalid */ + if (c == (gunichar)-1 || c == (gunichar)-2) { + /* encode the byte into PUA-A */ + g_string_append_unichar(str, (gunichar) (0xfff00 | (*ptr & 0xff))); + ptr++; + } else { + g_string_append_unichar(str, c); + ptr = g_utf8_next_char(ptr); + } + } + + if (free_ret) + *free_ret = TRUE; + return g_string_free(str, FALSE); +} + +Regex * +i_regex_new (const gchar *pattern, + GRegexCompileFlags compile_options, + GRegexMatchFlags match_options, + GError **error) +{ + const gchar *valid_pattern; + gboolean free_valid_pattern; + Regex *ret = NULL; + + valid_pattern = make_valid_utf8(pattern, &free_valid_pattern); + ret = g_regex_new(valid_pattern, compile_options, match_options, error); + + if (free_valid_pattern) + g_free_not_null((gchar *)valid_pattern); + + return ret; +} + +void +i_regex_unref (Regex *regex) +{ + g_regex_unref(regex); +} + +gboolean +i_regex_match (const Regex *regex, + const gchar *string, + GRegexMatchFlags match_options, + MatchInfo **match_info) +{ + gboolean ret; + gboolean free_valid_string; + const gchar *valid_string = make_valid_utf8(string, &free_valid_string); + + if (match_info != NULL) + *match_info = g_new0(MatchInfo, 1); + + ret = g_regex_match(regex, valid_string, match_options, + match_info != NULL ? &(*match_info)->g_match_info : NULL); + + if (free_valid_string) { + if (match_info != NULL) + (*match_info)->valid_string = valid_string; + else + g_free_not_null((gchar *)valid_string); + } + + return ret; +} + +static gsize +strlen_pua_oddly(const char *str) +{ + const gchar *ptr; + gsize ret = 0; + ptr = str; + + while (*ptr) { + const gchar *old; + gunichar c = g_utf8_get_char(ptr); + old = ptr; + ptr = g_utf8_next_char(ptr); + + /* it is our PUA encoded byte */ + if ((c & 0xfff00) == 0xfff00) + ret++; + else + ret += ptr - old; + } + + return ret; +} + +/* new_string should be passed in here from the i_regex_match call. + The start_pos and end_pos will then be calculated as if they were on + the original string */ +gboolean +i_match_info_fetch_pos (const MatchInfo *match_info, + gint match_num, + gint *start_pos, + gint *end_pos) +{ + gint tmp_start, tmp_end, new_start_pos; + gboolean ret; + + if (!match_info->valid_string || (!start_pos && !end_pos)) + return g_match_info_fetch_pos(match_info->g_match_info, + match_num, start_pos, end_pos); + + ret = g_match_info_fetch_pos(match_info->g_match_info, + match_num, &tmp_start, &tmp_end); + if (start_pos || end_pos) { + const gchar *str = match_info->valid_string; + gchar *to_start = g_strndup(str, tmp_start); + new_start_pos = strlen_pua_oddly(to_start); + g_free_not_null(to_start); + + if (start_pos) + *start_pos = new_start_pos; + + if (end_pos) { + gchar *to_end = g_strndup(str + tmp_start, tmp_end - tmp_start); + *end_pos = new_start_pos + strlen_pua_oddly(to_end); + g_free_not_null(to_end); + } + } + return ret; +} + +gboolean +i_match_info_matches (const MatchInfo *match_info) +{ + g_return_val_if_fail(match_info != NULL, FALSE); + + return g_match_info_matches(match_info->g_match_info); +} + +void +i_match_info_free (MatchInfo *match_info) +{ + g_match_info_free(match_info->g_match_info); + g_free(match_info); +} diff --git a/src/core/iregex-regexh.c b/src/core/iregex-regexh.c new file mode 100644 index 00000000..897eb7e2 --- /dev/null +++ b/src/core/iregex-regexh.c @@ -0,0 +1,99 @@ +#include "iregex.h" + +Regex * +i_regex_new (const gchar *pattern, + GRegexCompileFlags compile_options, + GRegexMatchFlags match_options, + GError **error) +{ + Regex *regex; + char *errbuf; + int cflags; + int errcode, errbuf_len; + + regex = g_new0(Regex, 1); + cflags = REG_EXTENDED; + if (compile_options & G_REGEX_CASELESS) + cflags |= REG_ICASE; + if (compile_options & G_REGEX_MULTILINE) + cflags |= REG_NEWLINE; + if (match_options & G_REGEX_MATCH_NOTBOL) + cflags |= REG_NOTBOL; + if (match_options & G_REGEX_MATCH_NOTEOL) + cflags |= REG_NOTEOL; + + errcode = regcomp(regex, pattern, cflags); + if (errcode != 0) { + errbuf_len = regerror(errcode, regex, 0, 0); + errbuf = g_malloc(errbuf_len); + regerror(errcode, regex, errbuf, errbuf_len); + g_set_error(error, G_REGEX_ERROR, errcode, "%s", errbuf); + g_free(errbuf); + g_free(regex); + return NULL; + } else { + return regex; + } +} + +void +i_regex_unref (Regex *regex) +{ + regfree(regex); + g_free(regex); +} + +gboolean +i_regex_match (const Regex *regex, + const gchar *string, + GRegexMatchFlags match_options, + MatchInfo **match_info) +{ + int groups; + int eflags; + + g_return_val_if_fail(regex != NULL, FALSE); + + if (match_info != NULL) { + groups = 1 + regex->re_nsub; + *match_info = g_new0(MatchInfo, groups); + } else { + groups = 0; + } + + eflags = 0; + if (match_options & G_REGEX_MATCH_NOTBOL) + eflags |= REG_NOTBOL; + if (match_options & G_REGEX_MATCH_NOTEOL) + eflags |= REG_NOTEOL; + + return regexec(regex, string, groups, groups ? *match_info : NULL, eflags) == 0; +} + +gboolean +i_match_info_fetch_pos (const MatchInfo *match_info, + gint match_num, + gint *start_pos, + gint *end_pos) +{ + if (start_pos != NULL) + *start_pos = match_info[match_num].rm_so; + if (end_pos != NULL) + *end_pos = match_info[match_num].rm_eo; + + return TRUE; +} + +gboolean +i_match_info_matches (const MatchInfo *match_info) +{ + g_return_val_if_fail(match_info != NULL, FALSE); + + return match_info[0].rm_so != -1; +} + +void +i_match_info_free (MatchInfo *match_info) +{ + g_free(match_info); +} diff --git a/src/core/iregex.h b/src/core/iregex.h new file mode 100644 index 00000000..e67378d7 --- /dev/null +++ b/src/core/iregex.h @@ -0,0 +1,47 @@ +#ifndef __REGEX_H +#define __REGEX_H + +#include "common.h" + +#ifdef USE_GREGEX + +#include <glib.h> +typedef GRegex Regex; +typedef struct _MatchInfo MatchInfo; + +#else + +#include <regex.h> +typedef regex_t Regex; +typedef regmatch_t MatchInfo; + +#endif + +gboolean +i_match_info_matches (const MatchInfo *match_info); + +void +i_match_info_free (MatchInfo *match_info); + +Regex * +i_regex_new (const gchar *pattern, + GRegexCompileFlags compile_options, + GRegexMatchFlags match_options, + GError **error); + +void +i_regex_unref (Regex *regex); + +gboolean +i_regex_match (const Regex *regex, + const gchar *string, + GRegexMatchFlags match_options, + MatchInfo **match_info); + +gboolean +i_match_info_fetch_pos (const MatchInfo *match_info, + gint match_num, + gint *start_pos, + gint *end_pos); + +#endif diff --git a/src/core/misc.c b/src/core/misc.c index ce49925b..7249b1a7 100644 --- a/src/core/misc.c +++ b/src/core/misc.c @@ -22,10 +22,6 @@ #include "misc.h" #include "commands.h" -#ifndef USE_GREGEX -# include <regex.h> -#endif - typedef struct { int condition; GInputFunction function; |