summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: ailin-nemui <ailin-nemui@users.noreply.github.com> 2017-02-16 22:48:13 +0100
committer: ailin-nemui <ailin-nemui@users.noreply.github.com> 2017-06-04 00:52:53 +0200
commit: 79bbca4644cad7f2dee89c7ac6b8f9acc2c8b427 (patch)
tree: 86b908491ac1ab00cb079526b1f32a5d294d75a6 /src
parent: 31b9d115b065570020ce9be1a1d8cd49212f70a9 (diff)
download: irssi-79bbca4644cad7f2dee89c7ac6b8f9acc2c8b427.zip
Refactor regex and implement UTF8 mode for GRegex
- with non-unicode byte to Private Use Area A mapping
- move all ifdefs to iregex.h file only
Diffstat (limited to 'src')
-rw-r--r--src/core/Makefile.am8
-rw-r--r--src/core/ignore.c40
-rw-r--r--src/core/ignore.h11
-rw-r--r--src/core/iregex-gregex.c137
-rw-r--r--src/core/iregex-regexh.c101
-rw-r--r--src/core/iregex.h52
-rw-r--r--src/core/misc.c4
-rw-r--r--src/fe-common/core/fe-ignore.c5
-rw-r--r--src/fe-common/core/hilight-text.c51
-rw-r--r--src/fe-common/core/hilight-text.h12
-rw-r--r--src/fe-text/textbuffer.c37
11 files changed, 326 insertions, 132 deletions
diff --git a/src/core/Makefile.am b/src/core/Makefile.am
index 10bd035a..91daba3f 100644
--- a/src/core/Makefile.am
+++ b/src/core/Makefile.am
@@ -7,6 +7,12 @@ AM_CPPFLAGS = \
-DSYSCONFDIR=\""$(sysconfdir)"\" \
-DMODULEDIR=\""$(libdir)/irssi/modules"\"
+if USE_GREGEX
+regex_impl=iregex-gregex.c
+else
+regex_impl=iregex-regexh.c
+endif
+
libcore_a_SOURCES = \
args.c \
channels.c \
@@ -45,6 +51,7 @@ libcore_a_SOURCES = \
signals.c \
special-vars.c \
utf8.c \
+ $(regex_impl) \
wcwidth.c \
tls.c \
write-buffer.c
@@ -97,6 +104,7 @@ pkginc_core_HEADERS = \
signals.h \
special-vars.h \
utf8.h \
+ iregex.h \
window-item-def.h \
tls.h \
write-buffer.h \
diff --git a/src/core/ignore.c b/src/core/ignore.c
index d4a92e3c..63a507f5 100644
--- a/src/core/ignore.c
+++ b/src/core/ignore.c
@@ -24,6 +24,7 @@
#include "levels.h"
#include "lib-config/iconfig.h"
#include "settings.h"
+#include "iregex.h"
#include "masks.h"
#include "servers.h"
@@ -67,13 +68,8 @@ static int ignore_match_pattern(IGNORE_REC *rec, const char *text)
return FALSE;
if (rec->regexp) {
-#ifdef USE_GREGEX
return rec->preg != NULL &&
- g_regex_match(rec->preg, text, 0, NULL);
-#else
- return rec->regexp_compiled &&
- regexec(&rec->preg, text, 0, NULL, 0) == 0;
-#endif
+ i_regex_match(rec->preg, text, 0, NULL, NULL);
}
return rec->fullword ?
@@ -327,41 +323,19 @@ static void ignore_remove_config(IGNORE_REC *rec)
static void ignore_init_rec(IGNORE_REC *rec)
{
-#ifdef USE_GREGEX
if (rec->preg != NULL)
- g_regex_unref(rec->preg);
+ i_regex_unref(rec->preg);
+ /* Never keep a pointer to the regex released above: when no new
+    pattern is compiled below, ignore_destroy() would unref it again. */
+ rec->preg = NULL;
if (rec->regexp && rec->pattern != NULL) {
GError *re_error = NULL;
- rec->preg = g_regex_new(rec->pattern, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_CASELESS, 0, &re_error);
+ rec->preg = i_regex_new(rec->pattern, G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &re_error);
if (rec->preg == NULL) {
g_warning("Failed to compile regexp '%s': %s", rec->pattern, re_error->message);
g_error_free(re_error);
}
}
-#else
- char *errbuf;
- int errcode, errbuf_len;
-
- if (rec->regexp_compiled) regfree(&rec->preg);
- rec->regexp_compiled = FALSE;
-
- if (rec->regexp && rec->pattern != NULL) {
- errcode = regcomp(&rec->preg, rec->pattern,
- REG_EXTENDED|REG_ICASE|REG_NOSUB);
- if (errcode != 0) {
- errbuf_len = regerror(errcode, &rec->preg, 0, 0);
- errbuf = g_malloc(errbuf_len);
- regerror(errcode, &rec->preg, errbuf, errbuf_len);
- g_warning("Failed to compile regexp '%s': %s", rec->pattern, errbuf);
- g_free(errbuf);
- } else {
- rec->regexp_compiled = TRUE;
- }
- }
-#endif
}
void ignore_add_rec(IGNORE_REC *rec)
@@ -381,11 +355,7 @@ static void ignore_destroy(IGNORE_REC *rec, int send_signal)
if (send_signal)
signal_emit("ignore destroyed", 1, rec);
-#ifdef USE_GREGEX
- if (rec->preg != NULL) g_regex_unref(rec->preg);
-#else
- if (rec->regexp_compiled) regfree(&rec->preg);
-#endif
+ if (rec->preg != NULL) i_regex_unref(rec->preg);
if (rec->channels != NULL) g_strfreev(rec->channels);
g_free_not_null(rec->mask);
g_free_not_null(rec->servertag);
diff --git a/src/core/ignore.h b/src/core/ignore.h
index 80ae1d12..e18be3c4 100644
--- a/src/core/ignore.h
+++ b/src/core/ignore.h
@@ -1,9 +1,7 @@
#ifndef __IGNORE_H
#define __IGNORE_H
-#ifndef USE_GREGEX
-# include <regex.h>
-#endif
+#include "iregex.h"
typedef struct _IGNORE_REC IGNORE_REC;
@@ -20,12 +18,7 @@ struct _IGNORE_REC {
unsigned int regexp:1;
unsigned int fullword:1;
unsigned int replies:1; /* ignore replies to nick in channel */
-#ifdef USE_GREGEX
- GRegex *preg;
-#else
- unsigned int regexp_compiled:1; /* should always be TRUE, unless regexp is invalid */
- regex_t preg;
-#endif
+ Regex *preg;
};
extern GSList *ignores;
diff --git a/src/core/iregex-gregex.c b/src/core/iregex-gregex.c
new file mode 100644
index 00000000..0de11e64
--- /dev/null
+++ b/src/core/iregex-gregex.c
@@ -0,0 +1,137 @@
+#include <string.h>
+
+#include "iregex.h"
+
+/* Return `text` as valid UTF-8.
+ *
+ * Already-valid input is returned as-is and *free_ret (when given) is set
+ * to FALSE. Otherwise a newly allocated copy is returned in which every
+ * byte that does not start a valid UTF-8 sequence is replaced by the code
+ * point 0xfff00 | byte (Private Use Area A), and *free_ret is set to TRUE
+ * to tell the caller that it owns the result. */
+const gchar *
+make_valid_utf8(const gchar *text, gboolean *free_ret)
+{
+	GString *out;
+	const gchar *cursor;
+	gboolean needs_copy;
+
+	needs_copy = !g_utf8_validate(text, -1, NULL);
+	if (free_ret != NULL)
+		*free_ret = needs_copy;
+	if (!needs_copy)
+		return text;
+
+	out = g_string_sized_new(strlen(text) + 12);
+
+	for (cursor = text; *cursor != '\0'; ) {
+		gunichar decoded = g_utf8_get_char_validated(cursor, -1);
+
+		if (decoded != (gunichar)-1 && decoded != (gunichar)-2) {
+			/* well-formed sequence: copy it through unchanged */
+			g_string_append_unichar(out, decoded);
+			cursor = g_utf8_next_char(cursor);
+			continue;
+		}
+
+		/* stray byte: encode it into PUA-A and advance by one byte */
+		g_string_append_unichar(out, (gunichar) (0xfff00 | (*cursor & 0xff)));
+		cursor++;
+	}
+
+	return g_string_free(out, FALSE);
+}
+
+/* Compile `pattern` with g_regex_new() after passing it through
+ * make_valid_utf8(). Returns the g_regex_new() result; `error` is handed
+ * to g_regex_new() unchanged. */
+Regex *
+i_regex_new (const gchar *pattern,
+             GRegexCompileFlags compile_options,
+             GRegexMatchFlags match_options,
+             GError **error)
+{
+	gboolean pattern_is_copy;
+	const gchar *utf8_pattern;
+	Regex *compiled;
+
+	utf8_pattern = make_valid_utf8(pattern, &pattern_is_copy);
+	compiled = g_regex_new(utf8_pattern, compile_options, match_options, error);
+
+	/* the converted pattern is only needed while compiling */
+	if (pattern_is_copy)
+		g_free_not_null((gchar *)utf8_pattern);
+
+	return compiled;
+}
+
+/* Release a regex obtained from i_regex_new(); forwards to g_regex_unref(). */
+void
+i_regex_unref (Regex *regex)
+{
+ g_regex_unref(regex);
+}
+
+/* Match `string` against `regex` after passing it through
+ * make_valid_utf8().
+ *
+ * match_info: optional, handed to g_regex_match() unchanged.
+ * new_string: optional out parameter. It is always written: NULL when
+ *             `string` was used unchanged, otherwise the converted copy
+ *             that was matched, which the caller must free.
+ *
+ * If new_string is not given the converted copy is freed before returning,
+ * so g_match_info_get_string must not be used on *match_info.
+ *
+ * Returns the result of g_regex_match(). */
+gboolean
+i_regex_match (const Regex *regex,
+               const gchar *string,
+               GRegexMatchFlags match_options,
+               MatchInfo **match_info,
+               const gchar **new_string)
+{
+	gboolean ret;
+	gboolean free_valid_string;
+	const gchar *valid_string;
+
+	/* give the caller a defined value to test and free on every path */
+	if (new_string != NULL)
+		*new_string = NULL;
+
+	valid_string = make_valid_utf8(string, &free_valid_string);
+
+	ret = g_regex_match(regex, valid_string, match_options, match_info);
+	if (free_valid_string) {
+		if (new_string != NULL)
+			*new_string = valid_string;
+		else
+			g_free_not_null((gchar *)valid_string);
+	}
+	return ret;
+}
+
+/* Length of `str` where each code point with (c & 0xfff00) == 0xfff00
+ * (the range make_valid_utf8() uses for stray bytes) counts as one byte
+ * and every other code point counts as its encoded byte length. */
+gsize
+strlen_pua_oddly(const char *str)
+{
+	gsize total = 0;
+	const gchar *cur = str;
+
+	while (*cur != '\0') {
+		gunichar ch = g_utf8_get_char(cur);
+		const gchar *next = g_utf8_next_char(cur);
+
+		if ((ch & 0xfff00) == 0xfff00) {
+			/* it is our PUA encoded byte */
+			total += 1;
+		} else {
+			total += next - cur;
+		}
+		cur = next;
+	}
+
+	return total;
+}
+
+/* Fetch the start/end offsets of group `match_num`.
+ *
+ * Without `new_string` (or with no output requested) this is exactly
+ * g_match_info_fetch_pos(). With `new_string` (the converted string handed
+ * back by i_regex_match) the offsets are recomputed with
+ * strlen_pua_oddly(), so each PUA-encoded byte counts as one byte.
+ *
+ * Returns FALSE, leaving the outputs untouched, when the position cannot
+ * be fetched. A group that did not take part in the match is reported the
+ * way g_match_info_fetch_pos() reports it (TRUE with -1 offsets). */
+gboolean
+i_match_info_fetch_pos (const MatchInfo *match_info,
+                        gint match_num,
+                        gint *start_pos,
+                        gint *end_pos,
+                        const gchar *new_string)
+{
+	gint tmp_start = -1, tmp_end = -1, new_start_pos;
+	gchar *to_start;
+
+	if (!new_string || (!start_pos && !end_pos))
+		return g_match_info_fetch_pos(match_info, match_num, start_pos, end_pos);
+
+	if (!g_match_info_fetch_pos(match_info, match_num, &tmp_start, &tmp_end))
+		return FALSE;
+
+	if (tmp_start < 0 || tmp_end < tmp_start) {
+		/* unset group: nothing to translate, and a negative length
+		   must never reach g_strndup() */
+		if (start_pos)
+			*start_pos = tmp_start;
+		if (end_pos)
+			*end_pos = tmp_end;
+		return TRUE;
+	}
+
+	to_start = g_strndup(new_string, tmp_start);
+	new_start_pos = strlen_pua_oddly(to_start);
+	g_free(to_start);
+
+	if (start_pos)
+		*start_pos = new_start_pos;
+
+	if (end_pos) {
+		gchar *to_end = g_strndup(new_string + tmp_start, tmp_end - tmp_start);
+		*end_pos = new_start_pos + strlen_pua_oddly(to_end);
+		g_free(to_end);
+	}
+
+	return TRUE;
+}
diff --git a/src/core/iregex-regexh.c b/src/core/iregex-regexh.c
new file mode 100644
index 00000000..aabe44f6
--- /dev/null
+++ b/src/core/iregex-regexh.c
@@ -0,0 +1,101 @@
+#include "iregex.h"
+
+/* Compile `pattern` as a POSIX extended regular expression.
+ *
+ * compile_options: G_REGEX_CASELESS maps to REG_ICASE and
+ *                  G_REGEX_MULTILINE to REG_NEWLINE; other bits are ignored.
+ * match_options:   ignored here. REG_NOTBOL/REG_NOTEOL are regexec()
+ *                  flags; pass them to i_regex_match() instead.
+ * error:           on failure, set to a G_REGEX_ERROR carrying the
+ *                  regcomp() error code and the regerror() message.
+ *
+ * Returns a newly allocated Regex to be released with i_regex_unref(),
+ * or NULL if the pattern does not compile. */
+Regex *
+i_regex_new (const gchar *pattern,
+             GRegexCompileFlags compile_options,
+             GRegexMatchFlags match_options,
+             GError **error)
+{
+	Regex *regex;
+	char *errbuf;
+	int cflags;
+	int errcode;
+	size_t errbuf_len;
+
+	/* Execution-time flags must not be mixed into cflags: their numeric
+	   values may coincide with compile flags such as REG_ICASE. */
+	(void) match_options;
+
+	regex = g_new0(Regex, 1);
+	cflags = REG_EXTENDED;
+	if (compile_options & G_REGEX_CASELESS)
+		cflags |= REG_ICASE;
+	if (compile_options & G_REGEX_MULTILINE)
+		cflags |= REG_NEWLINE;
+
+	errcode = regcomp(regex, pattern, cflags);
+	if (errcode == 0)
+		return regex;
+
+	/* first call only asks for the size of the message */
+	errbuf_len = regerror(errcode, regex, NULL, 0);
+	errbuf = g_malloc(errbuf_len);
+	regerror(errcode, regex, errbuf, errbuf_len);
+	g_set_error(error, G_REGEX_ERROR, errcode, "%s", errbuf);
+	g_free(errbuf);
+	g_free(regex);
+	return NULL;
+}
+
+/* Release a regex created by i_regex_new(): frees the compiled pattern
+ * with regfree() and then the Regex structure itself. A NULL regex is
+ * rejected with a warning instead of being passed to regfree(). */
+void
+i_regex_unref (Regex *regex)
+{
+	g_return_if_fail(regex != NULL);
+
+	regfree(regex);
+	g_free(regex);
+}
+
+/* Run `regex` against `string` with regexec().
+ *
+ * match_options: G_REGEX_MATCH_NOTBOL / G_REGEX_MATCH_NOTEOL map to
+ *                REG_NOTBOL / REG_NOTEOL.
+ * match_info:    optional. Receives a newly allocated array with one
+ *                entry per group (1 + re_nsub), to be released with
+ *                i_match_info_free(). When nothing matched every entry
+ *                holds -1 offsets so i_match_info_matches() reports FALSE.
+ *                Set to NULL if `regex` is NULL.
+ * new_string:    unused by this backend; never written.
+ *
+ * Returns TRUE if the string matched. */
+gboolean
+i_regex_match (const Regex *regex,
+               const gchar *string,
+               GRegexMatchFlags match_options,
+               MatchInfo **match_info,
+               const gchar **new_string)
+{
+	MatchInfo *groups_out = NULL;
+	size_t groups = 0;
+	size_t i;
+	int eflags = 0;
+	gboolean matched;
+
+	/* leave the caller with something safe to inspect and free */
+	if (match_info != NULL)
+		*match_info = NULL;
+
+	g_return_val_if_fail(regex != NULL, FALSE);
+
+	if (match_info != NULL) {
+		groups = 1 + regex->re_nsub;
+		groups_out = g_new0(MatchInfo, groups);
+		*match_info = groups_out;
+	}
+
+	if (match_options & G_REGEX_MATCH_NOTBOL)
+		eflags |= REG_NOTBOL;
+	if (match_options & G_REGEX_MATCH_NOTEOL)
+		eflags |= REG_NOTEOL;
+
+	matched = regexec(regex, string, groups, groups_out, eflags) == 0;
+
+	if (!matched) {
+		/* a zero-filled entry would read as a match at offset 0 */
+		for (i = 0; i < groups; i++) {
+			groups_out[i].rm_so = -1;
+			groups_out[i].rm_eo = -1;
+		}
+	}
+
+	return matched;
+}
+
+/* Copy the offsets of group `match_num` out of a match array.
+ *
+ * start_pos / end_pos: optional outputs for rm_so / rm_eo.
+ * new_string:          unused by this backend.
+ *
+ * Returns FALSE (with a warning) for a NULL array or a negative group
+ * number, TRUE otherwise. The array length is not known here, so the
+ * caller must not ask for a group beyond those of the compiled regex. */
+gboolean
+i_match_info_fetch_pos (const MatchInfo *match_info,
+                        gint match_num,
+                        gint *start_pos,
+                        gint *end_pos,
+                        const gchar *new_string)
+{
+	g_return_val_if_fail(match_info != NULL, FALSE);
+	g_return_val_if_fail(match_num >= 0, FALSE);
+
+	if (start_pos != NULL)
+		*start_pos = match_info[match_num].rm_so;
+	if (end_pos != NULL)
+		*end_pos = match_info[match_num].rm_eo;
+
+	return TRUE;
+}
+
+/* Tell whether a match array holds a match: TRUE when the start offset of
+   entry 0 is not -1. A NULL array yields FALSE via g_return_val_if_fail. */
+gboolean
+i_match_info_matches (const MatchInfo *match_info)
+{
+ g_return_val_if_fail(match_info != NULL, FALSE);
+
+ return match_info[0].rm_so != -1;
+}
+
+/* Release a match array with g_free(). */
+void
+i_match_info_free (MatchInfo *match_info)
+{
+ g_free(match_info);
+}
diff --git a/src/core/iregex.h b/src/core/iregex.h
new file mode 100644
index 00000000..adeea987
--- /dev/null
+++ b/src/core/iregex.h
@@ -0,0 +1,52 @@
+#ifndef __IREGEX_H
+#define __IREGEX_H
+
+#include "common.h"
+
+/* Regex wrapper with two interchangeable backends. Regex and MatchInfo
+   name the backend's compiled-pattern and match-result types; the i_*
+   functions below are implemented once per backend. */
+
+#ifdef USE_GREGEX
+
+#include <glib.h>
+typedef GRegex Regex;
+typedef GMatchInfo MatchInfo;
+
+/* GRegex already provides these two, so they are plain aliases. */
+#define i_match_info_matches g_match_info_matches
+#define i_match_info_free g_match_info_free
+
+#else
+
+#include <regex.h>
+typedef regex_t Regex;
+typedef regmatch_t MatchInfo;
+
+/* Whether the match result holds a match. */
+gboolean
+i_match_info_matches (const MatchInfo *match_info);
+
+/* Release a match result filled in by i_regex_match(). */
+void
+i_match_info_free (MatchInfo *match_info);
+
+#endif
+
+/* Compile `pattern`; returns NULL and sets `error` on failure. The result
+   is released with i_regex_unref(). */
+Regex *
+i_regex_new (const gchar *pattern,
+             GRegexCompileFlags compile_options,
+             GRegexMatchFlags match_options,
+             GError **error);
+
+/* Release a regex returned by i_regex_new(). */
+void
+i_regex_unref (Regex *regex);
+
+/* Match `string` against `regex`. `match_info` and `new_string` are
+   optional outputs; a non-NULL *new_string after the call must be freed
+   by the caller and passed on to i_match_info_fetch_pos(). */
+gboolean
+i_regex_match (const Regex *regex,
+               const gchar *string,
+               GRegexMatchFlags match_options,
+               MatchInfo **match_info,
+               const gchar **new_string);
+
+/* Fetch the start/end offsets of group `match_num`. `new_string` is the
+   value i_regex_match() stored for the same match (may be NULL). */
+gboolean
+i_match_info_fetch_pos (const MatchInfo *match_info,
+                        gint match_num,
+                        gint *start_pos,
+                        gint *end_pos,
+                        const gchar *new_string);
+
+#endif
diff --git a/src/core/misc.c b/src/core/misc.c
index 0f038cbb..4b1e72f6 100644
--- a/src/core/misc.c
+++ b/src/core/misc.c
@@ -22,10 +22,6 @@
#include "misc.h"
#include "commands.h"
-#ifndef USE_GREGEX
-# include <regex.h>
-#endif
-
typedef struct {
int condition;
GInputFunction function;
diff --git a/src/fe-common/core/fe-ignore.c b/src/fe-common/core/fe-ignore.c
index 800e881d..03fd4dd2 100644
--- a/src/fe-common/core/fe-ignore.c
+++ b/src/fe-common/core/fe-ignore.c
@@ -58,13 +58,8 @@ static void ignore_print(int index, IGNORE_REC *rec)
g_string_append(options, "-regexp ");
if (rec->pattern == NULL)
g_string_append(options, "[INVALID! -pattern missing] ");
-#ifdef USE_GREGEX
else if (rec->preg == NULL)
g_string_append(options, "[INVALID!] ");
-#else
- else if (!rec->regexp_compiled)
- g_string_append(options, "[INVALID!] ");
-#endif
}
if (rec->fullword) g_string_append(options, "-full ");
if (rec->replies) g_string_append(options, "-replies ");
diff --git a/src/fe-common/core/hilight-text.c b/src/fe-common/core/hilight-text.c
index dd38be87..6a2c97dc 100644
--- a/src/fe-common/core/hilight-text.c
+++ b/src/fe-common/core/hilight-text.c
@@ -26,6 +26,7 @@
#include "misc.h"
#include "lib-config/iconfig.h"
#include "settings.h"
+#include "iregex.h"
#include "servers.h"
#include "channels.h"
@@ -101,11 +102,7 @@ static void hilight_destroy(HILIGHT_REC *rec)
{
g_return_if_fail(rec != NULL);
-#ifdef USE_GREGEX
- if (rec->preg != NULL) g_regex_unref(rec->preg);
-#else
- if (rec->regexp_compiled) regfree(&rec->preg);
-#endif
+ if (rec->preg != NULL) i_regex_unref(rec->preg);
if (rec->channels != NULL) g_strfreev(rec->channels);
g_free_not_null(rec->color);
g_free_not_null(rec->act_color);
@@ -122,19 +119,10 @@ static void hilights_destroy_all(void)
static void hilight_init_rec(HILIGHT_REC *rec)
{
-#ifdef USE_GREGEX
if (rec->preg != NULL)
- g_regex_unref(rec->preg);
+ i_regex_unref(rec->preg);
- rec->preg = g_regex_new(rec->text, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_CASELESS, 0, NULL);
-#else
- if (rec->regexp_compiled) regfree(&rec->preg);
- if (!rec->regexp)
- rec->regexp_compiled = FALSE;
- else
- rec->regexp_compiled = regcomp(&rec->preg, rec->text,
- rec->case_sensitive ? REG_EXTENDED : (REG_EXTENDED|REG_ICASE)) == 0;
-#endif
+ /* honour -matchcase, as the removed regex.h branch did */
+ rec->preg = i_regex_new(rec->text, G_REGEX_OPTIMIZE | (rec->case_sensitive ? 0 : G_REGEX_CASELESS), 0, NULL);
}
void hilight_create(HILIGHT_REC *rec)
@@ -207,30 +195,18 @@ static gboolean hilight_match_text(HILIGHT_REC *rec, const char *text,
gboolean ret = FALSE;
if (rec->regexp) {
-#ifdef USE_GREGEX
if (rec->preg != NULL) {
- GMatchInfo *match;
+ MatchInfo *match;
+ const char *new_text = NULL;
- g_regex_match (rec->preg, text, 0, &match);
+ i_regex_match(rec->preg, text, 0, &match, &new_text);
- if (g_match_info_matches(match))
- ret = g_match_info_fetch_pos(match, 0, match_beg, match_end);
+ if (i_match_info_matches(match))
+ ret = i_match_info_fetch_pos(match, 0, match_beg, match_end, new_text);
- g_match_info_free(match);
+ i_match_info_free(match);
+ g_free_not_null((char *)new_text);
}
-#else
- regmatch_t rmatch[1];
-
- if (rec->regexp_compiled &&
- regexec(&rec->preg, text, 1, rmatch, 0) == 0) {
- if (rmatch[0].rm_so > 0 &&
- match_beg != NULL && match_end != NULL) {
- *match_beg = rmatch[0].rm_so;
- *match_end = rmatch[0].rm_eo;
- }
- ret = TRUE;
- }
-#endif
} else {
char *match;
@@ -524,13 +500,8 @@ static void hilight_print(int index, HILIGHT_REC *rec)
if (rec->case_sensitive) g_string_append(options, "-matchcase ");
if (rec->regexp) {
g_string_append(options, "-regexp ");
-#ifdef USE_GREGEX
if (rec->preg == NULL)
g_string_append(options, "[INVALID!] ");
-#else
- if (!rec->regexp_compiled)
- g_string_append(options, "[INVALID!] ");
-#endif
}
if (rec->priority != 0)
diff --git a/src/fe-common/core/hilight-text.h b/src/fe-common/core/hilight-text.h
index 76beec1f..1d942f29 100644
--- a/src/fe-common/core/hilight-text.h
+++ b/src/fe-common/core/hilight-text.h
@@ -1,10 +1,7 @@
#ifndef __HILIGHT_TEXT_H
#define __HILIGHT_TEXT_H
-#ifndef USE_GREGEX
-# include <regex.h>
-#endif
-
+#include "iregex.h"
#include "formats.h"
struct _HILIGHT_REC {
@@ -24,12 +21,7 @@ struct _HILIGHT_REC {
unsigned int fullword:1; /* match `text' only for full words */
unsigned int regexp:1; /* `text' is a regular expression */
unsigned int case_sensitive:1;/* `text' must match case */
-#ifdef USE_GREGEX
- GRegex *preg;
-#else
- unsigned int regexp_compiled:1; /* should always be TRUE, unless regexp is invalid */
- regex_t preg;
-#endif
+ Regex *preg;
char *servertag;
};
diff --git a/src/fe-text/textbuffer.c b/src/fe-text/textbuffer.c
index 3668f4c7..eb841096 100644
--- a/src/fe-text/textbuffer.c
+++ b/src/fe-text/textbuffer.c
@@ -24,13 +24,10 @@
#include "misc.h"
#include "formats.h"
#include "utf8.h"
+#include "iregex.h"
#include "textbuffer.h"
-#ifndef USE_GREGEX
-# include <regex.h>
-#endif
-
#define TEXT_CHUNK_USABLE_SIZE (LINE_TEXT_CHUNK_SIZE-2-(int)sizeof(char*))
TEXT_BUFFER_REC *textbuffer_create(void)
@@ -545,11 +542,7 @@ GList *textbuffer_find_text(TEXT_BUFFER_REC *buffer, LINE_REC *startline,
int before, int after,
int regexp, int fullword, int case_sensitive)
{
-#ifdef USE_GREGEX
- GRegex *preg;
-#else
- regex_t preg;
-#endif
+ Regex *preg;
LINE_REC *line, *pre_line;
GList *matches;
GString *str;
@@ -559,23 +552,14 @@ GList *textbuffer_find_text(TEXT_BUFFER_REC *buffer, LINE_REC *startline,
g_return_val_if_fail(buffer != NULL, NULL);
g_return_val_if_fail(text != NULL, NULL);
-#ifdef USE_GREGEX
preg = NULL;
if (regexp) {
- preg = g_regex_new(text, G_REGEX_RAW | (case_sensitive ? 0 : G_REGEX_CASELESS), 0, NULL);
+ preg = i_regex_new(text, case_sensitive ? 0 : G_REGEX_CASELESS, 0, NULL);
if (preg == NULL)
return NULL;
}
-#else
- if (regexp) {
- int flags = REG_EXTENDED | REG_NOSUB |
- (case_sensitive ? 0 : REG_ICASE);
- if (regcomp(&preg, text, flags) != 0)
- return NULL;
- }
-#endif
matches = NULL; match_after = 0;
str = g_string_new(NULL);
@@ -592,17 +576,16 @@ GList *textbuffer_find_text(TEXT_BUFFER_REC *buffer, LINE_REC *startline,
(line->info.level & nolevel) == 0;
if (*text != '\0') {
+ const char *tmp = NULL;
textbuffer_line2text(line, FALSE, str);
if (line_matched) {
line_matched = regexp ?
-#ifdef USE_GREGEX
- g_regex_match(preg, str->str, 0, NULL)
-#else
- regexec(&preg, str->str, 0, NULL, 0) == 0
-#endif
+ i_regex_match(preg, str->str, 0, NULL, &tmp)
: match_func(str->str, text) != NULL;
}
+ if (tmp && tmp != str->str)
+ g_free_not_null((char *)tmp);
}
if (line_matched) {
@@ -631,12 +614,8 @@ GList *textbuffer_find_text(TEXT_BUFFER_REC *buffer, LINE_REC *startline,
}
}
-#ifdef USE_GREGEX
if (preg != NULL)
- g_regex_unref(preg);
-#else
- if (regexp) regfree(&preg);
-#endif
+ i_regex_unref(preg);
g_string_free(str, TRUE);
return matches;
}