summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Sebastien Helleu <flashcode@flashtux.org> 2009-03-22 16:38:06 +0100
committer: Sebastien Helleu <flashcode@flashtux.org> 2009-03-22 16:38:06 +0100
commit: 7c793257ffa0d2963a5ce8d34e59534839bdc54a (patch)
tree: 31c7c349f45cc4ea63cfdafb40373f5a91f44fb1 /src
parent: 558c9f261446438b5a3d8ff997e0b93143f11a69 (diff)
download: weechat-7c793257ffa0d2963a5ce8d34e59534839bdc54a.zip
Add option aspell.check.real_time (off by default), fix bugs with aspell and utf-8 chars, add function utf8_char_int to API
Diffstat (limited to 'src')
-rw-r--r-- src/core/wee-utf8.c 55
-rw-r--r-- src/core/wee-utf8.h 1
-rw-r--r-- src/plugins/aspell/weechat-aspell-config.c 21
-rw-r--r-- src/plugins/aspell/weechat-aspell-config.h 5
-rw-r--r-- src/plugins/aspell/weechat-aspell-speller.c 2
-rw-r--r-- src/plugins/aspell/weechat-aspell-speller.h 2
-rw-r--r-- src/plugins/aspell/weechat-aspell.c 131
-rw-r--r-- src/plugins/aspell/weechat-aspell.h 2
-rw-r--r-- src/plugins/plugin.c 1
-rw-r--r-- src/plugins/weechat-plugin.h 3
10 files changed, 131 insertions, 92 deletions
diff --git a/src/core/wee-utf8.c b/src/core/wee-utf8.c
index 7c5b5cf06..bae3693fe 100644
--- a/src/core/wee-utf8.c
+++ b/src/core/wee-utf8.c
@@ -226,6 +226,61 @@ utf8_next_char (const char *string)
}
/*
+ * utf8_char_int: return UTF-8 char as integer
+ *
+ * Decodes the first UTF-8 sequence found at "string" and returns the
+ * value carried by its payload bits ("v" bits in the patterns below).
+ *
+ * The length of the sequence is chosen from the lead byte only:
+ *   110vvvvv -> 2 bytes, 1110vvvv -> 3 bytes, 11110vvv -> 4 bytes;
+ * any other first byte (plain ASCII, a stray continuation byte, or a
+ * byte >= 0xF8) is returned unchanged as its unsigned value.
+ *
+ * Returns 0 if string is NULL; an empty string also yields 0, since
+ * its first byte is the terminating NUL.
+ *
+ * If the string ends (NUL byte) before all expected continuation bytes
+ * are read, the value is built only from the bytes that are present,
+ * packed as if the sequence were that much shorter.
+ *
+ * Continuation bytes are only masked with 0x3F: they are not checked
+ * against the 10vvvvvv pattern, so malformed input is not rejected.
+ */
+
+int
+utf8_char_int (const char *string)
+{
+ const unsigned char *ptr_string;
+
+ if (!string)
+ return 0;
+
+ ptr_string = (unsigned char *)string; /* unsigned bytes: masks/shifts below never see negative values */
+
+ /* UTF-8, 2 bytes: 110vvvvv 10vvvvvv */
+ if ((ptr_string[0] & 0xE0) == 0xC0)
+ {
+ if (!ptr_string[1])
+ return (int)(ptr_string[0] & 0x1F); /* truncated: lead byte payload only */
+ return ((int)(ptr_string[0] & 0x1F) << 6) +
+ ((int)(ptr_string[1] & 0x3F));
+ }
+ /* UTF-8, 3 bytes: 1110vvvv 10vvvvvv 10vvvvvv */
+ else if ((ptr_string[0] & 0xF0) == 0xE0)
+ {
+ if (!ptr_string[1])
+ return (int)(ptr_string[0] & 0x0F); /* truncated after lead byte */
+ if (!ptr_string[2])
+ return (((int)(ptr_string[0] & 0x0F)) << 6) + /* truncated after 2 bytes */
+ ((int)(ptr_string[1] & 0x3F));
+ return (((int)(ptr_string[0] & 0x0F)) << 12) +
+ (((int)(ptr_string[1] & 0x3F)) << 6) +
+ ((int)(ptr_string[2] & 0x3F));
+ }
+ /* UTF-8, 4 bytes: 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv */
+ else if ((ptr_string[0] & 0xF8) == 0xF0)
+ {
+ if (!ptr_string[1])
+ return (int)ptr_string[0] & 0x07; /* cast binds before '&'; same value as (int)(byte & 0x07) */
+ if (!ptr_string[2])
+ return (((int)(ptr_string[0] & 0x07)) << 6) + /* truncated after 2 bytes */
+ ((int)(ptr_string[1] & 0x3F));
+ if (!ptr_string[3])
+ return (((int)(ptr_string[0] & 0x07)) << 12) + /* truncated after 3 bytes */
+ (((int)(ptr_string[1] & 0x3F)) << 6) +
+ ((int)(ptr_string[2] & 0x3F));
+ return (((int)(ptr_string[0] & 0x07)) << 18) +
+ (((int)(ptr_string[1] & 0x3F)) << 12) +
+ (((int)(ptr_string[2] & 0x3F)) << 6) +
+ ((int)(ptr_string[3] & 0x3F));
+ }
+ /* UTF-8, 1 byte: 0vvvvvvv
+ * (also reached for any first byte that matched none of the lead
+ * patterns above; such a byte is returned as-is) */
+ return (int)ptr_string[0];
+}
+
+/*
* utf8_char_size: return UTF-8 char size (in bytes)
*/
diff --git a/src/core/wee-utf8.h b/src/core/wee-utf8.h
index 60511e607..d6fc8f927 100644
--- a/src/core/wee-utf8.h
+++ b/src/core/wee-utf8.h
@@ -38,6 +38,7 @@ extern int utf8_is_valid (const char *string, char **error);
extern void utf8_normalize (const char *string, char replacement);
extern char *utf8_prev_char (const char *string_start, const char *string);
extern char *utf8_next_char (const char *string);
+extern int utf8_char_int (const char *string);
extern int utf8_char_size (const char *string);
extern int utf8_strlen (const char *string);
extern int utf8_strnlen (const char *string, int bytes);
diff --git a/src/plugins/aspell/weechat-aspell-config.c b/src/plugins/aspell/weechat-aspell-config.c
index d61d96bf5..43032b564 100644
--- a/src/plugins/aspell/weechat-aspell-config.c
+++ b/src/plugins/aspell/weechat-aspell-config.c
@@ -16,7 +16,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/* aspell-config.c: aspell configuration options */
+/* weechat-aspell-config.c: aspell configuration options */
#include <stdlib.h>
@@ -40,8 +40,9 @@ struct t_config_option *weechat_aspell_config_look_color;
struct t_config_option *weechat_aspell_config_check_commands;
struct t_config_option *weechat_aspell_config_check_default_dict;
-struct t_config_option *weechat_aspell_config_check_word_min_length;
struct t_config_option *weechat_aspell_config_check_during_search;
+struct t_config_option *weechat_aspell_config_check_real_time;
+struct t_config_option *weechat_aspell_config_check_word_min_length;
char **weechat_aspell_commands_to_check = NULL;
@@ -304,17 +305,23 @@ weechat_aspell_config_init ()
"aspell on buffers for which you didn't explicitely enabled it)"),
NULL, 0, 0, "", NULL, 0,
NULL, NULL, &weechat_aspell_config_change_default_dict, NULL, NULL, NULL);
+ weechat_aspell_config_check_during_search = weechat_config_new_option (
+ weechat_aspell_config_file, ptr_section,
+ "during_search", "boolean",
+ N_("check words during text search in buffer"),
+ NULL, 0, 0, "off", NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
+ weechat_aspell_config_check_real_time = weechat_config_new_option (
+ weechat_aspell_config_file, ptr_section,
+ "real_time", "boolean",
+ N_("real-time spell checking of words (slower, disabled by default: "
+ "words are checked only if there's delimiter after)"),
+ NULL, 0, 0, "off", NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
weechat_aspell_config_check_word_min_length = weechat_config_new_option (
weechat_aspell_config_file, ptr_section,
"word_min_length", "integer",
N_("minimum length for a word to be spell checked (use 0 to check all "
"words)"),
NULL, 0, INT_MAX, "2", NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
- weechat_aspell_config_check_during_search = weechat_config_new_option (
- weechat_aspell_config_file, ptr_section,
- "during_search", "boolean",
- N_("check words during text search in buffer"),
- NULL, 0, 0, "off", NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
/* dict */
ptr_section = weechat_config_new_section (weechat_aspell_config_file, "dict",
diff --git a/src/plugins/aspell/weechat-aspell-config.h b/src/plugins/aspell/weechat-aspell-config.h
index efb4590ab..6f8fc3ab2 100644
--- a/src/plugins/aspell/weechat-aspell-config.h
+++ b/src/plugins/aspell/weechat-aspell-config.h
@@ -27,8 +27,9 @@ extern struct t_config_option *weechat_aspell_config_look_color;
extern struct t_config_option *weechat_aspell_config_check_commands;
extern struct t_config_option *weechat_aspell_config_check_default_dict;
-extern struct t_config_option *weechat_aspell_config_check_word_min_length;
extern struct t_config_option *weechat_aspell_config_check_during_search;
+extern struct t_config_option *weechat_aspell_config_check_real_time;
+extern struct t_config_option *weechat_aspell_config_check_word_min_length;
extern char **weechat_aspell_commands_to_check;
extern int weechat_aspell_count_commands_to_check;
@@ -41,4 +42,4 @@ extern int weechat_aspell_config_read ();
extern int weechat_aspell_config_write ();
extern void weechat_aspell_config_free ();
-#endif /* aspell-config.h */
+#endif /* weechat-aspell-config.h */
diff --git a/src/plugins/aspell/weechat-aspell-speller.c b/src/plugins/aspell/weechat-aspell-speller.c
index 44f19aed7..2dd7e2a0c 100644
--- a/src/plugins/aspell/weechat-aspell-speller.c
+++ b/src/plugins/aspell/weechat-aspell-speller.c
@@ -16,7 +16,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/* aspell-speller.c: speller management for aspell plugin */
+/* weechat-aspell-speller.c: speller management for aspell plugin */
#include <stdlib.h>
diff --git a/src/plugins/aspell/weechat-aspell-speller.h b/src/plugins/aspell/weechat-aspell-speller.h
index 8e12cfdd4..646640902 100644
--- a/src/plugins/aspell/weechat-aspell-speller.h
+++ b/src/plugins/aspell/weechat-aspell-speller.h
@@ -38,4 +38,4 @@ extern struct t_aspell_speller *weechat_aspell_speller_new (const char *lang);
extern void weechat_aspell_speller_free (struct t_aspell_speller *speller);
extern void weechat_aspell_speller_free_all ();
-#endif /* aspell-speller.h */
+#endif /* weechat-aspell-speller.h */
diff --git a/src/plugins/aspell/weechat-aspell.c b/src/plugins/aspell/weechat-aspell.c
index dc6f1a2f9..18f961e1f 100644
--- a/src/plugins/aspell/weechat-aspell.c
+++ b/src/plugins/aspell/weechat-aspell.c
@@ -16,14 +16,14 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/* aspell.c: aspell plugin for WeeChat */
+/* weechat-aspell.c: aspell plugin for WeeChat */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
-#include <ctype.h>
+#include <wctype.h>
#include "../weechat-plugin.h"
#include "weechat-aspell.h"
@@ -551,45 +551,6 @@ weechat_aspell_command_authorized (const char *command)
}
/*
- * weechat_aspell_string_strip_punctuation: strip punctuation chars at the
- * begining and at the end of a word
- */
-
-char *
-weechat_aspell_string_strip_punctuation (const char *word)
-{
- const char *ptr_start, *ptr_end;
-
- if (!word)
- return NULL;
-
- ptr_start = word;
- while (ptr_start[0])
- {
- if (!ispunct (ptr_start[0]))
- break;
- ptr_start++;
- }
-
- if (!ptr_start[0])
- return strdup ("");
-
- ptr_end = ptr_start + strlen (ptr_start) - 1;
-
- while (ptr_end >= ptr_start)
- {
- if (!ispunct (ptr_end[0]))
- break;
- ptr_end--;
- }
-
- if (ptr_end < ptr_start)
- return strdup ("");
-
- return weechat_strndup (ptr_start, ptr_end - ptr_start + 1);
-}
-
-/*
* weechat_aspell_string_is_url: detect if a word is an url
*/
@@ -629,17 +590,17 @@ weechat_aspell_string_is_url (const char *word)
int
weechat_aspell_string_is_simili_number (const char *word)
{
- const char *ptr_word;
+ int utf8_char_int;
- if (!word)
+ if (!word || !word[0])
return 0;
- ptr_word = word;
- while (ptr_word[0])
+ while (word && word[0])
{
- if (!ispunct (ptr_word[0]) && !isdigit (ptr_word[0]))
+ utf8_char_int = weechat_utf8_char_int (word);
+ if (!iswpunct (utf8_char_int) && !iswdigit (utf8_char_int))
return 0;
- ptr_word++;
+ word = weechat_utf8_next_char (word);
}
/* there's only digit or punctuation */
@@ -654,35 +615,29 @@ weechat_aspell_string_is_simili_number (const char *word)
int
weechat_aspell_check_word (struct t_gui_buffer *buffer, const char *word)
{
- char *clean_word;
struct t_aspell_speller *ptr_speller;
int rc;
- clean_word = weechat_aspell_string_strip_punctuation (word);
-
- if (!clean_word)
- return 1;
-
rc = 0;
/* word too small? then do not check word */
if ((weechat_config_integer (weechat_aspell_config_check_word_min_length) > 0)
- && ((int)strlen (clean_word) < weechat_config_integer (weechat_aspell_config_check_word_min_length)))
+ && ((int)strlen (word) < weechat_config_integer (weechat_aspell_config_check_word_min_length)))
rc = 1;
else
{
/* word is URL? then do not check word */
- if (weechat_aspell_string_is_url (clean_word))
+ if (weechat_aspell_string_is_url (word))
rc = 1;
else
{
/* word is a number? then do not check word */
- if (weechat_aspell_string_is_simili_number (clean_word))
+ if (weechat_aspell_string_is_simili_number (word))
rc = 1;
else
{
/* word is a nick of nicklist on this buffer? then do not check word */
- if (weechat_nicklist_search_nick (buffer, NULL, clean_word))
+ if (weechat_nicklist_search_nick (buffer, NULL, word))
rc = 1;
else
{
@@ -690,7 +645,7 @@ weechat_aspell_check_word (struct t_gui_buffer *buffer, const char *word)
for (ptr_speller = weechat_aspell_spellers; ptr_speller;
ptr_speller = ptr_speller->next_speller)
{
- if (aspell_speller_check (ptr_speller->speller, clean_word, -1) == 1)
+ if (aspell_speller_check (ptr_speller->speller, word, -1) == 1)
{
rc = 1;
break;
@@ -701,8 +656,6 @@ weechat_aspell_check_word (struct t_gui_buffer *buffer, const char *word)
}
}
- free (clean_word);
-
return rc;
}
@@ -716,8 +669,9 @@ weechat_aspell_modifier_cb (void *data, const char *modifier,
{
long unsigned int value;
struct t_gui_buffer *buffer;
- char *result, *ptr_string, *pos_space;
+ char *result, *ptr_string, *pos_space, *ptr_end, save_end;
const char *color_normal, *color_error;
+ int utf8_char_int, char_size;
int length, index_result, length_word, word_ok;
int length_color_normal, length_color_error;
@@ -784,13 +738,17 @@ weechat_aspell_modifier_cb (void *data, const char *modifier,
&& (ptr_string[1] != ' '))
{
ptr_string++;
- pos_space = strchr (ptr_string, ' ');
- if (!pos_space)
+ pos_space = ptr_string;
+ while (pos_space && pos_space[0] && (pos_space[0] != ' '))
+ {
+ pos_space = weechat_utf8_next_char (pos_space);
+ }
+ if (!pos_space || !pos_space[0])
{
free (result);
return NULL;
}
-
+
pos_space[0] = '\0';
/* exit if command is not authorized for spell checking */
@@ -809,24 +767,39 @@ weechat_aspell_modifier_cb (void *data, const char *modifier,
while (ptr_string[0])
{
- while (ptr_string[0] == ' ')
+ /* find start of word */
+ utf8_char_int = weechat_utf8_char_int (ptr_string);
+ while (!iswalnum (utf8_char_int) || iswspace (utf8_char_int))
{
- result[index_result++] = ' ';
- ptr_string++;
+ char_size = weechat_utf8_char_size (ptr_string);
+ memcpy (result + index_result, ptr_string, char_size);
+ index_result += char_size;
+ ptr_string += char_size;
+ if (!ptr_string[0])
+ break;
+ utf8_char_int = weechat_utf8_char_int (ptr_string);
}
if (!ptr_string[0])
break;
- pos_space = strchr (ptr_string, ' ');
- if (pos_space)
+ ptr_end = weechat_utf8_next_char (ptr_string);
+ utf8_char_int = weechat_utf8_char_int (ptr_end);
+ while (iswalnum (utf8_char_int))
{
- pos_space[0] = '\0';
- length_word = pos_space - ptr_string;
+ ptr_end = weechat_utf8_next_char (ptr_end);
+ if (!ptr_end[0])
+ break;
+ utf8_char_int = weechat_utf8_char_int (ptr_end);
}
- else
- length_word = strlen (ptr_string);
+ save_end = ptr_end[0];
+ ptr_end[0] = '\0';
+ length_word = ptr_end - ptr_string;
- word_ok = weechat_aspell_check_word (buffer, ptr_string);
+ if ((save_end != '\0')
+ || (weechat_config_integer (weechat_aspell_config_check_real_time)))
+ word_ok = weechat_aspell_check_word (buffer, ptr_string);
+ else
+ word_ok = 1;
/* add error color */
if (!word_ok)
@@ -846,13 +819,11 @@ weechat_aspell_modifier_cb (void *data, const char *modifier,
index_result += length_color_normal;
}
- if (pos_space)
- {
- pos_space[0] = ' ';
- ptr_string = pos_space;
- }
- else
+ if (save_end == '\0')
break;
+
+ ptr_end[0] = save_end;
+ ptr_string = ptr_end;
}
result[index_result] = '\0';
diff --git a/src/plugins/aspell/weechat-aspell.h b/src/plugins/aspell/weechat-aspell.h
index 13728ed47..763652b64 100644
--- a/src/plugins/aspell/weechat-aspell.h
+++ b/src/plugins/aspell/weechat-aspell.h
@@ -38,4 +38,4 @@ extern struct t_aspell_code countries_avail[];
extern void weechat_aspell_create_spellers (struct t_gui_buffer *buffer);
-#endif /* aspell.h */
+#endif /* weechat-aspell.h */
diff --git a/src/plugins/plugin.c b/src/plugins/plugin.c
index 40f6a8f76..b373c5f39 100644
--- a/src/plugins/plugin.c
+++ b/src/plugins/plugin.c
@@ -420,6 +420,7 @@ plugin_load (const char *filename)
new_plugin->utf8_normalize = &utf8_normalize;
new_plugin->utf8_prev_char = &utf8_prev_char;
new_plugin->utf8_next_char = &utf8_next_char;
+ new_plugin->utf8_char_int = &utf8_char_int;
new_plugin->utf8_char_size = &utf8_char_size;
new_plugin->utf8_strlen = &utf8_strlen;
new_plugin->utf8_strnlen = &utf8_strnlen;
diff --git a/src/plugins/weechat-plugin.h b/src/plugins/weechat-plugin.h
index 96c1e79d7..880fd45bb 100644
--- a/src/plugins/weechat-plugin.h
+++ b/src/plugins/weechat-plugin.h
@@ -175,6 +175,7 @@ struct t_weechat_plugin
void (*utf8_normalize) (const char *string, char replacement);
char *(*utf8_prev_char) (const char *string_start, const char *string);
char *(*utf8_next_char) (const char *string);
+ int (*utf8_char_int) (const char *string);
int (*utf8_char_size) (const char *string);
int (*utf8_strlen) (const char *string);
int (*utf8_strnlen) (const char *string, int bytes);
@@ -708,6 +709,8 @@ extern int weechat_plugin_end (struct t_weechat_plugin *plugin);
weechat_plugin->utf8_prev_char(__start, __string)
#define weechat_utf8_next_char(__string) \
weechat_plugin->utf8_next_char(__string)
+#define weechat_utf8_char_int(__string) \
+ weechat_plugin->utf8_char_int(__string)
#define weechat_utf8_char_size(__string) \
weechat_plugin->utf8_char_size(__string)
#define weechat_utf8_strlen(__string) \