summaryrefslogtreecommitdiff
path: root/runtime/autoload/tohtml.vim
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2010-11-16 20:34:40 +0100
committerBram Moolenaar <Bram@vim.org>2010-11-16 20:34:40 +0100
commit166af9bb6f4f0c9a2f570766d26b42d2041860f2 (patch)
tree88791f76ef3946812ab212f61e130fdd39078852 /runtime/autoload/tohtml.vim
parent644d37b84d8f520f807ef665b476c47b72862eff (diff)
downloadvim-166af9bb6f4f0c9a2f570766d26b42d2041860f2.zip
Updated runtime files.
Diffstat (limited to 'runtime/autoload/tohtml.vim')
-rw-r--r--runtime/autoload/tohtml.vim457
1 files changed, 405 insertions, 52 deletions
diff --git a/runtime/autoload/tohtml.vim b/runtime/autoload/tohtml.vim
index c24d6ff42..c9dd2f764 100644
--- a/runtime/autoload/tohtml.vim
+++ b/runtime/autoload/tohtml.vim
@@ -1,6 +1,6 @@
" Vim autoload file for the tohtml plugin.
" Maintainer: Ben Fritz <fritzophrenic@gmail.com>
-" Last Change: 2010 Aug 12
+" Last Change: 2010 Oct 07
"
" Additional contributors:
"
@@ -13,10 +13,284 @@
let s:cpo_sav = &cpo
set cpo-=C
-func! tohtml#Convert2HTML(line1, line2)
+" Lookup table from Vim encoding names (as found in 'encoding' or
+" 'fileencoding') to the charset name written into the generated HTML. It is
+" consulted by tohtml#CharsetFromEncoding().
+"
+" Automatically find charsets from all encodings supported natively by Vim. With
+" the 8bit- and 2byte- prefixes, Vim can actually support more encodings than
+" this. Let the user specify these, however, since they won't be supported on
+" every system. TODO: how? g:html_charsets and g:html_encodings?
+"
+" Note: not all of Vim's supported encodings have a charset to use; those
+" entries map to an empty string.
+"
+" Names in this list are from:
+" http://www.iana.org/assignments/character-sets
+" g:tohtml#encoding_to_charset: {{{
+let g:tohtml#encoding_to_charset = {
+ \ 'latin1' : 'ISO-8859-1',
+ \ 'iso-8859-2' : 'ISO-8859-2',
+ \ 'iso-8859-3' : 'ISO-8859-3',
+ \ 'iso-8859-4' : 'ISO-8859-4',
+ \ 'iso-8859-5' : 'ISO-8859-5',
+ \ 'iso-8859-6' : 'ISO-8859-6',
+ \ 'iso-8859-7' : 'ISO-8859-7',
+ \ 'iso-8859-8' : 'ISO-8859-8',
+ \ 'iso-8859-9' : 'ISO-8859-9',
+ \ 'iso-8859-10' : '',
+ \ 'iso-8859-13' : 'ISO-8859-13',
+ \ 'iso-8859-14' : '',
+ \ 'iso-8859-15' : 'ISO-8859-15',
+ \ 'koi8-r' : 'KOI8-R',
+ \ 'koi8-u' : 'KOI8-U',
+ \ 'macroman' : 'macintosh',
+ \ 'cp437' : '',
+ \ 'cp775' : '',
+ \ 'cp850' : '',
+ \ 'cp852' : '',
+ \ 'cp855' : '',
+ \ 'cp857' : '',
+ \ 'cp860' : '',
+ \ 'cp861' : '',
+ \ 'cp862' : '',
+ \ 'cp863' : '',
+ \ 'cp865' : '',
+ \ 'cp866' : 'IBM866',
+ \ 'cp869' : '',
+ \ 'cp874' : '',
+ \ 'cp1250' : 'windows-1250',
+ \ 'cp1251' : 'windows-1251',
+ \ 'cp1253' : 'windows-1253',
+ \ 'cp1254' : 'windows-1254',
+ \ 'cp1255' : 'windows-1255',
+ \ 'cp1256' : 'windows-1256',
+ \ 'cp1257' : 'windows-1257',
+ \ 'cp1258' : 'windows-1258',
+ \ 'euc-jp' : 'EUC-JP',
+ \ 'sjis' : 'Shift_JIS',
+ \ 'cp932' : 'Shift_JIS',
+ \ 'cp949' : '',
+ \ 'euc-kr' : 'EUC-KR',
+ \ 'cp936' : 'GBK',
+ \ 'euc-cn' : 'GB2312',
+ \ 'big5' : 'Big5',
+ \ 'cp950' : 'Big5',
+ \ 'utf-8' : 'UTF-8',
+ \ 'ucs-2' : 'UTF-8',
+ \ 'ucs-2le' : 'UTF-8',
+ \ 'utf-16' : 'UTF-8',
+ \ 'utf-16le' : 'UTF-8',
+ \ 'ucs-4' : 'UTF-8',
+ \ 'ucs-4le' : 'UTF-8',
+ \ }
+" The table is locked after definition.
+lockvar g:tohtml#encoding_to_charset
+" Notes:
+" 1. All UCS/UTF encodings are mapped to UTF-8 because it is much better
+" supported.
+" 2. An empty string means that Vim supports the encoding but at least one major
+" web browser does not, according to http://wiki.whatwg.org/wiki/Web_Encodings.
+" }}}
+
+" Lookup table from HTML charset names (lower case) to the Vim encoding name to
+" use for 'fileencoding'. It is consulted by tohtml#EncodingFromCharset(), which
+" lower-cases the charset name before looking it up here.
+"
+" Only automatically find encodings supported natively by Vim; let the user
+" specify the encoding if it's not natively supported. This table is used when
+" the user specifies the charset (they had better know what they are doing!)
+" and when tohtml#CharsetFromEncoding() maps the charset it picked back to a
+" Vim encoding.
+"
+" Every non-empty charset produced by g:tohtml#encoding_to_charset needs a key
+" here so that it can be mapped back; 'gb2312' (with its alias 'csgb2312') is
+" listed for that reason, since 'euc-cn' is written out as 'GB2312'.
+"
+" Names in this list are from:
+" http://www.iana.org/assignments/character-sets
+" g:tohtml#charset_to_encoding: {{{
+let g:tohtml#charset_to_encoding = {
+ \ 'iso_8859-1:1987' : 'latin1',
+ \ 'iso-ir-100' : 'latin1',
+ \ 'iso_8859-1' : 'latin1',
+ \ 'iso-8859-1' : 'latin1',
+ \ 'latin1' : 'latin1',
+ \ 'l1' : 'latin1',
+ \ 'ibm819' : 'latin1',
+ \ 'cp819' : 'latin1',
+ \ 'csisolatin1' : 'latin1',
+ \ 'iso_8859-2:1987' : 'iso-8859-2',
+ \ 'iso-ir-101' : 'iso-8859-2',
+ \ 'iso_8859-2' : 'iso-8859-2',
+ \ 'iso-8859-2' : 'iso-8859-2',
+ \ 'latin2' : 'iso-8859-2',
+ \ 'l2' : 'iso-8859-2',
+ \ 'csisolatin2' : 'iso-8859-2',
+ \ 'iso_8859-3:1988' : 'iso-8859-3',
+ \ 'iso-ir-109' : 'iso-8859-3',
+ \ 'iso_8859-3' : 'iso-8859-3',
+ \ 'iso-8859-3' : 'iso-8859-3',
+ \ 'latin3' : 'iso-8859-3',
+ \ 'l3' : 'iso-8859-3',
+ \ 'csisolatin3' : 'iso-8859-3',
+ \ 'iso_8859-4:1988' : 'iso-8859-4',
+ \ 'iso-ir-110' : 'iso-8859-4',
+ \ 'iso_8859-4' : 'iso-8859-4',
+ \ 'iso-8859-4' : 'iso-8859-4',
+ \ 'latin4' : 'iso-8859-4',
+ \ 'l4' : 'iso-8859-4',
+ \ 'csisolatin4' : 'iso-8859-4',
+ \ 'iso_8859-5:1988' : 'iso-8859-5',
+ \ 'iso-ir-144' : 'iso-8859-5',
+ \ 'iso_8859-5' : 'iso-8859-5',
+ \ 'iso-8859-5' : 'iso-8859-5',
+ \ 'cyrillic' : 'iso-8859-5',
+ \ 'csisolatincyrillic' : 'iso-8859-5',
+ \ 'iso_8859-6:1987' : 'iso-8859-6',
+ \ 'iso-ir-127' : 'iso-8859-6',
+ \ 'iso_8859-6' : 'iso-8859-6',
+ \ 'iso-8859-6' : 'iso-8859-6',
+ \ 'ecma-114' : 'iso-8859-6',
+ \ 'asmo-708' : 'iso-8859-6',
+ \ 'arabic' : 'iso-8859-6',
+ \ 'csisolatinarabic' : 'iso-8859-6',
+ \ 'iso_8859-7:1987' : 'iso-8859-7',
+ \ 'iso-ir-126' : 'iso-8859-7',
+ \ 'iso_8859-7' : 'iso-8859-7',
+ \ 'iso-8859-7' : 'iso-8859-7',
+ \ 'elot_928' : 'iso-8859-7',
+ \ 'ecma-118' : 'iso-8859-7',
+ \ 'greek' : 'iso-8859-7',
+ \ 'greek8' : 'iso-8859-7',
+ \ 'csisolatingreek' : 'iso-8859-7',
+ \ 'iso_8859-8:1988' : 'iso-8859-8',
+ \ 'iso-ir-138' : 'iso-8859-8',
+ \ 'iso_8859-8' : 'iso-8859-8',
+ \ 'iso-8859-8' : 'iso-8859-8',
+ \ 'hebrew' : 'iso-8859-8',
+ \ 'csisolatinhebrew' : 'iso-8859-8',
+ \ 'iso_8859-9:1989' : 'iso-8859-9',
+ \ 'iso-ir-148' : 'iso-8859-9',
+ \ 'iso_8859-9' : 'iso-8859-9',
+ \ 'iso-8859-9' : 'iso-8859-9',
+ \ 'latin5' : 'iso-8859-9',
+ \ 'l5' : 'iso-8859-9',
+ \ 'csisolatin5' : 'iso-8859-9',
+ \ 'iso-8859-10' : 'iso-8859-10',
+ \ 'iso-ir-157' : 'iso-8859-10',
+ \ 'l6' : 'iso-8859-10',
+ \ 'iso_8859-10:1992' : 'iso-8859-10',
+ \ 'csisolatin6' : 'iso-8859-10',
+ \ 'latin6' : 'iso-8859-10',
+ \ 'iso-8859-13' : 'iso-8859-13',
+ \ 'iso-8859-14' : 'iso-8859-14',
+ \ 'iso-ir-199' : 'iso-8859-14',
+ \ 'iso_8859-14:1998' : 'iso-8859-14',
+ \ 'iso_8859-14' : 'iso-8859-14',
+ \ 'latin8' : 'iso-8859-14',
+ \ 'iso-celtic' : 'iso-8859-14',
+ \ 'l8' : 'iso-8859-14',
+ \ 'iso-8859-15' : 'iso-8859-15',
+ \ 'iso_8859-15' : 'iso-8859-15',
+ \ 'latin-9' : 'iso-8859-15',
+ \ 'koi8-r' : 'koi8-r',
+ \ 'cskoi8r' : 'koi8-r',
+ \ 'koi8-u' : 'koi8-u',
+ \ 'macintosh' : 'macroman',
+ \ 'mac' : 'macroman',
+ \ 'csmacintosh' : 'macroman',
+ \ 'ibm437' : 'cp437',
+ \ 'cp437' : 'cp437',
+ \ '437' : 'cp437',
+ \ 'cspc8codepage437' : 'cp437',
+ \ 'ibm775' : 'cp775',
+ \ 'cp775' : 'cp775',
+ \ 'cspc775baltic' : 'cp775',
+ \ 'ibm850' : 'cp850',
+ \ 'cp850' : 'cp850',
+ \ '850' : 'cp850',
+ \ 'cspc850multilingual' : 'cp850',
+ \ 'ibm852' : 'cp852',
+ \ 'cp852' : 'cp852',
+ \ '852' : 'cp852',
+ \ 'cspcp852' : 'cp852',
+ \ 'ibm855' : 'cp855',
+ \ 'cp855' : 'cp855',
+ \ '855' : 'cp855',
+ \ 'csibm855' : 'cp855',
+ \ 'ibm857' : 'cp857',
+ \ 'cp857' : 'cp857',
+ \ '857' : 'cp857',
+ \ 'csibm857' : 'cp857',
+ \ 'ibm860' : 'cp860',
+ \ 'cp860' : 'cp860',
+ \ '860' : 'cp860',
+ \ 'csibm860' : 'cp860',
+ \ 'ibm861' : 'cp861',
+ \ 'cp861' : 'cp861',
+ \ '861' : 'cp861',
+ \ 'cp-is' : 'cp861',
+ \ 'csibm861' : 'cp861',
+ \ 'ibm862' : 'cp862',
+ \ 'cp862' : 'cp862',
+ \ '862' : 'cp862',
+ \ 'cspc862latinhebrew' : 'cp862',
+ \ 'ibm863' : 'cp863',
+ \ 'cp863' : 'cp863',
+ \ '863' : 'cp863',
+ \ 'csibm863' : 'cp863',
+ \ 'ibm865' : 'cp865',
+ \ 'cp865' : 'cp865',
+ \ '865' : 'cp865',
+ \ 'csibm865' : 'cp865',
+ \ 'ibm866' : 'cp866',
+ \ 'cp866' : 'cp866',
+ \ '866' : 'cp866',
+ \ 'csibm866' : 'cp866',
+ \ 'ibm869' : 'cp869',
+ \ 'cp869' : 'cp869',
+ \ '869' : 'cp869',
+ \ 'cp-gr' : 'cp869',
+ \ 'csibm869' : 'cp869',
+ \ 'windows-1250' : 'cp1250',
+ \ 'windows-1251' : 'cp1251',
+ \ 'windows-1253' : 'cp1253',
+ \ 'windows-1254' : 'cp1254',
+ \ 'windows-1255' : 'cp1255',
+ \ 'windows-1256' : 'cp1256',
+ \ 'windows-1257' : 'cp1257',
+ \ 'windows-1258' : 'cp1258',
+ \ 'extended_unix_code_packed_format_for_japanese' : 'euc-jp',
+ \ 'cseucpkdfmtjapanese' : 'euc-jp',
+ \ 'euc-jp' : 'euc-jp',
+ \ 'shift_jis' : 'sjis',
+ \ 'ms_kanji' : 'sjis',
+ \ 'sjis' : 'sjis',
+ \ 'csshiftjis' : 'sjis',
+ \ 'ibm-thai' : 'cp874',
+ \ 'csibmthai' : 'cp874',
+ \ 'ks_c_5601-1987' : 'cp949',
+ \ 'iso-ir-149' : 'cp949',
+ \ 'ks_c_5601-1989' : 'cp949',
+ \ 'ksc_5601' : 'cp949',
+ \ 'korean' : 'cp949',
+ \ 'csksc56011987' : 'cp949',
+ \ 'euc-kr' : 'euc-kr',
+ \ 'cseuckr' : 'euc-kr',
+ \ 'gbk' : 'cp936',
+ \ 'cp936' : 'cp936',
+ \ 'ms936' : 'cp936',
+ \ 'windows-936' : 'cp936',
+ \ 'gb_2312-80' : 'euc-cn',
+ \ 'iso-ir-58' : 'euc-cn',
+ \ 'chinese' : 'euc-cn',
+ \ 'csiso58gb231280' : 'euc-cn',
+ \ 'gb2312' : 'euc-cn',
+ \ 'csgb2312' : 'euc-cn',
+ \ 'big5' : 'big5',
+ \ 'csbig5' : 'big5',
+ \ 'utf-8' : 'utf-8',
+ \ 'iso-10646-ucs-2' : 'ucs-2',
+ \ 'csunicode' : 'ucs-2',
+ \ 'utf-16' : 'utf-16',
+ \ 'utf-16be' : 'utf-16',
+ \ 'utf-16le' : 'utf-16le',
+ \ 'utf-32' : 'ucs-4',
+ \ 'utf-32be' : 'ucs-4',
+ \ 'utf-32le' : 'ucs-4le',
+ \ 'iso-10646-ucs-4' : 'ucs-4',
+ \ 'csucs4' : 'ucs-4'
+ \ }
+" The table is locked after definition.
+lockvar g:tohtml#charset_to_encoding
+"}}}
+
+func! tohtml#Convert2HTML(line1, line2) "{{{
let s:settings = tohtml#GetUserSettings()
- if !&diff || s:settings.diff_one_file
+ if !&diff || s:settings.diff_one_file "{{{
if a:line2 >= a:line1
let g:html_start_line = a:line1
let g:html_end_line = a:line2
@@ -24,31 +298,50 @@ func! tohtml#Convert2HTML(line1, line2)
let g:html_start_line = a:line2
let g:html_end_line = a:line1
endif
- runtime syntax/2html.vim
- else
+ runtime syntax/2html.vim "}}}
+ else "{{{
let win_list = []
let buf_list = []
windo | if &diff | call add(win_list, winbufnr(0)) | endif
let s:settings.whole_filler = 1
let g:html_diff_win_num = 0
for window in win_list
+ " switch to the next buffer to convert
exe ":" . bufwinnr(window) . "wincmd w"
+
+ " figure out whether current charset and encoding will work, if not
+ " default to UTF-8
+ if !exists('g:html_use_encoding') &&
+ \ (&l:fileencoding!='' && &l:fileencoding!=s:settings.vim_encoding ||
+ \ &l:fileencoding=='' && &encoding!=s:settings.vim_encoding)
+ echohl WarningMsg
+ echomsg "TOhtml: mismatched file encodings in Diff buffers, using UTF-8"
+ echohl None
+ let s:settings.vim_encoding = 'utf-8'
+ let s:settings.encoding = 'UTF-8'
+ endif
+
+ " set up for diff-mode conversion
let g:html_start_line = 1
let g:html_end_line = line('$')
let g:html_diff_win_num += 1
+
+ " convert this file
runtime syntax/2html.vim
+
+ " remember the HTML buffer for later combination
call add(buf_list, bufnr('%'))
endfor
unlet g:html_diff_win_num
call tohtml#Diff2HTML(win_list, buf_list)
- endif
+ endif "}}}
unlet g:html_start_line
unlet g:html_end_line
unlet s:settings
-endfunc
+endfunc "}}}
-func! tohtml#Diff2HTML(win_list, buf_list)
+func! tohtml#Diff2HTML(win_list, buf_list) "{{{
let xml_line = ""
let tag_close = '>'
@@ -87,7 +380,7 @@ func! tohtml#Diff2HTML(win_list, buf_list)
call add(html, '<head>')
" include encoding as close to the top as possible, but only if not already
- " contained in XML information (to avoid haggling over content type)
+ " contained in XML information
if s:settings.encoding != "" && !s:settings.use_xhtml
call add(html, "<meta http-equiv=\"content-type\" content=\"text/html; charset=" . s:settings.encoding . '"' . tag_close)
endif
@@ -133,7 +426,8 @@ func! tohtml#Diff2HTML(win_list, buf_list)
let s:body_end_line = getline('.')
endif
- " Grab the style information. Some of this will be duplicated...
+ " Grab the style information. Some of this will be duplicated so only insert
+ " it if it's not already there. {{{
1
let style_start = search('^<style type="text/css">')
1
@@ -151,8 +445,10 @@ func! tohtml#Diff2HTML(win_list, buf_list)
let insert_index += 1
endif
endfor
- endif
+ endif " }}}
+ " everything new will get added before the diff styles so diff highlight
+ " properly overrides normal highlight
if diff_style_start != 0
let insert_index = diff_style_start
endif
@@ -203,6 +499,20 @@ func! tohtml#Diff2HTML(win_list, buf_list)
" just in case some user autocmd creates content in the new buffer, make sure
" it is empty before proceeding
%d
+
+ " set the fileencoding to match the charset we'll be using
+ let &l:fileencoding=s:settings.vim_encoding
+
+ " According to http://www.w3.org/TR/html4/charset.html#doc-char-set, the byte
+ " order mark is highly recommended on the web when using multibyte encodings. But,
+ " it is not a good idea to include it on UTF-8 files. Otherwise, let Vim
+ " determine when it is actually inserted.
+ if s:settings.vim_encoding == 'utf-8'
+ setlocal nobomb
+ else
+ setlocal bomb
+ endif
+
call append(0, html)
if len(style) > 0
@@ -210,7 +520,7 @@ func! tohtml#Diff2HTML(win_list, buf_list)
let style_start = search('^</head>')-1
" Insert javascript to toggle matching folds open and closed in all windows,
- " if dynamic folding is active.
+ " if dynamic folding is active. {{{
if s:settings.dynamic_folds
call append(style_start, [
\ "<script type='text/javascript'>",
@@ -234,13 +544,13 @@ func! tohtml#Diff2HTML(win_list, buf_list)
\ s:settings.use_xhtml ? '//]]>' : " -->",
\ "</script>"
\ ])
- endif
+ endif "}}}
" Insert styles from all the generated html documents and additional styles
" for the table-based layout of the side-by-side diff. The diff should take
" up the full browser window (but not more), and be static in size,
" horizontally scrollable when the lines are too long. Otherwise, the diff
- " is pretty useless for really long lines.
+ " is pretty useless for really long lines. {{{
if s:settings.use_css
call append(style_start,
\ ['<style type="text/css">']+
@@ -252,28 +562,28 @@ func! tohtml#Diff2HTML(win_list, buf_list)
\ 'td div { overflow: auto; }',
\ s:settings.use_xhtml ? '' : '-->',
\ '</style>'
- \ ])
- endif
+ \])
+ endif "}}}
endif
let &paste = s:old_paste
let &magic = s:old_magic
-endfunc
+endfunc "}}}
" Gets a single user option and sets it in the passed-in Dict, or gives it the
" default value if the option doesn't actually exist.
"
" Arguments:
"   settings  Dict that receives the value; it is modified in place under the
"             key a:option
"   option    option name without the "html_" prefix; the user's value is read
"             from the global variable g:html_{option}
"   default   value stored when g:html_{option} does not exist
-func! tohtml#GetOption(settings, option, default)
+func! tohtml#GetOption(settings, option, default) "{{{
if exists('g:html_'.a:option)
let a:settings[a:option] = g:html_{a:option}
else
let a:settings[a:option] = a:default
endif
-endfunc
+endfunc "}}}
" returns a Dict containing the values of all user options for 2html, including
" default values for those not given an explicit value by the user. Discards the
" html_ prefix of the option for nicer looking code.
-func! tohtml#GetUserSettings()
+func! tohtml#GetUserSettings() "{{{
if exists('s:settings')
" just restore the known options if we've already retrieved them
return s:settings
@@ -289,7 +599,7 @@ func! tohtml#GetUserSettings()
let g:html_use_xhtml = g:use_xhtml
endif
- " get current option settings with appropriate defaults
+ " get current option settings with appropriate defaults {{{
call tohtml#GetOption(user_settings, 'no_progress', !has("statusline") )
call tohtml#GetOption(user_settings, 'diff_one_file', 0 )
call tohtml#GetOption(user_settings, 'number_lines', &number )
@@ -302,8 +612,9 @@ func! tohtml#GetUserSettings()
call tohtml#GetOption(user_settings, 'no_pre', 0 )
call tohtml#GetOption(user_settings, 'whole_filler', 0 )
call tohtml#GetOption(user_settings, 'use_xhtml', 0 )
+ " }}}
- " override those settings that need it
+ " override those settings that need it {{{
" hover opening implies dynamic folding
if user_settings.hover_unfold
@@ -330,49 +641,91 @@ func! tohtml#GetUserSettings()
" aren't allowed inside a <pre> block
if !user_settings.use_css
let user_settings.no_pre = 1
- endif
+ endif "}}}
- " Figure out proper MIME charset from the 'encoding' option.
- if exists("g:html_use_encoding")
+ if exists("g:html_use_encoding") "{{{
+ " user specified the desired MIME charset, figure out proper
+ " 'fileencoding' from it or warn the user if we cannot
let user_settings.encoding = g:html_use_encoding
+ let user_settings.vim_encoding = tohtml#EncodingFromCharset(g:html_use_encoding)
+ if user_settings.vim_encoding == ''
+ echohl WarningMsg
+ echomsg "TOhtml: file encoding for"
+ \ g:html_use_encoding
+ \ "unknown, please set 'fileencoding'"
+ echohl None
+ endif
else
- let vim_encoding = &encoding
- if vim_encoding =~ '^8bit\|^2byte'
- let vim_encoding = substitute(vim_encoding, '^8bit-\|^2byte-', '', '')
+ " Figure out proper MIME charset from 'fileencoding' if possible
+ if &l:fileencoding != ''
+ let user_settings.vim_encoding = &l:fileencoding
+ call tohtml#CharsetFromEncoding(user_settings)
+ endif
+
+ " else from 'encoding' if possible
+ if &l:fileencoding == '' || user_settings.encoding == ''
+ let user_settings.vim_encoding = &encoding
+ call tohtml#CharsetFromEncoding(user_settings)
endif
- if vim_encoding == 'latin1'
- let user_settings.encoding = 'iso-8859-1'
- elseif vim_encoding =~ "^cp12"
- let user_settings.encoding = substitute(vim_encoding, 'cp', 'windows-', '')
- elseif vim_encoding == 'sjis' || vim_encoding == 'cp932'
- let user_settings.encoding = 'Shift_JIS'
- elseif vim_encoding == 'big5' || vim_encoding == 'cp950'
- let user_settings.encoding = "Big5"
- elseif vim_encoding == 'euc-cn'
- let user_settings.encoding = 'GB_2312-80'
- elseif vim_encoding == 'euc-tw'
- let user_settings.encoding = ""
- elseif vim_encoding =~ '^euc\|^iso\|^koi'
- let user_settings.encoding = substitute(vim_encoding, '.*', '\U\0', '')
- elseif vim_encoding == 'cp949'
- let user_settings.encoding = 'KS_C_5601-1987'
- elseif vim_encoding == 'cp936'
- let user_settings.encoding = 'GBK'
- elseif vim_encoding =~ '^ucs\|^utf'
+
+ " else default to UTF-8 and warn user
+ if user_settings.encoding == ''
+ let user_settings.vim_encoding = 'utf-8'
let user_settings.encoding = 'UTF-8'
- else
- let user_settings.encoding = ""
+ echohl WarningMsg
+ echomsg "TOhtml: couldn't determine MIME charset, using UTF-8"
+ echohl None
endif
- endif
+ endif "}}}
" TODO: font
return user_settings
endif
-endfunc
+endfunc "}}}
+
+" Determine the HTML charset name for the Vim encoding in
+" a:settings.vim_encoding and store it in a:settings.encoding (an empty string
+" when no charset is known). When the chosen charset maps back to a Vim
+" encoding, a:settings.vim_encoding is updated to that encoding as well.
+function! tohtml#CharsetFromEncoding(settings) "{{{
+  let l:enc_name = a:settings.vim_encoding
+  if exists('g:html_charset_override') && has_key(g:html_charset_override, l:enc_name)
+    " a user-supplied mapping takes precedence over the built-in table
+    let a:settings.encoding = g:html_charset_override[l:enc_name]
+  else
+    " 8bit- and 2byte- prefixes are to indicate encodings available on the
+    " system that Vim will convert with iconv(), look up just the encoding name,
+    " not Vim's prefix. Names without such a prefix pass through unchanged.
+    let l:enc_name = substitute(l:enc_name, '^8bit-\|^2byte-', '', '')
+    let a:settings.encoding = get(g:tohtml#encoding_to_charset, l:enc_name, "")
+  endif
+
+  " without a charset there is nothing to map back to a Vim encoding
+  if a:settings.encoding == ""
+    return
+  endif
+
+  " if the Vim encoding to HTML encoding conversion is set up (by default or
+  " by the user) to convert to a different encoding, we need to also change
+  " the Vim encoding of the new buffer
+  let l:buf_encoding = tohtml#EncodingFromCharset(a:settings.encoding)
+  if l:buf_encoding != ""
+    let a:settings.vim_encoding = l:buf_encoding
+  endif
+endfun "}}}
+
+" Return the Vim encoding name to use for the HTML charset name a:encoding, or
+" an empty string when none is known. An entry in g:html_encoding_override
+" (matched on the name exactly as given) wins over the built-in table, which is
+" searched with the lower-cased name.
+function! tohtml#EncodingFromCharset(encoding) "{{{
+  if exists('g:html_encoding_override') && has_key(g:html_encoding_override, a:encoding)
+    return g:html_encoding_override[a:encoding]
+  endif
+  return get(g:tohtml#charset_to_encoding, tolower(a:encoding), "")
+endfun "}}}
let &cpo = s:cpo_sav
unlet s:cpo_sav
" Make sure any patches will probably use consistent indent
-" vim: ts=8 sw=2 sts=2 noet
+" vim: ts=8 sw=2 sts=2 noet fdm=marker