diff --git a/manual/aspell.1 b/manual/aspell.1 index 4ec47bd9..60913195 100644 --- a/manual/aspell.1 +++ b/manual/aspell.1 @@ -210,6 +210,9 @@ Check TeX comments. \fB\-\-add\-tex\-command=\fR\fI\fR, \fB\-\-rem\-tex\-command=\fR\fI\fR Add or remove a list of TeX commands. .TP +\fB\-\-add\-tex\-ignore\-env=\fR\fI\fR, \fB\-\-rem\-tex\-ignore\-env=\fR\fI\fR +Add or remove a list of LaTeX environments to skip while spell checking. +.TP \fB\-\-add\-texinfo\-ignore=\fR\fI\fR, \fB\-\-rem\-texinfo\-ignore=\fR\fI\fR Add or remove a list of Texinfo commands. .TP diff --git a/manual/aspell.texi b/manual/aspell.texi index 0802c5e4..cee0da92 100644 --- a/manual/aspell.texi +++ b/manual/aspell.texi @@ -1220,10 +1220,18 @@ The @option{tex} (all lowercase) filter hides @TeX{}/LaTeX commands and corresponding parameters that are not readable text in the output from Aspell. It also skips over @TeX{} comments by default. +Discretionary hyphens and italic corrections are ignored. For example, +@samp{hy\-phen} and @samp{shelf\/ful} are recognized as single words. + @c This filter mode is also available via the @c @option{latex} alias name. @table @b + +@item tex-check-comments +@i{(boolean)} +Check @TeX{} comments. Defaults to false. + @item tex-command @i{(list)} Controls which @TeX{} commands should have certain parameters and/or @@ -1232,13 +1240,15 @@ all their parameters and/or options checked. The format for each item is @example - + @end example The first item is simply the command name. The second item controls which parameters to skip over. A 'p' skips over a parameter while a 'P' doesn't. Similarly an 'o' will skip over an optional parameter -while an 'O' doesn't. The first letter on the list will apply to the +while an 'O' doesn't. A 'T' will force spell-checking of a parameter +even if the command occurs within a parameter or an environment Aspell +is told to skip over. 
The first letter on the list will apply to the first parameter, the second letter will apply to the second parameter etc. If there are more parameters than letters Aspell will simply check them as normal. For example the option @@ -1261,6 +1271,25 @@ over the next optional parameter, if it is present, and will skip over the second parameter --- even if the optional parameter is not present --- and will check any additional parameters. +@example +add-tex-command foo T +@end example + +@noindent +will @emph{check} the first parameter of the @code{foo} command even +if Aspell is currently skipping over an argument or environment. For +example, if Aspell has been told to skip over the @code{bar} +environment (@pxref{Ignoring LaTeX Environments}), then in the text + +@example +\begin@{bar@} don't check \foo@{check@} don't check \end@{bar@} +@end example + +@noindent +it will nevertheless @emph{check} the argument to @code{foo}. This is +useful to force checking of arguments to text-related commands like +@code{hbox}, @code{text} or @code{intertext} inside math environments. + A @samp{*} at the end of the command is simply ignored. For example the option @@ -1287,15 +1316,56 @@ current defaults. @c will remove the command foo, if present, from the list of @TeX{} @c commands. -@item tex-check-comments -@i{(boolean)} -Check @TeX{} comments. Defaults to false. +@anchor{Ignoring LaTeX Environments} +@item tex-ignore-env +@i{(list)} +This controls which @TeX{} environments are skipped over. By default, +Aspell will skip over math formulas inside @code{$...$}, @code{$$...$$}, +@code{\(...\)} and @code{\[...\]} and over several common LaTeX and +AMS-LaTeX math environments like @code{equation} and @code{gather}. +For example, + +@example +add-tex-ignore-env thebibliography +@end example + +@noindent +will tell Aspell to skip over the bibliography as well (which may or +may not be a good idea). As with commands, skipping applies to the +starred form of the environment as well. 
+ +@example +rem-tex-ignore-env equation +@end example + +@noindent +will make Aspell spell-check the contents of @code{equation} and +@code{equation*} environments. Skipping the contents of @code{$...$}, +@code{$$...$$}, @code{\(...\)} and @code{\[...\]} cannot be turned off. + +Note that one can force spell-checking of arguments to TeX commands +inside ignored environments with the 'T' parameter to the +@option{add-tex-command} option. @c @item tex-multi-byte @c (@i{list}) TeX multi byte letter en|decoding @end table +As a last resort, spell checking can be switched off by putting the +text @code{aspell:off} into the file. Similarly, with @code{aspell:on} +one can turn it on again. This can be useful for macro definitions, +for example + +@example +% aspell:off +\def\doi#1@{\href@{http://doi.org/#1@}@{doi:#1@}@} +% aspell:on +@end example + +@noindent +This feature is implemented via the @ref{Context Filter}. + @c The TeXfilter mode also contains a decoding and an encoding filter for @c @emph{babel} character codes like the German Umlauts: @@ -1404,6 +1474,7 @@ escapes (@code{\(}) and extended (@code{\[comp1 comp2 @dots{}]}) form. 
@end itemize +@anchor{Context Filter} @subsubsection Context Filter The @option{context} filter can be used to spell check source codes, diff --git a/modules/filter/context.cpp b/modules/filter/context.cpp index 0d0f6051..afe01aec 100644 --- a/modules/filter/context.cpp +++ b/modules/filter/context.cpp @@ -63,6 +63,7 @@ namespace { PosibErr ContextFilter::setup(Config * config){ name_ = "context-filter"; + order_num_ = 0.15; StringList delimiters; StackPtr delimiterpairs; const char * delimiterpair=NULL; diff --git a/modules/filter/modes/tex.amf b/modules/filter/modes/tex.amf index beb32dc3..758fcc00 100644 --- a/modules/filter/modes/tex.amf +++ b/modules/filter/modes/tex.amf @@ -6,5 +6,8 @@ MAGIC /0:256:^[ \t]*\\documentclass\[[^\[\]]*\]\{[^\{\}]*\}/tex DESCRIPTION mode for checking TeX/LaTeX documents -FILTER url +FILTER context +OPTION clear-context-delimiters +OPTION add-context-delimiters aspell:off aspell:on +OPTION enable-context-visible-first FILTER tex diff --git a/modules/filter/tex-filter.info b/modules/filter/tex-filter.info index 81d9c5b2..14d1e935 100644 --- a/modules/filter/tex-filter.info +++ b/modules/filter/tex-filter.info @@ -16,15 +16,35 @@ DESCRIPTION check TeX comments DEFAULT false ENDOPTION +OPTION ignore-env +TYPE list +DESCRIPTION LaTeX environments to be ignored +# LaTeX +#DEFAULT thebibliography +DEFAULT equation +DEFAULT eqnarray +# AMS-LaTeX +DEFAULT gather +DEFAULT multline +DEFAULT align +DEFAULT flalign +DEFAULT alignat +# Babel +DEFAULT otherlanguage +ENDOPTION + OPTION command TYPE list DESCRIPTION TeX commands +# plain TeX / LaTeX DEFAULT addtocounter pp DEFAULT addtolength pp -DEFAULT alpha p +DEFAULT alph p +DEFAULT Alph p DEFAULT arabic p DEFAULT fnsymbol p DEFAULT roman p +DEFAULT Roman p DEFAULT stepcounter p DEFAULT setcounter pp DEFAULT usecounter p @@ -42,7 +62,8 @@ DEFAULT newtheorem poPo DEFAULT newfont pp DEFAULT documentclass op DEFAULT usepackage op -DEFAULT begin po +# DO NOT change the next line! 
+DEFAULT begin so DEFAULT end p DEFAULT setlength pp DEFAULT addtolength pp @@ -54,15 +75,17 @@ DEFAULT hyphenation p DEFAULT pagenumbering p DEFAULT pagestyle p DEFAULT addvspace p -DEFAULT framebox ooP +DEFAULT framebox ooT DEFAULT hspace p DEFAULT vspace p -DEFAULT makebox ooP -DEFAULT parbox ooopP -DEFAULT raisebox pooP +DEFAULT hbox T +DEFAULT vbox T +DEFAULT makebox ooT +DEFAULT parbox ooopT +DEFAULT raisebox pooT DEFAULT rule opp DEFAULT sbox pO -DEFAULT savebox pooP +DEFAULT savebox pooT DEFAULT usebox p DEFAULT include p DEFAULT includeonly p @@ -76,13 +99,30 @@ DEFAULT fontshape p DEFAULT fontsize pp DEFAULT usefont pppp DEFAULT documentstyle op -DEFAULT cite p +DEFAULT cite Op DEFAULT nocite p DEFAULT psfig p DEFAULT selectlanguage p DEFAULT includegraphics op DEFAULT bibitem op +DEFAULT bibliography p +DEFAULT bibliographystyle p DEFAULT geometry p +# AMS-LaTeX +DEFAULT address p +DEFAULT email p +DEFAULT mathbb p +DEFAULT mathfrak p +DEFAULT eqref p +DEFAULT text T +DEFAULT intertext T +DEFAULT DeclareMathOperator pp +DEFAULT DeclareMathAlphabet ppppp +# hyperref +DEFAULT href pP +DEFAULT autoref p +DEFAULT url p +DEFAULT texorpdfstring Pp ENDOPTION #OPTION multi-byte diff --git a/modules/filter/tex.cpp b/modules/filter/tex.cpp index 19ab63ce..9809c3e2 100644 --- a/modules/filter/tex.cpp +++ b/modules/filter/tex.cpp @@ -32,17 +32,17 @@ namespace { class TexFilter : public IndividualFilter { private: - enum InWhat {Name, Opt, Parm, Other, Swallow}; + enum InWhat {Text, Name, Comment, InlineMath, DisplayMath, EnvName}; struct Command { InWhat in_what; String name; - const char * do_check; + bool skip; + int size; + const char * args; Command() {} - Command(InWhat w) : in_what(w), do_check("P") {} + Command(InWhat w, bool s, const char *a) : in_what(w), skip(s), args(a), size(0) {} }; - bool in_comment; - bool prev_backslash; Vector stack; class Commands : public StringMap { @@ -53,11 +53,11 @@ namespace { Commands commands; bool check_comments; - - 
inline void push_command(InWhat); - inline void pop_command(); - bool end_option(char u, char l); + StringMap ignore_env; + + inline bool push_command(InWhat, bool, const char *); + inline bool pop_command(); inline bool process_char(FilterChar::Chr c); @@ -71,14 +71,17 @@ namespace { // // - inline void TexFilter::push_command(InWhat w) { - stack.push_back(Command(w)); + inline bool TexFilter::push_command(InWhat w, bool skip, const char *args = "") { + stack.push_back(Command(w, skip, args)); + return skip; } - inline void TexFilter::pop_command() { - stack.pop_back(); - if (stack.empty()) - push_command(Parm); + inline bool TexFilter::pop_command() { + bool skip = stack.back().skip; + if (stack.size() > 1) { + stack.pop_back(); + } + return skip; } // @@ -96,133 +99,167 @@ namespace { check_comments = opts->retrieve_bool("f-tex-check-comments"); + opts->retrieve_list("f-tex-ignore-env", &ignore_env); + reset(); return true; } void TexFilter::reset() { - in_comment = false; - prev_backslash = false; stack.resize(0); - push_command(Parm); + push_command(Text, false); } # define top stack.back() +# define next_arg if (*top.args) { ++top.args; if (!*top.args) pop_command(); } +# define skip_opt_args if (*top.args) { while (*top.args == 'O' || *top.args == 'o') { ++top.args; } if (!*top.args) pop_command(); } + // yes this should be inlined, it is only called once inline bool TexFilter::process_char(FilterChar::Chr c) { - // deal with comments - if (c == '%' && !prev_backslash) in_comment = true; - if (in_comment && c == '\n') in_comment = false; - - prev_backslash = false; - - if (in_comment) return !check_comments; + top.size++; if (top.in_what == Name) { if (asc_isalpha(c)) { top.name += c; - return true; + return top.skip; } else { - - if (top.name.empty() && (c == '@')) { - top.name += c; - return true; - } - - top.in_what = Other; + bool in_name; if (top.name.empty()) { - top.name.clear(); top.name += c; - top.do_check = commands.lookup(top.name.c_str()); - if 
(top.do_check == 0) top.do_check = ""; - return !asc_isspace(c); + in_name = true; + } else { + top.size--; + in_name = false; } - top.do_check = commands.lookup(top.name.c_str()); - if (top.do_check == 0) top.do_check = ""; + String name = top.name; - if (asc_isspace(c)) { // swallow extra spaces - top.in_what = Swallow; - return true; - } else if (c == '*') { // ignore * at end of commands - return true; + pop_command(); + + const char *args = commands.lookup(name.c_str()); + + if (name == "begin") + push_command(top.in_what, top.skip); + // args = "s"; + else if (name == "end") + pop_command(); + + // we might still be waiting for arguments + skip_opt_args; + if (*top.args) { + next_arg; + } else if (name == "[") { + // \[ + push_command(DisplayMath, true); + } else if (name == "]") { + // \] + pop_command(); // pop DisplayMath + } else if (name == "(") { + // \( + push_command(InlineMath, true); + } else if (name == ")") { + // \) + pop_command(); // pop InlineMath + } else if (args && *args) { + push_command(top.in_what, top.skip, args); } - // continue o... + if (in_name || c == '*') // better way to deal with "*"? 
+ return true; + else + return process_char(c); // start over } - } else if (top.in_what == Swallow) { - - if (asc_isspace(c)) - return true; - else - top.in_what = Other; } - if (c == '{') - while (*top.do_check == 'O' || *top.do_check == 'o') - ++top.do_check; - - if (*top.do_check == '\0') - pop_command(); - - if (c == '{') { - - if (top.in_what == Parm || top.in_what == Opt || *top.do_check == '\0') - push_command(Parm); + if (top.in_what == Comment) { + if (c == '\n') { + pop_command(); + return false; // preserve newlines + } else { + return top.skip; + } + } - top.in_what = Parm; + if (c == '%') { + push_command(Comment, !check_comments); return true; } - if (top.in_what == Other) { - - if (c == '[') { - - top.in_what = Opt; - return true; - - } else if (asc_isspace(c)) { - - return true; - + if (c == '$') { + if (top.in_what != InlineMath) { + // $ begin + return push_command(InlineMath, true); + } else if (top.size > 1) { + // $ end + return pop_command(); } else { - - pop_command(); - + // $ -> $$ + pop_command(); // pop InlineMath + if (top.in_what == DisplayMath) + // $$ end + return pop_command(); + else + // $$ start + return push_command(DisplayMath, true); } - - } + } if (c == '\\') { - prev_backslash = true; - push_command(Name); - return true; + return push_command(Name, true); } - if (top.in_what == Parm) { + if (c == '}' || c == ']') { + if (top.in_what == EnvName) { + String env = top.name; + if (env.back() == '*') + env.pop_back(); + bool skip = pop_command(); + next_arg; + if (ignore_env.have(env)) { + stack[stack.size()-2].skip = true; + } + return skip; + } else { + bool skip = pop_command(); + next_arg; + return skip; + } + } - if (c == '}') - return end_option('P','p'); + if (c == '{') { + skip_opt_args; + if (*top.args == 'T') + return push_command(Text, false); + else if (*top.args == 's') + return push_command(EnvName, true); else - return *top.do_check == 'p'; + return push_command(top.in_what, top.skip || *top.args == 'p'); + } - } 
else if (top.in_what == Opt) { + if (c == '[') { + if (*top.args == 'O' || *top.args == 'o' || !*top.args) { + return push_command(top.in_what, top.skip || *top.args == 'o'); + } + // else: fall-through to treat it as a one-letter argument + } - if (c == ']') - return end_option('O', 'o'); - else - return *top.do_check == 'o'; + if (top.in_what == EnvName) + top.name += c; + // we might still be waiting for arguments + if (!asc_isspace(c)) { + skip_opt_args; + next_arg; } - return false; + return top.skip; } void TexFilter::process(FilterChar * & str, FilterChar * & stop) @@ -230,19 +267,24 @@ namespace { FilterChar * cur = str; while (cur != stop) { - if (process_char(*cur)) + bool hyphen = top.in_what == Name && top.size == 0 + && (*cur == '-' || *cur == '/') && cur-str >= 2; + if (process_char(*cur)) { *cur = ' '; + } + if (hyphen) { + FilterChar *i = cur-2, *j = cur+1; + *i = FilterChar(*i, FilterChar::sum(i, j)); + i++; + while (j != stop) + *(i++) = *(j++); + *(stop-2) = *(stop-1) = FilterChar(0, 0); + cur--; + } ++cur; } } - bool TexFilter::end_option(char u, char l) { - top.in_what = Other; - if (*top.do_check == u || *top.do_check == l) - ++top.do_check; - return true; - } - // // TexFilter::Commands // @@ -252,14 +294,14 @@ namespace { while (!asc_isspace(value[p1])) { if (value[p1] == '\0') return make_err(bad_value, value,"", - _("a string of 'o','O','p',or 'P'")); + _("a string of 'o', 'O', 'p', 'P', 's' or 'T'")); ++p1; } int p2 = p1 + 1; while (asc_isspace(value[p2])) { if (value[p2] == '\0') return make_err(bad_value, value,"", - _("a string of 'o','O','p',or 'P'")); + _("a string of 'o', 'O', 'p', 'P', 's' or 'T'")); ++p2; } String t1; t1.assign(value,p1);