Clover Coverage Report
Coverage timestamp: Sat Sep 18 2010 04:09:52 UTC
../../../../../img/srcFileCovDistChart8.png 50% of files have more coverage
108   283   49   13.5
54   163   0.45   8
8     6.12  
1    
 
  LatexMathParser       Line # 76 108 49 78.2% 0.7823529
 
  (11)
 
1    /* This file is part of the project "Hilbert II" - http://www.qedeq.org
2    *
3    * Copyright 2000-2010, Michael Meyling <mime@qedeq.org>.
4    *
5    * "Hilbert II" is free software; you can redistribute
6    * it and/or modify it under the terms of the GNU General Public
7    * License as published by the Free Software Foundation; either
8    * version 2 of the License, or (at your option) any later version.
9    *
10    * This program is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13    * GNU General Public License for more details.
14    */
15   
16    package org.qedeq.kernel.bo.parser;
17   
18    import java.util.ArrayList;
19    import java.util.List;
20   
21    import org.qedeq.base.io.TextInput;
22    import org.qedeq.base.trace.Trace;
23   
24    /*
25    * TODO mime 20080118: refactor
26    *
27    * Whitespace LaTeX form, could be eaten
28    * \t
29    * \r
30    * \n
31    * \\
32    * \\,
33    * &
34    * \\\\
35    * \\par
36    * \\quad
37    * \\qquad
38    *
39    * Separator only one allowed, before and after only whitespace is possible
40    * ,
41    * |
42    * $$
43    * Separator should be read as tokens.
44    *
45    * Problem: If some atom-like token is followed by "(" it should be taken as an
46    * (function) operator. But if we start with readToken we don't see the "("
47    * character.
48    *
49    * Problem: Could whitespace be recognized?
50    * Translating whitespace tokens into spaces is not easy, one has to know the
51    * end of the whitespace token.
52    * Possible solution:
53    * function read token (LaTeX specific)
54    * skip real whitespace (" ", "\t", "\r", "\n")
55    * read char
56    * case char
57    * "\\" read characters or numbers (check LaTeX Syntax)
58    * "{", "}", "(", ")" are also allowed
59    * resulting string is token
60    * LaTeX command definition modifies above:
61    * Most LaTeX commands have one of the following two formats: either they begin
62    * with a backslash (\) and then have a name consisting only of letters, which is
63    * terminated by one or more spaces or by a following special character or digit;
64    * or they consist of a backslash and exactly one special character or digit.
65    * Upper-case and lower-case letters are distinct in command names as well. If a space is
66    * wanted after a command name, one has to use "{}" to terminate the command name
67    * or use a dedicated command for the space.
68    */
69   
70    /**
71    * Parse LaTeX term or formula data into {@link org.qedeq.kernel.bo.parser.Term}s.
72    *
73    * @version $Revision: 1.1 $
74    * @author Michael Meyling
75    */
 
76    public class LatexMathParser extends MathParser {
77   
78    /** This class. */
79    private static final Class CLASS = LatexMathParser.class;
80   
81    /** Characters with special LaTeX meaning. */
82    private static final String SPECIALCHARACTERS = "(),{}\\~%$&";
83   
84    /** Counter for token whitespace lines. */
85    private int tokenWhiteSpaceLines;
86   
87    /**
88    * Constructor.
89    *
90    * @param input Parse this input.
91    * @param operators List of operators.
92    */
 
93  11 toggle public LatexMathParser(final TextInput input, final List operators) {
94  11 super(new MementoTextInput(input), operators);
95    }
96   
97   
98    /**
99    * Constructor.
100    *
101    * @param buffer Parse this input.
102    * @param operators List of operators.
103    */
 
104  0 toggle public LatexMathParser(final StringBuffer buffer, final List operators) {
105  0 this(new TextInput(buffer), operators);
106    }
107   
 
108  658 toggle protected final String readToken() {
109  658 final String method = "readToken()";
110  658 Trace.begin(CLASS, this, method);
111  658 StringBuffer token = new StringBuffer();
112  658 tokenWhiteSpaceLines = 0;
113  658 try {
114  658 do {
115  836 tokenWhiteSpaceLines += readPureWhitespace();
116  836 if (tokenWhiteSpaceLines > 1) {
117  0 break;
118    }
119  836 if (eof()) {
120  32 if (token.length() <= 0) {
121  32 token = null;
122    }
123  32 break;
124    }
125  804 final int c = getChar();
126  804 if (Character.isDigit((char) c)) {
127  8 token.append((char) readChar());
128  8 if (Character.isDigit((char) getChar())) {
129  0 continue;
130    }
131  8 break;
132    }
133  796 if (SPECIALCHARACTERS.indexOf(c) >= 0) {
134  697 switch (c) {
135  0 case '&':
136  0 case '%':
137  22 case '~':
138  134 case '$': // TODO mime 20060504 or break in this case?
139  156 readChar();
140  156 continue;
141  252 case '\\':
142  252 final String t = readBackslashToken();
143  252 if (t.equals(" ") || t.equals("quad") || t.equals("qquad")) {
144  34 continue;
145    }
146  218 token.append(t);
147  218 if ('_' == getChar() || '^' == getChar()) {
148  0 token.append((char) readChar());
149  0 continue;
150    }
151  218 break;
152  0 case '{':
153  0 readChar();
154  0 token.append("(");
155  0 break;
156  0 case '}':
157  0 readChar();
158  0 token.append(")");
159  0 break;
160  289 default:
161  289 readChar();
162  289 token.append((char) c);
163  289 if ('_' == getChar() || '^' == getChar()) {
164  0 token.append((char) readChar());
165  0 continue;
166    }
167    }
168  507 break;
169    }
170  99 token.append((char) readChar());
171  99 if ('_' == getChar() || '^' == getChar()) {
172  8 token.append((char) readChar());
173  8 continue;
174    }
175  91 break;
176    /*
177    String operator = null;
178    markPosition();
179    while (!eof() && (Character.isLetterOrDigit((char) getChar()) || '_' == getChar()
180    || '^' == getChar())) {
181    token.append((char) readChar());
182    if (null != getOperator(token.toString())) {
183    operator = token.toString();
184    clearMark();
185    markPosition();
186    }
187    }
188    if (operator != null) {
189    rewindPosition();
190    token.setLength(0);
191    token.append(operator);
192    } else {
193    clearMark();
194    }
195    */
196  198 } while (!eof());
197  658 Trace.param(CLASS, this, method, "return token", token);
198  658 return (token != null ? token.toString() : null);
199    } finally {
200  658 Trace.end(CLASS, this, method);
201    }
202    }
203   
 
204  252 toggle private String readBackslashToken() {
205  252 final String method = "readBackslashToken()";
206  252 Trace.begin(CLASS, this, method);
207  252 if (getChar() != '\\') {
208  0 throw new IllegalArgumentException("\\ expected");
209    }
210  252 readChar(); // read \
211  252 if (eof()) {
212  0 Trace.param(CLASS, this, method, "return", null);
213  0 Trace.end(CLASS, this, method);
214  0 return null;
215    }
216  252 if (!Character.isLetter((char) getChar())) {
217  34 Trace.param(CLASS, this, method, "return", (char) getChar());
218  34 Trace.end(CLASS, this, method);
219  34 return "" + ((char) readChar());
220    }
221  218 final StringBuffer buffer = new StringBuffer();
222  218 do {
223  880 buffer.append((char) readChar());
224  880 } while (!eof() && Character.isLetter((char) getChar()));
225  218 Trace.param(CLASS, this, method, "return", buffer.toString());
226  218 Trace.end(CLASS, this, method);
227  218 return buffer.toString();
228    }
229   
 
230  836 toggle private int readPureWhitespace() {
231  836 int lines = 0;
232  1130 while (getChar() != -1 && Character.isWhitespace((char) getChar())) {
233  294 if ('\n' == (char) getChar()) {
234  60 lines++;
235    }
236  294 readChar();
237    }
238  836 return lines;
239    }
240   
 
241  177 toggle protected final Operator getOperator(final String token) {
242  177 Operator result = null;
243  177 if (token == null) {
244  0 return null;
245    }
246  3249 for (int i = 0; i < getOperators().size(); i++) {
247  3156 if (token.equals(((Operator) getOperators().get(i)).getStartSymbol())) {
248  84 result = (Operator) getOperators().get(i);
249  84 break;
250    }
251    }
252  177 if (result != null) {
253  84 return result;
254    }
255    // mime 20080725: no operator found -> return subject variable
256  93 if (SPECIALCHARACTERS.indexOf(token) < 0) {
257  0 return new Operator(token, null, null, "VAR", token, 200, 0, 0);
258    }
259  93 return null;
260    }
261   
 
262  140 toggle protected final List getOperators(final String token) {
263  140 final List result = new ArrayList();
264  140 if (token == null) {
265  0 return result;
266    }
267  3780 for (int i = 0; i < getOperators().size(); i++) {
268  3640 if (token.equals(((Operator) getOperators().get(i)).getStartSymbol())) {
269  102 result.add(getOperators().get(i));
270    }
271    }
272    // mime 20080725: no operator found -> return subject variable
273  140 if (result.size() <= 0 && SPECIALCHARACTERS.indexOf(token) < 0) {
274  0 result.add(new Operator(token, null, null, "VAR", token, 200, 0, 0));
275    }
276  140 return result;
277    }
278   
 
279  162 toggle protected boolean eot(final String token) {
280  162 return token == null || token.trim().length() == 0;
281    }
282   
283    }