Clover coverage report - QedeqKernelSe Coverage Report
Coverage timestamp: Do Mrz 27 2008 21:46:26 CET
file stats: LOC: 666   Methods: 26
NCLOC: 389   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
XmlReader.java 17,3% 38,7% 42,3% 32,4%
coverage coverage
 1    /*
 2    * Copyright 2004 Sun Microsystems, Inc.
 3    *
 4    * Licensed under the Apache License, Version 2.0 (the "License");
 5    * you may not use this file except in compliance with the License.
 6    * You may obtain a copy of the License at
 7    *
 8    * http://www.apache.org/licenses/LICENSE-2.0
 9    *
 10    * Unless required by applicable law or agreed to in writing, software
 11    * distributed under the License is distributed on an "AS IS" BASIS,
 12    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13    * See the License for the specific language governing permissions and
 14    * limitations under the License.
 15    *
 16    */
 17    package com.sun.syndication.io;
 18   
 19    import java.io.*;
 20    import java.net.URL;
 21    import java.net.URLConnection;
 22    import java.net.HttpURLConnection;
 23    import java.util.regex.Pattern;
 24    import java.util.regex.Matcher;
 25    import java.text.MessageFormat;
 26   
 27    /**
 28    * Character stream that handles (or at least attempts to) all the necessary voodoo to figure out
 29    * the charset encoding of the XML document within the stream.
 30    * <p>
 31    * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a
 32    * character stream.
 33    * <p>
 34    * All this has to be done without consuming characters from the stream; otherwise the XML parser
 35    * will not recognize the document as valid XML. This is not 100% true, but it's close enough
 36    * (UTF-8 BOM is not handled by all parsers right now, XmlReader handles it and things work in all
 37    * parsers).
 38    * <p>
 39    * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and
 40    * HTTP streams by offering a wide set of constructors.
 41    * <P>
 42    * By default the charset encoding detection is lenient; the constructor with the lenient flag
 43    * can be used for strict detection (following HTTP MIME and XML specifications).
 44    * All this is nicely explained by Mark Pilgrim in his blog,
 45    * <a href="http://diveintomark.org/archives/2004/02/13/xml-media-types">
 46    * Determining the character encoding of a feed</a>.
 47    * <p>
 48    * @author Alejandro Abdelnur
 49    * @version revision 1.18 taken on 2008-03-06 from Rome (see
 50    * https://rome.dev.java.net/source/browse/rome/src/java/com/sun/syndication/io/XmlReader.java)
 51    */
 52    public class XmlReader extends Reader {
 53    private static final int BUFFER_SIZE = 4096;
 54   
 55    private static final String UTF_8 = "UTF-8";
 56    private static final String US_ASCII = "US-ASCII";
 57    private static final String UTF_16BE = "UTF-16BE";
 58    private static final String UTF_16LE = "UTF-16LE";
 59    private static final String UTF_16 = "UTF-16";
 60   
 61    private static String _staticDefaultEncoding = null;
 62   
 63    private Reader _reader;
 64    private String _encoding;
 65    private String _defaultEncoding;
 66   
 67    /**
 68    * Sets the default encoding to use if none is set in HTTP content-type,
 69    * XML prolog and the rules based on content-type are not adequate.
 70    * <p/>
 71    * If it is set to NULL the content-type based rules are used.
 72    * <p/>
 73    * By default it is NULL.
 74    * <p/>
 75    *
 76    * @param encoding charset encoding to default to.
 77    */
 78  0 public static void setDefaultEncoding(String encoding) {
 79  0 _staticDefaultEncoding = encoding;
 80    }
 81   
 82    /**
 83    * Returns the default encoding to use if none is set in HTTP content-type,
 84    * XML prolog and the rules based on content-type are not adequate.
 85    * <p/>
 86    * If it is NULL the content-type based rules are used.
 87    * <p/>
 88    *
 89    * @return the default encoding to use.
 90    */
 91  0 public static String getDefaultEncoding() {
 92  0 return _staticDefaultEncoding;
 93    }
 94   
 95    /**
 96    * Creates a Reader for a File.
 97    * <p>
 98    * It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also
 99    * missing defaults to UTF-8.
 100    * <p>
 101    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
 102    * for details.
 103    * <p>
 104    * @param file File to create a Reader from.
 105    * @throws IOException thrown if there is a problem reading the file.
 106    *
 107    */
 108  61846 public XmlReader(File file) throws IOException {
 109  61846 this(new FileInputStream(file));
 110    }
 111   
 112    /**
 113    * Creates a Reader for a raw InputStream.
 114    * <p>
 115    * It follows the same logic used for files.
 116    * <p>
 117    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
 118    * for details.
 119    * <p>
 120    * @param is InputStream to create a Reader from.
 121    * @throws IOException thrown if there is a problem reading the stream.
 122    *
 123    */
 124  61846 public XmlReader(InputStream is) throws IOException {
 125  61846 this(is,true);
 126    }
 127   
 128    /**
 129    * Creates a Reader for a raw InputStream.
 130    * <p>
 131    * It follows the same logic used for files.
 132    * <p>
 133    * If lenient detection is indicated and the detection above fails as per specifications it then attempts
 134    * the following:
 135    * <p>
 136    * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
 137    * <p>
 138    * Else if the XML prolog had a charset encoding that encoding is used.
 139    * <p>
 140    * Else if the content type had a charset encoding that encoding is used.
 141    * <p>
 142    * Else 'UTF-8' is used.
 143    * <p>
 144    * If lenient detection is indicated an XmlReaderException is never thrown.
 145    * <p>
 146    * @param is InputStream to create a Reader from.
 147    * @param lenient indicates if the charset encoding detection should be relaxed.
 148    * @throws IOException thrown if there is a problem reading the stream.
 149    * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
 150    *
 151    */
 152  61846 public XmlReader(InputStream is,boolean lenient) throws IOException, XmlReaderException {
 153  61846 _defaultEncoding = _staticDefaultEncoding;
 154  61846 try {
 155  61846 doRawStream(is,lenient);
 156    }
 157    catch (XmlReaderException ex) {
 158  0 if (!lenient) {
 159  0 throw ex;
 160    }
 161    else {
 162  0 doLenientDetection(null,ex);
 163    }
 164    }
 165    }
 166   
 167    /**
 168    * Creates a Reader using the InputStream of a URL.
 169    * <p>
 170    * If the URL is not of type HTTP and there is not 'content-type' header in the fetched
 171    * data it uses the same logic used for Files.
 172    * <p>
 173    * If the URL is a HTTP Url or there is a 'content-type' header in the fetched
 174    * data it uses the same logic used for an InputStream with content-type.
 175    * <p>
 176    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
 177    * for details.
 178    * <p>
 179    * @param url URL to create a Reader from.
 180    * @throws IOException thrown if there is a problem reading the stream of the URL.
 181    *
 182    */
 183  0 public XmlReader(URL url) throws IOException {
 184  0 this(url.openConnection());
 185    }
 186   
 187    /**
 188    * Creates a Reader using the InputStream of a URLConnection.
 189    * <p>
 190    * If the URLConnection is not of type HttpURLConnection and there is not
 191    * 'content-type' header in the fetched data it uses the same logic used for files.
 192    * <p>
 193    * If the URLConnection is a HTTP Url or there is a 'content-type' header in the fetched
 194    * data it uses the same logic used for an InputStream with content-type.
 195    * <p>
 196    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
 197    * for details.
 198    * <p>
 199    * @param conn URLConnection to create a Reader from.
 200    * @throws IOException thrown if there is a problem reading the stream of the URLConnection.
 201    *
 202    */
 203  0 public XmlReader(URLConnection conn) throws IOException {
 204  0 _defaultEncoding = _staticDefaultEncoding;
 205  0 boolean lenient = true;
 206  0 if (conn instanceof HttpURLConnection) {
 207  0 try {
 208  0 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
 209    }
 210    catch (XmlReaderException ex) {
 211  0 doLenientDetection(conn.getContentType(),ex);
 212    }
 213    }
 214    else
 215  0 if (conn.getContentType()!=null) {
 216  0 try {
 217  0 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
 218    }
 219    catch (XmlReaderException ex) {
 220  0 doLenientDetection(conn.getContentType(),ex);
 221    }
 222    }
 223    else {
 224  0 try {
 225  0 doRawStream(conn.getInputStream(),lenient);
 226    }
 227    catch (XmlReaderException ex) {
 228  0 doLenientDetection(null,ex);
 229    }
 230    }
 231    }
 232   
 233    /**
 234    * Creates a Reader using an InputStream an the associated content-type header.
 235    * <p>
 236    * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
 237    * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
 238    * prolog encoding uses the default encoding mandated by the content-type MIME type.
 239    * <p>
 240    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
 241    * for details.
 242    * <p>
 243    * @param is InputStream to create the reader from.
 244    * @param httpContentType content-type header to use for the resolution of the charset encoding.
 245    * @throws IOException thrown if there is a problem reading the file.
 246    *
 247    */
 248  0 public XmlReader(InputStream is,String httpContentType) throws IOException {
 249  0 this(is,httpContentType,true);
 250    }
 251   
 252    /**
 253    * Creates a Reader using an InputStream an the associated content-type header. This constructor is
 254    * lenient regarding the encoding detection.
 255    * <p>
 256    * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
 257    * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
 258    * prolog encoding uses the default encoding mandated by the content-type MIME type.
 259    * <p>
 260    * If lenient detection is indicated and the detection above fails as per specifications it then attempts
 261    * the following:
 262    * <p>
 263    * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
 264    * <p>
 265    * Else if the XML prolog had a charset encoding that encoding is used.
 266    * <p>
 267    * Else if the content type had a charset encoding that encoding is used.
 268    * <p>
 269    * Else 'UTF-8' is used.
 270    * <p>
 271    * If lenient detection is indicated an XmlReaderException is never thrown.
 272    * <p>
 273    * @param is InputStream to create the reader from.
 274    * @param httpContentType content-type header to use for the resolution of the charset encoding.
 275    * @param lenient indicates if the charset encoding detection should be relaxed.
 276    * @throws IOException thrown if there is a problem reading the file.
 277    * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
 278    *
 279    */
 280  0 public XmlReader(InputStream is,String httpContentType,boolean lenient, String defaultEncoding)
 281    throws IOException, XmlReaderException {
 282  0 _defaultEncoding = (defaultEncoding == null) ? _staticDefaultEncoding : defaultEncoding;
 283  0 try {
 284  0 doHttpStream(is,httpContentType,lenient);
 285    }
 286    catch (XmlReaderException ex) {
 287  0 if (!lenient) {
 288  0 throw ex;
 289    }
 290    else {
 291  0 doLenientDetection(httpContentType,ex);
 292    }
 293    }
 294    }
 295   
 296    /**
 297    * Creates a Reader using an InputStream an the associated content-type header. This constructor is
 298    * lenient regarding the encoding detection.
 299    * <p>
 300    * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
 301    * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
 302    * prolog encoding uses the default encoding mandated by the content-type MIME type.
 303    * <p>
 304    * If lenient detection is indicated and the detection above fails as per specifications it then attempts
 305    * the following:
 306    * <p>
 307    * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
 308    * <p>
 309    * Else if the XML prolog had a charset encoding that encoding is used.
 310    * <p>
 311    * Else if the content type had a charset encoding that encoding is used.
 312    * <p>
 313    * Else 'UTF-8' is used.
 314    * <p>
 315    * If lenient detection is indicated an XmlReaderException is never thrown.
 316    * <p>
 317    * @param is InputStream to create the reader from.
 318    * @param httpContentType content-type header to use for the resolution of the charset encoding.
 319    * @param lenient indicates if the charset encoding detection should be relaxed.
 320    * @throws IOException thrown if there is a problem reading the file.
 321    * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
 322    *
 323    */
 324  0 public XmlReader(InputStream is, String httpContentType, boolean lenient)
 325    throws IOException, XmlReaderException {
 326  0 this(is, httpContentType, lenient, null);
 327    }
 328   
 329  0 private void doLenientDetection(String httpContentType,XmlReaderException ex) throws IOException {
 330  0 if (httpContentType!=null) {
 331  0 if (httpContentType.startsWith("text/html")) {
 332  0 httpContentType = httpContentType.substring("text/html".length());
 333  0 httpContentType = "text/xml" + httpContentType;
 334  0 try {
 335  0 doHttpStream(ex.getInputStream(),httpContentType,true);
 336  0 ex = null;
 337    }
 338    catch (XmlReaderException ex2) {
 339  0 ex = ex2;
 340    }
 341    }
 342    }
 343  0 if (ex!=null) {
 344  0 String encoding = ex.getXmlEncoding();
 345  0 if (encoding==null) {
 346  0 encoding = ex.getContentTypeEncoding();
 347    }
 348  0 if (encoding==null) {
 349  0 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
 350    }
 351  0 prepareReader(ex.getInputStream(),encoding);
 352    }
 353    }
 354   
 355    /**
 356    * Returns the charset encoding of the XmlReader.
 357    * <p>
 358    * @return charset encoding.
 359    *
 360    */
 361  0 public String getEncoding() {
 362  0 return _encoding;
 363    }
 364   
 365    public int read(char[] buf,int offset,int len) throws IOException {
 366    return _reader.read(buf,offset,len);
 367    }
 368   
 369    /**
 370    * Closes the XmlReader stream.
 371    * <p>
 372    * @throws IOException thrown if there was a problem closing the stream.
 373    *
 374    */
 375  61460 public void close() throws IOException {
 376  61460 _reader.close();
 377    }
 378   
 379  61846 private void doRawStream(InputStream is,boolean lenient) throws IOException {
 380  61846 BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
 381  61846 String bomEnc = getBOMEncoding(pis);
 382  61846 String xmlGuessEnc = getXMLGuessEncoding(pis);
 383  61846 String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
 384  61846 String encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, pis);
 385  61846 prepareReader(pis,encoding);
 386    }
 387   
 388  0 private void doHttpStream(InputStream is,String httpContentType,boolean lenient) throws IOException {
 389  0 BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
 390  0 String cTMime = getContentTypeMime(httpContentType);
 391  0 String cTEnc = getContentTypeEncoding(httpContentType);
 392  0 String bomEnc = getBOMEncoding(pis);
 393  0 String xmlGuessEnc = getXMLGuessEncoding(pis);
 394  0 String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
 395  0 String encoding = calculateHttpEncoding(cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, pis,lenient);
 396  0 prepareReader(pis,encoding);
 397    }
 398   
 399  61846 private void prepareReader(InputStream is,String encoding) throws IOException {
 400  61846 _reader = new InputStreamReader(is,encoding);
 401  61846 _encoding = encoding;
 402    }
 403   
 404    // InputStream is passed for XmlReaderException creation only
 405  61846 private String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is) throws IOException {
 406  61846 String encoding;
 407  61846 if (bomEnc==null) {
 408  61846 if (xmlGuessEnc==null || xmlEnc==null) {
 409  0 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
 410    }
 411    else
 412  61846 if (xmlEnc.equals(UTF_16) && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) {
 413  0 encoding = xmlGuessEnc;
 414    }
 415    else {
 416  61846 encoding = xmlEnc;
 417    }
 418    }
 419    else
 420  0 if (bomEnc.equals(UTF_8)) {
 421  0 if (xmlGuessEnc!=null && !xmlGuessEnc.equals(UTF_8)) {
 422  0 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
 423    bomEnc,xmlGuessEnc,xmlEnc,is);
 424    }
 425  0 if (xmlEnc!=null && !xmlEnc.equals(UTF_8)) {
 426  0 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
 427    bomEnc,xmlGuessEnc,xmlEnc,is);
 428    }
 429  0 encoding = UTF_8;
 430    }
 431    else
 432  0 if (bomEnc.equals(UTF_16BE) || bomEnc.equals(UTF_16LE)) {
 433  0 if (xmlGuessEnc!=null && !xmlGuessEnc.equals(bomEnc)) {
 434  0 throw new IOException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}));
 435    }
 436  0 if (xmlEnc!=null && !xmlEnc.equals(UTF_16) && !xmlEnc.equals(bomEnc)) {
 437  0 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
 438    bomEnc,xmlGuessEnc,xmlEnc,is);
 439    }
 440  0 encoding =bomEnc;
 441    }
 442    else {
 443  0 throw new XmlReaderException(RAW_EX_2.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
 444    bomEnc,xmlGuessEnc,xmlEnc,is);
 445    }
 446  61846 return encoding;
 447    }
 448   
 449    // InputStream is passed for XmlReaderException creation only
 450  0 private String calculateHttpEncoding(String cTMime, String cTEnc, String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is,boolean lenient) throws IOException {
 451  0 String encoding;
 452  0 if (lenient & xmlEnc!=null) {
 453  0 encoding = xmlEnc;
 454    }
 455    else {
 456  0 boolean appXml = isAppXml(cTMime);
 457  0 boolean textXml = isTextXml(cTMime);
 458  0 if (appXml || textXml) {
 459  0 if (cTEnc==null) {
 460  0 if (appXml) {
 461  0 encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, is);
 462    }
 463    else {
 464  0 encoding = (_defaultEncoding == null) ? US_ASCII : _defaultEncoding;
 465    }
 466    }
 467    else
 468  0 if (bomEnc!=null && (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE))) {
 469  0 throw new XmlReaderException(HTTP_EX_1.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
 470    cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
 471    }
 472    else
 473  0 if (cTEnc.equals(UTF_16)) {
 474  0 if (bomEnc!=null && bomEnc.startsWith(UTF_16)) {
 475  0 encoding = bomEnc;
 476    }
 477    else {
 478  0 throw new XmlReaderException(HTTP_EX_2.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
 479    cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
 480    }
 481    }
 482    else {
 483  0 encoding = cTEnc;
 484    }
 485    }
 486    else {
 487  0 throw new XmlReaderException(HTTP_EX_3.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
 488    cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
 489    }
 490    }
 491  0 return encoding;
 492    }
 493   
 494    // returns MIME type or NULL if httpContentType is NULL
 495  0 private static String getContentTypeMime(String httpContentType) {
 496  0 String mime = null;
 497  0 if (httpContentType!=null) {
 498  0 int i = httpContentType.indexOf(";");
 499  0 mime = ((i==-1) ? httpContentType : httpContentType.substring(0,i)).trim();
 500    }
 501  0 return mime;
 502    }
 503   
 504    private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=([.[^; ]]*)");
 505   
 506    // returns charset parameter value, NULL if not present, NULL if httpContentType is NULL
 507  0 private static String getContentTypeEncoding(String httpContentType) {
 508  0 String encoding = null;
 509  0 if (httpContentType!=null) {
 510  0 int i = httpContentType.indexOf(";");
 511  0 if (i>-1) {
 512  0 String postMime = httpContentType.substring(i+1);
 513  0 Matcher m = CHARSET_PATTERN.matcher(postMime);
 514  0 encoding = (m.find()) ? m.group(1) : null;
 515  0 encoding = (encoding!=null) ? encoding.toUpperCase() : null;
 516    }
 517  0 if (encoding != null &&
 518    ((encoding.startsWith("\"") && encoding.endsWith("\"")) ||
 519    (encoding.startsWith("'") && encoding.endsWith("'"))
 520    )) {
 521  0 encoding = encoding.substring(1, encoding.length() - 1);
 522    }
 523    }
 524  0 return encoding;
 525    }
 526   
 527    // returns the BOM in the stream, NULL if not present,
 528    // if there was BOM the in the stream it is consumed
 529  61846 private static String getBOMEncoding(BufferedInputStream is) throws IOException {
 530  61846 String encoding = null;
 531  61846 int[] bytes = new int[3];
 532  61846 is.mark(3);
 533  61846 bytes[0] = is.read();
 534  61846 bytes[1] = is.read();
 535  61846 bytes[2] = is.read();
 536   
 537  61846 if (bytes[0] == 0xFE && bytes[1] == 0xFF) {
 538  0 encoding = UTF_16BE;
 539  0 is.reset();
 540  0 is.read();
 541  0 is.read();
 542    }
 543    else
 544  61846 if (bytes[0] == 0xFF && bytes[1] == 0xFE) {
 545  0 encoding = UTF_16LE;
 546  0 is.reset();
 547  0 is.read();
 548  0 is.read();
 549    }
 550    else
 551  61846 if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
 552  0 encoding = UTF_8;
 553    }
 554    else {
 555  61846 is.reset();
 556    }
 557  61846 return encoding;
 558    }
 559   
 560    // returns the best guess for the encoding by looking the first bytes of the stream, '<?'
 561  61846 private static String getXMLGuessEncoding(BufferedInputStream is) throws IOException {
 562  61846 String encoding = null;
 563  61846 int[] bytes = new int[4];
 564  61846 is.mark(4);
 565  61846 bytes[0] = is.read();
 566  61846 bytes[1] = is.read();
 567  61846 bytes[2] = is.read();
 568  61846 bytes[3] = is.read();
 569  61846 is.reset();
 570   
 571  61846 if (bytes[0] == 0x00 && bytes[1] == 0x3C && bytes[2] == 0x00 && bytes[3] == 0x3F) {
 572  0 encoding = UTF_16BE;
 573    }
 574    else
 575  61846 if (bytes[0] == 0x3C && bytes[1] == 0x00 && bytes[2] == 0x3F && bytes[3] == 0x00) {
 576  0 encoding = UTF_16LE;
 577    }
 578    else
 579  61846 if (bytes[0] == 0x3C && bytes[1] == 0x3F && bytes[2] == 0x78 && bytes[3] == 0x6D) {
 580  61846 encoding = UTF_8;
 581    }
 582  61846 return encoding;
 583    }
 584   
 585   
 586    private static final Pattern ENCODING_PATTERN =
 587    Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
 588   
 589    // returns the encoding declared in the <?xml encoding=...?>, NULL if none
 590  61846 private static String getXmlProlog(BufferedInputStream is,String guessedEnc) throws IOException {
 591  61846 String encoding = null;
 592  61846 if (guessedEnc!=null) {
 593  61846 byte[] bytes = new byte[BUFFER_SIZE];
 594  61846 is.mark(BUFFER_SIZE);
 595  61846 int offset = 0;
 596  61846 int max = BUFFER_SIZE;
 597  61846 int c = is.read(bytes,offset,max);
 598  61846 int firstGT = -1;
 599  61846 while (c!=-1 && firstGT==-1 && offset< BUFFER_SIZE) {
 600  61846 offset += c;
 601  61846 max -= c;
 602  61846 c = is.read(bytes,offset,max);
 603  61846 firstGT = new String(bytes, 0, offset).indexOf(">");
 604    }
 605  61846 if (firstGT == -1) {
 606  0 if (c == -1) {
 607  0 throw new IOException("Unexpected end of XML stream");
 608    }
 609    else {
 610  0 throw new IOException("XML prolog or ROOT element not found on first " + offset + " bytes");
 611    }
 612    }
 613  61846 int bytesRead = offset;
 614  61846 if (bytesRead>0) {
 615  61846 is.reset();
 616  61846 Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes,0,firstGT + 1), guessedEnc);
 617  61846 BufferedReader bReader = new BufferedReader(reader);
 618  61846 StringBuffer prolog = new StringBuffer();
 619  61846 String line = bReader.readLine();
 620  61846 while (line != null) {
 621  61846 prolog.append(line);
 622  61846 line = bReader.readLine();
 623    }
 624  61846 Matcher m = ENCODING_PATTERN.matcher(prolog);
 625  61846 if (m.find()) {
 626  61846 encoding = m.group(1).toUpperCase();
 627  61846 encoding = encoding.substring(1,encoding.length()-1);
 628    }
 629    }
 630    }
 631  61846 return encoding;
 632    }
 633   
 634    // indicates if the MIME type belongs to the APPLICATION XML family
 635  0 private static boolean isAppXml(String mime) {
 636  0 return mime!=null &&
 637    (mime.equals("application/xml") ||
 638    mime.equals("application/xml-dtd") ||
 639    mime.equals("application/xml-external-parsed-entity") ||
 640    (mime.startsWith("application/") && mime.endsWith("+xml")));
 641    }
 642   
 643    // indicates if the MIME type belongs to the TEXT XML family
 644  0 private static boolean isTextXml(String mime) {
 645  0 return mime!=null &&
 646    (mime.equals("text/xml") ||
 647    mime.equals("text/xml-external-parsed-entity") ||
 648    (mime.startsWith("text/") && mime.endsWith("+xml")));
 649    }
 650   
 651    private static final MessageFormat RAW_EX_1 = new MessageFormat(
 652    "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch");
 653   
 654    private static final MessageFormat RAW_EX_2 = new MessageFormat(
 655    "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM");
 656   
 657    private static final MessageFormat HTTP_EX_1 = new MessageFormat(
 658    "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL");
 659   
 660    private static final MessageFormat HTTP_EX_2 = new MessageFormat(
 661    "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch");
 662   
 663    private static final MessageFormat HTTP_EX_3 = new MessageFormat(
 664    "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME");
 665   
 666    }