Search in sources :

Example 1 with EncodingMemento

use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.

the class UnicodeBOMEncodingDetector method checkForBOM.

/**
 * Inspects the leading bytes of the stream for a byte order mark and
 * returns a memento describing the matching Unicode encoding.
 *
 * @param inputStream
 *            stream whose first two (possibly three) bytes are consumed
 * @return a memento for UTF-16BE, UTF-16LE or UTF-8 when the leading
 *         bytes match the corresponding constants, otherwise
 *         <code>null</code>
 */
private EncodingMemento checkForBOM(InputStream inputStream) {
    try {
        final byte first = getNextByte(inputStream);
        final byte second = getNextByte(inputStream);
        if (first == FE && second == FF) {
            EncodingMemento bigEndian = createEncodingMemento(UTF_16BE_CHARSET_NAME);
            bigEndian.setUnicodeStream(true);
            return bigEndian;
        }
        if (first == FF && second == FE) {
            EncodingMemento littleEndian = createEncodingMemento(UTF_16LE_CHARSET_NAME);
            littleEndian.setUnicodeStream(true);
            return littleEndian;
        }
        // The third byte is requested only after both two-byte marks
        // have been ruled out, so a two-byte stream carrying a UTF-16
        // mark never asks for more input than it has.
        final byte third = getNextByte(inputStream);
        if (first == EF && second == BB && third == BF) {
            EncodingMemento utf8 = createEncodingMemento(UTF_8_CHARSET_NAME);
            utf8.setUTF83ByteBOMUsed(true);
            return utf8;
        }
        return null;
    } catch (NotEnoughInputForBOMException e) {
        // Somewhat unexpected in normal cases, but it happens for empty
        // streams, and "normally" for non-BOM streams that hold only two
        // bytes which happen to match the first two bytes of the UTF-8
        // BOM. In every such case there is simply no BOM to report.
        return null;
    } catch (IOException e) {
        // other errors should be impossible
        throw new Error(e);
    }
}
Also used : EncodingMemento(org.eclipse.wst.sse.core.internal.encoding.EncodingMemento) IOException(java.io.IOException)

Example 2 with EncodingMemento

use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.

the class AbstractDocumentLoader method reload.

/**
 * Reads the reader to its end and sets the accumulated text as the content
 * of the given document. The reader is wrapped in a marked
 * {@link BufferedReader} so that, when decoding fails, the stream can be
 * rewound to work out where the failure happened.
 *
 * @param encodedDocument
 *            document that receives the text that was read
 * @param inputStreamReader
 *            source of the characters; it is not closed by this method
 * @throws IllegalArgumentException
 *             if <code>inputStreamReader</code> is <code>null</code>
 * @throws IOException
 *             if reading fails; a {@link MalformedInputException} is
 *             rethrown as a MalformedInputExceptionWithDetail
 */
public void reload(IEncodedDocument encodedDocument, Reader inputStreamReader) throws IOException {
    if (inputStreamReader == null) {
        throw new IllegalArgumentException("stream reader can not be null"); // $NON-NLS-1$
    }
    final int chunkSize = 8192;
    final int resetMarkLimit = 200000;
    // temporary ... eventually we'll only read as needed
    BufferedReader markedReader = new BufferedReader(inputStreamReader, resetMarkLimit);
    markedReader.mark(resetMarkLimit);
    StringBuffer content = new StringBuffer();
    try {
        char[] chunk = new char[chunkSize];
        int count = markedReader.read(chunk, 0, chunk.length);
        while (count != -1) {
            content.append(chunk, 0, count);
            count = markedReader.read(chunk, 0, chunk.length);
        }
        // remember -- we didn't open the stream, so we don't close it
    } catch (MalformedInputException e) {
        EncodingMemento memento = getEncodingMemento();
        String failedEncoding = memento.getJavaCharsetName();
        if (failedEncoding == null) {
            failedEncoding = memento.getDetectedCharsetName();
        }
        boolean resetFailed = false;
        try {
            markedReader.reset();
        } catch (IOException resetException) {
            // The only error that can occur during reset is an
            // IOException caused by having read past the reset mark. In
            // that case a more generic message is reported.
            resetFailed = true;
        }
        // -1 can be used by the UI layer as a code meaning "position
        // could not be determined"
        int failurePosition = -1;
        if (!resetFailed) {
            failurePosition = getCharPostionOfFailure(markedReader);
        }
        throw new MalformedInputExceptionWithDetail(failedEncoding, CodedIO.getAppropriateJavaCharset(failedEncoding), failurePosition, resetFailed, resetMarkLimit);
    }
    encodedDocument.set(content.toString());
}
Also used : EncodingMemento(org.eclipse.wst.sse.core.internal.encoding.EncodingMemento) BufferedReader(java.io.BufferedReader) MalformedInputException(java.nio.charset.MalformedInputException) MalformedInputExceptionWithDetail(org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail) IOException(java.io.IOException)

Example 3 with EncodingMemento

use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.

the class JSPResourceEncodingDetector method getSpecDefaultEncodingMemento.

/**
 * Resets the detector state and builds a memento for the spec default
 * encoding.
 *
 * @return the memento held in <code>fEncodingMemento</code> after it has
 *         been created for the spec default encoding, or
 *         <code>null</code> when there is no spec default encoding
 */
public EncodingMemento getSpecDefaultEncodingMemento() {
    resetAll();
    String specDefault = getSpecDefaultEncoding();
    if (specDefault == null) {
        return null;
    }
    // The two-argument createEncodingMemento call's return value is not
    // used here; the memento is read back from fEncodingMemento.
    createEncodingMemento(specDefault, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
    fEncodingMemento.setAppropriateDefault(specDefault);
    return fEncodingMemento;
}
Also used : EncodingMemento(org.eclipse.wst.sse.core.internal.encoding.EncodingMemento)

Example 4 with EncodingMemento

use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.

the class ContentDescriberForJSPedCSS method handleStandardCalculations.

/**
 * Copies the encoding information held by the detector's memento (byte
 * order mark, unsupported charset details and charset) into the content
 * description.
 *
 * @param description
 *            content description whose properties are updated
 * @param detector
 *            detector supplying the memento; it is cast to
 *            JSPResourceEncodingDetector
 * @throws IOException
 *             declared by the signature; presumably raised by the
 *             detector calls
 */
private void handleStandardCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
    // note: if we're asked for one property, we set them all. It still
    // needs to be confirmed that repeated calls (one per property, say)
    // do not waste time processing the same content again.
    EncodingMemento memento = ((JSPResourceEncodingDetector) detector).getEncodingMemento();

    // TODO: verify whether this BOM work is always done by the text type.
    Object bomFound = memento.getUnicodeBOM();
    if (bomFound != null) {
        Object bomRecorded = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
        // need to "push" up into base.
        if (!bomFound.equals(bomRecorded)) {
            description.setProperty(IContentDescription.BYTE_ORDER_MARK, bomFound);
        }
    }

    if (!memento.isValid()) {
        // note: once set here, the mere presence of
        // IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
        // resource's description can be used to determine that the
        // encoding is invalid; in those cases the "detected" property
        // contains an "appropriate default" to use.
        description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, memento.getInvalidEncoding());
        description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, memento.getAppropriateDefault());
    }

    Object detectedName = memento.getDetectedCharsetName();
    Object javaName = memento.getJavaCharsetName();
    // we always include detected, if it is different from java
    handleDetectedSpecialCase(description, detectedName, javaName);
    if (javaName == null) {
        return;
    }

    Object recordedCharset = description.getProperty(IContentDescription.CHARSET);
    if (javaName.equals(recordedCharset)) {
        handleDetectedSpecialCase(description, detectedName, javaName);
        return;
    }

    // We only need to add what we found when it differs from the spec
    // default; with no spec default at all it is assumed that it always
    // has to be added.
    Object specDefault = detector.getSpecDefaultEncoding();
    if (specDefault == null || !specDefault.equals(javaName)) {
        description.setProperty(IContentDescription.CHARSET, javaName);
    }
}
Also used : EncodingMemento(org.eclipse.wst.sse.core.internal.encoding.EncodingMemento) JSPResourceEncodingDetector(org.eclipse.jst.jsp.core.internal.contenttype.JSPResourceEncodingDetector)

Example 5 with EncodingMemento

use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.

the class ContentDescriberForDTD method handleCalculations.

/**
 * Copies the encoding information held by the detector's memento (byte
 * order mark, unsupported charset details and charset) into the content
 * description.
 *
 * @param description
 *            content description whose properties are updated
 * @param detector
 *            detector supplying the memento; it is cast to
 *            XMLResourceEncodingDetector
 * @throws IOException
 *             declared by the signature; presumably raised by the
 *             detector calls
 */
private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
    // note: if we're asked for one property, we set them all. It still
    // needs to be confirmed that repeated calls (one per property, say)
    // do not waste time processing the same content again.
    EncodingMemento memento = ((XMLResourceEncodingDetector) detector).getEncodingMemento();

    // TODO: verify whether this BOM work is always done by the text type.
    Object bomFound = memento.getUnicodeBOM();
    if (bomFound != null) {
        Object bomRecorded = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
        // need to "push" up into base.
        if (!bomFound.equals(bomRecorded)) {
            description.setProperty(IContentDescription.BYTE_ORDER_MARK, bomFound);
        }
    }

    if (!memento.isValid()) {
        // note: once set here, the mere presence of
        // IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
        // resource's description can be used to determine that the
        // encoding is invalid; in those cases the "detected" property
        // contains an "appropriate default" to use.
        description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, memento.getInvalidEncoding());
        description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, memento.getAppropriateDefault());
    }

    Object detectedName = memento.getDetectedCharsetName();
    Object javaName = memento.getJavaCharsetName();
    // we always include detected, if it is different from java
    handleDetectedSpecialCase(description, detectedName, javaName);
    if (javaName == null) {
        return;
    }

    Object recordedCharset = description.getProperty(IContentDescription.CHARSET);
    if (javaName.equals(recordedCharset)) {
        handleDetectedSpecialCase(description, detectedName, javaName);
        return;
    }

    // We only need to add what we found when it differs from the spec
    // default; with no spec default at all it is assumed that it always
    // has to be added. Note that the spec default is obtained through
    // getDetector() rather than through the detector parameter.
    Object specDefault = getDetector().getSpecDefaultEncoding();
    if (specDefault == null || !specDefault.equals(javaName)) {
        description.setProperty(IContentDescription.CHARSET, javaName);
    }
}
Also used : XMLResourceEncodingDetector(org.eclipse.wst.xml.core.internal.contenttype.XMLResourceEncodingDetector) EncodingMemento(org.eclipse.wst.sse.core.internal.encoding.EncodingMemento)

Aggregations

EncodingMemento (org.eclipse.wst.sse.core.internal.encoding.EncodingMemento)34 InputStream (java.io.InputStream)6 BufferedInputStream (java.io.BufferedInputStream)5 File (java.io.File)5 FileInputStream (java.io.FileInputStream)5 IOException (java.io.IOException)4 IStructuredModel (org.eclipse.wst.sse.core.internal.provisional.IStructuredModel)3 IStructuredDocument (org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocument)3 XMLResourceEncodingDetector (org.eclipse.wst.xml.core.internal.contenttype.XMLResourceEncodingDetector)3 InputStreamReader (java.io.InputStreamReader)2 MalformedInputException (java.nio.charset.MalformedInputException)2 JSPResourceEncodingDetector (org.eclipse.jst.jsp.core.internal.contenttype.JSPResourceEncodingDetector)2 MalformedInputExceptionWithDetail (org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail)2 BufferedReader (java.io.BufferedReader)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 Reader (java.io.Reader)1 Charset (java.nio.charset.Charset)1 CharsetDecoder (java.nio.charset.CharsetDecoder)1 ITextFileBuffer (org.eclipse.core.filebuffers.ITextFileBuffer)1