use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.
the class UnicodeBOMEncodingDetector method checkForBOM.
/**
 * Inspects the leading bytes of the stream for a Unicode byte order mark.
 *
 * Two bytes are read first and compared against the UTF-16 big-endian
 * (FE FF) and little-endian (FF FE) marks. Only if neither matches is a
 * third byte read and the triple compared against the UTF-8 mark
 * (EF BB BF). The constants FE, FF, EF, BB and BF are declared elsewhere
 * in the class (presumably the byte values their names suggest).
 *
 * @param inputStream
 *            the stream whose first bytes are examined; bytes are consumed
 *            through getNextByte
 * @return a memento for the recognized BOM, or null when no BOM is
 *         recognized or the stream ends before enough bytes were read
 */
private EncodingMemento checkForBOM(InputStream inputStream) {
    try {
        final byte first = getNextByte(inputStream);
        final byte second = getNextByte(inputStream);

        if (first == FE && second == FF) {
            EncodingMemento bigEndian = createEncodingMemento(UTF_16BE_CHARSET_NAME);
            bigEndian.setUnicodeStream(true);
            return bigEndian;
        }

        if (first == FF && second == FE) {
            EncodingMemento littleEndian = createEncodingMemento(UTF_16LE_CHARSET_NAME);
            littleEndian.setUnicodeStream(true);
            return littleEndian;
        }

        // Neither UTF-16 mark matched: the third byte is always fetched
        // here, even when the first two already rule out the UTF-8 mark.
        final byte third = getNextByte(inputStream);
        if (first == EF && second == BB && third == BF) {
            EncodingMemento utf8 = createEncodingMemento(UTF_8_CHARSET_NAME);
            utf8.setUTF83ByteBOMUsed(true);
            return utf8;
        }

        return null;
    } catch (NotEnoughInputForBOMException e) {
        // Unexpected in normal cases, but it happens for empty streams
        // and for short non-BOM streams (for example a two byte stream
        // whose bytes equal the start of the UTF-8 BOM). Either way
        // there is no BOM to report.
        return null;
    } catch (IOException e) {
        // any other I/O failure is considered impossible here
        throw new Error(e);
    }
}
use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.
the class AbstractDocumentLoader method reload.
/**
 * Replaces the content of the given document with everything that can be
 * read from the supplied reader.
 *
 * The reader is wrapped in a BufferedReader that is marked at the start so
 * that, if decoding fails, the input can be rewound to look for the
 * position of the failure. The supplied reader is never closed here,
 * because this method did not open it.
 *
 * @param encodedDocument
 *            the document whose text is replaced
 * @param inputStreamReader
 *            the source of the new text; must not be null
 * @throws IllegalArgumentException
 *             if inputStreamReader is null
 * @throws IOException
 *             if reading fails; a MalformedInputException raised while
 *             reading is rethrown as MalformedInputExceptionWithDetail
 */
public void reload(IEncodedDocument encodedDocument, Reader inputStreamReader) throws IOException {
    if (inputStreamReader == null) {
        throw new IllegalArgumentException("stream reader can not be null"); //$NON-NLS-1$
    }

    final int chunkSize = 8192;
    // Used both as the buffer size and as the mark read-ahead limit.
    final int markLimit = 200000;

    // temporary: eventually content should only be read as needed
    BufferedReader reader = new BufferedReader(inputStreamReader, markLimit);
    reader.mark(markLimit);

    StringBuilder contents = new StringBuilder();
    try {
        char[] chunk = new char[chunkSize];
        while (true) {
            int count = reader.read(chunk, 0, chunk.length);
            if (count == -1) {
                break;
            }
            contents.append(chunk, 0, count);
        }
    } catch (MalformedInputException e) {
        EncodingMemento memento = getEncodingMemento();
        String failedEncoding = memento.getJavaCharsetName();
        if (failedEncoding == null) {
            failedEncoding = memento.getDetectedCharsetName();
        }

        boolean resetFailed = false;
        try {
            reader.reset();
        } catch (IOException resetException) {
            // The only error expected from reset is an IOException
            // caused by having read past the mark; in that case a more
            // generic message is reported.
            resetFailed = true;
        }

        // -1 can be used by the UI layer as a code meaning "position
        // could not be determined"
        int failurePosition = -1;
        if (!resetFailed) {
            failurePosition = getCharPostionOfFailure(reader);
        }

        throw new MalformedInputExceptionWithDetail(failedEncoding, CodedIO.getAppropriateJavaCharset(failedEncoding), failurePosition, resetFailed, markLimit);
    }

    encodedDocument.set(contents.toString());
}
use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.
the class JSPResourceEncodingDetector method getSpecDefaultEncodingMemento.
/**
 * Builds the encoding memento that corresponds to the specification's
 * default encoding.
 *
 * The detector state is reset first via resetAll().
 *
 * @return the fEncodingMemento field after it has been set up for the
 *         default encoding, or null when getSpecDefaultEncoding() returns
 *         null
 */
public EncodingMemento getSpecDefaultEncodingMemento() {
    resetAll();

    String specDefault = getSpecDefaultEncoding();
    if (specDefault == null) {
        return null;
    }

    // The return value is ignored; presumably this call populates
    // fEncodingMemento as a side effect -- TODO confirm.
    createEncodingMemento(specDefault, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
    fEncodingMemento.setAppropriateDefault(specDefault);
    return fEncodingMemento;
}
use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.
the class ContentDescriberForJSPedCSS method handleStandardCalculations.
/**
 * Copies the encoding information found by the detector into the content
 * description: byte order mark, unsupported-charset details, the detected
 * charset special case, and the Java charset name.
 *
 * Note: when asked for one property, all of them are set. Care is still
 * needed so that repeated calls (one per property, say) do not waste time
 * processing the same content again.
 *
 * @param description
 *            the content description to update
 * @param detector
 *            the charset detector; it is cast to
 *            JSPResourceEncodingDetector to obtain the encoding memento
 * @throws IOException
 */
private void handleStandardCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
    EncodingMemento memento = ((JSPResourceEncodingDetector) detector).getEncodingMemento();

    // TODO: verify whether this BOM work is always done by the text type.
    Object foundBom = memento.getUnicodeBOM();
    if (foundBom != null) {
        Object currentBom = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
        // need to "push" the value up into the base description
        if (!foundBom.equals(currentBom)) {
            description.setProperty(IContentDescription.BYTE_ORDER_MARK, foundBom);
        }
    }

    if (!memento.isValid()) {
        // After this point the mere presence of
        // IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
        // resource's description can be used to tell that the encoding
        // is invalid; in those cases the "detected" property contains an
        // "appropriate default" to use.
        description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, memento.getInvalidEncoding());
        description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, memento.getAppropriateDefault());
    }

    Object detectedName = memento.getDetectedCharsetName();
    Object javaName = memento.getJavaCharsetName();

    // the detected name is always included if it differs from the Java one
    handleDetectedSpecialCase(description, detectedName, javaName);

    if (javaName == null) {
        return;
    }

    Object currentCharset = description.getProperty(IContentDescription.CHARSET);
    if (javaName.equals(currentCharset)) {
        // NOTE(review): same arguments as the call above; looks
        // redundant, kept because the helper is not visible here.
        handleDetectedSpecialCase(description, detectedName, javaName);
        return;
    }

    // The charset only needs to be added when it differs from the spec
    // default; with no spec default it is assumed it must always be added.
    Object specDefault = detector.getSpecDefaultEncoding();
    if (specDefault == null || !specDefault.equals(javaName)) {
        description.setProperty(IContentDescription.CHARSET, javaName);
    }
}
use of org.eclipse.wst.sse.core.internal.encoding.EncodingMemento in project webtools.sourceediting by eclipse.
the class ContentDescriberForDTD method handleCalculations.
/**
 * Copies the encoding information found by the detector into the content
 * description: byte order mark, unsupported-charset details, the detected
 * charset special case, and the Java charset name.
 *
 * Note: when asked for one property, all of them are set. Care is still
 * needed so that repeated calls (one per property, say) do not waste time
 * processing the same content again.
 *
 * @param description
 *            the content description to update
 * @param detector
 *            the charset detector; it is cast to
 *            XMLResourceEncodingDetector to obtain the encoding memento
 * @throws IOException
 */
private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
    EncodingMemento memento = ((XMLResourceEncodingDetector) detector).getEncodingMemento();

    // TODO: verify whether this BOM work is always done by the text type.
    Object foundBom = memento.getUnicodeBOM();
    if (foundBom != null) {
        Object currentBom = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
        // need to "push" the value up into the base description
        if (!foundBom.equals(currentBom)) {
            description.setProperty(IContentDescription.BYTE_ORDER_MARK, foundBom);
        }
    }

    if (!memento.isValid()) {
        // After this point the mere presence of
        // IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
        // resource's description can be used to tell that the encoding
        // is invalid; in those cases the "detected" property contains an
        // "appropriate default" to use.
        description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, memento.getInvalidEncoding());
        description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, memento.getAppropriateDefault());
    }

    Object detectedName = memento.getDetectedCharsetName();
    Object javaName = memento.getJavaCharsetName();

    // the detected name is always included if it differs from the Java one
    handleDetectedSpecialCase(description, detectedName, javaName);

    if (javaName == null) {
        return;
    }

    Object currentCharset = description.getProperty(IContentDescription.CHARSET);
    if (javaName.equals(currentCharset)) {
        // NOTE(review): same arguments as the call above; looks
        // redundant, kept because the helper is not visible here.
        handleDetectedSpecialCase(description, detectedName, javaName);
        return;
    }

    // The charset only needs to be added when it differs from the spec
    // default; with no spec default it is assumed it must always be added.
    // NOTE(review): the default is taken from getDetector(), not from the
    // detector parameter -- confirm both refer to the same detector.
    Object specDefault = getDetector().getSpecDefaultEncoding();
    if (specDefault == null || !specDefault.equals(javaName)) {
        description.setProperty(IContentDescription.CHARSET, javaName);
    }
}
Aggregations