use of java.nio.charset.MalformedInputException in project elasticsearch by elastic.
the class AnalysisTests method testParseFalseEncodedFile.
/**
 * Checks that {@code Analysis.getWordList} rejects a word-list file whose bytes are not valid UTF-8:
 * it must throw an {@link IllegalArgumentException} naming the setting and the file, with the
 * underlying character-coding failure attached as the cause.
 */
public void testParseFalseEncodedFile() throws IOException {
    final Path home = createTempDir();
    final Path wordList = home.resolve("foo.dict");

    // Write a line that starts with 0xff, a byte that can never appear in UTF-8.
    try (OutputStream out = Files.newOutputStream(wordList)) {
        out.write(new byte[] { (byte) 0xff, 0x00, 0x00 });
        out.write('\n');
    }

    final Settings nodeSettings = Settings.builder()
        .put("foo.bar_path", wordList)
        .put(Environment.PATH_HOME_SETTING.getKey(), home)
        .build();
    final Environment env = new Environment(nodeSettings);

    final IllegalArgumentException thrown = expectThrows(
        IllegalArgumentException.class,
        () -> Analysis.getWordList(env, nodeSettings, "foo.bar"));

    final String expectedMessage = "Unsupported character encoding detected while reading foo.bar_path: "
        + home.resolve("foo.dict").toString()
        + " - files must be UTF-8 encoded";
    assertEquals(expectedMessage, thrown.getMessage());

    final Throwable cause = thrown.getCause();
    final boolean isCodingFailure = cause instanceof MalformedInputException
        || cause instanceof CharacterCodingException;
    assertTrue(cause.toString(), isCodingFailure);
}
use of java.nio.charset.MalformedInputException in project fastjson by alibaba.
the class IOUtils method encodeUTF8.
public static int encodeUTF8(char[] sa, int sp, int len, byte[] da) {
int sl = sp + len;
int dp = 0;
int dlASCII = dp + Math.min(len, da.length);
// ASCII only optimized loop
while (dp < dlASCII && sa[sp] < '') {
da[dp++] = (byte) sa[sp++];
}
while (sp < sl) {
char c = sa[sp++];
if (c < 0x80) {
// Have at most seven bits
da[dp++] = (byte) c;
} else if (c < 0x800) {
// 2 bytes, 11 bits
da[dp++] = (byte) (0xc0 | (c >> 6));
da[dp++] = (byte) (0x80 | (c & 0x3f));
} else if (c >= '�' && c < ('�' + 1)) {
//Character.isSurrogate(c) but 1.7
final int uc;
int ip = sp - 1;
if (Character.isHighSurrogate(c)) {
if (sl - ip < 2) {
uc = -1;
} else {
char d = sa[ip + 1];
if (Character.isLowSurrogate(d)) {
uc = Character.toCodePoint(c, d);
} else {
throw new JSONException("encodeUTF8 error", new MalformedInputException(1));
}
}
} else {
if (Character.isLowSurrogate(c)) {
throw new JSONException("encodeUTF8 error", new MalformedInputException(1));
} else {
uc = c;
}
}
if (uc < 0) {
da[dp++] = (byte) '?';
} else {
da[dp++] = (byte) (0xf0 | ((uc >> 18)));
da[dp++] = (byte) (0x80 | ((uc >> 12) & 0x3f));
da[dp++] = (byte) (0x80 | ((uc >> 6) & 0x3f));
da[dp++] = (byte) (0x80 | (uc & 0x3f));
// 2 chars
sp++;
}
} else {
// 3 bytes, 16 bits
da[dp++] = (byte) (0xe0 | ((c >> 12)));
da[dp++] = (byte) (0x80 | ((c >> 6) & 0x3f));
da[dp++] = (byte) (0x80 | (c & 0x3f));
}
}
return dp;
}
use of java.nio.charset.MalformedInputException in project robovm by robovm.
the class CharsetDecoderTest method testInvalidDecoding.
/**
 * Feeds several invalid UTF-8 byte sequences to a decoder configured to REPORT errors and expects a
 * {@link MalformedInputException} for each one, first from array-backed buffers and then from
 * direct buffers.
 */
public void testInvalidDecoding() throws IOException {
    final byte[][] malformedInputs = {
        // overlong NULL
        { (byte) 0xC0, (byte) 0x80 },
        // overlong ascii 'A'
        { (byte) 0xC0, (byte) 0xC1 },
        // overlong "/../"
        { (byte) 0x2F, (byte) 0xC0, (byte) 0xAE, (byte) 0x2E, (byte) 0x2F },
        // Invalid encoding 2r11111000 (sequence too long)
        { (byte) 0xF8 },
        // Invalid encoding 2r10000000 (sequence too short)
        { (byte) 0x80 }
    };

    final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);

    // Pass 1: buffers that wrap a byte array.
    for (int i = 0; i < malformedInputs.length; i++) {
        final byte[] bytes = malformedInputs[i];
        final CharBuffer decoded;
        try {
            decoded = decoder.decode(ByteBuffer.wrap(bytes));
        } catch (MalformedInputException expected) {
            continue;
        }
        fail("No exception thrown on " + Arrays.toString(bytes) + " '" + decoded + "'");
    }

    // Pass 2: direct buffers, which have no backing array.
    for (int i = 0; i < malformedInputs.length; i++) {
        final byte[] bytes = malformedInputs[i];
        final ByteBuffer direct = ByteBuffer.allocateDirect(8);
        direct.put(bytes);
        direct.flip();
        final CharBuffer decoded;
        try {
            decoded = decoder.decode(direct);
        } catch (MalformedInputException expected) {
            continue;
        }
        fail("No exception thrown on " + Arrays.toString(bytes) + " '" + decoded + "'");
    }
}
use of java.nio.charset.MalformedInputException in project robovm by robovm.
the class CharsetDecoderTest method testDecodeByteBufferException.
/**
 * Exercises {@code decoder.decode(ByteBuffer)} under each error action. For the malformed and the
 * unmappable sample buffers (each skipped when its provider returns {@code null}): REPORT must
 * throw, IGNORE must yield {@code getString()}, and REPLACE must yield the decoder's replacement
 * followed by {@code getString()}. Finally, decoding {@code getExceptionByteArray()} must raise a
 * {@link RuntimeException}.
 */
public void testDecodeByteBufferException() throws CharacterCodingException, UnsupportedEncodingException {
    final String replacedText = decoder.replacement() + getString();

    // --- malformed input ---
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    final ByteBuffer malformed = getMalformedByteBuffer();
    if (malformed != null) {
        try {
            CharBuffer decoded = decoder.decode(malformed);
            assertTrue(decoded.remaining() > 0);
            fail("should throw MalformedInputException");
        } catch (MalformedInputException expected) {
            // expected
        }

        decoder.reset();
        malformed.rewind();
        decoder.onMalformedInput(CodingErrorAction.IGNORE);
        CharBuffer ignored = decoder.decode(malformed);
        assertCharBufferValue(getString(), ignored);

        decoder.reset();
        malformed.rewind();
        decoder.onMalformedInput(CodingErrorAction.REPLACE);
        CharBuffer replaced = decoder.decode(malformed);
        assertCharBufferValue(replacedText, replaced);
    }

    // --- unmappable input ---
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    final ByteBuffer unmapped = getUnmappedByteBuffer();
    if (unmapped != null) {
        try {
            decoder.decode(unmapped);
            fail("should throw UnmappableCharacterException");
        } catch (UnmappableCharacterException expected) {
            // expected
        }

        decoder.reset();
        unmapped.rewind();
        decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
        CharBuffer ignored = decoder.decode(unmapped);
        assertCharBufferValue(getString(), ignored);

        decoder.reset();
        unmapped.rewind();
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        CharBuffer replaced = decoder.decode(unmapped);
        assertCharBufferValue(replacedText, replaced);
    }

    // --- input that makes decode fail with a RuntimeException ---
    try {
        decoder.decode(getExceptionByteArray());
        fail("should throw runtime exception");
    } catch (RuntimeException expected) {
        // expected
    }
}
use of java.nio.charset.MalformedInputException in project assertj-core by joel-costigliola.
the class Files method assertSameContentAs.
/**
 * Asserts that the given files have the same content. Adapted from <a
 * href="http://junit-addons.sourceforge.net/junitx/framework/FileAssert.html" target="_blank">FileAssert</a> (from <a
 * href="http://sourceforge.net/projects/junit-addons">JUnit-addons</a>).
 * <p>
 * The files are first compared as text. If reading them as text fails with a
 * {@link MalformedInputException}, a binary comparison is made instead so that the failure can
 * report where the contents differ.
 * @param info contains information about the assertion.
 * @param actual the "actual" file.
 * @param actualCharset {@link Charset} of the "actual" file.
 * @param expected the "expected" file.
 * @param expectedCharset {@link Charset} of the "expected" file.
 * @throws NullPointerException if {@code expected} is {@code null}.
 * @throws IllegalArgumentException if {@code expected} is not an existing file.
 * @throws AssertionError if {@code actual} is {@code null}.
 * @throws AssertionError if {@code actual} is not an existing file.
 * @throws UncheckedIOException if an I/O error occurs.
 * @throws AssertionError if the given files do not have same content.
 */
public void assertSameContentAs(AssertionInfo info, File actual, Charset actualCharset, File expected, Charset expectedCharset) {
    verifyIsFile(expected);
    assertIsFile(info, actual);
    try {
        List<Delta<String>> textDifferences = diff.diff(actual, actualCharset, expected, expectedCharset);
        if (!textDifferences.isEmpty()) {
            throw failures.failure(info, shouldHaveSameContent(actual, expected, textDifferences));
        }
    } catch (MalformedInputException malformed) {
        try {
            // The text diff could not decode one of the files, so compare the raw bytes instead;
            // a binary difference reports the offset at which the contents diverge.
            BinaryDiffResult binaryDifference = binaryDiff.diff(actual, readAllBytes(expected.toPath()));
            if (!binaryDifference.hasNoDiff()) {
                throw failures.failure(info, shouldHaveBinaryContent(actual, binaryDifference));
            }
            // Identical bytes, yet decoding failed: the original error must not be swallowed.
            // Rethrowing here lands in the IOException handler below, which wraps it in an
            // UncheckedIOException.
            throw malformed;
        } catch (IOException ioe) {
            throw new UncheckedIOException(format(UNABLE_TO_COMPARE_FILE_CONTENTS, actual, expected), ioe);
        }
    } catch (IOException e) {
        throw new UncheckedIOException(format(UNABLE_TO_COMPARE_FILE_CONTENTS, actual, expected), e);
    }
}
Aggregations