Use of java.io.UTFDataFormatException in project tinker by Tencent.
Class DexDataBuffer, method readStringData.
/**
 * Reads one string-data item starting at the buffer's current position.
 *
 * <p>The item is a ULEB128 length (read with {@code readUleb128()}) followed by the
 * string body, which is decoded by {@code Mutf8.decode} into a char array of exactly
 * the declared length.
 *
 * @return a {@code StringData} built from the item's starting position and the decoded text
 * @throws DexException if the decoded string's length differs from the declared length, or
 *     wrapping the {@link UTFDataFormatException} raised while decoding
 */
public StringData readStringData() {
    // Remember where the item starts before anything is consumed.
    final int startOffset = data.position();

    final int declaredLength;
    final String decoded;
    try {
        declaredLength = readUleb128();
        decoded = Mutf8.decode(this, new char[declaredLength]);
    } catch (UTFDataFormatException e) {
        throw new DexException(e);
    }

    final int actualLength = decoded.length();
    if (actualLength != declaredLength) {
        throw new DexException("Declared length " + declaredLength + " doesn't match decoded length of " + actualLength);
    }
    return new StringData(startOffset, decoded);
}
Use of java.io.UTFDataFormatException in project hadoop by Apache.
Class UTF8, method readChars.
/**
 * Reads {@code nBytes} bytes from {@code in}, decodes them as UTF-8 and appends the
 * resulting chars to {@code buffer}.
 *
 * <p>One-, two- and three-byte sequences are appended as a single char; four-byte
 * sequences are appended as the pair produced by {@code highSurrogate} and
 * {@code lowSurrogate}. Continuation bytes are not validated: only their low six bits
 * are used.
 *
 * @param in source of the encoded bytes
 * @param buffer destination the decoded chars are appended to
 * @param nBytes number of encoded bytes to consume from {@code in}
 * @throws UTFDataFormatException if a multi-byte sequence is cut off by the end of the
 *     data, or a lead byte does not start a 1- to 4-byte sequence
 * @throws IOException declared for the read from {@code in}
 */
private static void readChars(DataInput in, StringBuilder buffer, int nBytes) throws UTFDataFormatException, IOException {
    DataOutputBuffer obuf = OBUF_FACTORY.get();
    obuf.reset();
    obuf.write(in, nBytes);
    byte[] bytes = obuf.getData();
    int i = 0;
    while (i < nBytes) {
        byte b = bytes[i++];
        if ((b & 0x80) == 0) {
            // 0b0xxxxxxx: 1-byte sequence
            buffer.append((char) (b & 0x7F));
        } else if ((b & 0xE0) == 0xC0) {
            // 0b110xxxxx: 2-byte sequence
            if (i >= nBytes) {
                // The third argument of byteToHexString is an end index (it is used that
                // way in the invalid-lead-byte branch below), so pass nBytes to print the
                // truncated tail; passing a byte count here produced an empty dump for any
                // sequence not at the very start of the data.
                throw new UTFDataFormatException("Truncated UTF8 at " + StringUtils.byteToHexString(bytes, i - 1, nBytes));
            }
            buffer.append((char) (((b & 0x1F) << 6) | (bytes[i++] & 0x3F)));
        } else if ((b & 0xF0) == 0xE0) {
            // 0b1110xxxx: 3-byte sequence
            if (i + 1 >= nBytes) {
                throw new UTFDataFormatException("Truncated UTF8 at " + StringUtils.byteToHexString(bytes, i - 1, nBytes));
            }
            buffer.append((char) (((b & 0x0F) << 12) | ((bytes[i++] & 0x3F) << 6) | (bytes[i++] & 0x3F)));
        } else if ((b & 0xF8) == 0xF0) {
            // 0b11110xxx: 4-byte sequence
            if (i + 2 >= nBytes) {
                throw new UTFDataFormatException("Truncated UTF8 at " + StringUtils.byteToHexString(bytes, i - 1, nBytes));
            }
            int codepoint = ((b & 0x07) << 18) | ((bytes[i++] & 0x3F) << 12) | ((bytes[i++] & 0x3F) << 6) | ((bytes[i++] & 0x3F));
            buffer.append(highSurrogate(codepoint)).append(lowSurrogate(codepoint));
        } else {
            // The UTF8 standard describes 5-byte and 6-byte sequences, but
            // these are no longer allowed as of 2003 (see RFC 3629)
            // Only show the next 6 bytes max in the error code - in case the
            // buffer is large, this will prevent an exceedingly large message.
            int endForError = Math.min(i + 5, nBytes);
            throw new UTFDataFormatException("Invalid UTF8 at " + StringUtils.byteToHexString(bytes, i - 1, endForError));
        }
    }
}
Use of java.io.UTFDataFormatException in project robovm by robovm.
Class ModifiedUtf8, method decode.
/**
* Decodes a byte array containing <i>modified UTF-8</i> bytes into a string.
*
* <p>Note that although this method decodes the (supposedly impossible) zero byte to U+0000,
* that's what the RI does too.
*/
public static String decode(byte[] in, char[] out, int offset, int utfSize) throws UTFDataFormatException {
int count = 0, s = 0, a;
while (count < utfSize) {
if ((out[s] = (char) in[offset + count++]) < '') {
s++;
} else if (((a = out[s]) & 0xe0) == 0xc0) {
if (count >= utfSize) {
throw new UTFDataFormatException("bad second byte at " + count);
}
int b = in[offset + count++];
if ((b & 0xC0) != 0x80) {
throw new UTFDataFormatException("bad second byte at " + (count - 1));
}
out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
} else if ((a & 0xf0) == 0xe0) {
if (count + 1 >= utfSize) {
throw new UTFDataFormatException("bad third byte at " + (count + 1));
}
int b = in[offset + count++];
int c = in[offset + count++];
if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
throw new UTFDataFormatException("bad second or third byte at " + (count - 2));
}
out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
} else {
throw new UTFDataFormatException("bad byte at " + (count - 1));
}
}
return new String(out, 0, s);
}
Use of java.io.UTFDataFormatException in project ignite by Apache.
Class HadoopDirectDataOutput, method writeUTF.
/**
 * {@inheritDoc}
 *
 * <p>This implementation encodes the string with {@link StandardCharsets#UTF_8}, writes the
 * encoded byte count via {@code writeShort} and then the encoded bytes via {@code write}.
 *
 * @throws UTFDataFormatException if the encoded form is longer than 65535 bytes
 */
@Override
public void writeUTF(@NotNull String str) throws IOException {
    final byte[] encoded = str.getBytes(StandardCharsets.UTF_8);

    // Guard first: nothing is written when the string is too long.
    if (encoded.length > 65535) {
        throw new UTFDataFormatException("UTF8 form of string is longer than 65535 bytes: " + str);
    }

    writeShort((short) encoded.length);
    write(encoded);
}
Use of java.io.UTFDataFormatException in project ignite by Apache.
Class GridUnsafeDataInput, method readUTFSpan.
/**
* Reads span of UTF-encoded characters out of internal buffer
* (starting at offset pos and ending at or before offset end),
* consuming no more than utfLen bytes. Appends read characters to
* sbuf. Returns the number of bytes consumed.
* <p>
* Only the 1-, 2- and 3-byte forms are accepted. Characters are decoded into
* {@code urfCBuf} and appended to {@code sbuf} only after the whole span
* decoded successfully.
*
* @param sbuf String builder.
* @param utfLen UTF encoding length.
* @return Number of bytes consumed.
* @throws IOException In case of error. A {@link UTFDataFormatException} is thrown for an
*      invalid lead or continuation byte, for an array index that runs out of bounds while
*      decoding, and when more than {@code utfLen} bytes were consumed.
*/
@SuppressWarnings("ThrowFromFinallyBlock")
private long readUTFSpan(StringBuilder sbuf, long utfLen) throws IOException {
int cpos = 0;
int start = pos;
int avail = Math.min(end - pos, CHAR_BUF_SIZE);
// When the window holds fewer than utfLen bytes, stop 2 bytes early: a 3-byte sequence whose
// lead byte is the last one examined (index stop - 1) then still reads only indices below
// pos + avail.
int stop = pos + ((utfLen > avail) ? avail - 2 : (int) utfLen);
boolean outOfBounds = false;
try {
while (pos < stop) {
int b1 = utfBuf[pos++] & 0xFF;
// b2/b3 are stored without masking; only their masked bits are used below.
int b2, b3;
// The high nibble of the lead byte selects the encoding form.
switch(b1 >> 4) {
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
// 1 byte format: 0xxxxxxx
urfCBuf[cpos++] = (char) b1;
break;
case 12:
case 13:
// 2 byte format: 110xxxxx 10xxxxxx
b2 = utfBuf[pos++];
if ((b2 & 0xC0) != 0x80)
throw new UTFDataFormatException();
urfCBuf[cpos++] = (char) (((b1 & 0x1F) << 6) | (b2 & 0x3F));
break;
case 14:
// 3 byte format: 1110xxxx 10xxxxxx 10xxxxxx
// The farther byte is read first, so an out-of-range index fails before pos is advanced.
b3 = utfBuf[pos + 1];
b2 = utfBuf[pos];
pos += 2;
if ((b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80)
throw new UTFDataFormatException();
urfCBuf[cpos++] = (char) (((b1 & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F));
break;
default:
// 10xx xxxx, 1111 xxxx
throw new UTFDataFormatException();
}
}
} catch (ArrayIndexOutOfBoundsException ignored) {
// Not rethrown: the finally block reports it as a UTFDataFormatException instead.
outOfBounds = true;
} finally {
// Runs on every exit path. On an out-of-bounds access or an over-read, pos is set to
// start + utfLen and a UTFDataFormatException is thrown from here, replacing any exception
// already in flight. Otherwise an exception thrown in the loop propagates unchanged and pos
// is left where decoding stopped.
if (outOfBounds || (pos - start) > utfLen) {
pos = start + (int) utfLen;
throw new UTFDataFormatException();
}
}
sbuf.append(urfCBuf, 0, cpos);
return pos - start;
}
Aggregations