use of java.io.UTFDataFormatException in project derby by apache.
the class UTF8UtilTest method testMissingSecondByteOfTwo.
/**
 * Verifies that skipping characters fails when the stream ends in the
 * middle of a character.
 * <p>
 * The final byte announces a two-byte character, but the stream contains
 * no second byte for it.
 */
public void testMissingSecondByteOfTwo() throws IOException {
    // 0xdf = 11011111, the first byte of a two-byte character
    final byte[] truncated = new byte[] { 'a', (byte) 0xdf };
    final InputStream in = new ByteArrayInputStream(truncated);
    try {
        UTF8Util.skipFully(in, 2);
    } catch (UTFDataFormatException expected) {
        // The incomplete character was rejected, which is what we want.
        return;
    }
    fail("Reading invalid UTF-8 should fail");
}
use of java.io.UTFDataFormatException in project derby by apache.
the class UTF8UtilTest method testInvalidUTF8Encoding.
/**
 * Verifies that skipping characters fails when the stream contains a byte
 * that is not a valid UTF-8 encoding.
 */
public void testInvalidUTF8Encoding() throws IOException {
    // 0xf8 = 11111000 <-- invalid UTF-8 encoding, placed in the middle
    final byte[] corrupt = new byte[] { 'a', 'b', 'c', (byte) 0xf8, 'e', 'f' };
    final InputStream in = new ByteArrayInputStream(corrupt);
    try {
        UTF8Util.skipFully(in, 6);
    } catch (UTFDataFormatException expected) {
        // The invalid byte was rejected, which is what we want.
        return;
    }
    fail("Reading invalid UTF-8 should fail");
}
use of java.io.UTFDataFormatException in project openj9 by eclipse.
the class Util method convertUTF8WithBuf.
/**
 * Decodes {@code utfSize} bytes of {@code buf}, starting at {@code offset},
 * into a string, using {@code out} as the working character buffer.
 * <p>
 * Only one-, two- and three-byte sequences are accepted; any other lead
 * byte is reported as malformed.
 *
 * @param buf     the encoded bytes
 * @param out     scratch buffer that receives the decoded characters; its
 *                capacity is not checked here, so it must be able to hold
 *                every decoded character
 * @param offset  index in {@code buf} of the first encoded byte
 * @param utfSize number of encoded bytes to decode
 * @return a new string holding the decoded characters
 * @throws UTFDataFormatException if a sequence is truncated, a
 *         continuation byte is not of the form {@code 10xxxxxx}, or a lead
 *         byte does not start a one-, two- or three-byte sequence
 */
public static String convertUTF8WithBuf(byte[] buf, char[] out, int offset, int utfSize) throws UTFDataFormatException {
    int count = 0, s = 0, a;
    while (count < utfSize) {
        // A byte with the high bit set is negative, so the cast yields a
        // char >= 0x80; the low eight bits are intact for the masks below.
        if ((out[s] = (char) buf[offset + count++]) < '\u0080') {
            s++;
        } else if (((a = out[s]) & 0xe0) == 0xc0) {
            if (count >= utfSize) {
                /*[MSG "K0062", "Second byte at {0} does not match UTF8 Specification"]*/
                // $NON-NLS-1$
                throw new UTFDataFormatException(com.ibm.oti.util.Msg.getString("K0062", count));
            }
            // Continuation bytes are relative to offset, like the lead byte.
            int b = buf[offset + count++];
            if ((b & 0xC0) != 0x80) {
                /*[MSG "K0062", "Second byte at {0} does not match UTF8 Specification"]*/
                // $NON-NLS-1$
                throw new UTFDataFormatException(com.ibm.oti.util.Msg.getString("K0062", (count - 1)));
            }
            out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
        } else if ((a & 0xf0) == 0xe0) {
            // Two more bytes are required after the lead byte.
            if (count + 1 >= utfSize) {
                /*[MSG "K0063", "Third byte at {0} does not match UTF8 Specification"]*/
                // $NON-NLS-1$
                throw new UTFDataFormatException(com.ibm.oti.util.Msg.getString("K0063", (count + 1)));
            }
            int b = buf[offset + count++];
            int c = buf[offset + count++];
            if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
                /*[MSG "K0064", "Second or third byte at {0} does not match UTF8 Specification"]*/
                // $NON-NLS-1$
                throw new UTFDataFormatException(com.ibm.oti.util.Msg.getString("K0064", (count - 2)));
            }
            out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
        } else {
            // Lead byte is 10xxxxxx or 1111xxxx: not a supported sequence start.
            // $NON-NLS-1$
            throw new UTFDataFormatException(com.ibm.oti.util.Msg.getString("K0065", (count - 1)));
        }
    }
    return new String(out, 0, s);
}
use of java.io.UTFDataFormatException in project teiid by teiid.
the class CodeByteStream method readUTF.
/**
 * Reads a length-prefixed encoded string via {@code readByte()} and decodes
 * it into characters.
 * <p>
 * The first value read is the encoded length in bytes. The value 255 means
 * the real length follows as two further values, high part first. Lead
 * bytes of one-, two- and three-byte sequences are accepted; anything else
 * is rejected.
 *
 * @return the decoded characters, sized exactly to the number decoded
 * @throws UTFDataFormatException if a sequence would run past the declared
 *         length, a continuation byte is not of the form {@code 10xxxxxx},
 *         or a lead byte is of the form {@code 10xxxxxx} or {@code 1111xxxx}
 */
public char[] readUTF() throws UTFDataFormatException {
    int byteLength = readByte();
    if (byteLength == 255) {
        // long UTF: the actual length is spread over the next two values
        final int hi = readByte();
        final int lo = readByte();
        byteLength = (hi << 8) + lo;
    }
    // At most one char per encoded byte, so this is always large enough.
    char[] chars = new char[byteLength];
    int consumed = 0;
    int produced = 0;
    while (consumed < byteLength) {
        final int lead = readByte();
        final int nibble = lead >> 4;
        if (nibble >= 0 && nibble <= 7) {
            // 0xxxxxxx
            consumed++;
            chars[produced++] = (char) lead;
        } else if (nibble == 12 || nibble == 13) {
            // 110x xxxx 10xx xxxx
            consumed += 2;
            if (consumed > byteLength) {
                throw new UTFDataFormatException();
            }
            final int second = readByte();
            if ((second & 0xC0) != 0x80) {
                throw new UTFDataFormatException();
            }
            chars[produced++] = (char) (((lead & 0x1F) << 6) | (second & 0x3F));
        } else if (nibble == 14) {
            // 1110 xxxx 10xx xxxx 10xx xxxx
            consumed += 3;
            if (consumed > byteLength) {
                throw new UTFDataFormatException();
            }
            final int second = readByte();
            final int third = readByte();
            if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
                throw new UTFDataFormatException();
            }
            chars[produced++] = (char) (((lead & 0x0F) << 12) | ((second & 0x3F) << 6) | (third & 0x3F));
        } else {
            // 10xx xxxx, 1111 xxxx
            throw new UTFDataFormatException();
        }
    }
    if (produced < byteLength) {
        // Multi-byte sequences were present: shrink to the decoded length.
        final char[] trimmed = new char[produced];
        System.arraycopy(chars, 0, trimmed, 0, produced);
        chars = trimmed;
    }
    return chars;
}
use of java.io.UTFDataFormatException in project teiid by teiid.
the class Field method getUTF.
/**
 * Decodes the length-prefixed encoded string stored in {@code buffer} at
 * the given position relative to this object's own offset.
 * <p>
 * The encoded length in bytes is obtained from {@code getUInt2} at that
 * position, and the encoded bytes begin two positions later. Lead bytes of
 * one-, two- and three-byte sequences are accepted; anything else is
 * rejected.
 *
 * @param offset position of the string, relative to {@code this.offset}
 * @return the decoded characters, sized exactly to the number decoded
 * @throws UTFDataFormatException if a sequence would run past the declared
 *         length, a continuation byte is not of the form {@code 10xxxxxx},
 *         or a lead byte is of the form {@code 10xxxxxx} or {@code 1111xxxx}
 */
public char[] getUTF(int offset) throws UTFDataFormatException {
    int cursor = this.offset + offset;
    final int byteLength = getUInt2(cursor);
    cursor += 2;
    // At most one char per encoded byte, so this is always large enough.
    char[] chars = new char[byteLength];
    int consumed = 0;
    int produced = 0;
    while (consumed < byteLength) {
        // Masked to 0..255, so the bit tests below see an unsigned byte.
        final int lead = buffer[cursor++] & 0xFF;
        if (lead < 0x80) {
            // 0xxxxxxx
            consumed++;
            chars[produced++] = (char) lead;
        } else if ((lead & 0xE0) == 0xC0) {
            // 110x xxxx 10xx xxxx
            consumed += 2;
            if (consumed > byteLength) {
                throw new UTFDataFormatException();
            }
            final int second = buffer[cursor++] & 0xFF;
            if ((second & 0xC0) != 0x80) {
                throw new UTFDataFormatException();
            }
            chars[produced++] = (char) (((lead & 0x1F) << 6) | (second & 0x3F));
        } else if ((lead & 0xF0) == 0xE0) {
            // 1110 xxxx 10xx xxxx 10xx xxxx
            consumed += 3;
            if (consumed > byteLength) {
                throw new UTFDataFormatException();
            }
            final int second = buffer[cursor++] & 0xFF;
            final int third = buffer[cursor++] & 0xFF;
            if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
                throw new UTFDataFormatException();
            }
            chars[produced++] = (char) (((lead & 0x0F) << 12) | ((second & 0x3F) << 6) | (third & 0x3F));
        } else {
            // 10xx xxxx, 1111 xxxx
            throw new UTFDataFormatException();
        }
    }
    if (produced < byteLength) {
        // Multi-byte sequences were present: shrink to the decoded length.
        final char[] trimmed = new char[produced];
        System.arraycopy(chars, 0, trimmed, 0, produced);
        chars = trimmed;
    }
    return chars;
}
Aggregations