use of java.nio.charset.MalformedInputException in project hadoop by apache.
the class Text method validateUTF8.
/**
 * Check to see if a byte array is valid UTF-8.
 *
 * <p>The range {@code [start, start + len)} is walked one byte at a time by a
 * small state machine: a lead byte determines how many trail bytes must
 * follow, the first trail byte is additionally range-checked against the
 * lead byte, and every trail byte must lie in {@code 0x80..0xBF}.
 *
 * @param utf8 the array of bytes
 * @param start the offset of the first byte in the array
 * @param len the length of the byte sequence
 * @throws MalformedInputException if the byte array contains invalid bytes,
 *         or if the range ends in the middle of a multi-byte sequence
 */
public static void validateUTF8(byte[] utf8, int start, int len) throws MalformedInputException {
    final int end = start + len;
    int count = start;
    int leadByte = 0;
    // number of trail bytes still expected for the current sequence
    int length = 0;
    int state = LEAD_BYTE;
    while (count < end) {
        int aByte = utf8[count] & 0xFF;
        switch (state) {
            case LEAD_BYTE:
                leadByte = aByte;
                length = bytesFromUTF8[aByte];
                switch (length) {
                    case 0:
                        // check for ASCII
                        if (leadByte > 0x7F)
                            throw new MalformedInputException(count);
                        break;
                    case 1:
                        // two-byte sequence; 0xC0 and 0xC1 would be overlong encodings
                        if (leadByte < 0xC2 || leadByte > 0xDF)
                            throw new MalformedInputException(count);
                        state = TRAIL_BYTE_1;
                        break;
                    case 2:
                        // three-byte sequence
                        if (leadByte < 0xE0 || leadByte > 0xEF)
                            throw new MalformedInputException(count);
                        state = TRAIL_BYTE_1;
                        break;
                    case 3:
                        // four-byte sequence; leads above 0xF4 exceed U+10FFFF
                        if (leadByte < 0xF0 || leadByte > 0xF4)
                            throw new MalformedInputException(count);
                        state = TRAIL_BYTE_1;
                        break;
                    default:
                        // too long? this is an error
                        // or if < 0 we got a trail byte in the lead byte position
                        throw new MalformedInputException(count);
                }
                // switch (length)
                break;
            case TRAIL_BYTE_1:
                // The first trail byte has a narrower range for some leads:
                // 0xF0 / 0xE0 reject overlong forms, 0xF4 rejects values above
                // U+10FFFF, 0xED rejects the UTF-16 surrogate range.
                if (leadByte == 0xF0 && aByte < 0x90)
                    throw new MalformedInputException(count);
                if (leadByte == 0xF4 && aByte > 0x8F)
                    throw new MalformedInputException(count);
                if (leadByte == 0xE0 && aByte < 0xA0)
                    throw new MalformedInputException(count);
                if (leadByte == 0xED && aByte > 0x9F)
                    throw new MalformedInputException(count);
                // falls through to regular trail-byte test!!
            case TRAIL_BYTE:
                if (aByte < 0x80 || aByte > 0xBF)
                    throw new MalformedInputException(count);
                if (--length == 0) {
                    state = LEAD_BYTE;
                } else {
                    state = TRAIL_BYTE;
                }
                break;
            default:
                break;
        }
        // switch (state)
        count++;
    }
    // The input ended while trail bytes were still expected: the last
    // sequence is truncated, which is not valid UTF-8.
    if (state != LEAD_BYTE) {
        throw new MalformedInputException(count);
    }
}
use of java.nio.charset.MalformedInputException in project webtools.sourceediting by eclipse.
the class XMLMalformedInputTests method doTestMalformedInput.
/**
 * Checks that reading the given test file fails with a
 * MalformedInputExceptionWithDetail at (or two characters before)
 * expectedPosition. The charset is taken from the detector's encoding
 * memento; the file is then decoded with a decoder that reports, rather
 * than replaces, malformed and unmappable input.
 *
 * @param filename name of the test file, resolved through TestsPlugin
 * @param detector detector used to determine the charset; cast to
 *        XMLResourceEncodingDetector to obtain the encoding memento
 * @param expectedPosition character position at which the failure is expected
 * @throws IOException if opening, reading or closing the file fails
 * @throws IllegalArgumentException if the test file does not exist
 */
void doTestMalformedInput(String filename, IResourceCharsetDetector detector, int expectedPosition) throws IOException {
    File testFile = TestsPlugin.getTestFile(filename);
    if (!testFile.exists()) {
        throw new IllegalArgumentException(filename + " was not found");
    }

    Exception detailFailure = null;
    int reportedPosition = -1;

    // Pass 1: let the detector look at the content to find the charset.
    InputStream rawStream = new FileInputStream(testFile);
    InputStream markableStream = getMarkSupportedStream(rawStream);
    detector.set(markableStream);
    EncodingMemento memento = ((XMLResourceEncodingDetector) detector).getEncodingMemento();
    String javaCharsetName = memento.getJavaCharsetName();
    Charset charset = Charset.forName(javaCharsetName);
    CharsetDecoder decoder = charset.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
    markableStream.close();
    rawStream.close();

    // Pass 2: read the file with the strict decoder (no BOM skipping here).
    rawStream = new FileInputStream(testFile);
    InputStreamReader reader = new InputStreamReader(rawStream, decoder);
    try {
        // plain read; this is expected to fail
        readInputStream(reader);
    } catch (MalformedInputException expected) {
        // Pass 3: reopen the file and repeat with detail checking to learn
        // the position of the offending character.
        rawStream.close();
        markableStream.close();
        rawStream = new FileInputStream(testFile);
        decoder = charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        reader = new InputStreamReader(rawStream, decoder);
        markableStream = getMarkSupportedStream(rawStream);
        try {
            handleMalFormedInput_DetailChecking(reader, javaCharsetName);
        } catch (MalformedInputExceptionWithDetail detail) {
            detailFailure = detail;
            reportedPosition = detail.getCharPosition();
        }
    } finally {
        if (markableStream != null) {
            markableStream.close();
        }
        if (rawStream != null) {
            rawStream.close();
        }
    }

    // The reported position may differ between VMs: either the expected
    // value or two less (see the original note about 49 vs. 51 with BOMs).
    boolean sawDetailedException = detailFailure instanceof MalformedInputExceptionWithDetail;
    assertTrue("MalformedInputException was not thrown as expected for filename: " + filename + " Exception thrown:" + detailFailure, sawDetailedException);
    boolean positionAccepted = (reportedPosition == expectedPosition) || reportedPosition == expectedPosition - 2;
    assertTrue("Wrong character position detected in MalformedInputException. Expected: " + expectedPosition + " Found: " + reportedPosition, positionAccepted);
}
use of java.nio.charset.MalformedInputException in project webtools.sourceediting by eclipse.
the class CreateCodedReaderTester method testCreateAllFiles.
/**
 * When RECREATE_FILES is set, walks every test file under TEST_FILE_DIR,
 * adds it to a zip archive in the install location (via createZipEntry) and
 * calls createTestMethodSource with the charset names reported by a
 * CodedReaderCreator, or with the class name of the exception that charset
 * detection raised. Does nothing when RECREATE_FILES is false.
 *
 * @throws CoreException declared by the original signature
 * @throws IOException if a file or the zip archive cannot be read or written
 */
public void testCreateAllFiles() throws CoreException, IOException {
    if (!RECREATE_FILES) {
        return;
    }
    List allFiles = TestsPlugin.getAllTestFiles(TEST_FILE_DIR);
    URL outputDirURL = TestsPlugin.getInstallLocation();
    File zipoutFile = new File(outputDirURL.getPath(), TESTFILES_ZIPFILE_NAME);
    java.io.FileOutputStream zipOut = new FileOutputStream(zipoutFile);
    try {
        ZipOutputStream zipOutputStream = new ZipOutputStream(zipOut);
        try {
            int count = 1;
            for (Iterator iter = allFiles.iterator(); iter.hasNext(); ) {
                File file = (File) iter.next();
                createZipEntry(zipOutputStream, file);
                CodedReaderCreator codedReaderCreator = new CodedReaderCreator();
                String detectedCharsetName = null;
                String javaCharsetName = null;
                String expectedException = null;
                // Keep a handle on the stream so it can be closed once the
                // encoding has been inspected; previously it was leaked.
                FileInputStream fileInput = new FileInputStream(file);
                try {
                    codedReaderCreator.set(file.getName(), fileInput);
                    try {
                        // just used for debug info, but can throw exception
                        javaCharsetName = codedReaderCreator.getEncodingMemento().getJavaCharsetName();
                        detectedCharsetName = codedReaderCreator.getEncodingMemento().getDetectedCharsetName();
                    } catch (UnsupportedCharsetExceptionWithDetail e) {
                        // ignore for simply creating tests
                        expectedException = e.getClass().getName() + ".class";
                    } catch (MalformedInputException e) {
                        // ignore for simply creating tests
                        expectedException = e.getClass().getName() + ".class";
                    } catch (IllegalCharsetNameException e) {
                        // ignore for simply creating tests
                        expectedException = e.getClass().getName() + ".class";
                    }
                } finally {
                    fileInput.close();
                }
                String subpath = getSubPathName(file);
                createTestMethodSource(count, subpath, detectedCharsetName, javaCharsetName, expectedException);
                count++;
            }
        } finally {
            // close the archive even when a file in the loop failed
            zipOutputStream.close();
        }
    } finally {
        zipOut.close();
    }
    assertTrue(true);
}
use of java.nio.charset.MalformedInputException in project cytoscape-impl by cytoscape.
the class DefaultAttributeTableReader method readTable.
/**
 * Read table from the data source.
 *
 * <p>The input is decoded as UTF-8 and handed to the parser one row at a
 * time. If the mapping has exactly one delimiter and its regular expression
 * contains a comma, rows are read with OpenCSV; otherwise each line is split
 * with the delimiter regular expression. Rows before startLineNumber and
 * rows with too few columns to contain the key are skipped; rows the parser
 * rejects are logged and skipped.
 *
 * @param table the table passed to the parser for every accepted row
 * @throws IOException if reading fails or the input is not valid UTF-8
 */
@Override
public void readTable(CyTable table) throws IOException {
    try {
        BufferedReader bufRd = null;
        if (is == null)
            is = URLUtil.getInputStream(source);
        try {
            // This data is shared by both the OpenCSV and the old method of reading files.
            int lineCount = 0;
            // A fresh decoder reports malformed input instead of replacing it,
            // so bad bytes surface as MalformedInputException below.
            bufRd = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8").newDecoder()));
            /*
             * Read & extract one line at a time. The line can be Tab delimited,
             */
            final String delimiter = mapping.getDelimiterRegEx();
            // If the delimiter contains a comma, treat the file as a CSV file.
            if (delimiter.contains(TextDelimiter.COMMA.getDelimiter()) && mapping.getDelimiters().size() == 1) {
                // Use OpenCSV.. New method...
                CSVReader reader = new CSVReader(bufRd);
                // Note that rowData is roughly equivalent to "parts" in the old code.
                String[] rowData;
                while ((rowData = reader.readNext()) != null) {
                    // If the key does not exist, ignore the line.
                    if (lineCount >= startLineNumber && rowData.length >= mapping.getKeyIndex() + 1) {
                        try {
                            parser.parseAll(table, rowData);
                        } catch (Exception ex) {
                            logger.warn("Couldn't parse row from OpenCSV: " + lineCount);
                        }
                        globalCounter++;
                    }
                    lineCount++;
                }
                try {
                    reader.close();
                } catch (Exception ignored) {
                    // Best effort only: the underlying BufferedReader is closed
                    // in the finally block below.
                }
            } else {
                // Use the "old" method for splitting the lines.
                String line;
                String[] parts = null;
                while ((line = bufRd.readLine()) != null) {
                    /*
                     * Ignore empty & comment lines.
                     */
                    if ((commentChar != null) && line.startsWith(commentChar)) {
                        // Do nothing
                    } else if ((lineCount >= startLineNumber) && (line.trim().length() > 0)) {
                        parts = line.split(delimiter);
                        // If the key does not exist, ignore the line.
                        if (parts.length >= mapping.getKeyIndex() + 1) {
                            try {
                                parser.parseAll(table, parts);
                            } catch (Exception ex) {
                                logger.warn("Couldn't parse row: " + lineCount);
                            }
                            globalCounter++;
                        }
                    }
                    lineCount++;
                }
            }
        } catch (MalformedInputException mie) {
            // Keep the original exception as the cause so the failure can be diagnosed.
            throw new IOException("Unable to import table: illegal character encoding in input", mie);
        } finally {
            if (bufRd != null)
                bufRd.close();
        }
    } finally {
        if (is != null)
            is.close();
    }
}
use of java.nio.charset.MalformedInputException in project georocket by georocket.
the class StringWindow method append.
/**
 * Append data to the window (i.e. make it larger).
 *
 * <p>The new bytes are added to any bytes still buffered from earlier
 * calls, as much as possible is decoded into the decoded string buffer, and
 * the bytes the decoder did not consume stay buffered for the next call.
 *
 * @param buf the data to append
 * @throws IllegalStateException wrapping a MalformedInputException or an
 *         UnmappableCharacterException if the decoder reports such an error
 */
public void append(Buffer buf) {
    // append new bytes to buffered bytes or use them directly
    if (this.buf.length() > 0) {
        this.buf.appendBuffer(buf);
    } else {
        this.buf = buf;
    }
    // convert Vert.x buffer to ByteBuffer (ugly!)
    ByteBuffer byteBuf = ByteBuffer.wrap(this.buf.getBytes());
    // Prepare the temporary CharBuffer. It has to be sized for everything
    // that is about to be decoded (left-over bytes plus the new chunk), not
    // just for the new chunk; otherwise decoding can stop early on overflow.
    // NOTE(review): assumes ensureCharBuffer guarantees at least the
    // requested capacity - confirm against its definition.
    ensureCharBuffer(this.buf.length());
    charBuf.position(0);
    charBuf.limit(charBuf.capacity());
    // decode ByteBuffer to temporary CharBuffer; more input may follow,
    // so this is not flagged as end of input
    CoderResult result = decoder.decode(byteBuf, charBuf, false);
    if (result.isMalformed()) {
        throw new IllegalStateException(new MalformedInputException(result.length()));
    }
    if (result.isUnmappable()) {
        throw new IllegalStateException(new UnmappableCharacterException(result.length()));
    }
    // reset CharBuffer and remove decoded bytes from byte buffer
    charBuf.flip();
    this.buf = this.buf.getBuffer(byteBuf.position(), this.buf.length());
    // append to decoded string buffer
    this.decodedBuf.append(charBuf);
}
Aggregations