Search in sources :

Example 31 with MalformedInputException

use of java.nio.charset.MalformedInputException in project hadoop by apache.

the class Text method validateUTF8.

/**
 * Check to see if a byte array is valid UTF-8.
 *
 * <p>The bytes are scanned with a small state machine. A lead byte selects
 * how many trail bytes must follow (looked up in {@code bytesFromUTF8}); the
 * first trail byte is additionally range-checked against the lead byte so
 * that overlong encodings, UTF-16 surrogates and code points above U+10FFFF
 * are rejected. A sequence that is cut off by the end of the range is
 * rejected as well.
 *
 * @param utf8 the array of bytes
 * @param start the offset of the first byte in the array
 * @param len the length of the byte sequence
 * @throws MalformedInputException if the byte array contains invalid bytes,
 *         or if the range ends in the middle of a multi-byte sequence
 */
public static void validateUTF8(byte[] utf8, int start, int len) throws MalformedInputException {
    int count = start;
    int leadByte = 0;
    // number of trail bytes still expected for the current sequence
    int length = 0;
    int state = LEAD_BYTE;
    final int end = start + len;
    while (count < end) {
        int aByte = utf8[count] & 0xFF;
        switch (state) {
            case LEAD_BYTE:
                leadByte = aByte;
                length = bytesFromUTF8[aByte];
                switch (length) {
                    case 0: // check for ASCII
                        if (leadByte > 0x7F)
                            throw new MalformedInputException(count);
                        break;
                    case 1:
                        // two-byte sequence; 0xC0 and 0xC1 would be overlong encodings
                        if (leadByte < 0xC2 || leadByte > 0xDF)
                            throw new MalformedInputException(count);
                        state = TRAIL_BYTE_1;
                        break;
                    case 2:
                        // three-byte sequence
                        if (leadByte < 0xE0 || leadByte > 0xEF)
                            throw new MalformedInputException(count);
                        state = TRAIL_BYTE_1;
                        break;
                    case 3:
                        // four-byte sequence; lead bytes above 0xF4 exceed U+10FFFF
                        if (leadByte < 0xF0 || leadByte > 0xF4)
                            throw new MalformedInputException(count);
                        state = TRAIL_BYTE_1;
                        break;
                    default:
                        // too long? Longest valid UTF-8 is 4 bytes (lead + three)
                        // or if < 0 we got a trail byte in the lead byte position
                        throw new MalformedInputException(count);
                }
                // switch (length)
                break;
            case TRAIL_BYTE_1:
                // 0xF0 0x80..0x8F would be an overlong four-byte encoding
                if (leadByte == 0xF0 && aByte < 0x90)
                    throw new MalformedInputException(count);
                // 0xF4 0x90.. would encode a code point above U+10FFFF
                if (leadByte == 0xF4 && aByte > 0x8F)
                    throw new MalformedInputException(count);
                // 0xE0 0x80..0x9F would be an overlong three-byte encoding
                if (leadByte == 0xE0 && aByte < 0xA0)
                    throw new MalformedInputException(count);
                // 0xED 0xA0.. would encode a UTF-16 surrogate (U+D800..U+DFFF)
                if (leadByte == 0xED && aByte > 0x9F)
                    throw new MalformedInputException(count);
            // falls through to regular trail-byte test!!
            case TRAIL_BYTE:
                if (aByte < 0x80 || aByte > 0xBF)
                    throw new MalformedInputException(count);
                if (--length == 0) {
                    state = LEAD_BYTE;
                } else {
                    state = TRAIL_BYTE;
                }
                break;
            default:
                break;
        }
        // switch (state)
        count++;
    }
    // Still waiting for trail bytes here means the range ended in the middle
    // of a multi-byte sequence, which is not valid UTF-8 either.
    if (state != LEAD_BYTE) {
        throw new MalformedInputException(count);
    }
}
Also used : MalformedInputException(java.nio.charset.MalformedInputException)

Example 32 with MalformedInputException

use of java.nio.charset.MalformedInputException in project webtools.sourceediting by eclipse.

the class XMLMalformedInputTests method doTestMalformedInput.

/**
 * Tests for a file, filename, that should throw a
 * MalformedInputExceptionWithDetail at character, expectedPosition. This
 * happens when no encoding is specified, so the default is used, but
 * there are characters that the default encoding does not recognize.
 *
 * <p>The file is read up to three times: once by the detector to find the
 * charset, once with a strict decoder to provoke the plain
 * MalformedInputException, and once more through
 * handleMalFormedInput_DetailChecking to obtain the character position.
 *
 * @param filename name of the test file, resolved through TestsPlugin.getTestFile
 * @param detector the charset detector to feed the file to; it is cast to
 *        XMLResourceEncodingDetector, so any other implementation fails
 *        with a ClassCastException
 * @param expectedPosition the character position the detailed exception
 *        should report; a reported position of expectedPosition - 2 is
 *        accepted as well
 * @throws IOException if the file cannot be opened, read or closed
 * @throws IllegalArgumentException if the test file does not exist
 */
void doTestMalformedInput(String filename, IResourceCharsetDetector detector, int expectedPosition) throws IOException {
    Exception foundException = null;
    // -1 means "no detailed exception was caught"
    int badCharPosition = -1;
    File file = TestsPlugin.getTestFile(filename);
    if (!file.exists())
        throw new IllegalArgumentException(filename + " was not found");
    // first pass: let the detector look at the stream to work out the charset
    InputStream inputStream = new FileInputStream(file);
    InputStream istream = getMarkSupportedStream(inputStream);
    detector.set(istream);
    // IEncodedDocument doc =
    // detector.createNewStructuredDocument(filename, istream);
    EncodingMemento encodingMemento = ((XMLResourceEncodingDetector) detector).getEncodingMemento();
    String foundIANAEncoding = encodingMemento.getJavaCharsetName();
    Charset charset = Charset.forName(foundIANAEncoding);
    // REPORT makes the decoder throw instead of silently replacing bad input
    CharsetDecoder charsetDecoder = charset.newDecoder();
    charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
    charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    istream.close();
    inputStream.close();
    // now, try reading as per encoding
    inputStream = new FileInputStream(file);
    // skip BOM for this case
    // System.out.println(inputStream.read());
    // System.out.println(inputStream.read());
    // System.out.println(inputStream.read());
    InputStreamReader reader = new InputStreamReader(inputStream, charsetDecoder);
    try {
        // just try reading ... should throw the (plain) exception
        readInputStream(reader);
    } catch (MalformedInputException e) {
        // as expected, now do detailed checking.
        // Start over on a fresh stream and a fresh decoder so the detail
        // check reads the file from the beginning.
        inputStream.close();
        istream.close();
        inputStream = new FileInputStream(file);
        charsetDecoder = charset.newDecoder();
        charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
        charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        reader = new InputStreamReader(inputStream, charsetDecoder);
        istream = getMarkSupportedStream(inputStream);
        try {
            handleMalFormedInput_DetailChecking(reader, foundIANAEncoding);
        } catch (MalformedInputExceptionWithDetail se) {
            // remember what was thrown; it is asserted on after cleanup
            foundException = se;
            badCharPosition = se.getCharPosition();
        }
    } finally {
        if (istream != null) {
            istream.close();
        }
        if (inputStream != null) {
            inputStream.close();
        }
    }
    // handle adjustments here for VM differences:
    // for now it's either 49 or 49 + 2 BOMs (51);
    // can be smarter later.
    assertTrue("MalformedInputException was not thrown as expected for filename: " + filename + " Exception thrown:" + foundException, foundException instanceof MalformedInputExceptionWithDetail);
    assertTrue("Wrong character position detected in MalformedInputException.  Expected: " + expectedPosition + " Found: " + badCharPosition, (badCharPosition == expectedPosition) || badCharPosition == expectedPosition - 2);
}
Also used : CharsetDecoder(java.nio.charset.CharsetDecoder) XMLResourceEncodingDetector(org.eclipse.wst.xml.core.internal.contenttype.XMLResourceEncodingDetector) InputStreamReader(java.io.InputStreamReader) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) EncodingMemento(org.eclipse.wst.sse.core.internal.encoding.EncodingMemento) MalformedInputExceptionWithDetail(org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail) Charset(java.nio.charset.Charset) MalformedInputException(java.nio.charset.MalformedInputException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) MalformedInputException(java.nio.charset.MalformedInputException) File(java.io.File)

Example 33 with MalformedInputException

use of java.nio.charset.MalformedInputException in project webtools.sourceediting by eclipse.

the class CreateCodedReaderTester method testCreateAllFiles.

/**
 * When RECREATE_FILES is set, walks every test file under TEST_FILE_DIR,
 * adds it to the test-files zip in the install location, and hands the
 * charset names (or the name of the exception raised while detecting them)
 * to createTestMethodSource. Does nothing when RECREATE_FILES is not set.
 *
 * @throws CoreException declared for the helpers called here
 * @throws IOException if a test file or the zip file cannot be read, written or closed
 */
public void testCreateAllFiles() throws CoreException, IOException {
    if (RECREATE_FILES) {
        List allFiles = TestsPlugin.getAllTestFiles(TEST_FILE_DIR);
        URL outputDirURL = TestsPlugin.getInstallLocation();
        File zipoutFile = new File(outputDirURL.getPath(), TESTFILES_ZIPFILE_NAME);
        java.io.FileOutputStream zipOut = new FileOutputStream(zipoutFile);
        ZipOutputStream zipOutputStream = new ZipOutputStream(zipOut);
        try {
            int count = 1;
            for (Iterator iter = allFiles.iterator(); iter.hasNext(); ) {
                File file = (File) iter.next();
                createZipEntry(zipOutputStream, file);
                String detectedCharsetName = null;
                String javaCharsetName = null;
                String expectedException = null;
                // The stream is only needed while the memento is computed, so
                // close it right afterwards instead of leaking one handle per file.
                FileInputStream fileContents = new FileInputStream(file);
                try {
                    CodedReaderCreator codedReaderCreator = new CodedReaderCreator();
                    codedReaderCreator.set(file.getName(), fileContents);
                    try {
                        // just used for debug info, but can throw exception
                        javaCharsetName = codedReaderCreator.getEncodingMemento().getJavaCharsetName();
                        detectedCharsetName = codedReaderCreator.getEncodingMemento().getDetectedCharsetName();
                    } catch (UnsupportedCharsetExceptionWithDetail e) {
                        // ignore for simply creating tests
                        expectedException = e.getClass().getName() + ".class";
                    } catch (MalformedInputException e) {
                        // ignore for simply creating tests
                        expectedException = e.getClass().getName() + ".class";
                    } catch (IllegalCharsetNameException e) {
                        // ignore for simply creating tests
                        expectedException = e.getClass().getName() + ".class";
                    }
                } finally {
                    fileContents.close();
                }
                String subpath = getSubPathName(file);
                createTestMethodSource(count, subpath, detectedCharsetName, javaCharsetName, expectedException);
                count++;
            }
        } finally {
            // release the zip file even if processing one of the files failed
            try {
                zipOutputStream.close();
            } finally {
                zipOut.close();
            }
        }
        assertTrue(true);
    }
}
Also used : CodedReaderCreator(org.eclipse.wst.sse.core.internal.encoding.CodedReaderCreator) FileOutputStream(java.io.FileOutputStream) URL(java.net.URL) FileInputStream(java.io.FileInputStream) IllegalCharsetNameException(java.nio.charset.IllegalCharsetNameException) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) Iterator(java.util.Iterator) MalformedInputException(java.nio.charset.MalformedInputException) List(java.util.List) UnsupportedCharsetExceptionWithDetail(org.eclipse.wst.sse.core.internal.exceptions.UnsupportedCharsetExceptionWithDetail) File(java.io.File)

Example 34 with MalformedInputException

use of java.nio.charset.MalformedInputException in project cytoscape-impl by cytoscape.

the class DefaultAttributeTableReader method readTable.

/**
 * Read table from the data source.
 *
 * <p>The input is decoded as UTF-8. When the only configured delimiter is a
 * comma the rows are split with OpenCSV; otherwise each line is split with
 * the delimiter regular expression from the mapping. Rows before
 * {@code startLineNumber} and rows too short to contain the key column are
 * skipped. A row that fails to parse is logged and skipped rather than
 * aborting the import.
 *
 * @param table the table handed to the parser for every accepted row
 * @throws IOException if the source cannot be read, or if it contains bytes
 *         that are not valid in the expected encoding (the original
 *         {@link MalformedInputException} is attached as the cause)
 */
@Override
public void readTable(CyTable table) throws IOException {
    try {
        BufferedReader bufRd = null;
        if (is == null)
            is = URLUtil.getInputStream(source);
        try {
            // This data is shared by both the OpenCSV and the old method of reading files.
            int lineCount = 0;
            bufRd = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8").newDecoder()));
            /*
             * Read & extract one line at a time. The line can be Tab delimited,
             */
            final String delimiter = mapping.getDelimiterRegEx();
            // If the delimiter contains a comma, treat the file as a CSV file.
            if (delimiter.contains(TextDelimiter.COMMA.getDelimiter()) && mapping.getDelimiters().size() == 1) {
                // Use OpenCSV.. New method...
                CSVReader reader = new CSVReader(bufRd);
                // Note that rowData is roughly equivalent to "parts" in the old code.
                String[] rowData;
                while ((rowData = reader.readNext()) != null) {
                    // If key does not exist, ignore the line.
                    if (lineCount >= startLineNumber && rowData.length >= mapping.getKeyIndex() + 1) {
                        try {
                            parser.parseAll(table, rowData);
                        } catch (Exception ex) {
                            logger.warn("Couldn't parse row from OpenCSV: " + lineCount);
                        }
                        globalCounter++;
                    }
                    lineCount++;
                }
                try {
                    reader.close();
                } catch (Exception ignored) {
                    // Best effort only: bufRd is closed in the finally block below
                    // regardless of whether this close succeeds.
                }
            } else {
                // Use the "old" method for splitting the lines.
                String line;
                String[] parts = null;
                while ((line = bufRd.readLine()) != null) {
                    /*
                     * Ignore Empty & Comment lines.
                     */
                    if ((commentChar != null) && line.startsWith(commentChar)) {
                    // Do nothing
                    } else if ((lineCount >= startLineNumber) && (line.trim().length() > 0)) {
                        parts = line.split(delimiter);
                        // If key does not exist, ignore the line.
                        if (parts.length >= mapping.getKeyIndex() + 1) {
                            try {
                                parser.parseAll(table, parts);
                            } catch (Exception ex) {
                                logger.warn("Couldn't parse row: " + lineCount);
                            }
                            globalCounter++;
                        }
                    }
                    lineCount++;
                }
            }
        } catch (MalformedInputException mie) {
            // Keep the decoder's exception as the cause so the original failure is not lost.
            throw new IOException("Unable to import table: illegal character encoding in input", mie);
        } finally {
            if (bufRd != null)
                bufRd.close();
        }
    } finally {
        if (is != null)
            is.close();
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) CSVReader(au.com.bytecode.opencsv.CSVReader) BufferedReader(java.io.BufferedReader) MalformedInputException(java.nio.charset.MalformedInputException) IOException(java.io.IOException) MalformedInputException(java.nio.charset.MalformedInputException) IOException(java.io.IOException)

Example 35 with MalformedInputException

use of java.nio.charset.MalformedInputException in project georocket by georocket.

the class StringWindow method append.

/**
 * Feed additional bytes into the window, growing the decoded text.
 * Bytes the decoder does not consume during this call remain buffered and
 * are offered to the decoder again together with the next chunk.
 * @param buf the bytes to add
 * @throws IllegalStateException if the decoder reports malformed or
 * unmappable input; the matching charset exception is attached as the cause
 */
public void append(Buffer buf) {
    // start from the incoming chunk unless undecoded bytes are still pending
    if (this.buf.length() == 0) {
        this.buf = buf;
    } else {
        this.buf.appendBuffer(buf);
    }

    // expose all pending bytes to the NIO decoder
    ByteBuffer pending = ByteBuffer.wrap(this.buf.getBytes());

    // open the scratch CharBuffer for writing across its whole capacity
    ensureCharBuffer(buf.length());
    charBuf.limit(charBuf.capacity());
    charBuf.position(0);

    // decode; 'false' tells the decoder that more input may follow
    CoderResult outcome = decoder.decode(pending, charBuf, false);
    if (outcome.isMalformed()) {
        MalformedInputException cause = new MalformedInputException(outcome.length());
        throw new IllegalStateException(cause);
    }
    if (outcome.isUnmappable()) {
        UnmappableCharacterException cause = new UnmappableCharacterException(outcome.length());
        throw new IllegalStateException(cause);
    }

    // switch the scratch buffer to reading and keep only the undecoded tail bytes
    charBuf.flip();
    int consumed = pending.position();
    int total = this.buf.length();
    this.buf = this.buf.getBuffer(consumed, total);

    // add the freshly decoded characters to the window's text
    this.decodedBuf.append(charBuf);
}
Also used : UnmappableCharacterException(java.nio.charset.UnmappableCharacterException) MalformedInputException(java.nio.charset.MalformedInputException) ByteBuffer(java.nio.ByteBuffer) CoderResult(java.nio.charset.CoderResult)

Aggregations

MalformedInputException (java.nio.charset.MalformedInputException)41 IOException (java.io.IOException)14 ByteBuffer (java.nio.ByteBuffer)12 CharBuffer (java.nio.CharBuffer)9 UnmappableCharacterException (java.nio.charset.UnmappableCharacterException)9 CharsetDecoder (java.nio.charset.CharsetDecoder)7 BufferedReader (java.io.BufferedReader)6 Path (java.nio.file.Path)6 File (java.io.File)5 InputStreamReader (java.io.InputStreamReader)5 Charset (java.nio.charset.Charset)5 Test (org.junit.Test)4 CharacterCodingException (java.nio.charset.CharacterCodingException)3 MalformedInputExceptionWithDetail (org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail)3 JSONException (com.alibaba.fastjson.JSONException)2 BufferedWriter (java.io.BufferedWriter)2 FileInputStream (java.io.FileInputStream)2 InputStream (java.io.InputStream)2 UncheckedIOException (java.io.UncheckedIOException)2 StandardCharsets (java.nio.charset.StandardCharsets)2