Search in sources:

Example 1 with FieldParser

use of org.apache.flink.types.parser.FieldParser in project flink by apache.

the class RowCsvInputFormat method parseRecord.

/**
 * Parses one record located at {@code bytes[offset, offset + numBytes)} into {@code holders}.
 *
 * <p>Fields are visited in order. Included fields (per {@code fieldIncluded}) are parsed with
 * the parser registered at {@code fieldPosMap[output]} and stored at that same index of
 * {@code holders}; excluded fields are skipped with {@code skipFields}.
 *
 * @param holders target array receiving the parsed field values
 * @param bytes buffer containing the record
 * @param offset index of the first byte of the record in {@code bytes}
 * @param numBytes length of the record in bytes
 * @return {@code true} if every field was processed; {@code false} if the row is too short
 *     and the format is lenient
 * @throws ParseException if the format is not lenient and the row is too short or a field
 *     fails to parse (an {@code EMPTY_COLUMN} state is tolerated), or if a parser/skip step
 *     reports a negative position
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
    byte[] fieldDelimiter = this.getFieldDelimiter();
    boolean[] fieldIncluded = this.fieldIncluded;
    int startPos = offset;
    int limit = offset + numBytes;
    int field = 0;
    int output = 0;
    while (field < fieldIncluded.length) {
        // check valid start position: only the last field may start exactly at the limit
        if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
            if (isLenient()) {
                return false;
            } else {
                throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
            }
        }
        if (fieldIncluded[field]) {
            // parse field
            FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]];
            // remember where this field started so it can be skipped if parsing fails
            int latestValidPos = startPos;
            startPos = parser.resetErrorStateAndParse(bytes, startPos, limit, fieldDelimiter, holders[fieldPosMap[output]]);
            FieldParser.ParseErrorState errorState = parser.getErrorState();
            // in strict mode every error except EMPTY_COLUMN aborts the record
            if (!isLenient()
                    && errorState != FieldParser.ParseErrorState.NONE
                    && errorState != FieldParser.ParseErrorState.EMPTY_COLUMN) {
                // decode the row with the configured charset, as the "Row too short" message does
                throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.", field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), errorState));
            }
            holders[fieldPosMap[output]] = parser.getLastResult();
            // the field is set to null if the parser reported a negative position
            // or the column is empty with emptyColumnAsNull enabled
            if (startPos < 0 || (emptyColumnAsNull && parser.getErrorState() == FieldParser.ParseErrorState.EMPTY_COLUMN)) {
                holders[fieldPosMap[output]] = null;
                startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
            }
            output++;
        } else {
            // skip field
            startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
        }
        // check if something went wrong
        if (startPos < 0) {
            throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'", field + 1, new String(bytes, offset, numBytes, getCharset())));
        } else if (startPos == limit && field != fieldIncluded.length - 1 && !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
            // we are at the end of the record, but not all fields have been read
            // and the end is not a field delimiter indicating an empty last field.
            if (isLenient()) {
                return false;
            } else {
                throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
            }
        }
        field++;
    }
    return true;
}
Also used : ParseException(org.apache.flink.api.common.io.ParseException) FieldParser(org.apache.flink.types.parser.FieldParser)

Example 2 with FieldParser

use of org.apache.flink.types.parser.FieldParser in project flink by apache.

the class TestCsvDeserializationSchema method initFieldParsers.

/**
 * Allocates {@code fieldParsers} with one slot per entry of {@code indexMapping} and fills
 * each slot with a freshly instantiated parser.
 *
 * <p>A mapping value of {@code -1} gets a parser for {@code String}; any other value selects
 * the parser for the corresponding entry of {@code physicalFieldTypes}.
 *
 * @throws RuntimeException if no parser class is available for a mapped field type
 */
private void initFieldParsers() {
    final int columnCount = indexMapping.length;
    this.fieldParsers = new FieldParser<?>[columnCount];
    for (int column = 0; column < columnCount; column++) {
        final int fieldIndex = indexMapping[column];
        final FieldParser<?> columnParser;
        if (fieldIndex == -1) {
            // Column is not part of the output type: a string parser is enough to skip it.
            columnParser = InstantiationUtil.instantiate(FieldParser.getParserForType(String.class), FieldParser.class);
        } else {
            DataType mappedType = physicalFieldTypes.get(fieldIndex);
            Class<? extends FieldParser<?>> parserClass = FieldParser.getParserForType(logicalTypeRootToFieldParserClass(mappedType.getLogicalType().getTypeRoot()));
            if (parserClass == null) {
                throw new RuntimeException("No parser available for type '" + mappedType + "'.");
            }
            columnParser = InstantiationUtil.instantiate(parserClass, FieldParser.class);
        }
        this.fieldParsers[column] = columnParser;
    }
}
Also used : DataType(org.apache.flink.table.types.DataType) FieldParser(org.apache.flink.types.parser.FieldParser)

Aggregations

FieldParser (org.apache.flink.types.parser.FieldParser): 2, ParseException (org.apache.flink.api.common.io.ParseException): 1, DataType (org.apache.flink.table.types.DataType): 1