Use of org.apache.flink.types.parser.FieldParser in the Apache Flink project.
The parseRecord method of the RowCsvInputFormat class.
/**
 * Parses one CSV record from {@code bytes} into {@code holders}.
 *
 * <p>Walks the included-field mask, delegating each selected column to its
 * {@link FieldParser} and skipping unselected columns. Parsed values are stored into
 * {@code holders} at the position given by {@code fieldPosMap}.
 *
 * @param holders  reusable value holders, indexed via {@code fieldPosMap}; entries are set to
 *                 {@code null} for unparsable or (with {@code emptyColumnAsNull}) empty columns
 * @param bytes    raw record bytes
 * @param offset   start of the record within {@code bytes}
 * @param numBytes length of the record
 * @return {@code true} if the record was parsed; {@code false} if it was dropped in lenient mode
 * @throws ParseException if the row is too short or a field fails to parse (non-lenient mode)
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
    byte[] fieldDelimiter = this.getFieldDelimiter();
    boolean[] fieldIncluded = this.fieldIncluded;

    int startPos = offset;
    int limit = offset + numBytes;

    int field = 0;   // position within the CSV row
    int output = 0;  // position within the selected (included) fields
    while (field < fieldIncluded.length) {
        // check valid start position: running past the limit, or hitting it before the last
        // field, means the row has fewer columns than expected
        if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
            if (isLenient()) {
                return false;
            } else {
                throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
            }
        }

        if (fieldIncluded[field]) {
            // parse field
            @SuppressWarnings("unchecked")
            FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]];
            // remember the position before parsing so we can re-scan for the delimiter on failure
            int latestValidPos = startPos;
            startPos = parser.resetErrorStateAndParse(bytes, startPos, limit, fieldDelimiter, holders[fieldPosMap[output]]);

            if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
                // the error state EMPTY_COLUMN is ignored
                if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
                    // use getCharset() so the reported row content matches the configured
                    // input encoding (consistent with the "Row too short" message above)
                    throw new ParseException(String.format(
                            "Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
                            field + 1,
                            new String(bytes, offset, numBytes, getCharset()),
                            parser.getClass().getSimpleName(),
                            parser.getErrorState()));
                }
            }
            holders[fieldPosMap[output]] = parser.getLastResult();

            // the parse failed (negative position), or the column was empty with
            // emptyColumnAsNull enabled: store null and skip ahead to the next delimiter
            if (startPos < 0 || (emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
                holders[fieldPosMap[output]] = null;
                startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
            }
            output++;
        } else {
            // skip field
            startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
        }

        // check if something went wrong
        if (startPos < 0) {
            throw new ParseException(String.format(
                    "Unexpected parser position for column %1$s of row '%2$s'",
                    field + 1,
                    new String(bytes, offset, numBytes, getCharset())));
        } else if (startPos == limit && field != fieldIncluded.length - 1 && !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
            // We are at the end of the record, but not all fields have been read
            // and the end is not a field delimiter indicating an empty last field.
            if (isLenient()) {
                return false;
            } else {
                throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
            }
        }
        field++;
    }
    return true;
}
Use of org.apache.flink.types.parser.FieldParser in the Apache Flink project.
The initFieldParsers method of the TestCsvDeserializationSchema class.
/**
 * Builds one {@link FieldParser} per CSV column: columns mapped to the output type get a
 * parser matching their logical type, while unmapped columns get a string parser that is
 * only used to advance past them.
 */
private void initFieldParsers() {
    final int columnCount = indexMapping.length;
    this.fieldParsers = new FieldParser<?>[columnCount];

    for (int col = 0; col < columnCount; col++) {
        final int targetField = indexMapping[col];

        if (targetField == -1) {
            // Column is not part of the output type; a string parser suffices to skip it.
            this.fieldParsers[col] =
                    InstantiationUtil.instantiate(FieldParser.getParserForType(String.class), FieldParser.class);
        } else {
            final DataType fieldType = physicalFieldTypes.get(targetField);
            final Class<? extends FieldParser<?>> parserClass =
                    FieldParser.getParserForType(
                            logicalTypeRootToFieldParserClass(fieldType.getLogicalType().getTypeRoot()));
            if (parserClass == null) {
                throw new RuntimeException("No parser available for type '" + fieldType + "'.");
            }
            this.fieldParsers[col] = InstantiationUtil.instantiate(parserClass, FieldParser.class);
        }
    }
}
Aggregations