Use of org.apache.flink.types.StringValue in the Apache Flink project.
Class GenericCsvInputFormatTest, method testReadInvalidContentsLenient.
/**
 * Reads a two-line, pipe-delimited file in lenient mode with the field types
 * (StringValue, IntValue, StringValue, IntValue).
 *
 * <p>The first line fits the configured types and is expected to produce a record. The second
 * line ends in "hhg", which is not an integer, and the test expects {@code nextRecord} to return
 * {@code null} for it instead of throwing.
 */
@Test
public void testReadInvalidContentsLenient() {
    try {
        final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
        format.setLenient(true);
        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new StringValue(), new IntValue(), new StringValue(), new IntValue() };

        // first line matches the configured field types
        assertNotNull(format.nextRecord(values));
        // second line has a non-numeric value in the last (IntValue) column
        assertNull(format.nextRecord(values));
    } catch (Exception ex) {
        // Same failure message as before, but keep the cause so the stack trace is not lost.
        throw new AssertionError(
                "Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage(), ex);
    }
}
Use of org.apache.flink.types.StringValue in the Apache Flink project.
Class GenericCsvInputFormatTest, method readWithParseQuotedStrings.
/**
 * Reads two lines of double-quoted, pipe-delimited string fields with quoted-string parsing
 * enabled and checks the values returned for each field.
 *
 * <p>The first field of the first line contains a backslash followed by a quote character
 * inside the quotes; the assertion expects both characters to appear in the parsed value.
 */
@Test
public void readWithParseQuotedStrings() {
    try {
        final String fileContent = "\"ab\\\"c\"|\"def\"\n\"ghijk\"|\"abc\"";
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, StringValue.class);
        format.enableQuotedStringParsing('"');
        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new StringValue(), new StringValue() };

        // first line: surrounding quotes are stripped, the backslash-quote pair stays in the value
        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("ab\\\"c", ((StringValue) values[0]).getValue());
        assertEquals("def", ((StringValue) values[1]).getValue());

        // second line: plain quoted strings
        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("ghijk", ((StringValue) values[0]).getValue());
        assertEquals("abc", ((StringValue) values[1]).getValue());
    } catch (Exception ex) {
        // Same failure message as before, but keep the cause so the stack trace is not lost.
        throw new AssertionError(
                "Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage(), ex);
    }
}
Use of org.apache.flink.types.StringValue in the Apache Flink project.
Class GenericCsvInputFormatTest, method testReadInvalidContentsLenientWithSkipping.
/**
 * Reads a four-line, pipe-delimited file in lenient mode where the second field is skipped
 * (its type is passed as {@code null}) and only the first (StringValue) and third (IntValue)
 * fields are materialized.
 *
 * <p>The test expects a record for the first and fourth lines and {@code null} for the two
 * malformed lines in between.
 */
@Test
public void testReadInvalidContentsLenientWithSkipping() {
    try {
        final String fileContent =
                "abc|dfgsdf|777|444\n" // good line
                        + "kkz|777|foobar|hhg\n" // wrong data type in field
                        + "kkz|777foobarhhg \n" // too short, a skipped field never ends
                        + "xyx|ignored|42|\n"; // another good line
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        // null marks the second field as skipped
        format.setFieldTypesGeneric(StringValue.class, null, IntValue.class);
        format.setLenient(true);
        format.configure(parameters);
        format.open(split);

        // only the two non-skipped fields need a target value
        Value[] values = new Value[] { new StringValue(), new IntValue() };

        assertNotNull(format.nextRecord(values));
        assertNull(format.nextRecord(values));
        assertNull(format.nextRecord(values));
        assertNotNull(format.nextRecord(values));
    } catch (Exception ex) {
        // Same failure message as before, but keep the cause so the stack trace is not lost.
        throw new AssertionError(
                "Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage(), ex);
    }
}
Use of org.apache.flink.types.StringValue in the Apache Flink project.
Class GenericCsvInputFormatTest, method readWithHeaderLineAndInvalidIntermediate.
/**
 * Reads a file whose header line is repeated in the middle of the data, with
 * skip-first-line-as-header enabled and lenient mode not enabled.
 *
 * <p>The test expects the first data row to be returned and the repeated header line, whose
 * text does not fit the IntValue columns, to raise a {@link ParseException}.
 */
@Test
public void readWithHeaderLineAndInvalidIntermediate() {
    try {
        final String fileContent =
                "colname-1|colname-2|some name 3|column four|\n"
                        + "123|abc|456|def|\n"
                        + "colname-1|colname-2|some name 3|column four|\n" // repeated header in the middle
                        + "987|xyz|654|pqr|\n";
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, StringValue.class, IntValue.class, StringValue.class);
        format.setSkipFirstLineAsHeader(true);
        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new IntValue(), new StringValue(), new IntValue(), new StringValue() };

        // first line is skipped as header
        // first row (= second line)
        assertNotNull(format.nextRecord(values));

        // the repeated header must be rejected rather than silently accepted
        try {
            format.nextRecord(values);
            fail("Format accepted invalid line.");
        } catch (ParseException e) {
            // as we expected
        }
    } catch (Exception ex) {
        // Same failure message as before, but keep the cause so the stack trace is not lost.
        throw new AssertionError(
                "Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage(), ex);
    }
}
Use of org.apache.flink.types.StringValue in the Apache Flink project.
Class ExecutionEnvironment, method readTextFileWithValue.
/**
 * Creates a {@link DataSet} of the lines of the given file, read line by line. Unlike
 * {@link #readTextFile(String, String)}, the elements are mutable {@link StringValue} objects
 * instead of Java Strings, which allows implementations to reduce object creation and garbage
 * collection pressure.
 *
 * <p>The file is decoded using the {@link java.nio.charset.Charset} with the given name.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or
 *     "hdfs://host:port/file/path"). Must not be null.
 * @param charsetName The name of the character set used to read the file.
 * @param skipInvalidLines Whether lines that cannot be read with the given character set are
 *     skipped.
 * @return A DataSet that represents the data read from the given file as text lines.
 */
public DataSource<StringValue> readTextFileWithValue(String filePath, String charsetName, boolean skipInvalidLines) {
    Preconditions.checkNotNull(filePath, "The file path may not be null.");

    // Configure the input format that reads the file into StringValue objects.
    final TextValueInputFormat inputFormat = new TextValueInputFormat(new Path(filePath));
    inputFormat.setCharsetName(charsetName);
    inputFormat.setSkipInvalidLines(skipInvalidLines);

    final ValueTypeInfo<StringValue> lineType = new ValueTypeInfo<>(StringValue.class);
    return new DataSource<>(this, inputFormat, lineType, Utils.getCallLocationName());
}
Aggregations