Search in sources :

Example 26 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class GenericCsvInputFormatTest method testReadInvalidContentsLenient.

/**
 * Reads a two-row, pipe-delimited file in lenient mode with the field types
 * (String, Int, String, Int). The first row is expected to be returned; for the
 * second row, whose last field ("hhg") is not an integer, {@code nextRecord} is
 * expected to return {@code null}.
 *
 * <p>Unexpected exceptions are propagated to the test runner instead of being
 * converted into a {@code fail(...)} message, so the original stack trace and
 * cause are preserved in the test report.
 */
@Test
public void testReadInvalidContentsLenient() throws Exception {
    // Row 1: abc|222|def|444   Row 2: kkz|777|888|hhg
    final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
    final FileInputSplit split = createTempFile(fileContent);
    final Configuration parameters = new Configuration();

    format.setFieldDelimiter("|");
    format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
    format.setLenient(true);
    format.configure(parameters);
    format.open(split);

    // Reusable target record, one holder per configured field type.
    final Value[] values = new Value[] { new StringValue(), new IntValue(), new StringValue(), new IntValue() };

    // first row parses
    assertNotNull(format.nextRecord(values));
    // second row: no record is produced
    assertNull(format.nextRecord(values));
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IntValue(org.apache.flink.types.IntValue) DoubleValue(org.apache.flink.types.DoubleValue) LongValue(org.apache.flink.types.LongValue) Value(org.apache.flink.types.Value) StringValue(org.apache.flink.types.StringValue) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) IOException(java.io.IOException) Test(org.junit.Test)

Example 27 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class GenericCsvInputFormatTest method readWithParseQuotedStrings.

/**
 * Reads two rows of double-quoted string fields with quoted-string parsing
 * enabled for the {@code '"'} character and checks the extracted values.
 *
 * <p>The first field of the first row contains a backslash-escaped quote; the
 * assertion below expects the backslash and the quote to both be present in the
 * parsed value, while the enclosing quotes are removed.
 *
 * <p>Unexpected exceptions are propagated to the test runner instead of being
 * converted into a {@code fail(...)} message, so the original stack trace and
 * cause are preserved in the test report.
 */
@Test
public void readWithParseQuotedStrings() throws Exception {
    // Raw file text:
    //   "ab\"c"|"def"
    //   "ghijk"|"abc"
    final String fileContent = "\"ab\\\"c\"|\"def\"\n\"ghijk\"|\"abc\"";
    final FileInputSplit split = createTempFile(fileContent);
    final Configuration parameters = new Configuration();

    format.setFieldDelimiter("|");
    format.setFieldTypesGeneric(StringValue.class, StringValue.class);
    format.enableQuotedStringParsing('"');
    format.configure(parameters);
    format.open(split);

    Value[] values = new Value[] { new StringValue(), new StringValue() };

    // first row
    values = format.nextRecord(values);
    assertNotNull(values);
    assertEquals("ab\\\"c", ((StringValue) values[0]).getValue());
    assertEquals("def", ((StringValue) values[1]).getValue());

    // second row
    values = format.nextRecord(values);
    assertNotNull(values);
    assertEquals("ghijk", ((StringValue) values[0]).getValue());
    assertEquals("abc", ((StringValue) values[1]).getValue());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IntValue(org.apache.flink.types.IntValue) DoubleValue(org.apache.flink.types.DoubleValue) LongValue(org.apache.flink.types.LongValue) Value(org.apache.flink.types.Value) StringValue(org.apache.flink.types.StringValue) StringValue(org.apache.flink.types.StringValue) IOException(java.io.IOException) Test(org.junit.Test)

Example 28 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class GenericCsvInputFormatTest method testReadInvalidContentsLenientWithSkipping.

/**
 * Reads four pipe-delimited rows in lenient mode with the field types
 * (String, skipped, Int), where the {@code null} type marks the second field as
 * skipped. The first and last rows are expected to yield a record; for the two
 * malformed rows in between, {@code nextRecord} is expected to return
 * {@code null}.
 *
 * <p>Unexpected exceptions are propagated to the test runner instead of being
 * converted into a {@code fail(...)} message, so the original stack trace and
 * cause are preserved in the test report.
 */
@Test
public void testReadInvalidContentsLenientWithSkipping() throws Exception {
    final String fileContent =
            // good line
            "abc|dfgsdf|777|444\n"
            // wrong data type in field
            + "kkz|777|foobar|hhg\n"
            // too short, a skipped field never ends
            + "kkz|777foobarhhg  \n"
            // another good line
            + "xyx|ignored|42|\n";
    final FileInputSplit split = createTempFile(fileContent);
    final Configuration parameters = new Configuration();

    format.setFieldDelimiter("|");
    format.setFieldTypesGeneric(StringValue.class, null, IntValue.class);
    format.setLenient(true);
    format.configure(parameters);
    format.open(split);

    // Only the two non-skipped fields get a holder in the target record.
    final Value[] values = new Value[] { new StringValue(), new IntValue() };

    assertNotNull(format.nextRecord(values));
    assertNull(format.nextRecord(values));
    assertNull(format.nextRecord(values));
    assertNotNull(format.nextRecord(values));
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IntValue(org.apache.flink.types.IntValue) DoubleValue(org.apache.flink.types.DoubleValue) LongValue(org.apache.flink.types.LongValue) Value(org.apache.flink.types.Value) StringValue(org.apache.flink.types.StringValue) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) IOException(java.io.IOException) Test(org.junit.Test)

Example 29 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class GenericCsvInputFormatTest method readWithHeaderLineAndInvalidIntermediate.

/**
 * Reads a file whose header line appears both at the top and again in the
 * middle, with first-line-as-header skipping enabled and lenient mode not
 * enabled by this test. The first data row is expected to parse; reading the
 * repeated header line with the field types (Int, String, Int, String) is
 * expected to raise a {@code ParseException}.
 *
 * <p>Exceptions other than the expected {@code ParseException} are propagated
 * to the test runner instead of being converted into a {@code fail(...)}
 * message, so the original stack trace and cause are preserved in the test
 * report.
 */
@Test
public void readWithHeaderLineAndInvalidIntermediate() throws Exception {
    final String fileContent =
            "colname-1|colname-2|some name 3|column four|\n"
            + "123|abc|456|def|\n"
            // repeated header in the middle
            + "colname-1|colname-2|some name 3|column four|\n"
            + "987|xyz|654|pqr|\n";
    final FileInputSplit split = createTempFile(fileContent);
    final Configuration parameters = new Configuration();

    format.setFieldDelimiter("|");
    format.setFieldTypesGeneric(IntValue.class, StringValue.class, IntValue.class, StringValue.class);
    format.setSkipFirstLineAsHeader(true);
    format.configure(parameters);
    format.open(split);

    final Value[] values = new Value[] { new IntValue(), new StringValue(), new IntValue(), new StringValue() };

    // first line is skipped as header
    // first row (= second line)
    assertNotNull(format.nextRecord(values));

    try {
        format.nextRecord(values);
        fail("Format accepted invalid line.");
    } catch (ParseException expected) {
        // as we expected
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IntValue(org.apache.flink.types.IntValue) DoubleValue(org.apache.flink.types.DoubleValue) LongValue(org.apache.flink.types.LongValue) Value(org.apache.flink.types.Value) StringValue(org.apache.flink.types.StringValue) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) IOException(java.io.IOException) Test(org.junit.Test)

Example 30 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class ExecutionEnvironment method readTextFileWithValue.

/**
 * Creates a {@link DataSet} of the lines read from the given file, delivered as mutable
 * {@link StringValue} objects instead of Java Strings. Apart from the element type, this method
 * is similar to {@link #readTextFile(String, String)}. StringValues can be used to tune
 * implementations to be less object and garbage collection heavy.
 *
 * <p>The files are read using the {@link java.nio.charset.Charset} with the given name.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or
 *     "hdfs://host:port/file/path"). Must not be null.
 * @param charsetName The name of the character set used to read the file.
 * @param skipInvalidLines A flag to indicate whether to skip lines that cannot be read with the
 *     given character set.
 * @return A DataSet that represents the data read from the given file as text lines.
 */
public DataSource<StringValue> readTextFileWithValue(String filePath, String charsetName, boolean skipInvalidLines) {
    Preconditions.checkNotNull(filePath, "The file path may not be null.");

    // Build and configure the input format for the given location.
    final Path inputPath = new Path(filePath);
    final TextValueInputFormat inputFormat = new TextValueInputFormat(inputPath);
    inputFormat.setCharsetName(charsetName);
    inputFormat.setSkipInvalidLines(skipInvalidLines);

    // Type information is created before the call location is resolved, as in the
    // single-expression form of this method.
    final ValueTypeInfo<StringValue> elementType = new ValueTypeInfo<>(StringValue.class);
    return new DataSource<>(this, inputFormat, elementType, Utils.getCallLocationName());
}
Also used : Path(org.apache.flink.core.fs.Path) TextValueInputFormat(org.apache.flink.api.java.io.TextValueInputFormat) StringValue(org.apache.flink.types.StringValue) DataSource(org.apache.flink.api.java.operators.DataSource)

Aggregations

StringValue (org.apache.flink.types.StringValue)88 Test (org.junit.Test)61 IntValue (org.apache.flink.types.IntValue)35 LongValue (org.apache.flink.types.LongValue)21 IOException (java.io.IOException)17 ArrayList (java.util.ArrayList)15 Record (org.apache.flink.types.Record)13 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)12 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)11 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)11 DoubleValue (org.apache.flink.types.DoubleValue)11 Value (org.apache.flink.types.Value)10 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)9 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)7 Plan (org.apache.flink.api.common.Plan)7 Configuration (org.apache.flink.configuration.Configuration)7 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)7 NoSuchElementException (java.util.NoSuchElementException)6 File (java.io.File)5 JobExecutionResult (org.apache.flink.api.common.JobExecutionResult)5