Search in sources :

Example 56 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class GenericCsvInputFormatTest method testSparseParseWithIndicesMultiCharDelimiter.

/**
 * Parses records whose fields are separated by the three-character delimiter
 * {@code |-|} while projecting only the field positions 0, 3 and 7 as
 * {@link IntValue}s.
 *
 * <p>The input holds four lines; the first and third end with a trailing
 * delimiter, the second and fourth do not. Each line must yield the three
 * projected values, after which the format must return {@code null} and
 * report that the end has been reached.
 */
@SuppressWarnings("unchecked")
@Test
public void testSparseParseWithIndicesMultiCharDelimiter() {
    try {
        final String fileContent =
                "111|-|222|-|333|-|444|-|555|-|666|-|777|-|888|-|999|-|000|-|\n"
                        + "000|-|999|-|888|-|777|-|666|-|555|-|444|-|333|-|222|-|111\n"
                        + "555|-|999|-|888|-|111|-|666|-|555|-|444|-|777|-|222|-|111|-|\n"
                        + "22222|-|99999|-|8|-|99999999|-|6666666|-|5|-|4444|-|8|-|22222|-|1\n";

        // Projected values (fields 0, 3 and 7) expected per input line, in file order.
        final int[][] expectedRows = {
            { 111, 444, 888 },
            { 0, 777, 333 },
            { 555, 111, 777 },
            { 22222, 99999999, 8 }
        };

        final FileInputSplit inputSplit = createTempFile(fileContent);
        final Configuration config = new Configuration();

        final int[] projectedPositions = { 0, 3, 7 };
        // Generic array creation is not allowed, hence the unchecked cast of a raw Class[].
        final Class<? extends Value>[] projectedTypes =
                (Class<? extends Value>[]) new Class[] { IntValue.class, IntValue.class, IntValue.class };

        format.setFieldDelimiter("|-|");
        format.setFieldsGeneric(projectedPositions, projectedTypes);
        format.configure(config);
        format.open(inputSplit);

        Value[] record = createIntValues(3);
        for (int[] expectedRow : expectedRows) {
            record = format.nextRecord(record);
            assertNotNull(record);
            for (int pos = 0; pos < expectedRow.length; pos++) {
                assertEquals(expectedRow[pos], ((IntValue) record[pos]).getValue());
            }
        }

        // All four lines consumed: no further record, and the format reports the end.
        assertNull(format.nextRecord(record));
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        fail("Test erroneous");
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IntValue(org.apache.flink.types.IntValue) DoubleValue(org.apache.flink.types.DoubleValue) LongValue(org.apache.flink.types.LongValue) Value(org.apache.flink.types.Value) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) IOException(java.io.IOException) Test(org.junit.Test)

Example 57 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class GenericCsvInputFormatTest method testReadInvalidContentsLenient.

/**
 * Reads a two-line, pipe-delimited input with the field types
 * (String, Int, String, Int) while lenient mode is switched on.
 *
 * <p>The last field of the second line ({@code hhg}) is not an integer. The
 * test expects the first call to {@code nextRecord} to produce a record and
 * the second call to return {@code null} without any exception being thrown.
 */
@Test
public void testReadInvalidContentsLenient() {
    try {
        final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
        final FileInputSplit inputSplit = createTempFile(fileContent);
        final Configuration config = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
        format.setLenient(true);
        format.configure(config);
        format.open(inputSplit);

        // Reusable target objects, one per configured field type.
        final Value[] reuse = { new StringValue(), new IntValue(), new StringValue(), new IntValue() };

        final Value[] firstRecord = format.nextRecord(reuse);
        assertNotNull(firstRecord);

        final Value[] secondRecord = format.nextRecord(reuse);
        assertNull(secondRecord);
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IntValue(org.apache.flink.types.IntValue) DoubleValue(org.apache.flink.types.DoubleValue) LongValue(org.apache.flink.types.LongValue) Value(org.apache.flink.types.Value) StringValue(org.apache.flink.types.StringValue) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) IOException(java.io.IOException) Test(org.junit.Test)

Example 58 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class DelimitedInputFormatSamplingTest method testNumSamplesMultipleFiles.

/**
 * Verifies how many streams are opened while gathering statistics over a
 * directory made of four copies of {@code TEST_DATA1}.
 *
 * <p>With an untouched format the stream-open counter must equal
 * {@code DEFAULT_NUM_SAMPLES}; after {@code setNumLineSamples(8)} it must
 * equal 8. The counter is reset before each {@code getStatistics} call.
 */
@Test
public void testNumSamplesMultipleFiles() {
    try {
        final String tempDir = TestFileUtils.createTempFileDir(TEST_DATA1, TEST_DATA1, TEST_DATA1, TEST_DATA1);
        // Every "file" occurrence in the path becomes "test" — presumably so the path
        // resolves through TestFileSystem, whose open counter is asserted below.
        final String testPath = tempDir.replace("file", "test");
        final Configuration config = new Configuration();

        // Default number of samples.
        final TestDelimitedInputFormat defaultFormat = new TestDelimitedInputFormat(CONFIG);
        defaultFormat.setFilePath(testPath);
        defaultFormat.configure(config);

        TestFileSystem.resetStreamOpenCounter();
        defaultFormat.getStatistics(null);
        Assert.assertEquals(
                "Wrong number of samples taken.",
                DEFAULT_NUM_SAMPLES,
                TestFileSystem.getNumtimeStreamOpened());

        // Explicitly configured number of samples.
        final TestDelimitedInputFormat eightSampleFormat = new TestDelimitedInputFormat(CONFIG);
        eightSampleFormat.setFilePath(testPath);
        eightSampleFormat.setNumLineSamples(8);
        eightSampleFormat.configure(config);

        TestFileSystem.resetStreamOpenCounter();
        eightSampleFormat.getStatistics(null);
        Assert.assertEquals(
                "Wrong number of samples taken.",
                8,
                TestFileSystem.getNumtimeStreamOpened());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test)

Example 59 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class DelimitedInputFormatSamplingTest method testDifferentDelimiter.

/**
 * Checks the statistics reported for input whose records are separated by a
 * custom multi-character delimiter instead of a newline.
 *
 * <p>Every {@code "\n"} in {@code TEST_DATA1} is replaced by the delimiter
 * {@code "12345678-"}. The reported record count must lie strictly within
 * one of {@code TEST_DATA_1_LINES}, and the reported average record width
 * strictly within one of the data length divided by that line count.
 */
@Test
public void testDifferentDelimiter() {
    try {
        final String delimiter = "12345678-";
        final String testData = TEST_DATA1.replace("\n", delimiter);
        final String tempFile = TestFileUtils.createTempFile(testData);
        final Configuration conf = new Configuration();

        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath(tempFile);
        format.setDelimiter(delimiter);
        format.configure(conf);

        final BaseStatistics stats = format.getStatistics(null);

        final int numLines = TEST_DATA_1_LINES;
        final float avgWidth = ((float) testData.length()) / numLines;

        // Logical && rather than bitwise &: both operands are booleans, and the
        // upper-bound check alone already decides the result when it fails.
        Assert.assertTrue(
                "Wrong record count.",
                stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
        Assert.assertTrue(
                "Wrong avg record size.",
                stats.getAverageRecordWidth() < avgWidth + 1 && stats.getAverageRecordWidth() > avgWidth - 1);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) BaseStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics) Test(org.junit.Test)

Example 60 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class DelimitedInputFormatSamplingTest method testNumSamplesOneFile.

// ========================================================================
//  Tests
// ========================================================================
/**
 * Verifies how many streams are opened while gathering statistics over a
 * single temp file containing {@code TEST_DATA1}.
 *
 * <p>With an untouched format the stream-open counter must equal
 * {@code DEFAULT_NUM_SAMPLES}; after {@code setNumLineSamples(8)} it must
 * equal 8. The counter is reset before each {@code getStatistics} call.
 */
@Test
public void testNumSamplesOneFile() {
    try {
        final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
        // Every "file" occurrence in the path becomes "test" — presumably so the path
        // resolves through TestFileSystem, whose open counter is asserted below.
        final String testPath = tempFile.replace("file", "test");
        final Configuration config = new Configuration();

        // Default number of samples.
        final TestDelimitedInputFormat defaultFormat = new TestDelimitedInputFormat(CONFIG);
        defaultFormat.setFilePath(testPath);
        defaultFormat.configure(config);

        TestFileSystem.resetStreamOpenCounter();
        defaultFormat.getStatistics(null);
        Assert.assertEquals(
                "Wrong number of samples taken.",
                DEFAULT_NUM_SAMPLES,
                TestFileSystem.getNumtimeStreamOpened());

        // Explicitly configured number of samples.
        final TestDelimitedInputFormat eightSampleFormat = new TestDelimitedInputFormat(CONFIG);
        eightSampleFormat.setFilePath(testPath);
        eightSampleFormat.setNumLineSamples(8);
        eightSampleFormat.configure(config);

        TestFileSystem.resetStreamOpenCounter();
        eightSampleFormat.getStatistics(null);
        Assert.assertEquals(
                "Wrong number of samples taken.",
                8,
                TestFileSystem.getNumtimeStreamOpened());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test)

Aggregations

Configuration (org.apache.flink.configuration.Configuration)630 Test (org.junit.Test)452 IOException (java.io.IOException)137 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)93 File (java.io.File)92 JobID (org.apache.flink.api.common.JobID)74 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)68 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)49 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)46 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)45 Path (org.apache.flink.core.fs.Path)44 ActorRef (akka.actor.ActorRef)43 ArrayList (java.util.ArrayList)43 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)39 FiniteDuration (scala.concurrent.duration.FiniteDuration)38 LocalFlinkMiniCluster (org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster)36 BeforeClass (org.junit.BeforeClass)35 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)33 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)33 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)32