Search in sources :

Example 66 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class DelimitedInputFormatTest method testMultiCharDelimiter.

/**
 * Verifies record splitting with a delimiter that is several characters long ("1123").
 *
 * <p>The input contains partial delimiter prefixes ("112", and the extra "1" in "yyy1") directly
 * before real delimiters, so the expected records show that such prefixes stay part of the
 * record.
 */
@Test
public void testMultiCharDelimiter() throws IOException {
    final FileInputSplit inputSplit = createTempFile("www112xx1123yyy11123zzzzz1123");
    format.setDelimiter("1123");
    format.configure(new Configuration());
    format.open(inputSplit);

    // Each expected record is the text between two consecutive "1123" delimiters.
    final String[] expectedRecords = { "www112xx", "yyy1", "zzzzz" };
    for (String expected : expectedRecords) {
        final String actual = format.nextRecord(null);
        assertNotNull(actual);
        assertEquals(expected, actual);
    }

    // After the last record the format must report no further data.
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test)

Example 67 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class EnumerateNestedFilesTest method setup.

/**
 * Gives every test a new input format instance and a new, empty configuration.
 */
@Before
public void setup() {
    this.format = new DummyFileInputFormat();
    this.config = new Configuration();
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Before(org.junit.Before)

Example 68 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class FileInputFormatTest method testGetStatisticsMultipleFilesWithCachedVersion.

/**
 * Checks how {@code getStatistics} handles cached statistics for a directory with three files.
 *
 * <p>Four scenarios are exercised, each with a newly configured format:
 * <ol>
 *   <li>no cached statistics: the reported size is the sum of all file sizes;</li>
 *   <li>the previously returned statistics as cache: the very same object is returned;</li>
 *   <li>fake statistics carrying the current modification time: the fake values are returned;</li>
 *   <li>fake statistics carrying an older modification time: the real total size is reported.</li>
 * </ol>
 */
@Test
public void testGetStatisticsMultipleFilesWithCachedVersion() {
    try {
        final long SIZE1 = 2077;
        final long SIZE2 = 31909;
        final long SIZE3 = 10;
        final long TOTAL = SIZE1 + SIZE2 + SIZE3;
        final long FAKE_SIZE = 10065;
        String tempDir = TestFileUtils.createTempFileDir(SIZE1, SIZE2, SIZE3);
        // no cached stats: the total size must be gathered from the files
        DummyFileInputFormat format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());
        FileBaseStatistics stats = format.getStatistics(null);
        Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());
        // pass the up-to-date stats back in. the call should return the identical object
        format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());
        FileBaseStatistics newStats = format.getStatistics(stats);
        // assertSame checks reference identity and reports both objects on failure
        Assert.assertSame("Statistics object was changed", stats, newStats);
        // insert fake stats with the correct modification time. the call should return the fake stats
        format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());
        FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
        BaseStatistics latest = format.getStatistics(fakeStats);
        Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
        // insert fake stats with an outdated modification time. the call should ignore them and report the real total size
        format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());
        FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
        BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
        Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());
    } catch (Exception ex) {
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) FileBaseStatistics(org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics) BaseStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics) IOException(java.io.IOException) Test(org.junit.Test)

Example 69 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class DelimitedInputFormatTest method testReadWithBufferSizeIsMultiple.

/**
 * Tests that the correct number of records is read when a split boundary coincides exactly
 * with a record boundary.
 *
 * <p>The file holds four 7-character records and is read as two splits, with the buffer size
 * set to twice the length of the first split. Three records are expected from the first
 * split and four in total after the second.
 */
@Test
public void testReadWithBufferSizeIsMultiple() throws IOException {
    final String content = "aaaaaaa\nbbbbbbb\nccccccc\nddddddd\n";
    final FileInputSplit wholeFile = createTempFile(content);
    final FileInputSplit firstSplit = new FileInputSplit(0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
    final FileInputSplit secondSplit = new FileInputSplit(1, wholeFile.getPath(), firstSplit.getLength(), wholeFile.getLength(), wholeFile.getHostnames());
    final Configuration parameters = new Configuration();
    format.setBufferSize(2 * ((int) firstSplit.getLength()));
    format.configure(parameters);

    int recordsRead = 0;

    // read the first split
    format.open(firstSplit);
    for (String line = format.nextRecord(null); line != null; line = format.nextRecord(null)) {
        assertEquals(7, line.length());
        recordsRead++;
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();
    // the first split must have read one record too many, because the next split will skip
    // its leading remainder, which happens to be one full record
    assertEquals(3, recordsRead);

    // read the second split, continuing the same counter
    format.open(secondSplit);
    for (String line = format.nextRecord(null); line != null; line = format.nextRecord(null)) {
        assertEquals(7, line.length());
        recordsRead++;
    }
    format.close();
    assertEquals(4, recordsRead);
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test)

Example 70 with Configuration

use of org.apache.flink.configuration.Configuration in project flink by apache.

the class InnerJoinOperatorBaseTest method testJoinRich.

/**
 * Executes an inner join operator with a rich join function on in-memory collections.
 *
 * <p>The join is run twice, first with object reuse disabled and then enabled, and both
 * results are compared against the same expected list. The test also checks that the
 * function's {@code open} and {@code close} methods were invoked and that {@code open} sees
 * subtask index 0 of 1 parallel subtask.
 */
@Test
public void testJoinRich() {
    final String taskName = "Test rich join function";
    final AtomicBoolean openCalled = new AtomicBoolean(false);
    final AtomicBoolean closeCalled = new AtomicBoolean(false);

    // Emits the length of both joined strings and records its lifecycle calls.
    final RichFlatJoinFunction<String, String, Integer> lengthJoiner = new RichFlatJoinFunction<String, String, Integer>() {

        @Override
        public void open(Configuration parameters) throws Exception {
            openCalled.compareAndSet(false, true);
            assertEquals(0, getRuntimeContext().getIndexOfThisSubtask());
            assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
        }

        @Override
        public void close() throws Exception {
            closeCalled.compareAndSet(false, true);
        }

        @Override
        public void join(String first, String second, Collector<Integer> out) throws Exception {
            out.collect(first.length());
            out.collect(second.length());
        }
    };

    final BinaryOperatorInformation<String, String, Integer> operatorInfo = new BinaryOperatorInformation<String, String, Integer>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO);
    final InnerJoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>> joinOperator = new InnerJoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>>(lengthJoiner, operatorInfo, new int[0], new int[0], taskName);

    final List<String> leftInput = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
    final List<String> rightInput = new ArrayList<String>(Arrays.asList("foobar", "foo"));
    final List<Integer> expectedLengths = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));

    try {
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        final HashMap<String, Accumulator<?, ?>> accumulators = new HashMap<String, Accumulator<?, ?>>();
        final HashMap<String, Future<Path>> cacheCopyTasks = new HashMap<>();
        final ExecutionConfig executionConfig = new ExecutionConfig();

        // first run: object reuse disabled
        executionConfig.disableObjectReuse();
        final RuntimeUDFContext safeContext = new RuntimeUDFContext(taskInfo, null, executionConfig, cacheCopyTasks, accumulators, new UnregisteredMetricsGroup());
        final List<Integer> resultSafe = joinOperator.executeOnCollections(leftInput, rightInput, safeContext, executionConfig);

        // second run: object reuse enabled, with a new runtime context
        executionConfig.enableObjectReuse();
        final RuntimeUDFContext regularContext = new RuntimeUDFContext(taskInfo, null, executionConfig, cacheCopyTasks, accumulators, new UnregisteredMetricsGroup());
        final List<Integer> resultRegular = joinOperator.executeOnCollections(leftInput, rightInput, regularContext, executionConfig);

        assertEquals(expectedLengths, resultSafe);
        assertEquals(expectedLengths, resultRegular);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    assertTrue(openCalled.get());
    assertTrue(closeCalled.get());
}
Also used : Accumulator(org.apache.flink.api.common.accumulators.Accumulator) RichFlatJoinFunction(org.apache.flink.api.common.functions.RichFlatJoinFunction) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Collector(org.apache.flink.util.Collector) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) Future(java.util.concurrent.Future) Test(org.junit.Test)

Aggregations

Configuration (org.apache.flink.configuration.Configuration)630 Test (org.junit.Test)452 IOException (java.io.IOException)137 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)93 File (java.io.File)92 JobID (org.apache.flink.api.common.JobID)74 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)68 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)49 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)46 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)45 Path (org.apache.flink.core.fs.Path)44 ActorRef (akka.actor.ActorRef)43 ArrayList (java.util.ArrayList)43 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)39 FiniteDuration (scala.concurrent.duration.FiniteDuration)38 LocalFlinkMiniCluster (org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster)36 BeforeClass (org.junit.BeforeClass)35 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)33 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)33 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)32