Search in sources :

Example 1 with Tuple6

use of org.apache.flink.api.java.tuple.Tuple6 in project flink by apache.

the class CrossITCase method testProjectCrossOnATupleInput2.

/**
 * Crosses the small 3-tuple data set with the small 5-tuple data set and projects every pair
 * onto a {@link Tuple6} made of, in order: field 3 of the second input, fields 2 and 1 of the
 * first input, fields 4 and 1 of the second input, and field 0 of the first input.
 *
 * <p>The collected tuples are checked against the nine expected rows below.
 */
@Test
public void testProjectCrossOnATupleInput2() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> firstInput =
            CollectionDataSets.getSmall3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> secondInput =
            CollectionDataSets.getSmall5TupleDataSet(env);

    // The projection calls alternate between both inputs; their order defines the field
    // order of the resulting Tuple6.
    DataSet<Tuple6<String, String, Long, Long, Long, Integer>> projected =
            firstInput
                    .cross(secondInput)
                    .projectSecond(3)
                    .projectFirst(2, 1)
                    .projectSecond(4, 1)
                    .projectFirst(0);

    List<Tuple6<String, String, Long, Long, Long, Integer>> actualRows = projected.collect();

    // One row per line, each line (including the last) terminated by a newline.
    String expectedRows =
            String.join(
                            "\n",
                            "Hallo,Hi,1,1,1,1",
                            "Hallo Welt,Hi,1,2,2,1",
                            "Hallo Welt wie,Hi,1,1,3,1",
                            "Hallo,Hello,2,1,1,2",
                            "Hallo Welt,Hello,2,2,2,2",
                            "Hallo Welt wie,Hello,2,1,3,2",
                            "Hallo,Hello world,2,1,1,3",
                            "Hallo Welt,Hello world,2,2,2,3",
                            "Hallo Welt wie,Hello world,2,1,3,3")
                    + "\n";

    compareResultAsTuples(actualRows, expectedRows);
}
Also used : Tuple5(org.apache.flink.api.java.tuple.Tuple5) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple6(org.apache.flink.api.java.tuple.Tuple6) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 2 with Tuple6

use of org.apache.flink.api.java.tuple.Tuple6 in project flink by apache.

the class TPCHQuery10 method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
/**
 * Builds and runs the TPC-H Query 10 example job.
 *
 * <p>The job reads the customer, orders, lineitem and nation data sets, keeps orders whose
 * order-date year is greater than 1990 and lineitems whose return flag equals {@code "R"},
 * sums {@code extendedprice * (1 - discount)} per customer key, attaches the nation name to
 * each customer, joins both results and writes them as CSV to {@code outputPath}.
 *
 * @param args command line arguments, handed to {@code parseParameters}; the method returns
 *     without running anything when that call reports failure
 * @throws Exception if building or executing the program fails
 */
public static void main(String[] args) throws Exception {
    final boolean parametersAccepted = parseParameters(args);
    if (!parametersAccepted) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // ---- inputs ----

    // (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customerRows =
            getCustomerDataSet(env);
    // (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orderRows = getOrdersDataSet(env);
    // (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitemRows = getLineitemDataSet(env);
    // (nationkey, name)
    DataSet<Tuple2<Integer, String>> nationRows = getNationsDataSet(env);

    // ---- filters ----

    // Keep orders whose year (first four characters of orderdate) is after 1990,
    // reduced to (orderkey, custkey).
    DataSet<Tuple2<Integer, Integer>> recentOrders =
            orderRows
                    .filter(o -> Integer.parseInt(o.f2.substring(0, 4)) > 1990)
                    .project(0, 1);

    // Keep lineitems flagged "R", reduced to (orderkey, extendedprice, discount).
    DataSet<Tuple3<Integer, Double, Double>> flaggedLineitems =
            lineitemRows
                    .filter(li -> li.f3.equals("R"))
                    .project(0, 1, 2);

    // ---- joins and aggregation ----

    // Match on orderkey: (custkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsPerCustomer =
            recentOrders
                    .joinWithHuge(flaggedLineitems)
                    .where(0)
                    .equalTo(0)
                    .projectFirst(1)
                    .projectSecond(1, 2);

    // Revenue summed per customer: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenuePerCustomer =
            lineitemsPerCustomer
                    .map(row -> new Tuple2<>(row.f0, row.f1 * (1 - row.f2)))
                    .groupBy(0)
                    .sum(1);

    // Replace nationkey by the nation name: (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customersWithNationName =
            customerRows
                    .joinWithTiny(nationRows)
                    .where(3)
                    .equalTo(0)
                    .projectFirst(0, 1, 2)
                    .projectSecond(1)
                    .projectFirst(4);

    // Append the revenue: (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> resultRows =
            customersWithNationName
                    .join(revenuePerCustomer)
                    .where(0)
                    .equalTo(0)
                    .projectFirst(0, 1, 2, 3, 4)
                    .projectSecond(1);

    // ---- output and execution ----

    resultRows.writeAsCsv(outputPath);
    env.execute("TPCH Query 10 Example");
}
Also used : DataSet(org.apache.flink.api.java.DataSet) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple5(org.apache.flink.api.java.tuple.Tuple5) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple6(org.apache.flink.api.java.tuple.Tuple6) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple5(org.apache.flink.api.java.tuple.Tuple5) Tuple6(org.apache.flink.api.java.tuple.Tuple6) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3)

Example 3 with Tuple6

use of org.apache.flink.api.java.tuple.Tuple6 in project flink by apache.

the class JoinITCase method testProjectOnATuple1Input.

/**
 * Joins the small 3-tuple data set with the 5-tuple data set on field 1 of both inputs and
 * projects every match onto a {@link Tuple6} made of, in order: fields 2 and 1 of the first
 * input, field 3 of the second input, field 0 of the first input, and fields 4 and 1 of the
 * second input.
 *
 * <p>The collected tuples are checked against the three expected rows below.
 */
@Test
public void testProjectOnATuple1Input() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> firstInput =
            CollectionDataSets.getSmall3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> secondInput =
            CollectionDataSets.get5TupleDataSet(env);

    // The order of the projection calls defines the field order of the resulting Tuple6.
    DataSet<Tuple6<String, Long, String, Integer, Long, Long>> projected =
            firstInput
                    .join(secondInput)
                    .where(1)
                    .equalTo(1)
                    .projectFirst(2, 1)
                    .projectSecond(3)
                    .projectFirst(0)
                    .projectSecond(4, 1);

    List<Tuple6<String, Long, String, Integer, Long, Long>> actualRows = projected.collect();

    // One row per line, each line (including the last) terminated by a newline.
    String expectedRows =
            String.join(
                            "\n",
                            "Hi,1,Hallo,1,1,1",
                            "Hello,2,Hallo Welt,2,2,2",
                            "Hello world,2,Hallo Welt,3,2,2")
                    + "\n";

    compareResultAsTuples(actualRows, expectedRows);
}
Also used : Tuple5(org.apache.flink.api.java.tuple.Tuple5) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple6(org.apache.flink.api.java.tuple.Tuple6) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 4 with Tuple6

use of org.apache.flink.api.java.tuple.Tuple6 in project flink by apache.

the class JoinITCase method testProjectJoinOnATuple2Input.

/**
 * Joins the small 3-tuple data set with the 5-tuple data set on field 1 of both inputs and
 * projects every match onto a {@link Tuple6} made of, in order: field 3 of the second input,
 * fields 2 and 1 of the first input, fields 4 and 1 of the second input, and field 0 of the
 * first input.
 *
 * <p>The collected tuples are checked against the three expected rows below.
 */
@Test
public void testProjectJoinOnATuple2Input() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> firstInput =
            CollectionDataSets.getSmall3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> secondInput =
            CollectionDataSets.get5TupleDataSet(env);

    // The order of the projection calls defines the field order of the resulting Tuple6.
    DataSet<Tuple6<String, String, Long, Long, Long, Integer>> projected =
            firstInput
                    .join(secondInput)
                    .where(1)
                    .equalTo(1)
                    .projectSecond(3)
                    .projectFirst(2, 1)
                    .projectSecond(4, 1)
                    .projectFirst(0);

    List<Tuple6<String, String, Long, Long, Long, Integer>> actualRows = projected.collect();

    // One row per line, each line (including the last) terminated by a newline.
    String expectedRows =
            String.join(
                            "\n",
                            "Hallo,Hi,1,1,1,1",
                            "Hallo Welt,Hello,2,2,2,2",
                            "Hallo Welt,Hello world,2,2,2,3")
                    + "\n";

    compareResultAsTuples(actualRows, expectedRows);
}
Also used : Tuple5(org.apache.flink.api.java.tuple.Tuple5) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple6(org.apache.flink.api.java.tuple.Tuple6) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 5 with Tuple6

use of org.apache.flink.api.java.tuple.Tuple6 in project flink by apache.

the class DeclarativeSlotManagerTest method testRequirementDeclaration.

/**
 * Shared body for the requirement-declaration tests: declares a requirement for one slot and
 * registers a task executor offering one matching slot, then verifies that a slot request
 * with the expected arguments reached the task executor gateway and that the tracked slot
 * belongs to the declaring job.
 *
 * @param scenario selects whether the task executor is registered before or after the
 *     resource requirements are processed
 * @throws Exception if the slot manager fails or waiting for the slot request fails
 */
private void testRequirementDeclaration(RequirementDeclarationScenario scenario) throws Exception {
    final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
    final ResourceID resourceID = ResourceID.generate();
    final JobID jobId = new JobID();
    final SlotID slotId = new SlotID(resourceID, 0);
    final String targetAddress = "localhost";
    final ResourceProfile resourceProfile = ResourceProfile.fromResources(42.0, 1337);
    final CompletableFuture<Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>> requestFuture = new CompletableFuture<>();
    // Accept an incoming slot request: record its arguments in requestFuture and acknowledge it.
    final TaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setRequestSlotFunction(tuple6 -> {
        requestFuture.complete(Tuple6.of(tuple6.f0, tuple6.f1, tuple6.f2, tuple6.f3, tuple6.f4, tuple6.f5));
        return CompletableFuture.completedFuture(Acknowledge.get());
    }).createTestingTaskExecutorGateway();
    final TaskExecutorConnection taskExecutorConnection = new TaskExecutorConnection(resourceID, taskExecutorGateway);
    final SlotStatus slotStatus = new SlotStatus(slotId, resourceProfile);
    final SlotReport slotReport = new SlotReport(slotStatus);
    final DefaultSlotTracker slotTracker = new DefaultSlotTracker();
    try (DeclarativeSlotManager slotManager = createDeclarativeSlotManagerBuilder().setSlotTracker(slotTracker).buildAndStartWithDirectExec(resourceManagerId, new TestingResourceActionsBuilder().build())) {
        if (scenario == RequirementDeclarationScenario.TASK_EXECUTOR_REGISTRATION_BEFORE_REQUIREMENT_DECLARATION) {
            slotManager.registerTaskManager(taskExecutorConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);
        }
        final ResourceRequirements requirements = ResourceRequirements.create(jobId, targetAddress, Collections.singleton(ResourceRequirement.create(resourceProfile, 1)));
        slotManager.processResourceRequirements(requirements);
        if (scenario == RequirementDeclarationScenario.TASK_EXECUTOR_REGISTRATION_AFTER_REQUIREMENT_DECLARATION) {
            slotManager.registerTaskManager(taskExecutorConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);
        }
        // Waits until the gateway has received the slot request.
        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> slotRequest = requestFuture.get();
        // The allocation id is not known to the test, so the received value (f2) is reused
        // in the expected tuple; only the other five fields are effectively checked.
        assertThat(slotRequest, is(equalTo(Tuple6.of(slotId, jobId, slotRequest.f2, resourceProfile, targetAddress, resourceManagerId))));
        DeclarativeTaskManagerSlot slot = slotTracker.getSlot(slotId);
        // This compares job ids, so the failure message names the job id.
        assertEquals("The slot has not been allocated to the expected job id.", jobId, slot.getJobId());
    }
}
Also used : ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) CoreMatchers.hasItem(org.hamcrest.CoreMatchers.hasItem) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple6(org.apache.flink.api.java.tuple.Tuple6) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) Assert.assertThat(org.junit.Assert.assertThat) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FunctionUtils(org.apache.flink.util.function.FunctionUtils) TestLogger(org.apache.flink.util.TestLogger) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotOccupiedException(org.apache.flink.runtime.taskexecutor.exceptions.SlotOccupiedException) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Collection(java.util.Collection) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) Set(java.util.Set) BlockingQueue(java.util.concurrent.BlockingQueue) SlotManagerMetricGroup(org.apache.flink.runtime.metrics.groups.SlotManagerMetricGroup) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TestingUtils(org.apache.flink.testutils.TestingUtils) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) List(java.util.List) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Assert.assertFalse(org.junit.Assert.assertFalse) 
Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) SlotAllocationException(org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) FlinkException(org.apache.flink.util.FlinkException) WorkerResourceSpec(org.apache.flink.runtime.resourcemanager.WorkerResourceSpec) CoreMatchers.not(org.hamcrest.CoreMatchers.not) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Assert.assertSame(org.junit.Assert.assertSame) ManuallyTriggeredScheduledExecutorService(org.apache.flink.core.testutils.ManuallyTriggeredScheduledExecutorService) TestingMetricRegistry(org.apache.flink.runtime.metrics.util.TestingMetricRegistry) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) Matchers.hasSize(org.hamcrest.Matchers.hasSize) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Matchers.empty(org.hamcrest.Matchers.empty) Iterator(java.util.Iterator) Executor(java.util.concurrent.Executor) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) SystemExitTrackingSecurityManager(org.apache.flink.runtime.testutils.SystemExitTrackingSecurityManager) Test(org.junit.Test) InstanceID(org.apache.flink.runtime.instance.InstanceID) Iterators(org.apache.flink.shaded.guava30.com.google.common.collect.Iterators) TimeUnit(java.util.concurrent.TimeUnit) JobID(org.apache.flink.api.common.JobID) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) 
Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) Tuple6(org.apache.flink.api.java.tuple.Tuple6) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) JobID(org.apache.flink.api.common.JobID) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection)

Aggregations

Tuple6 (org.apache.flink.api.java.tuple.Tuple6)25 Test (org.junit.Test)20 CompletableFuture (java.util.concurrent.CompletableFuture)11 JobID (org.apache.flink.api.common.JobID)11 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)11 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)11 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)11 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)11 Acknowledge (org.apache.flink.runtime.messages.Acknowledge)11 ResourceManagerId (org.apache.flink.runtime.resourcemanager.ResourceManagerId)11 TaskExecutorConnection (org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection)11 ResourceRequirement (org.apache.flink.runtime.slots.ResourceRequirement)11 SlotReport (org.apache.flink.runtime.taskexecutor.SlotReport)11 TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder)11 Matchers.empty (org.hamcrest.Matchers.empty)11 Assert.assertFalse (org.junit.Assert.assertFalse)11 Assert.assertThat (org.junit.Assert.assertThat)11 ArrayList (java.util.ArrayList)10 List (java.util.List)10 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)10