Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
In the class CountOfCountsTest, the method countOfCountsExternalSortMultiNC:
@Test
public void countOfCountsExternalSortMultiNC() throws Exception {
    JobSpecification spec = new JobSpecification();
    // Scan data/words.txt on NC2.
    FileSplit[] splits =
            new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc =
            new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(
                    new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','),
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    // Sort the words externally across four partitions on NC1 and NC2.
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    // Count the occurrences of each word on the pre-sorted stream.
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    // Sort the (word, count) records by count in memory.
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
    RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    // Group by count to produce the count of counts.
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    // Write the final result on NC1.
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    // Hash-partition the scanned words by word into the sorter.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(
            new int[] { 0 },
            new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);
    // Re-partition the (word, count) records by count for the second grouping.
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(
            new int[] { 1 },
            new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);
    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);
    // Broadcast the final counts to the single result writer.
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
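The Hyracks integration tests on this page all follow the same skeleton: build a JobSpecification, register operator descriptors together with absolute location constraints, wire the operators with connector descriptors, declare the sink as a root, and run the job. The following is a minimal sketch of that skeleton, not a test from the repository; it assumes test-class fixtures with the same roles as in these examples (NC1_ID, splitProvider, tupleParserFactory, desc, and runTest).

// Minimal sketch of the common JobSpecification wiring pattern (assumed test-class fixtures:
// NC1_ID, splitProvider, tupleParserFactory, desc, runTest).
JobSpecification spec = new JobSpecification();

// Source: scan the input file on one node controller.
FileScanOperatorDescriptor scanner =
        new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, NC1_ID);

// Sink: write the scanned tuples to a result set.
ResultSetId rsId = new ResultSetId(1);
IOperatorDescriptor writer = new ResultWriterOperatorDescriptor(spec, rsId, true, false,
        ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
spec.addResultSetId(rsId);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, writer, NC1_ID);

// Connect source to sink, declare the root, and run.
spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, writer, 0);
spec.addRoot(writer);
runTest(spec);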
Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
In the class AggregationTest, the method singleKeyAvgExtGroupTest:
@Test
public void singleKeyAvgExtGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // Scan the input file using the shared split provider and tuple parser fixtures of the test class.
    FileScanOperatorDescriptor csvScanner =
            new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 0 };
    int frameLimits = 5;
    int tableSize = 8;
    long fileSize = frameLimits * spec.getFrameSize();
    // External (spillable) group-by: the first aggregator factory computes partial aggregates,
    // the second merges partial results from spilled runs.
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize,
            keyFields, frameLimits,
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new UTF8StringNormalizedKeyComputerFactory(),
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                    new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
                    new AvgFieldGroupAggregatorFactory(1, false) }),
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                    new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false),
                    new AvgFieldMergeAggregatorFactory(3, false) }),
            outputRec, outputRec,
            new HashSpillableTableFactory(
                    new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    // Hash-partition the input by the grouping key before it reaches the grouper.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(
            keyFields,
            new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
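In ExternalGroupOperatorDescriptor the two MultiFieldsAggregatorFactory arguments play different roles: the first produces per-key partial aggregates while input tuples are hashed into the spillable table, and the second combines partial results when spilled runs are merged, which is why the merge-side factories reference fields 1-3 of the partial output rather than fields of the raw input. The following reduced sketch configures the same operator for a single per-key count; it reuses the spec, fixtures, and constants of the test above and is an illustration, not code from AggregationTest.

// Sketch: external group-by with one count aggregate per key (assumed fixtures from the test above).
RecordDescriptor countRec = new RecordDescriptor(new ISerializerDeserializer[] {
        new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
ExternalGroupOperatorDescriptor countGrouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize,
        keyFields, frameLimits,
        new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
        new UTF8StringNormalizedKeyComputerFactory(),
        // Partial side: count the input tuples that hash to each key.
        new MultiFieldsAggregatorFactory(
                new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(false) }),
        // Merge side: sum the partial counts, which sit in field 1 of the partial output (key, count).
        new MultiFieldsAggregatorFactory(
                new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false) }),
        countRec, countRec,
        new HashSpillableTableFactory(
                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));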
Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
In the class WriteResultPOperator, the method contributeRuntimeOperator:
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op,
        IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema)
        throws AlgebricksException {
    WriteResultOperator writeResultOp = (WriteResultOperator) op;
    IMetadataProvider mp = context.getMetadataProvider();
    JobSpecification spec = builder.getJobSpec();
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtimeAndConstraints = mp.getWriteResultRuntime(
            dataSource, propagatedSchema, keys, payload, additionalFilteringKeys, context, spec);
    builder.contributeHyracksOperator(writeResultOp, runtimeAndConstraints.first);
    builder.contributeAlgebricksPartitionConstraint(runtimeAndConstraints.first, runtimeAndConstraints.second);
    ILogicalOperator src = writeResultOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, writeResultOp, 0);
}
Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
In the class JobManagerTest, the method testAdmitThenReject:
@Test
public void testAdmitThenReject() throws HyracksException {
    IJobCapacityController jobCapacityController = mock(IJobCapacityController.class);
    IJobManager jobManager = spy(new JobManager(ccConfig, mockClusterControllerService(), jobCapacityController));
    // A pending job should also be rejected if its requirement exceeds the updated maximum capacity of the cluster.
    // A normal run.
    JobRun run1 = mockJobRun(1);
    JobSpecification job1 = mock(JobSpecification.class);
    when(run1.getJobSpecification()).thenReturn(job1);
    when(jobCapacityController.allocate(job1)).thenReturn(IJobCapacityController.JobSubmissionStatus.EXECUTE);
    jobManager.add(run1);
    // A failure run.
    JobRun run2 = mockJobRun(2);
    JobSpecification job2 = mock(JobSpecification.class);
    when(run2.getJobSpecification()).thenReturn(job2);
    when(jobCapacityController.allocate(job2)).thenReturn(IJobCapacityController.JobSubmissionStatus.QUEUE)
            .thenThrow(HyracksException.create(ErrorCode.JOB_REQUIREMENTS_EXCEED_CAPACITY, "1", "0"));
    jobManager.add(run2);
    // Completes the first run.
    jobManager.prepareComplete(run1, JobStatus.TERMINATED, Collections.emptyList());
    jobManager.finalComplete(run1);
    // Verifies job status of the failed job.
    verify(run2, times(1)).setStatus(eq(JobStatus.PENDING), any());
    verify(run2, times(1)).setPendingStatus(eq(JobStatus.FAILURE), any());
}
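The helpers mockJobRun(...) and mockClusterControllerService() belong to JobManagerTest and are not shown on this page. A plausible sketch of the first, assuming it only needs to answer getJobId() for the paths exercised here (the real helper may stub more of JobRun), could look like this:

// Hypothetical sketch of the mockJobRun helper; the actual implementation lives in JobManagerTest.
private JobRun mockJobRun(long id) {
    JobRun run = mock(JobRun.class);
    // These tests only rely on the job id (and on getJobSpecification(), which they stub themselves).
    when(run.getJobId()).thenReturn(new JobId(id));
    return run;
}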
Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
In the class JobManagerTest, the method testCancel:
@Test
public void testCancel() throws HyracksException {
    CCConfig ccConfig = new CCConfig();
    IJobCapacityController jobCapacityController = mock(IJobCapacityController.class);
    IJobManager jobManager = spy(new JobManager(ccConfig, mockClusterControllerService(), jobCapacityController));
    // Submits runnable jobs.
    List<JobRun> acceptedRuns = new ArrayList<>();
    for (int id = 0; id < 4096; ++id) {
        // Mocks an immediately executable job.
        JobRun run = mockJobRun(id);
        JobSpecification job = mock(JobSpecification.class);
        when(run.getJobSpecification()).thenReturn(job);
        when(jobCapacityController.allocate(job)).thenReturn(IJobCapacityController.JobSubmissionStatus.EXECUTE);
        // Submits the job.
        acceptedRuns.add(run);
        jobManager.add(run);
        Assert.assertTrue(jobManager.getRunningJobs().size() == id + 1);
        Assert.assertTrue(jobManager.getPendingJobs().isEmpty());
    }
    // Submits jobs that will be deferred due to the capacity limitation.
    List<JobRun> deferredRuns = new ArrayList<>();
    for (int id = 4096; id < 8192; ++id) {
        // Mocks a deferred job.
        JobRun run = mockJobRun(id);
        JobSpecification job = mock(JobSpecification.class);
        when(run.getJobSpecification()).thenReturn(job);
        when(jobCapacityController.allocate(job)).thenReturn(IJobCapacityController.JobSubmissionStatus.QUEUE)
                .thenReturn(IJobCapacityController.JobSubmissionStatus.EXECUTE);
        // Submits the job.
        deferredRuns.add(run);
        jobManager.add(run);
        Assert.assertTrue(jobManager.getRunningJobs().size() == 4096);
        Assert.assertTrue(jobManager.getPendingJobs().size() == id + 1 - 4096);
    }
    // Cancels deferred jobs.
    for (JobRun run : deferredRuns) {
        jobManager.cancel(run.getJobId());
    }
    // Cancels runnable jobs.
    for (JobRun run : acceptedRuns) {
        jobManager.cancel(run.getJobId());
    }
    Assert.assertTrue(jobManager.getPendingJobs().isEmpty());
    Assert.assertTrue(jobManager.getArchivedJobs().size() == ccConfig.getJobHistorySize());
    verify(jobManager, times(0)).prepareComplete(any(), any(), any());
    verify(jobManager, times(0)).finalComplete(any());
}
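Because jobCapacityController is a Mockito mock, the EXECUTE and QUEUE decisions above are scripted per job with thenReturn chains rather than computed from real resource accounting. An alternative stubbing style, shown below purely as an illustration (it mirrors only the initial admission decisions of the two loops and is not the code used in JobManagerTest), answers every allocate call from a single stub that admits the first 4096 jobs and queues the rest:

// Illustration: one stub (using java.util.concurrent.atomic.AtomicInteger) that admits the
// first 4096 allocation requests and queues the rest.
AtomicInteger admitted = new AtomicInteger();
when(jobCapacityController.allocate(any(JobSpecification.class))).thenAnswer(invocation ->
        admitted.getAndIncrement() < 4096
                ? IJobCapacityController.JobSubmissionStatus.EXECUTE
                : IJobCapacityController.JobSubmissionStatus.QUEUE);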