Example 71 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class JobManagerTest method test.

@Test
public void test() throws IOException, CmdLineException {
    IJobCapacityController jobCapacityController = mock(IJobCapacityController.class);
    IJobManager jobManager = spy(new JobManager(ccConfig, mockClusterControllerService(), jobCapacityController));
    // Submits runnable jobs.
    List<JobRun> acceptedRuns = new ArrayList<>();
    for (int id = 0; id < 4096; ++id) {
        // Mocks an immediately executable job.
        JobRun run = mockJobRun(id);
        JobSpecification job = mock(JobSpecification.class);
        when(run.getJobSpecification()).thenReturn(job);
        when(jobCapacityController.allocate(job)).thenReturn(IJobCapacityController.JobSubmissionStatus.EXECUTE);
        // Submits the job.
        acceptedRuns.add(run);
        jobManager.add(run);
        Assert.assertTrue(jobManager.getRunningJobs().size() == id + 1);
        Assert.assertTrue(jobManager.getPendingJobs().isEmpty());
    }
    // Submits jobs that will be deferred due to the capacity limitation.
    List<JobRun> deferredRuns = new ArrayList<>();
    for (int id = 4096; id < 8192; ++id) {
        // Mocks a deferred job.
        JobRun run = mockJobRun(id);
        JobSpecification job = mock(JobSpecification.class);
        when(run.getJobSpecification()).thenReturn(job);
        when(jobCapacityController.allocate(job)).thenReturn(IJobCapacityController.JobSubmissionStatus.QUEUE)
                .thenReturn(IJobCapacityController.JobSubmissionStatus.EXECUTE);
        // Submits the job.
        deferredRuns.add(run);
        jobManager.add(run);
        Assert.assertTrue(jobManager.getRunningJobs().size() == 4096);
        Assert.assertTrue(jobManager.getPendingJobs().size() == id + 1 - 4096);
    }
    // Further jobs will be denied because the job queue is full.
    boolean jobQueueFull = false;
    try {
        JobRun run = mockJobRun(8193);
        JobSpecification job = mock(JobSpecification.class);
        when(run.getJobSpecification()).thenReturn(job);
        when(jobCapacityController.allocate(job)).thenReturn(IJobCapacityController.JobSubmissionStatus.QUEUE)
                .thenReturn(IJobCapacityController.JobSubmissionStatus.EXECUTE);
        jobManager.add(run);
    } catch (HyracksException e) {
        // Verifies the error code.
        jobQueueFull = e.getErrorCode() == ErrorCode.JOB_QUEUE_FULL;
    }
    Assert.assertTrue(jobQueueFull);
    // Completes runnable jobs.
    for (JobRun run : acceptedRuns) {
        jobManager.prepareComplete(run, JobStatus.TERMINATED, Collections.emptyList());
        jobManager.finalComplete(run);
    }
    Assert.assertTrue(jobManager.getRunningJobs().size() == 4096);
    Assert.assertTrue(jobManager.getPendingJobs().isEmpty());
    Assert.assertTrue(jobManager.getArchivedJobs().size() == ccConfig.getJobHistorySize());
    // Completes deferred jobs.
    for (JobRun run : deferredRuns) {
        jobManager.prepareComplete(run, JobStatus.TERMINATED, Collections.emptyList());
        jobManager.finalComplete(run);
    }
    Assert.assertTrue(jobManager.getRunningJobs().isEmpty());
    Assert.assertTrue(jobManager.getPendingJobs().isEmpty());
    Assert.assertTrue(jobManager.getArchivedJobs().size() == ccConfig.getJobHistorySize());
    verify(jobManager, times(8192)).prepareComplete(any(), any(), any());
    verify(jobManager, times(8192)).finalComplete(any());
}
Also used: IJobCapacityController(org.apache.hyracks.api.job.resource.IJobCapacityController), ArrayList(java.util.ArrayList), HyracksException(org.apache.hyracks.api.exceptions.HyracksException), JobSpecification(org.apache.hyracks.api.job.JobSpecification), Test(org.junit.Test)
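
Both JobManagerTest examples lean on helpers (mockJobRun, mockClusterControllerService) that this excerpt does not show. Below is a minimal sketch of what mockJobRun might look like with Mockito deep stubs, assuming the usual Mockito imports; the exact body and stubbed calls are assumptions, not the project's code.

// Hypothetical reconstruction of the mockJobRun helper used above.
private JobRun mockJobRun(long id) {
    // Deep stubs let chained calls such as run.getActivityClusterGraph().* return mocks automatically.
    JobRun run = mock(JobRun.class, Mockito.RETURNS_DEEP_STUBS);
    when(run.getJobId()).thenReturn(new JobId(id));
    when(run.getExceptions()).thenReturn(Collections.emptyList());
    return run;
}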

Example 72 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class JobManagerTest method testExceedMax.

@Test
public void testExceedMax() throws HyracksException {
    IJobCapacityController jobCapacityController = mock(IJobCapacityController.class);
    IJobManager jobManager = spy(new JobManager(ccConfig, mockClusterControllerService(), jobCapacityController));
    boolean rejected = false;
    // A job should be rejected immediately if its requirement exceeds the maximum capacity of the cluster.
    try {
        JobRun run = mockJobRun(1);
        JobSpecification job = mock(JobSpecification.class);
        when(run.getJobSpecification()).thenReturn(job);
        when(jobCapacityController.allocate(job))
                .thenThrow(HyracksException.create(ErrorCode.JOB_REQUIREMENTS_EXCEED_CAPACITY, "1", "0"));
        jobManager.add(run);
    } catch (HyracksException e) {
        // Verifies the error code.
        rejected = e.getErrorCode() == ErrorCode.JOB_REQUIREMENTS_EXCEED_CAPACITY;
    }
    Assert.assertTrue(rejected);
    Assert.assertTrue(jobManager.getRunningJobs().isEmpty());
    Assert.assertTrue(jobManager.getPendingJobs().isEmpty());
    Assert.assertTrue(jobManager.getArchivedJobs().isEmpty());
}
Also used: IJobCapacityController(org.apache.hyracks.api.job.resource.IJobCapacityController), HyracksException(org.apache.hyracks.api.exceptions.HyracksException), JobSpecification(org.apache.hyracks.api.job.JobSpecification), Test(org.junit.Test)
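
In both tests allocate is mocked; a real IJobCapacityController has to pick between the three outcomes the tests exercise: execute now, queue for later, or reject outright. Here is a rough sketch of that contract, with a plain slot counter standing in for real memory and core accounting; the release signature is assumed to mirror allocate, and the one-slot-per-job rule is purely illustrative.

// Illustrative capacity controller; not the project's JobCapacityController.
class SlotCapacityController implements IJobCapacityController {
    private final int maxSlots;
    private int usedSlots;

    SlotCapacityController(int maxSlots) {
        this.maxSlots = maxSlots;
    }

    @Override
    public synchronized JobSubmissionStatus allocate(JobSpecification job) throws HyracksException {
        int required = 1; // assumption: every job needs exactly one slot
        if (required > maxSlots) {
            // Can never run on this cluster: reject immediately (Example 72).
            throw HyracksException.create(ErrorCode.JOB_REQUIREMENTS_EXCEED_CAPACITY,
                    String.valueOf(required), String.valueOf(maxSlots));
        }
        if (usedSlots + required > maxSlots) {
            // Cluster is busy: defer until a running job releases capacity (Example 71).
            return JobSubmissionStatus.QUEUE;
        }
        usedSlots += required;
        return JobSubmissionStatus.EXECUTE;
    }

    @Override
    public synchronized void release(JobSpecification job) {
        usedSlots--;
    }
}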

Example 73 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class DataflowTest method testHDFSReadWriteOperators.

/**
 * Test a job with only HDFS reads and writes.
 *
 * @throws Exception
 */
public void testHDFSReadWriteOperators() throws Exception {
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
    conf.setInputFormat(TextInputFormat.class);
    Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, numberOfNC * 4);
    String[] readSchedule = scheduler.getLocationConstraints(splits);
    JobSpecification jobSpec = new JobSpecification();
    RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits, readSchedule, new TextKeyValueParserFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
    ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 },
            new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
    HDFSWriteOperatorDescriptor writeOperator = new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
    jobSpec.connect(
            new MToNPartitioningMergingConnectorDescriptor(jobSpec,
                    new FieldHashPartitionComputerFactory(new int[] { 0 },
                            new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }),
                    new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null),
            sortOperator, 0, writeOperator, 0);
    jobSpec.addRoot(writeOperator);
    IHyracksClientConnection client = new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    JobId jobId = client.startJob(jobSpec);
    client.waitForCompletion(jobId);
    Assert.assertTrue(checkResults());
}
Also used: Path(org.apache.hadoop.fs.Path), IHyracksClientConnection(org.apache.hyracks.api.client.IHyracksClientConnection), Scheduler(org.apache.hyracks.hdfs.scheduler.Scheduler), RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor), MToNPartitioningMergingConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor), IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), TextTupleWriterFactory(org.apache.hyracks.hdfs.lib.TextTupleWriterFactory), ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor), JobSpecification(org.apache.hyracks.api.job.JobSpecification), HyracksConnection(org.apache.hyracks.api.client.HyracksConnection), InputSplit(org.apache.hadoop.mapred.InputSplit), TextKeyValueParserFactory(org.apache.hyracks.hdfs.lib.TextKeyValueParserFactory), JobId(org.apache.hyracks.api.job.JobId)
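
A note on the two connectors in this job: the OneToOneConnectorDescriptor pipelines each HDFS read partition straight into the sort partition on the same node, while the MToNPartitioningMergingConnectorDescriptor hash-partitions the sorted streams on field 0 and merges them with the same comparator, so the single writer on NC1 receives one fully ordered stream.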

Example 74 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class Groupby method main.

public static void main(String[] args) throws Exception {
    Options options = new Options();
    CmdLineParser parser = new CmdLineParser(options);
    if (args.length == 0) {
        parser.printUsage(System.err);
        return;
    }
    parser.parseArgument(args);
    // Connect to the Hyracks cluster controller and build the group-by job.
    IHyracksClientConnection hcc = new HyracksConnection(options.host, options.port);
    JobSpecification job;
    long start = System.currentTimeMillis();
    job = createJob(parseFileSplits(options.inFileSplits), parseFileSplits(options.outFileSplits), options.htSize,
            options.fileSize, options.frameLimit, options.frameSize, options.algo, options.outPlain);
    if (job != null) {
        System.out.print("CreateJobTime:" + (System.currentTimeMillis() - start));
        start = System.currentTimeMillis();
        JobId jobId = hcc.startJob(job);
        hcc.waitForCompletion(jobId);
        System.out.println("JobExecuteTime:" + (System.currentTimeMillis() - start));
    }
}
Also used: IHyracksClientConnection(org.apache.hyracks.api.client.IHyracksClientConnection), CmdLineParser(org.kohsuke.args4j.CmdLineParser), JobSpecification(org.apache.hyracks.api.job.JobSpecification), HyracksConnection(org.apache.hyracks.api.client.HyracksConnection), JobId(org.apache.hyracks.api.job.JobId)
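
The Options class is not shown in this excerpt; with args4j it is a plain bean whose public fields carry @Option annotations that CmdLineParser fills in. A hypothetical fragment covering the first few fields this main method reads is below; the field names match the code above, but the flag names, usage strings, and defaults are invented.

// Hypothetical args4j Options bean for the Groupby client.
static class Options {
    @Option(name = "-host", usage = "cluster controller host", required = true)
    public String host;

    @Option(name = "-port", usage = "cluster controller client port")
    public int port = 1098;

    @Option(name = "-infile-splits", usage = "comma-separated input file splits", required = true)
    public String inFileSplits;

    // ...htSize, fileSize, frameLimit, frameSize, algo, and outPlain follow the same pattern.
}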

Example 75 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class Join method main.

public static void main(String[] args) throws Exception {
    Options options = new Options();
    CmdLineParser parser = new CmdLineParser(options);
    if (args.length == 0) {
        parser.printUsage(System.err);
        return;
    }
    parser.parseArgument(args);
    // Connect to the Hyracks cluster controller and build the join job.
    IHyracksClientConnection hcc = new HyracksConnection(options.host, options.port);
    JobSpecification job = createJob(parseFileSplits(options.inFileCustomerSplits),
            parseFileSplits(options.inFileOrderSplits), parseFileSplits(options.outFileSplits),
            options.numJoinPartitions, options.algo, options.graceInputSize, options.graceRecordsPerFrame,
            options.graceFactor, options.memSize, options.tableSize, options.hasGroupBy, options.frameSize);
    if (job == null) {
        return;
    }
    long start = System.currentTimeMillis();
    JobId jobId = hcc.startJob(job, options.profile ? EnumSet.of(JobFlag.PROFILE_RUNTIME) : EnumSet.noneOf(JobFlag.class));
    hcc.waitForCompletion(jobId);
    long end = System.currentTimeMillis();
    System.err.println(start + " " + end + " " + (end - start));
}
Also used: IHyracksClientConnection(org.apache.hyracks.api.client.IHyracksClientConnection), CmdLineParser(org.kohsuke.args4j.CmdLineParser), JobSpecification(org.apache.hyracks.api.job.JobSpecification), HyracksConnection(org.apache.hyracks.api.client.HyracksConnection), JobId(org.apache.hyracks.api.job.JobId)
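
The only notable difference from the Groupby client is the profiling switch: passing EnumSet.of(JobFlag.PROFILE_RUNTIME) to startJob asks the cluster controller to collect runtime statistics for the job, whereas EnumSet.noneOf(JobFlag.class) runs it without that overhead.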

Aggregations

JobSpecification (org.apache.hyracks.api.job.JobSpecification)182 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)90 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)88 Test (org.junit.Test)82 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)77 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)67 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)61 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)59 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)59 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)45 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)45 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)41 FileSplit (org.apache.hyracks.api.io.FileSplit)40 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)38 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)35 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)29 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)29 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)26 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)24 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)23