Search in sources :

Example 1 with Scheduler

use of org.apache.hyracks.hdfs.scheduler.Scheduler in project asterixdb by apache.

the class DataflowTest method testHDFSReadWriteOperators.

/**
     * Test a job with only HDFS read and writes.
     *
     * @throws Exception
     */
public void testHDFSReadWriteOperators() throws Exception {
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
    conf.setInputFormat(TextInputFormat.class);
    Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, numberOfNC * 4);
    String[] readSchedule = scheduler.getLocationConstraints(splits);
    JobSpecification jobSpec = new JobSpecification();
    RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits, readSchedule, new TextKeyValueParserFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
    ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
    HDFSWriteOperatorDescriptor writeOperator = new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
    jobSpec.connect(new MToNPartitioningMergingConnectorDescriptor(jobSpec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }), new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null), sortOperator, 0, writeOperator, 0);
    jobSpec.addRoot(writeOperator);
    IHyracksClientConnection client = new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    JobId jobId = client.startJob(jobSpec);
    client.waitForCompletion(jobId);
    Assert.assertEquals(true, checkResults());
}
Also used : Path(org.apache.hadoop.fs.Path) IHyracksClientConnection(org.apache.hyracks.api.client.IHyracksClientConnection) Scheduler(org.apache.hyracks.hdfs.scheduler.Scheduler) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) MToNPartitioningMergingConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) TextTupleWriterFactory(org.apache.hyracks.hdfs.lib.TextTupleWriterFactory) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) HyracksConnection(org.apache.hyracks.api.client.HyracksConnection) InputSplit(org.apache.hadoop.mapred.InputSplit) TextKeyValueParserFactory(org.apache.hyracks.hdfs.lib.TextKeyValueParserFactory) JobId(org.apache.hyracks.api.job.JobId)

Example 2 with Scheduler

use of org.apache.hyracks.hdfs.scheduler.Scheduler in project asterixdb by apache.

the class HDFSUtils method initializeHDFSScheduler.

public static Scheduler initializeHDFSScheduler(ICCServiceContext serviceCtx) throws HyracksDataException {
    ICCContext ccContext = serviceCtx.getCCContext();
    Scheduler scheduler = null;
    try {
        scheduler = new Scheduler(ccContext.getClusterControllerInfo().getClientNetAddress(), ccContext.getClusterControllerInfo().getClientNetPort());
    } catch (HyracksException e) {
        throw new RuntimeDataException(ErrorCode.UTIL_HDFS_UTILS_CANNOT_OBTAIN_HDFS_SCHEDULER);
    }
    return scheduler;
}
Also used : Scheduler(org.apache.hyracks.hdfs.scheduler.Scheduler) IndexingScheduler(org.apache.asterix.external.indexing.IndexingScheduler) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) ICCContext(org.apache.hyracks.api.context.ICCContext) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException)

Aggregations

Scheduler (org.apache.hyracks.hdfs.scheduler.Scheduler)2 RuntimeDataException (org.apache.asterix.common.exceptions.RuntimeDataException)1 IndexingScheduler (org.apache.asterix.external.indexing.IndexingScheduler)1 Path (org.apache.hadoop.fs.Path)1 InputSplit (org.apache.hadoop.mapred.InputSplit)1 HyracksConnection (org.apache.hyracks.api.client.HyracksConnection)1 IHyracksClientConnection (org.apache.hyracks.api.client.IHyracksClientConnection)1 ICCContext (org.apache.hyracks.api.context.ICCContext)1 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)1 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)1 HyracksException (org.apache.hyracks.api.exceptions.HyracksException)1 JobId (org.apache.hyracks.api.job.JobId)1 JobSpecification (org.apache.hyracks.api.job.JobSpecification)1 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)1 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)1 MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor)1 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)1 ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)1 TextKeyValueParserFactory (org.apache.hyracks.hdfs.lib.TextKeyValueParserFactory)1 TextTupleWriterFactory (org.apache.hyracks.hdfs.lib.TextTupleWriterFactory)1