Search in sources:

Example 71 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project flink by apache.

the class HadoopInputFormatBase method createInputSplits.

/**
 * Asks the wrapped Hadoop input format for its splits and wraps each one in a
 * {@link HadoopInputSplit} whose split number is its position in the returned list.
 *
 * <p>The job context handed to the input format carries the credentials stored on this
 * instance plus, when available, those obtained for the current user.
 *
 * @param minNumSplits value written to the
 *     {@code mapreduce.input.fileinputformat.split.minsize} configuration key before the
 *     splits are computed
 * @return one wrapper per split returned by the Hadoop input format
 * @throws IOException if the splits cannot be computed, including when the computation is
 *     interrupted
 */
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    // NOTE(review): a split *count* is stored under a key named like a split *size* -- confirm
    // this is intended before relying on it.
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }
    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        splits = this.mapreduceInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up the stack can still observe it;
        // translating to IOException alone would silently drop the interruption.
        Thread.currentThread().interrupt();
        throw new IOException("Could not get Splits.", e);
    }
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) HadoopInputSplit(org.apache.flink.api.java.hadoop.mapreduce.wrapper.HadoopInputSplit) IOException(java.io.IOException) JobContext(org.apache.hadoop.mapreduce.JobContext) HadoopInputSplit(org.apache.flink.api.java.hadoop.mapreduce.wrapper.HadoopInputSplit) JobID(org.apache.hadoop.mapreduce.JobID) Credentials(org.apache.hadoop.security.Credentials)

Example 72 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project goldenorb by jzachr.

the class OrbPartition method dumpData.

/**
 * Writes every vertex of this partition through a Hadoop file output format, making up to
 * 15 attempts.
 *
 * <p>An attempt is treated as failed when one of the caught exceptions is thrown or when the
 * output committer reports that no task commit is needed. After each failed attempt the
 * method waits up to one second on this object's monitor before the next attempt. Failures
 * are printed rather than rethrown, so the method returns normally even when every attempt
 * fails.
 */
private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;
    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15) {
        try {
            count++;
            tryAgain = false;
            // The job and its context are created once and reused by later attempts.
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job, new Path(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath()));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }
            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
            // A fresh task attempt context is built on every attempt.
            tao = new TaskAttemptContext(jobContext.getConfiguration(), new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            rw = outputFormat.getRecordWriter(tao);
            vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
            for (Vertex v : vertices.values()) {
                OrbContext oc = vw.vertexWrite(v);
                rw.write(oc.getKey(), oc.getValue());
            }
            rw.close(tao);
            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            if (cm.needsTaskCommit(tao)) {
                cm.commitTask(tao);
                cm.cleanupJob(jobContext);
            } else {
                // Nothing to commit is handled as a failed attempt: clean up and retry.
                cm.cleanupJob(jobContext);
                tryAgain = true;
            }
        } catch (IOException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InstantiationException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InterruptedException e) {
            // NOTE(review): an interrupt is handled like any other retryable failure and the
            // interrupt status is not restored -- confirm that this is the desired policy.
            tryAgain = true;
            e.printStackTrace();
        }
        // Back off before the next attempt. This wait has to live inside the loop; placed
        // after it, the retries would run back-to-back with no pause between them.
        if (tryAgain) {
            synchronized (this) {
                try {
                    wait(1000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
Also used : FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) Path(org.apache.hadoop.fs.Path) TaskID(org.apache.hadoop.mapreduce.TaskID) Configuration(org.apache.hadoop.conf.Configuration) OrbConfiguration(org.goldenorb.conf.OrbConfiguration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TextOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat) OrbContext(org.goldenorb.io.output.OrbContext) VertexWriter(org.goldenorb.io.output.VertexWriter) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) JobID(org.apache.hadoop.mapreduce.JobID)

Example 73 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project goldenorb by jzachr.

the class VertexInput method initialize.

/**
 * Rebuilds the input split from its serialized bytes and opens a record reader over it.
 *
 * <p>The bytes held by {@code rawSplit} are deserialized with the deserializer registered for
 * the class named by {@code splitClass}. The input format class configured on the job context
 * is then instantiated, asked for a record reader for the split, and that reader is
 * initialized and stored in {@code recordReader}.
 *
 * @throws RuntimeException wrapping the {@link ClassNotFoundException}, {@link IOException} or
 *     {@link InterruptedException} raised while rebuilding the split or creating the reader
 */
@SuppressWarnings("unchecked")
public void initialize() {
    // Expose the serialized split bytes as a stream the deserializer can read from.
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    try {
        Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory.getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job, new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to callers; wrapping alone would discard it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
}
Also used : TaskID(org.apache.hadoop.mapreduce.TaskID) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) JobContext(org.apache.hadoop.mapreduce.JobContext) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapreduce.JobID)

Example 74 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project goldenorb by jzachr.

the class InputSplitAllocator method assignInputSplits.

/**
 * Computes the input splits for the configured input format, serializes each one into a
 * {@link RawSplit}, and passes the resulting list to {@code assignInputSplits(List)}.
 *
 * <p>Each raw split records the split's class name, data length, serialized bytes and
 * locations. When the input format returns no splits, an empty list is passed on.
 *
 * @return the assignment produced by {@code assignInputSplits(List)} for the raw splits
 * @throws RuntimeException wrapping the {@link ClassNotFoundException}, {@link IOException} or
 *     {@link InterruptedException} raised while computing or serializing the splits
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    try {
        org.apache.hadoop.mapreduce.InputFormat<?, ?> input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        // The serializer is chosen from the first split's class, so it can only be looked up
        // when at least one split exists; indexing an empty list would throw.
        if (!splits.isEmpty()) {
            DataOutputBuffer buffer = new DataOutputBuffer();
            SerializationFactory factory = new SerializationFactory(orbConf);
            Serializer serializer = factory.getSerializer(splits.get(0).getClass());
            serializer.open(buffer);
            for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
                // Reuse one buffer: reset it so it holds only the current split's bytes.
                buffer.reset();
                serializer.serialize(split);
                RawSplit rawSplit = new RawSplit();
                rawSplit.setClassName(split.getClass().getName());
                rawSplit.setDataLength(split.getLength());
                rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                rawSplit.setLocations(split.getLocations());
                rawSplits.add(rawSplit);
            }
        }
    } catch (ClassNotFoundException e) {
        // The cause travels with the rethrown exception, so it is not printed separately.
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to callers; wrapping alone would discard it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}
Also used : RawSplit(org.goldenorb.io.input.RawSplit) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) IOException(java.io.IOException) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) JobContext(org.apache.hadoop.mapreduce.JobContext) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapreduce.JobID) Serializer(org.apache.hadoop.io.serializer.Serializer)

Example 75 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project hbase by apache.

the class TestTableInputFormatBase method testNonSuccessiveSplitsAreNotMerged.

/**
 * Checks the number of splits produced with auto-balancing enabled when one split is
 * excluded: only successive splits may be merged.
 */
@Test
public void testNonSuccessiveSplitsAreNotMerged() throws IOException {
    // Build the configuration first, then hand it to both the mocked context and the format.
    Configuration mergeConf = HBaseConfiguration.create();
    mergeConf.set(ConnectionUtils.HBASE_CLIENT_CONNECTION_IMPL, ConnectionForMergeTesting.class.getName());
    mergeConf.set(TableInputFormat.INPUT_TABLE, "testTable");
    mergeConf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);

    JobContext mockedContext = mock(JobContext.class);
    when(mockedContext.getConfiguration()).thenReturn(mergeConf);

    TableInputFormat inputFormat = new TableInputFormatForMergeTesting();
    inputFormat.setConf(mergeConf);

    // Two fewer than the number of start keys:
    // split["b", "c"] is excluded, and split["o", "p"] and split["p", "q"] are merged into one,
    // while split["a", "b"] and split["c", "d"] stay separate because they are not successive.
    int expectedSplitCount = ConnectionForMergeTesting.START_KEYS.length - 2;
    assertEquals(expectedSplitCount, inputFormat.getSplits(mockedContext).size());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) JobContext(org.apache.hadoop.mapreduce.JobContext) Test(org.junit.Test)

Aggregations

JobContext (org.apache.hadoop.mapreduce.JobContext)85 Configuration (org.apache.hadoop.conf.Configuration)41 Job (org.apache.hadoop.mapreduce.Job)35 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)34 Test (org.junit.Test)31 JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl)29 InputSplit (org.apache.hadoop.mapreduce.InputSplit)28 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)25 Path (org.apache.hadoop.fs.Path)24 IOException (java.io.IOException)22 File (java.io.File)19 TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID)16 ArrayList (java.util.ArrayList)13 RecordWriter (org.apache.hadoop.mapreduce.RecordWriter)11 JobConf (org.apache.hadoop.mapred.JobConf)10 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)10 LongWritable (org.apache.hadoop.io.LongWritable)9 MapFile (org.apache.hadoop.io.MapFile)9 JobID (org.apache.hadoop.mapreduce.JobID)7 FileSystem (org.apache.hadoop.fs.FileSystem)6