Search in sources :

Example 71 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project crunch by cloudera.

the class CrunchMultipleOutputs method write.

/**
   * Write key and value to baseOutputPath using the namedOutput.
   * 
   * @param namedOutput    the named output name
   * @param key            the key
   * @param value          the value
   * @param baseOutputPath base-output path to write the record to.
   * Note: Framework will generate unique filename for the baseOutputPath
   */
@SuppressWarnings("unchecked")
public <K, V> void write(String namedOutput, K key, V value, String baseOutputPath) throws IOException, InterruptedException {
    checkNamedOutputName(context, namedOutput, false);
    checkBaseOutputPath(baseOutputPath);
    if (!namedOutputs.contains(namedOutput)) {
        throw new IllegalArgumentException("Undefined named output '" + namedOutput + "'");
    }
    TaskAttemptContext taskContext = getContext(namedOutput);
    getRecordWriter(taskContext, baseOutputPath).write(key, value);
}
Also used : TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext)

Example 72 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project druid by druid-io.

the class DruidParquetInputTest method getFirstRecord.

private GenericRecord getFirstRecord(Job job, String parquetPath) throws IOException, InterruptedException {
    File testFile = new File(parquetPath);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);
    DruidParquetInputFormat inputFormat = ReflectionUtils.newInstance(DruidParquetInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);
    reader.nextKeyValue();
    GenericRecord data = (GenericRecord) reader.getCurrentValue();
    reader.close();
    return data;
}
Also used : Path(org.apache.hadoop.fs.Path) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) RecordReader(org.apache.hadoop.mapreduce.RecordReader) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 73 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project druid by druid-io.

the class DruidOrcInputFormatTest method testRead.

@Test
public void testRead() throws IOException, InterruptedException {
    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();
    reader.initialize(split, context);
    reader.nextKeyValue();
    OrcStruct data = (OrcStruct) reader.getCurrentValue();
    MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);
    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
    reader.close();
}
Also used : OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) OrcNewInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat) InputFormat(org.apache.hadoop.mapreduce.InputFormat) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) RecordReader(org.apache.hadoop.mapreduce.RecordReader) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Example 74 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project goldenorb by jzachr.

the class OrbPartition method dumpData.

private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;
    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15) try {
        count++;
        tryAgain = false;
        if (job == null) {
            job = new Job(conf);
            job.setOutputFormatClass(TextOutputFormat.class);
            FileOutputFormat.setOutputPath(job, new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
        }
        if (jobContext == null) {
            jobContext = new JobContext(job.getConfiguration(), new JobID());
        }
        System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
        tao = new TaskAttemptContext(jobContext.getConfiguration(), new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
        outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
        rw = outputFormat.getRecordWriter(tao);
        vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
        for (Vertex v : vertices.values()) {
            OrbContext oc = vw.vertexWrite(v);
            rw.write(oc.getKey(), oc.getValue());
        // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
        // oc.getKey().toString() + ", " + oc.getValue().toString());
        }
        rw.close(tao);
        FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
        if (cm.needsTaskCommit(tao)) {
            cm.commitTask(tao);
            cm.cleanupJob(jobContext);
        } else {
            cm.cleanupJob(jobContext);
            tryAgain = true;
        }
    } catch (IOException e) {
        tryAgain = true;
        e.printStackTrace();
    } catch (InstantiationException e) {
        tryAgain = true;
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        tryAgain = true;
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        tryAgain = true;
        e.printStackTrace();
    } catch (InterruptedException e) {
        tryAgain = true;
        e.printStackTrace();
    }
    if (tryAgain) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
Also used : FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) Path(org.apache.hadoop.fs.Path) TaskID(org.apache.hadoop.mapreduce.TaskID) Configuration(org.apache.hadoop.conf.Configuration) OrbConfiguration(org.goldenorb.conf.OrbConfiguration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TextOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat) OrbContext(org.goldenorb.io.output.OrbContext) VertexWriter(org.goldenorb.io.output.VertexWriter) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) JobID(org.apache.hadoop.mapreduce.JobID)

Example 75 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project goldenorb by jzachr.

the class VertexInput method initialize.

/**
 * 
 */
@SuppressWarnings("unchecked")
public void initialize() {
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
    try {
        deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory.getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
        inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job, new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}
Also used : TaskID(org.apache.hadoop.mapreduce.TaskID) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) JobContext(org.apache.hadoop.mapreduce.JobContext) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapreduce.JobID)

Aggregations

TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)110 Configuration (org.apache.hadoop.conf.Configuration)58 Job (org.apache.hadoop.mapreduce.Job)44 Path (org.apache.hadoop.fs.Path)39 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)38 InputSplit (org.apache.hadoop.mapreduce.InputSplit)36 Test (org.junit.Test)35 TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID)33 JobContext (org.apache.hadoop.mapreduce.JobContext)28 IOException (java.io.IOException)27 File (java.io.File)22 LongWritable (org.apache.hadoop.io.LongWritable)22 JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl)21 RecordWriter (org.apache.hadoop.mapreduce.RecordWriter)19 MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl)17 FileSystem (org.apache.hadoop.fs.FileSystem)16 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)12 ArrayList (java.util.ArrayList)11 BytesWritable (org.apache.hadoop.io.BytesWritable)10 MapFile (org.apache.hadoop.io.MapFile)10