Search in sources:

Example 1 with TaskAttemptID

use of org.apache.hadoop.mapred.TaskAttemptID in project hadoop by apache.

the class TestShuffleScheduler method TestSucceedAndFailedCopyMap.

/**
 * Drives a {@code ShuffleSchedulerImpl} through a host failure, a successful
 * copy and then a failed copy on the same host, and expects the sequence to
 * complete without an exception (regression check for MAPREDUCE-6361).
 */
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
    JobConf conf = new JobConf();
    conf.setNumMapTasks(2);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = conf.getCombinerClass();

    // Collaborators handed to the shuffle context; every one of them is a mock.
    org.apache.hadoop.mapreduce.TaskAttemptID contextAttemptId = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    FileSystem fileSystem = mock(FileSystem.class);
    TaskUmbilicalProtocol umbilical = mock(TaskUmbilicalProtocol.class);
    LocalDirAllocator dirAllocator = mock(LocalDirAllocator.class);
    Reporter reporter = mock(Reporter.class);
    CompressionCodec codec = mock(CompressionCodec.class);
    // the explicit cast is required because the mocked type is generic
    @SuppressWarnings("unchecked") CombineOutputCollector<K, V> combineCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
    Counter counter = mock(Counter.class);
    TaskStatus contextStatus = mock(TaskStatus.class);
    Progress contextProgress = mock(Progress.class);
    Task task = mock(Task.class);
    MapOutputFile mapOutputFile = mock(MapOutputFile.class);
    @SuppressWarnings("unchecked") MapOutput<K, V> fetchedOutput = mock(MapOutput.class);

    // The same counter mock and the same progress mock are reused for every slot of their type.
    ShuffleConsumerPlugin.Context<K, V> shuffleContext = new ShuffleConsumerPlugin.Context<K, V>(
            contextAttemptId, conf, fileSystem, umbilical, dirAllocator, reporter, codec, combiner,
            combineCollector, counter, counter, counter, counter, counter, counter,
            contextStatus, contextProgress, contextProgress, task, mapOutputFile, null);

    // Hand-written status: reports itself as a non-map task and ignores fetch-failure notifications.
    TaskStatus schedulerStatus = new TaskStatus() {

        @Override
        public boolean getIsMap() {
            return false;
        }

        @Override
        public void addFetchFailedMap(TaskAttemptID mapTaskId) {
            // intentionally empty
        }
    };

    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(
            conf, schedulerStatus, null, null, new Progress(),
            shuffleContext.getShuffledMapsCounter(),
            shuffleContext.getReduceShuffleBytes(),
            shuffleContext.getFailedShuffleCounter());

    MapHost host = new MapHost("host1", null);
    org.apache.hadoop.mapred.TaskID failedTask = new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0);
    TaskAttemptID failedAttempt = new TaskAttemptID(failedTask, 0);
    org.apache.hadoop.mapred.TaskID succeededTask = new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1);
    TaskAttemptID succeededAttempt = new TaskAttemptID(succeededTask, 1);

    // Step 1: the host is reported as failed (first half of the failed fetch).
    scheduler.hostFailed(host.getHostName());

    // Step 2: a different attempt is fetched successfully from that same host.
    long fetchedBytes = 500L * 1024 * 1024;
    scheduler.copySucceeded(succeededAttempt, host, fetchedBytes, 0, 500000, fetchedOutput);

    // Step 3: second half of the failed fetch. MAPREDUCE-6361: this call must not throw an NPE.
    scheduler.copyFailed(failedAttempt, host, true, false);
}
Also used : Task(org.apache.hadoop.mapred.Task) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) ShuffleConsumerPlugin(org.apache.hadoop.mapred.ShuffleConsumerPlugin) Counter(org.apache.hadoop.mapred.Counters.Counter) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) JobConf(org.apache.hadoop.mapred.JobConf) MapOutputFile(org.apache.hadoop.mapred.MapOutputFile) Progress(org.apache.hadoop.util.Progress) Reporter(org.apache.hadoop.mapred.Reporter) TaskStatus(org.apache.hadoop.mapred.TaskStatus) CombineOutputCollector(org.apache.hadoop.mapred.Task.CombineOutputCollector) TaskUmbilicalProtocol(org.apache.hadoop.mapred.TaskUmbilicalProtocol) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) JobID(org.apache.hadoop.mapreduce.JobID) Test(org.junit.Test)

Example 2 with TaskAttemptID

use of org.apache.hadoop.mapred.TaskAttemptID in project hadoop by apache.

the class TestPipeApplication method initStdOut.

/**
   * Prepares the stdout/stderr task-log location for the task attempt named in
   * the configuration: creates the log directory when it does not exist yet,
   * otherwise registers the existing stdout and stderr files for deletion when
   * the JVM exits.
   */
private void initStdOut(JobConf configuration) {
    String attemptName = configuration.get(MRJobConfig.TASK_ATTEMPT_ID);
    TaskAttemptID attempt = TaskAttemptID.forName(attemptName);
    File outLog = TaskLog.getTaskLogFile(attempt, false, TaskLog.LogName.STDOUT);
    File errLog = TaskLog.getTaskLogFile(attempt, false, TaskLog.LogName.STDERR);
    File logDir = outLog.getParentFile();
    if (logDir.exists()) {
        // directory is already there: schedule the old logs for removal
        outLog.deleteOnExit();
        errLog.deleteOnExit();
        return;
    }
    // first use: create the folder that will hold the logs
    logDir.mkdirs();
}
Also used : TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) File(java.io.File)

Example 3 with TaskAttemptID

use of org.apache.hadoop.mapred.TaskAttemptID in project carbondata by apache.

the class StoreCreator method executeGraph.

/**
   * Loads the fact CSV described by {@code loadModel} into the store rooted at
   * {@code storeLocation}, writes the load metadata, and finally moves the
   * generated fact file out of its sub-folder into the segment directory.
   *
   * @param loadModel     supplies database/table name, fact file path and CSV settings
   * @param storeLocation root directory of the store; created if it does not exist
   * @throws IllegalStateException if the segment directory or its sub-folder cannot be
   *                               listed, no sub-folder or no {@code part*} file is found,
   *                               or the fact file cannot be moved
   * @throws Exception             anything propagated by the data load, the metadata write
   *                               or the folder cleanup
   */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    new File(storeLocation).mkdirs();
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
    CarbonProperties.getInstance().addProperty("send.signal.load", "false");
    CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
    CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
    CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
    CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
    CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
    CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
    CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
    CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");

    // Remove a stale graph file left behind by a previous run, if any.
    String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        path.delete();
    }

    // One block covering the whole fact file, read through the CSV input format.
    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0, new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setMaxColumns(configuration, "10");
    CSVInputFormat.setNumberOfColumns(configuration, "7");
    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader = format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator = new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
    new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });

    // NOTE(review): the table name is passed for both name arguments, exactly as before;
    // confirm against writeLoadMetadata whether the first one should be the database name.
    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());

    String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
    moveFactFileToSegment(new File(segLocation));
}

/**
   * Moves the first {@code part*} file of the segment's sub-folder up into the segment
   * directory and then deletes that sub-folder.
   */
private static void moveFactFileToSegment(File segmentDir) throws Exception {
    // listFiles() returns null when the path is not a directory or cannot be read.
    File[] entries = segmentDir.listFiles();
    if (entries == null) {
        throw new IllegalStateException("Cannot list segment directory " + segmentDir);
    }
    // Same selection rule as before: the last directory in listing order is used.
    File folder = null;
    for (File entry : entries) {
        if (entry.isDirectory()) {
            folder = entry;
        }
    }
    if (folder == null) {
        throw new IllegalStateException("No sub-folder found in segment directory " + segmentDir);
    }
    File[] files = folder.listFiles();
    if (files == null) {
        throw new IllegalStateException("Cannot list folder " + folder);
    }
    File factFile = null;
    for (File candidate : files) {
        if (!candidate.isDirectory() && candidate.getName().startsWith("part")) {
            factFile = candidate;
            break;
        }
    }
    if (factFile == null) {
        throw new IllegalStateException("No fact file starting with 'part' found in " + folder);
    }
    File target = new File(segmentDir, factFile.getName());
    // Fail before the cleanup below: deleting the folder after a failed move would lose the fact file.
    if (!factFile.renameTo(target)) {
        throw new IllegalStateException("Failed to move fact file " + factFile + " to " + target);
    }
    CarbonUtil.deleteFoldersAndFiles(folder);
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CSVRecordReaderIterator(org.apache.carbondata.processing.csvload.CSVRecordReaderIterator) StringArrayWritable(org.apache.carbondata.processing.csvload.StringArrayWritable) NullWritable(org.apache.hadoop.io.NullWritable) CSVInputFormat(org.apache.carbondata.processing.csvload.CSVInputFormat) BlockDetails(org.apache.carbondata.processing.csvload.BlockDetails) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) File(java.io.File) DataLoadExecutor(org.apache.carbondata.processing.newflow.DataLoadExecutor) SchemaInfo(org.apache.carbondata.processing.api.dataloader.SchemaInfo)

Example 4 with TaskAttemptID

use of org.apache.hadoop.mapred.TaskAttemptID in project incubator-systemml by apache.

the class MultipleOutputCommitter method moveFileToDestination.

/**
 * Moves a task output file into the output location selected by the numeric
 * prefix of its name (e.g., "0-r-00000" is moved under {@code outputs[0]}).
 * When the first rename fails, the existing destination is deleted and the
 * rename is attempted once more.
 *
 * @throws IOException if the destination cannot be deleted or the second rename fails
 */
private void moveFileToDestination(TaskAttemptContext context, FileSystem fs, Path file) throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // the part before the first '-' is the output index, e.g., 0-r-00000 -> 0
    String fileName = file.getName();
    int dashPos = fileName.indexOf('-');
    int outputIndex = Integer.parseInt(fileName.substring(0, dashPos));

    // e.g., outX/0-r-00000
    Path target = new Path(outputs[outputIndex], fileName);

    if (fs.rename(file, target)) {
        // moved on the first attempt
        return;
    }
    // first attempt failed: remove the earlier output, then retry once
    if (!fs.delete(target, true)) {
        throw new IOException("Failed to delete earlier output " + target + " for rename of " + file + " in task " + attemptId);
    }
    if (!fs.rename(file, target)) {
        throw new IOException("Failed to save output " + target + " for rename of " + file + " in task: " + attemptId);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) IOException(java.io.IOException)

Example 5 with TaskAttemptID

use of org.apache.hadoop.mapred.TaskAttemptID in project cdap by caskdata.

the class StreamInputFormatTest method testStreamRecordReader.

/**
 * Writes two events to a stream partition and checks that a record reader
 * opened on the second of the two computed splits returns exactly the
 * second event.
 */
@Test
public void testStreamRecordReader() throws Exception {
    File streamDir = tmpFolder.newFolder();
    File partitionDir = new File(streamDir, "1.1000");
    partitionDir.mkdirs();
    String bucketPrefix = "bucket.1.0.";
    File eventFile = new File(partitionDir, bucketPrefix + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partitionDir, bucketPrefix + StreamFileType.INDEX.getSuffix());

    // first event, flushed before the splits are computed
    StreamDataFileWriter eventWriter = new StreamDataFileWriter(
            Files.newOutputStreamSupplier(eventFile), Files.newOutputStreamSupplier(indexFile), 100L);
    eventWriter.append(StreamFileTestUtils.createEvent(1000, "test"));
    eventWriter.flush();

    // The attempt context is built before the stream settings are applied, as in the original sequence.
    Configuration hConf = new Configuration();
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(hConf, new TaskAttemptID());
    AbstractStreamInputFormat.setStreamId(hConf, DUMMY_ID);
    AbstractStreamInputFormat.setStreamPath(hConf, streamDir.toURI());
    AbstractStreamInputFormat inputFormat = new AbstractStreamInputFormat() {

        @Override
        public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
            return new NoOpAuthorizer();
        }

        @Override
        public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
            return new AuthenticationTestContext();
        }
    };

    // Two splits are expected: one from 0 to some offset, one from that offset to Long.MAX_VALUE.
    JobContextImpl jobContext = new JobContextImpl(new JobConf(hConf), new JobID());
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    Assert.assertEquals(2, splits.size());

    // second event gives the 2nd split something to read
    eventWriter.append(StreamFileTestUtils.createEvent(1001, "test"));
    eventWriter.close();

    // read the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> reader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder(), new NoOpAuthorizer(), new AuthenticationTestContext(), DUMMY_ID);
    reader.initialize(splits.get(1), attemptContext);

    // exactly one record is expected, and it must be the second event
    Assert.assertTrue(reader.nextKeyValue());
    StreamEvent event = reader.getCurrentValue();
    Assert.assertEquals(1001, event.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(event.getBody()));
    Assert.assertFalse(reader.nextKeyValue());
}
Also used : JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) AuthenticationTestContext(co.cask.cdap.security.auth.context.AuthenticationTestContext) NoOpAuthorizer(co.cask.cdap.security.spi.authorization.NoOpAuthorizer) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) IdentityStreamEventDecoder(co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder) LongWritable(org.apache.hadoop.io.LongWritable) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapreduce.InputSplit) JobID(org.apache.hadoop.mapred.JobID) Test(org.junit.Test)

Aggregations

TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID)17 File (java.io.File)6 Test (org.junit.Test)6 JobConf (org.apache.hadoop.mapred.JobConf)5 IOException (java.io.IOException)4 Configuration (org.apache.hadoop.conf.Configuration)4 Path (org.apache.hadoop.fs.Path)4 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)4 FileSystem (org.apache.hadoop.fs.FileSystem)3 JobID (org.apache.hadoop.mapred.JobID)3 TaskStatus (org.apache.hadoop.mapred.TaskStatus)3 TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol)3 JobID (org.apache.hadoop.mapreduce.JobID)3 Progress (org.apache.hadoop.util.Progress)3 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)2 AuthenticationTestContext (co.cask.cdap.security.auth.context.AuthenticationTestContext)2 NoOpAuthorizer (co.cask.cdap.security.spi.authorization.NoOpAuthorizer)2 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)2 CarbonStorePath (org.apache.carbondata.core.util.path.CarbonStorePath)2 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)2