Example 11 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From class TestFileOutputCommitter, method testCommitterWithDuplicatedCommitInternal:

private void testCommitterWithDuplicatedCommitInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateContent(outDir);
    // commit the job a second time, after an already-successful commitJob
    try {
        committer.commitJob(jContext);
        if (version == 1) {
            Assert.fail("Duplicate commit succeeded: wrong behavior for version 1.");
        }
    } catch (IOException e) {
        if (version == 2) {
            Assert.fail("Duplicate commit failed: wrong behavior for version 2.");
        }
    }
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), IOException (java.io.IOException), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File)
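
In the test source, a parameterized helper like this is typically driven by small JUnit wrappers that pin the algorithm version. A minimal sketch of that pattern (the wrapper names are assumed here, not copied from the Hadoop source):

@Test
public void testCommitterWithDuplicatedCommitV1() throws Exception {
    // version 1 must reject the second commitJob
    testCommitterWithDuplicatedCommitInternal(1);
}

@Test
public void testCommitterWithDuplicatedCommitV2() throws Exception {
    // version 2 treats commitJob as repeatable, so the duplicate must succeed
    testCommitterWithDuplicatedCommitInternal(2);
}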

Example 12 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From class TestFileOutputCommitter, method testAbortInternal:

private void testAbortInternal(int version) throws IOException, InterruptedException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do abort
    committer.abortTask(tContext);
    File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());
    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: Path (org.apache.hadoop.fs.Path), JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File)
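
The asserts above lean on the committer's temporary directory layout. A short sketch, reusing the variables from the example and assuming the default layout of current Hadoop releases (where FileOutputCommitter.PENDING_DIR_NAME is "_temporary"):

// abortTask(tContext) removes the task attempt's work path, roughly:
//   <outDir>/_temporary/<appAttempt>/_temporary/<taskAttemptId>
Path taskWork = committer.getWorkPath();
// abortJob(jContext, JobStatus.State.FAILED) removes the whole pending tree:
Path jobPending = new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME);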

Example 13 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From class TestFileOutputCommitter, method testCommitterWithFailureInternal:

private void testCommitterWithFailureInternal(int version, int maxAttempts) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS, maxAttempts);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new CommitterWithFailedThenSucceed(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    try {
        committer.commitJob(jContext);
        // (version, maxAttempts) = (1,1), (1,2) and (2,1) should not reach here.
        if (version == 1 || maxAttempts <= 1) {
            Assert.fail("Commit succeeded: failure expected for version 1 or maxAttempts <= 1.");
        }
    } catch (IOException e) {
        // (2,2) should not reach here.
        if (version == 2 && maxAttempts > 2) {
            Assert.fail("Commit failed: wrong behavior for version 2.");
        }
    }
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), IOException (java.io.IOException), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File)
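
CommitterWithFailedThenSucceed is a subclass local to the test whose body is not shown here. As a rough, hypothetical stand-in (not the Hadoop class; imports as in the Also used list above), a committer that injects a single commitJob failure and then succeeds could look like this:

// Hypothetical stand-in for CommitterWithFailedThenSucceed: the first
// commitJob call throws, every later call delegates to the real commit.
public static class FailFirstCommitter extends FileOutputCommitter {
    private boolean failedOnce = false;

    public FailFirstCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
        super(outputPath, context);
    }

    @Override
    public void commitJob(JobContext context) throws IOException {
        if (!failedOnce) {
            failedOnce = true;
            throw new IOException("injected first-attempt commit failure");
        }
        super.commitJob(context);
    }
}

This mirrors the expectations encoded in the test: the v1 algorithm never tolerates a failed commit, while with v2 and at least two attempts a retried commitJob is expected to succeed.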

Example 14 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From class DBOutputFormat, method getRecordWriter:

/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(FileSystem filesystem, JobConf job, String name, Progressable progress) throws IOException {
    org.apache.hadoop.mapreduce.RecordWriter<K, V> w = super.getRecordWriter(
        new TaskAttemptContextImpl(job,
            TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID))));
    org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter writer =
        (org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter) w;
    try {
        return new DBRecordWriter(writer.getConnection(), writer.getStatement());
    } catch (SQLException se) {
        throw new IOException(se);
    }
}
Also used: SQLException (java.sql.SQLException), IOException (java.io.IOException), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)
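
This bridge from the old mapred API to the new mapreduce API only works if the incoming JobConf already carries a task attempt id and the JDBC settings. A hedged usage sketch; the attempt id string, driver class, URL, table, and field names are illustrative placeholders:

// Hypothetical setup for the old-API DBOutputFormat bridge above.
JobConf conf = new JobConf();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, "attempt_200707121733_0001_m_000000_0");
DBConfiguration.configureDB(conf, "org.postgresql.Driver", "jdbc:postgresql://localhost/testdb");
DBOutputFormat.setOutput(conf, "events", "id", "name");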

Example 15 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project carbondata by apache.

From class StoreCreator, method executeGraph:

/**
   * Executes the load graph, which in turn loads the data.
   *
   * @param loadModel     the load model describing the target table and input data
   * @param storeLocation the store location to load the data into
   * @throws Exception if the data load fails
   */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    new File(storeLocation).mkdirs();
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
    CarbonProperties.getInstance().addProperty("send.signal.load", "false");
    CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
    CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
    CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
    CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
    CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
    CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
    CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
    CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
    String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        path.delete();
    }
    SchemaInfo info = new SchemaInfo();
    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0, new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setMaxColumns(configuration, "10");
    CSVInputFormat.setNumberOfColumns(configuration, "7");
    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader = format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator = new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
    new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });
    info.setDatabaseName(databaseName);
    info.setTableName(tableName);
    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
    String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
    File file = new File(segLocation);
    File factFile = null;
    File[] folderList = file.listFiles();
    // pick the subdirectory that the load step left under the segment
    File folder = null;
    for (int i = 0; i < folderList.length; i++) {
        if (folderList[i].isDirectory()) {
            folder = folderList[i];
        }
    }
    // guard against a missing folder to avoid a NullPointerException
    if (folder != null && folder.isDirectory()) {
        File[] files = folder.listFiles();
        // find the fact file written by the load
        for (int i = 0; i < files.length; i++) {
            if (!files[i].isDirectory() && files[i].getName().startsWith("part")) {
                factFile = files[i];
                break;
            }
        }
        if (factFile != null) {
            // move the fact file up into the segment directory, then drop the temp folder
            factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
        }
        CarbonUtil.deleteFoldersAndFiles(folder);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath), CarbonStorePath (org.apache.carbondata.core.util.path.CarbonStorePath), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails), CSVRecordReaderIterator (org.apache.carbondata.processing.csvload.CSVRecordReaderIterator), StringArrayWritable (org.apache.carbondata.processing.csvload.StringArrayWritable), NullWritable (org.apache.hadoop.io.NullWritable), CSVInputFormat (org.apache.carbondata.processing.csvload.CSVInputFormat), BlockDetails (org.apache.carbondata.processing.csvload.BlockDetails), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), File (java.io.File), DataLoadExecutor (org.apache.carbondata.processing.newflow.DataLoadExecutor), SchemaInfo (org.apache.carbondata.processing.api.dataloader.SchemaInfo)
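
The same record reader can also be driven by hand, without the CSVRecordReaderIterator wrapper. A minimal sketch reusing blockDetails and hadoopAttemptContext from the example above (StringArrayWritable.get() is assumed to expose the parsed columns):

// Hedged sketch: iterate the CSV split directly with the new-API reader.
CSVInputFormat format = new CSVInputFormat();
RecordReader<NullWritable, StringArrayWritable> reader =
    format.createRecordReader(blockDetails, hadoopAttemptContext);
reader.initialize(blockDetails, hadoopAttemptContext);
try {
    while (reader.nextKeyValue()) {
        String[] columns = reader.getCurrentValue().get();
        // process one parsed CSV row here
    }
} finally {
    reader.close();
}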

Aggregations

TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 47 uses
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 38 uses
Configuration (org.apache.hadoop.conf.Configuration): 35 uses
File (java.io.File): 25 uses
Job (org.apache.hadoop.mapreduce.Job): 23 uses
Path (org.apache.hadoop.fs.Path): 22 uses
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 22 uses
JobContext (org.apache.hadoop.mapreduce.JobContext): 21 uses
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 21 uses
Test (org.junit.Test): 17 uses
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 12 uses
LongWritable (org.apache.hadoop.io.LongWritable): 11 uses
IOException (java.io.IOException): 10 uses
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 10 uses
MapFile (org.apache.hadoop.io.MapFile): 9 uses
NullWritable (org.apache.hadoop.io.NullWritable): 6 uses
ArrayList (java.util.ArrayList): 5 uses
HashSet (java.util.HashSet): 5 uses
FileAttribute (java.nio.file.attribute.FileAttribute): 4 uses
TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID): 4 uses