Examples with RecordWriter - org.apache.hadoop.mapreduce.RecordWriter

Example 21 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class TestFileOutputCommitter method testCommitterRetryInternal.

// retry committer for 2 times.
private void testCommitterRetryInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    // only attempt for 1 time.
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS, 1);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new CommitterWithFailedThenSucceed(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    try {
        committer.commitJob(jContext);
        Assert.fail("Commit successful: wrong behavior for the first time " + "commit.");
    } catch (IOException e) {
        // commit again.
        try {
            committer.commitJob(jContext);
            // version 1 shouldn't reach to here.
            if (version == 1) {
                Assert.fail("Commit successful after retry: wrong behavior for " + "version 1.");
            }
        } catch (FileNotFoundException ex) {
            if (version == 2) {
                Assert.fail("Commit failed after retry: wrong behavior for" + " version 2.");
            }
            assertTrue(ex.getMessage().contains(committer.getJobAttemptPath(jContext).toString() + " does not exist"));
        }
    }
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) FileNotFoundException(java.io.FileNotFoundException) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) MapFile(org.apache.hadoop.io.MapFile) File(java.io.File)

Example 22 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class TestFileOutputCommitter method testCommitterInternal.

private void testCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) MapFile(org.apache.hadoop.io.MapFile) File(java.io.File)

Example 23 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class TestFileOutputCommitter method testConcurrentCommitTaskWithSubDir.

private void testConcurrentCommitTaskWithSubDir(int version) throws Exception {
    final Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    final Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    conf.setClass("fs.file.impl", RLFS.class, FileSystem.class);
    FileSystem.closeAll();
    final JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    final FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jContext);
    amCommitter.setupJob(jContext);
    final TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2];
    taCtx[0] = new TaskAttemptContextImpl(conf, taskID);
    taCtx[1] = new TaskAttemptContextImpl(conf, taskID1);
    final TextOutputFormat[] tof = new TextOutputFormat[2];
    for (int i = 0; i < tof.length; i++) {
        tof[i] = new TextOutputFormat() {

            @Override
            public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
                final FileOutputCommitter foc = (FileOutputCommitter) getOutputCommitter(context);
                return new Path(new Path(foc.getWorkPath(), SUB_DIR), getUniqueFile(context, getOutputName(context), extension));
            }
        };
    }
    final ExecutorService executor = HadoopExecutors.newFixedThreadPool(2);
    try {
        for (int i = 0; i < taCtx.length; i++) {
            final int taskIdx = i;
            executor.submit(new Callable<Void>() {

                @Override
                public Void call() throws IOException, InterruptedException {
                    final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
                    outputCommitter.setupTask(taCtx[taskIdx]);
                    final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
                    writeOutput(rw, taCtx[taskIdx]);
                    outputCommitter.commitTask(taCtx[taskIdx]);
                    return null;
                }
            });
        }
    } finally {
        executor.shutdown();
        while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
            LOG.info("Awaiting thread termination!");
        }
    }
    amCommitter.commitJob(jContext);
    final RawLocalFileSystem lfs = new RawLocalFileSystem();
    lfs.setConf(conf);
    assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR)));
    // validate output
    validateContent(OUT_SUB_DIR);
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : Path(org.apache.hadoop.fs.Path) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) RawLocalFileSystem(org.apache.hadoop.fs.RawLocalFileSystem) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) ExecutorService(java.util.concurrent.ExecutorService) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) MapFile(org.apache.hadoop.io.MapFile) File(java.io.File)

Example 24 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class Chain method addMapper.

/**
   * Add mapper(the first mapper) that reads input from the input
   * context and writes to queue
   */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext, ChainBlockingQueue<KeyValuePair<?, ?>> output, int index) throws IOException, InterruptedException {
    Configuration conf = getConf(index);
    Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
    Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS, Object.class);
    RecordReader rr = new ChainRecordReader(inputContext);
    RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output, conf);
    Mapper.Context mapperContext = createMapContext(rr, rw, (MapContext) inputContext, getConf(index));
    MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
    threads.add(runner);
}

Also used : Mapper(org.apache.hadoop.mapreduce.Mapper) WrappedMapper(org.apache.hadoop.mapreduce.lib.map.WrappedMapper) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) Configuration(org.apache.hadoop.conf.Configuration) RecordReader(org.apache.hadoop.mapreduce.RecordReader)

Example 25 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project phoenix by apache.

the class MultiHfileOutputFormat method createRecordWriter.

/**
     * 
     * @param context
     * @return
     * @throws IOException 
     */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context) throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
    return new RecordWriter<TableRowkeyPair, V>() {

        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);

        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;

        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());

        private boolean rollRequested = false;

        @Override
        public void write(TableRowkeyPair row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            // phoenix-2216: start : extract table name from the rowkey
            String tableName = row.getTableName();
            byte[] rowKey = row.getRowkey().get();
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            byte[] tableAndFamily = join(tableName, Bytes.toString(family));
            WriterLength wl = this.writers.get(tableAndFamily);
            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                // phoenix-2216: start : create a directory for table and family within the output dir 
                Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
                fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
            // phoenix-2216: end
            }
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            // create a new WAL writer, if necessary
            if (wl == null || wl.writer == null) {
                // phoenix-2216: start : passed even the table name
                wl = getNewWriter(tableName, family, conf);
            // phoenix-2216: end
            }
            // we now have the proper WAL writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            // Copy the row so we know when a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /* Create a new StoreFile.Writer.
           * @param family
           * @return A WriterLength, containing a new StoreFile.Writer.
           * @throws IOException
           */
        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
        private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
            Path familydir = new Path(tableOutputPath, Bytes.toString(family));
            // phoenix-2216: start : fetching the configuration properties that were set to the table.
            // create a map from column family to the compression algorithm for the table.
            final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
            final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
            final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
            // phoenix-2216: end
            String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
            final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf, tableName);
            final DataBlockEncoding overriddenEncoding;
            if (dataBlockEncodingStr != null) {
                overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
            } else {
                overriddenEncoding = null;
            }
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression).withChecksumType(HStore.getChecksumType(conf)).withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs).withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withFileContext(hFileContext).build();
            // join and put it in the writers map .
            // phoenix-2216: start : holds a map of writers where the 
            //                       key in the map is a join byte array of table name and family.
            byte[] tableAndFamily = join(tableName, Bytes.toString(family));
            this.writers.put(tableAndFamily, wl);
            // phoenix-2216: end
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}

Also used : DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TableRowkeyPair(org.apache.phoenix.mapreduce.bulkload.TableRowkeyPair) FileSystem(org.apache.hadoop.fs.FileSystem) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Path(org.apache.hadoop.fs.Path) FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) Map(java.util.Map) TreeMap(java.util.TreeMap) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) AbstractHFileWriter(org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter)

Aggregations

RecordWriter (org.apache.hadoop.mapreduce.RecordWriter)26 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)19 Configuration (org.apache.hadoop.conf.Configuration)16 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)12 Job (org.apache.hadoop.mapreduce.Job)11 JobContext (org.apache.hadoop.mapreduce.JobContext)11 IOException (java.io.IOException)10 Path (org.apache.hadoop.fs.Path)10 JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl)10 MapFile (org.apache.hadoop.io.MapFile)9 File (java.io.File)8 FileSystem (org.apache.hadoop.fs.FileSystem)4 NullWritable (org.apache.hadoop.io.NullWritable)4 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)4 FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter)4 WritableComparable (org.apache.hadoop.io.WritableComparable)3 OutputFormat (org.apache.hadoop.mapreduce.OutputFormat)3 FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat)3 TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat)3 FileNotFoundException (java.io.FileNotFoundException)2