
Example 21 with Counters

Use of org.apache.hadoop.mapred.Counters in project voldemort by voldemort.

The class HadoopStoreBuilder, method build().

/**
     * Run the job
     */
public void build() {
    try {
        JobConf conf = prepareJobConf(baseJobConf);
        FileSystem fs = outputDir.getFileSystem(conf);
        if (fs.exists(outputDir)) {
            info("Deleting previous output in " + outputDir + " for building store " + this.storeDef.getName());
            fs.delete(outputDir, true);
        }
        conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
        conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
        conf.set("stores.xml", new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
        conf.setBoolean(VoldemortBuildAndPushJob.SAVE_KEYS, saveKeys);
        conf.setBoolean(VoldemortBuildAndPushJob.REDUCER_PER_BUCKET, reducerPerBucket);
        conf.setBoolean(VoldemortBuildAndPushJob.BUILD_PRIMARY_REPLICAS_ONLY, buildPrimaryReplicasOnly);
        if (!isAvro) {
            conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
            conf.setMapperClass(mapperClass);
            conf.setMapOutputKeyClass(BytesWritable.class);
            conf.setMapOutputValueClass(BytesWritable.class);
            conf.setReducerClass(HadoopStoreBuilderReducer.class);
        }
        conf.setInputFormat(inputFormatClass);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setOutputKeyClass(BytesWritable.class);
        conf.setOutputValueClass(BytesWritable.class);
        conf.setJarByClass(getClass());
        conf.setReduceSpeculativeExecution(false);
        FileInputFormat.setInputPaths(conf, inputPath);
        conf.set("final.output.dir", outputDir.toString());
        conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
        conf.set("dfs.umaskmode", "002");
        FileOutputFormat.setOutputPath(conf, tempDir);
        FileSystem outputFs = outputDir.getFileSystem(conf);
        if (outputFs.exists(outputDir)) {
            throw new IOException("Final output directory already exists.");
        }
        // Delete the temp output dir if it already exists
        FileSystem tempFs = tempDir.getFileSystem(conf);
        tempFs.delete(tempDir, true);
        long size = sizeOfPath(tempFs, inputPath);
        logger.info("Data size = " + size + ", replication factor = " + storeDef.getReplicationFactor() + ", numNodes = " + cluster.getNumberOfNodes() + ", numPartitions = " + cluster.getNumberOfPartitions() + ", chunk size = " + chunkSizeBytes);
        // Base numbers of chunks and reducers, will get modified according to various settings
        int numChunks = (int) (size / cluster.getNumberOfPartitions() / chunkSizeBytes) + 1; /* +1 so we round up */
        int numReducers = cluster.getNumberOfPartitions();
        // Whether the sizing behavior below is ideal is an open
        // question, but in order to avoid breaking anything we'll just maintain the original behavior.
        if (saveKeys) {
            if (buildPrimaryReplicasOnly) {
            // The buildPrimaryReplicasOnly mode is supported exclusively in combination with
            // saveKeys. If enabled, then we don't want to shuffle extra keys redundantly,
            // hence we don't change the number of reducers.
            } else {
                // Old behavior, where all keys are redundantly shuffled to redundant reducers.
                numReducers = numReducers * storeDef.getReplicationFactor();
            }
        } else {
            numChunks = numChunks * storeDef.getReplicationFactor();
        }
        // Ensure at least one chunk
        numChunks = Math.max(numChunks, 1);
        if (reducerPerBucket) {
        // Then all chunks for a given partition/replica combination are shuffled to the same
        // reducer, hence, the number of reducers remains the same as previously defined.
        } else {
            // Otherwise, we want one reducer per chunk, hence we multiply the number of reducers.
            numReducers = numReducers * numChunks;
        }
        conf.setInt(AbstractStoreBuilderConfigurable.NUM_CHUNKS, numChunks);
        conf.setNumReduceTasks(numReducers);
        logger.info("Number of chunks: " + numChunks + ", number of reducers: " + numReducers + ", save keys: " + saveKeys + ", reducerPerBucket: " + reducerPerBucket + ", buildPrimaryReplicasOnly: " + buildPrimaryReplicasOnly);
        if (isAvro) {
            conf.setPartitionerClass(AvroStoreBuilderPartitioner.class);
            // conf.setMapperClass(mapperClass);
            conf.setMapOutputKeyClass(ByteBuffer.class);
            conf.setMapOutputValueClass(ByteBuffer.class);
            conf.setInputFormat(inputFormatClass);
            conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
            conf.setOutputKeyClass(ByteBuffer.class);
            conf.setOutputValueClass(ByteBuffer.class);
            // AvroJob confs for the avro mapper
            AvroJob.setInputSchema(conf, Schema.parse(baseJobConf.get(AVRO_REC_SCHEMA)));
            AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));
            AvroJob.setMapperClass(conf, mapperClass);
            conf.setReducerClass(AvroStoreBuilderReducer.class);
        }
        logger.info("Building store...");
        // The snippet below is copied and adapted from: JobClient.runJob(conf);
        // We have more control in the error handling this way.
        JobClient jc = new JobClient(conf);
        RunningJob runningJob = jc.submitJob(conf);
        Counters counters;
        try {
            if (!jc.monitorAndPrintJob(conf, runningJob)) {
                counters = runningJob.getCounters();
                // For some datasets, the number of chunks that we calculated is inadequate.
                // Here, we try to identify if this is the case.
                long mapOutputBytes = counters.getCounter(Task.Counter.MAP_OUTPUT_BYTES);
                long averageNumberOfBytesPerChunk = mapOutputBytes / numChunks / cluster.getNumberOfPartitions();
                if (averageNumberOfBytesPerChunk > (HadoopStoreWriter.DEFAULT_CHUNK_SIZE)) {
                    float chunkSizeBloat = averageNumberOfBytesPerChunk / (float) HadoopStoreWriter.DEFAULT_CHUNK_SIZE;
                    long suggestedTargetChunkSize = (long) (HadoopStoreWriter.DEFAULT_CHUNK_SIZE / chunkSizeBloat);
                    logger.error("The number of bytes per chunk may be too high." + " averageNumberOfBytesPerChunk = " + averageNumberOfBytesPerChunk + ". Consider setting " + VoldemortBuildAndPushJob.BUILD_CHUNK_SIZE + "=" + suggestedTargetChunkSize);
                } else {
                    logger.error("Job Failed: " + runningJob.getFailureInfo());
                }
                throw new VoldemortException("BnP's MapReduce job failed.");
            }
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
        }
        counters = runningJob.getCounters();
        long numberOfRecords = counters.getCounter(Task.Counter.REDUCE_INPUT_GROUPS);
        if (numberOfRecords < minNumberOfRecords) {
            throw new VoldemortException("The number of records in the data set (" + numberOfRecords + ") is lower than the minimum required (" + minNumberOfRecords + "). Aborting.");
        }
        if (saveKeys) {
            logger.info("Number of collisions in the job - " + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
            logger.info("Maximum number of collisions for one entry - " + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
        }
        // Do a CheckSumOfCheckSum - Similar to HDFS
        CheckSum checkSumGenerator = CheckSum.getInstance(this.checkSumType);
        if (!this.checkSumType.equals(CheckSumType.NONE) && checkSumGenerator == null) {
            throw new VoldemortException("Could not generate checksum digest for type " + this.checkSumType);
        }
        List<Integer> directorySuffixes = Lists.newArrayList();
        if (buildPrimaryReplicasOnly) {
            // Files are grouped by partitions
            for (int partitionId = 0; partitionId < cluster.getNumberOfPartitions(); partitionId++) {
                directorySuffixes.add(partitionId);
            }
        } else {
            // Files are grouped by node
            for (Node node : cluster.getNodes()) {
                directorySuffixes.add(node.getId());
            }
        }
        ReadOnlyStorageMetadata fullStoreMetadata = new ReadOnlyStorageMetadata();
        List<Integer> emptyDirectories = Lists.newArrayList();
        final String directoryPrefix = buildPrimaryReplicasOnly ? ReadOnlyUtils.PARTITION_DIRECTORY_PREFIX : ReadOnlyUtils.NODE_DIRECTORY_PREFIX;
        // Generate a log message every 30 seconds, or after every 100 directories (5 when files are grouped by node).
        final long LOG_INTERVAL_TIME = TimeUnit.MILLISECONDS.convert(30, TimeUnit.SECONDS);
        final int LOG_INTERVAL_COUNT = buildPrimaryReplicasOnly ? 100 : 5;
        int lastLogCount = 0;
        long lastLogTime = 0;
        long startTimeMS = System.currentTimeMillis();
        // Check that every directory exists and has a format metadata file
        for (int index = 0; index < directorySuffixes.size(); index++) {
            int directorySuffix = directorySuffixes.get(index);
            long elapsedTime = System.currentTimeMillis() - lastLogTime;
            long elapsedCount = index - lastLogCount;
            if (elapsedTime >= LOG_INTERVAL_TIME || elapsedCount >= LOG_INTERVAL_COUNT) {
                lastLogTime = System.currentTimeMillis();
                lastLogCount = index;
                logger.info("Processed " + directorySuffix + " out of " + directorySuffixes.size() + " directories.");
            }
            String directoryName = directoryPrefix + directorySuffix;
            ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
            if (saveKeys) {
                metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
            } else {
                metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V1.getCode());
            }
            Path directoryPath = new Path(outputDir.toString(), directoryName);
            if (!outputFs.exists(directoryPath)) {
                logger.debug("No data generated for " + directoryName + ". Generating empty folder");
                emptyDirectories.add(directorySuffix);
                // Create empty folder
                outputFs.mkdirs(directoryPath);
                outputFs.setPermission(directoryPath, new FsPermission(HADOOP_FILE_PERMISSION));
                logger.debug("Setting permission to 755 for " + directoryPath);
            }
            processCheckSumMetadataFile(directoryName, outputFs, checkSumGenerator, directoryPath, metadata);
            if (buildPrimaryReplicasOnly) {
            // In buildPrimaryReplicasOnly mode, writing a metadata file for each partitions
            // takes too long, so we skip it. We will rely on the full-store.metadata file instead.
            } else {
                // Maintaining the old behavior: we write the node-specific metadata file
                writeMetadataFile(directoryPath, outputFs, ReadOnlyUtils.METADATA_FILE_EXTENSION, metadata);
            }
            fullStoreMetadata.addNestedMetadata(directoryName, metadata);
        }
        // Write the aggregate metadata file
        writeMetadataFile(outputDir, outputFs, ReadOnlyUtils.FULL_STORE_METADATA_FILE, fullStoreMetadata);
        long elapsedTimeMs = System.currentTimeMillis() - startTimeMS;
        long elapsedTimeSeconds = TimeUnit.SECONDS.convert(elapsedTimeMs, TimeUnit.MILLISECONDS);
        logger.info("Total Processed directories: " + directorySuffixes.size() + ". Elapsed Time (Seconds):" + elapsedTimeSeconds);
        if (emptyDirectories.size() > 0) {
            logger.info("Empty directories: " + Arrays.toString(emptyDirectories.toArray()));
        }
    } catch (Exception e) {
        logger.error("Error in Store builder", e);
        throw new VoldemortException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Node(voldemort.cluster.Node) StoreDefinitionsMapper(voldemort.xml.StoreDefinitionsMapper) ClusterMapper(voldemort.xml.ClusterMapper) IOException(java.io.IOException) JobClient(org.apache.hadoop.mapred.JobClient) VoldemortException(voldemort.VoldemortException) ReadOnlyStorageMetadata(voldemort.store.readonly.ReadOnlyStorageMetadata) CheckSum(voldemort.store.readonly.checksum.CheckSum) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) AvroOutputFormat(org.apache.avro.mapred.AvroOutputFormat) Counters(org.apache.hadoop.mapred.Counters) FsPermission(org.apache.hadoop.fs.permission.FsPermission) JobConf(org.apache.hadoop.mapred.JobConf)
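
To make the chunk/reducer sizing and the MAP_OUTPUT_BYTES diagnostic above easier to follow in isolation, here is a minimal standalone sketch. It is not Voldemort code: the class name, method names, and sample numbers are hypothetical, and only the arithmetic mirrors the snippet above (the real default chunk size constant lives in HadoopStoreWriter).

// Standalone sketch of the sizing logic in HadoopStoreBuilder.build().
// Names are hypothetical; only the arithmetic follows the snippet above.
public class StoreBuildSizingSketch {

    /** Mirrors the numChunks / numReducers calculation above. Returns {numChunks, numReducers}. */
    static int[] chunkAndReducerCounts(long inputSizeBytes,
                                       int numPartitions,
                                       long chunkSizeBytes,
                                       int replicationFactor,
                                       boolean saveKeys,
                                       boolean reducerPerBucket,
                                       boolean buildPrimaryReplicasOnly) {
        // +1 so we round up, exactly as in the job code.
        int numChunks = (int) (inputSizeBytes / numPartitions / chunkSizeBytes) + 1;
        int numReducers = numPartitions;

        if (saveKeys) {
            if (!buildPrimaryReplicasOnly) {
                // Legacy saveKeys behavior: every replica is shuffled to its own reducer.
                numReducers = numReducers * replicationFactor;
            }
        } else {
            // Legacy (non-saveKeys) behavior: replicas inflate the chunk count instead.
            numChunks = numChunks * replicationFactor;
        }
        numChunks = Math.max(numChunks, 1);

        if (!reducerPerBucket) {
            // One reducer per chunk rather than one per partition/replica bucket.
            numReducers = numReducers * numChunks;
        }
        return new int[] { numChunks, numReducers };
    }

    /**
     * Mirrors the failure-path diagnostic: if the average bytes per chunk exceeds the
     * writer's default chunk size, suggest a smaller target chunk size.
     */
    static long suggestedChunkSize(long mapOutputBytes, int numChunks, int numPartitions,
                                   long defaultChunkSize) {
        long averageBytesPerChunk = mapOutputBytes / numChunks / numPartitions;
        if (averageBytesPerChunk <= defaultChunkSize) {
            return -1; // no adjustment suggested
        }
        float chunkSizeBloat = averageBytesPerChunk / (float) defaultChunkSize;
        return (long) (defaultChunkSize / chunkSizeBloat);
    }

    public static void main(String[] args) {
        // Hypothetical numbers: 100 GiB of input, 32 partitions, 1 GiB chunks, replication 2.
        long gb = 1024L * 1024 * 1024;
        int[] counts = chunkAndReducerCounts(100 * gb, 32, gb, 2,
                /* saveKeys */ true, /* reducerPerBucket */ false,
                /* buildPrimaryReplicasOnly */ false);
        System.out.println("numChunks = " + counts[0] + ", numReducers = " + counts[1]);

        // Hypothetical MAP_OUTPUT_BYTES reading from the job's counters.
        long suggestion = suggestedChunkSize(400 * gb, counts[0], 32, gb);
        System.out.println("suggested chunk size = " + suggestion);
    }
}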

Example 22 with Counters

Use of org.apache.hadoop.mapred.Counters in project ambrose by twitter.

The class CascadingJob, method setJobStats().

@JsonIgnore
public void setJobStats(HadoopStepStats stats) {
    Counters counters = new Counters();
    for (String groupName : stats.getCounterGroups()) {
        for (String counterName : stats.getCountersFor(groupName)) {
            Long counterValue = stats.getCounterValue(groupName, counterName);
            counters.findCounter(groupName, counterName).setValue(counterValue);
        }
    }
    setCounterGroupMap(CounterGroup.counterGroupsByName(counters));
}
Also used : Counters(org.apache.hadoop.mapred.Counters) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore)
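
The pattern above, copying arbitrary group/counter name pairs into an org.apache.hadoop.mapred.Counters instance via findCounter(...).setValue(...), can be exercised in isolation. Below is a minimal sketch using only the old mapred Counters API; the group and counter names and values are made up for illustration.

import org.apache.hadoop.mapred.Counters;

// Minimal sketch of populating and reading back a mapred Counters object,
// mirroring how setJobStats() above copies values from HadoopStepStats.
// The group/counter names and values here are hypothetical.
public class CountersCopySketch {
    public static void main(String[] args) {
        Counters counters = new Counters();

        // findCounter(group, name) lazily creates the counter if it does not exist yet.
        counters.findCounter("my-group", "records-read").setValue(1234L);
        counters.findCounter("my-group", "records-written").setValue(1200L);

        // Reading a value back through the same lookup.
        long read = counters.findCounter("my-group", "records-read").getValue();
        System.out.println("records-read = " + read);

        // Counters is iterable over its groups, and each group over its counters.
        for (Counters.Group group : counters) {
            for (Counters.Counter counter : group) {
                System.out.println(group.getName() + "::" + counter.getName()
                        + " = " + counter.getValue());
            }
        }
    }
}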

Example 23 with Counters

Use of org.apache.hadoop.mapred.Counters in project ambrose by twitter.

The class AmbroseHiveUtil, method counterGroupInfoMap().

/**
   * Constructs counter groups from job runtime statistics. Hive mangles Hadoop Counter data,
   * forming counter names with format "$groupName::$counterName".
   *
   * @param counterNameToValue mangled Hadoop counters from Hive.
   * @return counter groups by name.
   */
public static Map<String, CounterGroup> counterGroupInfoMap(Map<String, Double> counterNameToValue) {
    Counters counters = new Counters();
    for (Map.Entry<String, ? extends Number> entry : counterNameToValue.entrySet()) {
        String key = entry.getKey();
        Number value = entry.getValue();
        String[] cNames = key.split("::");
        String groupName = cNames[0];
        String counterName = cNames[1];
        Counter counter = counters.findCounter(groupName, counterName);
        counter.setValue(value.longValue());
    }
    return CounterGroup.counterGroupsByName(counters);
}
Also used : Counter(org.apache.hadoop.mapred.Counters.Counter) Counters(org.apache.hadoop.mapred.Counters) Map(java.util.Map)
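
The "$groupName::$counterName" mangling described in the javadoc can be undone with exactly the split shown above. The sketch below is self-contained and uses hypothetical mangled keys; it demonstrates only the parsing and the transfer into a Counters object, not Ambrose's CounterGroup wrapper.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.mapred.Counters;

// Sketch of un-mangling Hive-style "$groupName::$counterName" keys, as in
// counterGroupInfoMap() above. The input map below is hypothetical.
public class MangledCounterNameSketch {
    public static void main(String[] args) {
        Map<String, Double> counterNameToValue = new HashMap<String, Double>();
        counterNameToValue.put("org.apache.hadoop.mapred.Task$Counter::MAP_INPUT_RECORDS", 42.0);
        counterNameToValue.put("org.apache.hadoop.mapred.Task$Counter::REDUCE_INPUT_GROUPS", 7.0);

        Counters counters = new Counters();
        for (Map.Entry<String, Double> entry : counterNameToValue.entrySet()) {
            // Split the mangled key into its group and counter components.
            String[] names = entry.getKey().split("::");
            String groupName = names[0];
            String counterName = names[1];
            counters.findCounter(groupName, counterName).setValue(entry.getValue().longValue());
        }

        System.out.println(counters.findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
    }
}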

Aggregations

Counters (org.apache.hadoop.mapred.Counters): 23
RunningJob (org.apache.hadoop.mapred.RunningJob): 14
Path (org.apache.hadoop.fs.Path): 13
JobConf (org.apache.hadoop.mapred.JobConf): 12
FileSystem (org.apache.hadoop.fs.FileSystem): 11
Counter (org.apache.hadoop.mapred.Counters.Counter): 6
JobClient (org.apache.hadoop.mapred.JobClient): 6
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 5
Text (org.apache.hadoop.io.Text): 5
FileStatus (org.apache.hadoop.fs.FileStatus): 4
LineReader (org.apache.hadoop.util.LineReader): 4
BufferedReader (java.io.BufferedReader): 3
DataOutputStream (java.io.DataOutputStream): 3
IOException (java.io.IOException): 3
InputStreamReader (java.io.InputStreamReader): 3
ArrayList (java.util.ArrayList): 3
CommandLine (org.apache.commons.cli.CommandLine): 3
CommandLineParser (org.apache.commons.cli.CommandLineParser): 3
GnuParser (org.apache.commons.cli.GnuParser): 3
HelpFormatter (org.apache.commons.cli.HelpFormatter): 3