
Example 1 with Group

use of org.apache.hadoop.mapred.Counters.Group in project hadoop by apache.

the class GroupFactoryForTest method testFileSystemGroupIteratorConcurrency.

@Test
public void testFileSystemGroupIteratorConcurrency() {
    Counters counters = new Counters();
    // create counters for two filesystem schemes (fs1, fs2) in the filesystem counter group
    counters.findCounter("fs1", FileSystemCounter.BYTES_READ).increment(1);
    counters.findCounter("fs2", FileSystemCounter.BYTES_READ).increment(1);
    // Iterate over the counters in this group while updating counters in
    // the group
    Group group = counters.getGroup(FileSystemCounter.class.getName());
    Iterator<Counter> iterator = group.iterator();
    counters.findCounter("fs3", FileSystemCounter.BYTES_READ).increment(1);
    assertTrue(iterator.hasNext());
    iterator.next();
    counters.findCounter("fs3", FileSystemCounter.BYTES_READ).increment(1);
    assertTrue(iterator.hasNext());
    iterator.next();
}
Also used : FrameworkCounterGroup(org.apache.hadoop.mapreduce.counters.FrameworkCounterGroup) Group(org.apache.hadoop.mapred.Counters.Group) TaskCounter(org.apache.hadoop.mapreduce.TaskCounter) FileSystemCounter(org.apache.hadoop.mapreduce.FileSystemCounter) Counter(org.apache.hadoop.mapred.Counters.Counter) JobCounter(org.apache.hadoop.mapreduce.JobCounter) Test(org.junit.Test)
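
Distilled from the test above, a minimal self-contained sketch of the same pattern outside a test harness. It assumes only a Hadoop 2.x client on the classpath; the scheme names "hdfs" and "file" and the class name are illustrative:

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.Counters.Group;
import org.apache.hadoop.mapreduce.FileSystemCounter;

public class FileSystemCounterSketch {
    public static void main(String[] args) {
        Counters counters = new Counters();
        // One group holds the FileSystemCounter values for all schemes.
        counters.findCounter("hdfs", FileSystemCounter.BYTES_READ).increment(42);
        counters.findCounter("file", FileSystemCounter.BYTES_READ).increment(7);
        // The group is addressed by the enum's class name, exactly as in the test.
        Group group = counters.getGroup(FileSystemCounter.class.getName());
        for (Counter c : group) {
            System.out.println(c.getName() + " = " + c.getValue());
        }
    }
}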

Example 2 with Group

use of org.apache.hadoop.mapred.Counters.Group in project hadoop by apache.

the class GroupFactoryForTest method testTaskCounter.

@SuppressWarnings("rawtypes")
@Test
public void testTaskCounter() {
    GroupFactory groupFactory = new GroupFactoryForTest();
    FrameworkGroupFactory frameworkGroupFactory = groupFactory.newFrameworkGroupFactory(TaskCounter.class);
    Group group = (Group) frameworkGroupFactory.newGroup("TaskCounter");
    FrameworkCounterGroup counterGroup = (FrameworkCounterGroup) group.getUnderlyingGroup();
    org.apache.hadoop.mapreduce.Counter count1 = counterGroup.findCounter(TaskCounter.PHYSICAL_MEMORY_BYTES.toString());
    Assert.assertNotNull(count1);
    count1.increment(10);
    count1.increment(10);
    Assert.assertEquals(20, count1.getValue());
    // Verify no exception is thrown when finding an unknown counter
    org.apache.hadoop.mapreduce.Counter count2 = counterGroup.findCounter(TaskCounter.MAP_PHYSICAL_MEMORY_BYTES_MAX.toString());
    Assert.assertNotNull(count2);
    count2.increment(5);
    count2.increment(10);
    Assert.assertEquals(10, count2.getValue());
}
Also used : FrameworkCounterGroup(org.apache.hadoop.mapreduce.counters.FrameworkCounterGroup) Group(org.apache.hadoop.mapred.Counters.Group) FrameworkGroupFactory(org.apache.hadoop.mapreduce.counters.CounterGroupFactory.FrameworkGroupFactory) GroupFactory(org.apache.hadoop.mapred.Counters.GroupFactory) Test(org.junit.Test)
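
A note on the second half of this test: the assertion expects 10 rather than 15, which the assertions imply is because framework counters whose names end in _MAX keep the maximum value passed to increment instead of a running sum. A short hedged illustration of that behavior, reusing the counterGroup built in the test above (the extra increment(3) call is hypothetical):

org.apache.hadoop.mapreduce.Counter max = counterGroup.findCounter(TaskCounter.MAP_PHYSICAL_MEMORY_BYTES_MAX.toString());
max.increment(5);   // value becomes 5
max.increment(10);  // value becomes 10, the larger of 5 and 10
max.increment(3);   // value stays 10; smaller increments do not reduce a *_MAX counter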

Example 3 with Group

use of org.apache.hadoop.mapred.Counters.Group in project incubator-systemml by apache.

the class CSVReblockMR method runAssignRowIDMRJob.

public static AssignRowIDMRReturn runAssignRowIDMRJob(String[] inputs, InputInfo[] inputInfos, int[] brlens, int[] bclens, String reblockInstructions, int replication, String[] smallestFiles, boolean transform, String naStrings, String spec) throws Exception {
    AssignRowIDMRReturn ret = new AssignRowIDMRReturn();
    JobConf job;
    job = new JobConf(CSVReblockMR.class);
    job.setJobName("Assign-RowID-MR");
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++) realIndexes[b] = b;
    // set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false, ConvertTarget.CELL);
    job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);
    // set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);
    // set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    // set up custom map/reduce configurations
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);
    // set up the number of reducers
    job.setNumReduceTasks(1);
    // Print the complete instruction
    //if (LOG.isTraceEnabled())
    //inst.printCompelteMRJobInstruction();
    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVAssignRowIDMapper.class);
    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(OffsetCount.class);
    // configure reducer
    job.setReducerClass(CSVAssignRowIDReducer.class);
    // turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);
    // set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    // set up the output file
    ret.counterFile = new Path(MRJobConfiguration.constructTempOutputFilename());
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, ret.counterFile);
    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(OffsetCount.class);
    // setup properties relevant to transform
    job.setBoolean(MRJobConfiguration.TF_TRANSFORM, transform);
    if (transform) {
        if (naStrings != null)
            // Adding "dummy" string to handle the case of na_strings = ""
            job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(naStrings));
        job.set(MRJobConfiguration.TF_SPEC, spec);
    }
    RunningJob runjob = JobClient.runJob(job);
    /* Process different counters */
    Group rgroup = runjob.getCounters().getGroup(NUM_ROWS_IN_MATRIX);
    Group cgroup = runjob.getCounters().getGroup(NUM_COLS_IN_MATRIX);
    ret.rlens = new long[inputs.length];
    ret.clens = new long[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        // per-input row and column counts read back from the MR counters
        ret.rlens[i] = rgroup.getCounter(Integer.toString(i));
        ret.clens[i] = cgroup.getCounter(Integer.toString(i));
    }
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)
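
The tail of this method shows the recurring SystemML pattern: block on JobClient.runJob, then read one counter per input index out of a named group. A minimal standalone sketch of just that read-back step; the per-index counter names ("0", "1", ...) follow the convention above, while the class and method names are illustrative:

import java.io.IOException;

import org.apache.hadoop.mapred.Counters.Group;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class CounterReadBack {
    // Runs the configured job, then returns one counter value per input index.
    public static long[] readPerInputCounters(JobConf job, String groupName, int numInputs) throws IOException {
        RunningJob runjob = JobClient.runJob(job); // blocks until the job completes
        Group group = runjob.getCounters().getGroup(groupName); // empty group if nothing was counted
        long[] values = new long[numInputs];
        for (int i = 0; i < numInputs; i++) {
            values[i] = group.getCounter(Integer.toString(i)); // 0 if this counter was never incremented
        }
        return values;
    }
}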

Example 4 with Group

use of org.apache.hadoop.mapred.Counters.Group in project incubator-systemml by apache.

the class ApplyTfBBMR method runJob.

public static JobReturn runJob(String inputPath, String rblkInst, String otherInst, String spec, String mapsPath, String tmpPath, String outputPath, String partOffsetsFile, CSVFileFormatProperties inputDataProperties, long numRows, long numColsBefore, long numColsAfter, int replication, String headerLine) throws Exception {
    CSVReblockInstruction rblk = (CSVReblockInstruction) InstructionParser.parseSingleInstruction(rblkInst);
    long[] rlens = new long[] { numRows };
    long[] clens = new long[] { numColsAfter };
    int[] brlens = new int[] { rblk.brlen };
    int[] bclens = new int[] { rblk.bclen };
    byte[] realIndexes = new byte[] { rblk.input };
    byte[] resultIndexes = new byte[] { rblk.output };
    JobConf job = new JobConf(ApplyTfBBMR.class);
    job.setJobName("ApplyTfBB");
    /* Setup MapReduce Job */
    job.setJarByClass(ApplyTfBBMR.class);
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, new String[] { inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, brlens, bclens, false, ConvertTarget.CELL);
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
    MRJobConfiguration.setCSVReblockInstructions(job, rblkInst);
    // set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInst);
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    // set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, null, rblkInst, null, otherInst, resultIndexes);
    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, null, rblkInst, null, null, null, resultIndexes, mapoutputIndexes, false);
    // set up the number of reducers
    int numRed = WriteCSVMR.determineNumReducers(rlens, clens, ConfigurationManager.getNumReducers(), ret.numReducerGroups);
    job.setNumReduceTasks(numRed);
    // set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, new byte[] { rblk.output }, new byte[] { 0 }, new String[] { outputPath }, new OutputInfo[] { OutputInfo.BinaryBlockOutputInfo }, true, false);
    // configure mapper and the mapper output key value pairs
    job.setMapperClass(ApplyTfBBMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(BlockRow.class);
    // configure reducer
    job.setReducerClass(CSVReblockReducer.class);
    // turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);
    // set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    // Add transformation metadata file as well as partOffsetsFile to Distributed cache
    DistributedCache.addCacheFile((new Path(mapsPath)).toUri(), job);
    DistributedCache.createSymlink(job);
    Path cachefile = new Path(new Path(partOffsetsFile), "part-00000");
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);
    job.set(MRJobConfiguration.TF_HAS_HEADER, Boolean.toString(inputDataProperties.hasHeader()));
    job.set(MRJobConfiguration.TF_DELIM, inputDataProperties.getDelim());
    // Adding "dummy" string to handle the case of na_strings = ""
    if (inputDataProperties.getNAStrings() != null)
        job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(inputDataProperties.getNAStrings()));
    job.set(MRJobConfiguration.TF_SPEC, spec);
    job.set(MRJobConfiguration.TF_SMALLEST_FILE, CSVReblockMR.findSmallestFile(job, inputPath));
    job.set(MRJobConfiguration.OUTPUT_MATRICES_DIRS_CONFIG, outputPath);
    job.setLong(MRJobConfiguration.TF_NUM_COLS, numColsBefore);
    job.set(MRJobConfiguration.TF_TXMTD_PATH, mapsPath);
    job.set(MRJobConfiguration.TF_HEADER, headerLine);
    job.set(CSVReblockMR.ROWID_FILE_NAME, cachefile.toString());
    job.set(MRJobConfiguration.TF_TMP_LOC, tmpPath);
    RunningJob runjob = JobClient.runJob(job);
    MapReduceTool.deleteFileIfExistOnHDFS(cachefile, job);
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        ret.stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
    }
    return new JobReturn(ret.stats, runjob.isSuccessful());
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.hadoop.mapred.Counters.Group) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) MatrixChar_N_ReducerGroups(org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)
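
The DistributedCache calls above cover only the submission side. For completeness, a hedged sketch of the matching read side in an old-API mapper; the class name and error handling are illustrative, not part of SystemML:

import java.io.IOException;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

public class CacheAwareMapperBase extends MapReduceBase {
    protected Path[] localCacheFiles;

    @Override
    public void configure(JobConf job) {
        try {
            // Task-local copies of the files registered via addCacheFile() on the driver.
            localCacheFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException e) {
            throw new RuntimeException("Could not resolve distributed cache files", e);
        }
    }
}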

Example 5 with Group

use of org.apache.hadoop.mapred.Counters.Group in project incubator-systemml by apache.

the class RemoteParForMR method runJob.

public static RemoteParForJobReturn runJob(
        // inputs
        long pfid, String program, String taskFile, String resultFile, MatrixObject colocatedDPMatrixObj,
        // opt params
        boolean enableCPCaching, int numMappers, int replication, int max_retry, long minMem, boolean jvmReuse) {
    RemoteParForJobReturn ret = null;
    String jobname = "ParFor-EMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    JobConf job;
    job = new JobConf(RemoteParForMR.class);
    job.setJobName(jobname + pfid);
    // maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();
    try {
        // ///
        // configure the MR job
        // set arbitrary CP program blocks that will perform in the mapper
        MRJobConfiguration.setProgramBlocks(job, program);
        // enable/disable caching
        MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
        // set mappers, reducers, combiners
        // map-only
        job.setMapperClass(RemoteParWorkerMapper.class);
        // set input format (one split per row, NLineInputFormat default N=1)
        if (ParForProgramBlock.ALLOW_DATA_COLOCATION && colocatedDPMatrixObj != null) {
            job.setInputFormat(RemoteParForColocatedNLineInputFormat.class);
            MRJobConfiguration.setPartitioningFormat(job, colocatedDPMatrixObj.getPartitionFormat());
            MatrixCharacteristics mc = colocatedDPMatrixObj.getMatrixCharacteristics();
            MRJobConfiguration.setPartitioningBlockNumRows(job, mc.getRowsPerBlock());
            MRJobConfiguration.setPartitioningBlockNumCols(job, mc.getColsPerBlock());
            MRJobConfiguration.setPartitioningFilename(job, colocatedDPMatrixObj.getFileName());
    } else {
        // default case
        job.setInputFormat(NLineInputFormat.class);
    }
        // set the input path and output path
        FileInputFormat.setInputPaths(job, new Path(taskFile));
        // set output format
        job.setOutputFormat(SequenceFileOutputFormat.class);
        // set output path
        MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
        FileOutputFormat.setOutputPath(job, new Path(resultFile));
        // set the output key, value schema
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        // ////
        // set optimization parameters
        // set the number of mappers and reducers
        job.setNumMapTasks(numMappers);
        job.setNumReduceTasks(0);
        // job.setInt("mapred.map.tasks.maximum", 1); //system property
        // job.setInt("mapred.tasktracker.tasks.maximum",1); //system property
        // job.setInt("mapred.jobtracker.maxtasks.per.job",1); //system property
        // set jvm memory size (if required)
        String memKey = MRConfigurationNames.MR_CHILD_JAVA_OPTS;
        if (minMem > 0 && minMem > InfrastructureAnalyzer.extractMaxMemoryOpt(job.get(memKey))) {
            InfrastructureAnalyzer.setMaxMemoryOpt(job, memKey, minMem);
            LOG.warn("Forcing '" + memKey + "' to -Xmx" + minMem / (1024 * 1024) + "M.");
        }
        // disable automatic tasks timeouts and speculative task exec
        job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
        job.setMapSpeculativeExecution(false);
        // set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getDMLConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
        // set up custom map/reduce configurations
        MRJobConfiguration.setupCustomMRConfigurations(job, config);
        // enables the reuse of JVMs (multiple tasks per MR task)
        if (jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); // unlimited
        // set sort io buffer (reduce unnecessary large io buffer, guaranteed memory consumption)
        job.setInt(MRConfigurationNames.MR_TASK_IO_SORT_MB, 8); // 8MB
        // set the replication factor for the results
        job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
        // set the max number of retries per map task
        // disabled job-level configuration to respect cluster configuration
        // note: this refers to hadoop2, hence it never had effect on mr1
        // job.setInt(MRConfigurationNames.MR_MAP_MAXATTEMPTS, max_retry);
        // set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);
        // ///
        // execute the MR job
        RunningJob runjob = JobClient.runJob(job);
        // Process different counters
        Statistics.incrementNoOfExecutedMRJobs();
        Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
        int numTasks = (int) pgroup.getCounter(Stat.PARFOR_NUMTASKS.toString());
        int numIters = (int) pgroup.getCounter(Stat.PARFOR_NUMITERS.toString());
        if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode()) {
            Statistics.incrementJITCompileTime(pgroup.getCounter(Stat.PARFOR_JITCOMPILE.toString()));
            Statistics.incrementJVMgcCount(pgroup.getCounter(Stat.PARFOR_JVMGC_COUNT.toString()));
            Statistics.incrementJVMgcTime(pgroup.getCounter(Stat.PARFOR_JVMGC_TIME.toString()));
            Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
            CacheStatistics.incrementMemHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_MEM.toString()));
            CacheStatistics.incrementFSBuffHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString()));
            CacheStatistics.incrementFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FS.toString()));
            CacheStatistics.incrementHDFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_HDFS.toString()));
            CacheStatistics.incrementFSBuffWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString()));
            CacheStatistics.incrementFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FS.toString()));
            CacheStatistics.incrementHDFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_HDFS.toString()));
            CacheStatistics.incrementAcquireRTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQR.toString()));
            CacheStatistics.incrementAcquireMTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQM.toString()));
            CacheStatistics.incrementReleaseTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_RLS.toString()));
            CacheStatistics.incrementExportTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_EXP.toString()));
        }
        // read all files of result variables and prepare for return
        LocalVariableMap[] results = readResultFile(job, resultFile);
        ret = new RemoteParForJobReturn(runjob.isSuccessful(), numTasks, numIters, results);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    } finally {
        // remove created files
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(taskFile), job);
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        }
    }
    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)
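
The statistics block above repeats one micro-pattern many times: look up a group, then fetch a counter by the string form of an enum constant. A tiny hedged helper that captures it (the class and method names are illustrative, not part of SystemML):

import java.io.IOException;

import org.apache.hadoop.mapred.RunningJob;

public final class CounterLookups {
    // Fetches a single counter value, using the enum constant's name as the counter name.
    public static long counterOf(RunningJob job, String groupName, Enum<?> key) throws IOException {
        return job.getCounters().getGroup(groupName).getCounter(key.toString());
    }
}

For example, counterOf(runjob, ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_NUMTASKS) would replace the two-step group-then-counter lookup above.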

Aggregations

Group (org.apache.hadoop.mapred.Counters.Group) 33
JobConf (org.apache.hadoop.mapred.JobConf) 26
RunningJob (org.apache.hadoop.mapred.RunningJob) 26
DMLConfig (org.apache.sysml.conf.DMLConfig) 23
Path (org.apache.hadoop.fs.Path) 14
MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups) 11
IOException (java.io.IOException) 7
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 6
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo) 6
FrameworkCounterGroup (org.apache.hadoop.mapreduce.counters.FrameworkCounterGroup) 5
Test (org.junit.Test) 5
Counter (org.apache.hadoop.mapred.Counters.Counter) 4
LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap) 4
TaggedMatrixBlock (org.apache.sysml.runtime.matrix.data.TaggedMatrixBlock) 4
GroupFactory (org.apache.hadoop.mapred.Counters.GroupFactory) 3
FrameworkGroupFactory (org.apache.hadoop.mapreduce.counters.CounterGroupFactory.FrameworkGroupFactory) 3
PrintWriter (java.io.PrintWriter) 2
URI (java.net.URI) 2
HashMap (java.util.HashMap) 2
Well1024a (org.apache.commons.math3.random.Well1024a) 2