Search in sources :

Example 16 with Group

use of org.apache.hadoop.mapred.Counters.Group in project incubator-systemml by apache.

the class WriteCSVMR method runJob.

/**
 * Configures and runs the "WriteCSV-MR" MapReduce job and collects its result statistics.
 *
 * <p>The number of reduce tasks is derived from the matrix dimensions and the
 * {@code DMLConfig.NUM_REDUCERS} setting via {@code determineNumReducers}; the
 * {@code numReducers} parameter is not read by this method.
 *
 * @param inst                 job instruction, only used for trace logging of the complete instruction
 * @param inputs               input file names
 * @param inputInfos           format information of the inputs
 * @param rlens                number of rows per input matrix
 * @param clens                number of columns per input matrix
 * @param brlens               row block sizes per input matrix
 * @param bclens               column block sizes per input matrix
 * @param csvWriteInstructions CSV write instructions, parsed to map inputs to outputs
 * @param numReducers          unused here (kept for interface compatibility)
 * @param replication          replication factor set on the job for the results
 * @param resultIndexes        indexes of the result matrices
 * @param outputs              output file names
 * @return the job result holding output statistics, output infos and the success flag
 * @throws IOException if any input matrix has zero rows or zero columns
 * @throws Exception   if job setup or execution fails
 */
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String csvWriteInstructions, int numReducers, int replication, byte[] resultIndexes, String[] outputs) throws Exception {
    JobConf job = new JobConf(WriteCSVMR.class);
    job.setJobName("WriteCSV-MR");
    // check for valid output dimensions
    for (int i = 0; i < rlens.length; i++) {
        if (rlens[i] == 0 || clens[i] == 0) {
            throw new IOException("Write of matrices with zero" + " rows or columns not supported (" + rlens[i] + "x" + clens[i] + ").");
        }
    }
    // inputs are addressed by their position: realIndexes[k] == k
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++) {
        realIndexes[b] = b;
    }
    // set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true, ConvertTarget.CSVWRITE);
    // set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
    // set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
    MRJobConfiguration.setCSVWriteInstructions(job, csvWriteInstructions);
    // set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) {
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    }
    // set up custom map/reduce configurations
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);
    // largest row count over all inputs
    long maxRlen = 0;
    for (long rlen : rlens) {
        if (rlen > maxRlen) {
            maxRlen = rlen;
        }
    }
    // set up the number of reducers (according to output size)
    // NOTE(review): the narrowing cast truncates if maxRlen exceeds the int range - confirm this is acceptable
    int numRed = determineNumReducers(rlens, clens, config.getIntValue(DMLConfig.NUM_REDUCERS), (int) maxRlen);
    job.setNumReduceTasks(numRed);
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    OutputInfo[] outputInfos = new OutputInfo[outputs.length];
    // maps a result index to its position in stats/outputInfos
    HashMap<Byte, Integer> indexmap = new HashMap<>();
    for (int i = 0; i < stats.length; i++) {
        indexmap.put(resultIndexes[i], i);
        resultDimsUnknown[i] = (byte) 0;
        stats[i] = new MatrixCharacteristics();
        outputInfos[i] = OutputInfo.CSVOutputInfo;
    }
    // each output takes over the dimensions of the input it is written from
    CSVWriteInstruction[] ins = MRInstructionParser.parseCSVWriteInstructions(csvWriteInstructions);
    for (CSVWriteInstruction in : ins) {
        stats[indexmap.get(in.output)].set(rlens[in.input], clens[in.input], -1, -1);
    }
    // Print the complete instruction
    if (LOG.isTraceEnabled()) {
        inst.printCompleteMRJobInstruction(stats);
    }
    // set up what matrices are needed to pass from the mapper to reducer
    MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, "", "", csvWriteInstructions, resultIndexes);
    // set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true, true);
    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVWriteMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(MatrixBlock.class);
    // configure reducer
    job.setReducerClass(CSVWriteReducer.class);
    job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
    job.setPartitionerClass(TaggedFirstSecondIndexes.FirstIndexRangePartitioner.class);
    // set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    RunningJob runjob = JobClient.runJob(job);
    /* Process different counters */
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        // number of non-zeros, read from the counter named after the output position
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
Also used : Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) HashMap(java.util.HashMap) IOException(java.io.IOException) TaggedFirstSecondIndexes(org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf) CSVWriteInstruction(org.apache.sysml.runtime.instructions.mr.CSVWriteInstruction)

Example 17 with Group

use of org.apache.hadoop.mapred.Counters.Group in project hive by apache.

the class HiveHistoryImpl method setTaskCounters.

/**
 * Stores the given counters in the history entry of a task.
 *
 * <p>All counters are serialized as a comma-separated list of
 * {@code group.counter:value} entries under {@code Keys.TASK_COUNTERS}. Counters for
 * which {@code getRowCountTableName} yields a table name are additionally serialized
 * as {@code table~value} entries under {@code Keys.ROWS_INSERTED}, on both the task
 * entry and, when the query is known, the query entry.
 *
 * <p>Nothing is recorded when the task is unknown or {@code ctrs} is {@code null}.
 * Failures while reading counters are logged and whatever was collected up to that
 * point is still stored.
 *
 * @param queryId id of the query the task belongs to
 * @param taskId  id of the task within the query
 * @param ctrs    counters to record; may be {@code null}
 */
@Override
public void setTaskCounters(String queryId, String taskId, Counters ctrs) {
    String id = queryId + ":" + taskId;
    // May be null if the query is not registered; query-level updates are skipped in that case.
    QueryInfo ji = queryInfoMap.get(queryId);
    TaskInfo ti = taskInfoMap.get(id);
    if ((ti == null) || (ctrs == null)) {
        return;
    }
    StringBuilder rowCounts = new StringBuilder();
    StringBuilder allCounters = new StringBuilder();
    try {
        boolean first = true;
        for (Group group : ctrs) {
            for (Counter counter : group) {
                if (first) {
                    first = false;
                } else {
                    allCounters.append(',');
                }
                allCounters.append(group.getDisplayName());
                allCounters.append('.');
                allCounters.append(counter.getDisplayName());
                allCounters.append(':');
                allCounters.append(counter.getCounter());
                String tab = getRowCountTableName(counter.getDisplayName());
                if (tab != null) {
                    if (rowCounts.length() > 0) {
                        rowCounts.append(",");
                    }
                    rowCounts.append(tab);
                    rowCounts.append('~');
                    rowCounts.append(counter.getCounter());
                    // Guarded: an unknown query previously caused a NullPointerException here,
                    // which aborted the collection of all remaining counters.
                    if (ji != null) {
                        ji.rowCountMap.put(tab, counter.getCounter());
                    }
                }
            }
        }
    } catch (Exception e) {
        // best effort: keep what was collected so far
        LOG.warn(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
    if (rowCounts.length() > 0) {
        String rowsInserted = rowCounts.toString();
        ti.hm.put(Keys.ROWS_INSERTED.name(), rowsInserted);
        if (ji != null) {
            ji.hm.put(Keys.ROWS_INSERTED.name(), rowsInserted);
        }
    }
    if (allCounters.length() > 0) {
        ti.hm.put(Keys.TASK_COUNTERS.name(), allCounters.toString());
    }
}
Also used : Group(org.apache.hadoop.mapred.Counters.Group) Counter(org.apache.hadoop.mapred.Counters.Counter) IOException(java.io.IOException)

Example 18 with Group

use of org.apache.hadoop.mapred.Counters.Group in project hadoop by apache.

the class GroupFactoryForTest method testFrameworkCounter.

/**
 * Looks up counters by name in the framework counter group created for
 * {@code JobCounter}: a known name must yield a counter, an unknown name must
 * yield {@code null}.
 */
@SuppressWarnings("rawtypes")
@Test
public void testFrameworkCounter() {
    GroupFactory factory = new GroupFactoryForTest();
    FrameworkGroupFactory jobCounterGroups = factory.newFrameworkGroupFactory(JobCounter.class);
    Group wrapper = (Group) jobCounterGroups.newGroup("JobCounter");
    FrameworkCounterGroup underlying = (FrameworkCounterGroup) wrapper.getUnderlyingGroup();

    // the name of an existing JobCounter constant resolves to a counter
    Assert.assertNotNull(underlying.findCounter(JobCounter.NUM_FAILED_MAPS.toString()));

    // an unknown name must neither throw nor produce a counter
    Assert.assertNull(underlying.findCounter("Unknown"));
}
Also used : FrameworkCounterGroup(org.apache.hadoop.mapreduce.counters.FrameworkCounterGroup) Group(org.apache.hadoop.mapred.Counters.Group) FrameworkCounterGroup(org.apache.hadoop.mapreduce.counters.FrameworkCounterGroup) FrameworkGroupFactory(org.apache.hadoop.mapreduce.counters.CounterGroupFactory.FrameworkGroupFactory) GroupFactory(org.apache.hadoop.mapred.Counters.GroupFactory) FrameworkGroupFactory(org.apache.hadoop.mapreduce.counters.CounterGroupFactory.FrameworkGroupFactory) Test(org.junit.Test)

Example 19 with Group

use of org.apache.hadoop.mapred.Counters.Group in project hadoop by apache.

the class GroupFactoryForTest method testGroupIteratorConcurrency.

/**
 * Obtains an iterator over a counter group, adds another counter to that group
 * and then advances the iterator. There are no assertions; the test passes as
 * long as {@code next()} does not throw.
 */
@SuppressWarnings("deprecation")
@Test
public void testGroupIteratorConcurrency() {
    Counters allCounters = new Counters();
    allCounters.incrCounter("group1", "counter1", 1);

    // iterator is created while the group holds only "counter1"
    Iterator<Counter> counterIt = allCounters.getGroup("group1").iterator();

    // modify the group after the iterator was obtained, then keep iterating
    allCounters.incrCounter("group1", "counter2", 1);
    counterIt.next();
}
Also used : FrameworkCounterGroup(org.apache.hadoop.mapreduce.counters.FrameworkCounterGroup) Group(org.apache.hadoop.mapred.Counters.Group) TaskCounter(org.apache.hadoop.mapreduce.TaskCounter) FileSystemCounter(org.apache.hadoop.mapreduce.FileSystemCounter) Counter(org.apache.hadoop.mapred.Counters.Counter) JobCounter(org.apache.hadoop.mapreduce.JobCounter) Test(org.junit.Test)

Example 20 with Group

use of org.apache.hadoop.mapred.Counters.Group in project hadoop by apache.

the class GroupFactoryForTest method testFilesystemCounter.

/**
 * Looks up counters by name in the file system counter group: a known name
 * must yield a counter, an unknown name must yield {@code null}.
 */
@Test
public void testFilesystemCounter() {
    GroupFactory factory = new GroupFactoryForTest();
    Group fileSystemCounters = factory.newFileSystemGroup();

    // "ANY_BYTES_READ" is expected to resolve to a counter
    Assert.assertNotNull(fileSystemCounters.findCounter("ANY_BYTES_READ"));

    // an unknown name must neither throw nor produce a counter
    Assert.assertNull(fileSystemCounters.findCounter("Unknown"));
}
Also used : FrameworkCounterGroup(org.apache.hadoop.mapreduce.counters.FrameworkCounterGroup) Group(org.apache.hadoop.mapred.Counters.Group) FrameworkGroupFactory(org.apache.hadoop.mapreduce.counters.CounterGroupFactory.FrameworkGroupFactory) GroupFactory(org.apache.hadoop.mapred.Counters.GroupFactory) Test(org.junit.Test)

Aggregations

Group (org.apache.hadoop.mapred.Counters.Group)21 JobConf (org.apache.hadoop.mapred.JobConf)14 RunningJob (org.apache.hadoop.mapred.RunningJob)14 DMLConfig (org.apache.sysml.conf.DMLConfig)12 Path (org.apache.hadoop.fs.Path)8 MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups)6 FrameworkCounterGroup (org.apache.hadoop.mapreduce.counters.FrameworkCounterGroup)5 Test (org.junit.Test)5 IOException (java.io.IOException)4 Counter (org.apache.hadoop.mapred.Counters.Counter)4 GroupFactory (org.apache.hadoop.mapred.Counters.GroupFactory)3 FrameworkGroupFactory (org.apache.hadoop.mapreduce.counters.CounterGroupFactory.FrameworkGroupFactory)3 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)3 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)3 FileSystemCounter (org.apache.hadoop.mapreduce.FileSystemCounter)2 JobCounter (org.apache.hadoop.mapreduce.JobCounter)2 TaskCounter (org.apache.hadoop.mapreduce.TaskCounter)2 LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap)2 TaggedMatrixBlock (org.apache.sysml.runtime.matrix.data.TaggedMatrixBlock)2 PrintWriter (java.io.PrintWriter)1