Use of org.apache.hadoop.mapred.RunningJob in project voldemort by voldemort.
The class HadoopStoreBuilder, method build().
/**
* Run the job
*/
public void build() {
try {
JobConf conf = prepareJobConf(baseJobConf);
FileSystem fs = outputDir.getFileSystem(conf);
if (fs.exists(outputDir)) {
info("Deleting previous output in " + outputDir + " for building store " + this.storeDef.getName());
fs.delete(outputDir, true);
}
conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
conf.set("stores.xml", new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
conf.setBoolean(VoldemortBuildAndPushJob.SAVE_KEYS, saveKeys);
conf.setBoolean(VoldemortBuildAndPushJob.REDUCER_PER_BUCKET, reducerPerBucket);
conf.setBoolean(VoldemortBuildAndPushJob.BUILD_PRIMARY_REPLICAS_ONLY, buildPrimaryReplicasOnly);
if (!isAvro) {
conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
conf.setMapperClass(mapperClass);
conf.setMapOutputKeyClass(BytesWritable.class);
conf.setMapOutputValueClass(BytesWritable.class);
conf.setReducerClass(HadoopStoreBuilderReducer.class);
}
conf.setInputFormat(inputFormatClass);
conf.setOutputFormat(SequenceFileOutputFormat.class);
conf.setOutputKeyClass(BytesWritable.class);
conf.setOutputValueClass(BytesWritable.class);
conf.setJarByClass(getClass());
conf.setReduceSpeculativeExecution(false);
FileInputFormat.setInputPaths(conf, inputPath);
conf.set("final.output.dir", outputDir.toString());
conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
conf.set("dfs.umaskmode", "002");
FileOutputFormat.setOutputPath(conf, tempDir);
FileSystem outputFs = outputDir.getFileSystem(conf);
if (outputFs.exists(outputDir)) {
throw new IOException("Final output directory already exists.");
}
// Delete the temp output dir if it already exists
FileSystem tempFs = tempDir.getFileSystem(conf);
tempFs.delete(tempDir, true);
long size = sizeOfPath(tempFs, inputPath);
logger.info("Data size = " + size + ", replication factor = " + storeDef.getReplicationFactor() + ", numNodes = " + cluster.getNumberOfNodes() + ", numPartitions = " + cluster.getNumberOfPartitions() + ", chunk size = " + chunkSizeBytes);
// Base numbers of chunks and reducers; these get modified below according to various settings
int numChunks = (int) (size / cluster.getNumberOfPartitions() / chunkSizeBytes) + 1; /* +1 so we round up */
int numReducers = cluster.getNumberOfPartitions();
// In saveKeys mode, replicas are handled via extra reducers rather than extra chunks. Whether that
// distinction still makes sense is an open question, but in order to avoid breaking anything we'll just maintain the original behavior.
if (saveKeys) {
if (buildPrimaryReplicasOnly) {
// The buildPrimaryReplicasOnly mode is supported exclusively in combination with
// saveKeys. If enabled, then we don't want to shuffle extra keys redundantly,
// hence we don't change the number of reducers.
} else {
// Old behavior, where all keys are redundantly shuffled to redundant reducers.
numReducers = numReducers * storeDef.getReplicationFactor();
}
} else {
numChunks = numChunks * storeDef.getReplicationFactor();
}
// Ensure at least one chunk
numChunks = Math.max(numChunks, 1);
if (reducerPerBucket) {
// Then all chunks for a given partition/replica combination are shuffled to the same
// reducer, hence, the number of reducers remains the same as previously defined.
} else {
// Otherwise, we want one reducer per chunk, hence we multiply the number of reducers.
numReducers = numReducers * numChunks;
}
conf.setInt(AbstractStoreBuilderConfigurable.NUM_CHUNKS, numChunks);
conf.setNumReduceTasks(numReducers);
logger.info("Number of chunks: " + numChunks + ", number of reducers: " + numReducers + ", save keys: " + saveKeys + ", reducerPerBucket: " + reducerPerBucket + ", buildPrimaryReplicasOnly: " + buildPrimaryReplicasOnly);
if (isAvro) {
conf.setPartitionerClass(AvroStoreBuilderPartitioner.class);
// conf.setMapperClass(mapperClass);
conf.setMapOutputKeyClass(ByteBuffer.class);
conf.setMapOutputValueClass(ByteBuffer.class);
conf.setInputFormat(inputFormatClass);
conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
conf.setOutputKeyClass(ByteBuffer.class);
conf.setOutputValueClass(ByteBuffer.class);
// AvroJob confs for the avro mapper
AvroJob.setInputSchema(conf, Schema.parse(baseJobConf.get(AVRO_REC_SCHEMA)));
AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));
AvroJob.setMapperClass(conf, mapperClass);
conf.setReducerClass(AvroStoreBuilderReducer.class);
}
logger.info("Building store...");
// The snippet below is copied and adapted from JobClient.runJob(conf);
// this gives us more control over error handling.
JobClient jc = new JobClient(conf);
RunningJob runningJob = jc.submitJob(conf);
Counters counters;
try {
if (!jc.monitorAndPrintJob(conf, runningJob)) {
counters = runningJob.getCounters();
// For some datasets, the number of chunks that we calculated is inadequate.
// Here, we try to identify if this is the case.
long mapOutputBytes = counters.getCounter(Task.Counter.MAP_OUTPUT_BYTES);
long averageNumberOfBytesPerChunk = mapOutputBytes / numChunks / cluster.getNumberOfPartitions();
if (averageNumberOfBytesPerChunk > (HadoopStoreWriter.DEFAULT_CHUNK_SIZE)) {
float chunkSizeBloat = averageNumberOfBytesPerChunk / (float) HadoopStoreWriter.DEFAULT_CHUNK_SIZE;
long suggestedTargetChunkSize = (long) (HadoopStoreWriter.DEFAULT_CHUNK_SIZE / chunkSizeBloat);
logger.error("The number of bytes per chunk may be too high." + " averageNumberOfBytesPerChunk = " + averageNumberOfBytesPerChunk + ". Consider setting " + VoldemortBuildAndPushJob.BUILD_CHUNK_SIZE + "=" + suggestedTargetChunkSize);
} else {
logger.error("Job Failed: " + runningJob.getFailureInfo());
}
throw new VoldemortException("BnP's MapReduce job failed.");
}
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
}
counters = runningJob.getCounters();
long numberOfRecords = counters.getCounter(Task.Counter.REDUCE_INPUT_GROUPS);
if (numberOfRecords < minNumberOfRecords) {
throw new VoldemortException("The number of records in the data set (" + numberOfRecords + ") is lower than the minimum required (" + minNumberOfRecords + "). Aborting.");
}
if (saveKeys) {
logger.info("Number of collisions in the job - " + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
logger.info("Maximum number of collisions for one entry - " + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
}
// Do a CheckSumOfCheckSum - Similar to HDFS
CheckSum checkSumGenerator = CheckSum.getInstance(this.checkSumType);
if (!this.checkSumType.equals(CheckSumType.NONE) && checkSumGenerator == null) {
throw new VoldemortException("Could not generate checksum digest for type " + this.checkSumType);
}
List<Integer> directorySuffixes = Lists.newArrayList();
if (buildPrimaryReplicasOnly) {
// Files are grouped by partitions
for (int partitionId = 0; partitionId < cluster.getNumberOfPartitions(); partitionId++) {
directorySuffixes.add(partitionId);
}
} else {
// Files are grouped by node
for (Node node : cluster.getNodes()) {
directorySuffixes.add(node.getId());
}
}
ReadOnlyStorageMetadata fullStoreMetadata = new ReadOnlyStorageMetadata();
List<Integer> emptyDirectories = Lists.newArrayList();
final String directoryPrefix = buildPrimaryReplicasOnly ? ReadOnlyUtils.PARTITION_DIRECTORY_PREFIX : ReadOnlyUtils.NODE_DIRECTORY_PREFIX;
// Log progress every 30 seconds, or after every LOG_INTERVAL_COUNT directories (100 in partition mode, 5 in node mode).
final long LOG_INTERVAL_TIME = TimeUnit.MILLISECONDS.convert(30, TimeUnit.SECONDS);
final int LOG_INTERVAL_COUNT = buildPrimaryReplicasOnly ? 100 : 5;
int lastLogCount = 0;
long lastLogTime = 0;
long startTimeMS = System.currentTimeMillis();
// Ensure each output directory exists and write its format/checksum metadata file
for (int index = 0; index < directorySuffixes.size(); index++) {
int directorySuffix = directorySuffixes.get(index);
long elapsedTime = System.currentTimeMillis() - lastLogTime;
long elapsedCount = index - lastLogCount;
if (elapsedTime >= LOG_INTERVAL_TIME || elapsedCount >= LOG_INTERVAL_COUNT) {
lastLogTime = System.currentTimeMillis();
lastLogCount = index;
logger.info("Processed " + directorySuffix + " out of " + directorySuffixes.size() + " directories.");
}
String directoryName = directoryPrefix + directorySuffix;
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
if (saveKeys) {
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
} else {
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V1.getCode());
}
Path directoryPath = new Path(outputDir.toString(), directoryName);
if (!outputFs.exists(directoryPath)) {
logger.debug("No data generated for " + directoryName + ". Generating empty folder");
emptyDirectories.add(directorySuffix);
// Create empty folder
outputFs.mkdirs(directoryPath);
outputFs.setPermission(directoryPath, new FsPermission(HADOOP_FILE_PERMISSION));
logger.debug("Setting permission to 755 for " + directoryPath);
}
processCheckSumMetadataFile(directoryName, outputFs, checkSumGenerator, directoryPath, metadata);
if (buildPrimaryReplicasOnly) {
// In buildPrimaryReplicasOnly mode, writing a metadata file for each partition
// takes too long, so we skip it. We will rely on the full-store.metadata file instead.
} else {
// Maintaining the old behavior: we write the node-specific metadata file
writeMetadataFile(directoryPath, outputFs, ReadOnlyUtils.METADATA_FILE_EXTENSION, metadata);
}
fullStoreMetadata.addNestedMetadata(directoryName, metadata);
}
// Write the aggregate metadata file
writeMetadataFile(outputDir, outputFs, ReadOnlyUtils.FULL_STORE_METADATA_FILE, fullStoreMetadata);
long elapsedTimeMs = System.currentTimeMillis() - startTimeMS;
long elapsedTimeSeconds = TimeUnit.SECONDS.convert(elapsedTimeMs, TimeUnit.MILLISECONDS);
logger.info("Total Processed directories: " + directorySuffixes.size() + ". Elapsed Time (Seconds):" + elapsedTimeSeconds);
if (emptyDirectories.size() > 0) {
logger.info("Empty directories: " + Arrays.toString(emptyDirectories.toArray()));
}
} catch (Exception e) {
logger.error("Error in Store builder", e);
throw new VoldemortException(e);
}
}
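The core RunningJob pattern in build() is submit-then-monitor: JobClient.submitJob() returns a RunningJob handle, JobClient.monitorAndPrintJob() blocks until the job finishes, and the counters are then read from the handle to decide how to report failure. Below is a minimal sketch of that pattern with the Voldemort-specific configuration stripped out; the class name, exception type, and messages are illustrative, not taken from the project.

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.Task;

public class SubmitAndMonitorSketch {
    public static void runWithCounters(JobConf conf) throws Exception {
        JobClient jc = new JobClient(conf);
        // Submit asynchronously and keep the RunningJob handle.
        RunningJob runningJob = jc.submitJob(conf);
        // Block until completion, printing progress along the way.
        boolean success = jc.monitorAndPrintJob(conf, runningJob);
        Counters counters = runningJob.getCounters();
        if (!success) {
            // Inspect framework counters to explain the failure before giving up.
            long mapOutputBytes = counters.getCounter(Task.Counter.MAP_OUTPUT_BYTES);
            throw new RuntimeException("Job failed after emitting " + mapOutputBytes
                    + " map output bytes: " + runningJob.getFailureInfo());
        }
        System.out.println("Reduce input groups: "
                + counters.getCounter(Task.Counter.REDUCE_INPUT_GROUPS));
    }
}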
Use of org.apache.hadoop.mapred.RunningJob in project HI-labs by elephantscale.
The class PrintCounters, method main().
public static void main(String[] args) throws Exception {
if (args.length != 1) {
System.out.println("Usage : java PrintCounters <job id>");
System.exit(1);
}
RunningJob job = new JobClient().getJob(JobID.forName(args[0]));
// RunningJob job = new JobClient().getJob(new JobID("job_201309211454", 9));
// RunningJob job = new JobClient().getJob(args[0]);
Counters counters = job.getCounters();
for (Counters.Group group : counters) {
System.out.println("- Counter Group: " + group.getDisplayName() + " (" + group.getName() + ")");
System.out.println(" number of counters in this group: " + group.size());
for (Counters.Counter counter : group) {
System.out.println(" - " + counter.getDisplayName() + ": " + counter.getName());
}
}
}
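Iterating over every group, as above, is useful for discovering what a job recorded; when the group and counter names are already known, Counters.findCounter(group, name) looks one up directly (getCounter(Enum) does the same for framework counters). A small sketch follows; the group and counter names are examples and may differ for your job.

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;

public class PrintSingleCounter {
    public static void main(String[] args) throws Exception {
        RunningJob job = new JobClient().getJob(JobID.forName(args[0]));
        Counters counters = job.getCounters();
        // Look up one counter by group and name instead of iterating all groups.
        // These names are illustrative; use whatever PrintCounters listed for your job.
        Counters.Counter c = counters.findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS");
        System.out.println(c.getDisplayName() + " = " + c.getCounter());
    }
}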
Use of org.apache.hadoop.mapred.RunningJob in project hbase by apache.
The class TestTableMapReduceUtil, method shoudBeValidMapReduceEvaluation().
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
Configuration cfg = UTIL.getConfiguration();
JobConf jobConf = new JobConf(cfg);
try {
jobConf.setJobName("process row task");
jobConf.setNumReduceTasks(1);
TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY), ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf);
RunningJob job = JobClient.runJob(jobConf);
assertTrue(job.isSuccessful());
} finally {
if (jobConf != null)
FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
}
}
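JobClient.runJob(jobConf) blocks until the job finishes and only then returns the RunningJob, which is why the test can assert isSuccessful() immediately. When the caller wants to keep control while the job runs, a common alternative is submitJob() plus a polling loop on the handle. The sketch below is one way to do that; the timeout handling and sleep interval are illustrative choices, not part of the test.

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class PollUntilDone {
    public static boolean runWithTimeout(JobConf conf, long timeoutMs) throws Exception {
        JobClient jc = new JobClient(conf);
        RunningJob job = jc.submitJob(conf);
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (!job.isComplete()) {
            if (System.currentTimeMillis() > deadline) {
                job.killJob();   // give up and stop the job on the cluster
                return false;
            }
            Thread.sleep(5000);  // poll every 5 seconds
        }
        return job.isSuccessful();
    }
}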
Use of org.apache.hadoop.mapred.RunningJob in project Cloud9 by lintool.
The class WikipediaForwardIndexBuilder, method run().
@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
Options options = new Options();
options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION));
options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION));
options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg().withDescription("two-letter language code").create(LANGUAGE_OPTION));
CommandLine cmdline;
CommandLineParser parser = new GnuParser();
try {
cmdline = parser.parse(options, args);
} catch (ParseException exp) {
System.err.println("Error parsing command line: " + exp.getMessage());
return -1;
}
if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp(this.getClass().getName(), options);
ToolRunner.printGenericCommandUsage(System.out);
return -1;
}
Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION));
String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION);
String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);
if (!inputPath.isAbsolute()) {
System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!");
return -1;
}
String language = null;
if (cmdline.hasOption(LANGUAGE_OPTION)) {
language = cmdline.getOptionValue(LANGUAGE_OPTION);
if (language.length() != 2) {
System.err.println("Error: \"" + language + "\" unknown language!");
return -1;
}
}
JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class);
FileSystem fs = FileSystem.get(conf);
LOG.info("Tool name: " + this.getClass().getName());
LOG.info(" - input path: " + inputPath);
LOG.info(" - index file: " + indexFile);
LOG.info(" - language: " + language);
LOG.info("Note: This tool only works on block-compressed SequenceFiles!");
conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language));
conf.setNumReduceTasks(1);
FileInputFormat.setInputPaths(conf, inputPath);
FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
FileOutputFormat.setCompressOutput(conf, false);
if (language != null) {
conf.set("wiki.language", language);
}
conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
conf.setOutputKeyClass(IntWritable.class);
conf.setOutputValueClass(Text.class);
conf.setMapRunnerClass(MyMapRunner.class);
conf.setReducerClass(IdentityReducer.class);
// Delete the output directory if it exists already.
fs.delete(new Path(tmpPath), true);
RunningJob job = JobClient.runJob(conf);
Counters counters = job.getCounters();
int blocks = (int) counters.getCounter(Blocks.Total);
LOG.info("number of blocks: " + blocks);
LOG.info("Writing index file...");
LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000")));
FSDataOutputStream out = fs.create(new Path(indexFile), true);
out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName());
out.writeUTF(inputPath.toString());
out.writeInt(blocks);
int cnt = 0;
Text line = new Text();
while (reader.readLine(line) > 0) {
String[] arr = line.toString().split("\\s+");
int docno = Integer.parseInt(arr[0]);
int offset = Integer.parseInt(arr[1]);
short fileno = Short.parseShort(arr[2]);
out.writeInt(docno);
out.writeInt(offset);
out.writeShort(fileno);
cnt++;
if (cnt % 100000 == 0) {
LOG.info(cnt + " blocks written");
}
}
reader.close();
out.close();
if (cnt != blocks) {
throw new RuntimeException("Error: mismatch in block count!");
}
// Clean up.
fs.delete(new Path(tmpPath), true);
return 0;
}
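The Blocks.Total value read above is a user-defined counter: map tasks increment it through the Reporter, and the driver reads the aggregated value back from the RunningJob once runJob() returns. A minimal sketch of that round trip is shown below; the enum, mapper, and class names are illustrative rather than the actual Cloud9 classes, and input/output paths are assumed to be configured by the caller.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;

public class CountingSketch {
    // Hypothetical counter enum, analogous to the Blocks enum used above.
    public enum Records { Total }

    public static class CountingMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, LongWritable, Text> {
        public void map(LongWritable key, Text value,
                        OutputCollector<LongWritable, Text> out, Reporter reporter)
                throws IOException {
            reporter.incrCounter(Records.Total, 1); // each map call bumps the counter
            out.collect(key, value);
        }
    }

    public static long runAndCount(JobConf conf) throws IOException {
        conf.setMapperClass(CountingMapper.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        RunningJob job = JobClient.runJob(conf);    // blocks until completion
        Counters counters = job.getCounters();
        return counters.getCounter(Records.Total);  // aggregated across all tasks
    }
}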
Use of org.apache.hadoop.mapred.RunningJob in project Cloud9 by lintool.
The class Aquaint2ForwardIndexBuilder, method runTool().
public int runTool(Configuration config, String collectionPath, String outputPath, String indexFile, String mappingFile) throws Exception {
// sLogger.error ("getConf(): " + getConf() + ", DemoCountAquaint2Documents.class: " + DemoCountAquaint2Documents.class);
JobConf conf = new JobConf(config, DemoCountAquaint2Documents.class);
FileSystem fs = FileSystem.get(config);
sLogger.info("Tool name: BuildAquaint2ForwardIndex");
sLogger.info(" - collection path: " + collectionPath);
sLogger.info(" - output path: " + outputPath);
sLogger.info(" - index file: " + indexFile);
sLogger.info(" - mapping file: " + mappingFile);
conf.setJobName("BuildAquaint2ForwardIndex");
conf.set("mapred.child.java.opts", "-Xmx1024m");
conf.setNumReduceTasks(1);
if (conf.get("mapred.job.tracker").equals("local")) {
conf.set("DocnoMappingFile", mappingFile);
} else {
DistributedCache.addCacheFile(new URI(mappingFile), conf);
}
FileInputFormat.setInputPaths(conf, new Path(collectionPath));
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
FileOutputFormat.setCompressOutput(conf, false);
conf.setInputFormat(Aquaint2DocumentInputFormatOld.class);
conf.setOutputKeyClass(IntWritable.class);
conf.setOutputValueClass(Text.class);
conf.setMapperClass(MyMapper.class);
conf.setReducerClass(IdentityReducer.class);
// delete the output directory if it exists already
FileSystem.get(conf).delete(new Path(outputPath), true);
RunningJob job = JobClient.runJob(conf);
Counters counters = job.getCounters();
int numDocs = (int) counters.findCounter(Count.DOCS).getCounter();
String inputFile = outputPath + "/" + "part-00000";
sLogger.info("Writing " + numDocs + " doc offseta to " + indexFile);
LineReader reader = new LineReader(fs.open(new Path(inputFile)));
FSDataOutputStream writer = fs.create(new Path(indexFile), true);
writer.writeUTF("edu.umd.cloud9.collection.aquaint2.Aquaint2ForwardIndex");
writer.writeUTF(collectionPath);
writer.writeInt(numDocs);
int cnt = 0;
Text line = new Text();
while (reader.readLine(line) > 0) {
String[] arr = line.toString().split("\\t");
long offset = Long.parseLong(arr[1]);
int len = Integer.parseInt(arr[2]);
// sLogger.info(arr[0] + " " + offset + " " + len);
writer.writeLong(offset);
writer.writeInt(len);
cnt++;
if (cnt % 100000 == 0) {
sLogger.info(cnt + " docs");
}
}
reader.close();
writer.close();
sLogger.info(cnt + " docs total. Done!");
if (numDocs != cnt) {
throw new RuntimeException("Unexpected number of documents in building forward index!");
}
return 0;
}
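One detail worth noting in runTool is the branch on mapred.job.tracker: in local mode the mapping file path is passed straight through the JobConf, while on a real cluster it is shipped via the DistributedCache. The mapper-side counterpart typically resolves the file along the lines of the sketch below; the helper class name and error handling are illustrative, only the "DocnoMappingFile" property mirrors the driver code above.

import java.io.IOException;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class MappingFileLocator {
    // Resolve the docno mapping file the way the driver above distributed it.
    public static Path locate(JobConf conf) throws IOException {
        String localPath = conf.get("DocnoMappingFile");
        if (localPath != null) {
            // Local mode: the driver put the path straight into the JobConf.
            return new Path(localPath);
        }
        // Cluster mode: the driver added the file to the DistributedCache.
        Path[] cached = DistributedCache.getLocalCacheFiles(conf);
        if (cached == null || cached.length == 0) {
            throw new IOException("No mapping file found in the DistributedCache");
        }
        return cached[0];
    }
}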
Aggregations