
Example 16 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project Cloud9 by lintool.

the class HadoopAlign method doAlignment.

@SuppressWarnings("deprecation")
public static void doAlignment(int mapTasks, int reduceTasks, HadoopAlignConfig hac) throws IOException {
    System.out.println("Running alignment: " + hac);
    FileSystem fs = FileSystem.get(hac);
    Path cbtxt = new Path(hac.getRoot() + "/comp-bitext");
    //		fs.delete(cbtxt, true);
    if (!fs.exists(cbtxt)) {
        CorpusVocabNormalizerAndNumberizer.preprocessAndNumberizeFiles(hac, hac.getBitexts(), cbtxt);
    }
    System.out.println("Finished preprocessing");
    int m1iters = hac.getModel1Iterations();
    int hmmiters = hac.getHMMIterations();
    int totalIterations = m1iters + hmmiters;
    String modelType = null;
    ArrayList<Double> perps = new ArrayList<Double>();
    ArrayList<Double> aers = new ArrayList<Double>();
    boolean hmm = false;
    boolean firstHmm = true;
    Path model1PosteriorsPath = null;
    for (int iteration = 0; iteration < totalIterations; iteration++) {
        long start = System.currentTimeMillis();
        hac.setBoolean("ha.generate.posterios", false);
        boolean lastIteration = (iteration == totalIterations - 1);
        boolean lastModel1Iteration = (iteration == m1iters - 1);
        if (iteration >= m1iters)
            hmm = true;
        if (hmm)
            modelType = "HMM";
        else
            modelType = "Model1";
        FileSystem fileSys = FileSystem.get(hac);
        String sOutputPath = modelType + ".data." + iteration;
        Path outputPath = new Path(sOutputPath);
        try {
            // no probs in first iteration!
            if (usePServer && iteration > 0)
                startPServers(hac);
            System.out.println("Starting iteration " + iteration + (iteration == 0 ? " (initialization)" : "") + ": " + modelType);
            JobConf conf = new JobConf(hac, HadoopAlign.class);
            conf.setJobName("EMTrain." + modelType + ".iter" + iteration);
            conf.setInputFormat(SequenceFileInputFormat.class);
            conf.set(KEY_TRAINER, MODEL1_TRAINER);
            conf.set(KEY_ITERATION, Integer.toString(iteration));
            conf.set("mapred.child.java.opts", "-Xmx2048m");
            if (iteration == 0)
                conf.set(KEY_TRAINER, MODEL1_UNIFORM_INIT);
            if (hmm) {
                conf.set(KEY_TRAINER, HMM_TRAINER);
                if (firstHmm) {
                    firstHmm = false;
                    System.out.println("Writing default a-table...");
                    Path pathATable = hac.getATablePath();
                    fileSys.delete(pathATable, true);
                    DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(fileSys.create(pathATable)));
                    int cond_values = 1;
                    if (!hac.isHMMHomogeneous()) {
                        cond_values = 100;
                    }
                    ATable at = new ATable(hac.isHMMHomogeneous(), cond_values, 100);
                    at.normalize();
                    at.write(dos);
                    //			System.out.println(at);
                    dos.close();
                }
            }
            conf.setOutputKeyClass(IntWritable.class);
            conf.setOutputValueClass(PartialCountContainer.class);
            conf.setMapperClass(EMapper.class);
            conf.setReducerClass(EMReducer.class);
            conf.setNumMapTasks(mapTasks);
            conf.setNumReduceTasks(reduceTasks);
            System.out.println("Running job " + conf.getJobName());
            // otherwise, input is set to output of last model 1 iteration
            if (model1PosteriorsPath != null) {
                System.out.println("Input: " + model1PosteriorsPath);
                FileInputFormat.setInputPaths(conf, model1PosteriorsPath);
            } else {
                System.out.println("Input: " + cbtxt);
                FileInputFormat.setInputPaths(conf, cbtxt);
            }
            System.out.println("Output: " + outputPath);
            FileOutputFormat.setOutputPath(conf, new Path(hac.getRoot() + "/" + outputPath.toString()));
            fileSys.delete(new Path(hac.getRoot() + "/" + outputPath.toString()), true);
            conf.setOutputFormat(SequenceFileOutputFormat.class);
            RunningJob job = JobClient.runJob(conf);
            Counters c = job.getCounters();
            double lp = c.getCounter(CrossEntropyCounters.LOGPROB);
            double wc = c.getCounter(CrossEntropyCounters.WORDCOUNT);
            double ce = lp / wc / Math.log(2);
            double perp = Math.pow(2.0, ce);
            double aer = ComputeAER(c);
            System.out.println("Iteration " + iteration + ": (" + modelType + ")\tCROSS-ENTROPY: " + ce + "   PERPLEXITY: " + perp);
            System.out.println("Iteration " + iteration + ": " + aer + " AER");
            aers.add(aer);
            perps.add(perp);
        } finally {
            stopPServers();
        }
        JobConf conf = new JobConf(hac, ModelMergeMapper2.class);
        System.err.println("Setting " + TTABLE_ITERATION_OUTPUT + " to " + outputPath.toString());
        conf.set(TTABLE_ITERATION_OUTPUT, hac.getRoot() + "/" + outputPath.toString());
        conf.setJobName("EMTrain.ModelMerge");
        //			conf.setOutputKeyClass(LongWritable.class);
        conf.setMapperClass(ModelMergeMapper2.class);
        conf.setSpeculativeExecution(false);
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(0);
        conf.setInputFormat(NullInputFormat.class);
        conf.setOutputFormat(NullOutputFormat.class);
        conf.set("mapred.map.child.java.opts", "-Xmx2048m");
        conf.set("mapred.reduce.child.java.opts", "-Xmx2048m");
        //			FileInputFormat.setInputPaths(conf, root+"/dummy");
        //			fileSys.delete(new Path(root+"/dummy.out"), true);
        //			FileOutputFormat.setOutputPath(conf, new Path(root+"/dummy.out"));
        //			conf.setOutputFormat(SequenceFileOutputFormat.class);
        System.out.println("Running job " + conf.getJobName());
        System.out.println("Input: " + hac.getRoot() + "/dummy");
        System.out.println("Output: " + hac.getRoot() + "/dummy.out");
        JobClient.runJob(conf);
        fileSys.delete(new Path(hac.getRoot() + "/" + outputPath.toString()), true);
        if (lastIteration || lastModel1Iteration) {
            //hac.setBoolean("ha.generate.posteriors", true);
            conf = new JobConf(hac, HadoopAlign.class);
            sOutputPath = modelType + ".data." + iteration;
            outputPath = new Path(sOutputPath);
            conf.setJobName(modelType + ".align");
            conf.set("mapred.map.child.java.opts", "-Xmx2048m");
            conf.set("mapred.reduce.child.java.opts", "-Xmx2048m");
            // TODO use file cache
            /*
            try {
                if (hmm || iteration > 0) {
                    URI ttable = new URI(fileSys.getHomeDirectory() + Path.SEPARATOR + hac.getTTablePath().toString());
                    DistributedCache.addCacheFile(ttable, conf);
                    System.out.println("cache<-- " + ttable);
                }
            } catch (Exception e) { throw new RuntimeException("Caught " + e); }
            */
            conf.setInputFormat(SequenceFileInputFormat.class);
            conf.setOutputFormat(SequenceFileOutputFormat.class);
            conf.set(KEY_TRAINER, MODEL1_TRAINER);
            conf.set(KEY_ITERATION, Integer.toString(iteration));
            if (hmm)
                conf.set(KEY_TRAINER, HMM_TRAINER);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(PhrasePair.class);
            conf.setMapperClass(AlignMapper.class);
            conf.setReducerClass(IdentityReducer.class);
            conf.setNumMapTasks(mapTasks);
            conf.setNumReduceTasks(reduceTasks);
            FileOutputFormat.setOutputPath(conf, new Path(hac.getRoot() + "/" + outputPath.toString()));
            //if last model1 iteration, save output path, to be used as input path in later iterations
            if (lastModel1Iteration) {
                FileInputFormat.setInputPaths(conf, cbtxt);
                model1PosteriorsPath = new Path(hac.getRoot() + "/" + outputPath.toString());
            } else {
                FileInputFormat.setInputPaths(conf, model1PosteriorsPath);
            }
            fileSys.delete(outputPath, true);
            System.out.println("Running job " + conf.getJobName());
            RunningJob job = JobClient.runJob(conf);
            System.out.println("GENERATED: " + model1PosteriorsPath);
            Counters c = job.getCounters();
            double aer = ComputeAER(c);
            //				System.out.println("Iteration " + iteration + ": (" + modelType + ")\tCROSS-ENTROPY: " + ce + "   PERPLEXITY: " + perp);
            System.out.println("Iteration " + iteration + ": " + aer + " AER");
            aers.add(aer);
            perps.add(0.0);
        }
        long end = System.currentTimeMillis();
        System.out.println(modelType + " iteration " + iteration + " took " + ((end - start) / 1000) + " seconds.");
    }
    for (int i = 0; i < perps.size(); i++) {
        System.out.print("I=" + i + "\t");
        if (aers.size() > 0) {
            System.out.print(aers.get(i) + "\t");
        }
        System.out.println(perps.get(i));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ATable(edu.umd.hooka.alignment.hmm.ATable) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) JobConf(org.apache.hadoop.mapred.JobConf) BufferedOutputStream(java.io.BufferedOutputStream)
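The pattern of interest in this example is reading the LOGPROB and WORDCOUNT counters off the finished RunningJob and converting them into cross-entropy and perplexity. Below is a minimal sketch of just that step; the class, method, and the CrossEntropyCounters enum are placeholders standing in for the ones used by EMapper/EMReducer above.

import java.io.IOException;

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

@SuppressWarnings("deprecation")
public class PerplexityFromCounters {
    // Hypothetical counter enum standing in for the CrossEntropyCounters referenced above.
    public enum CrossEntropyCounters { LOGPROB, WORDCOUNT }

    public static double runAndComputePerplexity(JobConf conf) throws IOException {
        // runJob() blocks until the job finishes and returns a RunningJob handle.
        RunningJob job = JobClient.runJob(conf);
        Counters c = job.getCounters();
        double logProb = c.getCounter(CrossEntropyCounters.LOGPROB);
        double wordCount = c.getCounter(CrossEntropyCounters.WORDCOUNT);
        // Cross-entropy in bits per word, then perplexity = 2^H.
        double crossEntropy = logProb / wordCount / Math.log(2);
        return Math.pow(2.0, crossEntropy);
    }
}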

Example 17 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project Cloud9 by lintool.

the class BuildAnchorTextForwardIndex method run.

/**
	 * Runs this tool.
	 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }
    JobConf conf = new JobConf(getConf());
    FileSystem fs = FileSystem.get(conf);
    String collectionPath = args[0];
    String outputPath = args[1];
    String indexFile = args[2];
    LOG.info("Tool name: BuildAnchorTextForwardIndex");
    LOG.info(" - collection path: " + collectionPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setJobName("BuildAnchorTextForwardIndex");
    conf.setNumMapTasks(100);
    conf.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(conf, new Path(collectionPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);
    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);
    // delete the output directory if it exists already
    fs.delete(new Path(outputPath), true);
    RunningJob job = JobClient.runJob(conf);
    Counters counters = job.getCounters();
    int blocks = (int) counters.findCounter(Blocks.Total).getCounter();
    LOG.info("number of blocks: " + blocks);
    LOG.info("Writing index file...");
    LineReader reader = new LineReader(fs.open(new Path(outputPath + "/part-00000")));
    FSDataOutputStream out = fs.create(new Path(indexFile), true);
    out.writeUTF(IndexableAnchorTextForwardIndex.class.getName());
    out.writeUTF(collectionPath);
    out.writeInt(blocks);
    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");
        int docno = Integer.parseInt(arr[0]);
        int offset = Integer.parseInt(arr[1]);
        short fileno = Short.parseShort(arr[2]);
        out.writeInt(docno);
        out.writeInt(offset);
        out.writeShort(fileno);
        cnt++;
        if (cnt % 1000 == 0) {
            LOG.info(cnt + " blocks written");
        }
    }
    reader.close();
    out.close();
    if (cnt != blocks) {
        throw new RuntimeException("Error: mismatch in block count!");
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) AnchorText(edu.umd.cloud9.webgraph.data.AnchorText) IndexableAnchorTextForwardIndex(edu.umd.cloud9.webgraph.data.IndexableAnchorTextForwardIndex) FileSystem(org.apache.hadoop.fs.FileSystem) LineReader(org.apache.hadoop.util.LineReader) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) JobConf(org.apache.hadoop.mapred.JobConf)
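The post-job step above reads the single text part file and packs (docno, offset, fileno) triples into a binary forward-index file. A minimal sketch of that step, assuming the same whitespace-separated line format the job emits; the class and method names are placeholders.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class ForwardIndexWriterSketch {
    public static int writeIndex(Configuration conf, String outputPath, String indexFile)
            throws IOException {
        FileSystem fs = FileSystem.get(conf);
        LineReader reader = new LineReader(fs.open(new Path(outputPath + "/part-00000")));
        FSDataOutputStream out = fs.create(new Path(indexFile), true);
        int cnt = 0;
        Text line = new Text();
        while (reader.readLine(line) > 0) {
            // Each line is expected to hold "docno offset fileno", whitespace-separated.
            String[] arr = line.toString().split("\\s+");
            out.writeInt(Integer.parseInt(arr[0]));   // docno
            out.writeInt(Integer.parseInt(arr[1]));   // byte offset within the block file
            out.writeShort(Short.parseShort(arr[2])); // file number
            cnt++;
        }
        reader.close();
        out.close();
        return cnt;
    }
}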

Example 18 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project Cloud9 by lintool.

the class BuildIndexableAnchorCollection method run.

/**
   * Runs this tool.
   */
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        printUsage();
        return -1;
    }
    JobConf conf = new JobConf(getConf());
    FileSystem fs = FileSystem.get(conf);
    String collectionPath = DriverUtil.argValue(args, DriverUtil.CL_INPUT);
    String outputPath = DriverUtil.argValue(args, DriverUtil.CL_OUTPUT);
    String docnoMappingClass = DriverUtil.argValue(args, DriverUtil.CL_DOCNO_MAPPING_CLASS);
    String docnoMapping = DriverUtil.argValue(args, DriverUtil.CL_DOCNO_MAPPING);
    int numReducers = Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_NUMBER_OF_REDUCERS));
    if (DriverUtil.argExists(args, DriverUtil.CL_MAX_LENGTH)) {
        conf.setInt("Cloud9.maxContentLength", Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_MAX_LENGTH)));
    }
    conf.set("Cloud9.DocnoMappingClass", docnoMappingClass);
    LOG.info("Tool name: BuildAnchorTextForwardIndex");
    LOG.info(" - collection path: " + collectionPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - docno-mapping class: " + docnoMappingClass);
    LOG.info(" - docno-mapping file: " + docnoMapping);
    if (args.length == 6) {
        LOG.info(" - maximum content length: " + conf.getInt("Cloud9.maxContentLength", 0));
    }
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setJobName("BuildIndexableAnchorCollection");
    conf.setJarByClass(BuildIndexableAnchorCollection.class);
    conf.setNumMapTasks(100);
    conf.setNumReduceTasks(numReducers);
    DistributedCache.addCacheFile(new URI(docnoMapping), conf);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, new Path(collectionPath));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IndexableAnchorText.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    // delete the output directory if it exists already
    fs.delete(new Path(outputPath), true);
    RunningJob job = JobClient.runJob(conf);
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf) URI(java.net.URI)
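The job setup here combines two recurring pieces: shipping the docno-mapping file to the tasks through the DistributedCache and writing block-compressed SequenceFile output. A minimal sketch of just those calls, with placeholder class, method, and parameter names.

import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class CompressedSequenceFileJobSetup {
    public static void configure(JobConf conf, String docnoMapping,
            String collectionPath, String outputPath) throws URISyntaxException {
        // Ship the docno-mapping file to every task through the DistributedCache.
        DistributedCache.addCacheFile(new URI(docnoMapping), conf);

        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);

        // Block compression groups many key/value records per compressed block.
        SequenceFileOutputFormat.setCompressOutput(conf, true);
        SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

        SequenceFileInputFormat.setInputPaths(conf, new Path(collectionPath));
        SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
    }
}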

Example 19 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project Cloud9 by lintool.

the class FileMerger method mergeFilesDistribute.

private static Path mergeFilesDistribute(Configuration configuration, String inputFiles, String outputFile, int numberOfMappers, Class<? extends Writable> keyClass, Class<? extends Writable> valueClass, Class<? extends FileInputFormat> fileInputClass, Class<? extends FileOutputFormat> fileOutputClass, boolean deleteSource, boolean deleteDestinationFileIfExist) throws IOException {
    JobConf conf = new JobConf(configuration, FileMerger.class);
    conf.setJobName(FileMerger.class.getSimpleName());
    FileSystem fs = FileSystem.get(conf);
    sLogger.info("Tool: " + FileMerger.class.getSimpleName());
    sLogger.info(" - merge files from: " + inputFiles);
    sLogger.info(" - merge files to: " + outputFile);
    conf.setNumMapTasks(numberOfMappers);
    conf.setNumReduceTasks(1);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setMapOutputKeyClass(keyClass);
    conf.setMapOutputValueClass(valueClass);
    conf.setOutputKeyClass(keyClass);
    conf.setOutputValueClass(valueClass);
    conf.setInputFormat(fileInputClass);
    conf.setOutputFormat(fileOutputClass);
    Path inputPath = new Path(inputFiles);
    Path mergePath = new Path(inputPath.getParent().toString() + Path.SEPARATOR + MERGE + generateRandomString());
    Preconditions.checkArgument(!fs.exists(mergePath), new IOException("Intermediate merge directory already exists..."));
    Path outputPath = new Path(outputFile);
    if (deleteDestinationFileIfExist) {
        if (fs.exists(outputPath)) {
            // carefully remove the destination file, not recursive
            fs.delete(outputPath, false);
            sLogger.info("Warning: remove destination file since it already exists...");
        }
    } else {
        Preconditions.checkArgument(!fs.exists(outputPath), new IOException("Destination file already exists..."));
    }
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, mergePath);
    FileOutputFormat.setCompressOutput(conf, true);
    try {
        long startTime = System.currentTimeMillis();
        RunningJob job = JobClient.runJob(conf);
        sLogger.info("Merge Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        fs.rename(new Path(mergePath.toString() + Path.SEPARATOR + "part-00000"), outputPath);
        if (deleteSource) {
            for (FileStatus fileStatus : fs.globStatus(inputPath)) {
                fs.deleteOnExit(fileStatus.getPath());
            }
        }
    } finally {
        fs.delete(mergePath, true);
    }
    sLogger.info("Successfully merge " + inputFiles.toString() + " to " + outputFile);
    return outputPath;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) IOException(java.io.IOException) JobConf(org.apache.hadoop.mapred.JobConf)
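Stripped of logging and precondition checks, the merge reduces to an identity map/reduce with a single reducer followed by a rename of part-00000 onto the destination, with the intermediate directory removed in a finally block. A minimal sketch of that tail step, assuming conf already carries the input path, the output path (mergePath), and the identity mapper/reducer.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SingleReducerMergeSketch {
    public static Path merge(JobConf conf, Path mergePath, Path outputPath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        try {
            // One reducer guarantees exactly one output file: part-00000.
            conf.setNumReduceTasks(1);
            // The RunningJob handle is available for counters, job id, etc.
            RunningJob job = JobClient.runJob(conf);
            fs.rename(new Path(mergePath, "part-00000"), outputPath);
        } finally {
            // Remove the intermediate merge directory regardless of outcome.
            fs.delete(mergePath, true);
        }
        return outputPath;
    }
}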

Example 20 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project incubator-systemml by apache.

the class RemoteParForMR method runJob.

public static RemoteParForJobReturn runJob(
        // inputs
        long pfid, String program, String taskFile, String resultFile, MatrixObject colocatedDPMatrixObj,
        // opt params
        boolean enableCPCaching, int numMappers, int replication, int max_retry, long minMem,
        boolean jvmReuse) throws DMLRuntimeException {
    RemoteParForJobReturn ret = null;
    String jobname = "ParFor-EMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    JobConf job;
    job = new JobConf(RemoteParForMR.class);
    job.setJobName(jobname + pfid);
    //maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();
    try {
        /////
        //configure the MR job
        //set arbitrary CP program blocks that will perform in the mapper
        MRJobConfiguration.setProgramBlocks(job, program);
        //enable/disable caching
        MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
        //set mappers, reducers, combiners
        //map-only
        job.setMapperClass(RemoteParWorkerMapper.class);
        //set input format (one split per row, NLineInputFormat default N=1)
        if (ParForProgramBlock.ALLOW_DATA_COLOCATION && colocatedDPMatrixObj != null) {
            job.setInputFormat(RemoteParForColocatedNLineInputFormat.class);
            MRJobConfiguration.setPartitioningFormat(job, colocatedDPMatrixObj.getPartitionFormat());
            MatrixCharacteristics mc = colocatedDPMatrixObj.getMatrixCharacteristics();
            MRJobConfiguration.setPartitioningBlockNumRows(job, mc.getRowsPerBlock());
            MRJobConfiguration.setPartitioningBlockNumCols(job, mc.getColsPerBlock());
            MRJobConfiguration.setPartitioningFilename(job, colocatedDPMatrixObj.getFileName());
        } else { // default case
            job.setInputFormat(NLineInputFormat.class);
        }
        //set the input path and output path 
        FileInputFormat.setInputPaths(job, new Path(taskFile));
        //set output format
        job.setOutputFormat(SequenceFileOutputFormat.class);
        //set output path
        MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
        FileOutputFormat.setOutputPath(job, new Path(resultFile));
        //set the output key, value schema
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        //////
        //set optimization parameters
        //set the number of mappers and reducers 
        //numMappers
        job.setNumMapTasks(numMappers);
        job.setNumReduceTasks(0);
        //job.setInt("mapred.map.tasks.maximum", 1); //system property
        //job.setInt("mapred.tasktracker.tasks.maximum",1); //system property
        //job.setInt("mapred.jobtracker.maxtasks.per.job",1); //system property
        //set jvm memory size (if require)
        String memKey = MRConfigurationNames.MR_CHILD_JAVA_OPTS;
        if (minMem > 0 && minMem > InfrastructureAnalyzer.extractMaxMemoryOpt(job.get(memKey))) {
            InfrastructureAnalyzer.setMaxMemoryOpt(job, memKey, minMem);
            LOG.warn("Forcing '" + memKey + "' to -Xmx" + minMem / (1024 * 1024) + "M.");
        }
        //disable automatic tasks timeouts and speculative task exec
        job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
        job.setMapSpeculativeExecution(false);
        //set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getDMLConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
        //set up custom map/reduce configurations 
        MRJobConfiguration.setupCustomMRConfigurations(job, config);
        //enables the reuse of JVMs (multiple tasks per MR task)
        if (jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); // unlimited
        //set sort io buffer (reduce unnecessary large io buffer, guaranteed memory consumption)
        //8MB
        job.setInt(MRConfigurationNames.MR_TASK_IO_SORT_MB, 8);
        //set the replication factor for the results
        job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
        //set the max number of retries per map task
        //  disabled job-level configuration to respect cluster configuration
        //  note: this refers to hadoop2, hence it never had effect on mr1
        //job.setInt(MRConfigurationNames.MR_MAP_MAXATTEMPTS, max_retry);
        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);
        /////
        // execute the MR job			
        RunningJob runjob = JobClient.runJob(job);
        // Process different counters 
        Statistics.incrementNoOfExecutedMRJobs();
        Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
        int numTasks = (int) pgroup.getCounter(Stat.PARFOR_NUMTASKS.toString());
        int numIters = (int) pgroup.getCounter(Stat.PARFOR_NUMITERS.toString());
        if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode()) {
            Statistics.incrementJITCompileTime(pgroup.getCounter(Stat.PARFOR_JITCOMPILE.toString()));
            Statistics.incrementJVMgcCount(pgroup.getCounter(Stat.PARFOR_JVMGC_COUNT.toString()));
            Statistics.incrementJVMgcTime(pgroup.getCounter(Stat.PARFOR_JVMGC_TIME.toString()));
            Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
            CacheStatistics.incrementMemHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_MEM.toString()));
            CacheStatistics.incrementFSBuffHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString()));
            CacheStatistics.incrementFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FS.toString()));
            CacheStatistics.incrementHDFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_HDFS.toString()));
            CacheStatistics.incrementFSBuffWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString()));
            CacheStatistics.incrementFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FS.toString()));
            CacheStatistics.incrementHDFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_HDFS.toString()));
            CacheStatistics.incrementAcquireRTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQR.toString()));
            CacheStatistics.incrementAcquireMTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQM.toString()));
            CacheStatistics.incrementReleaseTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_RLS.toString()));
            CacheStatistics.incrementExportTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_EXP.toString()));
        }
        // read all files of result variables and prepare for return
        LocalVariableMap[] results = readResultFile(job, resultFile);
        ret = new RemoteParForJobReturn(runjob.isSuccessful(), numTasks, numIters, results);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    } finally {
        // remove created files 
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(taskFile), job);
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        }
    }
    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)
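After JobClient.runJob() returns, this example reads counters by group and string name rather than by enum. A minimal sketch of that lookup; the group and counter names passed in are placeholders, not the actual SystemML constants.

import java.io.IOException;

import org.apache.hadoop.mapred.Counters.Group;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class CounterGroupSketch {
    public static long readGroupCounter(JobConf job, String groupName, String counterName)
            throws IOException {
        RunningJob runjob = JobClient.runJob(job);
        // Counters are resolved per group; a missing counter reads back as 0 in the old mapred API.
        Group group = runjob.getCounters().getGroup(groupName);
        return group.getCounter(counterName);
    }
}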

Aggregations

RunningJob (org.apache.hadoop.mapred.RunningJob)61 JobConf (org.apache.hadoop.mapred.JobConf)45 Path (org.apache.hadoop.fs.Path)35 FileSystem (org.apache.hadoop.fs.FileSystem)24 JobClient (org.apache.hadoop.mapred.JobClient)20 IOException (java.io.IOException)15 Counters (org.apache.hadoop.mapred.Counters)14 Group (org.apache.hadoop.mapred.Counters.Group)13 DMLConfig (org.apache.sysml.conf.DMLConfig)13 Configuration (org.apache.hadoop.conf.Configuration)7 MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups)7 DataOutputStream (java.io.DataOutputStream)6 File (java.io.File)5 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 Text (org.apache.hadoop.io.Text)5 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)5 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)5 Test (org.junit.Test)5 URI (java.net.URI)4