Example 81 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

the class TestGridMixClasses method testSleepMapper.

/*
   * test SleepMapper
   */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test(timeout = 30000)
public void testSleepMapper() throws Exception {
    SleepJob.SleepMapper test = new SleepJob.SleepMapper();
    Configuration conf = new Configuration();
    conf.setInt(JobContext.NUM_REDUCES, 2);
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    TaskAttemptID taskId = new TaskAttemptID();
    FakeRecordLLReader reader = new FakeRecordLLReader();
    LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
    OutputCommitter committer = new CustomOutputCommitter();
    StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
    SleepSplit split = getSleepSplit();
    MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext = new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(conf, taskId, reader, writer, committer, reporter, split);
    Context context = new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>().getMapContext(mapcontext);
    long start = System.currentTimeMillis();
    LOG.info("start:" + start);
    LongWritable key = new LongWritable(start + 2000);
    LongWritable value = new LongWritable(start + 2000);
    // should sleep 2 sec
    test.map(key, value, context);
    LOG.info("finish:" + System.currentTimeMillis());
    assertTrue(System.currentTimeMillis() >= (start + 2000));
    test.cleanup(context);
    assertEquals(1, writer.getData().size());
}
Also used : Context(org.apache.hadoop.mapreduce.Mapper.Context) ReduceContext(org.apache.hadoop.mapreduce.ReduceContext) MapContext(org.apache.hadoop.mapreduce.MapContext) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JobContext(org.apache.hadoop.mapred.JobContext) CustomOutputCommitter(org.apache.hadoop.CustomOutputCommitter) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) DummyReporter(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.DummyReporter) NullWritable(org.apache.hadoop.io.NullWritable) CustomOutputCommitter(org.apache.hadoop.CustomOutputCommitter) LongWritable(org.apache.hadoop.io.LongWritable) SleepSplit(org.apache.hadoop.mapred.gridmix.SleepJob.SleepSplit) StatusReporter(org.apache.hadoop.mapreduce.StatusReporter) Test(org.junit.Test)
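
To make the pattern this test exercises easier to see in isolation, here is a minimal standalone sketch (not part of the Hadoop test above) of a mapper whose output value type is NullWritable: only the keys carry information, and the value serializes to zero bytes. The class name KeyOnlyMapper is hypothetical.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: emits each input line as a key with a NullWritable value.
public class KeyOnlyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // NullWritable is a singleton; always obtain it via get().
        context.write(value, NullWritable.get());
    }
}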

Example 82 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

the class TestDistCacheEmulation method validateSetupGenDC.

/**
   * Validate setupGenerateDistCacheData by validating
   * <ul>
   * <li>permissions of the distributed cache directories and</li>
   * <li>content of the generated sequence file, which includes validation of
   * dist cache file paths and their file sizes.</li>
   * </ul>
   */
private void validateSetupGenDC(Configuration jobConf, long[] sortedFileSizes) throws IOException, InterruptedException {
    // build things needed for validation
    long sumOfFileSizes = 0;
    for (int i = 0; i < sortedFileSizes.length; i++) {
        sumOfFileSizes += sortedFileSizes[i];
    }
    FileSystem fs = FileSystem.get(jobConf);
    assertEquals("Number of distributed cache files to be generated is wrong.", sortedFileSizes.length, jobConf.getInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, -1));
    assertEquals("Total size of dist cache files to be generated is wrong.", sumOfFileSizes, jobConf.getLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, -1));
    Path filesListFile = new Path(jobConf.get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST));
    FileStatus stat = fs.getFileStatus(filesListFile);
    assertEquals("Wrong permissions of dist Cache files list file " + filesListFile, new FsPermission((short) 0644), stat.getPermission());
    InputSplit split = new FileSplit(filesListFile, 0, stat.getLen(), (String[]) null);
    TaskAttemptContext taskContext = MapReduceTestUtil.createDummyMapTaskAttemptContext(jobConf);
    RecordReader<LongWritable, BytesWritable> reader = new GenerateDistCacheData.GenDCDataFormat().createRecordReader(split, taskContext);
    MapContext<LongWritable, BytesWritable, NullWritable, BytesWritable> mapContext = new MapContextImpl<LongWritable, BytesWritable, NullWritable, BytesWritable>(jobConf, taskContext.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mapContext);
    // start validating setupGenerateDistCacheData
    doValidateSetupGenDC(reader, fs, sortedFileSizes);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) BytesWritable(org.apache.hadoop.io.BytesWritable) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) NullWritable(org.apache.hadoop.io.NullWritable) FileSystem(org.apache.hadoop.fs.FileSystem) FsPermission(org.apache.hadoop.fs.permission.FsPermission) LongWritable(org.apache.hadoop.io.LongWritable) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
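
The helper doValidateSetupGenDC is not shown in this excerpt. The following is a hedged sketch of the kind of loop it could run over the reader initialized above; the meaning of the key and value fields is an assumption made for illustration, not taken from the Hadoop source.

// Iterate the dist cache file list through the standard RecordReader contract.
long observedTotalSize = 0;
int observedFileCount = 0;
while (reader.nextKeyValue()) {
    LongWritable size = reader.getCurrentKey();          // assumed: file size
    BytesWritable pathBytes = reader.getCurrentValue();  // assumed: file path bytes
    observedTotalSize += size.get();
    observedFileCount++;
}
// These totals would then be checked against sortedFileSizes and their sum.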

Example 83 with NullWritable

use of org.apache.hadoop.io.NullWritable in project carbondata by apache.

the class StoreCreator method executeGraph.

/**
   * Execute the load graph, which in turn loads the data.
   *
   * @param loadModel     load model describing the data to load
   * @param storeLocation location of the store on the local file system
   * @throws Exception if the data load fails
   */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    new File(storeLocation).mkdirs();
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
    CarbonProperties.getInstance().addProperty("send.signal.load", "false");
    CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
    CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
    CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
    CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
    CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
    CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
    CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
    CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
    String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        path.delete();
    }
    SchemaInfo info = new SchemaInfo();
    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0, new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setNumberOfColumns(configuration, String.valueOf(loadModel.getCsvHeaderColumns().length));
    CSVInputFormat.setMaxColumns(configuration, "10");
    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader = format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator = new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
    new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });
    info.setDatabaseName(databaseName);
    info.setTableName(tableName);
    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
    String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
    File file = new File(segLocation);
    File factFile = null;
    File[] folderList = file.listFiles();
    File folder = null;
    for (int i = 0; i < folderList.length; i++) {
        if (folderList[i].isDirectory()) {
            folder = folderList[i];
        }
    }
    // Guard against the segment directory containing no subdirectory at all.
    if (folder != null && folder.isDirectory()) {
        File[] files = folder.listFiles();
        for (int i = 0; i < files.length; i++) {
            if (!files[i].isDirectory() && files[i].getName().startsWith("part")) {
                factFile = files[i];
                break;
            }
        }
        //      Files.copy(factFile.toPath(), file.toPath(), REPLACE_EXISTING);
        if (factFile != null) {
            factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
        }
        CarbonUtil.deleteFoldersAndFiles(folder);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CSVRecordReaderIterator(org.apache.carbondata.processing.csvload.CSVRecordReaderIterator) StringArrayWritable(org.apache.carbondata.processing.csvload.StringArrayWritable) NullWritable(org.apache.hadoop.io.NullWritable) CSVInputFormat(org.apache.carbondata.processing.csvload.CSVInputFormat) BlockDetails(org.apache.carbondata.processing.csvload.BlockDetails) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) File(java.io.File) DataLoadExecutor(org.apache.carbondata.processing.newflow.DataLoadExecutor) SchemaInfo(org.apache.carbondata.processing.api.dataloader.SchemaInfo)
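
In the method above, the RecordReader<NullWritable, StringArrayWritable> is consumed indirectly through CSVRecordReaderIterator and DataLoadExecutor. The snippet below is an illustrative sketch only (not taken from the CarbonData source), continuing from the variables defined in executeGraph, showing how the same reader could be read directly, e.g. to debug the CSV configuration; it relies only on the standard RecordReader contract and on toString().

// Read the CSV rows directly; the key type is NullWritable because a CSV
// source has no meaningful record key, so every call yields the singleton.
recordReader.initialize(blockDetails, hadoopAttemptContext);
while (recordReader.nextKeyValue()) {
    StringArrayWritable row = recordReader.getCurrentValue();
    System.out.println(row);
}
recordReader.close();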

Example 84 with NullWritable

use of org.apache.hadoop.io.NullWritable in project jena by apache.

the class TestDistinctTriples method distinct_triples_02.

@Test
public void distinct_triples_02() throws IOException {
    MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this.getMapReduceDriver();
    Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"), NodeFactory.createLiteral("1"));
    TripleWritable tw = new TripleWritable(t);
    for (int i = 0; i < 100; i++) {
        driver.addInput(new LongWritable(i), tw);
    }
    driver.addOutput(NullWritable.get(), tw);
    driver.runTest();
}
Also used : Triple(org.apache.jena.graph.Triple) TripleWritable(org.apache.jena.hadoop.rdf.types.TripleWritable) LongWritable(org.apache.hadoop.io.LongWritable) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)
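
The driver in this test is configured elsewhere with a mapper and reducer matching the generic signature MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable>. The pair below is a hypothetical equivalent (class names invented for illustration, not the actual Jena Elephas classes) showing why keying on the triple with a NullWritable value makes the shuffle collapse duplicates.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.jena.hadoop.rdf.types.TripleWritable;

// Hypothetical mapper: the triple itself becomes the key, the value is empty.
class TripleToKeyMapper extends Mapper<LongWritable, TripleWritable, TripleWritable, NullWritable> {
    @Override
    protected void map(LongWritable key, TripleWritable value, Context context)
            throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}

// Hypothetical reducer: one output per distinct triple, however many inputs shared it.
class DistinctTripleReducer extends Reducer<TripleWritable, NullWritable, NullWritable, TripleWritable> {
    @Override
    protected void reduce(TripleWritable key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        context.write(NullWritable.get(), key);
    }
}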

Example 85 with NullWritable

use of org.apache.hadoop.io.NullWritable in project jena by apache.

the class AbstractNodeWriter method writeValue.

/**
     * Writes the given value
     * <p>
     * If the value is one of the RDF primitives - {@link NodeWritable},
     * {@link TripleWritable}, {@link QuadWritable} and
     * {@link NodeTupleWritable} - then it is formatted as a series of nodes
     * separated by the separator. Otherwise it is formatted by simply calling
     * {@code toString()} on it.
     * </p>
     * 
     * @param value
     *            Value to write
     */
protected void writeValue(TValue value) {
    // Handle null specially
    if (value instanceof NullWritable || value == null)
        return;
    // Handle RDF primitives specially and format them as proper nodes
    if (value instanceof NodeWritable) {
        this.writeKey((NodeWritable) value);
    } else if (value instanceof TripleWritable) {
        Triple t = ((TripleWritable) value).get();
        this.writeNodes(t.getSubject(), t.getPredicate(), t.getObject());
    } else if (value instanceof QuadWritable) {
        Quad q = ((QuadWritable) value).get();
        this.writeNodes(q.getGraph(), q.getSubject(), q.getPredicate(), q.getObject());
    } else if (value instanceof NodeTupleWritable) {
        Tuple<Node> tuple = ((NodeTupleWritable) value).get();
        Node[] n = new Node[tuple.len()];
        tuple.copyInto(n);
        this.writeNodes(n);
    } else {
        // For arbitrary values just toString() them
        this.writer.write(value.toString());
    }
}
Also used : Triple(org.apache.jena.graph.Triple) NodeWritable(org.apache.jena.hadoop.rdf.types.NodeWritable) TripleWritable(org.apache.jena.hadoop.rdf.types.TripleWritable) Quad(org.apache.jena.sparql.core.Quad) QuadWritable(org.apache.jena.hadoop.rdf.types.QuadWritable) NullWritable(org.apache.hadoop.io.NullWritable) Tuple(org.apache.jena.atlas.lib.tuple.Tuple) NodeTupleWritable(org.apache.jena.hadoop.rdf.types.NodeTupleWritable)
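
The early return for NullWritable in writeValue() relies on NullWritable being a zero-byte placeholder. The snippet below is a small illustration of that property (not taken from the Jena source); the class name NullWritableDemo is made up for the example.

import org.apache.hadoop.io.NullWritable;

public class NullWritableDemo {
    public static void main(String[] args) {
        // NullWritable is a singleton: get() always returns the same instance,
        // and its write()/readFields() methods serialize nothing, so there is
        // nothing meaningful for AbstractNodeWriter to print.
        NullWritable a = NullWritable.get();
        NullWritable b = NullWritable.get();
        System.out.println(a == b); // true
    }
}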

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable): 101 usages
Test (org.junit.Test): 65
Configuration (org.apache.hadoop.conf.Configuration): 41
Path (org.apache.hadoop.fs.Path): 41
File (java.io.File): 29
FileSystem (org.apache.hadoop.fs.FileSystem): 26
SequenceFile (org.apache.hadoop.io.SequenceFile): 22
JobConf (org.apache.hadoop.mapred.JobConf): 22
RouteBuilder (org.apache.camel.builder.RouteBuilder): 18
MockEndpoint (org.apache.camel.component.mock.MockEndpoint): 18
ArrayFile (org.apache.hadoop.io.ArrayFile): 18
Text (org.apache.hadoop.io.Text): 16
InputSplit (org.apache.hadoop.mapred.InputSplit): 16
LongWritable (org.apache.hadoop.io.LongWritable): 15
IntWritable (org.apache.hadoop.io.IntWritable): 10
Writer (org.apache.hadoop.io.SequenceFile.Writer): 9
CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable): 8
IOException (java.io.IOException): 7
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 7
FloatWritable (org.apache.hadoop.io.FloatWritable): 7