Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
The class TestGridMixClasses, method testSleepMapper.
/*
 * test SleepMapper
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test(timeout = 30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext =
      new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context =
      new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);
  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));
  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
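The MapContext above is parameterised with NullWritable as the map output value type, the usual MapReduce way of saying "keys only, no payload". A minimal sketch of a mapper written against that convention is shown below; SleepLikeMapper is a hypothetical illustration, not Gridmix's actual SleepJob.SleepMapper, and only standard org.apache.hadoop.mapreduce APIs are used.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper, for illustration only: it sleeps until the wall-clock
// time carried in the key and then emits one record whose value is the
// NullWritable singleton.
public class SleepLikeMapper
    extends Mapper<LongWritable, LongWritable, LongWritable, NullWritable> {

  @Override
  protected void map(LongWritable key, LongWritable value, Context context)
      throws IOException, InterruptedException {
    long wakeUpAt = key.get();
    long now = System.currentTimeMillis();
    if (wakeUpAt > now) {
      // Sleep until the requested wall-clock time, as the test above expects.
      Thread.sleep(wakeUpAt - now);
    }
    // NullWritable.get() always returns the same instance; no value payload is
    // actually serialized.
    context.write(key, NullWritable.get());
  }
}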
Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
The class TestDistCacheEmulation, method validateSetupGenDC.
/**
 * Validate setupGenerateDistCacheData by validating <li>permissions of the
 * distributed cache directories and <li>content of the generated sequence
 * file. This includes validation of dist cache file paths and their file
 * sizes.
 */
private void validateSetupGenDC(Configuration jobConf, long[] sortedFileSizes)
    throws IOException, InterruptedException {
  // build things needed for validation
  long sumOfFileSizes = 0;
  for (int i = 0; i < sortedFileSizes.length; i++) {
    sumOfFileSizes += sortedFileSizes[i];
  }
  FileSystem fs = FileSystem.get(jobConf);
  assertEquals("Number of distributed cache files to be generated is wrong.",
      sortedFileSizes.length,
      jobConf.getInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, -1));
  assertEquals("Total size of dist cache files to be generated is wrong.",
      sumOfFileSizes,
      jobConf.getLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, -1));
  Path filesListFile = new Path(
      jobConf.get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST));
  FileStatus stat = fs.getFileStatus(filesListFile);
  assertEquals("Wrong permissions of dist Cache files list file " + filesListFile,
      new FsPermission((short) 0644), stat.getPermission());
  InputSplit split = new FileSplit(filesListFile, 0, stat.getLen(), (String[]) null);
  TaskAttemptContext taskContext =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(jobConf);
  RecordReader<LongWritable, BytesWritable> reader =
      new GenerateDistCacheData.GenDCDataFormat().createRecordReader(split, taskContext);
  MapContext<LongWritable, BytesWritable, NullWritable, BytesWritable> mapContext =
      new MapContextImpl<LongWritable, BytesWritable, NullWritable, BytesWritable>(
          jobConf, taskContext.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(), split);
  reader.initialize(split, mapContext);
  // start validating setupGenerateDistCacheData
  doValidateSetupGenDC(reader, fs, sortedFileSizes);
}
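For context, a hedged sketch of how a reader like the one initialized above can be drained (the real checks live in doValidateSetupGenDC, which is not shown here). readFileSizes is a hypothetical helper, and reading the key as a file size and the value as a file path is an assumption about the sequence file layout written by GenerateDistCacheData.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.RecordReader;

// Hypothetical helper: collect the per-file sizes listed in the dist cache
// files list so they can be compared against sortedFileSizes.
static long[] readFileSizes(RecordReader<LongWritable, BytesWritable> reader)
    throws IOException, InterruptedException {
  List<Long> sizes = new ArrayList<Long>();
  while (reader.nextKeyValue()) {
    LongWritable fileSize = reader.getCurrentKey();    // assumed: size of one dist cache file
    BytesWritable filePath = reader.getCurrentValue(); // assumed: its path, as bytes (unused here)
    sizes.add(fileSize.get());
  }
  reader.close();
  long[] result = new long[sizes.size()];
  for (int i = 0; i < sizes.size(); i++) {
    result[i] = sizes.get(i);
  }
  return result;
}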
Use of org.apache.hadoop.io.NullWritable in project carbondata by apache.
The class StoreCreator, method executeGraph.
/**
 * Execute the graph which will load the data into the store.
 *
 * @param loadModel     load model describing the data to be loaded
 * @param storeLocation target store location
 * @throws Exception
 */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation)
    throws Exception {
  new File(storeLocation).mkdirs();
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName()
      + File.separator + tableName + File.separator + 0 + File.separator + 1
      + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    path.delete();
  }
  SchemaInfo info = new SchemaInfo();
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance()
      .getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE,
          CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setNumberOfColumns(configuration,
      String.valueOf(loadModel.getCsvHeaderColumns().length));
  CSVInputFormat.setMaxColumns(configuration, "10");
  TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration,
      new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  new DataLoadExecutor().execute(loadModel, storeLocation,
      new CarbonIterator[] { readerIterator });
  info.setDatabaseName(databaseName);
  info.setTableName(tableName);
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(),
      loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
  String segLocation = storeLocation + "/" + databaseName + "/" + tableName
      + "/Fact/Part0/Segment_0";
  File file = new File(segLocation);
  File factFile = null;
  File[] folderList = file.listFiles();
  File folder = null;
  for (int i = 0; i < folderList.length; i++) {
    if (folderList[i].isDirectory()) {
      folder = folderList[i];
    }
  }
  if (folder.isDirectory()) {
    File[] files = folder.listFiles();
    for (int i = 0; i < files.length; i++) {
      if (!files[i].isDirectory() && files[i].getName().startsWith("part")) {
        factFile = files[i];
        break;
      }
    }
    // Files.copy(factFile.toPath(), file.toPath(), REPLACE_EXISTING);
    factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
    CarbonUtil.deleteFoldersAndFiles(folder);
  }
}
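The record reader created by CSVInputFormat above is keyed by NullWritable, so every call hands back the same key singleton and only the StringArrayWritable row carries data. A hedged sketch of a generic helper that drains such a reader follows (assuming the reader has already been initialized, as CSVRecordReaderIterator does in the code above; countRows is hypothetical and only the standard org.apache.hadoop.mapreduce.RecordReader API is used):

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordReader;

// Hypothetical helper: count the rows produced by a NullWritable-keyed reader.
static <V> long countRows(RecordReader<NullWritable, V> reader)
    throws IOException, InterruptedException {
  long rows = 0;
  while (reader.nextKeyValue()) {
    NullWritable ignoredKey = reader.getCurrentKey(); // always the NullWritable singleton
    V row = reader.getCurrentValue();                 // e.g. a StringArrayWritable per CSV line
    rows++;
  }
  reader.close();
  return rows;
}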
Use of org.apache.hadoop.io.NullWritable in project jena by apache.
The class TestDistinctTriples, method distinct_triples_02.
@Test
public void distinct_triples_02() throws IOException {
  MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver =
      this.getMapReduceDriver();
  Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
      NodeFactory.createLiteral("1"));
  TripleWritable tw = new TripleWritable(t);
  for (int i = 0; i < 100; i++) {
    driver.addInput(new LongWritable(i), tw);
  }
  driver.addOutput(NullWritable.get(), tw);
  driver.runTest();
}
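The driver's type signature implies a mapper that emits (TripleWritable, NullWritable) pairs and a reducer that flips them to (NullWritable, TripleWritable), so the 100 identical inputs collapse to a single output in the shuffle. A hedged sketch of such a pair is below; the class names are hypothetical and the actual Jena Elephas mapper/reducer may be organised differently.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.jena.hadoop.rdf.types.TripleWritable;

public class DistinctTriplesSketch {

  // Key each triple by itself; the NullWritable value carries no payload, so
  // duplicate triples become a single key group after the shuffle.
  public static class TripleKeyMapper
      extends Mapper<LongWritable, TripleWritable, TripleWritable, NullWritable> {
    @Override
    protected void map(LongWritable key, TripleWritable value, Context context)
        throws IOException, InterruptedException {
      context.write(value, NullWritable.get());
    }
  }

  // Emit each distinct triple exactly once, with NullWritable as the output key.
  public static class DistinctTripleReducer
      extends Reducer<TripleWritable, NullWritable, NullWritable, TripleWritable> {
    @Override
    protected void reduce(TripleWritable key, Iterable<NullWritable> values, Context context)
        throws IOException, InterruptedException {
      context.write(NullWritable.get(), key);
    }
  }
}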
Use of org.apache.hadoop.io.NullWritable in project jena by apache.
The class AbstractNodeWriter, method writeValue.
/**
 * Writes the given value.
 * <p>
 * If the value is one of the RDF primitives - {@link NodeWritable},
 * {@link TripleWritable}, {@link QuadWritable} and
 * {@link NodeTupleWritable} - then it is formatted as a series of nodes
 * separated by the separator. Otherwise it is formatted by simply calling
 * {@code toString()} on it.
 * </p>
 *
 * @param value
 *            Value to write
 */
protected void writeValue(TValue value) {
  // Handle null specially
  if (value instanceof NullWritable || value == null)
    return;
  // Handle RDF primitives specially and format them as proper nodes
  if (value instanceof NodeWritable) {
    this.writeKey((NodeWritable) value);
  } else if (value instanceof TripleWritable) {
    Triple t = ((TripleWritable) value).get();
    this.writeNodes(t.getSubject(), t.getPredicate(), t.getObject());
  } else if (value instanceof QuadWritable) {
    Quad q = ((QuadWritable) value).get();
    this.writeNodes(q.getGraph(), q.getSubject(), q.getPredicate(), q.getObject());
  } else if (value instanceof NodeTupleWritable) {
    Tuple<Node> tuple = ((NodeTupleWritable) value).get();
    Node[] n = new Node[tuple.len()];
    tuple.copyInto(n);
    this.writeNodes(n);
  } else {
    // For arbitrary values just toString() them
    this.writer.write(value.toString());
  }
}
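The early return matters because NullWritable is the conventional "no value" placeholder in MapReduce output, and without the guard it would fall through to the final branch and have toString() called on it. A small standalone illustration using only the documented org.apache.hadoop.io.NullWritable API:

import org.apache.hadoop.io.NullWritable;

public class NullWritableDemo {
  public static void main(String[] args) {
    // NullWritable is a singleton; both calls return the same instance.
    NullWritable a = NullWritable.get();
    NullWritable b = NullWritable.get();
    System.out.println(a == b); // true
    // It still has a non-empty string form, which is why writeValue() skips it
    // explicitly rather than relying on toString() producing nothing.
    System.out.println(a.toString());
  }
}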