Use of voldemort.store.readonly.ReadOnlyStorageMetadata in project voldemort by voldemort.
The class HdfsFetcherTest, method testAggStatsWithValidFile.
public void testAggStatsWithValidFile() throws Exception {
HdfsFetcherAggStats stats = HdfsFetcherAggStats.getStats();
long totalBytesFetchedBefore = stats.getTotalBytesFetched();
long totalFetchesBefore = stats.getTotalFetches();
double totalDataFetchRateBefore = stats.getTotalDataFetchRate();
// Generate 0_0.[index | data] and their corresponding metadata
File testSourceDirectory = TestUtils.createTempDir();
File testDestinationDirectory = TestUtils.createTempDir();
// Index and data files to fetch; the corresponding metadata file is created below
File indexFile = new File(testSourceDirectory, "0_0.index");
FileUtils.writeByteArrayToFile(indexFile, TestUtils.randomBytes(100));
File dataFile = new File(testSourceDirectory, "0_0.data");
FileUtils.writeByteArrayToFile(dataFile, TestUtils.randomBytes(400));
File metadataFile = new File(testSourceDirectory, ".metadata");
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
metadata.add(ReadOnlyStorageMetadata.CHECKSUM, new String(Hex.encodeHex(CheckSumTests.calculateCheckSum(testSourceDirectory.listFiles(), CheckSumType.MD5))));
FileUtils.writeStringToFile(metadataFile, metadata.toJsonString());
HdfsFetcher fetcher = new HdfsFetcher();
File fetchedFile = fetcher.fetch(testSourceDirectory.getAbsolutePath(), testDestinationDirectory.getAbsolutePath() + "1");
assertNotNull(fetchedFile);
assertEquals(fetchedFile.getAbsolutePath(), testDestinationDirectory.getAbsolutePath() + "1");
// The total bytes fetched includes the metadata file as well.
assertEquals(totalBytesFetchedBefore + 500 + metadata.toJsonString().length(), stats.getTotalBytesFetched());
assertEquals(totalFetchesBefore + 1, stats.getTotalFetches());
assertTrue(stats.getTotalDataFetchRate() > totalDataFetchRateBefore);
}
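Every example on this page follows the same ReadOnlyStorageMetadata pattern: populate the metadata, serialize it with toJsonString() into a ".metadata" file, and later read it back. The sketch below distills that round trip using only calls that already appear in these snippets; the temporary directory, file contents, and variable names are illustrative, and the same test imports as above (commons-io FileUtils, commons-codec Hex, voldemort's TestUtils, CheckSum, and CheckSumTests helpers) are assumed.
// Illustrative round trip for a ".metadata" file; not taken verbatim from any one test.
File storeVersionDir = TestUtils.createTempDir();
FileUtils.writeByteArrayToFile(new File(storeVersionDir, "0_0.index"), TestUtils.randomBytes(100));
FileUtils.writeByteArrayToFile(new File(storeVersionDir, "0_0.data"), TestUtils.randomBytes(400));
// 1) Build the metadata and serialize it alongside the chunk files.
ReadOnlyStorageMetadata written = new ReadOnlyStorageMetadata();
written.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
written.add(ReadOnlyStorageMetadata.CHECKSUM_TYPE, CheckSum.toString(CheckSumType.MD5));
written.add(ReadOnlyStorageMetadata.CHECKSUM,
            new String(Hex.encodeHex(CheckSumTests.calculateCheckSum(storeVersionDir.listFiles(),
                                                                     CheckSumType.MD5))));
File metadataFile = new File(storeVersionDir, ".metadata");
FileUtils.writeStringToFile(metadataFile, written.toJsonString());
// 2) Read it back, e.g. after an HdfsFetcher.fetch(), and inspect individual keys.
ReadOnlyStorageMetadata read = new ReadOnlyStorageMetadata(metadataFile);
String format = (String) read.get(ReadOnlyStorageMetadata.FORMAT);
String checksumHex = (String) read.get(ReadOnlyStorageMetadata.CHECKSUM);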
Use of voldemort.store.readonly.ReadOnlyStorageMetadata in project voldemort by voldemort.
The class AdminServiceBasicTest, method generateROFiles.
private void generateROFiles(int numChunks, long indexSize, long dataSize, HashMap<Integer, List<Integer>> buckets, File versionDir) throws IOException {
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
File metadataFile = new File(versionDir, ".metadata");
BufferedWriter writer = new BufferedWriter(new FileWriter(metadataFile));
writer.write(metadata.toJsonString());
writer.close();
for (Entry<Integer, List<Integer>> entry : buckets.entrySet()) {
int replicaType = entry.getKey();
for (int partitionId : entry.getValue()) {
for (int chunkId = 0; chunkId < numChunks; chunkId++) {
String fileName = Integer.toString(partitionId) + "_" + Integer.toString(replicaType) + "_" + Integer.toString(chunkId);
File index = new File(versionDir, fileName + ".index");
File data = new File(versionDir, fileName + ".data");
// write some random crap for index and data
FileOutputStream dataOs = new FileOutputStream(data);
for (int i = 0; i < dataSize; i++) dataOs.write(i);
dataOs.close();
FileOutputStream indexOs = new FileOutputStream(index);
for (int i = 0; i < indexSize; i++) indexOs.write(i);
indexOs.close();
}
}
}
}
Use of voldemort.store.readonly.ReadOnlyStorageMetadata in project voldemort by voldemort.
The class AdminRebalanceTest, method generateROFiles.
private void generateROFiles(int numChunks, long indexSize, long dataSize, HashMap<Integer, List<Integer>> buckets, File versionDir) throws IOException {
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
File metadataFile = new File(versionDir, ".metadata");
BufferedWriter writer = new BufferedWriter(new FileWriter(metadataFile));
writer.write(metadata.toJsonString());
writer.close();
for (Entry<Integer, List<Integer>> entry : buckets.entrySet()) {
int replicaType = entry.getKey();
for (int partitionId : entry.getValue()) {
for (int chunkId = 0; chunkId < numChunks; chunkId++) {
File index = new File(versionDir, Integer.toString(partitionId) + "_" + Integer.toString(replicaType) + "_" + Integer.toString(chunkId) + ".index");
File data = new File(versionDir, Integer.toString(partitionId) + "_" + Integer.toString(replicaType) + "_" + Integer.toString(chunkId) + ".data");
// write some random crap for index and data
FileOutputStream dataOs = new FileOutputStream(data);
for (int i = 0; i < dataSize; i++) dataOs.write(i);
dataOs.close();
FileOutputStream indexOs = new FileOutputStream(index);
for (int i = 0; i < indexSize; i++) indexOs.write(i);
indexOs.close();
}
}
}
}
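The two generateROFiles helpers above are identical, and both close their BufferedWriter and FileOutputStreams manually, so a failed write would leak the handle. A hedged alternative, not how these tests are actually written, is to lean on try-with-resources; the sketch below reuses the versionDir and dataSize parameters of the enclosing method and an illustrative chunk file name.
// Hypothetical tightening of the metadata and chunk writes with try-with-resources.
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(versionDir, ".metadata")))) {
    writer.write(metadata.toJsonString());
}
try (FileOutputStream dataOs = new FileOutputStream(new File(versionDir, "0_0_0.data"))) {
    for (int i = 0; i < dataSize; i++) {
        // Same filler bytes as above; the content only needs to exist, not be meaningful.
        dataOs.write(i);
    }
}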
Use of voldemort.store.readonly.ReadOnlyStorageMetadata in project voldemort by voldemort.
The class HadoopStoreBuilder, method build.
/**
* Run the job
*/
public void build() {
try {
JobConf conf = prepareJobConf(baseJobConf);
FileSystem fs = outputDir.getFileSystem(conf);
if (fs.exists(outputDir)) {
info("Deleting previous output in " + outputDir + " for building store " + this.storeDef.getName());
fs.delete(outputDir, true);
}
conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
conf.set("stores.xml", new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
conf.setBoolean(VoldemortBuildAndPushJob.SAVE_KEYS, saveKeys);
conf.setBoolean(VoldemortBuildAndPushJob.REDUCER_PER_BUCKET, reducerPerBucket);
conf.setBoolean(VoldemortBuildAndPushJob.BUILD_PRIMARY_REPLICAS_ONLY, buildPrimaryReplicasOnly);
if (!isAvro) {
conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
conf.setMapperClass(mapperClass);
conf.setMapOutputKeyClass(BytesWritable.class);
conf.setMapOutputValueClass(BytesWritable.class);
conf.setReducerClass(HadoopStoreBuilderReducer.class);
}
conf.setInputFormat(inputFormatClass);
conf.setOutputFormat(SequenceFileOutputFormat.class);
conf.setOutputKeyClass(BytesWritable.class);
conf.setOutputValueClass(BytesWritable.class);
conf.setJarByClass(getClass());
conf.setReduceSpeculativeExecution(false);
FileInputFormat.setInputPaths(conf, inputPath);
conf.set("final.output.dir", outputDir.toString());
conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
conf.set("dfs.umaskmode", "002");
FileOutputFormat.setOutputPath(conf, tempDir);
FileSystem outputFs = outputDir.getFileSystem(conf);
if (outputFs.exists(outputDir)) {
throw new IOException("Final output directory already exists.");
}
// delete the temp dir if it already exists
FileSystem tempFs = tempDir.getFileSystem(conf);
tempFs.delete(tempDir, true);
long size = sizeOfPath(tempFs, inputPath);
logger.info("Data size = " + size + ", replication factor = " + storeDef.getReplicationFactor() + ", numNodes = " + cluster.getNumberOfNodes() + ", numPartitions = " + cluster.getNumberOfPartitions() + ", chunk size = " + chunkSizeBytes);
// Base numbers of chunks and reducers, will get modified according to various settings
int numChunks = (int) (size / cluster.getNumberOfPartitions() / chunkSizeBytes) + 1;
/* +1 so we round up */
int numReducers = cluster.getNumberOfPartitions();
// In saveKeys mode the replication factor scales the number of reducers, whereas in the legacy mode it scales the number of chunks instead. Whether that distinction is the right one is an open question, but in order to avoid breaking anything we'll just maintain the original behavior.
if (saveKeys) {
if (buildPrimaryReplicasOnly) {
// The buildPrimaryReplicasOnly mode is supported exclusively in combination with
// saveKeys. If enabled, then we don't want to shuffle extra keys redundantly,
// hence we don't change the number of reducers.
} else {
// Old behavior, where all keys are redundantly shuffled to redundant reducers.
numReducers = numReducers * storeDef.getReplicationFactor();
}
} else {
numChunks = numChunks * storeDef.getReplicationFactor();
}
// Ensure at least one chunk
numChunks = Math.max(numChunks, 1);
if (reducerPerBucket) {
// Then all chunks for a given partition/replica combination are shuffled to the same
// reducer, hence, the number of reducers remains the same as previously defined.
} else {
// Otherwise, we want one reducer per chunk, hence we multiply the number of reducers.
numReducers = numReducers * numChunks;
}
conf.setInt(AbstractStoreBuilderConfigurable.NUM_CHUNKS, numChunks);
conf.setNumReduceTasks(numReducers);
logger.info("Number of chunks: " + numChunks + ", number of reducers: " + numReducers + ", save keys: " + saveKeys + ", reducerPerBucket: " + reducerPerBucket + ", buildPrimaryReplicasOnly: " + buildPrimaryReplicasOnly);
if (isAvro) {
conf.setPartitionerClass(AvroStoreBuilderPartitioner.class);
// conf.setMapperClass(mapperClass);
conf.setMapOutputKeyClass(ByteBuffer.class);
conf.setMapOutputValueClass(ByteBuffer.class);
conf.setInputFormat(inputFormatClass);
conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
conf.setOutputKeyClass(ByteBuffer.class);
conf.setOutputValueClass(ByteBuffer.class);
// AvroJob confs for the avro mapper
AvroJob.setInputSchema(conf, Schema.parse(baseJobConf.get(AVRO_REC_SCHEMA)));
AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));
AvroJob.setMapperClass(conf, mapperClass);
conf.setReducerClass(AvroStoreBuilderReducer.class);
}
logger.info("Building store...");
// The snippet below is copied and adapted from JobClient.runJob(conf);
// We have more control in the error handling this way.
JobClient jc = new JobClient(conf);
RunningJob runningJob = jc.submitJob(conf);
Counters counters;
try {
if (!jc.monitorAndPrintJob(conf, runningJob)) {
counters = runningJob.getCounters();
// For some datasets, the number of chunks that we calculated is inadequate.
// Here, we try to identify if this is the case.
long mapOutputBytes = counters.getCounter(Task.Counter.MAP_OUTPUT_BYTES);
long averageNumberOfBytesPerChunk = mapOutputBytes / numChunks / cluster.getNumberOfPartitions();
if (averageNumberOfBytesPerChunk > (HadoopStoreWriter.DEFAULT_CHUNK_SIZE)) {
float chunkSizeBloat = averageNumberOfBytesPerChunk / (float) HadoopStoreWriter.DEFAULT_CHUNK_SIZE;
long suggestedTargetChunkSize = (long) (HadoopStoreWriter.DEFAULT_CHUNK_SIZE / chunkSizeBloat);
logger.error("The number of bytes per chunk may be too high." + " averageNumberOfBytesPerChunk = " + averageNumberOfBytesPerChunk + ". Consider setting " + VoldemortBuildAndPushJob.BUILD_CHUNK_SIZE + "=" + suggestedTargetChunkSize);
} else {
logger.error("Job Failed: " + runningJob.getFailureInfo());
}
throw new VoldemortException("BnP's MapReduce job failed.");
}
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
}
counters = runningJob.getCounters();
long numberOfRecords = counters.getCounter(Task.Counter.REDUCE_INPUT_GROUPS);
if (numberOfRecords < minNumberOfRecords) {
throw new VoldemortException("The number of records in the data set (" + numberOfRecords + ") is lower than the minimum required (" + minNumberOfRecords + "). Aborting.");
}
if (saveKeys) {
logger.info("Number of collisions in the job - " + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
logger.info("Maximum number of collisions for one entry - " + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
}
// Do a CheckSumOfCheckSum - Similar to HDFS
CheckSum checkSumGenerator = CheckSum.getInstance(this.checkSumType);
if (!this.checkSumType.equals(CheckSumType.NONE) && checkSumGenerator == null) {
throw new VoldemortException("Could not generate checksum digest for type " + this.checkSumType);
}
List<Integer> directorySuffixes = Lists.newArrayList();
if (buildPrimaryReplicasOnly) {
// Files are grouped by partitions
for (int partitionId = 0; partitionId < cluster.getNumberOfPartitions(); partitionId++) {
directorySuffixes.add(partitionId);
}
} else {
// Files are grouped by node
for (Node node : cluster.getNodes()) {
directorySuffixes.add(node.getId());
}
}
ReadOnlyStorageMetadata fullStoreMetadata = new ReadOnlyStorageMetadata();
List<Integer> emptyDirectories = Lists.newArrayList();
final String directoryPrefix = buildPrimaryReplicasOnly ? ReadOnlyUtils.PARTITION_DIRECTORY_PREFIX : ReadOnlyUtils.NODE_DIRECTORY_PREFIX;
// Generate a log message every 30 seconds or after processing every 100 directories.
final long LOG_INTERVAL_TIME = TimeUnit.MILLISECONDS.convert(30, TimeUnit.SECONDS);
final int LOG_INTERVAL_COUNT = buildPrimaryReplicasOnly ? 100 : 5;
int lastLogCount = 0;
long lastLogTime = 0;
long startTimeMS = System.currentTimeMillis();
// Ensure every output folder exists and has its format metadata file
for (int index = 0; index < directorySuffixes.size(); index++) {
int directorySuffix = directorySuffixes.get(index);
long elapsedTime = System.currentTimeMillis() - lastLogTime;
long elapsedCount = index - lastLogCount;
if (elapsedTime >= LOG_INTERVAL_TIME || elapsedCount >= LOG_INTERVAL_COUNT) {
lastLogTime = System.currentTimeMillis();
lastLogCount = index;
logger.info("Processed " + directorySuffix + " out of " + directorySuffixes.size() + " directories.");
}
String directoryName = directoryPrefix + directorySuffix;
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
if (saveKeys) {
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
} else {
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V1.getCode());
}
Path directoryPath = new Path(outputDir.toString(), directoryName);
if (!outputFs.exists(directoryPath)) {
logger.debug("No data generated for " + directoryName + ". Generating empty folder");
emptyDirectories.add(directorySuffix);
// Create empty folder
outputFs.mkdirs(directoryPath);
outputFs.setPermission(directoryPath, new FsPermission(HADOOP_FILE_PERMISSION));
logger.debug("Setting permission to 755 for " + directoryPath);
}
processCheckSumMetadataFile(directoryName, outputFs, checkSumGenerator, directoryPath, metadata);
if (buildPrimaryReplicasOnly) {
// In buildPrimaryReplicasOnly mode, writing a metadata file for each partition
// takes too long, so we skip it. We will rely on the full-store.metadata file instead.
} else {
// Maintaining the old behavior: we write the node-specific metadata file
writeMetadataFile(directoryPath, outputFs, ReadOnlyUtils.METADATA_FILE_EXTENSION, metadata);
}
fullStoreMetadata.addNestedMetadata(directoryName, metadata);
}
// Write the aggregate metadata file
writeMetadataFile(outputDir, outputFs, ReadOnlyUtils.FULL_STORE_METADATA_FILE, fullStoreMetadata);
long elapsedTimeMs = System.currentTimeMillis() - startTimeMS;
long elapsedTimeSeconds = TimeUnit.SECONDS.convert(elapsedTimeMs, TimeUnit.MILLISECONDS);
logger.info("Total Processed directories: " + directorySuffixes.size() + ". Elapsed Time (Seconds):" + elapsedTimeSeconds);
if (emptyDirectories.size() > 0) {
logger.info("Empty directories: " + Arrays.toString(emptyDirectories.toArray()));
}
} catch (Exception e) {
logger.error("Error in Store builder", e);
throw new VoldemortException(e);
}
}
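The chunk and reducer sizing is the branchiest part of build(), so here is a hedged distillation of just that arithmetic. The helper name, signature, and int[] return type are illustrative and not part of HadoopStoreBuilder; the calculations mirror the code above.
// Sketch of the sizing logic only; mirrors the branches in build() above.
static int[] chunksAndReducers(long inputSizeBytes, int numPartitions, long chunkSizeBytes,
                               int replicationFactor, boolean saveKeys,
                               boolean buildPrimaryReplicasOnly, boolean reducerPerBucket) {
    int numChunks = (int) (inputSizeBytes / numPartitions / chunkSizeBytes) + 1; // +1 so we round up
    int numReducers = numPartitions;
    if (saveKeys) {
        if (!buildPrimaryReplicasOnly) {
            // Old saveKeys behavior: every replica is redundantly shuffled to its own reducer.
            numReducers = numReducers * replicationFactor;
        }
    } else {
        // Legacy (READONLY_V1) behavior: replicas show up as additional chunks instead.
        numChunks = numChunks * replicationFactor;
    }
    numChunks = Math.max(numChunks, 1);
    if (!reducerPerBucket) {
        // One reducer per chunk instead of one per partition/replica bucket.
        numReducers = numReducers * numChunks;
    }
    return new int[] { numChunks, numReducers };
}
For example, a 10 GB input across 16 partitions with a 1 GB chunk size, replication factor 2, saveKeys=true, buildPrimaryReplicasOnly=false, and reducerPerBucket=true yields numChunks = 1 and numReducers = 32.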
Use of voldemort.store.readonly.ReadOnlyStorageMetadata in project voldemort by voldemort.
The class HadoopStoreBuilderTest, method testHadoopBuild.
@Test
public void testHadoopBuild() throws Exception {
// create test data
Map<String, String> values = new HashMap<String, String>();
File testDir = TestUtils.createTempDir();
File tempDir = new File(testDir, "temp"), tempDir2 = new File(testDir, "temp2");
File outputDir = new File(testDir, "output"), outputDir2 = new File(testDir, "output2");
File storeDir = TestUtils.createTempDir(testDir);
for (int i = 0; i < 200; i++) values.put(Integer.toString(i), Integer.toBinaryString(i));
// write test data to text file
File inputFile = File.createTempFile("input", ".txt", testDir);
inputFile.deleteOnExit();
StringBuilder contents = new StringBuilder();
for (Map.Entry<String, String> entry : values.entrySet()) contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
FileUtils.writeStringToFile(inputFile, contents.toString());
String storeName = "test";
SerializerDefinition serDef = new SerializerDefinition("string");
Cluster cluster = ServerTestUtils.getLocalCluster(1);
// Test backwards compatibility
StoreDefinition def = new StoreDefinitionBuilder().setName(storeName).setType(ReadOnlyStorageConfiguration.TYPE_NAME).setKeySerializer(serDef).setValueSerializer(serDef).setRoutingPolicy(RoutingTier.CLIENT).setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY).setReplicationFactor(1).setPreferredReads(1).setRequiredReads(1).setPreferredWrites(1).setRequiredWrites(1).build();
HadoopStoreBuilder builder = new HadoopStoreBuilder("testHadoopBuild", new Props(), new JobConf(), TextStoreMapper.class, TextInputFormat.class, cluster, def, new Path(tempDir2.getAbsolutePath()), new Path(outputDir2.getAbsolutePath()), new Path(inputFile.getAbsolutePath()), CheckSumType.MD5, saveKeys, false, 64 * 1024, false, null, false);
builder.build();
builder = new HadoopStoreBuilder("testHadoopBuild", new Props(), new JobConf(), TextStoreMapper.class, TextInputFormat.class, cluster, def, new Path(tempDir.getAbsolutePath()), new Path(outputDir.getAbsolutePath()), new Path(inputFile.getAbsolutePath()), CheckSumType.MD5, saveKeys, false, 64 * 1024, false, null, false);
builder.build();
// Check if checkSum is generated in outputDir
File nodeFile = new File(outputDir, "node-0");
// Check if metadata file exists
File metadataFile = new File(nodeFile, ".metadata");
Assert.assertTrue("Metadata file should exist!", metadataFile.exists());
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata(metadataFile);
if (saveKeys)
Assert.assertEquals("In saveKeys mode, the metadata format should be READONLY_V2!", metadata.get(ReadOnlyStorageMetadata.FORMAT), ReadOnlyStorageFormat.READONLY_V2.getCode());
else
Assert.assertEquals("In legacy mode (saveKeys==false), the metadata format should be READONLY_V1!", metadata.get(ReadOnlyStorageMetadata.FORMAT), ReadOnlyStorageFormat.READONLY_V1.getCode());
Assert.assertEquals("Checksum type should be MD5!", metadata.get(ReadOnlyStorageMetadata.CHECKSUM_TYPE), CheckSum.toString(CheckSumType.MD5));
// Check contents of checkSum file
byte[] md5 = Hex.decodeHex(((String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM)).toCharArray());
byte[] checkSumBytes = CheckSumTests.calculateCheckSum(nodeFile.listFiles(), CheckSumType.MD5);
Assert.assertEquals("Checksum is not as excepted!", 0, ByteUtils.compare(checkSumBytes, md5));
// check if fetching works
HdfsFetcher fetcher = new HdfsFetcher();
// Fetch to version directory
File versionDir = new File(storeDir, "version-0");
fetcher.fetch(nodeFile.getAbsolutePath(), versionDir.getAbsolutePath());
Assert.assertTrue("Version directory should exist!", versionDir.exists());
// open store
@SuppressWarnings("unchecked") Serializer<Object> serializer = (Serializer<Object>) new DefaultSerializerFactory().getSerializer(serDef);
ReadOnlyStorageEngine engine = new ReadOnlyStorageEngine(storeName, searchStrategy, new RoutingStrategyFactory().updateRoutingStrategy(def, cluster), 0, storeDir, 1);
Store<Object, Object, Object> store = SerializingStore.wrap(engine, serializer, serializer, serializer);
// check values
for (Map.Entry<String, String> entry : values.entrySet()) {
String key = entry.getKey();
try {
List<Versioned<Object>> found = store.get(key, null);
Assert.assertEquals("Incorrect number of results", 1, found.size());
Assert.assertEquals(entry.getValue(), found.get(0).getValue());
} catch (VoldemortException e) {
throw new VoldemortException("Got an exception while trying to get key '" + key + "'.", e);
}
}
// also check the iterator - first key iterator...
try {
ClosableIterator<ByteArray> keyIterator = engine.keys();
if (!saveKeys) {
fail("Should have thrown an exception since this RO format does not support iterators");
}
int numElements = 0;
while (keyIterator.hasNext()) {
Assert.assertTrue(values.containsKey(serializer.toObject(keyIterator.next().get())));
numElements++;
}
Assert.assertEquals(numElements, values.size());
} catch (UnsupportedOperationException e) {
if (saveKeys) {
fail("Should not have thrown an exception since this RO format does support iterators");
}
}
// ... and entry iterator
try {
ClosableIterator<Pair<ByteArray, Versioned<byte[]>>> entryIterator = engine.entries();
if (!saveKeys) {
fail("Should have thrown an exception since this RO format does not support iterators");
}
int numElements = 0;
while (entryIterator.hasNext()) {
Pair<ByteArray, Versioned<byte[]>> entry = entryIterator.next();
Assert.assertEquals(values.get(serializer.toObject(entry.getFirst().get())), serializer.toObject(entry.getSecond().getValue()));
numElements++;
}
Assert.assertEquals(numElements, values.size());
} catch (UnsupportedOperationException e) {
if (saveKeys) {
fail("Should not have thrown an exception since this RO format does support iterators");
}
}
}
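The iterator checks at the end of testHadoopBuild hinge on the store format recorded in the .metadata file: READONLY_V2 (saveKeys mode) supports keys() and entries(), while READONLY_V1 throws UnsupportedOperationException. The sketch below shows how that decision could be derived from the fetched metadata rather than from the saveKeys flag; it is illustrative, reusing versionDir and engine from the test above and assuming the fetched directory keeps its ".metadata" file name.
// Hedged sketch: derive iterator support from the fetched store's format.
ReadOnlyStorageMetadata fetchedMetadata = new ReadOnlyStorageMetadata(new File(versionDir, ".metadata"));
boolean supportsIteration = ReadOnlyStorageFormat.READONLY_V2.getCode()
        .equals(fetchedMetadata.get(ReadOnlyStorageMetadata.FORMAT));
if (supportsIteration) {
    // Safe for READONLY_V2 stores built with saveKeys == true.
    ClosableIterator<ByteArray> keys = engine.keys();
    while (keys.hasNext()) {
        ByteArray key = keys.next();
        // ... inspect the key ...
    }
    keys.close();
} else {
    // READONLY_V1 stores throw UnsupportedOperationException from keys() and entries().
}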