Example 1 with HdfsFetcher

Use of voldemort.store.readonly.fetcher.HdfsFetcher in project voldemort, from the class HadoopStoreBuilderCollisionTest, method testCollisionWithParams.

@SuppressWarnings({ "unchecked" })
public void testCollisionWithParams(int totalElements, int maxCollisions) throws Exception {
    assertEquals(totalElements % maxCollisions, 0);
    // create test data
    Map<String, String> values = new HashMap<String, String>();
    List<String> valuesLeft = Lists.newArrayList();
    File testDir = TestUtils.createTempDir();
    File tempDir = new File(testDir, "temp");
    File outputDir = new File(testDir, "output");
    File storeDir = TestUtils.createTempDir(testDir);
    for (int i = 0; i < totalElements; i++) {
        values.put(Integer.toString(i), Integer.toString(i));
        valuesLeft.add(Integer.toString(i));
    }
    String storeName = "test";
    SerializerDefinition serDef = new SerializerDefinition("string");
    Cluster cluster = ServerTestUtils.getLocalCluster(1);
    Serializer<Object> serializer = (Serializer<Object>) new DefaultSerializerFactory().getSerializer(serDef);
    // write test data to text file
    File inputFile = File.createTempFile("input", ".txt", testDir);
    inputFile.deleteOnExit();
    StringBuilder contents = new StringBuilder();
    byte[] currentMd5 = TestUtils.randomBytes(2 * ByteUtils.SIZE_OF_INT);
    int entryId = 0;
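    // Every maxCollisions consecutive entries share one random 8-byte md5
    // substitute; oldMd5ToNewMd5 (a field of the test class) maps each key's
    // real md5 prefix to it, presumably so CollidingTextStoreMapper can force
    // hash collisions.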
    for (Map.Entry<String, String> entry : values.entrySet()) {
        if (entryId % maxCollisions == 0) {
            currentMd5 = TestUtils.randomBytes(2 * ByteUtils.SIZE_OF_INT);
        }
        contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
        byte[] oldMd5 = ByteUtils.copy(ByteUtils.md5(serializer.toBytes(entry.getKey())), 0, 2 * ByteUtils.SIZE_OF_INT);
        oldMd5ToNewMd5.put(new ByteArray(oldMd5), currentMd5);
        entryId++;
    }
    FileUtils.writeStringToFile(inputFile, contents.toString());
    StoreDefinition def = new StoreDefinitionBuilder().setName(storeName)
            .setType(ReadOnlyStorageConfiguration.TYPE_NAME)
            .setKeySerializer(serDef)
            .setValueSerializer(serDef)
            .setRoutingPolicy(RoutingTier.CLIENT)
            .setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY)
            .setReplicationFactor(1)
            .setPreferredReads(1)
            .setRequiredReads(1)
            .setPreferredWrites(1)
            .setRequiredWrites(1)
            .build();
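    // The positional tail mirrors Example 2's call below: the boolean right
    // after the checksum type is saveKeys (parameterized there, fixed to true
    // here), and the large int is presumably the chunk size in bytes (1 GiB
    // here, 64 KiB there).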
    HadoopStoreBuilder builder = new HadoopStoreBuilder("testCollisionWithParams",
            new Props(),
            new JobConf(),
            CollidingTextStoreMapper.class,
            TextInputFormat.class,
            cluster,
            def,
            new Path(tempDir.getAbsolutePath()),
            new Path(outputDir.getAbsolutePath()),
            new Path(inputFile.getAbsolutePath()),
            CheckSumType.MD5,
            true,
            false,
            1024 * 1024 * 1024,
            false,
            null,
            false);
    builder.build();
    File nodeFile = new File(outputDir, "node-0");
    File versionDir = new File(storeDir, "version-0");
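    // Fetch the built node files into the version-0 directory; the test passes
    // plain local paths, which HdfsFetcher resolves through Hadoop's FileSystem
    // abstraction.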
    HdfsFetcher fetcher = new HdfsFetcher();
    fetcher.fetch(nodeFile.getAbsolutePath(), versionDir.getAbsolutePath());
    // Test that lookups work in the normal collision scenario: open the store
    ReadOnlyStorageEngine engine = new ReadOnlyStorageEngine(storeName, new CustomBinarySearchStrategy(), new RoutingStrategyFactory().updateRoutingStrategy(def, cluster), 0, storeDir, 1);
    Store<Object, Object, Object> store = SerializingStore.wrap(engine, serializer, serializer, serializer);
    // check values
    for (Map.Entry<String, String> entry : values.entrySet()) {
        List<Versioned<Object>> found = store.get(entry.getKey(), null);
        Assert.assertEquals("Incorrect number of results", 1, found.size());
        Assert.assertEquals(entry.getValue(), found.get(0).getValue());
    }
    // also check the iterator - first key iterator...
    List<String> valuesLeft2 = Lists.newArrayList(valuesLeft);
    ClosableIterator<ByteArray> keyIterator = engine.keys();
    int numElements = 0;
    while (keyIterator.hasNext()) {
        Object object = serializer.toObject(keyIterator.next().get());
        assertEquals(valuesLeft.remove(object), true);
        Assert.assertTrue(values.containsKey(object));
        numElements++;
    }
    Assert.assertEquals(numElements, values.size());
    Assert.assertEquals(valuesLeft.size(), 0);
    // ... and entry iterator
    ClosableIterator<Pair<ByteArray, Versioned<byte[]>>> entryIterator = engine.entries();
    numElements = 0;
    while (entryIterator.hasNext()) {
        Pair<ByteArray, Versioned<byte[]>> entry = entryIterator.next();
        assertEquals(valuesLeft2.remove(serializer.toObject(entry.getFirst().get())), true);
        Assert.assertEquals(values.get(serializer.toObject(entry.getFirst().get())), serializer.toObject(entry.getSecond().getValue()));
        numElements++;
    }
    Assert.assertEquals(numElements, values.size());
    Assert.assertEquals(valuesLeft2.size(), 0);
}
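
The pattern the example exercises can be distilled: build the store files, fetch them into a version-N directory with HdfsFetcher, then serve them through a ReadOnlyStorageEngine. Below is a minimal sketch of just the fetch-and-open step, assuming voldemort's stock BinarySearchStrategy and illustrative nodeDir/storeDir locations; it is not code from the project.

import java.io.File;

import voldemort.cluster.Cluster;
import voldemort.routing.RoutingStrategyFactory;
import voldemort.store.StoreDefinition;
import voldemort.store.readonly.BinarySearchStrategy;
import voldemort.store.readonly.ReadOnlyStorageEngine;
import voldemort.store.readonly.fetcher.HdfsFetcher;

public class FetchAndOpenSketch {

    // Copy built node files (local paths, as in the tests, or HDFS URLs) into
    // version-0 under storeDir, then open a read-only engine on top of them.
    static ReadOnlyStorageEngine fetchAndOpen(File nodeDir,
                                              File storeDir,
                                              StoreDefinition def,
                                              Cluster cluster) throws Exception {
        HdfsFetcher fetcher = new HdfsFetcher();
        File versionDir = new File(storeDir, "version-0");
        fetcher.fetch(nodeDir.getAbsolutePath(), versionDir.getAbsolutePath());
        return new ReadOnlyStorageEngine(def.getName(),
                                         new BinarySearchStrategy(),
                                         new RoutingStrategyFactory().updateRoutingStrategy(def, cluster),
                                         // node id and number of backups, as in the tests
                                         0,
                                         storeDir,
                                         1);
    }
}
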
Also used :
Versioned (voldemort.versioning.Versioned)
HashMap (java.util.HashMap)
RoutingStrategyFactory (voldemort.routing.RoutingStrategyFactory)
Props (voldemort.utils.Props)
StoreDefinition (voldemort.store.StoreDefinition)
ByteArray (voldemort.utils.ByteArray)
JobConf (org.apache.hadoop.mapred.JobConf)
Serializer (voldemort.serialization.Serializer)
Pair (voldemort.utils.Pair)
StoreDefinitionBuilder (voldemort.store.StoreDefinitionBuilder)
Path (org.apache.hadoop.fs.Path)
Cluster (voldemort.cluster.Cluster)
ReadOnlyStorageEngine (voldemort.store.readonly.ReadOnlyStorageEngine)
DefaultSerializerFactory (voldemort.serialization.DefaultSerializerFactory)
HdfsFetcher (voldemort.store.readonly.fetcher.HdfsFetcher)
File (java.io.File)
Map (java.util.Map)
SerializerDefinition (voldemort.serialization.SerializerDefinition)

Example 2 with HdfsFetcher

Use of voldemort.store.readonly.fetcher.HdfsFetcher in project voldemort, from the class HadoopStoreBuilderTest, method testHadoopBuild.

@Test
public void testHadoopBuild() throws Exception {
    // create test data
    Map<String, String> values = new HashMap<String, String>();
    File testDir = TestUtils.createTempDir();
    File tempDir = new File(testDir, "temp"), tempDir2 = new File(testDir, "temp2");
    File outputDir = new File(testDir, "output"), outputDir2 = new File(testDir, "output2");
    File storeDir = TestUtils.createTempDir(testDir);
    for (int i = 0; i < 200; i++) values.put(Integer.toString(i), Integer.toBinaryString(i));
    // write test data to text file
    File inputFile = File.createTempFile("input", ".txt", testDir);
    inputFile.deleteOnExit();
    StringBuilder contents = new StringBuilder();
    for (Map.Entry<String, String> entry : values.entrySet()) contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
    FileUtils.writeStringToFile(inputFile, contents.toString());
    String storeName = "test";
    SerializerDefinition serDef = new SerializerDefinition("string");
    Cluster cluster = ServerTestUtils.getLocalCluster(1);
    // Test backwards compatibility
    StoreDefinition def = new StoreDefinitionBuilder().setName(storeName)
            .setType(ReadOnlyStorageConfiguration.TYPE_NAME)
            .setKeySerializer(serDef)
            .setValueSerializer(serDef)
            .setRoutingPolicy(RoutingTier.CLIENT)
            .setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY)
            .setReplicationFactor(1)
            .setPreferredReads(1)
            .setRequiredReads(1)
            .setPreferredWrites(1)
            .setRequiredWrites(1)
            .build();
    HadoopStoreBuilder builder = new HadoopStoreBuilder("testHadoopBuild",
            new Props(),
            new JobConf(),
            TextStoreMapper.class,
            TextInputFormat.class,
            cluster,
            def,
            new Path(tempDir2.getAbsolutePath()),
            new Path(outputDir2.getAbsolutePath()),
            new Path(inputFile.getAbsolutePath()),
            CheckSumType.MD5,
            saveKeys,
            false,
            64 * 1024,
            false,
            null,
            false);
    builder.build();
    builder = new HadoopStoreBuilder("testHadoopBuild",
            new Props(),
            new JobConf(),
            TextStoreMapper.class,
            TextInputFormat.class,
            cluster,
            def,
            new Path(tempDir.getAbsolutePath()),
            new Path(outputDir.getAbsolutePath()),
            new Path(inputFile.getAbsolutePath()),
            CheckSumType.MD5,
            saveKeys,
            false,
            64 * 1024,
            false,
            null,
            false);
    builder.build();
    // Check if checkSum is generated in outputDir
    File nodeFile = new File(outputDir, "node-0");
    // Check if metadata file exists
    File metadataFile = new File(nodeFile, ".metadata");
    Assert.assertTrue("Metadata file should exist!", metadataFile.exists());
    ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata(metadataFile);
    if (saveKeys)
        Assert.assertEquals("In saveKeys mode, the metadata format should be READONLY_V2!", metadata.get(ReadOnlyStorageMetadata.FORMAT), ReadOnlyStorageFormat.READONLY_V2.getCode());
    else
        Assert.assertEquals("In legacy mode (saveKeys==false), the metadata format should be READONLY_V1!", metadata.get(ReadOnlyStorageMetadata.FORMAT), ReadOnlyStorageFormat.READONLY_V1.getCode());
    Assert.assertEquals("Checksum type should be MD5!", metadata.get(ReadOnlyStorageMetadata.CHECKSUM_TYPE), CheckSum.toString(CheckSumType.MD5));
    // Check contents of checkSum file
    byte[] md5 = Hex.decodeHex(((String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM)).toCharArray());
    byte[] checkSumBytes = CheckSumTests.calculateCheckSum(nodeFile.listFiles(), CheckSumType.MD5);
    Assert.assertEquals("Checksum is not as excepted!", 0, ByteUtils.compare(checkSumBytes, md5));
    // check if fetching works
    HdfsFetcher fetcher = new HdfsFetcher();
    // Fetch to version directory
    File versionDir = new File(storeDir, "version-0");
    fetcher.fetch(nodeFile.getAbsolutePath(), versionDir.getAbsolutePath());
    Assert.assertTrue("Version directory should exist!", versionDir.exists());
    // open store
    @SuppressWarnings("unchecked") Serializer<Object> serializer = (Serializer<Object>) new DefaultSerializerFactory().getSerializer(serDef);
    ReadOnlyStorageEngine engine = new ReadOnlyStorageEngine(storeName, searchStrategy, new RoutingStrategyFactory().updateRoutingStrategy(def, cluster), 0, storeDir, 1);
    Store<Object, Object, Object> store = SerializingStore.wrap(engine, serializer, serializer, serializer);
    // check values
    for (Map.Entry<String, String> entry : values.entrySet()) {
        String key = entry.getKey();
        try {
            List<Versioned<Object>> found = store.get(key, null);
            Assert.assertEquals("Incorrect number of results", 1, found.size());
            Assert.assertEquals(entry.getValue(), found.get(0).getValue());
        } catch (VoldemortException e) {
            throw new VoldemortException("Got an exception while trying to get key '" + key + "'.", e);
        }
    }
    // also check the iterator - first key iterator...
    try {
        ClosableIterator<ByteArray> keyIterator = engine.keys();
        if (!saveKeys) {
            fail("Should have thrown an exception since this RO format does not support iterators");
        }
        int numElements = 0;
        while (keyIterator.hasNext()) {
            Assert.assertTrue(values.containsKey(serializer.toObject(keyIterator.next().get())));
            numElements++;
        }
        Assert.assertEquals(numElements, values.size());
    } catch (UnsupportedOperationException e) {
        if (saveKeys) {
            fail("Should not have thrown an exception since this RO format does support iterators");
        }
    }
    // ... and entry iterator
    try {
        ClosableIterator<Pair<ByteArray, Versioned<byte[]>>> entryIterator = engine.entries();
        if (!saveKeys) {
            fail("Should have thrown an exception since this RO format does not support iterators");
        }
        int numElements = 0;
        while (entryIterator.hasNext()) {
            Pair<ByteArray, Versioned<byte[]>> entry = entryIterator.next();
            Assert.assertEquals(values.get(serializer.toObject(entry.getFirst().get())), serializer.toObject(entry.getSecond().getValue()));
            numElements++;
        }
        Assert.assertEquals(numElements, values.size());
    } catch (UnsupportedOperationException e) {
        if (saveKeys) {
            fail("Should not have thrown an exception since this RO format does support iterators");
        }
    }
}
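
The iterator checks hinge on the storage format: READONLY_V2 (saveKeys mode) supports key and entry iteration, while the legacy READONLY_V1 format throws UnsupportedOperationException. Below is a minimal sketch of probing the format from the .metadata file before iterating, reusing ReadOnlyStorageMetadata as above; ReadOnlyStorageFormat's package is assumed to match the metadata class.

import java.io.File;

import voldemort.store.readonly.ReadOnlyStorageFormat;
import voldemort.store.readonly.ReadOnlyStorageMetadata;

public class FormatProbe {

    // True when the built store is in READONLY_V2 format, the only format for
    // which keys()/entries() succeed in the test above.
    static boolean supportsIteration(File nodeDir) throws Exception {
        ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata(new File(nodeDir, ".metadata"));
        Object format = metadata.get(ReadOnlyStorageMetadata.FORMAT);
        return ReadOnlyStorageFormat.READONLY_V2.getCode().equals(format);
    }
}
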
Also used :
Versioned (voldemort.versioning.Versioned)
HashMap (java.util.HashMap)
RoutingStrategyFactory (voldemort.routing.RoutingStrategyFactory)
Props (voldemort.utils.Props)
VoldemortException (voldemort.VoldemortException)
ReadOnlyStorageMetadata (voldemort.store.readonly.ReadOnlyStorageMetadata)
StoreDefinition (voldemort.store.StoreDefinition)
ByteArray (voldemort.utils.ByteArray)
JobConf (org.apache.hadoop.mapred.JobConf)
Serializer (voldemort.serialization.Serializer)
Pair (voldemort.utils.Pair)
StoreDefinitionBuilder (voldemort.store.StoreDefinitionBuilder)
Path (org.apache.hadoop.fs.Path)
Cluster (voldemort.cluster.Cluster)
ReadOnlyStorageEngine (voldemort.store.readonly.ReadOnlyStorageEngine)
DefaultSerializerFactory (voldemort.serialization.DefaultSerializerFactory)
HdfsFetcher (voldemort.store.readonly.fetcher.HdfsFetcher)
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat)
File (java.io.File)
Map (java.util.Map)
SerializerDefinition (voldemort.serialization.SerializerDefinition)
Test (org.junit.Test)
