Use of com.twitter.elephantbird.mapreduce.input.LuceneIndexInputFormat.LuceneIndexInputSplit in project elephant-bird by Twitter.
From the class TestLuceneIndexInputFormat, method testGetSplits.
/**
 * Runs {@code getSplits} against the sample index directory with the max combined index size
 * per split set to 15 bytes, and checks the index directories held by the first two splits:
 * index-1 together with more-indexes/index-3 in the first, index-2 alone in the second.
 */
@Test
public void testGetSplits() throws Exception {
  DummyLuceneInputFormat inputFormat = new DummyLuceneInputFormat();

  // Configure the input root and the per-split size limit.
  Configuration conf = new Configuration();
  Path sampleIndexRoot =
      new Path("src/test/resources/com/twitter/elephantbird" + "/mapreduce/input/sample_indexes/");
  LuceneIndexInputFormat.setInputPaths(ImmutableList.of(sampleIndexRoot), conf);
  LuceneIndexInputFormat.setMaxCombinedIndexSizePerSplitBytes(15L, conf);

  // The mocked job context is stubbed to return the configuration built above.
  JobContext jobContext = createStrictMock(JobContext.class);
  expect(HadoopCompat.getConfiguration(jobContext)).andStubReturn(conf);
  replay(jobContext);

  List<InputSplit> splits = inputFormat.getSplits(jobContext);

  // First split: two index directories.
  LuceneIndexInputSplit firstSplit = (LuceneIndexInputSplit) splits.get(0);
  List<Path> firstDirs = firstSplit.getIndexDirs();
  assertEquals(2, firstDirs.size());
  assertTrue(firstDirs.get(0).toString().endsWith("sample_indexes/index-1"));
  assertTrue(firstDirs.get(1).toString().endsWith("sample_indexes/more-indexes/index-3"));

  // Second split: a single index directory.
  LuceneIndexInputSplit secondSplit = (LuceneIndexInputSplit) splits.get(1);
  List<Path> secondDirs = secondSplit.getIndexDirs();
  assertEquals(1, secondDirs.size());
  assertTrue(secondDirs.get(0).toString().endsWith("sample_indexes/index-2"));
}
Use of com.twitter.elephantbird.mapreduce.input.LuceneIndexInputFormat.LuceneIndexInputSplit in project elephant-bird by Twitter.
From the class TestLuceneIndexInputFormat, method testCombineSplitsOneSplit.
/**
 * Checks that {@code combineSplits} applied to a queue holding a single split returns exactly
 * one combined split whose only index directory is the one that was put in.
 */
@Test
public void testCombineSplitsOneSplit() throws Exception {
  DummyLuceneInputFormat inputFormat = new DummyLuceneInputFormat();

  // A queue with one split of length 1500 pointing at /index/1.
  PriorityQueue<LuceneIndexInputSplit> queue = new PriorityQueue<LuceneIndexInputSplit>();
  LuceneIndexInputSplit onlySplit =
      new LuceneIndexInputSplit(Lists.newArrayList(new Path("/index/1")), 1500L);
  queue.add(onlySplit);

  List<InputSplit> combined = inputFormat.combineSplits(queue, 1000L, 10000L);
  assertEquals(1, combined.size());

  // Compare directories by their string form.
  LuceneIndexInputSplit result = (LuceneIndexInputSplit) combined.get(0);
  List<Path> resultDirs = result.getIndexDirs();
  Set<String> dirNames =
      Sets.newHashSet(Iterables.transform(resultDirs, Functions.toStringFunction()));
  assertEquals(1, dirNames.size());
  assertTrue(dirNames.contains("/index/1"));
}
Use of com.twitter.elephantbird.mapreduce.input.LuceneIndexInputFormat.LuceneIndexInputSplit in project elephant-bird by Twitter.
From the class TestLuceneIndexInputFormat, method findSplitsHelper.
/**
 * Sets the given paths as input, runs {@code findSplits}, and asserts that polling the returned
 * queue yields three splits with lengths 4, 6 and 20 (index-1, more-indexes/index-3 and index-2
 * respectively), after which the queue is empty.
 *
 * @param inputPaths paths handed to {@code LuceneIndexInputFormat.setInputPaths}
 * @throws IOException declared for the calls into the input format
 */
private void findSplitsHelper(List<Path> inputPaths) throws IOException {
  DummyLuceneInputFormat inputFormat = new DummyLuceneInputFormat();
  Configuration conf = new Configuration();
  LuceneIndexInputFormat.setInputPaths(inputPaths, conf);
  inputFormat.loadConfig(conf);

  PriorityQueue<LuceneIndexInputSplit> queue = inputFormat.findSplits(conf);

  // Expected poll order: one entry per split, length paired with the directory suffix.
  long[] expectedLengths = {4, 6, 20};
  String[] expectedSuffixes = {
    "sample_indexes/index-1",
    "sample_indexes/more-indexes/index-3",
    "sample_indexes/index-2"
  };

  for (int i = 0; i < expectedLengths.length; i++) {
    LuceneIndexInputSplit next = queue.poll();
    assertEquals(expectedLengths[i], next.getLength());
    assertTrue(next.getIndexDirs().get(0).toString().endsWith(expectedSuffixes[i]));
  }

  assertTrue(queue.isEmpty());
}
Use of com.twitter.elephantbird.mapreduce.input.LuceneIndexInputFormat.LuceneIndexInputSplit in project elephant-bird by Twitter.
From the class TestLuceneIndexInputFormat, method testCombineSplitsWithMaxNumberIndexesPerMapper.
/**
 * Exercises {@code combineSplits} with 100 input splits: three larger ones (500, 300 and 100
 * bytes) and 97 one-byte splits named {@code /index/small-N}.
 *
 * <p>Called with {@code (splits, 150L, 10L)}, the expected outcome is 12 combined splits:
 * nine splits of ten small indexes each, one split holding the remaining seven small indexes
 * plus {@code /index/100} (length 107), then {@code /index/300} and {@code /index/500} on
 * their own.
 *
 * <p>NOTE(review): going by the test name, the third argument (10) is presumably the maximum
 * number of indexes per split and the second (150) the maximum combined size; confirm against
 * the {@code combineSplits} signature.
 */
@Test
public void testCombineSplitsWithMaxNumberIndexesPerMapper() throws Exception {
  DummyLuceneInputFormat lif = new DummyLuceneInputFormat();
  PriorityQueue<LuceneIndexInputSplit> splits = new PriorityQueue<LuceneIndexInputSplit>();

  // Build the input queue. Each split's path and size are only needed within its own
  // iteration, so plain locals are used instead of arrays.
  final int totalSplits = 100;
  for (int i = 0; i < totalSplits; i++) {
    long size;
    String path;
    switch (i) {
      case 0:
        size = 500L;
        path = "/index/500";
        break;
      case 1:
        size = 300L;
        path = "/index/300";
        break;
      case 2:
        size = 100L;
        path = "/index/100";
        break;
      default:
        size = 1L;
        path = "/index/small-" + i;
        break;
    }
    splits.add(new LuceneIndexInputSplit(Lists.newArrayList(new Path(path)), size));
  }

  List<InputSplit> combined = lif.combineSplits(splits, 150L, 10L);
  assertEquals(12, combined.size());

  // The first nine combined splits each hold ten one-byte indexes.
  final int fullSmallSplits = 9;
  for (int i = 0; i < fullSmallSplits; i++) {
    LuceneIndexInputSplit smallSplit = (LuceneIndexInputSplit) combined.get(i);
    assertEquals(10, smallSplit.getIndexDirs().size());
    assertEquals(10L, smallSplit.getLength());
    for (Path p : smallSplit.getIndexDirs()) {
      assertTrue(p.toString().startsWith("/index/small-"));
    }
  }

  // The tenth holds the seven leftover small indexes followed by /index/100.
  LuceneIndexInputSplit split = (LuceneIndexInputSplit) combined.get(9);
  assertEquals(8, split.getIndexDirs().size());
  assertEquals(107, split.getLength());
  final int leftoverSmall = 7;
  for (int i = 0; i < leftoverSmall; i++) {
    assertTrue(split.getIndexDirs().get(i).toString().startsWith("/index/small-"));
  }
  assertEquals("/index/100", split.getIndexDirs().get(leftoverSmall).toString());

  // The two largest indexes each end up alone.
  split = (LuceneIndexInputSplit) combined.get(10);
  assertEquals(1, split.getIndexDirs().size());
  assertEquals(300, split.getLength());
  assertEquals("/index/300", split.getIndexDirs().get(0).toString());

  split = (LuceneIndexInputSplit) combined.get(11);
  assertEquals(1, split.getIndexDirs().size());
  assertEquals(500, split.getLength());
  assertEquals("/index/500", split.getIndexDirs().get(0).toString());
}
Use of com.twitter.elephantbird.mapreduce.input.LuceneIndexInputFormat.LuceneIndexInputSplit in project elephant-bird by Twitter.
From the class TestLuceneIndexInputFormat, method testLuceneIndexInputSplit.
/**
 * Serializes a {@code LuceneIndexInputSplit} with {@code write}, reads it back with
 * {@code readFields}, and checks that index directories and length survive the round trip.
 * Also checks {@code compareTo}: zero against the deserialized copy, and ordered by sign
 * against a split of length 100 (versus 500).
 */
@Test
public void testLuceneIndexInputSplit() throws Exception {
  LuceneIndexInputSplit original = new LuceneIndexInputSplit(
      Lists.newArrayList(
          new Path("/index/test"),
          new Path("/index/test2"),
          new Path("/index/test3")),
      500L);

  // Serialize into an in-memory buffer.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  DataOutputStream sink = new DataOutputStream(buffer);
  original.write(sink);

  // Deserialize into a fresh instance from the captured bytes.
  byte[] serialized = buffer.toByteArray();
  LuceneIndexInputSplit restored = new LuceneIndexInputSplit();
  restored.readFields(new DataInputStream(new ByteArrayInputStream(serialized)));

  assertEquals(original.getIndexDirs(), restored.getIndexDirs());
  assertEquals(original.getLength(), restored.getLength());
  assertEquals(0, original.compareTo(restored));

  // A split with a smaller length compares as less than the original.
  LuceneIndexInputSplit smaller =
      new LuceneIndexInputSplit(Lists.newArrayList(new Path("/index/small")), 100L);
  assertTrue(original.compareTo(smaller) > 0);
  assertTrue(smaller.compareTo(original) < 0);
}
Aggregations