Use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.
From the class DatasourceRecordReader, method initialize:
@Override
public void initialize(InputSplit split, final TaskAttemptContext context) throws IOException {
  List<WindowedDataSegment> segments = ((DatasourceInputSplit) split).getSegments();
  String dataSource = Iterators.getOnlyElement(
      segments.stream().map(s -> s.getSegment().getDataSource()).distinct().iterator()
  );
  spec = DatasourceInputFormat.getIngestionSpec(context.getConfiguration(), dataSource);
  logger.info("load schema [%s]", spec);
  List<WindowedStorageAdapter> adapters = Lists.transform(
      segments,
      new Function<WindowedDataSegment, WindowedStorageAdapter>() {
        @Override
        public WindowedStorageAdapter apply(WindowedDataSegment segment) {
          try {
            logger.info("Getting storage path for segment [%s]", segment.getSegment().getId());
            Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));
            logger.info("Fetch segment files from [%s]", path);
            File dir = FileUtils.createTempDir();
            tmpSegmentDirs.add(dir);
            logger.info("Locally storing fetched segment at [%s]", dir);
            JobHelper.unzipNoGuava(path, context.getConfiguration(), dir, context, null);
            logger.info("finished fetching segment files");
            QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
            indexes.add(index);
            numRows += index.getNumRows();
            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(index), segment.getInterval());
          }
          catch (IOException ex) {
            throw new RuntimeException(ex);
          }
        }
      }
  );
  firehose = new IngestSegmentFirehose(adapters, spec.getTransformSpec(), spec.getDimensions(), spec.getMetrics(), spec.getFilter());
}
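The per-segment work above boils down to: fetch and unzip the segment, load it with IndexIO, and wrap it in a storage adapter. Here is a minimal sketch of just that load-and-wrap step, reusing only the classes already shown. The helper name openSegment and the explicit IndexIO parameter (standing in for HadoopDruidIndexerConfig.INDEX_IO) are illustrative, and this version windows the adapter over its full data interval rather than the WindowedDataSegment's window:

// Hypothetical helper: load an unzipped segment directory into a QueryableIndex
// and expose it as a WindowedStorageAdapter over its full data interval.
static WindowedStorageAdapter openSegment(IndexIO indexIO, File segmentDir) throws IOException {
  QueryableIndex index = indexIO.loadIndex(segmentDir);
  StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  // The caller owns the index's lifecycle: QueryableIndex is Closeable and
  // should be closed once the adapter is no longer needed.
  return new WindowedStorageAdapter(adapter, adapter.getInterval());
}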
Use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.
From the class SeekableStreamIndexTaskTestBase, method readSegmentColumn:
protected List<String> readSegmentColumn(final String column, final SegmentDescriptor descriptor) throws IOException {
  File indexBasePath = new File(StringUtils.format(
      "%s/%s/%s_%s/%s/%d",
      getSegmentDirectory(),
      OLD_DATA_SCHEMA.getDataSource(),
      descriptor.getInterval().getStart(),
      descriptor.getInterval().getEnd(),
      descriptor.getVersion(),
      descriptor.getPartitionNumber()
  ));
  File outputLocation = new File(directory, StringUtils.format(
      "%s_%s_%s_%s",
      descriptor.getInterval().getStart(),
      descriptor.getInterval().getEnd(),
      descriptor.getVersion(),
      descriptor.getPartitionNumber()
  ));
  outputLocation.mkdir();
  CompressionUtils.unzip(
      Files.asByteSource(new File(indexBasePath.listFiles()[0], "index.zip")),
      outputLocation,
      Predicates.alwaysFalse(),
      false
  );
  IndexIO indexIO = new TestUtils().getTestIndexIO();
  QueryableIndex index = indexIO.loadIndex(outputLocation);
  DictionaryEncodedColumn<String> theColumn =
      (DictionaryEncodedColumn<String>) index.getColumnHolder(column).getColumn();
  List<String> values = new ArrayList<>();
  for (int i = 0; i < theColumn.length(); i++) {
    int id = theColumn.getSingleValueRow(i);
    String value = theColumn.lookupName(id);
    values.add(value);
  }
  return values;
}
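The decode loop at the end is the general pattern for materializing a dictionary-encoded string column: map each row's dictionary id back to its string value. A hedged, standalone version of just that loop follows; the name decodeStringColumn is illustrative, and it assumes a single-valued string column (a multi-valued column would need getMultiValueRow instead):

static List<String> decodeStringColumn(QueryableIndex index, String column) {
  @SuppressWarnings("unchecked")
  DictionaryEncodedColumn<String> col =
      (DictionaryEncodedColumn<String>) index.getColumnHolder(column).getColumn();
  List<String> values = new ArrayList<>(col.length());
  for (int row = 0; row < col.length(); row++) {
    // getSingleValueRow returns the row's dictionary id; lookupName resolves it.
    values.add(col.lookupName(col.getSingleValueRow(row)));
  }
  return values;
}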
Use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.
From the class BaseFilterTest, method makeConstructors:
public static Collection<Object[]> makeConstructors() {
  final List<Object[]> constructors = new ArrayList<>();
  final Map<String, BitmapSerdeFactory> bitmapSerdeFactories = ImmutableMap.of(
      "concise", new ConciseBitmapSerdeFactory(),
      "roaring", new RoaringBitmapSerdeFactory(true)
  );
  final Map<String, SegmentWriteOutMediumFactory> segmentWriteOutMediumFactories = ImmutableMap.of(
      "tmpFile segment write-out medium", TmpFileSegmentWriteOutMediumFactory.instance(),
      "off-heap memory segment write-out medium", OffHeapMemorySegmentWriteOutMediumFactory.instance()
  );
  final Map<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finishers =
      ImmutableMap.<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>>builder()
          .put("incremental", input -> {
            final IncrementalIndex index = input.buildIncrementalIndex();
            return Pair.of(new IncrementalIndexStorageAdapter(index), index);
          })
          .put("mmapped", input -> {
            final QueryableIndex index = input.buildMMappedIndex();
            return Pair.of(new QueryableIndexStorageAdapter(index), index);
          })
          .put("mmappedMerged", input -> {
            final QueryableIndex index = input.buildMMappedMergedIndex();
            return Pair.of(new QueryableIndexStorageAdapter(index), index);
          })
          .put("rowBasedWithoutTypeSignature", input -> Pair.of(input.buildRowBasedSegmentWithoutTypeSignature().asStorageAdapter(), () -> {}))
          .put("rowBasedWithTypeSignature", input -> Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> {}))
          .build();
  for (Map.Entry<String, BitmapSerdeFactory> bitmapSerdeFactoryEntry : bitmapSerdeFactories.entrySet()) {
    for (Map.Entry<String, SegmentWriteOutMediumFactory> segmentWriteOutMediumFactoryEntry : segmentWriteOutMediumFactories.entrySet()) {
      for (Map.Entry<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finisherEntry : finishers.entrySet()) {
        for (boolean cnf : ImmutableList.of(false, true)) {
          for (boolean optimize : ImmutableList.of(false, true)) {
            final String testName = StringUtils.format(
                "bitmaps[%s], indexMerger[%s], finisher[%s], cnf[%s], optimize[%s]",
                bitmapSerdeFactoryEntry.getKey(),
                segmentWriteOutMediumFactoryEntry.getKey(),
                finisherEntry.getKey(),
                cnf,
                optimize
            );
            final IndexBuilder indexBuilder = IndexBuilder.create()
                .schema(DEFAULT_INDEX_SCHEMA)
                .indexSpec(new IndexSpec(bitmapSerdeFactoryEntry.getValue(), null, null, null))
                .segmentWriteOutMediumFactory(segmentWriteOutMediumFactoryEntry.getValue());
            constructors.add(new Object[]{testName, indexBuilder, finisherEntry.getValue(), cnf, optimize});
          }
        }
      }
    }
  }
  return constructors;
}
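makeConstructors builds the Cartesian product of bitmap serde, write-out medium, finisher, and the cnf/optimize flags, emitting one Object[] per combination. A sketch of how such a list is typically consumed with JUnit 4's Parameterized runner follows; the class name ExampleFilterTest and its fields are illustrative, not taken from BaseFilterTest:

@RunWith(Parameterized.class)
public class ExampleFilterTest {
  // Each Object[] from makeConstructors becomes one test instantiation;
  // name = "{0}" uses the testName element as the display name.
  @Parameterized.Parameters(name = "{0}")
  public static Collection<Object[]> constructorFeeder() {
    return makeConstructors();
  }

  private final IndexBuilder indexBuilder;
  private final Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher;
  private final boolean cnf;
  private final boolean optimize;

  public ExampleFilterTest(
      String testName,
      IndexBuilder indexBuilder,
      Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher,
      boolean cnf,
      boolean optimize
  ) {
    this.indexBuilder = indexBuilder;
    this.finisher = finisher;
    this.cnf = cnf;
    this.optimize = optimize;
  }
}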
Use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.
From the class IngestSegmentFirehoseTest, method testReadFromIndexAndWriteAnotherIndex:
@Test
public void testReadFromIndexAndWriteAnotherIndex() throws Exception {
  // Tests a "reindexing" use case that is a common use of ingestSegment.
  File segmentDir = tempFolder.newFolder();
  createTestIndex(segmentDir);
  try (
      final QueryableIndex qi = indexIO.loadIndex(segmentDir);
      final IncrementalIndex index = new OnheapIncrementalIndex.Builder()
          .setIndexSchema(
              new IncrementalIndexSchema.Builder()
                  .withDimensionsSpec(DIMENSIONS_SPEC_REINDEX)
                  .withMetrics(AGGREGATORS_REINDEX.toArray(new AggregatorFactory[0]))
                  .build()
          )
          .setMaxRowCount(5000)
          .build()
  ) {
    final StorageAdapter sa = new QueryableIndexStorageAdapter(qi);
    final WindowedStorageAdapter wsa = new WindowedStorageAdapter(sa, sa.getInterval());
    final IngestSegmentFirehose firehose = new IngestSegmentFirehose(
        ImmutableList.of(wsa, wsa),
        TransformSpec.NONE,
        ImmutableList.of("host", "spatial"),
        ImmutableList.of("visited_sum", "unique_hosts"),
        null
    );
    int count = 0;
    while (firehose.hasMore()) {
      final InputRow row = firehose.nextRow();
      Assert.assertNotNull(row);
      if (count == 0) {
        Assert.assertEquals(DateTimes.of("2014-10-22T00Z"), row.getTimestamp());
        Assert.assertEquals("host1", row.getRaw("host"));
        Assert.assertEquals("0,1", row.getRaw("spatial"));
        Assert.assertEquals(10L, row.getRaw("visited_sum"));
        Assert.assertEquals(1.0d, ((HyperLogLogCollector) row.getRaw("unique_hosts")).estimateCardinality(), 0.1);
      }
      count++;
      index.add(row);
    }
    Assert.assertEquals(18, count);
    // Check the index
    Assert.assertEquals(9, index.size());
    final IncrementalIndexStorageAdapter queryable = new IncrementalIndexStorageAdapter(index);
    Assert.assertEquals(2, queryable.getAvailableDimensions().size());
    Assert.assertEquals("host", queryable.getAvailableDimensions().get(0));
    Assert.assertEquals("spatial", queryable.getAvailableDimensions().get(1));
    Assert.assertEquals(ImmutableList.of("visited_sum", "unique_hosts"), queryable.getAvailableMetrics());
    // Do a spatial filter
    final IngestSegmentFirehose firehose2 = new IngestSegmentFirehose(
        ImmutableList.of(new WindowedStorageAdapter(queryable, Intervals.of("2000/3000"))),
        TransformSpec.NONE,
        ImmutableList.of("host", "spatial"),
        ImmutableList.of("visited_sum", "unique_hosts"),
        new SpatialDimFilter("spatial", new RadiusBound(new float[]{1, 0}, 0.1f))
    );
    final InputRow row = firehose2.nextRow();
    Assert.assertFalse(firehose2.hasMore());
    Assert.assertEquals(DateTimes.of("2014-10-22T00Z"), row.getTimestamp());
    Assert.assertEquals("host2", row.getRaw("host"));
    Assert.assertEquals("1,0", row.getRaw("spatial"));
    Assert.assertEquals(40L, row.getRaw("visited_sum"));
    Assert.assertEquals(1.0d, ((HyperLogLogCollector) row.getRaw("unique_hosts")).estimateCardinality(), 0.1);
  }
}
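The first half of the test is a generic drain loop: read every row from the firehose and add it to a target IncrementalIndex. A hedged distillation of that loop as a standalone helper (the name drainInto is illustrative); it assumes the target index's schema accepts the rows the firehose emits:

static int drainInto(IngestSegmentFirehose firehose, IncrementalIndex target) throws IOException {
  int count = 0;
  while (firehose.hasMore()) {
    final InputRow row = firehose.nextRow();
    target.add(row);  // may throw if the index exceeds its configured max row count
    count++;
  }
  return count;
}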
Use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.
From the class CalciteTests, method createMockWalker:
public static SpecificSegmentsQuerySegmentWalker createMockWalker(final QueryRunnerFactoryConglomerate conglomerate, final File tmpDir, final QueryScheduler scheduler, final JoinableFactory joinableFactory) {
  final QueryableIndex index1 = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "1")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA).rows(ROWS1).buildMMappedIndex();
  final QueryableIndex index2 = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "2")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA_DIFFERENT_DIM3_M1_TYPES).rows(ROWS2).buildMMappedIndex();
  final QueryableIndex forbiddenIndex = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "forbidden")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA).rows(FORBIDDEN_ROWS).buildMMappedIndex();
  final QueryableIndex indexNumericDims = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "3")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA_NUMERIC_DIMS).rows(ROWS1_WITH_NUMERIC_DIMS).buildMMappedIndex();
  final QueryableIndex index4 = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "4")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA).rows(ROWS1_WITH_FULL_TIMESTAMP).buildMMappedIndex();
  final QueryableIndex indexLotsOfColumns = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "5")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA_LOTS_O_COLUMNS).rows(ROWS_LOTS_OF_COLUMNS).buildMMappedIndex();
  final QueryableIndex someDatasourceIndex = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "6")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA).rows(ROWS1).buildMMappedIndex();
  final QueryableIndex someXDatasourceIndex = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "7")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA_WITH_X_COLUMNS).rows(RAW_ROWS1_X).buildMMappedIndex();
  final QueryableIndex userVisitIndex = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "8")).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(INDEX_SCHEMA).rows(USER_VISIT_ROWS).buildMMappedIndex();
  return new SpecificSegmentsQuerySegmentWalker(conglomerate, INJECTOR.getInstance(LookupExtractorFactoryContainerProvider.class), joinableFactory, scheduler)
      .add(DataSegment.builder().dataSource(DATASOURCE1).interval(index1.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index1)
      .add(DataSegment.builder().dataSource(DATASOURCE2).interval(index2.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index2)
      .add(DataSegment.builder().dataSource(FORBIDDEN_DATASOURCE).interval(forbiddenIndex.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), forbiddenIndex)
      .add(DataSegment.builder().dataSource(DATASOURCE3).interval(indexNumericDims.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), indexNumericDims)
      .add(DataSegment.builder().dataSource(DATASOURCE4).interval(index4.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index4)
      .add(DataSegment.builder().dataSource(DATASOURCE5).interval(indexLotsOfColumns.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), indexLotsOfColumns)
      .add(DataSegment.builder().dataSource(SOME_DATASOURCE).interval(indexLotsOfColumns.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), someDatasourceIndex)
      .add(DataSegment.builder().dataSource(SOMEXDATASOURCE).interval(indexLotsOfColumns.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), someXDatasourceIndex)
      .add(DataSegment.builder().dataSource(BROADCAST_DATASOURCE).interval(indexNumericDims.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), indexNumericDims)
      .add(DataSegment.builder().dataSource(USERVISITDATASOURCE).interval(userVisitIndex.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), userVisitIndex);
}
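Each datasource above follows the same three-step recipe: build an mmapped QueryableIndex with IndexBuilder, describe it with a DataSegment, and register the pair with the walker. A minimal sketch of one such registration, where `walker` is an existing SpecificSegmentsQuerySegmentWalker and the datasource name and tmpDir subfolder are illustrative placeholders:

final QueryableIndex exampleIndex = IndexBuilder.create()
    .tmpDir(new File(tmpDir, "example"))  // hypothetical subfolder
    .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
    .schema(INDEX_SCHEMA)
    .rows(ROWS1)
    .buildMMappedIndex();
walker.add(
    DataSegment.builder()
               .dataSource("example_datasource")  // hypothetical datasource name
               .interval(exampleIndex.getDataInterval())
               .version("1")
               .shardSpec(new LinearShardSpec(0))
               .size(0)
               .build(),
    exampleIndex
);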