Use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.
Class IndexMergerTest, method testCloser:
@Test(expected = IllegalArgumentException.class)
public void testCloser() throws Exception {
  final long timestamp = System.currentTimeMillis();
  IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
  IncrementalIndexTest.populateIndex(timestamp, toPersist);
  ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1");
  capabilities.setHasSpatialIndexes(true);
  final File tempDir = temporaryFolder.newFolder();
  final File v8TmpDir = new File(tempDir, "v8-tmp");
  final File v9TmpDir = new File(tempDir, "v9-tmp");
  try {
    INDEX_MERGER.persist(toPersist, tempDir, indexSpec);
  } finally {
    if (v8TmpDir.exists()) {
      Assert.fail("v8-tmp dir not clean.");
    }
    if (v9TmpDir.exists()) {
      Assert.fail("v9-tmp dir not clean.");
    }
  }
}
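For orientation, a minimal, self-contained sketch of the capability flags the test above manipulates. The class, enum, and setter/getter methods come from the Druid snippets on this page; the wrapper class and main method are purely illustrative and not part of the Druid source.

import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;

// Illustrative sketch only: configure a String dimension's capabilities the way
// IncrementalIndex does by default, then flip the spatial-index flag as the test
// above does before persisting.
public class CapabilitiesFlagSketch {
  public static void main(String[] args) {
    ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
    capabilities.setType(ValueType.STRING);
    capabilities.setDictionaryEncoded(true);
    capabilities.setHasBitmapIndexes(true);
    // The test forces this flag on an ordinary String dimension, which is why
    // the persist call is expected to fail with IllegalArgumentException.
    capabilities.setHasSpatialIndexes(true);
    System.out.println("hasSpatialIndexes=" + capabilities.hasSpatialIndexes());
  }
}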
Use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.
Class IncrementalIndex, method toTimeAndDims:
@VisibleForTesting
TimeAndDims toTimeAndDims(InputRow row) throws IndexSizeExceededException {
  row = formatRow(row);
  if (row.getTimestampFromEpoch() < minTimestamp) {
    throw new IAE("Cannot add row[%s] because it is below the minTimestamp[%s]", row, new DateTime(minTimestamp));
  }
  final List<String> rowDimensions = row.getDimensions();
  Object[] dims;
  List<Object> overflow = null;
  synchronized (dimensionDescs) {
    dims = new Object[dimensionDescs.size()];
    for (String dimension : rowDimensions) {
      boolean wasNewDim = false;
      ColumnCapabilitiesImpl capabilities;
      DimensionDesc desc = dimensionDescs.get(dimension);
      if (desc != null) {
        capabilities = desc.getCapabilities();
      } else {
        wasNewDim = true;
        capabilities = columnCapabilities.get(dimension);
        if (capabilities == null) {
          capabilities = new ColumnCapabilitiesImpl();
          // For schemaless type discovery, assume everything is a String for now, can change later.
          capabilities.setType(ValueType.STRING);
          capabilities.setDictionaryEncoded(true);
          capabilities.setHasBitmapIndexes(true);
          columnCapabilities.put(dimension, capabilities);
        }
        DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null);
        desc = addNewDimension(dimension, capabilities, handler);
      }
      DimensionHandler handler = desc.getHandler();
      DimensionIndexer indexer = desc.getIndexer();
      Object dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension));
      // Set column capabilities as data is coming in
      if (!capabilities.hasMultipleValues() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) {
        capabilities.setHasMultipleValues(true);
      }
      if (wasNewDim) {
        if (overflow == null) {
          overflow = Lists.newArrayList();
        }
        overflow.add(dimsKey);
      } else if (desc.getIndex() > dims.length || dims[desc.getIndex()] != null) {
        /*
         * index > dims.length requires that we saw this dimension and added it to the dimensionOrder map,
         * otherwise index is null. Since dims is initialized based on the size of dimensionOrder on each call to add,
         * it must have been added to dimensionOrder during this InputRow.
         *
         * If we found an index for this dimension, it means we've seen it already. If !(index > dims.length) then
         * we saw it on a previous input row (thus it's safe to index into dims). If we found a value in
         * the dims array for this index, it means we have seen this dimension already on this input row.
         */
        throw new ISE("Dimension[%s] occurred more than once in InputRow", dimension);
      } else {
        dims[desc.getIndex()] = dimsKey;
      }
    }
  }
  if (overflow != null) {
    // Merge overflow and non-overflow
    Object[] newDims = new Object[dims.length + overflow.size()];
    System.arraycopy(dims, 0, newDims, 0, dims.length);
    for (int i = 0; i < overflow.size(); ++i) {
      newDims[dims.length + i] = overflow.get(i);
    }
    dims = newDims;
  }
  long truncated = 0;
  if (row.getTimestamp() != null) {
    truncated = gran.bucketStart(row.getTimestamp()).getMillis();
  }
  return new TimeAndDims(Math.max(truncated, minTimestamp), dims, dimensionDescsList);
}
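To make the multi-value flag flip above concrete, here is a small hedged sketch; the two-value encoded key is simulated, and only ColumnCapabilitiesImpl and its flag methods come from the snippet.

import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;

// Sketch: the first row whose encoded key component carries more than one value
// flips hasMultipleValues on the dimension's capabilities, mirroring the check
// around handler.getLengthOfEncodedKeyComponent(dimsKey) above.
public class MultiValueFlagSketch {
  public static void main(String[] args) {
    ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
    capabilities.setType(ValueType.STRING);
    int encodedKeyLength = 2; // pretend the indexer produced two values for this row
    if (!capabilities.hasMultipleValues() && encodedKeyLength > 1) {
      capabilities.setHasMultipleValues(true);
    }
    System.out.println("hasMultipleValues=" + capabilities.hasMultipleValues());
  }
}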
Use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.
Class IncrementalIndex, method makeCapabilitesFromValueType:
private ColumnCapabilitiesImpl makeCapabilitesFromValueType(ValueType type) {
  ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
  capabilities.setDictionaryEncoded(type == ValueType.STRING);
  capabilities.setHasBitmapIndexes(type == ValueType.STRING);
  capabilities.setType(type);
  return capabilities;
}
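As a usage note on the helper above: only STRING columns end up dictionary-encoded with bitmap indexes, while numeric types carry just their value type. The following hedged sketch mirrors the same logic outside IncrementalIndex (the helper itself is private, so the wrapper class here is illustrative only).

import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;

// Mirrors the private helper above for illustration: a LONG column gets neither
// dictionary encoding nor bitmap indexes, while a STRING column gets both.
public class ValueTypeCapabilitiesSketch {
  static ColumnCapabilitiesImpl capabilitiesFor(ValueType type) {
    ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
    capabilities.setDictionaryEncoded(type == ValueType.STRING);
    capabilities.setHasBitmapIndexes(type == ValueType.STRING);
    capabilities.setType(type);
    return capabilities;
  }

  public static void main(String[] args) {
    System.out.println("LONG dictionary-encoded: " + capabilitiesFor(ValueType.LONG).isDictionaryEncoded());
    System.out.println("STRING dictionary-encoded: " + capabilitiesFor(ValueType.STRING).isDictionaryEncoded());
  }
}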
Use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.
Class Sink, method makeNewCurrIndex:
private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) {
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(minTimestamp)
      .withTimestampSpec(schema.getParser())
      .withQueryGranularity(schema.getGranularitySpec().getQueryGranularity())
      .withDimensionsSpec(schema.getParser())
      .withMetrics(schema.getAggregators())
      .withRollup(schema.getGranularitySpec().isRollup())
      .build();
  final IncrementalIndex newIndex = new OnheapIncrementalIndex(indexSchema, reportParseExceptions, maxRowsInMemory);
  final FireHydrant old;
  synchronized (hydrantLock) {
    if (writable) {
      old = currHydrant;
      int newCount = 0;
      int numHydrants = hydrants.size();
      if (numHydrants > 0) {
        FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
        newCount = lastHydrant.getCount() + 1;
        if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
          Map<String, ColumnCapabilitiesImpl> oldCapabilities;
          if (lastHydrant.hasSwapped()) {
            oldCapabilities = Maps.newHashMap();
            QueryableIndex oldIndex = lastHydrant.getSegment().asQueryableIndex();
            for (String dim : oldIndex.getAvailableDimensions()) {
              dimOrder.add(dim);
              oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumn(dim).getCapabilities());
            }
          } else {
            IncrementalIndex oldIndex = lastHydrant.getIndex();
            dimOrder.addAll(oldIndex.getDimensionOrder());
            oldCapabilities = oldIndex.getColumnCapabilities();
          }
          newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
        }
      }
      currHydrant = new FireHydrant(newIndex, newCount, getSegment().getIdentifier());
      if (old != null) {
        numRowsExcludingCurrIndex.addAndGet(old.getIndex().size());
      }
      hydrants.add(currHydrant);
    } else {
      // Oops, someone called finishWriting while we were making this new index.
      newIndex.close();
      throw new ISE("finishWriting() called during swap");
    }
  }
  return old;
}
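The hand-off above keys the previous hydrant's capabilities by dimension name and replays them into the fresh index via loadDimensionIterable, so the new index starts with the same column metadata. Below is a hedged sketch of building such a map; the dimension name and flag values are illustrative only.

import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;

import java.util.HashMap;
import java.util.Map;

// Illustrative sketch only: assemble the kind of dimension-to-capabilities map
// that makeNewCurrIndex above passes to newIndex.loadDimensionIterable(...).
public class CapabilitiesHandoffSketch {
  public static void main(String[] args) {
    Map<String, ColumnCapabilitiesImpl> oldCapabilities = new HashMap<>();
    ColumnCapabilitiesImpl pageCapabilities = new ColumnCapabilitiesImpl();
    pageCapabilities.setType(ValueType.STRING);
    pageCapabilities.setDictionaryEncoded(true);
    pageCapabilities.setHasBitmapIndexes(true);
    oldCapabilities.put("page", pageCapabilities); // hypothetical dimension name
    System.out.println("dimensions carried over: " + oldCapabilities.keySet());
  }
}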
Use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.
Class IndexMergerV9, method makeIndexFiles:
@Override
protected File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn,
    final IndexSpec indexSpec
) throws IOException {
  progress.start();
  progress.progress();
  List<Metadata> metadataList = Lists.transform(adapters, new Function<IndexableAdapter, Metadata>() {
    @Override
    public Metadata apply(IndexableAdapter input) {
      return input.getMetadata();
    }
  });
  Metadata segmentMetadata = null;
  if (metricAggs != null) {
    AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
    for (int i = 0; i < metricAggs.length; i++) {
      combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
    }
    segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
  } else {
    segmentMetadata = Metadata.merge(metadataList, null);
  }
  Closer closer = Closer.create();
  try {
    final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
    final File v9TmpDir = new File(outDir, "v9-tmp");
    FileUtils.forceMkdir(v9TmpDir);
    registerDeleteDirectory(closer, v9TmpDir);
    log.info("Start making v9 index files, outDir:%s", outDir);
    File tmpPeonFilesDir = new File(v9TmpDir, "tmpPeonFiles");
    FileUtils.forceMkdir(tmpPeonFilesDir);
    registerDeleteDirectory(closer, tmpPeonFilesDir);
    final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, false);
    closer.register(ioPeon);
    long startTime = System.currentTimeMillis();
    ByteStreams.write(Ints.toByteArray(IndexIO.V9_VERSION), Files.newOutputStreamSupplier(new File(outDir, "version.bin")));
    log.info("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
    progress.progress();
    startTime = System.currentTimeMillis();
    try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
      mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
    }
    log.info("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
    progress.progress();
    final Map<String, ValueType> metricsValueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final List<ColumnCapabilitiesImpl> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
    mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
    final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
    final List<DimensionMerger> mergers = new ArrayList<>();
    for (int i = 0; i < mergedDimensions.size(); i++) {
      mergers.add(handlers[i].makeMerger(indexSpec, v9TmpDir, ioPeon, dimCapabilities.get(i), progress));
    }
    /************* Setup Dim Conversions **************/
    progress.progress();
    startTime = System.currentTimeMillis();
    final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
    final ArrayList<Boolean> dimensionSkipFlag = Lists.newArrayListWithCapacity(mergedDimensions.size());
    final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
    writeDimValueAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
    log.info("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
    /************* Walk through data sets, merge them, and write merged columns *************/
    progress.progress();
    final Iterable<Rowboat> theRows = makeRowIterable(adapters, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
    final LongColumnSerializer timeWriter = setupTimeWriter(ioPeon, indexSpec);
    final ArrayList<GenericColumnSerializer> metWriters = setupMetricsWriters(ioPeon, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
    final List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(adapters.size());
    mergeIndexesAndWriteColumns(adapters, progress, theRows, timeWriter, metWriters, rowNumConversions, mergers);
    /************ Create Inverted Indexes and Finalize Build Columns *************/
    final String section = "build inverted index and columns";
    progress.startSection(section);
    makeTimeColumn(v9Smoosher, progress, timeWriter);
    makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metWriters);
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionMergerV9 merger = (DimensionMergerV9) mergers.get(i);
      merger.writeIndexes(rowNumConversions, closer);
      if (merger.canSkip()) {
        continue;
      }
      ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
      makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
    }
    progress.stopSection(section);
    /************* Make index.drd & metadata.drd files **************/
    progress.progress();
    makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
    makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
    v9Smoosher.close();
    progress.stop();
    return outDir;
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
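The mergeCapabilities call above fills dimCapabilities, but its body is not shown on this page. As a rough, hedged sketch of the idea only (not the actual IndexMergerV9 implementation), combining per-adapter capabilities for one dimension amounts to OR-ing the boolean flags that matter downstream:

import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;

import java.util.List;

// Rough sketch only, not the real mergeCapabilities: OR together the flags of one
// dimension's capabilities as reported by each adapter being merged.
public class MergeCapabilitiesSketch {
  static ColumnCapabilitiesImpl mergeForDimension(List<ColumnCapabilitiesImpl> perAdapter) {
    ColumnCapabilitiesImpl merged = new ColumnCapabilitiesImpl();
    merged.setType(ValueType.STRING); // assume a String dimension for the sketch
    for (ColumnCapabilitiesImpl capabilities : perAdapter) {
      if (capabilities.hasMultipleValues()) {
        merged.setHasMultipleValues(true);
      }
      if (capabilities.hasSpatialIndexes()) {
        merged.setHasSpatialIndexes(true);
      }
      if (capabilities.hasBitmapIndexes()) {
        merged.setHasBitmapIndexes(true);
      }
    }
    return merged;
  }
}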