Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.
The class ResultRow, method toMap.
/**
* Returns a Map representation of the data in this row. Does not include the timestamp.
*/
public Map<String, Object> toMap(final GroupByQuery query) {
  final RowSignature signature = query.getResultRowSignature();
  final Map<String, Object> map = new HashMap<>();

  for (int i = query.getResultRowDimensionStart(); i < row.length; i++) {
    final String columnName = signature.getColumnName(i);
    map.put(columnName, row[i]);
  }

  return map;
}
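A minimal usage sketch (not from the Druid source): assuming resultRow is one ResultRow produced by an already-built GroupByQuery named query, toMap keys the values by the names in the query's result row signature.

final Map<String, Object> event = resultRow.toMap(query);

// The timestamp is intentionally absent; only dimensions, aggregators,
// and post-aggregators appear in the map.
for (Map.Entry<String, Object> entry : event.entrySet()) {
  System.out.println(entry.getKey() + " = " + entry.getValue());
}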
Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.
The class RowBasedGrouperHelper, method createResultRowBasedColumnSelectorFactory.
/**
* Creates a {@link ColumnSelectorFactory} that can read rows which originate as results of the provided "query".
*
* @param query a groupBy query
* @param supplier supplier of result rows from the query
* @param finalization whether the column capabilities reported by this factory should reflect finalized types
*/
public static ColumnSelectorFactory createResultRowBasedColumnSelectorFactory(
    final GroupByQuery query,
    final Supplier<ResultRow> supplier,
    final RowSignature.Finalization finalization
) {
  final RowSignature signature = query.getResultRowSignature(finalization);

  final RowAdapter<ResultRow> adapter = new RowAdapter<ResultRow>() {
    @Override
    public ToLongFunction<ResultRow> timestampFunction() {
      if (query.getResultRowHasTimestamp()) {
        return row -> row.getLong(0);
      } else {
        final long timestamp = query.getUniversalTimestamp().getMillis();
        return row -> timestamp;
      }
    }

    @Override
    public Function<ResultRow, Object> columnFunction(final String columnName) {
      final int columnIndex = signature.indexOf(columnName);
      if (columnIndex < 0) {
        return row -> null;
      } else {
        return row -> row.get(columnIndex);
      }
    }
  };

  // Decorate "signature" so that it returns hasMultipleValues = false. (groupBy does not return multiple values.)
  final ColumnInspector decoratedSignature = new ColumnInspector() {
    @Nullable
    @Override
    public ColumnCapabilities getColumnCapabilities(String column) {
      final ColumnCapabilities baseCapabilities = signature.getColumnCapabilities(column);

      if (baseCapabilities == null || baseCapabilities.hasMultipleValues().isFalse()) {
        return baseCapabilities;
      } else {
        return ColumnCapabilitiesImpl.copyOf(baseCapabilities).setHasMultipleValues(false);
      }
    }
  };

  return RowBasedColumnSelectorFactory.create(adapter, supplier::get, decoratedSignature, false);
}
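A hedged sketch of driving the factory: selectors are bound once against the signature, and each read reflects whatever row the supplier currently returns. The query, the rows, the column name, and the Finalization.UNKNOWN constant are assumptions for illustration.

// Hypothetical setup: "query" is a GroupByQuery, "rows" is a list of its ResultRows.
final AtomicReference<ResultRow> rowRef = new AtomicReference<>();
final ColumnSelectorFactory factory = RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory(
    query,
    rowRef::get,
    RowSignature.Finalization.UNKNOWN // assumed enum constant
);

// The selector is created once; each getObject() call reads the current row.
final ColumnValueSelector<?> selector = factory.makeColumnValueSelector("someAggregator");
for (ResultRow row : rows) {
  rowRef.set(row); // point the supplier at the next row
  System.out.println(selector.getObject());
}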
Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.
The class QuantileSqlAggregator, method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final VirtualColumnRegistry virtualColumnRegistry,
    final RexBuilder rexBuilder,
    final String name,
    final AggregateCall aggregateCall,
    final Project project,
    final List<Aggregation> existingAggregations,
    final boolean finalizeAggregations
) {
  final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(
      plannerContext,
      rowSignature,
      Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0))
  );
  if (input == null) {
    return null;
  }

  final AggregatorFactory aggregatorFactory;
  final String histogramName = StringUtils.format("%s:agg", name);
  final RexNode probabilityArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));

  if (!probabilityArg.isA(SqlKind.LITERAL)) {
    // Probability must be a literal in order to plan.
    return null;
  }

  final float probability = ((Number) RexLiteral.value(probabilityArg)).floatValue();
  final int resolution;

  if (aggregateCall.getArgList().size() >= 3) {
    final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));

    if (!resolutionArg.isA(SqlKind.LITERAL)) {
      // Resolution must be a literal in order to plan.
      return null;
    }

    resolution = ((Number) RexLiteral.value(resolutionArg)).intValue();
  } else {
    resolution = ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE;
  }

  final int numBuckets = ApproximateHistogram.DEFAULT_BUCKET_SIZE;
  final float lowerLimit = Float.NEGATIVE_INFINITY;
  final float upperLimit = Float.POSITIVE_INFINITY;

  // Look for existing matching aggregatorFactory.
  for (final Aggregation existing : existingAggregations) {
    for (AggregatorFactory factory : existing.getAggregatorFactories()) {
      if (factory instanceof ApproximateHistogramAggregatorFactory) {
        final ApproximateHistogramAggregatorFactory theFactory = (ApproximateHistogramAggregatorFactory) factory;

        // Check input for equivalence.
        final boolean inputMatches;
        final DruidExpression virtualInput =
            virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields())
                                 .stream()
                                 .findFirst()
                                 .orElse(null);

        if (virtualInput == null) {
          inputMatches = input.isDirectColumnAccess() && input.getDirectColumn().equals(theFactory.getFieldName());
        } else {
          inputMatches = virtualInput.equals(input);
        }

        final boolean matches = inputMatches
                                && theFactory.getResolution() == resolution
                                && theFactory.getNumBuckets() == numBuckets
                                && theFactory.getLowerLimit() == lowerLimit
                                && theFactory.getUpperLimit() == upperLimit;

        if (matches) {
          // Found existing one. Use this.
          return Aggregation.create(ImmutableList.of(), new QuantilePostAggregator(name, factory.getName(), probability));
        }
      }
    }
  }

  // No existing match found. Create a new one.
  if (input.isDirectColumnAccess()) {
    if (rowSignature.getColumnType(input.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
      aggregatorFactory = new ApproximateHistogramFoldingAggregatorFactory(
          histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false
      );
    } else {
      aggregatorFactory = new ApproximateHistogramAggregatorFactory(
          histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false
      );
    }
  } else {
    final String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
    aggregatorFactory = new ApproximateHistogramAggregatorFactory(
        histogramName, virtualColumnName, resolution, numBuckets, lowerLimit, upperLimit, false
    );
  }

  return Aggregation.create(ImmutableList.of(aggregatorFactory), new QuantilePostAggregator(name, histogramName, probability));
}
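For orientation, a sketch of the native pair this method returns in the simplest case: a direct float column ("latency" here is hypothetical) at quantile 0.95 with no resolution argument. The constructor arguments mirror the call above.

final String histogramName = "quantile:agg";
final AggregatorFactory factory = new ApproximateHistogramAggregatorFactory(
    histogramName,
    "latency",                                   // direct column input
    ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE, // resolution (no third SQL argument)
    ApproximateHistogram.DEFAULT_BUCKET_SIZE,    // numBuckets
    Float.NEGATIVE_INFINITY,                     // lowerLimit
    Float.POSITIVE_INFINITY,                     // upperLimit
    false
);
final PostAggregator post = new QuantilePostAggregator("quantile", histogramName, 0.95f);
final Aggregation result = Aggregation.create(ImmutableList.of(factory), post);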
Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.
The class SegmentAnalyzer, method analyze.
public Map<String, ColumnAnalysis> analyze(Segment segment) {
  Preconditions.checkNotNull(segment, "segment");

  // index is null for incremental-index-based segments, but storageAdapter is always available
  final QueryableIndex index = segment.asQueryableIndex();
  final StorageAdapter storageAdapter = segment.asStorageAdapter();

  // get length and column names from storageAdapter
  final int length = storageAdapter.getNumRows();

  Map<String, ColumnAnalysis> columns = new TreeMap<>();

  final RowSignature rowSignature = storageAdapter.getRowSignature();
  for (String columnName : rowSignature.getColumnNames()) {
    final ColumnCapabilities capabilities;

    if (storageAdapter instanceof IncrementalIndexStorageAdapter) {
      // See javadocs for getSnapshotColumnCapabilities for a discussion of why we need to do this.
      capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName);
    } else {
      capabilities = storageAdapter.getColumnCapabilities(columnName);
    }

    final ColumnAnalysis analysis;

    switch (capabilities.getType()) {
      case LONG:
        final int bytesPerRow = ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES;
        analysis = analyzeNumericColumn(capabilities, length, bytesPerRow);
        break;
      case FLOAT:
        analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
        break;
      case DOUBLE:
        analysis = analyzeNumericColumn(capabilities, length, Double.BYTES);
        break;
      case STRING:
        if (index != null) {
          analysis = analyzeStringColumn(capabilities, index.getColumnHolder(columnName));
        } else {
          analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
        }
        break;
      case COMPLEX:
        final ColumnHolder columnHolder = index != null ? index.getColumnHolder(columnName) : null;
        analysis = analyzeComplexColumn(capabilities, columnHolder);
        break;
      default:
        log.warn("Unknown column type[%s].", capabilities.asTypeString());
        analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", capabilities.asTypeString()));
    }

    columns.put(columnName, analysis);
  }

  return columns;
}
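A minimal sketch of invoking the analyzer, assuming segment is an open Segment and that the analyzer is constructed with the analysis types of interest (the EnumSet constructor argument here is an assumption):

final SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE)); // assumed constructor
final Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);

// One entry per column in the storage adapter's row signature.
analyses.forEach((column, analysis) ->
    System.out.println(column + ": type=" + analysis.getType() + ", size=" + analysis.getSize())
);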
Use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.
The class InlineDataSourceTest, method test_serde_untyped.
@Test
public void test_serde_untyped() throws Exception {
  // Create a row signature with no types set.
  final RowSignature.Builder builder = RowSignature.builder();
  for (String columnName : expectedRowSignature.getColumnNames()) {
    builder.add(columnName, null);
  }
  final RowSignature untypedSignature = builder.build();
  final InlineDataSource untypedDataSource = InlineDataSource.fromIterable(rows, untypedSignature);

  final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
  final InlineDataSource deserialized = (InlineDataSource) jsonMapper.readValue(
      jsonMapper.writeValueAsString(untypedDataSource),
      DataSource.class
  );

  Assert.assertEquals(untypedDataSource.getColumnNames(), deserialized.getColumnNames());
  Assert.assertEquals(untypedDataSource.getColumnTypes(), deserialized.getColumnTypes());
  Assert.assertEquals(untypedDataSource.getRowSignature(), deserialized.getRowSignature());
  Assert.assertNull(deserialized.getColumnTypes());
  assertRowsEqual(listDataSource.getRows(), deserialized.getRows());
}
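For contrast, the same builder API can produce a typed signature; the column names below are illustrative, and the ColumnType constants come from org.apache.druid.segment.column.ColumnType.

final RowSignature typedSignature = RowSignature.builder()
    .add("dim", ColumnType.STRING) // typed, unlike builder.add(columnName, null) above
    .add("cnt", ColumnType.LONG)
    .build();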