Use of org.apache.drill.common.expression.FieldReference in project drill by axbaretto.
In the class FlattenRecordBatch, method setupNewSchema():
@Override
protected boolean setupNewSchema() throws SchemaChangeException {
  this.allocationVectors = Lists.newArrayList();
  container.clear();
  final List<NamedExpression> exprs = getExpressionList();
  final ErrorCollector collector = new ErrorCollectorImpl();
  final List<TransferPair> transfers = Lists.newArrayList();
  final ClassGenerator<Flattener> cg = CodeGenerator.getRoot(Flattener.TEMPLATE_DEFINITION, context.getOptions());
  cg.getCodeGenerator().plainJavaCapable(true);
  final IntHashSet transferFieldIds = new IntHashSet();
  final NamedExpression flattenExpr = new NamedExpression(popConfig.getColumn(), new FieldReference(popConfig.getColumn()));
  final ValueVectorReadExpression vectorRead = (ValueVectorReadExpression) ExpressionTreeMaterializer.materialize(flattenExpr.getExpr(), incoming, collector, context.getFunctionRegistry(), true);
  final FieldReference fieldReference = flattenExpr.getRef();
  final TransferPair transferPair = getFlattenFieldTransferPair(fieldReference);
  if (transferPair != null) {
    final ValueVector flattenVector = transferPair.getTo();
    // Checks that the list has only the default ValueVector and replaces the resulting ValueVector with an INT-typed ValueVector.
    if (exprs.size() == 0 && flattenVector.getField().getType().equals(Types.LATE_BIND_TYPE)) {
      final MaterializedField outputField = MaterializedField.create(fieldReference.getAsNamePart().getName(), Types.OPTIONAL_INT);
      final ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      container.add(vector);
    } else {
      transfers.add(transferPair);
      container.add(flattenVector);
      transferFieldIds.add(vectorRead.getFieldId().getFieldIds()[0]);
    }
  }
  logger.debug("Added transfer for project expression.");
  ClassifierResult result = new ClassifierResult();
  for (NamedExpression namedExpression : exprs) {
    result.clear();
    String outputName = getRef(namedExpression).getRootSegment().getPath();
    if (result.outputNames != null && result.outputNames.size() > 0) {
      for (int j = 0; j < result.outputNames.size(); j++) {
        if (!result.outputNames.get(j).equals(EMPTY_STRING)) {
          outputName = result.outputNames.get(j);
          break;
        }
      }
    }
    final LogicalExpression expr = ExpressionTreeMaterializer.materialize(namedExpression.getExpr(), incoming, collector, context.getFunctionRegistry(), true);
    if (collector.hasErrors()) {
      throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema. Errors:\n %s.", collector.toErrorString()));
    }
    if (expr instanceof DrillFuncHolderExpr && ((DrillFuncHolderExpr) expr).getHolder().isComplexWriterFuncHolder()) {
      // Lazily initialize the list of complex writers, if not done yet.
      if (complexWriters == null) {
        complexWriters = Lists.newArrayList();
      }
      // The reference name is passed to the ComplexWriter and used as the name of its output vector.
      ((DrillFuncHolderExpr) expr).getFieldReference(namedExpression.getRef());
      cg.addExpr(expr);
    } else {
      // Otherwise the expression needs evaluation.
      final MaterializedField outputField;
      if (expr instanceof ValueVectorReadExpression) {
        final TypedFieldId id = ValueVectorReadExpression.class.cast(expr).getFieldId();
        @SuppressWarnings("resource")
        final ValueVector incomingVector = incoming.getValueAccessorById(id.getIntermediateClass(), id.getFieldIds()).getValueVector();
        // The incoming vector may be null when the first batch is empty.
        if (incomingVector != null) {
          outputField = incomingVector.getField().clone();
        } else {
          outputField = MaterializedField.create(outputName, expr.getMajorType());
        }
      } else {
        outputField = MaterializedField.create(outputName, expr.getMajorType());
      }
      @SuppressWarnings("resource")
      final ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      allocationVectors.add(vector);
      TypedFieldId fid = container.add(vector);
      ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
      cg.addExpr(write);
      logger.debug("Added eval for project expression.");
    }
  }
  cg.rotateBlock();
  cg.getEvalBlock()._return(JExpr.TRUE);
  container.buildSchema(SelectionVectorMode.NONE);
  try {
    this.flattener = context.getImplementationClass(cg.getCodeGenerator());
    flattener.setup(context, incoming, this, transfers);
  } catch (ClassTransformationException | IOException e) {
    throw new SchemaChangeException("Failure while attempting to load generated class", e);
  }
  return true;
}
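The FieldReference work above reduces to one pattern: wrap the flatten column's SchemaPath in an output reference so the flattened values keep the original column name. A minimal standalone sketch of that pattern (the class name, main method, and column name are hypothetical; it assumes Drill's drill-common/drill-logical modules on the classpath):

import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.data.NamedExpression;

public class FlattenRefSketch {
  public static void main(String[] args) {
    // "myList" is an assumed example column; FlattenRecordBatch gets it from popConfig.getColumn().
    SchemaPath column = SchemaPath.getSimplePath("myList");
    // Input expression and output reference share the same path, as in setupNewSchema() above.
    NamedExpression flattenExpr = new NamedExpression(column, new FieldReference(column));
    // getAsNamePart().getName() recovers the plain column name, as used for the output MaterializedField.
    System.out.println(flattenExpr.getRef().getAsNamePart().getName()); // prints: myList
  }
}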
Use of org.apache.drill.common.expression.FieldReference in project drill by axbaretto.
In the class HashJoinBatch, method setupHashTable():
public void setupHashTable() throws IOException, SchemaChangeException, ClassTransformationException {
  // Set up the hash table configuration object.
  int conditionsSize = conditions.size();
  final List<NamedExpression> rightExpr = new ArrayList<>(conditionsSize);
  List<NamedExpression> leftExpr = new ArrayList<>(conditionsSize);
  // Create named expressions from the join conditions.
  for (int i = 0; i < conditionsSize; i++) {
    rightExpr.add(new NamedExpression(conditions.get(i).getRight(), new FieldReference("build_side_" + i)));
    leftExpr.add(new NamedExpression(conditions.get(i).getLeft(), new FieldReference("probe_side_" + i)));
  }
  // Set the left named expressions to null if the probe batch is empty.
  if (leftUpstream != IterOutcome.OK_NEW_SCHEMA && leftUpstream != IterOutcome.OK) {
    leftExpr = null;
  } else {
    if (left.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
      final String errorMsg = new StringBuilder().append("Hash join does not support probe batch with selection vectors. ").append("Probe batch has selection mode = ").append(left.getSchema().getSelectionVectorMode()).toString();
      throw new SchemaChangeException(errorMsg);
    }
  }
  final HashTableConfig htConfig = new HashTableConfig((int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE), HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators);
  // Create the chained hash table.
  final ChainedHashTable ht = new ChainedHashTable(htConfig, context, oContext.getAllocator(), this.right, this.left, null);
  hashTable = ht.createAndSetupHashTable(null, 1);
}
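The FieldReference pattern here is purely name generation: each join condition gets a synthetic, position-indexed reference on both sides so the build and probe key columns stay distinct inside the generated hash table code. A self-contained sketch (the class name, key paths, and single-condition setup are assumptions for illustration):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.data.NamedExpression;

public class JoinKeyRefSketch {
  public static void main(String[] args) {
    // Stand-ins for conditions.get(i).getRight()/getLeft() in the real method.
    List<SchemaPath> buildKeys = Arrays.asList(SchemaPath.getSimplePath("b_id"));
    List<SchemaPath> probeKeys = Arrays.asList(SchemaPath.getSimplePath("p_id"));
    List<NamedExpression> rightExpr = new ArrayList<>(buildKeys.size());
    List<NamedExpression> leftExpr = new ArrayList<>(probeKeys.size());
    for (int i = 0; i < buildKeys.size(); i++) {
      // Position-based synthetic names, as in setupHashTable() above.
      rightExpr.add(new NamedExpression(buildKeys.get(i), new FieldReference("build_side_" + i)));
      leftExpr.add(new NamedExpression(probeKeys.get(i), new FieldReference("probe_side_" + i)));
    }
    System.out.println(rightExpr.get(0).getRef().getAsNamePart().getName()); // prints: build_side_0
  }
}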
Use of org.apache.drill.common.expression.FieldReference in project drill by axbaretto.
In the class HashAggTemplate, method setup():
@Override
public void setup(HashAggregate hashAggrConfig, HashTableConfig htConfig, FragmentContext context, OperatorContext oContext, RecordBatch incoming, HashAggBatch outgoing, LogicalExpression[] valueExprs, List<TypedFieldId> valueFieldIds, TypedFieldId[] groupByOutFieldIds, VectorContainer outContainer, int extraRowBytes) throws SchemaChangeException, IOException {
  if (valueExprs == null || valueFieldIds == null) {
    throw new IllegalArgumentException("Invalid aggr value exprs or workspace variables.");
  }
  if (valueFieldIds.size() < valueExprs.length) {
    throw new IllegalArgumentException("Wrong number of workspace variables.");
  }
  this.context = context;
  this.stats = oContext.getStats();
  this.allocator = oContext.getAllocator();
  this.oContext = oContext;
  this.incoming = incoming;
  this.outgoing = outgoing;
  this.outContainer = outContainer;
  this.operatorId = hashAggrConfig.getOperatorId();
  this.useMemoryPrediction = context.getOptions().getOption(ExecConstants.HASHAGG_USE_MEMORY_PREDICTION_VALIDATOR);
  is2ndPhase = hashAggrConfig.getAggPhase() == AggPrelBase.OperatorPhase.PHASE_2of2;
  isTwoPhase = hashAggrConfig.getAggPhase() != AggPrelBase.OperatorPhase.PHASE_1of1;
  is1stPhase = isTwoPhase && !is2ndPhase;
  // A single-phase aggregation cannot spill.
  canSpill = isTwoPhase;
  // Typically used for testing - force a spill after a partition has more than this many batches.
  minBatchesPerPartition = context.getOptions().getOption(ExecConstants.HASHAGG_MIN_BATCHES_PER_PARTITION_VALIDATOR);
  // Set the memory limit.
  long memoryLimit = allocator.getLimit();
  // Optional configured memory limit, typically used only for testing.
  long configLimit = context.getOptions().getOption(ExecConstants.HASHAGG_MAX_MEMORY_VALIDATOR);
  if (configLimit > 0) {
    logger.warn("Memory limit was changed to {}", configLimit);
    memoryLimit = Math.min(memoryLimit, configLimit);
    // Enforce the limit at the allocator.
    allocator.setLimit(memoryLimit);
  }
  // TODO: This functionality will be added later.
  if (hashAggrConfig.getGroupByExprs().size() == 0) {
    throw new IllegalArgumentException("Currently, hash aggregation is only applicable if there are group-by expressions.");
  }
  this.htIdxHolder = new IndexPointer();
  this.outStartIdxHolder = new IndexPointer();
  this.outNumRecordsHolder = new IndexPointer();
  materializedValueFields = new MaterializedField[valueFieldIds.size()];
  if (valueFieldIds.size() > 0) {
    int i = 0;
    FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, valueFieldIds.get(0).getIntermediateType());
    for (TypedFieldId id : valueFieldIds) {
      materializedValueFields[i++] = MaterializedField.create(ref.getAsNamePart().getName(), id.getIntermediateType());
    }
  }
  spillSet = new SpillSet(context, hashAggrConfig);
  baseHashTable = new ChainedHashTable(htConfig, context, allocator, incoming, null /* no incoming probe */, outgoing);
  // Retain these for delayedSetup, and to allow recreating hash tables (after a spill).
  this.groupByOutFieldIds = groupByOutFieldIds;
  numGroupByOutFields = groupByOutFieldIds.length;
  // Start calculating the row widths (with the extra columns; the rest is done in updateEstMaxBatchSize()).
  estRowWidth = extraRowBytes;
  estValuesRowWidth = extraRowBytes;
  doSetup(incoming);
}
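Note the three-argument constructor: unlike the plain name-based form, it attaches a MajorType to the reference, which setup() takes from the first workspace field. A small sketch of that constructor in isolation (the class name and the BIGINT type are assumptions):

import org.apache.drill.common.expression.ExpressionPosition;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;

public class TypedRefSketch {
  public static void main(String[] args) {
    // Assumed intermediate type; setup() uses valueFieldIds.get(0).getIntermediateType().
    MajorType type = Types.optional(MinorType.BIGINT);
    // "dummy" mirrors the placeholder name above: every workspace field is
    // materialized under the same name and differs only in its type.
    FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, type);
    System.out.println(ref.getAsNamePart().getName()); // prints: dummy
  }
}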
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
In the class TopNBatchTest, method priorityQueueOrderingTest():
/**
 * Priority queue unit test.
 * @throws Exception
 */
@Test
public void priorityQueueOrderingTest() throws Exception {
  Properties properties = new Properties();
  DrillConfig drillConfig = DrillConfig.create(properties);
  DrillbitContext drillbitContext = mockDrillbitContext();
  when(drillbitContext.getFunctionImplementationRegistry()).thenReturn(new FunctionImplementationRegistry(drillConfig));
  FieldReference expr = FieldReference.getWithQuotedRef("colA");
  Order.Ordering ordering = new Order.Ordering(Order.Ordering.ORDER_DESC, expr, Order.Ordering.NULLS_FIRST);
  List<Order.Ordering> orderings = Lists.newArrayList(ordering);
  MaterializedField colA = MaterializedField.create("colA", Types.required(TypeProtos.MinorType.INT));
  MaterializedField colB = MaterializedField.create("colB", Types.required(TypeProtos.MinorType.INT));
  List<MaterializedField> cols = Lists.newArrayList(colA, colB);
  BatchSchema batchSchema = new BatchSchema(BatchSchema.SelectionVectorMode.NONE, cols);
  FragmentContextImpl context = new FragmentContextImpl(drillbitContext, BitControl.PlanFragment.getDefaultInstance(), null, drillbitContext.getFunctionImplementationRegistry());
  RowSet expectedRowSet;
  try (RootAllocator allocator = new RootAllocator(100_000_000)) {
    expectedRowSet = new RowSetBuilder(allocator, batchSchema)
        .addRow(110, 10).addRow(109, 9).addRow(108, 8).addRow(107, 7).addRow(106, 6)
        .addRow(105, 5).addRow(104, 4).addRow(103, 3).addRow(102, 2).addRow(101, 1)
        .build();
    PriorityQueue queue;
    ExpandableHyperContainer hyperContainer;
    {
      VectorContainer container = new RowSetBuilder(allocator, batchSchema).build().container();
      hyperContainer = new ExpandableHyperContainer(container);
      queue = TopNBatch.createNewPriorityQueue(TopNBatch.createMainMappingSet(), TopNBatch.createLeftMappingSet(), TopNBatch.createRightMappingSet(), orderings, hyperContainer, false, true, 10, allocator, batchSchema.getSelectionVectorMode(), context);
    }
    List<RecordBatchData> testBatches = Lists.newArrayList();
    try {
      final Random random = new Random();
      final int bound = 100;
      final int numBatches = 11;
      final int numRecordsPerBatch = 100;
      for (int batchCounter = 0; batchCounter < numBatches; batchCounter++) {
        RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, batchSchema);
        rowSetBuilder.addRow((batchCounter + bound), batchCounter);
        for (int recordCounter = 0; recordCounter < numRecordsPerBatch; recordCounter++) {
          rowSetBuilder.addRow(random.nextInt(bound), random.nextInt(bound));
        }
        VectorContainer vectorContainer = rowSetBuilder.build().container();
        queue.add(new RecordBatchData(vectorContainer, allocator));
      }
      queue.generate();
      VectorContainer resultContainer = queue.getHyperBatch();
      resultContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      RowSet.HyperRowSet actualHyperSet = HyperRowSetImpl.fromContainer(resultContainer, queue.getFinalSv4());
      new RowSetComparison(expectedRowSet).verify(actualHyperSet);
    } finally {
      if (expectedRowSet != null) {
        expectedRowSet.clear();
      }
      queue.cleanup();
      hyperContainer.clear();
      for (RecordBatchData testBatch : testBatches) {
        testBatch.clear();
      }
    }
  }
}
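The FieldReference part of this test is just the sort key: the reference names colA, and Order.Ordering pairs it with a sort direction and a null ordering. A sketch of an ascending secondary key in the same style (the column name is hypothetical, and ORDER_ASC is assumed to exist alongside the ORDER_DESC constant used above):

import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.logical.data.Order;

public class OrderingSketch {
  public static void main(String[] args) {
    // The whole argument is taken as one quoted column name.
    FieldReference colB = FieldReference.getWithQuotedRef("colB");
    // Ascending sort with nulls first; ORDER_ASC assumed by symmetry with ORDER_DESC.
    Order.Ordering secondary = new Order.Ordering(Order.Ordering.ORDER_ASC, colB, Order.Ordering.NULLS_FIRST);
    System.out.println(colB.getAsNamePart().getName()); // prints: colB
  }
}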
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
In the class TestHashAggBatch, method createHashAggPhysicalOperator():
private HashAggregate createHashAggPhysicalOperator(AggPrelBase.OperatorPhase phase) {
  final List<NamedExpression> keyExpressions = Lists.newArrayList(
      new NamedExpression(SchemaPath.getSimplePath(FIRST_NAME_COL), new FieldReference(FIRST_NAME_COL)),
      new NamedExpression(SchemaPath.getSimplePath(LAST_NAME_COL), new FieldReference(LAST_NAME_COL)));
  final List<NamedExpression> aggExpressions = Lists.newArrayList(
      new NamedExpression(new FunctionCall("sum", ImmutableList.of(SchemaPath.getSimplePath(STUFF_COL)), new ExpressionPosition(null, 0)), new FieldReference(TOTAL_STUFF_COL)));
  return new HashAggregate(null, phase, keyExpressions, aggExpressions, 0.0f);
}
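The same shape works for any aggregate call: the FunctionCall is the input expression, and the FieldReference names the output column. A sketch with a hypothetical count aggregate (the class, column, and output names are assumptions; the Guava import may be shaded depending on the Drill version):

import com.google.common.collect.ImmutableList;
import org.apache.drill.common.expression.ExpressionPosition;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.expression.FunctionCall;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.data.NamedExpression;

public class AggExprSketch {
  public static void main(String[] args) {
    NamedExpression countExpr = new NamedExpression(
        // count(stuff): the function call is the expression to aggregate.
        new FunctionCall("count", ImmutableList.of(SchemaPath.getSimplePath("stuff")), ExpressionPosition.UNKNOWN),
        // cnt: the reference that names the output column.
        new FieldReference("cnt"));
    System.out.println(countExpr.getRef().getAsNamePart().getName()); // prints: cnt
  }
}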