Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.
In class TestReverseImplicitCast, method twoWayCast:
@Test
public void twoWayCast() throws Throwable {
// Function checks two-way implicit casting between int and varchar data types
try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
Drillbit bit = new Drillbit(CONFIG, serviceSet);
DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {
// run query.
bit.run();
client.connect();
List<QueryDataBatch> results = client.runQuery(org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL, Files.asCharSource(DrillFileUtils.getResourceAsFile("/functions/cast/two_way_implicit_cast.json"), Charsets.UTF_8).read());
RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator());
QueryDataBatch batch = results.get(0);
assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
Iterator<VectorWrapper<?>> itr = batchLoader.iterator();
ValueVector.Accessor intAccessor1 = itr.next().getValueVector().getAccessor();
ValueVector.Accessor varcharAccessor1 = itr.next().getValueVector().getAccessor();
for (int i = 0; i < intAccessor1.getValueCount(); i++) {
assertEquals(intAccessor1.getObject(i), 10);
assertEquals(varcharAccessor1.getObject(i).toString(), "101");
}
batchLoader.clear();
for (QueryDataBatch result : results) {
result.release();
}
}
}
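The test above shows the pattern that recurs throughout these examples: load a single QueryDataBatch into a RecordBatchLoader, walk its VectorWrapper entries, and read values through each vector's accessor. A minimal sketch of that read loop, assuming a batchLoader already populated exactly as in the test:
// Minimal sketch: dump every column of a loaded batch by iterating its VectorWrappers.
// Assumes batchLoader.load(batch.getHeader().getDef(), batch.getData()) has already been called.
for (VectorWrapper<?> wrapper : batchLoader) {
  ValueVector.Accessor accessor = wrapper.getValueVector().getAccessor();
  String column = wrapper.getField().getName();
  for (int i = 0; i < accessor.getValueCount(); i++) {
    System.out.println(column + "[" + i + "] = " + accessor.getObject(i));
  }
}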
Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.
In class TestHashJoin, method simpleEqualityJoin:
@Test
public void simpleEqualityJoin() throws Throwable {
// Function checks hash join with single equality condition
try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
Drillbit bit = new Drillbit(CONFIG, serviceSet);
DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {
// run query.
bit.run();
client.connect();
List<QueryDataBatch> results = client.runQuery(org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL, Files.asCharSource(DrillFileUtils.getResourceAsFile("/join/hash_join.json"), Charsets.UTF_8).read().replace("#{TEST_FILE_1}", DrillFileUtils.getResourceAsFile("/build_side_input.json").toURI().toString()).replace("#{TEST_FILE_2}", DrillFileUtils.getResourceAsFile("/probe_side_input.json").toURI().toString()));
RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator());
QueryDataBatch batch = results.get(1);
assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
Iterator<VectorWrapper<?>> itr = batchLoader.iterator();
// Just test the join key
long[] colA = { 1, 1, 2, 2, 1, 1 };
// Check the values of the join key column
ValueVector.Accessor intAccessor1 = itr.next().getValueVector().getAccessor();
for (int i = 0; i < intAccessor1.getValueCount(); i++) {
assertEquals(intAccessor1.getObject(i), colA[i]);
}
assertEquals(6, intAccessor1.getValueCount());
batchLoader.clear();
for (QueryDataBatch result : results) {
result.release();
}
}
}
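The assertion above inspects only the second batch (results.get(1)). When rows may be spread over several batches, a common alternative is to sum the row counts reported in each batch header; a hedged sketch of that check, reusing the results list from the test:
// Sketch: count rows across all returned batches instead of inspecting a single one.
// Assumes the batch headers report all returned rows.
long totalRows = 0;
for (QueryDataBatch b : results) {
  totalRows += b.getHeader().getRowCount();
}
// Expected to match the 6 join rows asserted above.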
Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.
In class TestDateTypes, method testInterval:
@Test
public void testInterval() throws Exception {
try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
Drillbit bit = new Drillbit(CONFIG, serviceSet);
DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {
// run query.
bit.run();
client.connect();
List<QueryDataBatch> results = client.runQuery(org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL, Files.asCharSource(DrillFileUtils.getResourceAsFile("/record/vector/test_interval.json"), Charsets.UTF_8).read().replace("#{TEST_FILE}", "/test_simple_interval.json"));
RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator());
QueryDataBatch batch = results.get(0);
assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
Iterator<VectorWrapper<?>> itr = batchLoader.iterator();
ValueVector.Accessor accessor = itr.next().getValueVector().getAccessor();
// Check the interval type
assertEquals((accessor.getObject(0).toString()), ("2 years 2 months 1 day 1:20:35.0"));
assertEquals((accessor.getObject(1).toString()), ("2 years 2 months 0 days 0:0:0.0"));
assertEquals((accessor.getObject(2).toString()), ("0 years 0 months 0 days 1:20:35.0"));
assertEquals((accessor.getObject(3).toString()), ("2 years 2 months 1 day 1:20:35.897"));
assertEquals((accessor.getObject(4).toString()), ("0 years 0 months 0 days 0:0:35.4"));
assertEquals((accessor.getObject(5).toString()), ("1 year 10 months 1 day 0:-39:-25.0"));
accessor = itr.next().getValueVector().getAccessor();
// Check the interval year type
assertEquals((accessor.getObject(0).toString()), ("2 years 2 months "));
assertEquals((accessor.getObject(1).toString()), ("2 years 2 months "));
assertEquals((accessor.getObject(2).toString()), ("0 years 0 months "));
assertEquals((accessor.getObject(3).toString()), ("2 years 2 months "));
assertEquals((accessor.getObject(4).toString()), ("0 years 0 months "));
assertEquals((accessor.getObject(5).toString()), ("1 year 10 months "));
accessor = itr.next().getValueVector().getAccessor();
// Check the interval day type
assertEquals((accessor.getObject(0).toString()), ("1 day 1:20:35.0"));
assertEquals((accessor.getObject(1).toString()), ("0 days 0:0:0.0"));
assertEquals((accessor.getObject(2).toString()), ("0 days 1:20:35.0"));
assertEquals((accessor.getObject(3).toString()), ("1 day 1:20:35.897"));
assertEquals((accessor.getObject(4).toString()), ("0 days 0:0:35.4"));
assertEquals((accessor.getObject(5).toString()), ("1 day 0:-39:-25.0"));
batchLoader.clear();
for (QueryDataBatch b : results) {
b.release();
}
}
}
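The three interval columns above are read positionally from the iterator, so the assertions depend on column order. A hypothetical helper (accessorFor is not part of Drill) that looks a column up by field name instead could be sketched as:
// Hypothetical helper: find a column's accessor by field name rather than iterator position.
static ValueVector.Accessor accessorFor(RecordBatchLoader loader, String name) {
  for (VectorWrapper<?> w : loader) {
    if (w.getField().getName().equalsIgnoreCase(name)) {
      return w.getValueVector().getAccessor();
    }
  }
  throw new IllegalArgumentException("Column not found: " + name);
}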
Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.
In class MetadataHandlerBatch, method writeMetadataUsingBatchSchema:
private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadataUsingBatchSchema(List<T> metadataList) {
Preconditions.checkArgument(!metadataList.isEmpty(), "Metadata list shouldn't be empty.");
ResultSetLoader resultSetLoader = getResultSetLoaderWithBatchSchema();
resultSetLoader.startBatch();
RowSetLoader rowWriter = resultSetLoader.writer();
Iterator<T> segmentsIterator = metadataList.iterator();
while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
T metadata = segmentsIterator.next();
metadataToHandle.remove(metadata.getMetadataInfo().identifier());
List<Object> arguments = new ArrayList<>();
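// Build one output value per vector in the container, selecting the source of each value by field name.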
for (VectorWrapper<?> vectorWrapper : container) {
String[] identifierValues = Arrays.copyOf(MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()), popConfig.getContext().segmentColumns().size());
MaterializedField field = vectorWrapper.getField();
String fieldName = field.getName();
if (fieldName.equals(MetastoreAnalyzeConstants.LOCATION_FIELD)) {
arguments.add(metadata.getPath().toUri().getPath());
} else if (fieldName.equals(MetastoreAnalyzeConstants.LOCATIONS_FIELD)) {
if (metadataType == MetadataType.SEGMENT) {
arguments.add(((SegmentMetadata) metadata).getLocations().stream().map(path -> path.toUri().getPath()).toArray(String[]::new));
} else {
arguments.add(null);
}
} else if (popConfig.getContext().segmentColumns().contains(fieldName)) {
arguments.add(identifierValues[popConfig.getContext().segmentColumns().indexOf(fieldName)]);
} else if (AnalyzeColumnUtils.isColumnStatisticsField(fieldName)) {
arguments.add(metadata.getColumnStatistics(SchemaPath.parseFromString(AnalyzeColumnUtils.getColumnName(fieldName))).get(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
} else if (AnalyzeColumnUtils.isMetadataStatisticsField(fieldName)) {
arguments.add(metadata.getStatistic(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
} else if (fieldName.equals(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD)) {
// collectedMap field value
arguments.add(new Object[] {});
} else if (fieldName.equals(MetastoreAnalyzeConstants.SCHEMA_FIELD)) {
arguments.add(metadata.getSchema().jsonString());
} else if (fieldName.equals(columnNamesOptions.lastModifiedTime())) {
arguments.add(String.valueOf(metadata.getLastModifiedTime()));
} else if (fieldName.equals(columnNamesOptions.rowGroupIndex())) {
arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
} else if (fieldName.equals(columnNamesOptions.rowGroupStart())) {
arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
} else if (fieldName.equals(columnNamesOptions.rowGroupLength())) {
arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
} else if (fieldName.equals(MetastoreAnalyzeConstants.METADATA_TYPE)) {
arguments.add(metadataType.name());
} else {
throw new UnsupportedOperationException(String.format("Found unexpected field [%s] in incoming batch.", field));
}
}
rowWriter.addRow(arguments.toArray());
}
return resultSetLoader.harvest();
}
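writeMetadataUsingBatchSchema follows the standard ResultSetLoader write cycle: start a batch, append rows through the RowSetLoader until it reports full or the input is exhausted, then harvest the resulting VectorContainer. A stripped-down sketch of that cycle, with the metadata-specific column handling elided and rowsRemain/nextRowValues as hypothetical stand-ins:
// Sketch of the ResultSetLoader write cycle used above (column logic elided).
resultSetLoader.startBatch();
RowSetLoader writer = resultSetLoader.writer();
while (!writer.isFull() && rowsRemain()) { // rowsRemain() is a hypothetical stand-in
  writer.addRow(nextRowValues()); // nextRowValues() is a hypothetical stand-in
}
VectorContainer output = resultSetLoader.harvest();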
Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.
In class TopNBatch, method innerNext:
@Override
public IterOutcome innerNext() {
recordCount = 0;
if (state == BatchState.DONE) {
return NONE;
}
// Check if anything is remaining from previous record boundary
if (hasOutputRecords) {
return handleRemainingOutput();
}
// Reset the TopN state for next iteration
resetTopNState();
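// Track whether the incoming batch carries a two-byte selection vector so it can be cleared when an empty batch is skipped.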
boolean incomingHasSv2 = false;
switch(incoming.getSchema().getSelectionVectorMode()) {
case NONE:
{
break;
}
case TWO_BYTE:
{
incomingHasSv2 = true;
break;
}
case FOUR_BYTE:
{
throw UserException.internalError(null).message("TopN doesn't support incoming with SV4 mode").build(logger);
}
default:
throw new UnsupportedOperationException("Unsupported SV mode detected in TopN incoming batch");
}
outer: while (true) {
Stopwatch watch = Stopwatch.createStarted();
if (first) {
lastKnownOutcome = IterOutcome.OK_NEW_SCHEMA;
// Create the SV4 object upfront to be used for both empty and non-empty incoming batches at EMIT boundary
sv4 = new SelectionVector4(context.getAllocator(), 0);
first = false;
} else {
lastKnownOutcome = next(incoming);
}
if (lastKnownOutcome == OK && schema == null) {
lastKnownOutcome = IterOutcome.OK_NEW_SCHEMA;
container.clear();
}
logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
switch(lastKnownOutcome) {
case NONE:
break outer;
case NOT_YET:
throw new UnsupportedOperationException();
case OK_NEW_SCHEMA:
// Only change schema when it truly changes; artificial schema changes are ignored.
// Handling of a schema change when EMIT is also seen is the same as without EMIT, i.e. it is handled
// only if union type is enabled.
container.clear();
firstBatchForSchema = true;
if (!incoming.getSchema().equals(schema)) {
if (schema != null) {
if (!unionTypeEnabled) {
throw new UnsupportedOperationException(String.format("TopN currently doesn't support changing " + "schemas with union type disabled. Please try enabling union type: %s and re-execute the query", ExecConstants.ENABLE_UNION_TYPE_KEY));
} else {
schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
purgeAndResetPriorityQueue();
schemaChanged = true;
}
} else {
schema = incoming.getSchema();
}
}
// fall through.
case OK:
case EMIT:
if (incoming.getRecordCount() == 0) {
for (VectorWrapper<?> w : incoming) {
w.clear();
}
// Release memory for incoming SV2 vector
if (incomingHasSv2) {
incoming.getSelectionVector2().clear();
}
break;
}
countSincePurge += incoming.getRecordCount();
batchCount++;
RecordBatchData batch;
if (schemaChanged) {
batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
} else {
batch = new RecordBatchData(incoming, oContext.getAllocator());
}
boolean success = false;
try {
if (priorityQueue == null) {
priorityQueue = createNewPriorityQueue(new ExpandableHyperContainer(batch.getContainer()), config.getLimit());
} else if (!priorityQueue.isInitialized()) {
// The priority queue was cleaned up after producing output for the first record boundary;
// initialize it again for the next record boundary.
priorityQueue.init(config.getLimit(), oContext.getAllocator(), schema.getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE);
}
priorityQueue.add(batch);
// Purge RecordBatches that are of no use or don't fall under the TopN category
if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
purge();
countSincePurge = 0;
batchCount = 0;
}
success = true;
} catch (SchemaChangeException e) {
throw schemaChangeException(e, logger);
} finally {
if (!success) {
batch.clear();
}
}
break;
default:
throw new UnsupportedOperationException();
}
// If the last seen outcome is EMIT, stop consuming input here; the batch carrying records with the
// EMIT outcome has already been processed in the case statements above.
if (lastKnownOutcome == EMIT) {
break;
}
}
// PriorityQueue can be uninitialized here if only empty batches are received between 2 EMIT outcomes.
if (schema == null || (priorityQueue == null || !priorityQueue.isInitialized())) {
// builder may be null at this point if the first incoming batch is empty
return handleEmptyBatches(lastKnownOutcome);
}
priorityQueue.generate();
prepareOutputContainer(priorityQueue.getHyperBatch(), priorityQueue.getFinalSv4());
// Return the final outcome for this output batch based on lastKnownOutcome.
return getFinalOutcome();
}
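The schema-change branch above rejects changing schemas unless union type support is enabled and points at ExecConstants.ENABLE_UNION_TYPE_KEY. Assuming that key resolves to the usual session option name exec.enable_union_type, the option could be toggled from a DrillClient (as in the tests earlier on this page) before re-running the query, for example:
// Sketch: enable union type support for the session (assumes ENABLE_UNION_TYPE_KEY maps to
// the option name "exec.enable_union_type").
client.runQuery(org.apache.drill.exec.proto.UserBitShared.QueryType.SQL,
    "ALTER SESSION SET `exec.enable_union_type` = true");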