Use of org.apache.drill.exec.vector.ValueVector in the Apache Drill project.
Class SortRecordBatchBuilder, method build.
/**
 * Combines all queued batches into a hyper-vector layout inside {@code outputContainer}
 * and populates {@code sv4} with one (batch id, record index) entry per queued record.
 *
 * <p>The container is cleared first. Exactly one schema is supported, and the number of
 * queued batches may not exceed {@link Character#MAX_VALUE}.
 *
 * @param context         fragment context; not referenced by this method body
 * @param outputContainer container that receives one hyper list per schema field and
 *                        whose schema is rebuilt in FOUR_BYTE selection-vector mode
 * @throws SchemaChangeException if more than one schema is queued or there are too many batches
 * @throws OutOfMemoryError if the reserved selection-vector buffer cannot be allocated
 * @throws UnsupportedOperationException if the schema's selection-vector mode is neither
 *                                       NONE nor TWO_BYTE
 */
public void build(FragmentContext context, VectorContainer outputContainer) throws SchemaChangeException {
  outputContainer.clear();

  if (batches.keySet().size() > 1) {
    throw new SchemaChangeException("Sort currently only supports a single schema.");
  }
  if (batches.size() > Character.MAX_VALUE) {
    throw new SchemaChangeException("Sort cannot work on more than %d batches at a time.", (int) Character.MAX_VALUE);
  }
  // Only enforced when assertions are enabled.
  assert batches.keys().size() >= 1 : "Invalid to have an empty set of batches with no schemas.";

  final DrillBuf svBuffer = reservation.allocateBuffer();
  if (svBuffer == null) {
    throw new OutOfMemoryError("Failed to allocate direct memory for SV4 vector in SortRecordBatchBuilder.");
  }
  sv4 = new SelectionVector4(svBuffer, recordCount, Character.MAX_VALUE);

  final BatchSchema schema = batches.keySet().iterator().next();
  final List<RecordBatchData> schemaBatches = batches.get(schema);

  // Generate the sv4 pointers: one entry per record, addressed by (batch id, index in batch).
  int position = 0;
  int batchId = 0;
  switch (schema.getSelectionVectorMode()) {
    case NONE:
      // No per-batch selection vector: records are addressed by their natural index.
      for (RecordBatchData batchData : schemaBatches) {
        for (int row = 0; row < batchData.getRecordCount(); row++) {
          sv4.set(position++, batchId, row);
        }
        batchId++;
      }
      break;
    case TWO_BYTE:
      // Resolve each record through the batch's SV2, then drop the SV2 since it is no longer used.
      for (RecordBatchData batchData : schemaBatches) {
        for (int row = 0; row < batchData.getRecordCount(); row++) {
          sv4.set(position++, batchId, (int) batchData.getSv2().getIndex(row));
        }
        batchData.getSv2().clear();
        batchId++;
      }
      break;
    default:
      throw new UnsupportedOperationException();
  }

  // Group the vectors of every batch by the field they carry.
  final ArrayListMultimap<MaterializedField, ValueVector> vectorsByField = ArrayListMultimap.create();
  for (RecordBatchData batchData : batches.values()) {
    for (ValueVector vector : batchData.getVectors()) {
      vectorsByField.put(vector.getField(), vector);
    }
  }

  // Register one hyper list per schema field, in schema order.
  for (MaterializedField field : schema) {
    outputContainer.addHyperList(vectorsByField.get(field), false);
  }
  outputContainer.buildSchema(SelectionVectorMode.FOUR_BYTE);
}
Use of org.apache.drill.exec.vector.ValueVector in the Apache Drill project.
Class ProducerConsumerBatch, method load.
/**
 * Moves the vectors of {@code batch} into this operator's {@code container}.
 *
 * <p>If no schema has been recorded yet, or the incoming container's schema differs from
 * the recorded one, the container is cleared and rebuilt from the incoming vectors and
 * the recorded schema is replaced. Otherwise the existing vectors are zeroed and each
 * incoming vector is transferred into its counterpart.
 *
 * @param batch the batch whose container supplies the new data
 * @return {@code true} if the container was rebuilt with a new schema,
 *         {@code false} if data was transferred into the existing vectors
 */
private boolean load(final RecordBatchData batch) {
  final VectorContainer newContainer = batch.getContainer();

  // New or changed schema: rebuild the container from the incoming vectors.
  if (schema == null || !newContainer.getSchema().equals(schema)) {
    container.clear();
    for (final VectorWrapper<?> wrapper : newContainer) {
      container.add(wrapper.getValueVector());
    }
    container.buildSchema(SelectionVectorMode.NONE);
    schema = container.getSchema();
    return true;
  }

  // Same schema: reuse the existing vectors and transfer the data column by column.
  container.zeroVectors();
  final BatchSchema currentSchema = container.getSchema();
  final int columnCount = container.getNumberOfColumns();
  for (int column = 0; column < columnCount; column++) {
    final MaterializedField field = currentSchema.getColumn(column);
    final MajorType type = field.getType();
    final Class<?> vectorClass = TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode());
    final SchemaPath path = SchemaPath.getSimplePath(field.getPath());

    final ValueVector target = container
        .getValueAccessorById(vectorClass, container.getValueVectorId(path).getFieldIds())
        .getValueVector();
    final ValueVector source = newContainer
        .getValueAccessorById(vectorClass, newContainer.getValueVectorId(path).getFieldIds())
        .getValueVector();

    source.makeTransferPair(target).transfer();
  }
  return false;
}
Use of org.apache.drill.exec.vector.ValueVector in the Apache Drill project.
Class ProjectRecordBatch, method classifyExpr.
/**
 * Classifies a project expression by comparing the root-segment paths of its input
 * expression and its output reference, and records the resulting output column names
 * in {@code result}.
 *
 * <p>The classification looks at whether each path contains the star column and/or the
 * prefix delimiter (see {@link StarColumnHelper}). Depending on the case, this method
 * sets {@code result.prefix}, {@code result.isStar}, updates {@code result.prefixMap},
 * and (re)creates {@code result.outputNames}; some branches also write to
 * {@code result.outputMap} directly or through {@code addToResultMaps}.
 *
 * @param ex       the named expression; its expression is cast to {@link SchemaPath}
 *                 unconditionally, so it must be one
 * @param incoming the incoming batch whose schema and vectors supply the input column names
 * @param result   the classifier state that is mutated in place
 */
private void classifyExpr(final NamedExpression ex, final RecordBatch incoming, final ClassifierResult result) {
final NameSegment expr = ((SchemaPath) ex.getExpr()).getRootSegment();
final NameSegment ref = ex.getRef().getRootSegment();
// Flags describing the shape of the input (expr) and output (ref) root paths.
final boolean exprHasPrefix = expr.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
final boolean refHasPrefix = ref.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
final boolean exprIsStar = expr.getPath().equals(StarColumnHelper.STAR_COLUMN);
final boolean refContainsStar = ref.getPath().contains(StarColumnHelper.STAR_COLUMN);
final boolean exprContainsStar = expr.getPath().contains(StarColumnHelper.STAR_COLUMN);
final boolean refEndsWithStar = ref.getPath().endsWith(StarColumnHelper.STAR_COLUMN);
// Without a prefix, the whole expr path is treated as the suffix and the prefix is empty.
String exprPrefix = EMPTY_STRING;
String exprSuffix = expr.getPath();
if (exprHasPrefix) {
// get the prefix of the expr
final String[] exprComponents = expr.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
assert (exprComponents.length == 2);
exprPrefix = exprComponents[0];
exprSuffix = exprComponents[1];
result.prefix = exprPrefix;
}
// Count how many wildcard expressions have been seen per prefix; the first one for a
// given prefix is flagged so that later duplicates can be handled differently.
boolean exprIsFirstWildcard = false;
if (exprContainsStar) {
result.isStar = true;
final Integer value = (Integer) result.prefixMap.get(exprPrefix);
if (value == null) {
final Integer n = 1;
result.prefixMap.put(exprPrefix, n);
exprIsFirstWildcard = true;
} else {
final Integer n = value + 1;
result.prefixMap.put(exprPrefix, n);
}
}
final int incomingSchemaSize = incoming.getSchema().getFieldCount();
// input is '*' and output is 'prefix_*'
if (exprIsStar && refHasPrefix && refEndsWithStar) {
final String[] components = ref.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
assert (components.length == 2);
final String prefix = components[0];
result.outputNames = Lists.newArrayList();
for (final VectorWrapper<?> wrapper : incoming) {
final ValueVector vvIn = wrapper.getValueVector();
final String name = vvIn.getField().getPath();
// add the prefix to the incoming column name
final String newName = prefix + StarColumnHelper.PREFIX_DELIMITER + name;
addToResultMaps(newName, result, false);
}
} else // input and output are the same
if (expr.getPath().equalsIgnoreCase(ref.getPath()) && (!exprContainsStar || exprIsFirstWildcard)) {
if (exprContainsStar && exprHasPrefix) {
// exprPrefix is always non-null here: it starts as EMPTY_STRING and is only
// reassigned from the split result above.
assert exprPrefix != null;
// k is the position of the current incoming vector; outputNames is pre-filled with
// EMPTY_STRING so that entries can be set by position.
int k = 0;
result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
for (int j = 0; j < incomingSchemaSize; j++) {
// initialize
result.outputNames.add(EMPTY_STRING);
}
for (final VectorWrapper<?> wrapper : incoming) {
final ValueVector vvIn = wrapper.getValueVector();
final String incomingName = vvIn.getField().getPath();
// get the prefix of the name
final String[] nameComponents = incomingName.split(StarColumnHelper.PREFIX_DELIMITER, 2);
// if incoming valuevector does not have a prefix, ignore it since this expression is not referencing it
if (nameComponents.length <= 1) {
k++;
continue;
}
final String namePrefix = nameComponents[0];
if (exprPrefix.equalsIgnoreCase(namePrefix)) {
final String newName = incomingName;
// Only the first occurrence of a name is recorded; later duplicates keep EMPTY_STRING.
if (!result.outputMap.containsKey(newName)) {
result.outputNames.set(k, newName);
result.outputMap.put(newName, newName);
}
}
k++;
}
} else {
result.outputNames = Lists.newArrayList();
if (exprContainsStar) {
for (final VectorWrapper<?> wrapper : incoming) {
final ValueVector vvIn = wrapper.getValueVector();
final String incomingName = vvIn.getField().getPath();
if (refContainsStar) {
// allow dups since this is likely top-level project
addToResultMaps(incomingName, result, true);
} else {
addToResultMaps(incomingName, result, false);
}
}
} else {
final String newName = expr.getPath();
if (!refHasPrefix && !exprHasPrefix) {
// allow dups since this is likely top-level project
addToResultMaps(newName, result, true);
} else {
addToResultMaps(newName, result, false);
}
}
}
} else // input is wildcard and it is not the first wildcard
if (exprIsStar) {
result.outputNames = Lists.newArrayList();
for (final VectorWrapper<?> wrapper : incoming) {
final ValueVector vvIn = wrapper.getValueVector();
final String incomingName = vvIn.getField().getPath();
// allow dups since this is likely top-level project
addToResultMaps(incomingName, result, true);
}
} else // only the output has prefix
if (!exprHasPrefix && refHasPrefix) {
result.outputNames = Lists.newArrayList();
final String newName = ref.getPath();
addToResultMaps(newName, result, false);
} else // input has prefix but output does not
if (exprHasPrefix && !refHasPrefix) {
// Same positional scheme as above: k indexes the incoming vectors and outputNames is
// pre-filled with EMPTY_STRING.
int k = 0;
result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
for (int j = 0; j < incomingSchemaSize; j++) {
// initialize
result.outputNames.add(EMPTY_STRING);
}
for (final VectorWrapper<?> wrapper : incoming) {
final ValueVector vvIn = wrapper.getValueVector();
final String name = vvIn.getField().getPath();
final String[] components = name.split(StarColumnHelper.PREFIX_DELIMITER, 2);
// Incoming columns without a prefix cannot match this prefixed expression; skip them.
if (components.length <= 1) {
k++;
continue;
}
final String namePrefix = components[0];
final String nameSuffix = components[1];
if (exprPrefix.equalsIgnoreCase(namePrefix)) {
// case insensitive matching of prefix.
if (refContainsStar) {
// remove the prefix from the incoming column names
// for top level we need to make names unique
final String newName = getUniqueName(nameSuffix, result);
result.outputNames.set(k, newName);
} else if (exprSuffix.equalsIgnoreCase(nameSuffix)) {
// case insensitive matching of field name.
// example: ref: $f1, expr: T0<PREFIX><column_name>
final String newName = ref.getPath();
result.outputNames.set(k, newName);
}
} else {
// NOTE(review): this appends an extra EMPTY_STRING past the entries pre-filled above,
// so outputNames can grow beyond incomingSchemaSize — confirm this is intended.
result.outputNames.add(EMPTY_STRING);
}
k++;
}
} else // input and output have prefixes although they could be different...
if (exprHasPrefix && refHasPrefix) {
// 'input' is only used by the assertion below.
final String[] input = expr.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
assert (input.length == 2);
// not handled yet
// With assertions disabled this branch does nothing and result.outputNames is left untouched.
assert false : "Unexpected project expression or reference";
} else {
// if the incoming schema's column name matches the expression name of the Project,
// then we just want to pick the ref name as the output column name
result.outputNames = Lists.newArrayList();
for (final VectorWrapper<?> wrapper : incoming) {
final ValueVector vvIn = wrapper.getValueVector();
final String incomingName = vvIn.getField().getPath();
if (expr.getPath().equalsIgnoreCase(incomingName)) {
// case insensitive matching of field name.
final String newName = ref.getPath();
addToResultMaps(newName, result, true);
}
}
}
}
Use of org.apache.drill.exec.vector.ValueVector in the Apache Drill project.
Class ExpressionInterpreterTest, method doTest.
/**
 * Evaluates {@code expressionStr} with the expression interpreter against a mock scan
 * batch and checks the first two values of the resulting vector.
 *
 * <p>A local Drillbit is started, a mock table with the given columns is scanned, and
 * the expression is evaluated on the first batch. The vector, the scan batch, its
 * fragment context and the Drillbit are released in {@code finally} blocks so that a
 * failing assertion or evaluation error does not leave them open.
 *
 * @param expressionStr        the expression to evaluate
 * @param colNames             names of the mock columns; must have the same length as {@code colTypes}
 * @param colTypes             types of the mock columns
 * @param expectFirstTwoValues exactly two expected values, compared against the values
 *                             read from the result vector at indexes 0 and 1
 * @param planFragment         plan fragment passed to the mock scan batch factory
 * @throws Exception if starting the Drillbit, evaluating, or closing any resource fails
 */
protected void doTest(String expressionStr, String[] colNames, TypeProtos.MajorType[] colTypes, String[] expectFirstTwoValues, BitControl.PlanFragment planFragment) throws Exception {
  @SuppressWarnings("resource") final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
  @SuppressWarnings("resource") final Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
  try {
    bit1.run();
    // Create a mock scan batch as input for evaluation.
    assertEquals(colNames.length, colTypes.length);
    final MockTableDef.MockColumn[] columns = new MockTableDef.MockColumn[colNames.length];
    for (int i = 0; i < colNames.length; i++) {
      columns[i] = new MockTableDef.MockColumn(colNames[i], colTypes[i].getMinorType(), colTypes[i].getMode(), 0, 0, 0, null, null, null);
    }
    final MockTableDef.MockScanEntry entry = new MockTableDef.MockScanEntry(10, false, 0, 1, columns);
    final MockSubScanPOP scanPOP = new MockSubScanPOP("testTable", false, java.util.Collections.singletonList(entry));
    @SuppressWarnings("resource") final ScanBatch batch = createMockScanBatch(bit1, scanPOP, planFragment);
    try {
      batch.next();
      @SuppressWarnings("resource") final ValueVector vv = evalExprWithInterpreter(expressionStr, batch, bit1);
      try {
        // Verify the first 2 values in the output of evaluation.
        assertEquals(2, expectFirstTwoValues.length);
        assertEquals(expectFirstTwoValues[0], getValueFromVector(vv, 0));
        assertEquals(expectFirstTwoValues[1], getValueFromVector(vv, 1));
        showValueVectorContent(vv);
      } finally {
        // Release the result vector even when an assertion above fails.
        vv.clear();
      }
    } finally {
      // Same release order as before (batch, then its context); the context is now
      // closed even if closing the batch throws.
      try {
        batch.close();
      } finally {
        batch.getContext().close();
      }
    }
  } finally {
    // The Drillbit is shut down last, after everything created from it has been released.
    bit1.close();
  }
}
Use of org.apache.drill.exec.vector.ValueVector in the Apache Drill project.
Class ParquetResultListener, method dataArrived.
/**
 * Handles one incoming result batch: loads it, optionally verifies every value against
 * the expected field definitions, and updates the per-column and per-batch counters.
 *
 * <p>For each vector in the batch the running value counter stored in
 * {@code valuesChecked} (keyed by field path) is advanced by the vector's value count,
 * and the vector's value count is asserted to equal the batch record count. The loader
 * is cleared and {@code result} is released in {@code finally} blocks, so they are
 * freed even when loading or an assertion fails.
 *
 * @param result   the batch that arrived; always released before this method returns
 * @param throttle connection throttle; not used by this method
 * @throws RuntimeException wrapping a {@link SchemaChangeException} raised while loading
 */
@Override
public synchronized void dataArrived(QueryDataBatch result, ConnectionThrottle throttle) {
  logger.debug("result arrived in test batch listener.");
  try {
    count += result.getHeader().getRowCount();
    final RecordBatchLoader batchLoader = new RecordBatchLoader(allocator);
    try {
      try {
        batchLoader.load(result.getHeader().getDef(), result.getData());
        // TODO: Clean: DRILL-2933: That load(...) no longer throws
        // SchemaChangeException, so check/clean catch clause below.
      } catch (SchemaChangeException e) {
        throw new RuntimeException(e);
      }
      // used to make sure each vector in the batch has the same number of records
      final int valueCount = batchLoader.getRecordCount();
      for (final VectorWrapper<?> vw : batchLoader) {
        final ValueVector vv = vw.getValueVector();
        final String path = vv.getField().getPath();
        final FieldInfo currentField = props.fields.get(path);
        // Resume from the number of values already seen for this column, starting at 0.
        int columnValCounter;
        if (!valuesChecked.containsKey(path)) {
          valuesChecked.put(path, 0);
          columnValCounter = 0;
        } else {
          columnValCounter = valuesChecked.get(path);
        }
        printColumnMajor(vv);
        if (testValues) {
          for (int j = 0; j < vv.getAccessor().getValueCount(); j++) {
            // Expected values are picked from currentField.values by cycling with period 3.
            assertField(vv, j, currentField.type, currentField.values[columnValCounter % 3], currentField.name + "/");
            columnValCounter++;
          }
        } else {
          columnValCounter += vv.getAccessor().getValueCount();
        }
        valuesChecked.remove(path);
        assertEquals("Mismatched value count for vectors in the same batch.", valueCount, vv.getAccessor().getValueCount());
        valuesChecked.put(path, columnValCounter);
      }
      if (ParquetRecordReaderTest.VERBOSE_DEBUG) {
        printRowMajor(batchLoader);
      }
      batchCounter++;
    } finally {
      // Free the loaded vectors even if verification failed.
      batchLoader.clear();
    }
  } finally {
    // Always hand the incoming batch back, after the loader has been cleared.
    result.release();
  }
}
Aggregations