use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.
the class TestTraceOutputDump method testFilter.
/**
 * Executes the plan in {@code /trace/simple_trace.json} to completion, then reads back the
 * record batch that was dumped under the configured trace directory and checks that it has
 * no two-byte selection vector, contains exactly one record, and that the record's value is
 * {@code Integer.MIN_VALUE}.
 */
@Test
public void testFilter(@Injectable final DrillbitContext bitContext, @Injectable UserClientConnection connection) throws Throwable {
  mockDrillbitContext(bitContext);
  final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
  final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(FileUtils.getResourceAsFile("/trace/simple_trace.json"), Charsets.UTF_8));
  final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
  final FragmentContext context = new FragmentContext(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
  final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));

  // Drain the operator tree; the batches themselves are not inspected here.
  while (exec.next()) {
  }
  exec.close();

  // Surface the original failure rather than a generic assertion error.
  if (context.getFailureCause() != null) {
    throw context.getFailureCause();
  }
  assertTrue("Fragment context reports a failure", !context.isFailed());

  /* Form the file name to which the trace output will dump the record batches */
  final FragmentHandle handle = context.getHandle();
  final String qid = QueryIdHelper.getQueryId(handle.getQueryId());
  final int majorFragmentId = handle.getMajorFragmentId();
  final int minorFragmentId = handle.getMinorFragmentId();
  final String logLocation = c.getString(ExecConstants.TRACE_DUMP_DIRECTORY);
  System.out.println("Found log location: " + logLocation);
  final String filename = String.format("%s//%s_%d_%d_mock-scan", logLocation, qid, majorFragmentId, minorFragmentId);
  System.out.println("File Name: " + filename);

  final Configuration conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, c.getString(ExecConstants.TRACE_DUMP_FILESYSTEM));
  final FileSystem fs = FileSystem.get(conf);
  final Path path = new Path(filename);
  assertTrue("Trace file does not exist", fs.exists(path));

  final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(context.getAllocator());
  // Close the stream as soon as the batch has been read so the file handle is not leaked,
  // even when deserialization throws.
  try (final FSDataInputStream in = fs.open(path)) {
    wrap.readFromStream(in);
  }
  final VectorAccessible container = wrap.get();

  /* Assert there are no selection vectors */
  assertTrue("Unexpected selection vector in trace dump", wrap.getSv2() == null);

  /* Assert there is only one record */
  assertTrue("Expected exactly one record in trace dump", container.getRecordCount() == 1);

  /* Read the Integer value and ASSERT its Integer.MIN_VALUE */
  final int value = (int) container.iterator().next().getValueVector().getAccessor().getObject(0);
  assertTrue("Expected Integer.MIN_VALUE in trace dump", value == Integer.MIN_VALUE);
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class WindowFrameRecordBatch method canDoWork.
/**
 * Decides whether the oldest batch held in memory can be processed now.
 *
 * <p>At least two batches must be buffered. The last row of the first batch is then compared
 * with the last row of the most recent batch to determine whether the partition and the frame
 * have ended, and every window function is asked whether it can proceed given that state.
 *
 * @return true when all window functions are ready to process the current batch (it's the first
 *         batch currently held in memory)
 */
private boolean canDoWork() {
  final int batchCount = batches.size();
  if (batchCount < 2) {
    // not enough buffered batches to look past the current one
    return false;
  }

  final VectorAccessible firstBatch = batches.get(0);
  final int firstBatchLastRow = firstBatch.getRecordCount() - 1;
  final VectorAccessible newestBatch = batches.get(batchCount - 1);
  final int newestBatchLastRow = newestBatch.getRecordCount() - 1;

  try {
    final boolean partitionEnded =
        !framers[0].isSamePartition(firstBatchLastRow, firstBatch, newestBatchLastRow, newestBatch);
    // The peer check is only evaluated while still inside the same partition.
    final boolean frameEnded = partitionEnded
        || !framers[0].isPeer(firstBatchLastRow, firstBatch, newestBatchLastRow, newestBatch);

    for (final WindowFunction windowFunction : functions) {
      final boolean ready = windowFunction.canDoWork(batchCount, popConfig, frameEnded, partitionEnded);
      if (!ready) {
        return false;
      }
    }
    return true;
  } catch (SchemaChangeException e) {
    throw new UnsupportedOperationException(e);
  }
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class DrillTestWrapper method addToCombinedVectorResults.
/**
 * Add to result vectors and compare batch schema against expected schema while iterating batches.
 *
 * <p>The value lists in {@code combinedVectors} are created from the schema of the first batch,
 * keyed by the simple-path expression of each field name. Values of type {@code Text} are stored
 * as {@code String}; all other values (including null) are stored as returned by the accessor.
 *
 * @param batches the batches to read
 * @param expectedSchema the expected schema the batches should contain; when non-null, a
 *        {@link SchemaChangeException} is thrown on the first batch whose schema differs.
 *        When null, no schema check is performed.
 * @param combinedVectors the per-field lists that receive the values read from the batches
 *
 * @return number of batches
 * @throws SchemaChangeException if a batch schema does not equal the non-null expected schema
 * @throws UnsupportedEncodingException declared for callers; not thrown directly by the
 *         statements in this method
 */
public static int addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema, Map<String, List<Object>> combinedVectors) throws SchemaChangeException, UnsupportedEncodingException {
  // TODO - this does not handle schema changes
  int numBatch = 0;
  long totalRecords = 0;
  BatchSchema schema = null;
  for (VectorAccessible loader : batches) {
    numBatch++;
    if (expectedSchema != null && !expectedSchema.equals(loader.getSchema())) {
      throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s. Expected schema : %s", loader.getSchema(), expectedSchema));
    }

    if (schema == null) {
      // First batch: create one result list per field.
      schema = loader.getSchema();
      for (MaterializedField mf : schema) {
        combinedVectors.put(SchemaPath.getSimplePath(mf.getName()).toExpr(), new ArrayList<>());
      }
    } else {
      // TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
      // of the current batch, the check for a null schema is used to only mutate the schema once
      // need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
      schema = loader.getSchema();
    }

    logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
    totalRecords += loader.getRecordCount();

    for (VectorWrapper<?> w : loader) {
      String field = SchemaPath.getSimplePath(w.getField().getName()).toExpr();
      ValueVector[] vectors;
      if (w.isHyper()) {
        vectors = w.getValueVectors();
      } else {
        vectors = new ValueVector[] { w.getValueVector() };
      }

      SelectionVector2 sv2 = null;
      SelectionVector4 sv4 = null;
      switch (schema.getSelectionVectorMode()) {
        case TWO_BYTE:
          sv2 = loader.getSelectionVector2();
          break;
        case FOUR_BYTE:
          sv4 = loader.getSelectionVector4();
          break;
        default:
          // no selection vector: rows are read by position
          break;
      }

      if (sv4 != null) {
        for (int j = 0; j < sv4.getCount(); j++) {
          int complexIndex = sv4.get(j);
          // Upper 16 bits select the batch, lower 16 bits the record within it. The shift
          // must be unsigned: a signed shift yields a negative batch index once bit 31 is set.
          int batchIndex = complexIndex >>> 16;
          int recordIndexInBatch = complexIndex & 0xFFFF;
          Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
          combinedVectors.get(field).add(textToString(obj));
        }
      } else {
        for (ValueVector vv : vectors) {
          for (int j = 0; j < loader.getRecordCount(); j++) {
            // With a two-byte selection vector, row j maps to the index it stores.
            int index = (sv2 != null) ? sv2.getIndex(j) : j;
            Object obj = vv.getAccessor().getObject(index);
            combinedVectors.get(field).add(textToString(obj));
          }
        }
      }
    }
  }
  return numBatch;
}

/**
 * Converts a {@code Text} value to its {@code String} form; any other value, including null,
 * is returned unchanged.
 */
private static Object textToString(Object value) {
  return (value instanceof Text) ? value.toString() : value;
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class TestWriteToDisk method test.
/**
 * Round-trips a four-row batch (one INT column, one VARBINARY column) through
 * {@code VectorAccessibleSerializable}: the batch is written to a file in a temporary
 * directory, read back into a fresh instance, and every value read back is printed.
 */
@Test
@SuppressWarnings("static-method")
public void test() throws Exception {
  final List<ValueVector> vectorList = Lists.newArrayList();
  final DrillConfig config = DrillConfig.create();
  try (final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
      final Drillbit bit = new Drillbit(config, serviceSet)) {
    bit.run();
    final DrillbitContext context = bit.getContext();
    final MaterializedField intField = MaterializedField.create("int", Types.required(TypeProtos.MinorType.INT));
    final MaterializedField binField = MaterializedField.create("binary", Types.required(TypeProtos.MinorType.VARBINARY));
    try (final IntVector intVector = (IntVector) TypeHelper.getNewVector(intField, context.getAllocator());
        final VarBinaryVector binVector = (VarBinaryVector) TypeHelper.getNewVector(binField, context.getAllocator())) {
      AllocationHelper.allocate(intVector, 4, 4);
      AllocationHelper.allocate(binVector, 4, 5);
      vectorList.add(intVector);
      vectorList.add(binVector);

      // Row i holds the integer i and the bytes of binaryValues[i]. The charset is given
      // explicitly so the bytes do not depend on the platform default encoding.
      final String[] binaryValues = { "ZERO", "ONE", "TWO", "THREE" };
      final int rowCount = binaryValues.length;
      for (int row = 0; row < rowCount; row++) {
        intVector.getMutator().setSafe(row, row);
        binVector.getMutator().setSafe(row, binaryValues[row].getBytes(java.nio.charset.StandardCharsets.UTF_8));
      }
      intVector.getMutator().setValueCount(rowCount);
      binVector.getMutator().setValueCount(rowCount);

      VectorContainer container = new VectorContainer();
      container.addCollection(vectorList);
      container.setRecordCount(rowCount);
      @SuppressWarnings("resource") WritableBatch batch = WritableBatch.getBatchNoHVWrap(container.getRecordCount(), container, false);
      VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(batch, context.getAllocator());
      final VectorAccessibleSerializable newWrap = new VectorAccessibleSerializable(context.getAllocator());

      try (final FileSystem fs = getLocalFileSystem()) {
        final File tempDir = Files.createTempDir();
        tempDir.deleteOnExit();
        final Path path = new Path(tempDir.getAbsolutePath(), "drillSerializable");
        // Write, close, then reopen for reading so the read sees the completed file.
        try (final FSDataOutputStream out = fs.create(path)) {
          wrap.writeToStream(out);
        }
        try (final FSDataInputStream in = fs.open(path)) {
          newWrap.readFromStream(in);
        }
      }

      final VectorAccessible newContainer = newWrap.get();
      for (VectorWrapper<?> w : newContainer) {
        // Each deserialized vector is closed once its values have been printed.
        try (ValueVector vv = w.getValueVector()) {
          int values = vv.getAccessor().getValueCount();
          for (int i = 0; i < values; i++) {
            final Object o = vv.getAccessor().getObject(i);
            if (o instanceof byte[]) {
              // Decode with the same charset that was used to encode the test data.
              System.out.println(new String((byte[]) o, java.nio.charset.StandardCharsets.UTF_8));
            } else {
              System.out.println(o);
            }
          }
        }
      }
    }
  }
}
use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.
the class TestBatchValidator method testRepeatedBadValueOffset.
/**
 * Builds a three-row batch with a repeated VARCHAR column, overwrites entry 4 of the data
 * vector's offset vector with 100,000, and expects the validator to report exactly one
 * error containing "Invalid offset".
 */
@Test
public void testRepeatedBadValueOffset() {
  BatchSchema schema = new SchemaBuilder().add("a", MinorType.VARCHAR, DataMode.REPEATED).build();
  SingleRowSet batch = fixture.rowSetBuilder(schema).addRow((Object) strArray()).addRow((Object) strArray("fred", "barney", "wilma")).addRow((Object) strArray("dino")).build();
  try {
    VectorAccessible va = batch.vectorAccessible();
    ValueVector v = va.iterator().next().getValueVector();
    RepeatedVarCharVector rvc = (RepeatedVarCharVector) v;
    VarCharVector vc = rvc.getDataVector();
    UInt4Vector ov = vc.getOffsetVector();
    // Deliberately corrupt one value offset so validation has something to report.
    ov.getMutator().set(4, 100_000);

    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
    validator.validate();
    List<String> errors = validator.errors();
    assertEquals(1, errors.size());
    assertTrue(errors.get(0).contains("Invalid offset"));
  } finally {
    // Clear the batch even when validation or an assertion fails, so a failing test
    // does not also leave the row set uncleared.
    batch.clear();
  }
}
Aggregations