
Example 1 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by apache.

the class TestTraceOutputDump method testFilter.

@Test
public void testFilter(@Injectable final DrillbitContext bitContext, @Injectable UserClientConnection connection) throws Throwable {
    mockDrillbitContext(bitContext);
    final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
    final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(FileUtils.getResourceAsFile("/trace/simple_trace.json"), Charsets.UTF_8));
    final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
    final FragmentContext context = new FragmentContext(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
    final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
    while (exec.next()) {
    }
    exec.close();
    if (context.getFailureCause() != null) {
        throw context.getFailureCause();
    }
    assertTrue(!context.isFailed());
    final FragmentHandle handle = context.getHandle();
    /* Form the file name to which the trace output will dump the record batches */
    final String qid = QueryIdHelper.getQueryId(handle.getQueryId());
    final int majorFragmentId = handle.getMajorFragmentId();
    final int minorFragmentId = handle.getMinorFragmentId();
    final String logLocation = c.getString(ExecConstants.TRACE_DUMP_DIRECTORY);
    System.out.println("Found log location: " + logLocation);
    final String filename = String.format("%s//%s_%d_%d_mock-scan", logLocation, qid, majorFragmentId, minorFragmentId);
    System.out.println("File Name: " + filename);
    final Configuration conf = new Configuration();
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, c.getString(ExecConstants.TRACE_DUMP_FILESYSTEM));
    final FileSystem fs = FileSystem.get(conf);
    final Path path = new Path(filename);
    assertTrue("Trace file does not exist", fs.exists(path));
    final FSDataInputStream in = fs.open(path);
    final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(context.getAllocator());
    wrap.readFromStream(in);
    final VectorAccessible container = wrap.get();
    /* Assert there are no selection vectors */
    assertTrue(wrap.getSv2() == null);
    /* Assert there is only one record */
    assertTrue(container.getRecordCount() == 1);
    /* Read the Integer value and ASSERT its Integer.MIN_VALUE */
    final int value = (int) container.iterator().next().getValueVector().getAccessor().getObject(0);
    assertTrue(value == Integer.MIN_VALUE);
}
Also used : Path(org.apache.hadoop.fs.Path) VectorAccessibleSerializable(org.apache.drill.exec.cache.VectorAccessibleSerializable) PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) FragmentContext(org.apache.drill.exec.ops.FragmentContext) Configuration(org.apache.hadoop.conf.Configuration) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) PhysicalPlanReader(org.apache.drill.exec.planner.PhysicalPlanReader) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) FragmentHandle(org.apache.drill.exec.proto.ExecProtos.FragmentHandle) SimpleRootExec(org.apache.drill.exec.physical.impl.SimpleRootExec) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) ExecTest(org.apache.drill.exec.ExecTest) Test(org.junit.Test)
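
The read-back pattern used above can be distilled into a small standalone sketch. This is a minimal sketch, not the test itself: it assumes an existing BufferAllocator named allocator and a Path named path pointing at a file previously written by VectorAccessibleSerializable, and simply deserializes the batch and prints every value of every vector.

// Minimal sketch (assumptions: 'allocator' is a BufferAllocator, 'path' points at a
// file previously written with VectorAccessibleSerializable.writeToStream).
final Configuration conf = new Configuration();
final FileSystem fs = FileSystem.get(conf);
try (FSDataInputStream in = fs.open(path)) {
    final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(allocator);
    wrap.readFromStream(in);
    final VectorAccessible container = wrap.get();
    System.out.println("records: " + container.getRecordCount());
    // VectorAccessible is iterable over its VectorWrappers, one per column
    for (VectorWrapper<?> w : container) {
        final ValueVector vv = w.getValueVector();
        for (int i = 0; i < container.getRecordCount(); i++) {
            System.out.println(w.getField().getName() + "[" + i + "] = " + vv.getAccessor().getObject(i));
        }
    }
}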

Example 2 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class WindowFrameRecordBatch method canDoWork.

/**
 * @return true when all window functions are ready to process the current batch (the first batch currently
 * held in memory)
 */
private boolean canDoWork() {
    if (batches.size() < 2) {
        // we need at least 2 batches so we can detect the end of the current partition
        return false;
    }
    final VectorAccessible current = batches.get(0);
    final int currentSize = current.getRecordCount();
    final VectorAccessible last = batches.get(batches.size() - 1);
    final int lastSize = last.getRecordCount();
    boolean partitionEndReached;
    boolean frameEndReached;
    try {
        partitionEndReached = !framers[0].isSamePartition(currentSize - 1, current, lastSize - 1, last);
        frameEndReached = partitionEndReached || !framers[0].isPeer(currentSize - 1, current, lastSize - 1, last);
        for (final WindowFunction function : functions) {
            if (!function.canDoWork(batches.size(), popConfig, frameEndReached, partitionEndReached)) {
                return false;
            }
        }
    } catch (SchemaChangeException e) {
        throw new UnsupportedOperationException(e);
    }
    return true;
}
Also used : SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) VectorAccessible(org.apache.drill.exec.record.VectorAccessible)

Example 3 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class DrillTestWrapper method addToCombinedVectorResults.

/**
 * Add values to the result vectors and compare each batch schema against the expected schema while iterating batches.
 * @param batches the batches to iterate over
 * @param expectedSchema the schema the batches are expected to have; a SchemaChangeException is thrown
 *                       if a batch with a different schema is encountered
 * @param combinedVectors the map that collects the values read from the batches
 *
 * @return number of batches read
 * @throws SchemaChangeException
 * @throws UnsupportedEncodingException
 */
public static int addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema, Map<String, List<Object>> combinedVectors) throws SchemaChangeException, UnsupportedEncodingException {
    // TODO - this does not handle schema changes
    int numBatch = 0;
    long totalRecords = 0;
    BatchSchema schema = null;
    for (VectorAccessible loader : batches) {
        numBatch++;
        if (expectedSchema != null) {
            if (!expectedSchema.equals(loader.getSchema())) {
                throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s.  Expected schema : %s", loader.getSchema(), expectedSchema));
            }
        }
        // TODO: SchemaChangeException may no longer be thrown here, so check/clean the throws clause above.
        if (schema == null) {
            schema = loader.getSchema();
            for (MaterializedField mf : schema) {
                combinedVectors.put(SchemaPath.getSimplePath(mf.getName()).toExpr(), new ArrayList<>());
            }
        } else {
            // TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
            // of the current batch, the check for a null schema is used to only mutate the schema once
            // need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
            schema = loader.getSchema();
        }
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
        totalRecords += loader.getRecordCount();
        for (VectorWrapper<?> w : loader) {
            String field = SchemaPath.getSimplePath(w.getField().getName()).toExpr();
            ValueVector[] vectors;
            if (w.isHyper()) {
                vectors = w.getValueVectors();
            } else {
                vectors = new ValueVector[] { w.getValueVector() };
            }
            SelectionVector2 sv2 = null;
            SelectionVector4 sv4 = null;
            switch(schema.getSelectionVectorMode()) {
                case TWO_BYTE:
                    sv2 = loader.getSelectionVector2();
                    break;
                case FOUR_BYTE:
                    sv4 = loader.getSelectionVector4();
                    break;
            }
            if (sv4 != null) {
                for (int j = 0; j < sv4.getCount(); j++) {
                    int complexIndex = sv4.get(j);
                    int batchIndex = complexIndex >> 16;
                    int recordIndexInBatch = complexIndex & 65535;
                    Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
                    if (obj != null) {
                        if (obj instanceof Text) {
                            obj = obj.toString();
                        }
                    }
                    combinedVectors.get(field).add(obj);
                }
            } else {
                for (ValueVector vv : vectors) {
                    for (int j = 0; j < loader.getRecordCount(); j++) {
                        int index;
                        if (sv2 != null) {
                            index = sv2.getIndex(j);
                        } else {
                            index = j;
                        }
                        Object obj = vv.getAccessor().getObject(index);
                        if (obj != null) {
                            if (obj instanceof Text) {
                                obj = obj.toString();
                            }
                        }
                        combinedVectors.get(field).add(obj);
                    }
                }
            }
        }
    }
    return numBatch;
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) MaterializedField(org.apache.drill.exec.record.MaterializedField) Text(org.apache.drill.exec.util.Text) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchSchema(org.apache.drill.exec.record.BatchSchema) SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)
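
A hedged usage sketch follows. Assuming batches is an Iterable<VectorAccessible> produced elsewhere (for example, collected from a query result), the helper can be called with a null expected schema, which skips the schema comparison per the guard above, and a fresh map to collect the column values.

// Minimal sketch (assumption: 'batches' is an Iterable<VectorAccessible> produced elsewhere).
// The call declares SchemaChangeException and UnsupportedEncodingException.
final Map<String, List<Object>> combinedVectors = new HashMap<>();
// Passing null for expectedSchema skips the schema comparison above.
final int numBatches = DrillTestWrapper.addToCombinedVectorResults(batches, null, combinedVectors);
System.out.println("read " + numBatches + " batches");
// Keys are the column paths produced by SchemaPath.getSimplePath(name).toExpr();
// each value list holds that column's values across all batches, in row order.
for (Map.Entry<String, List<Object>> e : combinedVectors.entrySet()) {
    System.out.println(e.getKey() + " -> " + e.getValue().size() + " values");
}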

Example 4 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class TestWriteToDisk method test.

@Test
@SuppressWarnings("static-method")
public void test() throws Exception {
    final List<ValueVector> vectorList = Lists.newArrayList();
    final DrillConfig config = DrillConfig.create();
    try (final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
        final Drillbit bit = new Drillbit(config, serviceSet)) {
        bit.run();
        final DrillbitContext context = bit.getContext();
        final MaterializedField intField = MaterializedField.create("int", Types.required(TypeProtos.MinorType.INT));
        final MaterializedField binField = MaterializedField.create("binary", Types.required(TypeProtos.MinorType.VARBINARY));
        try (final IntVector intVector = (IntVector) TypeHelper.getNewVector(intField, context.getAllocator());
            final VarBinaryVector binVector = (VarBinaryVector) TypeHelper.getNewVector(binField, context.getAllocator())) {
            AllocationHelper.allocate(intVector, 4, 4);
            AllocationHelper.allocate(binVector, 4, 5);
            vectorList.add(intVector);
            vectorList.add(binVector);
            intVector.getMutator().setSafe(0, 0);
            binVector.getMutator().setSafe(0, "ZERO".getBytes());
            intVector.getMutator().setSafe(1, 1);
            binVector.getMutator().setSafe(1, "ONE".getBytes());
            intVector.getMutator().setSafe(2, 2);
            binVector.getMutator().setSafe(2, "TWO".getBytes());
            intVector.getMutator().setSafe(3, 3);
            binVector.getMutator().setSafe(3, "THREE".getBytes());
            intVector.getMutator().setValueCount(4);
            binVector.getMutator().setValueCount(4);
            VectorContainer container = new VectorContainer();
            container.addCollection(vectorList);
            container.setRecordCount(4);
            @SuppressWarnings("resource") WritableBatch batch = WritableBatch.getBatchNoHVWrap(container.getRecordCount(), container, false);
            VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(batch, context.getAllocator());
            final VectorAccessibleSerializable newWrap = new VectorAccessibleSerializable(context.getAllocator());
            try (final FileSystem fs = getLocalFileSystem()) {
                final File tempDir = Files.createTempDir();
                tempDir.deleteOnExit();
                final Path path = new Path(tempDir.getAbsolutePath(), "drillSerializable");
                try (final FSDataOutputStream out = fs.create(path)) {
                    wrap.writeToStream(out);
                }
                try (final FSDataInputStream in = fs.open(path)) {
                    newWrap.readFromStream(in);
                }
            }
            final VectorAccessible newContainer = newWrap.get();
            for (VectorWrapper<?> w : newContainer) {
                try (ValueVector vv = w.getValueVector()) {
                    int values = vv.getAccessor().getValueCount();
                    for (int i = 0; i < values; i++) {
                        final Object o = vv.getAccessor().getObject(i);
                        if (o instanceof byte[]) {
                            System.out.println(new String((byte[]) o));
                        } else {
                            System.out.println(o);
                        }
                    }
                }
            }
        }
    }
}
Also used : DrillbitContext(org.apache.drill.exec.server.DrillbitContext) Path(org.apache.hadoop.fs.Path) IntVector(org.apache.drill.exec.vector.IntVector) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) MaterializedField(org.apache.drill.exec.record.MaterializedField) VarBinaryVector(org.apache.drill.exec.vector.VarBinaryVector) VectorContainer(org.apache.drill.exec.record.VectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) DrillConfig(org.apache.drill.common.config.DrillConfig) Drillbit(org.apache.drill.exec.server.Drillbit) RemoteServiceSet(org.apache.drill.exec.server.RemoteServiceSet) FileSystem(org.apache.hadoop.fs.FileSystem) WritableBatch(org.apache.drill.exec.record.WritableBatch) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) File(java.io.File) ExecTest(org.apache.drill.exec.ExecTest) Test(org.junit.Test)
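
The write half of the round trip can be summarized on its own. This is a minimal sketch, assuming an already populated VectorContainer named container (with its record count set), a BufferAllocator named allocator, and the fs and path variables as in the test above.

// Minimal sketch (assumptions: 'container' is a populated VectorContainer, 'allocator'
// is a BufferAllocator, 'fs' and 'path' are set up as in the test above).
final WritableBatch batch = WritableBatch.getBatchNoHVWrap(container.getRecordCount(), container, false);
final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(batch, allocator);
try (FSDataOutputStream out = fs.create(path)) {
    // same arguments as in the test above; writes the batch to the stream
    wrap.writeToStream(out);
}
// Reading it back mirrors the code above:
final VectorAccessibleSerializable newWrap = new VectorAccessibleSerializable(allocator);
try (FSDataInputStream in = fs.open(path)) {
    newWrap.readFromStream(in);
}
final VectorAccessible roundTripped = newWrap.get();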

Example 5 with VectorAccessible

use of org.apache.drill.exec.record.VectorAccessible in project drill by axbaretto.

the class TestBatchValidator method testRepeatedBadValueOffset.

@Test
public void testRepeatedBadValueOffset() {
    BatchSchema schema = new SchemaBuilder().add("a", MinorType.VARCHAR, DataMode.REPEATED).build();
    SingleRowSet batch = fixture.rowSetBuilder(schema).addRow((Object) strArray()).addRow((Object) strArray("fred", "barney", "wilma")).addRow((Object) strArray("dino")).build();
    VectorAccessible va = batch.vectorAccessible();
    ValueVector v = va.iterator().next().getValueVector();
    RepeatedVarCharVector rvc = (RepeatedVarCharVector) v;
    VarCharVector vc = rvc.getDataVector();
    UInt4Vector ov = vc.getOffsetVector();
    ov.getMutator().set(4, 100_000);
    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
    validator.validate();
    List<String> errors = validator.errors();
    assertEquals(1, errors.size());
    assertTrue(errors.get(0).contains("Invalid offset"));
    batch.clear();
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) VectorAccessible(org.apache.drill.exec.record.VectorAccessible) RepeatedVarCharVector(org.apache.drill.exec.vector.RepeatedVarCharVector) BatchSchema(org.apache.drill.exec.record.BatchSchema) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RepeatedVarCharVector(org.apache.drill.exec.vector.RepeatedVarCharVector) VarCharVector(org.apache.drill.exec.vector.VarCharVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) Test(org.junit.Test)
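
For contrast, here is a hedged sketch of the positive path: the same builder, with no offsets tampered with, should produce no validation errors. It assumes the same fixture, schema, and strArray helper used in the test above.

// Minimal sketch (assumptions: same 'fixture', 'schema', and strArray helper as above).
SingleRowSet goodBatch = fixture.rowSetBuilder(schema)
    .addRow((Object) strArray("fred", "barney", "wilma"))
    .addRow((Object) strArray("dino"))
    .build();
BatchValidator validator = new BatchValidator(goodBatch.vectorAccessible(), true);
validator.validate();
// No offsets were corrupted, so the validator should report no errors.
assertTrue(validator.errors().isEmpty());
goodBatch.clear();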

Aggregations

VectorAccessible (org.apache.drill.exec.record.VectorAccessible): 32
ValueVector (org.apache.drill.exec.vector.ValueVector): 17
Test (org.junit.Test): 14
UInt4Vector (org.apache.drill.exec.vector.UInt4Vector): 12
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 8
RecordBatchSizer (org.apache.drill.exec.record.RecordBatchSizer): 8
RepeatedVarCharVector (org.apache.drill.exec.vector.RepeatedVarCharVector): 8
BatchSchema (org.apache.drill.exec.record.BatchSchema): 7
MaterializedField (org.apache.drill.exec.record.MaterializedField): 7
ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch): 6
RecordBatch (org.apache.drill.exec.record.RecordBatch): 6
VarCharVector (org.apache.drill.exec.vector.VarCharVector): 6
ExecTest (org.apache.drill.exec.ExecTest): 4
FragmentContextImpl (org.apache.drill.exec.ops.FragmentContextImpl): 4
SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2): 4
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 4
Text (org.apache.drill.exec.util.Text): 4
RepeatedListVector (org.apache.drill.exec.vector.complex.RepeatedListVector): 4
RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector): 4
List (java.util.List): 3