Example 26 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class DrillTestWrapper method addToMaterializedResults.

public static void addToMaterializedResults(List<Map<String, Object>> materializedRecords, List<QueryDataBatch> records, RecordBatchLoader loader) {
    long totalRecords = 0;
    QueryDataBatch batch;
    int size = records.size();
    for (int i = 0; i < size; i++) {
        batch = records.get(0);
        loader.load(batch.getHeader().getDef(), batch.getData());
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
        totalRecords += loader.getRecordCount();
        for (int j = 0; j < loader.getRecordCount(); j++) {
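            // A TreeMap keeps columns sorted by name, so materialized records compare deterministically.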
            Map<String, Object> record = new TreeMap<>();
            for (VectorWrapper<?> w : loader) {
                Object obj = w.getValueVector().getAccessor().getObject(j);
                // Normalize Drill's Text values to plain Strings so baselines
                // can be written with java.lang.String (instanceof is false for null).
                if (obj instanceof Text) {
                    obj = obj.toString();
                }
                record.put(SchemaPath.getSimplePath(w.getField().getName()).toExpr(), obj);
            }
            materializedRecords.add(record);
        }
        records.remove(0);
        batch.release();
        loader.clear();
    }
}
Also used : QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) Text(org.apache.drill.exec.util.Text) TreeMap(java.util.TreeMap)
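
For context, a minimal sketch of how this helper is typically driven from a Drill unit test. The query string and the allocator accessor below are illustrative assumptions, not part of the example above; BaseTestQuery-style tests expose similar hooks.

// Hypothetical driver: run a query, then materialize every batch into maps.
List<QueryDataBatch> results = testSqlWithResults("select * from cp.`employee.json` limit 5");
RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
List<Map<String, Object>> actual = new ArrayList<>();
DrillTestWrapper.addToMaterializedResults(actual, results, loader);
// Every VarChar column now holds java.lang.String instead of
// org.apache.drill.exec.util.Text, so plain String baselines compare equal.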

Example 27 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class DrillTestWrapper method addToCombinedVectorResults.

/**
 * Add to result vectors and compare batch schema against expected schema while iterating batches.
 * @param batches the record batches to iterate
 * @param expectedSchema the expected schema the batches should contain; a
 *                       SchemaChangeException is thrown if a batch with a
 *                       different schema is encountered
 * @param expectedBatchSize if non-null, an upper bound on the net size of each batch
 * @param expectedNumBatches if non-null, the minimum number of batches expected;
 *                       at most twice this number may be produced
 * @param combinedVectors the map that accumulates column values while iterating the batches
 * @param expectedTotalRecords if non-null, the exact total record count expected
 *
 * @return number of batches
 * @throws SchemaChangeException if a batch schema does not match the expected schema
 */
public static int addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema, Long expectedBatchSize, Integer expectedNumBatches, Map<String, List<Object>> combinedVectors, Integer expectedTotalRecords) throws SchemaChangeException {
    // TODO - this does not handle schema changes
    int numBatch = 0;
    long totalRecords = 0;
    BatchSchema schema = null;
    for (VectorAccessible loader : batches) {
        numBatch++;
        if (expectedSchema != null) {
            if (!expectedSchema.isEquivalent(loader.getSchema())) {
                throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s.  Expected schema : %s", loader.getSchema(), expectedSchema));
            }
        }
        if (expectedBatchSize != null) {
            RecordBatchSizer sizer = new RecordBatchSizer(loader);
            // Not checking actualSize as accounting is not correct when we do
            // split and transfer ownership across operators.
            Assert.assertTrue(sizer.getNetBatchSize() <= expectedBatchSize);
        }
        if (schema == null) {
            schema = loader.getSchema();
            for (MaterializedField mf : schema) {
                combinedVectors.put(SchemaPath.getSimplePath(mf.getName()).toExpr(), new ArrayList<>());
            }
        } else {
            // TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
            // of the current batch, the check for a null schema is used to only mutate the schema once
            // need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
            schema = loader.getSchema();
        }
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
        totalRecords += loader.getRecordCount();
        for (VectorWrapper<?> w : loader) {
            String field = SchemaPath.getSimplePath(w.getField().getName()).toExpr();
            ValueVector[] vectors;
            if (w.isHyper()) {
                vectors = w.getValueVectors();
            } else {
                vectors = new ValueVector[] { w.getValueVector() };
            }
            SelectionVector2 sv2 = null;
            SelectionVector4 sv4 = null;
            switch(schema.getSelectionVectorMode()) {
                case TWO_BYTE:
                    sv2 = loader.getSelectionVector2();
                    break;
                case FOUR_BYTE:
                    sv4 = loader.getSelectionVector4();
                    break;
                default:
            }
            if (sv4 != null) {
                for (int j = 0; j < sv4.getCount(); j++) {
                    int complexIndex = sv4.get(j);
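                    // An SV4 entry packs the batch index into the upper 16 bits
                    // and the record index within that batch into the lower 16 bits.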
                    int batchIndex = complexIndex >> 16;
                    int recordIndexInBatch = complexIndex & 65535;
                    Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
                    if (obj instanceof Text) {
                        obj = obj.toString();
                    }
                    combinedVectors.get(field).add(obj);
                }
            } else {
                for (ValueVector vv : vectors) {
                    for (int j = 0; j < loader.getRecordCount(); j++) {
                        int index;
                        if (sv2 != null) {
                            index = sv2.getIndex(j);
                        } else {
                            index = j;
                        }
                        Object obj = vv.getAccessor().getObject(index);
                        if (obj instanceof Text) {
                            obj = obj.toString();
                        }
                        combinedVectors.get(field).add(obj);
                    }
                }
            }
        }
    }
    if (expectedNumBatches != null) {
        // Based on how much memory is actually taken by value vectors (because of doubling stuff),
        // we have to do complex math for predicting exact number of batches.
        // Instead, check that number of batches is at least the minimum that is expected
        // and no more than twice of that.
        Assert.assertTrue(numBatch >= expectedNumBatches);
        Assert.assertTrue(numBatch <= (2 * expectedNumBatches));
    }
    if (expectedTotalRecords != null) {
        Assert.assertEquals(expectedTotalRecords.longValue(), totalRecords);
    }
    return numBatch;
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) MaterializedField(org.apache.drill.exec.record.MaterializedField) Text(org.apache.drill.exec.util.Text) ValueVector(org.apache.drill.exec.vector.ValueVector) RecordBatchSizer(org.apache.drill.exec.record.RecordBatchSizer) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchSchema(org.apache.drill.exec.record.BatchSchema) SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)
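
The four-byte selection vector decoding above relies on a fixed bit layout. A self-contained sketch of the round trip; packSv4 is a hypothetical helper added only to illustrate the arithmetic, since Drill's SelectionVector4 does the packing internally:

// Pack a (batch, record) pair the way the decode loop above expects.
static int packSv4(int batchIndex, int recordIndex) {
    return (batchIndex << 16) | (recordIndex & 0xFFFF);
}

// Decoding mirrors the loop above.
int complexIndex = packSv4(3, 42);
int batchIndex = complexIndex >> 16;             // 3
int recordIndexInBatch = complexIndex & 65535;   // 42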

Example 28 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class TestBuilder method mapOf.

/**
 * Convenience method to create a {@link JsonStringHashMap<String, Object>} map instance with the given key value sequence.
 *
 * Key value sequence consists of key - value pairs such that a key precedes its value. For instance:
 *
 * mapOf("name", "Adam", "age", 41) corresponds to {"name": "Adam", "age": 41} in JSON.
 */
public static JsonStringHashMap<String, Object> mapOf(Object... keyValueSequence) {
    Preconditions.checkArgument(keyValueSequence.length % 2 == 0, "Length of key value sequence must be even");
    final JsonStringHashMap<String, Object> map = new JsonStringHashMap<>();
    for (int i = 0; i < keyValueSequence.length; i += 2) {
        Object value = keyValueSequence[i + 1];
        if (value instanceof CharSequence) {
            value = new Text(value.toString());
        }
        map.put(String.class.cast(keyValueSequence[i]), value);
    }
    return map;
}
Also used : Text(org.apache.drill.exec.util.Text) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap)
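
A short usage note: because mapOf wraps every CharSequence in Text, baselines built with it compare equal to the values Drill's complex vectors materialize, which are Text rather than String. The assertions below are illustrative:

// Builds a map equivalent to {"name": "Adam", "age": 41}; the String value is
// stored as org.apache.drill.exec.util.Text, the Integer passes through unchanged.
JsonStringHashMap<String, Object> person = TestBuilder.mapOf("name", "Adam", "age", 41);
assert person.get("name") instanceof Text;
assert person.get("age").equals(41);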

Example 29 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class TestAggregateFunctions method testSingleValueFunction.

@Test
public void testSingleValueFunction() throws Exception {
    List<String> tableNames = Arrays.asList("cp.`parquet/alltypes_required.parquet`", "cp.`parquet/alltypes_optional.parquet`");
    for (String tableName : tableNames) {
        final QueryDataBatch result = queryBuilder().sql("select * from %s limit 1", tableName).results().get(0);
        final Map<String, StringBuilder> functions = new HashMap<>();
        functions.put("single_value", new StringBuilder());
        final Map<String, Object> resultingValues = new HashMap<>();
        final List<String> columns = new ArrayList<>();
        final RecordBatchLoader loader = new RecordBatchLoader(cluster.allocator());
        loader.load(result.getHeader().getDef(), result.getData());
        for (VectorWrapper<?> vectorWrapper : loader.getContainer()) {
            final String fieldName = vectorWrapper.getField().getName();
            Object object = vectorWrapper.getValueVector().getAccessor().getObject(0);
            // VarCharVector returns Text instance, but baseline values should contain String value
            if (object instanceof Text) {
                object = object.toString();
            }
            resultingValues.put(String.format("`%s`", fieldName), object);
            for (Map.Entry<String, StringBuilder> function : functions.entrySet()) {
                function.getValue().append(function.getKey()).append("(").append(fieldName).append(") ").append(fieldName).append(",");
            }
            columns.add(fieldName);
        }
        loader.clear();
        result.release();
        String columnsList = String.join(", ", columns);
        final List<Map<String, Object>> baselineRecords = new ArrayList<>();
        baselineRecords.add(resultingValues);
        for (StringBuilder selectBody : functions.values()) {
            selectBody.setLength(selectBody.length() - 1);
            testBuilder().sqlQuery("select %s from (select %s from %s limit 1)", selectBody.toString(), columnsList, tableName).unOrdered().baselineRecords(baselineRecords).go();
        }
    }
}
Also used : HashMap(java.util.HashMap) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) ArrayList(java.util.ArrayList) Text(org.apache.drill.exec.util.Text) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap) ClusterTest(org.apache.drill.test.ClusterTest) PlannerTest(org.apache.drill.categories.PlannerTest) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test) SqlFunctionTest(org.apache.drill.categories.SqlFunctionTest) UnlikelyTest(org.apache.drill.categories.UnlikelyTest)
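
To make the string assembly above concrete: for each column the loop appends "single_value(<column>) <column>," and the setLength call trims the trailing comma. A reconstruction for two illustrative column names; the real test derives the names from the loaded batch:

StringBuilder selectBody = new StringBuilder();
for (String col : Arrays.asList("bool_col", "int_col")) {
    selectBody.append("single_value").append("(").append(col).append(") ").append(col).append(",");
}
selectBody.setLength(selectBody.length() - 1); // drop the trailing comma
// -> "single_value(bool_col) bool_col,single_value(int_col) int_col"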

Example 30 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class TestOutputBatchSize method testFlattenListOfMaps.

@Test
public void testFlattenListOfMaps() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);
    // create input rows like this.
    // "a" : 5, "b" : wideString,
    // "c" : [ [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}],
    // [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}],
    // [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}] ]
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ], " + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ], " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ]");
        batchString.append("]},");
    }
    batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ], " + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ], " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ]");
    batchString.append("]}]");
    inputJsonBatches.add(batchString.toString());
    // Figure out what will be approximate total output size out of flatten for input above
    // We will use this sizing information to set output batch size so we can produce desired
    // number of batches that can be verified.
    // output rows will be like this.
    // "a" : 5, "b" : wideString, "c" : [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}]
    // "a" : 5, "b" : wideString, "c" : [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}]
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ]},");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]},");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]},");
    }
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ]},");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]},");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]}");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // set the output batch size to 1/2 of the total size expected.
    // We will get approximately 2 batches, and at most 4.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(flatten)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "b", "c")
        .expectedNumBatches(2)             // verify number of batches
        .expectedBatchSize(totalSize / 2); // verify batch size
    final JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
    resultExpected1.put("trans_id", new Text("t1"));
    resultExpected1.put("amount", new Long(100));
    resultExpected1.put("trans_time", new Long(7777777));
    resultExpected1.put("type", new Text("sports"));
    final JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
    resultExpected2.put("trans_id", new Text("t2"));
    resultExpected2.put("amount", new Long(1000));
    resultExpected2.put("trans_time", new Long(8888888));
    resultExpected2.put("type", new Text("groceries"));
    final JsonStringArrayList<JsonStringHashMap<String, Object>> results = new JsonStringArrayList<JsonStringHashMap<String, Object>>() {

        {
            add(resultExpected1);
            add(resultExpected2);
        }
    };
    for (int i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5L, wideString, results);
        opTestBuilder.baselineValues(5L, wideString, results);
        opTestBuilder.baselineValues(5L, wideString, results);
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) Text(org.apache.drill.exec.util.Text) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonStringArrayList(org.apache.drill.exec.util.JsonStringArrayList) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) Test(org.junit.Test)
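
As an aside, the two expected maps above could equally be built with TestBuilder.mapOf from Example 28, which wraps CharSequence values in Text automatically; a sketch, not the code the project uses here:

// Non-CharSequence values pass through unchanged, so 100L matches the Long the vectors return.
JsonStringHashMap<String, Object> resultExpected1 =
    TestBuilder.mapOf("trans_id", "t1", "amount", 100L, "trans_time", 7777777L, "type", "sports");
JsonStringHashMap<String, Object> resultExpected2 =
    TestBuilder.mapOf("trans_id", "t2", "amount", 1000L, "trans_time", 8888888L, "type", "groceries");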

Aggregations

Text (org.apache.drill.exec.util.Text) 36
Test (org.junit.Test) 22
JsonStringHashMap (org.apache.drill.exec.util.JsonStringHashMap) 14
PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator) 10
FlattenPOP (org.apache.drill.exec.physical.config.FlattenPOP) 10
JsonStringArrayList (org.apache.drill.exec.util.JsonStringArrayList) 8
QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch) 6
TreeMap (java.util.TreeMap) 5
UnlikelyTest (org.apache.drill.categories.UnlikelyTest) 5
LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder) 5
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString) 5
HashMap (java.util.HashMap) 4
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 4
BatchSchema (org.apache.drill.exec.record.BatchSchema) 4
MaterializedField (org.apache.drill.exec.record.MaterializedField) 4
VectorAccessible (org.apache.drill.exec.record.VectorAccessible) 4
SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2) 4
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4) 4
ValueVector (org.apache.drill.exec.vector.ValueVector) 4
BufferedWriter (java.io.BufferedWriter) 3