Example 11 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class TestJsonReader method testUntypedPathWithUnion.

// DRILL-6020
@Test
public void testUntypedPathWithUnion() throws Exception {
    String fileName = "table.json";
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(dirTestWatcher.getRootDir(), fileName)))) {
        writer.write("{\"rk\": {\"a\": {\"b\": \"1\"}}}");
        writer.write("{\"rk\": {\"a\": \"2\"}}");
    }
    JsonStringHashMap<String, Text> map = new JsonStringHashMap<>();
    map.put("b", new Text("1"));
    try {
        testBuilder()
            .sqlQuery("select t.rk.a as a from dfs.`%s` t", fileName)
            .ordered()
            .optionSettingQueriesForTestQuery("alter session set `exec.enable_union_type`=true")
            .baselineColumns("a")
            .baselineValues(map)
            .baselineValues("2")
            .go();
    } finally {
        resetSessionOption(ExecConstants.ENABLE_UNION_TYPE_KEY);
    }
}
Also used : FileWriter(java.io.FileWriter) Text(org.apache.drill.exec.util.Text) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) File(java.io.File) BufferedWriter(java.io.BufferedWriter) Test(org.junit.Test)
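Note that the two write calls emit no separator, so the file contains two concatenated root-level JSON objects ({"rk": {"a": {"b": "1"}}}{"rk": {"a": "2"}}), which Drill's JSON reader treats as two records.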

Example 12 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class TestBuilder method mapOfObject.

/**
 * Convenience method to create an instance of {@link JsonStringHashMap}{@code <Object, Object>} with the given key-value sequence.
 *
 * By default, any {@link String} instance is wrapped in a {@link Text} instance. To disable wrapping of keys, pass
 * {@code false} as the first object in the key-value sequence; note that {@link CharSequence} values are always wrapped.
 *
 * @param keyValueSequence sequence of key-value pairs, optionally preceded by a boolean
 *                         flag that disables wrapping of String keys in {@link Text}
 * @return map consisting of the entries given in the key-value sequence
 */
public static JsonStringHashMap<Object, Object> mapOfObject(Object... keyValueSequence) {
    boolean convertStringToText = true;
    final int startIndex;
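    // An odd-length argument list means the first element is the boolean wrapping flag.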
    if (keyValueSequence.length % 2 == 1) {
        convertStringToText = (boolean) keyValueSequence[0];
        startIndex = 1;
    } else {
        startIndex = 0;
    }
    final JsonStringHashMap<Object, Object> map = new JsonStringHashMap<>();
    for (int i = startIndex; i < keyValueSequence.length; i += 2) {
        Object key = keyValueSequence[i];
        if (convertStringToText && key instanceof CharSequence) {
            key = new Text(key.toString());
        }
        Object value = keyValueSequence[i + 1];
        if (value instanceof CharSequence) {
            value = new Text(value.toString());
        }
        map.put(key, value);
    }
    return map;
}
Also used : Text(org.apache.drill.exec.util.Text) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap)
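
A minimal usage sketch (illustrative calls, not part of the Drill sources):

// By default, String keys and CharSequence values are wrapped in Text.
JsonStringHashMap<Object, Object> wrapped = mapOfObject("a", "1", "b", 2L);
// A leading false disables wrapping of keys; values are still wrapped.
JsonStringHashMap<Object, Object> plainKeys = mapOfObject(false, "a", "1");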

Example 13 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class DrillTestWrapper method addToCombinedVectorResults.

/**
   * Add to result vectors and compare batch schema against expected schema while iterating batches.
   * @param batches        the batches to iterate over
   * @param expectedSchema the expected schema of the batches; a SchemaChangeException is thrown
   *                       if a batch with a different schema is encountered
   * @return map from column name (as a SchemaPath expression) to the values of that column across all batches
   * @throws SchemaChangeException
   * @throws UnsupportedEncodingException
   */
public static Map<String, List<Object>> addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema) throws SchemaChangeException, UnsupportedEncodingException {
    // TODO - this does not handle schema changes
    Map<String, List<Object>> combinedVectors = new TreeMap<>();
    long totalRecords = 0;
    BatchSchema schema = null;
    for (VectorAccessible loader : batches) {
        if (expectedSchema != null) {
            if (!expectedSchema.equals(loader.getSchema())) {
                throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s.  Expected schema : %s", loader.getSchema(), expectedSchema));
            }
        }
        if (schema == null) {
            schema = loader.getSchema();
            for (MaterializedField mf : schema) {
                combinedVectors.put(SchemaPath.getSimplePath(mf.getPath()).toExpr(), new ArrayList<Object>());
            }
        } else {
            // TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
            // of the current batch, the check for a null schema is used to only mutate the schema once
            // need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
            schema = loader.getSchema();
        }
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
        totalRecords += loader.getRecordCount();
        for (VectorWrapper<?> w : loader) {
            String field = SchemaPath.getSimplePath(w.getField().getPath()).toExpr();
            ValueVector[] vectors;
            if (w.isHyper()) {
                vectors = w.getValueVectors();
            } else {
                vectors = new ValueVector[] { w.getValueVector() };
            }
            SelectionVector2 sv2 = null;
            SelectionVector4 sv4 = null;
            switch(schema.getSelectionVectorMode()) {
                case TWO_BYTE:
                    sv2 = loader.getSelectionVector2();
                    break;
                case FOUR_BYTE:
                    sv4 = loader.getSelectionVector4();
                    break;
            }
            if (sv4 != null) {
                for (int j = 0; j < sv4.getCount(); j++) {
                    int complexIndex = sv4.get(j);
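                    // An SV4 entry packs the hyper-batch index in the upper 16 bits and the
                    // record offset within that batch in the lower 16 bits.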
                    int batchIndex = complexIndex >> 16;
                    int recordIndexInBatch = complexIndex & 65535;
                    Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
                    if (obj instanceof Text) {
                        obj = obj.toString();
                    }
                    combinedVectors.get(field).add(obj);
                }
            } else {
                for (ValueVector vv : vectors) {
                    for (int j = 0; j < loader.getRecordCount(); j++) {
                        int index;
                        if (sv2 != null) {
                            index = sv2.getIndex(j);
                        } else {
                            index = j;
                        }
                        Object obj = vv.getAccessor().getObject(index);
                        if (obj instanceof Text) {
                            obj = obj.toString();
                        }
                        combinedVectors.get(field).add(obj);
                    }
                }
            }
        }
    }
    return combinedVectors;
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) MaterializedField(org.apache.drill.exec.record.MaterializedField) Text(org.apache.drill.exec.util.Text) TreeMap(java.util.TreeMap) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchSchema(org.apache.drill.exec.record.BatchSchema) SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) ArrayList(java.util.ArrayList) List(java.util.List) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)
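
A sketch of how a caller might consume the returned map (the batches iterable and the column name "a" are assumptions for illustration, not part of the listing):

// Combine all batches into per-column value lists, skipping the schema check (null expected schema).
Map<String, List<Object>> columns = addToCombinedVectorResults(batches, null);
// Keys are SchemaPath expressions, so a simple column "a" is keyed as `a` (with backquotes).
List<Object> aValues = columns.get("`a`");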

Example 14 with Text

use of org.apache.drill.exec.util.Text in project drill by apache.

the class DrillTestWrapper method addToMaterializedResults.

public static void addToMaterializedResults(List<Map<String, Object>> materializedRecords, List<QueryDataBatch> records, RecordBatchLoader loader) throws SchemaChangeException, UnsupportedEncodingException {
    long totalRecords = 0;
    QueryDataBatch batch;
    int size = records.size();
    for (int i = 0; i < size; i++) {
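        // Always consume the head of the list; the batch is removed from the front at the end of each iteration.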
        batch = records.get(0);
        loader.load(batch.getHeader().getDef(), batch.getData());
        // TODO:  Clean:  DRILL-2933:  That load(...) no longer throws
        // SchemaChangeException, so check/clean throws clause above.
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
        totalRecords += loader.getRecordCount();
        for (int j = 0; j < loader.getRecordCount(); j++) {
            Map<String, Object> record = new TreeMap<>();
            for (VectorWrapper<?> w : loader) {
                Object obj = w.getValueVector().getAccessor().getObject(j);
                if (obj instanceof Text) {
                    obj = obj.toString();
                }
                record.put(SchemaPath.getSimplePath(w.getField().getPath()).toExpr(), obj);
            }
            materializedRecords.add(record);
        }
        records.remove(0);
        batch.release();
        loader.clear();
    }
}
Also used : QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) Text(org.apache.drill.exec.util.Text) TreeMap(java.util.TreeMap)

Example 15 with Text

use of org.apache.drill.exec.util.Text in project drill by axbaretto.

the class TestOutputBatchSize method testFlattenLowerLimit.

@Test
public void testFlattenLowerLimit() throws Exception {
    // test the lower limit of at least one batch
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    StringBuilder flattenElement = new StringBuilder();
    // Create a JSON array of 11 elements: 0 through 10.
    flattenElement.append("[");
    for (int i = 0; i < 10; i++) {
        flattenElement.append(i);
        flattenElement.append(",");
    }
    flattenElement.append(10);
    flattenElement.append("]");
    // Create a JSON array of 11 copies of wideString.
    final StringBuilder wideStrings = new StringBuilder();
    wideStrings.append("[");
    for (int i = 0; i < 10; i++) {
        wideStrings.append("\"" + wideString + "\",");
    }
    wideStrings.append("\"" + wideString + "\"");
    wideStrings.append("]");
    batchString.append("[");
    batchString.append("{\"a\": " + wideStrings + "," + "\"c\":" + flattenElement);
    batchString.append("}]");
    inputJsonBatches.add(batchString.toString());
    // Figure out the approximate total output size of flatten for the input above.
    // We use this sizing information to set the output batch size so we can produce a
    // desired number of batches that can be verified.
    // Set a very low batch size relative to the large record size; this tests that we
    // get at least one record per batch.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 1024);
    // Here we expect 10 batches because each batch is bounded by the lower limit of at least 1 record.
    // Do not check the output batch size: it will exceed the configured value of 1024 so that
    // at least one record makes it out.
    OperatorTestBuilder opTestBuilder = opTestBuilder()
        .physicalOperator(flatten)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "c")
        .expectedNumBatches(10); // verify number of batches
    // Expected value for column "a": an 11-element list of the wide string, repeated for every row.
    final JsonStringArrayList<Text> results = new JsonStringArrayList<>();
    for (int i = 0; i < 11; i++) {
        results.add(new Text(wideString));
    }
    for (long j = 0; j < 11; j++) {
        opTestBuilder.baselineValues(results, j);
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonStringArrayList(org.apache.drill.exec.util.JsonStringArrayList) Text(org.apache.drill.exec.util.Text) Test(org.junit.Test)

Aggregations

Text (org.apache.drill.exec.util.Text) 36
Test (org.junit.Test) 22
JsonStringHashMap (org.apache.drill.exec.util.JsonStringHashMap) 14
PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator) 10
FlattenPOP (org.apache.drill.exec.physical.config.FlattenPOP) 10
JsonStringArrayList (org.apache.drill.exec.util.JsonStringArrayList) 8
QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch) 6
TreeMap (java.util.TreeMap) 5
UnlikelyTest (org.apache.drill.categories.UnlikelyTest) 5
LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder) 5
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString) 5
HashMap (java.util.HashMap) 4
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 4
BatchSchema (org.apache.drill.exec.record.BatchSchema) 4
MaterializedField (org.apache.drill.exec.record.MaterializedField) 4
VectorAccessible (org.apache.drill.exec.record.VectorAccessible) 4
SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2) 4
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4) 4
ValueVector (org.apache.drill.exec.vector.ValueVector) 4
BufferedWriter (java.io.BufferedWriter) 3