Use of org.apache.drill.exec.util.Text in the Apache Drill project: class TestJsonReader, method testUntypedPathWithUnion.
// DRILL-6020
@Test
public void testUntypedPathWithUnion() throws Exception {
  String fileName = "table.json";
  // Write two records whose nested field rk.a changes type between rows
  // (a map in the first record, a varchar in the second) so the untyped
  // path has to be resolved through the union type.
  File tableFile = new File(dirTestWatcher.getRootDir(), fileName);
  try (BufferedWriter writer = new BufferedWriter(new FileWriter(tableFile))) {
    writer.write("{\"rk\": {\"a\": {\"b\": \"1\"}}}");
    writer.write("{\"rk\": {\"a\": \"2\"}}");
  }
  // Expected value for the first row: the map {b -> "1"} as materialized from JSON.
  JsonStringHashMap<String, Text> expectedMap = new JsonStringHashMap<>();
  expectedMap.put("b", new Text("1"));
  try {
    testBuilder()
        .sqlQuery("select t.rk.a as a from dfs.`%s` t", fileName)
        .ordered()
        .optionSettingQueriesForTestQuery("alter session set `exec.enable_union_type`=true")
        .baselineColumns("a")
        .baselineValues(expectedMap)
        .baselineValues("2")
        .go();
  } finally {
    // Restore the session option even if the query assertions fail.
    resetSessionOption(ExecConstants.ENABLE_UNION_TYPE_KEY);
  }
}
Use of org.apache.drill.exec.util.Text in the Apache Drill project: class TestBuilder, method mapOfObject.
/**
 * Builds a {@link JsonStringHashMap}{@code <Object, Object>} from the given
 * alternating key-value sequence.
 *
 * <p>Values that are {@link CharSequence} instances are always wrapped in a
 * {@link Text}. Keys that are {@link CharSequence} instances are wrapped in a
 * {@link Text} as well, unless key wrapping is disabled by passing
 * {@code false} as an extra leading argument (detected by an odd-length
 * argument list). Note the flag affects keys only; values are wrapped
 * unconditionally.
 *
 * @param keyValueSequence key-value pairs, optionally preceded by a boolean
 *                         flag that disables wrapping of String keys in {@link Text}
 * @return map consisting of the entries given in the key-value sequence
 */
public static JsonStringHashMap<Object, Object> mapOfObject(Object... keyValueSequence) {
  // An odd number of arguments means the leading element is the key-wrap flag.
  final boolean wrapStringKeys;
  final int firstPairIndex;
  if (keyValueSequence.length % 2 == 0) {
    wrapStringKeys = true;
    firstPairIndex = 0;
  } else {
    wrapStringKeys = (boolean) keyValueSequence[0];
    firstPairIndex = 1;
  }
  final JsonStringHashMap<Object, Object> result = new JsonStringHashMap<>();
  for (int pos = firstPairIndex; pos < keyValueSequence.length; pos += 2) {
    Object key = keyValueSequence[pos];
    Object value = keyValueSequence[pos + 1];
    if (wrapStringKeys && key instanceof CharSequence) {
      key = new Text(key.toString());
    }
    if (value instanceof CharSequence) {
      value = new Text(value.toString());
    }
    result.put(key, value);
  }
  return result;
}
Use of org.apache.drill.exec.util.Text in the Apache Drill project: class DrillTestWrapper, method addToCombinedVectorResults.
/**
 * Add to result vectors and compare batch schema against expected schema while iterating batches.
 *
 * Materializes every batch into per-column value lists keyed by the column's
 * expression path. {@link Text} values are converted to plain Strings so that
 * baselines can compare against ordinary String objects.
 *
 * @param batches batches to read; each is drained fully through its vector accessors
 * @param expectedSchema: the expected schema the batches should contain. Through SchemaChangeException
 * if encounter different batch schema. Pass null to skip the schema check.
 * @return map from column expression path to that column's values across all batches
 * @throws SchemaChangeException if a batch's schema differs from expectedSchema
 * @throws UnsupportedEncodingException declared for callers; not thrown by the visible code
 */
public static Map<String, List<Object>> addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema) throws SchemaChangeException, UnsupportedEncodingException {
// TODO - this does not handle schema changes
Map<String, List<Object>> combinedVectors = new TreeMap<>();
long totalRecords = 0;
BatchSchema schema = null;
for (VectorAccessible loader : batches) {
if (expectedSchema != null) {
if (!expectedSchema.equals(loader.getSchema())) {
throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s. Expected schema : %s", loader.getSchema(), expectedSchema));
}
}
// First batch: capture the schema and create one result list per column.
if (schema == null) {
schema = loader.getSchema();
for (MaterializedField mf : schema) {
combinedVectors.put(SchemaPath.getSimplePath(mf.getPath()).toExpr(), new ArrayList<Object>());
}
} else {
// TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
// of the current batch, the check for a null schema is used to only mutate the schema once
// need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
schema = loader.getSchema();
}
logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
totalRecords += loader.getRecordCount();
for (VectorWrapper<?> w : loader) {
String field = SchemaPath.getSimplePath(w.getField().getPath()).toExpr();
ValueVector[] vectors;
// A hyper wrapper holds multiple underlying vectors addressed via SV4;
// a plain wrapper is treated as a single-element vector array.
if (w.isHyper()) {
vectors = w.getValueVectors();
} else {
vectors = new ValueVector[] { w.getValueVector() };
}
SelectionVector2 sv2 = null;
SelectionVector4 sv4 = null;
switch(schema.getSelectionVectorMode()) {
case TWO_BYTE:
sv2 = loader.getSelectionVector2();
break;
case FOUR_BYTE:
sv4 = loader.getSelectionVector4();
break;
}
if (sv4 != null) {
// SV4 path: each 4-byte entry packs batch index (high 16 bits)
// and record index within that batch (low 16 bits).
for (int j = 0; j < sv4.getCount(); j++) {
int complexIndex = sv4.get(j);
int batchIndex = complexIndex >> 16;
int recordIndexInBatch = complexIndex & 65535;
Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
if (obj != null) {
// Normalize Text to String for baseline comparison.
if (obj instanceof Text) {
obj = obj.toString();
}
}
combinedVectors.get(field).add(obj);
}
} else {
// SV2 or no selection vector: iterate record count, indirecting
// through the SV2 when present.
for (ValueVector vv : vectors) {
for (int j = 0; j < loader.getRecordCount(); j++) {
int index;
if (sv2 != null) {
index = sv2.getIndex(j);
} else {
index = j;
}
Object obj = vv.getAccessor().getObject(index);
if (obj != null) {
// Normalize Text to String for baseline comparison.
if (obj instanceof Text) {
obj = obj.toString();
}
}
combinedVectors.get(field).add(obj);
}
}
}
}
}
return combinedVectors;
}
Use of org.apache.drill.exec.util.Text in the Apache Drill project: class DrillTestWrapper, method addToMaterializedResults.
/**
 * Materializes query result batches into a list of row maps.
 *
 * Each row becomes a map from column expression path to value; {@link Text}
 * values are converted to plain Strings so baselines can compare them
 * directly. Batches are consumed from the head of {@code records} and
 * released as they are processed, so the list is empty on return.
 *
 * @param materializedRecords output list that materialized rows are appended to
 * @param records query result batches to drain (consumed and released)
 * @param loader batch loader used to deserialize each batch
 * @throws SchemaChangeException declared for compatibility; load(...) no longer throws it (DRILL-2933)
 * @throws UnsupportedEncodingException declared for callers; not thrown by the visible code
 */
public static void addToMaterializedResults(List<Map<String, Object>> materializedRecords, List<QueryDataBatch> records, RecordBatchLoader loader) throws SchemaChangeException, UnsupportedEncodingException {
  long totalRecords = 0;
  QueryDataBatch batch;
  int size = records.size();
  for (int i = 0; i < size; i++) {
    // Always take the head; the batch is removed from the list at the end of the loop.
    batch = records.get(0);
    loader.load(batch.getHeader().getDef(), batch.getData());
    // TODO: Clean: DRILL-2933: That load(...) no longer throws
    // SchemaChangeException, so check/clean throws clause above.
    logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
    totalRecords += loader.getRecordCount();
    for (int j = 0; j < loader.getRecordCount(); j++) {
      Map<String, Object> record = new TreeMap<>();
      for (VectorWrapper<?> w : loader) {
        Object obj = w.getValueVector().getAccessor().getObject(j);
        // Normalize Text to String for baseline comparison (instanceof is null-safe).
        if (obj instanceof Text) {
          obj = obj.toString();
        }
        // Bug fix: put each column exactly once. The original called put twice
        // for non-null values (once inside the null check and once after it).
        record.put(SchemaPath.getSimplePath(w.getField().getPath()).toExpr(), obj);
      }
      materializedRecords.add(record);
    }
    records.remove(0);
    batch.release();
    loader.clear();
  }
}
Use of org.apache.drill.exec.util.Text in the drill fork by axbaretto: class TestOutputBatchSize, method testFlattenLowerLimit.
@Test
public void testFlattenLowerLimit() throws Exception {
// test the lower limit of at least one batch
PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
mockOpContext(flatten, initReservation, maxAllocation);
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
StringBuilder flattenElement = new StringBuilder();
// Build a JSON array of 11 integers (0 through 10) to be flattened.
flattenElement.append("[");
for (int i = 0; i < 10; i++) {
flattenElement.append(i);
flattenElement.append(",");
}
flattenElement.append(10);
flattenElement.append("]");
// Build a JSON array of 11 copies of wideString for column "a".
final StringBuilder wideStrings = new StringBuilder();
wideStrings.append("[");
for (int i = 0; i < 10; i++) {
wideStrings.append("\"" + wideString + "\",");
}
wideStrings.append("\"" + wideString + "\"");
wideStrings.append("]");
// Single input batch: one record with a wide "a" list and the "c" list to flatten.
batchString.append("[");
batchString.append("{\"a\": " + wideStrings + "," + "\"c\":" + flattenElement);
batchString.append("}]");
inputJsonBatches.add(batchString.toString());
// Figure out what will be approximate total output size out of flatten for input above
// We will use this sizing information to set output batch size so we can produce desired
// number of batches that can be verified.
// set very low value of batch size for a large record size.
// This is to test we atleast get one record per batch.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 1024);
// Flattening 11 elements with a 1024-byte output limit should yield one
// record per batch; expect 10+ batches (checked as expectedNumBatches below).
// do not check the output batch size as it will be more than configured value of 1024, so we get
// at least one record out.
OperatorTestBuilder opTestBuilder = opTestBuilder().physicalOperator(flatten).inputDataStreamJson(inputJsonBatches).baselineColumns("a", "c").expectedNumBatches(// verify number of batches
10);
// Expected "a" value for every output row: the 11-element list of wideString.
final JsonStringArrayList<Text> results = new JsonStringArrayList<Text>() {
{
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
add(new Text(wideString));
}
};
// One baseline row per flattened value of "c": (results, 0) through (results, 10).
for (long j = 0; j < 11; j++) {
opTestBuilder.baselineValues(results, j);
}
opTestBuilder.go();
}
Aggregations