use of org.apache.drill.exec.util.Text in project drill by apache.
the class DrillTestWrapper method addToMaterializedResults.
/**
 * Drains {@code records}, turning every row of every batch into a map that is appended to
 * {@code materializedRecords}.
 *
 * <p>Each batch is loaded through {@code loader}; one {@link TreeMap} is created per row, keyed by the
 * simple-path expression of the column name. {@code Text} values are stored as their {@code String}
 * form, and a {@code null} value is stored as {@code null} under the column key. After a batch has
 * been read it is removed from {@code records}, released, and the loader is cleared, so the list is
 * empty when the method returns normally.
 *
 * @param materializedRecords destination list receiving one map per row
 * @param records batches to consume; processed batches are removed from this list
 * @param loader loader used to read each batch; cleared after every batch
 */
public static void addToMaterializedResults(List<Map<String, Object>> materializedRecords, List<QueryDataBatch> records, RecordBatchLoader loader) {
  long totalRecords = 0;
  // Always work on the head of the list: a batch is only removed after it has been fully read,
  // so batches that were not reached stay in the list for the caller.
  while (!records.isEmpty()) {
    QueryDataBatch batch = records.get(0);
    loader.load(batch.getHeader().getDef(), batch.getData());
    int rowCount = loader.getRecordCount();
    logger.debug("reading batch with " + rowCount + " rows, total read so far " + totalRecords);
    totalRecords += rowCount;
    for (int row = 0; row < rowCount; row++) {
      Map<String, Object> record = new TreeMap<>();
      for (VectorWrapper<?> w : loader) {
        Object obj = w.getValueVector().getAccessor().getObject(row);
        // instanceof is false for null, so no separate null check is needed.
        if (obj instanceof Text) {
          obj = obj.toString();
        }
        // Stored exactly once per column; null values are kept as null entries.
        record.put(SchemaPath.getSimplePath(w.getField().getName()).toExpr(), obj);
      }
      materializedRecords.add(record);
    }
    records.remove(0);
    batch.release();
    loader.clear();
  }
}
use of org.apache.drill.exec.util.Text in project drill by apache.
the class DrillTestWrapper method addToCombinedVectorResults.
/**
 * Appends the values of every batch to per-column result lists and checks each batch against the
 * optional expectations while iterating.
 *
 * <p>The column lists are created in {@code combinedVectors} from the schema of the first batch,
 * keyed by the simple-path expression of the field name. {@code Text} values are stored as their
 * {@code String} form. Schema changes between batches are not handled: a field that was not part
 * of the first batch has no list to append to.
 *
 * @param batches the batches to read
 * @param expectedSchema the schema every batch must be equivalent to, or {@code null} to skip the check
 * @param expectedBatchSize upper bound for the net batch size reported by {@code RecordBatchSizer},
 *                          or {@code null} to skip the check
 * @param expectedNumBatches if not {@code null}, the number of batches read must be at least this
 *                           value and at most twice this value
 * @param combinedVectors the per-column lists that receive the values of all batches
 * @param expectedTotalRecords if not {@code null}, the exact number of records expected over all batches
 *
 * @return number of batches
 * @throws SchemaChangeException if a batch schema is not equivalent to {@code expectedSchema}
 */
public static int addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema, Long expectedBatchSize, Integer expectedNumBatches, Map<String, List<Object>> combinedVectors, Integer expectedTotalRecords) throws SchemaChangeException {
  // TODO - this does not handle schema changes
  int numBatch = 0;
  long totalRecords = 0;
  BatchSchema schema = null;
  for (VectorAccessible loader : batches) {
    numBatch++;
    if (expectedSchema != null && !expectedSchema.isEquivalent(loader.getSchema())) {
      throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s. Expected schema : %s", loader.getSchema(), expectedSchema));
    }
    if (expectedBatchSize != null) {
      RecordBatchSizer sizer = new RecordBatchSizer(loader);
      // Not checking actualSize as accounting is not correct when we do
      // split and transfer ownership across operators.
      long netBatchSize = sizer.getNetBatchSize();
      Assert.assertTrue("Net batch size " + netBatchSize + " exceeds expected batch size " + expectedBatchSize,
          netBatchSize <= expectedBatchSize);
    }
    if (schema == null) {
      schema = loader.getSchema();
      for (MaterializedField mf : schema) {
        combinedVectors.put(SchemaPath.getSimplePath(mf.getName()).toExpr(), new ArrayList<>());
      }
    } else {
      // TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
      // of the current batch, the check for a null schema is used to only mutate the schema once
      // need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
      schema = loader.getSchema();
    }
    logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
    totalRecords += loader.getRecordCount();

    // The selection vector belongs to the batch, not to a column, so look it up once per batch.
    SelectionVector2 sv2 = null;
    SelectionVector4 sv4 = null;
    switch (schema.getSelectionVectorMode()) {
      case TWO_BYTE:
        sv2 = loader.getSelectionVector2();
        break;
      case FOUR_BYTE:
        sv4 = loader.getSelectionVector4();
        break;
      default:
        break;
    }

    for (VectorWrapper<?> w : loader) {
      String field = SchemaPath.getSimplePath(w.getField().getName()).toExpr();
      ValueVector[] vectors = w.isHyper() ? w.getValueVectors() : new ValueVector[] { w.getValueVector() };
      // Null when the field was not part of the first batch's schema (schema changes are unsupported).
      List<Object> column = combinedVectors.get(field);
      if (sv4 != null) {
        for (int j = 0; j < sv4.getCount(); j++) {
          int complexIndex = sv4.get(j);
          // Upper 16 bits select the vector, lower 16 bits the record inside it. The shift must be
          // unsigned, otherwise an index with the top bit set yields a negative vector index.
          int batchIndex = complexIndex >>> 16;
          int recordIndexInBatch = complexIndex & 0xFFFF;
          Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
          if (obj instanceof Text) {
            obj = obj.toString();
          }
          column.add(obj);
        }
      } else {
        for (ValueVector vv : vectors) {
          for (int j = 0; j < loader.getRecordCount(); j++) {
            int index = (sv2 != null) ? sv2.getIndex(j) : j;
            Object obj = vv.getAccessor().getObject(index);
            if (obj instanceof Text) {
              obj = obj.toString();
            }
            column.add(obj);
          }
        }
      }
    }
  }
  if (expectedNumBatches != null) {
    // Based on how much memory is actually taken by value vectors (because of doubling stuff),
    // we have to do complex math for predicting exact number of batches.
    // Instead, check that number of batches is at least the minimum that is expected
    // and no more than twice of that.
    Assert.assertTrue("Expected at least " + expectedNumBatches + " batches but got " + numBatch,
        numBatch >= expectedNumBatches);
    Assert.assertTrue("Expected at most " + (2 * expectedNumBatches) + " batches but got " + numBatch,
        numBatch <= (2 * expectedNumBatches));
  }
  if (expectedTotalRecords != null) {
    Assert.assertEquals("Unexpected total number of records", expectedTotalRecords.longValue(), totalRecords);
  }
  return numBatch;
}
use of org.apache.drill.exec.util.Text in project drill by apache.
the class TestBuilder method mapOf.
/**
 * Builds a {@link JsonStringHashMap} from a flat, alternating sequence of keys and values.
 *
 * <p>Every key is immediately followed by its value, for example:
 *
 * <pre>
 * mapOf("name", "Adam", "age", 41) corresponds to {"name": "Adam", "age": 41} in JSON.
 * </pre>
 *
 * <p>Values that are a {@link CharSequence} are stored as a {@code Text} built from their
 * {@code toString()} form; all other values are stored unchanged. Keys are cast to {@code String}.
 *
 * @param keyValueSequence alternating keys and values; its length must be even
 * @return a new map holding the given entries
 */
public static JsonStringHashMap<String, Object> mapOf(Object... keyValueSequence) {
  Preconditions.checkArgument(keyValueSequence.length % 2 == 0, "Length of key value sequence must be even");
  final JsonStringHashMap<String, Object> result = new JsonStringHashMap<>();
  int keyPos = 0;
  while (keyPos < keyValueSequence.length) {
    // The value is converted before the key is cast, as callers may rely on that evaluation order.
    final Object rawValue = keyValueSequence[keyPos + 1];
    final Object storedValue = (rawValue instanceof CharSequence) ? new Text(rawValue.toString()) : rawValue;
    final String key = String.class.cast(keyValueSequence[keyPos]);
    result.put(key, storedValue);
    keyPos += 2;
  }
  return result;
}
use of org.apache.drill.exec.util.Text in project drill by apache.
the class TestAggregateFunctions method testSingleValueFunction.
/**
 * Checks {@code single_value} against one-row inputs from the required and optional
 * all-types parquet tables.
 *
 * <p>For each table the first row is read to build the baseline record, then a query applying
 * {@code single_value} to every column of that one-row input is compared against the baseline.
 */
@Test
public void testSingleValueFunction() throws Exception {
  List<String> tableNames = Arrays.asList("cp.`parquet/alltypes_required.parquet`", "cp.`parquet/alltypes_optional.parquet`");
  for (String tableName : tableNames) {
    final QueryDataBatch result = queryBuilder().sql("select * from %s limit 1", tableName).results().get(0);
    final Map<String, StringBuilder> functions = new HashMap<>();
    functions.put("single_value", new StringBuilder());
    final Map<String, Object> resultingValues = new HashMap<>();
    final List<String> columns = new ArrayList<>();
    // Release the batch and clear the loader even when loading or reading a value fails,
    // so a failure here does not additionally leave buffers allocated.
    try {
      final RecordBatchLoader loader = new RecordBatchLoader(cluster.allocator());
      try {
        loader.load(result.getHeader().getDef(), result.getData());
        for (VectorWrapper<?> vectorWrapper : loader.getContainer()) {
          final String fieldName = vectorWrapper.getField().getName();
          Object object = vectorWrapper.getValueVector().getAccessor().getObject(0);
          // VarCharVector returns Text instance, but baseline values should contain String value
          if (object instanceof Text) {
            object = object.toString();
          }
          resultingValues.put(String.format("`%s`", fieldName), object);
          // Builds "single_value(col) col," for every column; the trailing comma is cut off below.
          for (Map.Entry<String, StringBuilder> function : functions.entrySet()) {
            function.getValue().append(function.getKey()).append("(").append(fieldName).append(") ").append(fieldName).append(",");
          }
          columns.add(fieldName);
        }
      } finally {
        loader.clear();
      }
    } finally {
      result.release();
    }
    String columnsList = String.join(", ", columns);
    final List<Map<String, Object>> baselineRecords = new ArrayList<>();
    baselineRecords.add(resultingValues);
    for (StringBuilder selectBody : functions.values()) {
      // Drop the trailing comma left by the column loop.
      selectBody.setLength(selectBody.length() - 1);
      testBuilder().sqlQuery("select %s from (select %s from %s limit 1)", selectBody.toString(), columnsList, tableName).unOrdered().baselineRecords(baselineRecords).go();
    }
  }
}
use of org.apache.drill.exec.util.Text in project drill by apache.
the class TestOutputBatchSize method testFlattenListOfMaps.
/**
 * Runs flatten over a column holding a list of lists of maps with the output batch size option
 * set to half of the expected total output size.
 *
 * <p>The input consists of {@code numRows + 1} rows whose column "c" holds three identical
 * two-element lists; the expected output holds three rows per input row, each carrying one of
 * those lists in "c".
 */
@Test
public void testFlattenListOfMaps() throws Exception {
  PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
  mockOpContext(flatten, initReservation, maxAllocation);

  // JSON fragments shared by the input and expected rows. The generated text is kept
  // byte-for-byte identical to the hand-written literals this replaces, which is why the
  // transaction list exists in two spellings that differ only in the blank before "]".
  final String rowPrefix = "{\"a\": 5, \"b\" : \"" + wideString + "\",\"c\" : ";
  final String sportsTxn = "{ \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}";
  final String groceriesTxn = "{ \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}";
  final String txnList = "[ " + sportsTxn + ", " + groceriesTxn + " ]";
  final String txnListTight = "[ " + sportsTxn + ", " + groceriesTxn + "]";

  // create input rows like this.
  // "a" : 5, "b" : wideString,
  // "c" : [ [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t2", amount:1000, trans_time:8888888, type:groceries}],
  //         [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t2", amount:1000, trans_time:8888888, type:groceries}],
  //         [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t2", amount:1000, trans_time:8888888, type:groceries}] ]
  final String inputRow = rowPrefix + "[" + txnList + ", " + txnList + ", " + txnList + "]}";
  List<String> inputJsonBatches = Lists.newArrayList();
  StringBuilder batchString = new StringBuilder();
  batchString.append("[");
  // numRows + 1 rows in total, comma separated.
  for (int i = 0; i <= numRows; i++) {
    if (i > 0) {
      batchString.append(",");
    }
    batchString.append(inputRow);
  }
  batchString.append("]");
  inputJsonBatches.add(batchString.toString());

  // Figure out what will be approximate total output size out of flatten for input above
  // We will use this sizing information to set output batch size so we can produce desired
  // number of batches that can be verified.
  // output rows will be like this.
  // "a" : 5, "b" : wideString, "c" : [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t2", amount:1000, trans_time:8888888, type:groceries}]
  // "a" : 5, "b" : wideString, "c" : [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t2", amount:1000, trans_time:8888888, type:groceries}]
  // Every input row flattens into three output rows.
  final String expectedRows = rowPrefix + txnList + "},"
      + rowPrefix + txnListTight + "},"
      + rowPrefix + txnListTight + "}";
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i <= numRows; i++) {
    if (i > 0) {
      expectedBatchString.append(",");
    }
    expectedBatchString.append(expectedRows);
  }
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);

  // set the output batch size to 1/2 of total size expected.
  // We will get approximately 2 batches and a max of 4.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(flatten)
      .inputDataStreamJson(inputJsonBatches)
      .baselineColumns("a", "b", "c")
      .expectedNumBatches(2) // verify number of batches
      .expectedBatchSize(totalSize / 2); // verify batch size.

  final JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
  resultExpected1.put("trans_id", new Text("t1"));
  resultExpected1.put("amount", Long.valueOf(100L));
  resultExpected1.put("trans_time", Long.valueOf(7777777L));
  resultExpected1.put("type", new Text("sports"));

  final JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
  resultExpected2.put("trans_id", new Text("t2"));
  resultExpected2.put("amount", Long.valueOf(1000L));
  resultExpected2.put("trans_time", Long.valueOf(8888888L));
  resultExpected2.put("type", new Text("groceries"));

  // Plain instance instead of double-brace initialization, which would create an anonymous subclass.
  final JsonStringArrayList<JsonStringHashMap<String, Object>> results = new JsonStringArrayList<>();
  results.add(resultExpected1);
  results.add(resultExpected2);

  // Three baseline rows per input row, matching the three inner lists of column "c".
  for (int i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, results);
    opTestBuilder.baselineValues(5L, wideString, results);
    opTestBuilder.baselineValues(5L, wideString, results);
  }
  opTestBuilder.go();
}
Aggregations