Use of org.apache.drill.exec.cache.VectorAccessibleSerializable in project drill by apache.
The class TestTraceOutputDump, method testFilter.
@Test
public void testFilter(@Injectable final DrillbitContext bitContext, @Injectable UserClientConnection connection) throws Throwable {
  mockDrillbitContext(bitContext);
  final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
  final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(FileUtils.getResourceAsFile("/trace/simple_trace.json"), Charsets.UTF_8));
  final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
  final FragmentContext context = new FragmentContext(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
  final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
  while (exec.next()) {
  }
  exec.close();
  if (context.getFailureCause() != null) {
    throw context.getFailureCause();
  }
  assertTrue(!context.isFailed());
  final FragmentHandle handle = context.getHandle();
  /* Form the file name to which the trace output will dump the record batches */
  final String qid = QueryIdHelper.getQueryId(handle.getQueryId());
  final int majorFragmentId = handle.getMajorFragmentId();
  final int minorFragmentId = handle.getMinorFragmentId();
  final String logLocation = c.getString(ExecConstants.TRACE_DUMP_DIRECTORY);
  System.out.println("Found log location: " + logLocation);
  final String filename = String.format("%s//%s_%d_%d_mock-scan", logLocation, qid, majorFragmentId, minorFragmentId);
  System.out.println("File Name: " + filename);
  final Configuration conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, c.getString(ExecConstants.TRACE_DUMP_FILESYSTEM));
  final FileSystem fs = FileSystem.get(conf);
  final Path path = new Path(filename);
  assertTrue("Trace file does not exist", fs.exists(path));
  final FSDataInputStream in = fs.open(path);
  final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(context.getAllocator());
  wrap.readFromStream(in);
  final VectorAccessible container = wrap.get();
  /* Assert there are no selection vectors */
  assertTrue(wrap.getSv2() == null);
  /* Assert there is only one record */
  assertTrue(container.getRecordCount() == 1);
  /* Read the integer value and assert that it is Integer.MIN_VALUE */
  final int value = (int) container.iterator().next().getValueVector().getAccessor().getObject(0);
  assertTrue(value == Integer.MIN_VALUE);
}
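Stripped of the test scaffolding, the deserialization pattern this test exercises is small: construct the wrapper with an allocator, feed it an input stream, then pull out the container and optional selection vector. A minimal sketch of just that pattern follows; the names allocator, fs, and path are assumptions standing in for the test's context.getAllocator() and trace-file path, not code from the project, and the snippet assumes a surrounding method that can propagate IOException.

// Minimal read-side sketch (assumed names: allocator, fs, path).
// Each readFromStream() call deserializes exactly one record batch.
try (FSDataInputStream in = fs.open(path)) {
  final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(allocator);
  wrap.readFromStream(in);
  final VectorAccessible batch = wrap.get();      // reconstructed vectors
  final SelectionVector2 sv2 = wrap.getSv2();     // null if the batch carried no SV2
  System.out.println("records: " + batch.getRecordCount() + ", has sv2: " + (sv2 != null));
}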
Use of org.apache.drill.exec.cache.VectorAccessibleSerializable in project drill by apache.
The class BatchGroup, method addBatch.
public void addBatch(VectorContainer newContainer) throws IOException {
  assert fs != null;
  assert path != null;
  if (outputStream == null) {
    outputStream = fs.create(path);
  }
  int recordCount = newContainer.getRecordCount();
  WritableBatch batch = WritableBatch.getBatchNoHVWrap(recordCount, newContainer, false);
  VectorAccessibleSerializable outputBatch = new VectorAccessibleSerializable(batch, allocator);
  Stopwatch watch = Stopwatch.createStarted();
  outputBatch.writeToStream(outputStream);
  newContainer.zeroVectors();
  logger.debug("Took {} us to spill {} records", watch.elapsed(TimeUnit.MICROSECONDS), recordCount);
  spilledBatches++;
}
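Reading a spilled batch back uses the same wrapper in the opposite direction. The sketch below is not the actual BatchGroup read path in Drill; it only illustrates how a batch written by addBatch could be deserialized, assuming the same allocator field and an already opened input stream over the spill file.

// Hypothetical read-back helper for a batch spilled by addBatch() above.
private VectorContainer readSpilledBatch(InputStream spillStream) throws IOException {
  final VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(allocator);
  wrap.readFromStream(spillStream);   // consumes exactly one serialized batch
  return wrap.get();                  // container backed by buffers from 'allocator'
}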
Use of org.apache.drill.exec.cache.VectorAccessibleSerializable in project drill by axbaretto.
The class DumpCat, method doQuery.
/**
 * Query mode:
 * $drill-dumpcat --file=local:///tmp/drilltrace/[queryid]_[tag]_[majorid]_[minor]_[operator]
 * Batches: 135
 * Records: 53,214/53,214 // the first number is the selected record count, the second is the total record count.
 * Selected Records: 53,214
 * Average Record Size: 74 bytes
 * Total Data Size: 12,345 bytes
 * Number of Empty Batches: 1
 * Schema changes: 1
 * Schema change batch indices: 0
 * @throws Exception
 */
protected void doQuery(FileInputStream input) throws Exception {
  int batchNum = 0;
  int emptyBatchNum = 0;
  BatchSchema prevSchema = null;
  final List<Integer> schemaChangeIdx = Lists.newArrayList();
  final BatchMetaInfo aggBatchMetaInfo = new BatchMetaInfo();
  while (input.available() > 0) {
    final VectorAccessibleSerializable vcSerializable = new VectorAccessibleSerializable(DumpCat.allocator);
    vcSerializable.readFromStream(input);
    final VectorContainer vectorContainer = vcSerializable.get();
    aggBatchMetaInfo.add(getBatchMetaInfo(vcSerializable));
    if (vectorContainer.getRecordCount() == 0) {
      emptyBatchNum++;
    }
    if (prevSchema != null && !vectorContainer.getSchema().equals(prevSchema)) {
      schemaChangeIdx.add(batchNum);
    }
    prevSchema = vectorContainer.getSchema();
    batchNum++;
    vectorContainer.zeroVectors();
  }
  /* Output the summary stats */
  System.out.println(String.format("Total # of batches: %d", batchNum));
  // Output: rows, selected rows, average record size, total data size.
  System.out.println(aggBatchMetaInfo.toString());
  System.out.println(String.format("Empty batch : %d", emptyBatchNum));
  System.out.println(String.format("Schema changes : %d", schemaChangeIdx.size()));
  System.out.println(String.format("Schema change batch index : %s", schemaChangeIdx.toString()));
}
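As a usage sketch, the loop above would typically be driven by opening a FileInputStream over a dump file produced by the trace operator. The path below is only a placeholder following the naming scheme from the javadoc, and the call assumes a context that can propagate the checked exceptions.

// Illustrative driver for doQuery(); the trace-file path is a placeholder.
final File traceFile = new File("/tmp/drilltrace/[queryid]_[tag]_0_0_mock-scan");
try (FileInputStream input = new FileInputStream(traceFile)) {
  doQuery(input);   // prints batch count, record stats, empty-batch and schema-change summaries
}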
Use of org.apache.drill.exec.cache.VectorAccessibleSerializable in project drill by axbaretto.
The class TraceRecordBatch, method doWork.
/**
 * Invoked for every incoming record batch; simply dumps the buffers associated with all the value vectors
 * in the batch to a log file.
 */
@Override
protected IterOutcome doWork() {
  boolean incomingHasSv2 = incoming.getSchema().getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE;
  if (incomingHasSv2) {
    sv = incoming.getSelectionVector2();
  } else {
    sv = null;
  }
  WritableBatch batch = WritableBatch.getBatchNoHVWrap(incoming.getRecordCount(), incoming, incomingHasSv2);
  VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(batch, sv, oContext.getAllocator());
  try {
    wrap.writeToStreamAndRetain(fos);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  batch.reconstructContainer(localAllocator, container);
  if (incomingHasSv2) {
    sv = wrap.getSv2();
  }
  return IterOutcome.OK;
}
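Two details in doWork are worth noting: the selection vector is passed into the wrapper's three-argument constructor, and the batch is written with writeToStreamAndRetain rather than writeToStream, which (as the name suggests) keeps the serialized buffers around so the container can be reconstructed and handed downstream; contrast the spill path in addBatch, which calls writeToStream and then zeroes the vectors. Below is a self-contained round-trip sketch of the symmetric write/read API, not taken from Drill itself; container and allocator are assumed inputs.

// Hedged round-trip sketch: write a batch the way addBatch()/doWork() do,
// then read it back the way testFilter()/doQuery() do.
VectorContainer roundTrip(VectorContainer container, BufferAllocator allocator) throws IOException {
  final ByteArrayOutputStream out = new ByteArrayOutputStream();
  final WritableBatch writable = WritableBatch.getBatchNoHVWrap(container.getRecordCount(), container, false);
  new VectorAccessibleSerializable(writable, allocator).writeToStream(out);
  final VectorAccessibleSerializable reader = new VectorAccessibleSerializable(allocator);
  reader.readFromStream(new ByteArrayInputStream(out.toByteArray()));
  return reader.get();   // same schema and record count as the input container
}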