Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.
The class ParquetGroupScan, method populatePruningVector.
public void populatePruningVector(ValueVector v, int index, SchemaPath column, String file) {
  String f = Path.getPathWithoutSchemeAndAuthority(new Path(file)).toString();
  MinorType type = getTypeForColumn(column).getMinorType();
  switch (type) {
    case INT: {
      NullableIntVector intVector = (NullableIntVector) v;
      Integer value = (Integer) partitionValueMap.get(f).get(column);
      intVector.getMutator().setSafe(index, value);
      return;
    }
    case SMALLINT: {
      NullableSmallIntVector smallIntVector = (NullableSmallIntVector) v;
      Integer value = (Integer) partitionValueMap.get(f).get(column);
      smallIntVector.getMutator().setSafe(index, value.shortValue());
      return;
    }
    case TINYINT: {
      NullableTinyIntVector tinyIntVector = (NullableTinyIntVector) v;
      Integer value = (Integer) partitionValueMap.get(f).get(column);
      tinyIntVector.getMutator().setSafe(index, value.byteValue());
      return;
    }
    case UINT1: {
      NullableUInt1Vector intVector = (NullableUInt1Vector) v;
      Integer value = (Integer) partitionValueMap.get(f).get(column);
      intVector.getMutator().setSafe(index, value.byteValue());
      return;
    }
    case UINT2: {
      NullableUInt2Vector intVector = (NullableUInt2Vector) v;
      Integer value = (Integer) partitionValueMap.get(f).get(column);
      intVector.getMutator().setSafe(index, (char) value.shortValue());
      return;
    }
    case UINT4: {
      NullableUInt4Vector intVector = (NullableUInt4Vector) v;
      Integer value = (Integer) partitionValueMap.get(f).get(column);
      intVector.getMutator().setSafe(index, value);
      return;
    }
    case BIGINT: {
      NullableBigIntVector bigIntVector = (NullableBigIntVector) v;
      Long value = (Long) partitionValueMap.get(f).get(column);
      bigIntVector.getMutator().setSafe(index, value);
      return;
    }
    case FLOAT4: {
      NullableFloat4Vector float4Vector = (NullableFloat4Vector) v;
      Float value = (Float) partitionValueMap.get(f).get(column);
      float4Vector.getMutator().setSafe(index, value);
      return;
    }
    case FLOAT8: {
      NullableFloat8Vector float8Vector = (NullableFloat8Vector) v;
      Double value = (Double) partitionValueMap.get(f).get(column);
      float8Vector.getMutator().setSafe(index, value);
      return;
    }
    case VARBINARY: {
      NullableVarBinaryVector varBinaryVector = (NullableVarBinaryVector) v;
      Object s = partitionValueMap.get(f).get(column);
      byte[] bytes;
      if (s instanceof Binary) {
        bytes = ((Binary) s).getBytes();
      } else if (s instanceof String) {
        bytes = ((String) s).getBytes();
      } else if (s instanceof byte[]) {
        bytes = (byte[]) s;
      } else {
        throw new UnsupportedOperationException("Unable to create column data for type: " + type);
      }
      varBinaryVector.getMutator().setSafe(index, bytes, 0, bytes.length);
      return;
    }
    case DECIMAL18: {
      NullableDecimal18Vector decimalVector = (NullableDecimal18Vector) v;
      Long value = (Long) partitionValueMap.get(f).get(column);
      decimalVector.getMutator().setSafe(index, value);
      return;
    }
    case DATE: {
      NullableDateVector dateVector = (NullableDateVector) v;
      Integer value = (Integer) partitionValueMap.get(f).get(column);
      dateVector.getMutator().setSafe(index, value * (long) DateTimeConstants.MILLIS_PER_DAY);
      return;
    }
    case TIME: {
      NullableTimeVector timeVector = (NullableTimeVector) v;
      Integer value = (Integer) partitionValueMap.get(f).get(column);
      timeVector.getMutator().setSafe(index, value);
      return;
    }
    case TIMESTAMP: {
      NullableTimeStampVector timeStampVector = (NullableTimeStampVector) v;
      Long value = (Long) partitionValueMap.get(f).get(column);
      timeStampVector.getMutator().setSafe(index, value);
      return;
    }
    case VARCHAR: {
      NullableVarCharVector varCharVector = (NullableVarCharVector) v;
      Object s = partitionValueMap.get(f).get(column);
      byte[] bytes;
      if (s instanceof String) {
        // if the metadata was read from a JSON cache file it may be a string type
        bytes = ((String) s).getBytes();
      } else if (s instanceof Binary) {
        bytes = ((Binary) s).getBytes();
      } else if (s instanceof byte[]) {
        bytes = (byte[]) s;
      } else {
        throw new UnsupportedOperationException("Unable to create column data for type: " + type);
      }
      varCharVector.getMutator().setSafe(index, bytes, 0, bytes.length);
      return;
    }
    default:
      throw new UnsupportedOperationException("Unsupported type: " + type);
  }
}
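Every branch above reads its value through partitionValueMap.get(f).get(column), i.e. the SchemaPath is used as an ordinary map key. A minimal standalone sketch of that pattern (the file path and the dir0 column are made up, not part of Drill):

import java.util.HashMap;
import java.util.Map;

import org.apache.drill.common.expression.SchemaPath;

// Minimal sketch, not Drill source: SchemaPath works as a HashMap key because its
// equals()/hashCode() are based on the path segments. The file name and "dir0" are
// hypothetical.
public class SchemaPathKeySketch {
  public static void main(String[] args) {
    Map<String, Map<SchemaPath, Object>> partitionValueMap = new HashMap<>();

    Map<SchemaPath, Object> fileValues = new HashMap<>();
    fileValues.put(SchemaPath.getSimplePath("dir0"), 1994);
    partitionValueMap.put("/data/file1.parquet", fileValues);

    // A structurally equal SchemaPath retrieves the same entry, mirroring
    // partitionValueMap.get(f).get(column) in populatePruningVector.
    Object value = partitionValueMap.get("/data/file1.parquet")
        .get(SchemaPath.getSimplePath("dir0"));
    System.out.println(value);  // 1994
  }
}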
Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.
The class ParquetGroupScan, method init.
private void init(MetadataContext metaContext) throws IOException {
  if (entries.size() == 1 && parquetTableMetadata == null) {
    Path p = Path.getPathWithoutSchemeAndAuthority(new Path(entries.get(0).getPath()));
    Path metaPath = null;
    if (fs.isDirectory(p)) {
      // Using the metadata file makes sense when querying a directory; otherwise
      // if querying a single file we can look up the metadata directly from the file
      metaPath = new Path(p, Metadata.METADATA_FILENAME);
    }
    if (metaPath != null && fs.exists(metaPath)) {
      usedMetadataCache = true;
      parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString(), metaContext, formatConfig);
    } else {
      parquetTableMetadata = Metadata.getParquetTableMetadata(fs, p.toString(), formatConfig);
    }
  } else {
    Path p = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot));
    Path metaPath = new Path(p, Metadata.METADATA_FILENAME);
    if (fs.isDirectory(new Path(selectionRoot)) && fs.exists(metaPath)) {
      usedMetadataCache = true;
      if (parquetTableMetadata == null) {
        parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString(), metaContext, formatConfig);
      }
      if (fileSet != null) {
        parquetTableMetadata = removeUnneededRowGroups(parquetTableMetadata);
      }
    } else {
      final List<FileStatus> fileStatuses = Lists.newArrayList();
      for (ReadEntryWithPath entry : entries) {
        getFiles(entry.getPath(), fileStatuses);
      }
      parquetTableMetadata = Metadata.getParquetTableMetadata(fs, fileStatuses, formatConfig);
    }
  }
  if (fileSet == null) {
    fileSet = Sets.newHashSet();
    for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
      fileSet.add(file.getPath());
    }
  }
  Map<String, DrillbitEndpoint> hostEndpointMap = Maps.newHashMap();
  for (DrillbitEndpoint endpoint : formatPlugin.getContext().getBits()) {
    hostEndpointMap.put(endpoint.getAddress(), endpoint);
  }
  rowGroupInfos = Lists.newArrayList();
  for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
    int rgIndex = 0;
    for (RowGroupMetadata rg : file.getRowGroups()) {
      RowGroupInfo rowGroupInfo = new RowGroupInfo(file.getPath(), rg.getStart(), rg.getLength(), rgIndex, rg.getRowCount());
      EndpointByteMap endpointByteMap = new EndpointByteMapImpl();
      for (String host : rg.getHostAffinity().keySet()) {
        if (hostEndpointMap.containsKey(host)) {
          endpointByteMap.add(hostEndpointMap.get(host), (long) (rg.getHostAffinity().get(host) * rg.getLength()));
        }
      }
      rowGroupInfo.setEndpointByteMap(endpointByteMap);
      rgIndex++;
      rowGroupInfos.add(rowGroupInfo);
    }
  }
  this.endpointAffinities = AffinityCreator.getAffinityMap(rowGroupInfos);
  columnValueCounts = Maps.newHashMap();
  this.rowCount = 0;
  boolean first = true;
  for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
    for (RowGroupMetadata rowGroup : file.getRowGroups()) {
      long rowCount = rowGroup.getRowCount();
      for (ColumnMetadata column : rowGroup.getColumns()) {
        SchemaPath schemaPath = SchemaPath.getCompoundPath(column.getName());
        Long previousCount = columnValueCounts.get(schemaPath);
        if (previousCount != null) {
          if (previousCount != GroupScan.NO_COLUMN_STATS) {
            if (column.getNulls() != null) {
              Long newCount = rowCount - column.getNulls();
              columnValueCounts.put(schemaPath, columnValueCounts.get(schemaPath) + newCount);
            }
          }
        } else {
          if (column.getNulls() != null) {
            Long newCount = rowCount - column.getNulls();
            columnValueCounts.put(schemaPath, newCount);
          } else {
            columnValueCounts.put(schemaPath, GroupScan.NO_COLUMN_STATS);
          }
        }
        boolean partitionColumn = checkForPartitionColumn(column, first);
        if (partitionColumn) {
          Map<SchemaPath, Object> map = partitionValueMap.get(file.getPath());
          if (map == null) {
            map = Maps.newHashMap();
            partitionValueMap.put(file.getPath(), map);
          }
          Object value = map.get(schemaPath);
          Object currentValue = column.getMaxValue();
          if (value != null) {
            if (value != currentValue) {
              partitionColTypeMap.remove(schemaPath);
            }
          } else {
            map.put(schemaPath, currentValue);
          }
        } else {
          partitionColTypeMap.remove(schemaPath);
        }
      }
      this.rowCount += rowGroup.getRowCount();
      first = false;
    }
  }
}
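The per-column bookkeeping above relies on SchemaPath.getCompoundPath(column.getName()) producing the same key for the same column in every row group. A small standalone sketch of that accumulation (the column name and counts are hypothetical, not taken from Drill):

import java.util.HashMap;
import java.util.Map;

import org.apache.drill.common.expression.SchemaPath;

// Minimal sketch, not Drill source: statistics reported by different row groups for the
// same nested column (for example address.zip) land under one SchemaPath key because
// getCompoundPath builds equal paths from equal name segments.
public class ColumnCountSketch {
  public static void main(String[] args) {
    Map<SchemaPath, Long> columnValueCounts = new HashMap<>();
    String[] name = {"address", "zip"};  // shaped like ColumnMetadata.getName()

    // First row group reports the column.
    SchemaPath firstRowGroup = SchemaPath.getCompoundPath(name);
    columnValueCounts.put(firstRowGroup, 100L);

    // Second row group reports the same column; the update hits the same key.
    SchemaPath secondRowGroup = SchemaPath.getCompoundPath("address", "zip");
    columnValueCounts.put(secondRowGroup, columnValueCounts.get(secondRowGroup) + 150L);

    System.out.println(columnValueCounts.get(SchemaPath.getCompoundPath("address", "zip")));  // 250
  }
}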
Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.
The class ParquetPartitionDescriptor, method getIdIfValid.
@Override
public Integer getIdIfValid(String name) {
  SchemaPath schemaPath = SchemaPath.getSimplePath(name);
  int id = partitionColumns.indexOf(schemaPath);
  if (id == -1) {
    return null;
  }
  return id;
}
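A standalone sketch of the same lookup, showing the null-instead-of-minus-one contract (the partition column names below are hypothetical):

import java.util.Arrays;
import java.util.List;

import org.apache.drill.common.expression.SchemaPath;

// Minimal sketch, not Drill source: List.indexOf falls back on SchemaPath.equals(), so a
// freshly built simple path matches the descriptor's stored partition columns.
public class PartitionIdSketch {
  public static void main(String[] args) {
    List<SchemaPath> partitionColumns = Arrays.asList(
        SchemaPath.getSimplePath("dir0"), SchemaPath.getSimplePath("dir1"));

    System.out.println(getIdIfValid(partitionColumns, "dir1"));  // 1
    System.out.println(getIdIfValid(partitionColumns, "name"));  // null: not a partition column
  }

  static Integer getIdIfValid(List<SchemaPath> partitionColumns, String name) {
    int id = partitionColumns.indexOf(SchemaPath.getSimplePath(name));
    return id == -1 ? null : id;
  }
}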
Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.
The class TestSimpleExternalSort, method validateResults.
private void validateResults(BufferAllocator allocator, List<QueryDataBatch> results) throws SchemaChangeException {
  long previousBigInt = Long.MAX_VALUE;
  int recordCount = 0;
  int batchCount = 0;
  for (QueryDataBatch b : results) {
    RecordBatchLoader loader = new RecordBatchLoader(allocator);
    if (b.getHeader().getRowCount() > 0) {
      batchCount++;
      loader.load(b.getHeader().getDef(), b.getData());
      @SuppressWarnings("resource")
      BigIntVector c1 = (BigIntVector) loader.getValueAccessorById(BigIntVector.class,
          loader.getValueVectorId(new SchemaPath("blue", ExpressionPosition.UNKNOWN)).getFieldIds()).getValueVector();
      BigIntVector.Accessor a1 = c1.getAccessor();
      for (int i = 0; i < c1.getAccessor().getValueCount(); i++) {
        recordCount++;
        assertTrue(String.format("%d > %d", previousBigInt, a1.get(i)), previousBigInt >= a1.get(i));
        previousBigInt = a1.get(i);
      }
    }
    loader.clear();
    b.release();
  }
  System.out.println(String.format("Sorted %,d records in %d batches.", recordCount, batchCount));
}
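The lookup key new SchemaPath("blue", ExpressionPosition.UNKNOWN) is just a single-segment path naming the sorted column. A minimal sketch of that construction (not part of the test itself):

import org.apache.drill.common.expression.ExpressionPosition;
import org.apache.drill.common.expression.SchemaPath;

// Minimal sketch, not Drill source: ExpressionPosition.UNKNOWN is the usual placeholder
// when a path is built in code rather than parsed from query text; the key wraps a single
// root name segment and compares equal to the factory-built form.
public class LookupKeySketch {
  public static void main(String[] args) {
    SchemaPath blue = new SchemaPath("blue", ExpressionPosition.UNKNOWN);
    System.out.println(blue.getRootSegment().getPath());               // blue
    System.out.println(blue.equals(SchemaPath.getSimplePath("blue")));  // true
  }
}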
Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.
The class ExpressionTreeMaterializerTest, method testMaterializingLateboundTreeValidated.
@Test
public void testMaterializingLateboundTreeValidated(@Injectable final RecordBatch batch) throws SchemaChangeException {
  ErrorCollector ec = new ErrorCollector() {
    int errorCount = 0;

    @Override
    public void addGeneralError(ExpressionPosition expr, String s) {
      errorCount++;
    }

    @Override
    public void addUnexpectedArgumentType(ExpressionPosition expr, String name, MajorType actual, MajorType[] expected, int argumentIndex) {
      errorCount++;
    }

    @Override
    public void addUnexpectedArgumentCount(ExpressionPosition expr, int actual, Range<Integer> expected) {
      errorCount++;
    }

    @Override
    public void addUnexpectedArgumentCount(ExpressionPosition expr, int actual, int expected) {
      errorCount++;
    }

    @Override
    public void addNonNumericType(ExpressionPosition expr, MajorType actual) {
      errorCount++;
    }

    @Override
    public void addUnexpectedType(ExpressionPosition expr, int index, MajorType actual) {
      errorCount++;
    }

    @Override
    public void addExpectedConstantValue(ExpressionPosition expr, int actual, String s) {
      errorCount++;
    }

    @Override
    public boolean hasErrors() {
      return errorCount > 0;
    }

    @Override
    public String toErrorString() {
      return String.format("Found %s errors.", errorCount);
    }

    @Override
    public int getErrorCount() {
      return errorCount;
    }
  };
  new NonStrictExpectations() {
    {
      batch.getValueVectorId(new SchemaPath("test", ExpressionPosition.UNKNOWN));
      result = new TypedFieldId(Types.required(MinorType.BIGINT), -5);
    }
  };
  new MockUp<RemoteFunctionRegistry>() {
    @Mock
    long getRegistryVersion() {
      return 0L;
    }
  };
  LogicalExpression functionCallExpr = new FunctionCall("testFunc",
      ImmutableList.of((LogicalExpression) new FieldReference("test", ExpressionPosition.UNKNOWN)),
      ExpressionPosition.UNKNOWN);
  LogicalExpression newExpr = ExpressionTreeMaterializer.materialize(functionCallExpr, batch, ec, registry);
  assertTrue(newExpr instanceof TypedNullConstant);
  assertEquals(1, ec.getErrorCount());
  System.out.println(ec.toErrorString());
}
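The mocked expectation is recorded against new SchemaPath("test", ...) while the expression being materialized holds a FieldReference("test", ...). A minimal sketch of why the two meet, assuming FieldReference keeps SchemaPath's segment-based equality:

import org.apache.drill.common.expression.ExpressionPosition;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.expression.SchemaPath;

// Minimal sketch, not Drill source: FieldReference is a SchemaPath, and equality is
// decided by the root segment, so a plain SchemaPath built for the expectation matches
// the FieldReference argument resolved during materialization.
public class FieldReferenceKeySketch {
  public static void main(String[] args) {
    SchemaPath asPath = new SchemaPath("test", ExpressionPosition.UNKNOWN);
    FieldReference asReference = new FieldReference("test", ExpressionPosition.UNKNOWN);
    System.out.println(asPath.equals(asReference));  // true: same root segment
  }
}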