Search in sources :

Example 1 with FormatSelection

use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.

the class FileSystemPartitionDescriptor method createTableScan.

/**
 * Creates a new table scan restricted to the files of the supplied partition locations.
 *
 * <p>Every location contributes its entire partition location; a composite location
 * contributes the locations of the sub-partitions returned by
 * {@code getPartitionLocationRecursive()} instead.
 *
 * @param newPartitionLocation partition locations the new scan should read
 * @param cacheFileRoot passed through to the new file selection / delegate
 * @param wasAllPartitionsPruned passed through to the new file selection / delegate
 * @param metaContext metadata context attached to the new file selection
 * @return a {@code DrillScanRel} when the current scan is a {@code DrillScanRel}, otherwise
 *         whatever {@code createNewTableScanFromSelection} returns for an
 *         {@code EnumerableTableScan}
 * @throws UnsupportedOperationException if the current scan is neither of those two kinds
 */
@Override
public TableScan createTableScan(List<PartitionLocation> newPartitionLocation, String cacheFileRoot, boolean wasAllPartitionsPruned, MetadataContext metaContext) throws Exception {
    final List<String> prunedFiles = Lists.newArrayList();
    for (final PartitionLocation partition : newPartitionLocation) {
        if (partition.isCompositePartition()) {
            // Composite partition: collect the location of each sub-partition instead.
            for (final PartitionLocation leaf : partition.getPartitionLocationRecursive()) {
                prunedFiles.add(leaf.getEntirePartitionLocation());
            }
        } else {
            prunedFiles.add(partition.getEntirePartitionLocation());
        }
    }

    if (scanRel instanceof DrillScanRel) {
        final DrillScanRel drillScan = (DrillScanRel) scanRel;
        final FormatSelection tableSelection = (FormatSelection) table.getSelection();
        // The new selection keeps the dir status of the table's current selection.
        final FileSelection prunedSelection = new FileSelection(null, prunedFiles, getBaseTableLocation(), cacheFileRoot, wasAllPartitionsPruned, tableSelection.getSelection().getDirStatus());
        prunedSelection.setMetaContext(metaContext);
        final FileGroupScan currentGroupScan = (FileGroupScan) drillScan.getGroupScan();
        final FileGroupScan prunedGroupScan = currentGroupScan.clone(prunedSelection);
        return new DrillScanRel(drillScan.getCluster(), drillScan.getTraitSet().plus(DrillRel.DRILL_LOGICAL), drillScan.getTable(), prunedGroupScan, drillScan.getRowType(), drillScan.getColumns(), true);
    }

    if (scanRel instanceof EnumerableTableScan) {
        return createNewTableScanFromSelection((EnumerableTableScan) scanRel, prunedFiles, cacheFileRoot, wasAllPartitionsPruned, metaContext);
    }

    throw new UnsupportedOperationException("Only DrillScanRel and EnumerableTableScan is allowed!");
}
Also used : FileSelection(org.apache.drill.exec.store.dfs.FileSelection) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) FileGroupScan(org.apache.drill.exec.physical.base.FileGroupScan) EnumerableTableScan(org.apache.calcite.adapter.enumerable.EnumerableTableScan) DirPrunedEnumerableTableScan(org.apache.drill.exec.planner.logical.DirPrunedEnumerableTableScan) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection)

Example 2 with FormatSelection

use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by axbaretto.

the class FileSystemPartitionDescriptor method createTableScan.

/**
 * Produces a table scan that covers only the given partition locations.
 *
 * <p>The file list is assembled from each location's entire partition location; composite
 * locations are replaced by the locations of their recursively obtained sub-partitions.
 *
 * @param newPartitionLocation the partition locations to scan
 * @param cacheFileRoot forwarded unchanged to the new selection
 * @param wasAllPartitionsPruned forwarded unchanged to the new selection
 * @param metaContext metadata context set on the new file selection
 * @return the replacement scan for the current {@code scanRel}
 * @throws UnsupportedOperationException when {@code scanRel} is neither a
 *         {@code DrillScanRel} nor an {@code EnumerableTableScan}
 */
@Override
public TableScan createTableScan(List<PartitionLocation> newPartitionLocation, String cacheFileRoot, boolean wasAllPartitionsPruned, MetadataContext metaContext) throws Exception {
    final List<String> selectedFiles = Lists.newArrayList();
    for (final PartitionLocation candidate : newPartitionLocation) {
        if (!candidate.isCompositePartition()) {
            selectedFiles.add(candidate.getEntirePartitionLocation());
            continue;
        }
        // Composite location: add every sub-partition's location.
        final Collection<SimplePartitionLocation> children = candidate.getPartitionLocationRecursive();
        for (final PartitionLocation child : children) {
            selectedFiles.add(child.getEntirePartitionLocation());
        }
    }

    // Handle everything that is not a DrillScanRel up front.
    if (!(scanRel instanceof DrillScanRel)) {
        if (scanRel instanceof EnumerableTableScan) {
            return createNewTableScanFromSelection((EnumerableTableScan) scanRel, selectedFiles, cacheFileRoot, wasAllPartitionsPruned, metaContext);
        }
        throw new UnsupportedOperationException("Only DrillScanRel and EnumerableTableScan is allowed!");
    }

    final FormatSelection currentSelection = (FormatSelection) table.getSelection();
    final FileSelection narrowedSelection = new FileSelection(
        null,
        selectedFiles,
        getBaseTableLocation(),
        cacheFileRoot,
        wasAllPartitionsPruned,
        currentSelection.getSelection().getDirStatus());
    narrowedSelection.setMetaContext(metaContext);

    // Clone the existing group scan so that it reads the narrowed selection.
    final FileGroupScan narrowedGroupScan = ((FileGroupScan) ((DrillScanRel) scanRel).getGroupScan()).clone(narrowedSelection);
    return new DrillScanRel(
        scanRel.getCluster(),
        scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
        scanRel.getTable(),
        narrowedGroupScan,
        scanRel.getRowType(),
        ((DrillScanRel) scanRel).getColumns(),
        true);
}
Also used : FileSelection(org.apache.drill.exec.store.dfs.FileSelection) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) FileGroupScan(org.apache.drill.exec.physical.base.FileGroupScan) EnumerableTableScan(org.apache.calcite.adapter.enumerable.EnumerableTableScan) DirPrunedEnumerableTableScan(org.apache.drill.exec.planner.logical.DirPrunedEnumerableTableScan) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection)

Example 3 with FormatSelection

use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by axbaretto.

the class FileSystemPartitionDescriptor method getFileLocationsAndStatus.

/**
 * Returns the file locations of the current scan together with an "expanded partial" flag.
 *
 * <p>For a {@code DrillScanRel} whose group scan reports files, those files are used and the
 * flag is {@code false}. Otherwise, for a {@code DrillScanRel} or an
 * {@code EnumerableTableScan}, both values come from the table's file selection. For any
 * other scan type the pair holds {@code null} and {@code false}.
 *
 * @return pair of (file locations, possibly {@code null}; expanded-partial flag)
 */
protected Pair<Collection<String>, Boolean> getFileLocationsAndStatus() {
    Collection<String> locations = null;
    boolean expandedPartial = false;

    final boolean isDrillScan = scanRel instanceof DrillScanRel;
    if (isDrillScan && ((DrillScanRel) scanRel).getGroupScan().hasFiles()) {
        // If a particular GroupScan provides files, take the list from there rather than from
        // DrillTable because GroupScan would have the updated version of the selection.
        locations = ((DrillScanRel) scanRel).getGroupScan().getFiles();
    } else if (isDrillScan || scanRel instanceof EnumerableTableScan) {
        // Fall back to the selection stored on the table.
        final FileSelection tableSelection = ((FormatSelection) table.getSelection()).getSelection();
        locations = tableSelection.getFiles();
        expandedPartial = tableSelection.isExpandedPartial();
    }

    return Pair.of(locations, expandedPartial);
}
Also used : FileSelection(org.apache.drill.exec.store.dfs.FileSelection) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) EnumerableTableScan(org.apache.calcite.adapter.enumerable.EnumerableTableScan) DirPrunedEnumerableTableScan(org.apache.drill.exec.planner.logical.DirPrunedEnumerableTableScan) Collection(java.util.Collection) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection)

Example 4 with FormatSelection

use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by axbaretto.

the class RefreshMetadataHandler method getPlan.

/**
 * Builds the plan for a metadata-refresh statement.
 *
 * <p>The statement is validated step by step; each failed check returns a direct plan
 * carrying an explanatory message instead of throwing:
 * <ol>
 *   <li>the schema path must resolve to a schema;</li>
 *   <li>the table name must not contain the glob characters {@code *} or {@code ?};</li>
 *   <li>the table must exist and be a {@code DrillTable};</li>
 *   <li>its selection must be a {@code FormatSelection} whose format is either a
 *       {@code ParquetFormatConfig} or a {@code NamedFormatPluginConfig} named
 *       {@code "parquet"};</li>
 *   <li>the selection root must be a directory.</li>
 * </ol>
 * When all checks pass, {@code Metadata.createMeta} is invoked for the selection root.
 *
 * <p>Any exception raised inside the method is logged and converted into a failed direct
 * plan whose message is {@code "Error: " + e.getMessage()}.
 *
 * @param sqlNode the parsed statement; unwrapped to {@code SqlRefreshMetadata}
 * @return a direct plan reporting success or the reason for failure
 */
@Override
public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConversionException, IOException, ForemanSetupException {
    final SqlRefreshMetadata refreshTable = unwrap(sqlNode, SqlRefreshMetadata.class);
    try {
        final SchemaPlus schema = findSchema(config.getConverter().getDefaultSchema(), refreshTable.getSchemaPath());
        if (schema == null) {
            return direct(false, "Storage plugin or workspace does not exist [%s]", SchemaUtilites.SCHEMA_PATH_JOINER.join(refreshTable.getSchemaPath()));
        }
        final String tableName = refreshTable.getName();
        if (tableName.contains("*") || tableName.contains("?")) {
            return direct(false, "Glob path %s not supported for metadata refresh", tableName);
        }
        final Table table = schema.getTable(tableName);
        if (table == null) {
            return direct(false, "Table %s does not exist.", tableName);
        }
        if (!(table instanceof DrillTable)) {
            return notSupported(tableName);
        }
        final DrillTable drillTable = (DrillTable) table;
        final Object selection = drillTable.getSelection();
        if (selection instanceof FileSelection && ((FileSelection) selection).isEmptyDirectory()) {
            return direct(false, "Table %s is empty and doesn't contain any parquet files.", tableName);
        }
        if (!(selection instanceof FormatSelection)) {
            return notSupported(tableName);
        }
        FormatSelection formatSelection = (FormatSelection) selection;
        FormatPluginConfig formatConfig = formatSelection.getFormat();
        // Constant-first equals: a named config whose name is null is simply "not parquet"
        // rather than an NPE that the catch block below would report as "Error: null".
        final boolean isNamedParquet = (formatConfig instanceof NamedFormatPluginConfig)
            && "parquet".equals(((NamedFormatPluginConfig) formatConfig).name);
        if (!((formatConfig instanceof ParquetFormatConfig) || isNamedParquet)) {
            return notSupported(tableName);
        }
        FileSystemPlugin plugin = (FileSystemPlugin) drillTable.getPlugin();
        DrillFileSystem fs = new DrillFileSystem(plugin.getFormatPlugin(formatSelection.getFormat()).getFsConf());
        String selectionRoot = formatSelection.getSelection().selectionRoot;
        if (!fs.getFileStatus(new Path(selectionRoot)).isDirectory()) {
            return notSupported(tableName);
        }
        // A named "parquet" config is not a ParquetFormatConfig instance; substitute a
        // default-constructed one so the cast below is valid.
        if (!(formatConfig instanceof ParquetFormatConfig)) {
            formatConfig = new ParquetFormatConfig();
        }
        Metadata.createMeta(fs, selectionRoot, (ParquetFormatConfig) formatConfig);
        return direct(true, "Successfully updated metadata for table %s.", tableName);
    } catch (Exception e) {
        logger.error("Failed to update metadata for table '{}'", refreshTable.getName(), e);
        return DirectPlan.createDirectPlan(context, false, String.format("Error: %s", e.getMessage()));
    }
}
Also used : FileSelection(org.apache.drill.exec.store.dfs.FileSelection) Path(org.apache.hadoop.fs.Path) FileSystemPlugin(org.apache.drill.exec.store.dfs.FileSystemPlugin) Table(org.apache.calcite.schema.Table) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) SchemaPlus(org.apache.calcite.schema.SchemaPlus) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) SqlRefreshMetadata(org.apache.drill.exec.planner.sql.parser.SqlRefreshMetadata) IOException(java.io.IOException) ValidationException(org.apache.calcite.tools.ValidationException) ForemanSetupException(org.apache.drill.exec.work.foreman.ForemanSetupException) RelConversionException(org.apache.calcite.tools.RelConversionException) NamedFormatPluginConfig(org.apache.drill.exec.store.dfs.NamedFormatPluginConfig) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) NamedFormatPluginConfig(org.apache.drill.exec.store.dfs.NamedFormatPluginConfig) FormatPluginConfig(org.apache.drill.common.logical.FormatPluginConfig) ParquetFormatConfig(org.apache.drill.exec.store.parquet.ParquetFormatConfig)

Example 5 with FormatSelection

use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.

the class DrillStatsTable method readStatistics.

/**
 * Reads table statistics from {@code path} through the table's format plugin.
 *
 * <p>Statistics are read only when the table's selection is a {@code FormatSelection}, its
 * storage plugin is a {@code FileSystemPlugin}, its format config is a
 * {@code ParquetFormatConfig}, and the resolved format plugin reports that it supports
 * statistics. In every other case {@code null} is returned.
 *
 * @param drillTable table whose selection and plugin decide whether statistics can be read
 * @param path location handed to the format plugin's {@code readStatistics}
 * @return the statistics read by the format plugin, or {@code null} if not applicable
 * @throws IOException declared by the signature; presumably propagated from the format
 *         plugin's read call (confirm against {@code FormatPlugin})
 */
private TableStatistics readStatistics(DrillTable drillTable, Path path) throws IOException {
    final Object selection = drillTable.getSelection();
    if (!(selection instanceof FormatSelection)) {
        return null;
    }

    final StoragePlugin tablePlugin = drillTable.getPlugin();
    final FormatPluginConfig tableFormat = ((FormatSelection) selection).getFormat();
    if (!(tablePlugin instanceof FileSystemPlugin) || !(tableFormat instanceof ParquetFormatConfig)) {
        return null;
    }

    final FormatPlugin formatPlugin = tablePlugin.getFormatPlugin(tableFormat);
    return formatPlugin.supportsStatistics() ? formatPlugin.readStatistics(fs, path) : null;
}
Also used : FileSystemPlugin(org.apache.drill.exec.store.dfs.FileSystemPlugin) FormatPluginConfig(org.apache.drill.common.logical.FormatPluginConfig) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) ParquetFormatConfig(org.apache.drill.exec.store.parquet.ParquetFormatConfig) StoragePlugin(org.apache.drill.exec.store.StoragePlugin) FormatPlugin(org.apache.drill.exec.store.dfs.FormatPlugin)

Aggregations

FormatSelection (org.apache.drill.exec.store.dfs.FormatSelection) 24, Path (org.apache.hadoop.fs.Path) 14, SchemaPath (org.apache.drill.common.expression.SchemaPath) 12, FileSelection (org.apache.drill.exec.store.dfs.FileSelection) 12, PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings) 8, DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel) 7, DrillTable (org.apache.drill.exec.planner.logical.DrillTable) 7, DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem) 7, IOException (java.io.IOException) 6, ArrayList (java.util.ArrayList) 6, RelNode (org.apache.calcite.rel.RelNode) 6, DynamicDrillTable (org.apache.drill.exec.planner.logical.DynamicDrillTable) 6, Collection (java.util.Collection) 5, List (java.util.List) 5, Collectors (java.util.stream.Collectors) 5, MetadataType (org.apache.drill.metastore.metadata.MetadataType) 5, Multimap (org.apache.drill.shaded.guava.com.google.common.collect.Multimap) 5, Map (java.util.Map) 4, EnumerableTableScan (org.apache.calcite.adapter.enumerable.EnumerableTableScan) 4, FormatPluginConfig (org.apache.drill.common.logical.FormatPluginConfig) 4