Search in sources :

Example 1 with ParquetReaderConfig

use of org.apache.drill.exec.store.parquet.ParquetReaderConfig in project drill by apache.

the class ConvertCountToDirectScanRule method checkMetadataForScanStats.

/**
 * Tries to load the Parquet metadata summary for the table so that row-count statistics can be
 * taken from the metadata cache.
 *
 * <p>Only Parquet tables are supported: the format config must either be a
 * {@link ParquetFormatConfig} or a {@link NamedFormatPluginConfig} whose name is "parquet".
 * A named config carries no Parquet-specific settings, so a default {@link ParquetFormatConfig}
 * is used for it when building the reader config.
 *
 * @param settings planner settings whose options are passed to the Parquet reader config
 * @param drillTable table whose storage plugin supplies the file system configuration
 * @param formatSelection selection providing the format config and the selection / cache file root
 * @return a pair of {@code (true, summary)} when a summary was obtained; {@code (false, null)}
 *         when the format is not Parquet, the file system could not be created, or no summary
 *         was returned
 */
private Pair<Boolean, Metadata_V4.MetadataSummary> checkMetadataForScanStats(PlannerSettings settings, DrillTable drillTable, FormatSelection formatSelection) {
    // Currently only support metadata rowcount stats for Parquet tables
    FormatPluginConfig formatConfig = formatSelection.getFormat();
    boolean isParquetConfig = formatConfig instanceof ParquetFormatConfig;
    // Constant-first equals keeps a named config without a name from raising a NullPointerException.
    boolean isNamedParquet = (formatConfig instanceof NamedFormatPluginConfig) && "parquet".equals(((NamedFormatPluginConfig) formatConfig).getName());
    if (!isParquetConfig && !isNamedParquet) {
        return new ImmutablePair<>(false, null);
    }
    FileSystemPlugin plugin = (FileSystemPlugin) drillTable.getPlugin();
    DrillFileSystem fs;
    try {
        fs = new DrillFileSystem(plugin.getFormatPlugin(formatSelection.getFormat()).getFsConf());
    } catch (IOException e) {
        logger.warn("Unable to create the file system object for retrieving statistics from metadata cache file ", e);
        return new ImmutablePair<>(false, null);
    }
    // check if the cacheFileRoot has been set: this is needed because after directory pruning, the
    // cacheFileRoot could have been changed and not be the same as the original selectionRoot
    Path selectionRoot = formatSelection.getSelection().getCacheFileRoot() != null ? formatSelection.getSelection().getCacheFileRoot() : formatSelection.getSelection().getSelectionRoot();
    // A NamedFormatPluginConfig passes the guard above but is not a ParquetFormatConfig, so casting
    // it directly would throw ClassCastException; fall back to a default Parquet config instead.
    ParquetFormatConfig parquetFormatConfig = isParquetConfig ? (ParquetFormatConfig) formatConfig : new ParquetFormatConfig();
    ParquetReaderConfig parquetReaderConfig = ParquetReaderConfig.builder().withFormatConfig(parquetFormatConfig).withOptions(settings.getOptions()).build();
    Metadata_V4.MetadataSummary metadataSummary = Metadata.getSummary(fs, selectionRoot, false, parquetReaderConfig);
    return metadataSummary != null ? new ImmutablePair<>(true, metadataSummary) : new ImmutablePair<>(false, null);
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) FileSystemPlugin(org.apache.drill.exec.store.dfs.FileSystemPlugin) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) FormatPluginConfig(org.apache.drill.common.logical.FormatPluginConfig) NamedFormatPluginConfig(org.apache.drill.exec.store.dfs.NamedFormatPluginConfig) IOException(java.io.IOException) ParquetFormatConfig(org.apache.drill.exec.store.parquet.ParquetFormatConfig) NamedFormatPluginConfig(org.apache.drill.exec.store.dfs.NamedFormatPluginConfig) ParquetReaderConfig(org.apache.drill.exec.store.parquet.ParquetReaderConfig)

Example 2 with ParquetReaderConfig

use of org.apache.drill.exec.store.parquet.ParquetReaderConfig in project drill by apache.

the class RefreshMetadataHandler method getPlan.

/**
 * Handles a metadata refresh statement by validating the target table and then generating the
 * Parquet metadata for it.
 *
 * <p>A direct plan with a failure message is returned when the schema or table cannot be found,
 * the table name contains glob characters, or the table is an empty directory. Tables that are
 * not Drill tables, are not format selections, are not Parquet, or whose selection root is not a
 * directory are reported through {@code notSupported}. Any exception raised along the way is
 * logged and turned into a failed direct plan carrying the exception message.
 *
 * @param sqlNode the parsed statement, unwrapped as {@link SqlRefreshMetadata}
 * @return a direct plan describing success or the reason for failure
 * @throws ForemanSetupException declared by the handler contract
 */
@Override
public PhysicalPlan getPlan(SqlNode sqlNode) throws ForemanSetupException {
    final SqlRefreshMetadata refreshNode = unwrap(sqlNode, SqlRefreshMetadata.class);
    try {
        final SchemaPlus targetSchema = findSchema(config.getConverter().getDefaultSchema(), refreshNode.getSchemaPath());
        if (targetSchema == null) {
            final String schemaPath = SchemaUtilites.SCHEMA_PATH_JOINER.join(refreshNode.getSchemaPath());
            return direct(false, "Storage plugin or workspace does not exist [%s]", schemaPath);
        }

        final String tableName = refreshNode.getName();
        final SqlNodeList requestedColumns = getColumnList(refreshNode);
        final Set<SchemaPath> rootColumns = getColumnRootSegments(requestedColumns);
        final SqlLiteral allColumnsFlag = refreshNode.getAllColumns();

        final boolean hasGlob = tableName.contains("*") || tableName.contains("?");
        if (hasGlob) {
            return direct(false, "Glob path %s not supported for metadata refresh", tableName);
        }

        final Table resolvedTable = targetSchema.getTable(tableName);
        if (resolvedTable == null) {
            return direct(false, "Table %s does not exist.", tableName);
        }
        if (!(resolvedTable instanceof DrillTable)) {
            return notSupported(tableName);
        }

        final DrillTable drillTable = (DrillTable) resolvedTable;
        final Object rawSelection = drillTable.getSelection();
        final boolean emptyDirectory = rawSelection instanceof FileSelection && ((FileSelection) rawSelection).isEmptyDirectory();
        if (emptyDirectory) {
            return direct(false, "Table %s is empty and doesn't contain any parquet files.", tableName);
        }
        if (!(rawSelection instanceof FormatSelection)) {
            return notSupported(tableName);
        }

        final FormatSelection formatSelection = (FormatSelection) rawSelection;
        final FormatPluginConfig formatConfig = formatSelection.getFormat();
        final boolean nativeParquet = formatConfig instanceof ParquetFormatConfig;
        if (!nativeParquet) {
            // Only a named config that refers to "parquet" is accepted as an alternative.
            final boolean namedParquet = (formatConfig instanceof NamedFormatPluginConfig) && ((NamedFormatPluginConfig) formatConfig).getName().equals("parquet");
            if (!namedParquet) {
                return notSupported(tableName);
            }
        }

        // Always create filesystem object using process user, since it owns the metadata file
        final String processUser = ImpersonationUtil.getProcessUserName();
        final DrillFileSystem fs = ImpersonationUtil.createFileSystem(processUser, drillTable.getPlugin().getFormatPlugin(formatConfig).getFsConf());
        final Path selectionRoot = formatSelection.getSelection().getSelectionRoot();
        if (!fs.getFileStatus(selectionRoot).isDirectory()) {
            return notSupported(tableName);
        }

        // A named config is replaced by a default Parquet config for the reader.
        final ParquetFormatConfig parquetConfig = nativeParquet ? (ParquetFormatConfig) formatConfig : new ParquetFormatConfig();
        final ParquetReaderConfig readerConfig = ParquetReaderConfig.builder()
            .withFormatConfig(parquetConfig)
            .withOptions(context.getOptions())
            .build();
        Metadata.createMeta(fs, selectionRoot, readerConfig, allColumnsFlag.booleanValue(), rootColumns);
        return direct(true, "Successfully updated metadata for table %s.", tableName);
    } catch (Exception e) {
        logger.error("Failed to update metadata for table '{}'", refreshNode.getName(), e);
        final String failure = String.format("Error: %s", e.getMessage());
        return DirectPlan.createDirectPlan(context, false, failure);
    }
}
Also used : FileSelection(org.apache.drill.exec.store.dfs.FileSelection) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) Table(org.apache.calcite.schema.Table) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) SchemaPlus(org.apache.calcite.schema.SchemaPlus) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) SqlRefreshMetadata(org.apache.drill.exec.planner.sql.parser.SqlRefreshMetadata) ForemanSetupException(org.apache.drill.exec.work.foreman.ForemanSetupException) NamedFormatPluginConfig(org.apache.drill.exec.store.dfs.NamedFormatPluginConfig) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) SchemaPath(org.apache.drill.common.expression.SchemaPath) FormatPluginConfig(org.apache.drill.common.logical.FormatPluginConfig) NamedFormatPluginConfig(org.apache.drill.exec.store.dfs.NamedFormatPluginConfig) SqlNodeList(org.apache.calcite.sql.SqlNodeList) ParquetFormatConfig(org.apache.drill.exec.store.parquet.ParquetFormatConfig) SqlLiteral(org.apache.calcite.sql.SqlLiteral) ParquetReaderConfig(org.apache.drill.exec.store.parquet.ParquetReaderConfig)

Aggregations

SchemaPath (org.apache.drill.common.expression.SchemaPath)2 FormatPluginConfig (org.apache.drill.common.logical.FormatPluginConfig)2 DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem)2 NamedFormatPluginConfig (org.apache.drill.exec.store.dfs.NamedFormatPluginConfig)2 ParquetFormatConfig (org.apache.drill.exec.store.parquet.ParquetFormatConfig)2 ParquetReaderConfig (org.apache.drill.exec.store.parquet.ParquetReaderConfig)2 Path (org.apache.hadoop.fs.Path)2 IOException (java.io.IOException)1 SchemaPlus (org.apache.calcite.schema.SchemaPlus)1 Table (org.apache.calcite.schema.Table)1 SqlLiteral (org.apache.calcite.sql.SqlLiteral)1 SqlNodeList (org.apache.calcite.sql.SqlNodeList)1 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)1 DrillTable (org.apache.drill.exec.planner.logical.DrillTable)1 SqlRefreshMetadata (org.apache.drill.exec.planner.sql.parser.SqlRefreshMetadata)1 FileSelection (org.apache.drill.exec.store.dfs.FileSelection)1 FileSystemPlugin (org.apache.drill.exec.store.dfs.FileSystemPlugin)1 FormatSelection (org.apache.drill.exec.store.dfs.FormatSelection)1 Metadata_V4 (org.apache.drill.exec.store.parquet.metadata.Metadata_V4)1 ForemanSetupException (org.apache.drill.exec.work.foreman.ForemanSetupException)1