Example 11 with FormatPluginConfig

Use of org.apache.drill.common.logical.FormatPluginConfig in project drill by apache.

The class StoragePluginTestUtils, method configureFormatPlugins.

public static void configureFormatPlugins(StoragePluginRegistry pluginRegistry, String storagePlugin) throws PluginException {
    FileSystemConfig fileSystemConfig = (FileSystemConfig) pluginRegistry.getStoredConfig(storagePlugin);
    Map<String, FormatPluginConfig> newFormats = new HashMap<>();
    Optional.ofNullable(fileSystemConfig.getFormats()).ifPresent(newFormats::putAll);
    newFormats.put("txt", new TextFormatConfig(ImmutableList.of("txt"), null, "\u0000", null, null, null, null, null));
    newFormats.put("ssv", new TextFormatConfig(ImmutableList.of("ssv"), null, " ", null, null, null, null, null));
    newFormats.put("psv", new TextFormatConfig(ImmutableList.of("tbl"), null, "|", null, null, null, null, null));
    SequenceFileFormatConfig seqConfig = new SequenceFileFormatConfig(ImmutableList.of("seq"));
    newFormats.put("sequencefile", seqConfig);
    newFormats.put("csvh-test", new TextFormatConfig(ImmutableList.of("csvh-test"), null, ",", null, null, null, true, true));
    FileSystemConfig newFileSystemConfig = new FileSystemConfig(fileSystemConfig.getConnection(), fileSystemConfig.getConfig(), fileSystemConfig.getWorkspaces(), newFormats, PlainCredentialsProvider.EMPTY_CREDENTIALS_PROVIDER);
    newFileSystemConfig.setEnabled(fileSystemConfig.isEnabled());
    pluginRegistry.put(storagePlugin, newFileSystemConfig);
}
Also used: SequenceFileFormatConfig (org.apache.drill.exec.store.easy.sequencefile.SequenceFileFormatConfig), HashMap (java.util.HashMap), FormatPluginConfig (org.apache.drill.common.logical.FormatPluginConfig), TextFormatConfig (org.apache.drill.exec.store.easy.text.TextFormatPlugin.TextFormatConfig), FileSystemConfig (org.apache.drill.exec.store.dfs.FileSystemConfig)
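
A minimal usage sketch: a cluster-based test registers these extra formats on the "dfs" plugin during setup. This assumes ClusterTest's static cluster and dirTestWatcher fixtures (the same pattern as Example 14 below); the class name FormatPluginSetupTest is hypothetical, and import paths are as assumed from Drill's test sources.

import org.apache.drill.exec.store.StoragePluginRegistry;
import org.apache.drill.exec.util.StoragePluginTestUtils;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.ClusterTest;
import org.junit.BeforeClass;

public class FormatPluginSetupTest extends ClusterTest {

    @BeforeClass
    public static void setup() throws Exception {
        startCluster(ClusterFixture.builder(dirTestWatcher));
        // Adds the txt, ssv, psv, sequencefile and csvh-test formats to "dfs".
        StoragePluginRegistry registry = cluster.drillbit().getContext().getStorage();
        StoragePluginTestUtils.configureFormatPlugins(registry, "dfs");
    }
}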

Example 12 with FormatPluginConfig

Use of org.apache.drill.common.logical.FormatPluginConfig in project drill by apache.

The class ConvertCountToDirectScanRule, method checkMetadataForScanStats.

private Pair<Boolean, Metadata_V4.MetadataSummary> checkMetadataForScanStats(PlannerSettings settings, DrillTable drillTable, FormatSelection formatSelection) {
    // Currently only support metadata rowcount stats for Parquet tables
    FormatPluginConfig formatConfig = formatSelection.getFormat();
    if (!((formatConfig instanceof ParquetFormatConfig) || ((formatConfig instanceof NamedFormatPluginConfig) && ((NamedFormatPluginConfig) formatConfig).getName().equals("parquet")))) {
        return new ImmutablePair<>(false, null);
    }
    FileSystemPlugin plugin = (FileSystemPlugin) drillTable.getPlugin();
    DrillFileSystem fs;
    try {
        fs = new DrillFileSystem(plugin.getFormatPlugin(formatSelection.getFormat()).getFsConf());
    } catch (IOException e) {
        logger.warn("Unable to create the file system object for retrieving statistics from metadata cache file", e);
        return new ImmutablePair<>(false, null);
    }
    // check if the cacheFileRoot has been set: this is needed because after directory pruning, the
    // cacheFileRoot could have been changed and not be the same as the original selectionRoot
    Path selectionRoot = formatSelection.getSelection().getCacheFileRoot() != null ? formatSelection.getSelection().getCacheFileRoot() : formatSelection.getSelection().getSelectionRoot();
    // Guard the cast below: a NamedFormatPluginConfig aliasing "parquet" would
    // otherwise fail the cast; normalize it to a default ParquetFormatConfig,
    // the same approach RefreshMetadataHandler takes (Example 13).
    if (!(formatConfig instanceof ParquetFormatConfig)) {
        formatConfig = new ParquetFormatConfig();
    }
    ParquetReaderConfig parquetReaderConfig = ParquetReaderConfig.builder()
        .withFormatConfig((ParquetFormatConfig) formatConfig)
        .withOptions(settings.getOptions())
        .build();
    Metadata_V4.MetadataSummary metadataSummary = Metadata.getSummary(fs, selectionRoot, false, parquetReaderConfig);
    return metadataSummary != null ? new ImmutablePair<>(true, metadataSummary) : new ImmutablePair<>(false, null);
}
Also used: Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), FileSystemPlugin (org.apache.drill.exec.store.dfs.FileSystemPlugin), Metadata_V4 (org.apache.drill.exec.store.parquet.metadata.Metadata_V4), ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair), DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem), FormatPluginConfig (org.apache.drill.common.logical.FormatPluginConfig), NamedFormatPluginConfig (org.apache.drill.exec.store.dfs.NamedFormatPluginConfig), IOException (java.io.IOException), ParquetFormatConfig (org.apache.drill.exec.store.parquet.ParquetFormatConfig), ParquetReaderConfig (org.apache.drill.exec.store.parquet.ParquetReaderConfig)
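
The Parquet check at the top of this method recurs verbatim in Example 13. Extracted into a standalone helper, a sketch reads as follows; the class name ParquetFormatChecks is hypothetical (Drill itself inlines the check):

import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.exec.store.dfs.NamedFormatPluginConfig;
import org.apache.drill.exec.store.parquet.ParquetFormatConfig;

public final class ParquetFormatChecks {

    private ParquetFormatChecks() {
    }

    // True for a plain ParquetFormatConfig, or for a named alias that points
    // at the "parquet" format plugin.
    public static boolean isParquetFormat(FormatPluginConfig formatConfig) {
        return formatConfig instanceof ParquetFormatConfig
            || (formatConfig instanceof NamedFormatPluginConfig
                && "parquet".equals(((NamedFormatPluginConfig) formatConfig).getName()));
    }
}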

Example 13 with FormatPluginConfig

Use of org.apache.drill.common.logical.FormatPluginConfig in project drill by apache.

The class RefreshMetadataHandler, method getPlan.

@Override
public PhysicalPlan getPlan(SqlNode sqlNode) throws ForemanSetupException {
    final SqlRefreshMetadata refreshTable = unwrap(sqlNode, SqlRefreshMetadata.class);
    try {
        final SchemaPlus schema = findSchema(config.getConverter().getDefaultSchema(), refreshTable.getSchemaPath());
        if (schema == null) {
            return direct(false, "Storage plugin or workspace does not exist [%s]", SchemaUtilites.SCHEMA_PATH_JOINER.join(refreshTable.getSchemaPath()));
        }
        final String tableName = refreshTable.getName();
        final SqlNodeList columnList = getColumnList(refreshTable);
        final Set<SchemaPath> columnSet = getColumnRootSegments(columnList);
        final SqlLiteral allColumns = refreshTable.getAllColumns();
        if (tableName.contains("*") || tableName.contains("?")) {
            return direct(false, "Glob path %s not supported for metadata refresh", tableName);
        }
        final Table table = schema.getTable(tableName);
        if (table == null) {
            return direct(false, "Table %s does not exist.", tableName);
        }
        if (!(table instanceof DrillTable)) {
            return notSupported(tableName);
        }
        final DrillTable drillTable = (DrillTable) table;
        final Object selection = drillTable.getSelection();
        if (selection instanceof FileSelection && ((FileSelection) selection).isEmptyDirectory()) {
            return direct(false, "Table %s is empty and doesn't contain any parquet files.", tableName);
        }
        if (!(selection instanceof FormatSelection)) {
            return notSupported(tableName);
        }
        final FormatSelection formatSelection = (FormatSelection) selection;
        FormatPluginConfig formatConfig = formatSelection.getFormat();
        if (!((formatConfig instanceof ParquetFormatConfig) || ((formatConfig instanceof NamedFormatPluginConfig) && ((NamedFormatPluginConfig) formatConfig).getName().equals("parquet")))) {
            return notSupported(tableName);
        }
        // Always create filesystem object using process user, since it owns the metadata file
        final DrillFileSystem fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), drillTable.getPlugin().getFormatPlugin(formatConfig).getFsConf());
        final Path selectionRoot = formatSelection.getSelection().getSelectionRoot();
        if (!fs.getFileStatus(selectionRoot).isDirectory()) {
            return notSupported(tableName);
        }
        if (!(formatConfig instanceof ParquetFormatConfig)) {
            formatConfig = new ParquetFormatConfig();
        }
        final ParquetReaderConfig readerConfig = ParquetReaderConfig.builder().withFormatConfig((ParquetFormatConfig) formatConfig).withOptions(context.getOptions()).build();
        Metadata.createMeta(fs, selectionRoot, readerConfig, allColumns.booleanValue(), columnSet);
        return direct(true, "Successfully updated metadata for table %s.", tableName);
    } catch (Exception e) {
        logger.error("Failed to update metadata for table '{}'", refreshTable.getName(), e);
        return DirectPlan.createDirectPlan(context, false, String.format("Error: %s", e.getMessage()));
    }
}
Also used: FileSelection (org.apache.drill.exec.store.dfs.FileSelection), Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), Table (org.apache.calcite.schema.Table), DrillTable (org.apache.drill.exec.planner.logical.DrillTable), SchemaPlus (org.apache.calcite.schema.SchemaPlus), FormatSelection (org.apache.drill.exec.store.dfs.FormatSelection), SqlRefreshMetadata (org.apache.drill.exec.planner.sql.parser.SqlRefreshMetadata), ForemanSetupException (org.apache.drill.exec.work.foreman.ForemanSetupException), NamedFormatPluginConfig (org.apache.drill.exec.store.dfs.NamedFormatPluginConfig), DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem), FormatPluginConfig (org.apache.drill.common.logical.FormatPluginConfig), SqlNodeList (org.apache.calcite.sql.SqlNodeList), ParquetFormatConfig (org.apache.drill.exec.store.parquet.ParquetFormatConfig), SqlLiteral (org.apache.calcite.sql.SqlLiteral), ParquetReaderConfig (org.apache.drill.exec.store.parquet.ParquetReaderConfig)
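
This handler backs Drill's REFRESH TABLE METADATA statement. A hedged sketch of the statements that reach getPlan; the workspace and table name dfs.tmp.`events` are illustrative:

public class RefreshMetadataStatements {

    public static void main(String[] args) {
        // Refreshes metadata for every column of a Parquet table
        // (allColumns is true, so the columnSet stays empty).
        String refreshAll = "REFRESH TABLE METADATA dfs.tmp.`events`";
        // Refreshes metadata for selected columns only; the column list feeds
        // the columnSet passed to Metadata.createMeta above.
        String refreshColumns = "REFRESH TABLE METADATA COLUMNS (`id`, `ts`) dfs.tmp.`events`";
        System.out.println(refreshAll);
        System.out.println(refreshColumns);
    }
}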

Example 14 with FormatPluginConfig

Use of org.apache.drill.common.logical.FormatPluginConfig in project drill by apache.

The class IcebergQueriesTest, method setUpBeforeClass.

@BeforeClass
public static void setUpBeforeClass() throws Exception {
    startCluster(ClusterFixture.builder(dirTestWatcher));
    StoragePluginRegistry pluginRegistry = cluster.drillbit().getContext().getStorage();
    FileSystemConfig pluginConfig = (FileSystemConfig) pluginRegistry.getPlugin(DFS_PLUGIN_NAME).getConfig();
    Map<String, FormatPluginConfig> formats = new HashMap<>(pluginConfig.getFormats());
    formats.put("iceberg", IcebergFormatPluginConfig.builder().build());
    FileSystemConfig newPluginConfig = new FileSystemConfig(pluginConfig.getConnection(), pluginConfig.getConfig(), pluginConfig.getWorkspaces(), formats, PlainCredentialsProvider.EMPTY_CREDENTIALS_PROVIDER);
    newPluginConfig.setEnabled(pluginConfig.isEnabled());
    pluginRegistry.put(DFS_PLUGIN_NAME, newPluginConfig);
    // defining another plugin with iceberg format to ensure that DRILL-8049 is fixed
    FileSystemConfig anotherFileSystemConfig = pluginConfig.copyWithFormats(formats);
    pluginRegistry.put("dfs2", anotherFileSystemConfig);
    Configuration config = new Configuration();
    config.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
    HadoopTables tables = new HadoopTables(config);
    Schema structSchema = new Schema(
        Types.NestedField.optional(13, "struct_int_field", Types.IntegerType.get()),
        Types.NestedField.optional(14, "struct_string_field", Types.StringType.get()));
    Types.ListType repeatedStructType = Types.ListType.ofOptional(
        16, Types.StructType.of(
            Types.NestedField.optional(17, "struct_int_field", Types.IntegerType.get()),
            Types.NestedField.optional(18, "struct_string_field", Types.StringType.get())));
    Schema schema = new Schema(
        Types.NestedField.optional(1, "int_field", Types.IntegerType.get()),
        Types.NestedField.optional(2, "long_field", Types.LongType.get()),
        Types.NestedField.optional(3, "float_field", Types.FloatType.get()),
        Types.NestedField.optional(4, "double_field", Types.DoubleType.get()),
        Types.NestedField.optional(5, "string_field", Types.StringType.get()),
        Types.NestedField.optional(6, "boolean_field", Types.BooleanType.get()),
        Types.NestedField.optional(26, "time_field", Types.TimeType.get()),
        Types.NestedField.optional(27, "timestamp_field", Types.TimestampType.withoutZone()),
        Types.NestedField.optional(28, "date_field", Types.DateType.get()),
        Types.NestedField.optional(29, "decimal_field", Types.DecimalType.of(4, 2)),
        Types.NestedField.optional(30, "uuid_field", Types.UUIDType.get()),
        Types.NestedField.optional(31, "fixed_field", Types.FixedType.ofLength(10)),
        Types.NestedField.optional(32, "binary_field", Types.BinaryType.get()),
        Types.NestedField.optional(7, "list_field", Types.ListType.ofOptional(10, Types.StringType.get())),
        Types.NestedField.optional(8, "map_field", Types.MapType.ofOptional(11, 12, Types.StringType.get(), Types.FloatType.get())),
        Types.NestedField.required(9, "struct_field", structSchema.asStruct()),
        Types.NestedField.required(15, "repeated_struct_field", repeatedStructType),
        Types.NestedField.required(19, "repeated_list_field", Types.ListType.ofOptional(20, Types.ListType.ofOptional(21, Types.StringType.get()))),
        Types.NestedField.optional(22, "repeated_map_field", Types.ListType.ofOptional(23, Types.MapType.ofOptional(24, 25, Types.StringType.get(), Types.FloatType.get()))));
    List<String> listValue = Arrays.asList("a", "b", "c");
    Map<String, Float> mapValue = new HashMap<>();
    mapValue.put("a", 0.1F);
    mapValue.put("b", 0.2F);
    Map<String, Float> secondMapValue = new HashMap<>();
    secondMapValue.put("true", 1F);
    secondMapValue.put("false", 0F);
    Record structValue = GenericRecord.create(structSchema);
    structValue.setField("struct_int_field", 123);
    structValue.setField("struct_string_field", "abc");
    Record secondStructValue = GenericRecord.create(structSchema);
    secondStructValue.setField("struct_int_field", 321);
    secondStructValue.setField("struct_string_field", "def");
    Record record = GenericRecord.create(schema);
    record.setField("int_field", 1);
    record.setField("long_field", 100L);
    record.setField("float_field", 0.5F);
    record.setField("double_field", 1.5D);
    record.setField("string_field", "abc");
    record.setField("boolean_field", true);
    record.setField("time_field", LocalTime.of(2, 42, 42));
    record.setField("timestamp_field", LocalDateTime.of(1994, 4, 18, 11, 0, 0));
    record.setField("date_field", LocalDate.of(1994, 4, 18));
    record.setField("decimal_field", new BigDecimal("12.34"));
    record.setField("uuid_field", new byte[16]);
    record.setField("fixed_field", new byte[10]);
    record.setField("binary_field", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));
    record.setField("list_field", listValue);
    record.setField("map_field", mapValue);
    record.setField("struct_field", structValue);
    record.setField("repeated_struct_field", Arrays.asList(structValue, structValue));
    record.setField("repeated_list_field", Arrays.asList(listValue, listValue));
    record.setField("repeated_map_field", Arrays.asList(mapValue, mapValue));
    Record nullsRecord = GenericRecord.create(schema);
    nullsRecord.setField("int_field", null);
    nullsRecord.setField("long_field", null);
    nullsRecord.setField("float_field", null);
    nullsRecord.setField("double_field", null);
    nullsRecord.setField("string_field", null);
    nullsRecord.setField("boolean_field", null);
    nullsRecord.setField("time_field", null);
    nullsRecord.setField("timestamp_field", null);
    nullsRecord.setField("date_field", null);
    nullsRecord.setField("decimal_field", null);
    nullsRecord.setField("uuid_field", null);
    nullsRecord.setField("fixed_field", null);
    nullsRecord.setField("binary_field", null);
    nullsRecord.setField("list_field", null);
    nullsRecord.setField("map_field", null);
    nullsRecord.setField("struct_field", GenericRecord.create(structSchema));
    nullsRecord.setField("repeated_struct_field", Collections.emptyList());
    nullsRecord.setField("repeated_list_field", Collections.emptyList());
    nullsRecord.setField("repeated_map_field", Collections.emptyList());
    Record secondRecord = GenericRecord.create(schema);
    secondRecord.setField("int_field", 988);
    secondRecord.setField("long_field", 543L);
    secondRecord.setField("float_field", Float.NaN);
    secondRecord.setField("double_field", Double.MAX_VALUE);
    secondRecord.setField("string_field", "def");
    secondRecord.setField("boolean_field", false);
    secondRecord.setField("time_field", LocalTime.of(3, 41, 53));
    secondRecord.setField("timestamp_field", LocalDateTime.of(1995, 9, 10, 9, 0, 0));
    secondRecord.setField("date_field", LocalDate.of(1995, 9, 10));
    secondRecord.setField("decimal_field", new BigDecimal("99.99"));
    secondRecord.setField("uuid_field", new byte[16]);
    secondRecord.setField("fixed_field", new byte[10]);
    secondRecord.setField("binary_field", ByteBuffer.wrap("world".getBytes(StandardCharsets.UTF_8)));
    secondRecord.setField("list_field", Arrays.asList("y", "n"));
    secondRecord.setField("map_field", secondMapValue);
    secondRecord.setField("struct_field", secondStructValue);
    secondRecord.setField("repeated_struct_field", Arrays.asList(structValue, secondStructValue));
    secondRecord.setField("repeated_list_field", Arrays.asList(listValue, Arrays.asList("y", "n")));
    secondRecord.setField("repeated_map_field", Arrays.asList(mapValue, secondMapValue));
    String location = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypes").toUri().getPath();
    table = tables.create(schema, location);
    writeParquetAndCommitDataFile(table, "allTypes", Arrays.asList(record, nullsRecord));
    writeParquetAndCommitDataFile(table, "allTypes_1", Collections.singleton(secondRecord));
    String avroLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesAvro").toUri().getPath();
    writeAndCommitDataFile(tables.create(structSchema, avroLocation), "allTypes", FileFormat.AVRO, Arrays.asList(structValue, GenericRecord.create(structSchema), secondStructValue));
    String orcLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesOrc").toUri().getPath();
    writeAndCommitDataFile(tables.create(structSchema, orcLocation), "allTypes", FileFormat.ORC, Arrays.asList(structValue, GenericRecord.create(structSchema), secondStructValue));
    String emptyTableLocation = Paths.get(dirTestWatcher.getDfsTestTmpDir().toURI().getPath(), "testAllTypesEmpty").toUri().getPath();
    tables.create(structSchema, emptyTableLocation);
}
Also used: StoragePluginRegistry (org.apache.drill.exec.store.StoragePluginRegistry), Types (org.apache.iceberg.types.Types), Configuration (org.apache.hadoop.conf.Configuration), HashMap (java.util.HashMap), JsonStringHashMap (org.apache.drill.exec.util.JsonStringHashMap), Schema (org.apache.iceberg.Schema), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), BigDecimal (java.math.BigDecimal), FormatPluginConfig (org.apache.drill.common.logical.FormatPluginConfig), IcebergFormatPluginConfig (org.apache.drill.exec.store.iceberg.format.IcebergFormatPluginConfig), HadoopTables (org.apache.iceberg.hadoop.HadoopTables), FileSystemConfig (org.apache.drill.exec.store.dfs.FileSystemConfig), GenericRecord (org.apache.iceberg.data.GenericRecord), Record (org.apache.iceberg.data.Record), BeforeClass (org.junit.BeforeClass)
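
The writeParquetAndCommitDataFile helper is not shown here. Assuming it follows the standard Iceberg pattern of writing a data file with a generic Parquet appender and committing it as an append, a sketch could look like the following; the class and file-naming details are illustrative, not Drill's actual helper:

import java.io.File;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Files;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Table;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.data.parquet.GenericParquetWriter;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.parquet.Parquet;

public final class IcebergWriteSketch {

    private IcebergWriteSketch() {
    }

    // Hypothetical reconstruction: writes records to a Parquet file inside the
    // table location and commits it to the table in a single append.
    static void writeParquetAndCommitDataFile(Table table, String name, Iterable<Record> records) throws Exception {
        File file = new File(table.location(), name + ".parquet");
        FileAppender<Record> appender = Parquet.write(Files.localOutput(file))
            .schema(table.schema())
            .createWriterFunc(GenericParquetWriter::buildWriter)
            .build();
        try {
            appender.addAll(records);
        } finally {
            appender.close();
        }
        // Metrics are available only after the appender is closed.
        DataFile dataFile = DataFiles.builder(PartitionSpec.unpartitioned())
            .withInputFile(Files.localInput(file))
            .withFormat(FileFormat.PARQUET)
            .withMetrics(appender.metrics())
            .build();
        table.newAppend().appendFile(dataFile).commit();
    }
}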

Example 15 with FormatPluginConfig

Use of org.apache.drill.common.logical.FormatPluginConfig in project drill by apache.

The class TestPcapWithPersistentStore, method pcapPluginBackwardCompatabilityTest.

/**
 * DRILL-7828
 * Note: If this test breaks, you are probably breaking backward and forward compatibility. Verify with the community
 * that breaking compatibility is acceptable and planned for.
 */
@Test
public void pcapPluginBackwardCompatabilityTest() throws Exception {
    final String oldPlugin = "oldFormatPlugin";
    try (CuratorFramework curator = createCurator()) {
        curator.start();
        ObjectMapper objectMapper = new ObjectMapper();
        objectMapper.registerSubtypes(PcapFormatConfig.class, PcapngFormatConfig.class);
        PersistentStoreConfig<FileSystemConfig> storeConfig = PersistentStoreConfig.newJacksonBuilder(objectMapper, FileSystemConfig.class).name("type").build();
        try (ZookeeperClient zkClient = new ZookeeperClient(curator, PathUtils.join("/", storeConfig.getName()), CreateMode.PERSISTENT)) {
            zkClient.start();
            String oldFormatPlugin = DrillFileUtils.getResourceAsString("/config/oldPcapPlugins.json");
            zkClient.put(oldPlugin, oldFormatPlugin.getBytes(), null);
        }
        try (ZookeeperPersistentStoreProvider provider = new ZookeeperPersistentStoreProvider(zkHelper.getConfig(), curator)) {
            PersistentStore<FileSystemConfig> store = provider.getOrCreateStore(storeConfig);
            assertTrue(store instanceof ZookeeperPersistentStore);
            FileSystemConfig oldPluginConfig = ((ZookeeperPersistentStore<FileSystemConfig>) store).get(oldPlugin, null);
            Map<String, FormatPluginConfig> formats = oldPluginConfig.getFormats();
            Assert.assertEquals(formats.keySet(), ImmutableSet.of("pcap", "pcapng"));
            PcapFormatConfig pcap = (PcapFormatConfig) formats.get("pcap");
            PcapngFormatConfig pcapng = (PcapngFormatConfig) formats.get("pcapng");
            Assert.assertEquals(pcap.getExtensions(), ImmutableList.of("pcap"));
            assertTrue(pcapng.getStat());
        }
    }
}
Also used: ZookeeperClient (org.apache.drill.exec.coord.zk.ZookeeperClient), ZookeeperPersistentStore (org.apache.drill.exec.store.sys.store.ZookeeperPersistentStore), CuratorFramework (org.apache.curator.framework.CuratorFramework), PcapFormatConfig (org.apache.drill.exec.store.pcap.plugin.PcapFormatConfig), FormatPluginConfig (org.apache.drill.common.logical.FormatPluginConfig), PcapngFormatConfig (org.apache.drill.exec.store.pcap.plugin.PcapngFormatConfig), FileSystemConfig (org.apache.drill.exec.store.dfs.FileSystemConfig), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), ZookeeperPersistentStoreProvider (org.apache.drill.exec.store.sys.store.provider.ZookeeperPersistentStoreProvider), Test (org.junit.Test)
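
The compatibility guarantee rests on Jackson's polymorphic handling of FormatPluginConfig: subtypes are resolved by a JSON type discriminator once registered. A minimal standalone sketch with a hand-written JSON snippet in place of the oldPcapPlugins.json resource; the "type"/"extensions" shape is an assumption about the serialized form:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.exec.store.pcap.plugin.PcapFormatConfig;
import org.apache.drill.exec.store.pcap.plugin.PcapngFormatConfig;

public class PcapConfigDeserializationSketch {

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        // Without this registration the "pcap" type name cannot be resolved
        // back to a concrete FormatPluginConfig subclass.
        mapper.registerSubtypes(PcapFormatConfig.class, PcapngFormatConfig.class);
        // Assumed JSON shape for an on-disk format config.
        String json = "{\"type\": \"pcap\", \"extensions\": [\"pcap\"]}";
        FormatPluginConfig config = mapper.readValue(json, FormatPluginConfig.class);
        System.out.println(config instanceof PcapFormatConfig); // expected: true
    }
}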

Aggregations

FormatPluginConfig (org.apache.drill.common.logical.FormatPluginConfig): 16 uses
FileSystemConfig (org.apache.drill.exec.store.dfs.FileSystemConfig): 7 uses
HashMap (java.util.HashMap): 6 uses
ParquetFormatConfig (org.apache.drill.exec.store.parquet.ParquetFormatConfig): 6 uses
DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem): 4 uses
FileSystemPlugin (org.apache.drill.exec.store.dfs.FileSystemPlugin): 4 uses
FormatSelection (org.apache.drill.exec.store.dfs.FormatSelection): 4 uses
NamedFormatPluginConfig (org.apache.drill.exec.store.dfs.NamedFormatPluginConfig): 4 uses
Path (org.apache.hadoop.fs.Path): 4 uses
BeforeClass (org.junit.BeforeClass): 4 uses
Table (org.apache.calcite.schema.Table): 3 uses
SchemaPath (org.apache.drill.common.expression.SchemaPath): 3 uses
DrillTable (org.apache.drill.exec.planner.logical.DrillTable): 3 uses
IOException (java.io.IOException): 2 uses
Field (java.lang.reflect.Field): 2 uses
SchemaPlus (org.apache.calcite.schema.SchemaPlus): 2 uses
StoragePluginConfig (org.apache.drill.common.logical.StoragePluginConfig): 2 uses
SqlRefreshMetadata (org.apache.drill.exec.planner.sql.parser.SqlRefreshMetadata): 2 uses
StoragePluginRegistry (org.apache.drill.exec.store.StoragePluginRegistry): 2 uses
FileSelection (org.apache.drill.exec.store.dfs.FileSelection): 2 uses