Search in sources:

Example 1 with CatalogPartitionSpec

Use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

From the class HiveCatalogHiveMetadataTest, method testAlterPartitionColumnStatistics.

@Test
public void testAlterPartitionColumnStatistics() throws Exception {
    catalog.createDatabase(db1, createDb(), false);
    CatalogTable catalogTable = createPartitionedTable();
    catalog.createTable(path1, catalogTable, false);
    CatalogPartitionSpec partitionSpec = new CatalogPartitionSpec(new HashMap<String, String>() {

        {
            put("second", "2010-04-21 09:45:00");
            put("third", "2000");
        }
    });
    catalog.createPartition(path1, partitionSpec, createPartition(), true);
    Map<String, CatalogColumnStatisticsDataBase> columnStatisticsDataBaseMap = new HashMap<>();
    columnStatisticsDataBaseMap.put("first", new CatalogColumnStatisticsDataString(10L, 5.2, 3L, 100L));
    CatalogColumnStatistics catalogColumnStatistics = new CatalogColumnStatistics(columnStatisticsDataBaseMap);
    catalog.alterPartitionColumnStatistics(path1, partitionSpec, catalogColumnStatistics, false);
    checkEquals(catalogColumnStatistics, catalog.getPartitionColumnStatistics(path1, partitionSpec));
}
Also used : CatalogColumnStatisticsDataBase(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase) HashMap(java.util.HashMap) CatalogTable(org.apache.flink.table.catalog.CatalogTable) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics) Test(org.junit.Test)
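
A minimal standalone sketch of the same spec construction, using a plain HashMap instead of the double-brace anonymous subclass used in the test above (the class name PartitionSpecSketch is hypothetical; the keys and values are copied from the test):

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.table.catalog.CatalogPartitionSpec;

public class PartitionSpecSketch {

    public static void main(String[] args) {
        // Build the partition key -> value map explicitly, then wrap it in a spec.
        Map<String, String> partKeys = new HashMap<>();
        partKeys.put("second", "2010-04-21 09:45:00");
        partKeys.put("third", "2000");
        CatalogPartitionSpec partitionSpec = new CatalogPartitionSpec(partKeys);
        // getPartitionSpec() returns the backing key -> value map.
        System.out.println(partitionSpec.getPartitionSpec());
    }
}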

Example 2 with CatalogPartitionSpec

Use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

From the class HiveParserDDLSemanticAnalyzer, method convertAlterTableProps.

private Operation convertAlterTableProps(CatalogBaseTable oldBaseTable, String tableName, Map<String, String> partSpec, Map<String, String> newProps) {
    ObjectIdentifier tableIdentifier = parseObjectIdentifier(tableName);
    CatalogTable oldTable = (CatalogTable) oldBaseTable;
    CatalogPartitionSpec catalogPartitionSpec = partSpec != null ? new CatalogPartitionSpec(partSpec) : null;
    CatalogPartition catalogPartition = partSpec != null ? getPartition(tableIdentifier, catalogPartitionSpec) : null;
    Map<String, String> props = new HashMap<>();
    if (catalogPartition != null) {
        props.putAll(catalogPartition.getProperties());
        props.putAll(newProps);
        return new AlterPartitionPropertiesOperation(tableIdentifier, catalogPartitionSpec, new CatalogPartitionImpl(props, catalogPartition.getComment()));
    } else {
        props.putAll(oldTable.getOptions());
        props.putAll(newProps);
        return new AlterTableOptionsOperation(tableIdentifier, oldTable.copy(props));
    }
}
Also used : AlterTableOptionsOperation(org.apache.flink.table.operations.ddl.AlterTableOptionsOperation) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) CatalogPartition(org.apache.flink.table.catalog.CatalogPartition) AlterPartitionPropertiesOperation(org.apache.flink.table.operations.ddl.AlterPartitionPropertiesOperation) CatalogTable(org.apache.flink.table.catalog.CatalogTable) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) CatalogPartitionImpl(org.apache.flink.table.catalog.CatalogPartitionImpl)
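
The branch above copies the existing properties first and the new properties second, so a new value wins whenever a key appears in both maps. A small self-contained sketch of that merge order (class and variable names are hypothetical, not from the Flink code base):

import java.util.HashMap;
import java.util.Map;

public class PropsMergeSketch {

    public static void main(String[] args) {
        // Existing table or partition properties.
        Map<String, String> existing = new HashMap<>();
        existing.put("k1", "old");
        existing.put("k2", "v2");

        // Properties supplied by the ALTER TABLE ... SET TBLPROPERTIES statement.
        Map<String, String> newProps = new HashMap<>();
        newProps.put("k1", "new");

        // Same order as convertAlterTableProps: old values first, new values override.
        Map<String, String> props = new HashMap<>();
        props.putAll(existing);
        props.putAll(newProps);

        System.out.println(props); // k1 maps to "new", k2 maps to "v2"
    }
}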

Example 3 with CatalogPartitionSpec

Use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

From the class HiveDialectITCase, method testAddDropPartitions.

@Test
public void testAddDropPartitions() throws Exception {
    tableEnv.executeSql("create table tbl (x int,y binary) partitioned by (dt date,country string)");
    tableEnv.executeSql("alter table tbl add partition (dt='2020-04-30',country='china') partition (dt='2020-04-30',country='us')");
    ObjectPath tablePath = new ObjectPath("default", "tbl");
    assertEquals(2, hiveCatalog.listPartitions(tablePath).size());
    String partLocation = warehouse + "/part3_location";
    tableEnv.executeSql(String.format("alter table tbl add partition (dt='2020-05-01',country='belgium') location '%s'", partLocation));
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    CatalogPartitionSpec spec = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {

        {
            put("dt", "2020-05-01");
            put("country", "belgium");
        }
    });
    Partition hivePartition = hiveCatalog.getHivePartition(hiveTable, spec);
    assertEquals(partLocation, locationPath(hivePartition.getSd().getLocation()));
    tableEnv.executeSql("alter table tbl drop partition (dt='2020-04-30',country='china'),partition (dt='2020-05-01',country='belgium')");
    assertEquals(1, hiveCatalog.listPartitions(tablePath).size());
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogTable(org.apache.flink.table.catalog.CatalogTable) SqlCreateHiveTable(org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) Table(org.apache.hadoop.hive.metastore.api.Table) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) Test(org.junit.Test)
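The LinkedHashMap matters here: the key insertion order follows the table's partition column order (dt, then country). A hedged sketch of turning such an ordered spec into a partition path with PartitionPathUtils, the utility TestManagedSinkCommitter uses in Example 5 (the package org.apache.flink.table.utils is an assumption, as is the exact output shown in the comment):

import java.util.LinkedHashMap;

import org.apache.flink.table.utils.PartitionPathUtils;

public class PartitionPathSketch {

    public static void main(String[] args) {
        // Key order mirrors the partition columns of the table: dt first, then country.
        LinkedHashMap<String, String> spec = new LinkedHashMap<>();
        spec.put("dt", "2020-05-01");
        spec.put("country", "belgium");
        // Expected to print a segment like "dt=2020-05-01/country=belgium/" with a trailing
        // separator, which is why Example 5 formats file names as "%scompact-...".
        System.out.println(PartitionPathUtils.generatePartitionPath(spec));
    }
}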

Example 4 with CatalogPartitionSpec

Use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

From the class HiveDialectITCase, method testAlterTable.

@Test
public void testAlterTable() throws Exception {
    tableEnv.executeSql("create table tbl (x int) tblproperties('k1'='v1')");
    tableEnv.executeSql("alter table tbl rename to tbl1");
    ObjectPath tablePath = new ObjectPath("default", "tbl1");
    // change properties
    tableEnv.executeSql("alter table `default`.tbl1 set tblproperties ('k2'='v2')");
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals("v1", hiveTable.getParameters().get("k1"));
    assertEquals("v2", hiveTable.getParameters().get("k2"));
    // change location
    String newLocation = warehouse + "/tbl1_new_location";
    tableEnv.executeSql(String.format("alter table default.tbl1 set location '%s'", newLocation));
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(newLocation, locationPath(hiveTable.getSd().getLocation()));
    // change file format
    tableEnv.executeSql("alter table tbl1 set fileformat orc");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(OrcSerde.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());
    assertEquals(OrcInputFormat.class.getName(), hiveTable.getSd().getInputFormat());
    assertEquals(OrcOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());
    // change serde
    tableEnv.executeSql(String.format("alter table tbl1 set serde '%s' with serdeproperties('%s'='%s')", LazyBinarySerDe.class.getName(), serdeConstants.FIELD_DELIM, "\u0001"));
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(LazyBinarySerDe.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());
    assertEquals("\u0001", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.FIELD_DELIM));
    // replace columns
    tableEnv.executeSql("alter table tbl1 replace columns (t tinyint,s smallint,i int,b bigint,f float,d double,num decimal," + "ts timestamp,dt date,str string,var varchar(10),ch char(123),bool boolean,bin binary)");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(14, hiveTable.getSd().getColsSize());
    assertEquals("varchar(10)", hiveTable.getSd().getCols().get(10).getType());
    assertEquals("char(123)", hiveTable.getSd().getCols().get(11).getType());
    tableEnv.executeSql("alter table tbl1 replace columns (a array<array<int>>,s struct<f1:struct<f11:int,f12:binary>, f2:map<double,date>>," + "m map<char(5),map<timestamp,decimal(20,10)>>)");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals("array<array<int>>", hiveTable.getSd().getCols().get(0).getType());
    assertEquals("struct<f1:struct<f11:int,f12:binary>,f2:map<double,date>>", hiveTable.getSd().getCols().get(1).getType());
    assertEquals("map<char(5),map<timestamp,decimal(20,10)>>", hiveTable.getSd().getCols().get(2).getType());
    // add columns
    tableEnv.executeSql("alter table tbl1 add columns (x int,y int)");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(5, hiveTable.getSd().getColsSize());
    // change column
    tableEnv.executeSql("alter table tbl1 change column x x1 string comment 'new x col'");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    assertEquals(5, hiveTable.getSd().getColsSize());
    FieldSchema newField = hiveTable.getSd().getCols().get(3);
    assertEquals("x1", newField.getName());
    assertEquals("string", newField.getType());
    tableEnv.executeSql("alter table tbl1 change column y y int first");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    newField = hiveTable.getSd().getCols().get(0);
    assertEquals("y", newField.getName());
    assertEquals("int", newField.getType());
    tableEnv.executeSql("alter table tbl1 change column x1 x2 timestamp after y");
    hiveTable = hiveCatalog.getHiveTable(tablePath);
    newField = hiveTable.getSd().getCols().get(1);
    assertEquals("x2", newField.getName());
    assertEquals("timestamp", newField.getType());
    // add/replace columns cascade
    tableEnv.executeSql("create table tbl2 (x int) partitioned by (dt date,id bigint)");
    tableEnv.executeSql("alter table tbl2 add partition (dt='2020-01-23',id=1) partition (dt='2020-04-24',id=2)");
    CatalogPartitionSpec partitionSpec1 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {

        {
            put("dt", "2020-01-23");
            put("id", "1");
        }
    });
    CatalogPartitionSpec partitionSpec2 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {

        {
            put("dt", "2020-04-24");
            put("id", "2");
        }
    });
    tableEnv.executeSql("alter table tbl2 replace columns (ti tinyint,d decimal) cascade");
    ObjectPath tablePath2 = new ObjectPath("default", "tbl2");
    hiveTable = hiveCatalog.getHiveTable(tablePath2);
    Partition hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec1);
    assertEquals(2, hivePartition.getSd().getColsSize());
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec2);
    assertEquals(2, hivePartition.getSd().getColsSize());
    tableEnv.executeSql("alter table tbl2 add columns (ch char(5),vch varchar(9)) cascade");
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec1);
    assertEquals(4, hivePartition.getSd().getColsSize());
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec2);
    assertEquals(4, hivePartition.getSd().getColsSize());
    // change column cascade
    tableEnv.executeSql("alter table tbl2 change column ch ch char(10) cascade");
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec1);
    assertEquals("char(10)", hivePartition.getSd().getCols().get(2).getType());
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec2);
    assertEquals("char(10)", hivePartition.getSd().getCols().get(2).getType());
    tableEnv.executeSql("alter table tbl2 change column vch str string first cascade");
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec1);
    assertEquals("str", hivePartition.getSd().getCols().get(0).getName());
    hivePartition = hiveCatalog.getHivePartition(hiveTable, partitionSpec2);
    assertEquals("str", hivePartition.getSd().getCols().get(0).getName());
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogTable(org.apache.flink.table.catalog.CatalogTable) SqlCreateHiveTable(org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) Table(org.apache.hadoop.hive.metastore.api.Table) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) LazyBinarySerDe(org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) Test(org.junit.Test)
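
Building several ordered specs with the double-brace idiom gets verbose. A hypothetical helper, not part of the Flink code base, that would build the same specs from alternating key/value arguments:

import java.util.LinkedHashMap;

import org.apache.flink.table.catalog.CatalogPartitionSpec;

public final class PartitionSpecs {

    private PartitionSpecs() {
    }

    // Builds an ordered partition spec from alternating key/value arguments.
    public static CatalogPartitionSpec of(String... keyValues) {
        if (keyValues.length % 2 != 0) {
            throw new IllegalArgumentException("Expected an even number of key/value arguments");
        }
        LinkedHashMap<String, String> spec = new LinkedHashMap<>();
        for (int i = 0; i < keyValues.length; i += 2) {
            spec.put(keyValues[i], keyValues[i + 1]);
        }
        return new CatalogPartitionSpec(spec);
    }
}

With this helper, partitionSpec1 above would read PartitionSpecs.of("dt", "2020-01-23", "id", "1") and partitionSpec2 would read PartitionSpecs.of("dt", "2020-04-24", "id", "2").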

Example 5 with CatalogPartitionSpec

Use of org.apache.flink.table.catalog.CatalogPartitionSpec in project flink by apache.

From the class TestManagedSinkCommitter, method commitAdd.

private void commitAdd(Map<CatalogPartitionSpec, List<RowData>> toAdd, Map<CatalogPartitionSpec, List<Path>> managedTableFileEntries) throws IOException {
    Map<CatalogPartitionSpec, String> processedPartitions = new HashMap<>();
    for (Map.Entry<CatalogPartitionSpec, List<RowData>> entry : toAdd.entrySet()) {
        CatalogPartitionSpec partitionSpec = entry.getKey();
        String partition = processedPartitions.computeIfAbsent(partitionSpec, (spec) -> PartitionPathUtils.generatePartitionPath(new LinkedHashMap<>(spec.getPartitionSpec())));
        List<RowData> elements = entry.getValue();
        Path compactFilePath = new Path(basePath, new Path(String.format("%scompact-%s-file-0", partition, UUID.randomUUID())));
        FSDataOutputStream outputStream = compactFilePath.getFileSystem().create(compactFilePath, FileSystem.WriteMode.NO_OVERWRITE);
        for (RowData element : elements) {
            encoder.encode(element, outputStream);
        }
        outputStream.flush();
        outputStream.close();
        List<Path> fileEntries = managedTableFileEntries.get(partitionSpec);
        fileEntries.add(compactFilePath);
        managedTableFileEntries.put(partitionSpec, fileEntries);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) RowData(org.apache.flink.table.data.RowData) List(java.util.List) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) Map(java.util.Map) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec)
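
A hedged variant of the write step in commitAdd, reusing the variables from the method above (compactFilePath, elements, encoder, managedTableFileEntries, partitionSpec): try-with-resources closes the stream even when encoding throws, and computeIfAbsent avoids a NullPointerException if the partition has no file list yet. This is a sketch under those assumptions, not the committer's actual implementation.

// Fragment only; assumes java.util.ArrayList is imported in addition to the imports listed above.
try (FSDataOutputStream outputStream =
        compactFilePath.getFileSystem().create(compactFilePath, FileSystem.WriteMode.NO_OVERWRITE)) {
    // encoder is assumed to be the same Encoder<RowData> field the committer already holds.
    for (RowData element : elements) {
        encoder.encode(element, outputStream);
    }
    outputStream.flush();
}
managedTableFileEntries
        .computeIfAbsent(partitionSpec, spec -> new ArrayList<>())
        .add(compactFilePath);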

Aggregations

CatalogPartitionSpec (org.apache.flink.table.catalog.CatalogPartitionSpec): 32
HashMap (java.util.HashMap): 20
LinkedHashMap (java.util.LinkedHashMap): 15
ArrayList (java.util.ArrayList): 11
Map (java.util.Map): 11
ObjectPath (org.apache.flink.table.catalog.ObjectPath): 11
List (java.util.List): 10
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 10
Path (org.apache.flink.core.fs.Path): 8
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 8
CatalogPartition (org.apache.flink.table.catalog.CatalogPartition): 7
Test (org.junit.Test): 7
HashSet (java.util.HashSet): 6
SqlCreateHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable): 6
CatalogPartitionImpl (org.apache.flink.table.catalog.CatalogPartitionImpl): 6
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 6
ValidationException (org.apache.flink.table.api.ValidationException): 5
RowData (org.apache.flink.table.data.RowData): 5
Partition (org.apache.hadoop.hive.metastore.api.Partition): 5
Set (java.util.Set): 4