Search in sources :

Example 1 with HiveLayout

use of alluxio.table.common.layout.HiveLayout in project alluxio by Alluxio.

the class AlluxioCatalogTest method createMockUdbTable.

/**
 * Creates a Mockito-backed {@link UdbTable} that exposes exactly one mocked partition.
 *
 * <p>The partition reports {@code name} as its spec and an empty default {@link HiveLayout} as its
 * layout. The table reports the given name and schema, statistics produced by
 * {@code createRandomStatsForSchema(schema)}, no partition columns, and the protobuf form of an
 * empty default {@link HiveLayout}.
 *
 * @param name the name returned by the mocked table; also used as the partition spec
 * @param schema the schema returned by the mocked table and used to generate statistics
 * @return the stubbed table mock
 * @throws IOException declared because the stubbed accessors / helper may declare it
 */
UdbTable createMockUdbTable(String name, Schema schema) throws IOException {
    // Single partition: spec mirrors the table name, layout carries no partition info or stats.
    HiveLayout partitionLayout = new HiveLayout(PartitionInfo.getDefaultInstance(), Collections.emptyList());
    UdbPartition mockPartition = Mockito.mock(UdbPartition.class);
    when(mockPartition.getSpec()).thenReturn(name);
    when(mockPartition.getLayout()).thenReturn(partitionLayout);

    // Table-level mock: identity, schema and statistics first, then partitioning and layout.
    UdbTable mockTable = Mockito.mock(UdbTable.class);
    when(mockTable.getName()).thenReturn(name);
    when(mockTable.getSchema()).thenReturn(schema);
    when(mockTable.getStatistics()).thenReturn(createRandomStatsForSchema(schema));
    when(mockTable.getPartitions()).thenReturn(Arrays.asList(mockPartition));
    when(mockTable.getPartitionCols()).thenReturn(Collections.emptyList());

    // The table layout is a separate HiveLayout instance, exposed in its protobuf form.
    HiveLayout tableLayout = new HiveLayout(PartitionInfo.getDefaultInstance(), Collections.emptyList());
    when(mockTable.getLayout()).thenReturn(tableLayout.toProto());
    return mockTable;
}
Also used : UdbTable(alluxio.table.common.udb.UdbTable) HiveLayout(alluxio.table.common.layout.HiveLayout) UdbPartition(alluxio.table.common.UdbPartition)

Example 2 with HiveLayout

use of alluxio.table.common.layout.HiveLayout in project alluxio by Alluxio.

the class CompactActionTest method dynamicNumFiles.

/**
 * Checks that a definition carrying both {@code file.count.max} and {@code file.size.min}
 * yields a {@link CompactConfig} with the matching paths, file-count cap and minimum file size.
 */
@Test
public void dynamicNumFiles() {
    final CompactAction action = parse("file.count.max=1000;file.size.min=1024");
    HiveLayout source = TableTestUtils.createLayout("/from");
    HiveLayout target = TableTestUtils.createLayout("/to");

    JobConfig generated = action.generateJobConfig(source, target, false);

    // The generated config must be exactly a CompactConfig, not a subclass.
    assertEquals(CompactConfig.class, generated.getClass());
    CompactConfig config = (CompactConfig) generated;
    assertEquals("/from", config.getInput());
    assertEquals("/to", config.getOutput());
    assertEquals(1000, config.getMaxNumFiles());
    assertEquals(1024, config.getMinFileSize());
}
Also used : HiveLayout(alluxio.table.common.layout.HiveLayout) CompactConfig(alluxio.job.plan.transform.CompactConfig) JobConfig(alluxio.job.JobConfig) Test(org.junit.Test)

Example 3 with HiveLayout

use of alluxio.table.common.layout.HiveLayout in project alluxio by Alluxio.

the class CompactActionTest method generateJobConfig.

/**
 * Checks that a definition with only {@code file.count.max} yields a {@link CompactConfig}
 * whose input/output paths come from the two layouts and whose file-count cap is 12.
 */
@Test
public void generateJobConfig() {
    final CompactAction action = parse("file.count.max=12");
    HiveLayout source = TableTestUtils.createLayout("/from");
    HiveLayout target = TableTestUtils.createLayout("/to");

    JobConfig generated = action.generateJobConfig(source, target, false);

    // The generated config must be exactly a CompactConfig, not a subclass.
    assertEquals(CompactConfig.class, generated.getClass());
    CompactConfig config = (CompactConfig) generated;
    assertEquals("/from", config.getInput());
    assertEquals("/to", config.getOutput());
    assertEquals(12, config.getMaxNumFiles());
}
Also used : HiveLayout(alluxio.table.common.layout.HiveLayout) CompactConfig(alluxio.job.plan.transform.CompactConfig) JobConfig(alluxio.job.JobConfig) Test(org.junit.Test)

Example 4 with HiveLayout

use of alluxio.table.common.layout.HiveLayout in project alluxio by Alluxio.

the class TransformPlanTest method getJobConfigs.

/**
 * Checks that a {@link TransformPlan} built from two layouts and a single-action definition
 * retains both layouts and produces exactly one {@link CompactConfig} job with the expected
 * paths and file-count cap.
 */
@Test
public void getJobConfigs() {
    HiveLayout baseLayout = TableTestUtils.createLayout("/from");
    HiveLayout transformedLayout = TableTestUtils.createLayout("/to");
    TransformDefinition transformDefinition = TransformDefinition.parse("file.count.max=12");

    TransformPlan transformPlan = new TransformPlan(baseLayout, transformedLayout, transformDefinition);

    // The plan must hand back the layouts it was constructed with.
    assertEquals(baseLayout, transformPlan.getBaseLayout());
    assertEquals(transformedLayout, transformPlan.getTransformedLayout());

    // One action in the definition -> one job config.
    ArrayList<JobConfig> jobConfigs = transformPlan.getJobConfigs();
    assertEquals(1, jobConfigs.size());
    JobConfig onlyJob = jobConfigs.get(0);
    assertEquals(CompactConfig.class, onlyJob.getClass());

    CompactConfig config = (CompactConfig) onlyJob;
    assertEquals("/from", config.getInput());
    assertEquals("/to", config.getOutput());
    assertEquals(12, config.getMaxNumFiles());
}
Also used : HiveLayout(alluxio.table.common.layout.HiveLayout) CompactConfig(alluxio.job.plan.transform.CompactConfig) JobConfig(alluxio.job.JobConfig) Test(org.junit.Test)

Example 5 with HiveLayout

use of alluxio.table.common.layout.HiveLayout in project alluxio by Alluxio.

the class GlueDatabase method getTable.

/**
 * Loads a table and its partitions from Glue and converts them into a {@link UdbTable}.
 *
 * <p>The method fetches the table and its partitions, mounts their locations through
 * {@code mountAlluxioPaths}, optionally collects table-level and partition-level column
 * statistics (each guarded by its own configuration flag), and then builds one
 * {@link HiveLayout}-backed {@link UdbPartition} per Glue partition. A table without partition
 * columns is represented by a single partition named after the table.
 *
 * @param tableName the name of the table to load from the configured Glue database
 * @param bypassSpec the bypass specification forwarded to {@code mountAlluxioPaths}
 * @return the table wrapped as a {@link GlueTable}
 * @throws NotFoundException if Glue reports that the table does not exist
 * @throws IOException if Glue reports a validation or encryption error
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
    Table table;
    List<Partition> partitions;
    try {
        GetTableRequest tableRequest = new GetTableRequest()
            .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
            .withDatabaseName(mGlueDbName)
            .withName(tableName);
        table = getClient().getTable(tableRequest).getTable();
        partitions = batchGetPartitions(getClient(), tableName);
        PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);

        // Null-safe view of the partition keys; used everywhere below instead of the raw getter.
        List<Column> partitionColumns;
        if (table.getPartitionKeys() == null) {
            partitionColumns = Collections.emptyList();
        } else {
            partitionColumns = table.getPartitionKeys();
        }
        // Get table parameters
        Map<String, String> tableParameters = table.getParameters() == null
            ? Collections.emptyMap() : table.getParameters();

        // Get column statistics info for table
        List<String> columnNames = table.getStorageDescriptor().getColumns().stream()
            .map(Column::getName)
            .collect(Collectors.toList());
        List<ColumnStatisticsInfo> columnStatisticsTableData = new ArrayList<>();
        if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) {
            // Only build the request when statistics are actually going to be fetched.
            GetColumnStatisticsForTableRequest getColumnStatisticsForTableRequest =
                new GetColumnStatisticsForTableRequest()
                    .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                    .withDatabaseName(mGlueDbName)
                    .withTableName(tableName)
                    .withColumnNames(columnNames);
            columnStatisticsTableData = getTableColumnStatistics(mGlueDbName, tableName,
                getColumnStatisticsForTableRequest);
        }

        // Get column statistics info for partitions
        // potential expensive call
        Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
        if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) {
            for (Partition partition : partitions) {
                List<String> partitionValue = partition.getValues();
                if (partitionValue != null) {
                    GetColumnStatisticsForPartitionRequest getColumnStatisticsForPartitionRequest =
                        new GetColumnStatisticsForPartitionRequest()
                            .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                            .withDatabaseName(mGlueDbName)
                            .withTableName(tableName)
                            .withColumnNames(columnNames)
                            .withPartitionValues(partitionValue);
                    // Reuse the already null-checked values rather than calling the getter again.
                    String partName = GlueUtils.makePartitionName(partitionColumns, partitionValue);
                    statsMap.put(partName, getPartitionColumnStatistics(mGlueDbName, tableName,
                        getColumnStatisticsForPartitionRequest));
                }
            }
        }

        // Table-level layout: a Hive layout whose payload is the serialized PartitionInfo.
        PartitionInfo partitionInfo = PartitionInfo.newBuilder()
            .setDbName(mGlueDbName)
            .setTableName(tableName)
            .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
            .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
            .putAllParameters(tableParameters)
            .build();
        Layout layout = Layout.newBuilder()
            .setLayoutType(HiveLayout.TYPE)
            .setLayoutData(partitionInfo.toByteString())
            .build();

        List<UdbPartition> udbPartitions = new ArrayList<>();
        if (partitionColumns.isEmpty()) {
            // Unpartitioned table: expose one partition named after the table, with no stats.
            PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder()
                .setDbName(mGlueDbName)
                .setTableName(tableName)
                .addAllDataCols(GlueUtils.toProto(table.getStorageDescriptor().getColumns()))
                .setStorage(GlueUtils.toProto(table.getStorageDescriptor(), pathTranslator))
                .setPartitionName(tableName)
                .putAllParameters(tableParameters);
            udbPartitions.add(new GluePartition(
                new HiveLayout(partitionInfoBuilder.build(), Collections.emptyList())));
        } else {
            for (Partition partition : partitions) {
                String partName = GlueUtils.makePartitionName(partitionColumns, partition.getValues());
                PartitionInfo.Builder partitionInfoBuilder = PartitionInfo.newBuilder()
                    .setDbName(mGlueDbName)
                    .setTableName(tableName)
                    .addAllDataCols(GlueUtils.toProto(partition.getStorageDescriptor().getColumns()))
                    .setStorage(GlueUtils.toProto(partition.getStorageDescriptor(), pathTranslator))
                    .setPartitionName(partName)
                    .putAllParameters(partition.getParameters() == null
                        ? Collections.emptyMap() : partition.getParameters());
                if (partition.getValues() != null) {
                    partitionInfoBuilder.addAllValues(partition.getValues());
                }
                // Partitions with no collected statistics fall back to an empty list.
                udbPartitions.add(new GluePartition(new HiveLayout(partitionInfoBuilder.build(),
                    statsMap.getOrDefault(partName, Collections.emptyList()))));
            }
        }
        return new GlueTable(this, pathTranslator, tableName,
            GlueUtils.toProtoSchema(table.getStorageDescriptor().getColumns()),
            columnStatisticsTableData,
            // Get FieldSchema from partition keys (null-safe list, never the raw getter result)
            GlueUtils.toProto(partitionColumns), udbPartitions, layout, table);
    } catch (EntityNotFoundException e) {
        throw new NotFoundException("Table " + tableName + " does not exist in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".", e);
    } catch (ValidationException e) {
        throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " with validation error: " + e.getMessage(), e);
    } catch (GlueEncryptionException e) {
        throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " error: " + e.getMessage(), e);
    }
}
Also used : HiveLayout(alluxio.table.common.layout.HiveLayout) ValidationException(com.amazonaws.services.glue.model.ValidationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) NotFoundException(alluxio.exception.status.NotFoundException) UdbPartition(alluxio.table.common.UdbPartition) Column(com.amazonaws.services.glue.model.Column) GlueEncryptionException(com.amazonaws.services.glue.model.GlueEncryptionException) List(java.util.List) ArrayList(java.util.ArrayList) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) UdbPartition(alluxio.table.common.UdbPartition) Partition(com.amazonaws.services.glue.model.Partition) UdbTable(alluxio.table.common.udb.UdbTable) Table(com.amazonaws.services.glue.model.Table) GetColumnStatisticsForTableRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForTableRequest) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) IOException(java.io.IOException) GetTableRequest(com.amazonaws.services.glue.model.GetTableRequest) GetColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest) PathTranslator(alluxio.table.common.udb.PathTranslator) Layout(alluxio.grpc.table.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo)

Aggregations

HiveLayout (alluxio.table.common.layout.HiveLayout): 7, UdbPartition (alluxio.table.common.UdbPartition): 4, UdbTable (alluxio.table.common.udb.UdbTable): 4, JobConfig (alluxio.job.JobConfig): 3, CompactConfig (alluxio.job.plan.transform.CompactConfig): 3, Test (org.junit.Test): 3, NotFoundException (alluxio.exception.status.NotFoundException): 2, ColumnStatisticsInfo (alluxio.grpc.table.ColumnStatisticsInfo): 2, Layout (alluxio.grpc.table.Layout): 2, PartitionInfo (alluxio.grpc.table.layout.hive.PartitionInfo): 2, PathTranslator (alluxio.table.common.udb.PathTranslator): 2, IOException (java.io.IOException): 2, ArrayList (java.util.ArrayList): 2, HashMap (java.util.HashMap): 2, List (java.util.List): 2, AlluxioURI (alluxio.AlluxioURI): 1, AlluxioException (alluxio.exception.AlluxioException): 1, DatabaseInfo (alluxio.master.table.DatabaseInfo): 1, CloseableResource (alluxio.resource.CloseableResource): 1, UdbBypassSpec (alluxio.table.common.udb.UdbBypassSpec): 1