use of alluxio.table.common.udb.UdbTable in project alluxio by Alluxio.
the class Database method sync.
/**
* Syncs the metadata from the under db. To avoid concurrent sync operations, this requires
* external synchronization.
*
* The sync first reloads the UDB config file (when a non-default path is configured), then
* refreshes the database info, then syncs every non-ignored UDB table on a dedicated thread pool,
* and finally removes tables that are no longer reported by the UDB. A failure while syncing an
* individual table is logged and recorded in the returned status instead of being thrown.
*
* @param context journal context
* @return the resulting sync status
* @throws IOException if the configured UDB config file is missing or cannot be deserialized, or
*         if running the table sync tasks fails as a whole
*/
public SyncStatus sync(JournalContext context) throws IOException {
// Keep track of the status of each syncing table.
// Synchronization is necessary if accessed concurrently from multiple threads
// (the worker tasks below lock on this builder before mutating it).
SyncStatus.Builder builder = SyncStatus.newBuilder();
// Only (re)load the UDB config when the path differs from the property's default value.
if (!mConfigPath.equals(CatalogProperty.DB_CONFIG_FILE.getDefaultValue())) {
if (!Files.exists(Paths.get(mConfigPath))) {
throw new FileNotFoundException(mConfigPath);
}
ObjectMapper mapper = new ObjectMapper();
try {
mDbConfig = mapper.readValue(new File(mConfigPath), DbConfig.class);
} catch (JsonProcessingException e) {
// Log with context, then propagate: the sync is aborted and mDbConfig is left untouched.
LOG.error("Failed to deserialize UDB config file {}, stays unsynced", mConfigPath, e);
throw e;
}
}
// Journal a database info update only when the UDB reports something different.
DatabaseInfo newDbInfo = mUdb.getDatabaseInfo();
if (!newDbInfo.equals(mDatabaseInfo)) {
applyAndJournal(context, Journal.JournalEntry.newBuilder().setUpdateDatabaseInfo(toJournalProto(newDbInfo, mName)).build());
}
Set<String> udbTableNames = new HashSet<>(mUdb.getTableNames());
// keeps track of how many tables have been synced
final AtomicInteger tablesSynced = new AtomicInteger();
// # of synced tables, after which a log message is printed for progress
// (this is 0 only when there are no tables, in which case no task evaluates the modulo below)
final int progressBatch = (udbTableNames.size() < 100) ? udbTableNames.size() : udbTableNames.size() / 10;
// sync each table in parallel, with the executor service
List<Callable<Void>> tasks = new ArrayList<>(udbTableNames.size());
final Database thisDb = this;
for (String tableName : udbTableNames) {
if (mIgnoreTables.contains(tableName)) {
// this table should be ignored.
// It still counts towards the progress total, so the counter is advanced here.
builder.addTablesIgnored(tableName);
tablesSynced.incrementAndGet();
continue;
}
tasks.add(() -> {
// Save all exceptions
// (they are recorded per table in the sync status rather than failing the task)
try {
Table previousTable = mTables.get(tableName);
UdbTable udbTable = mUdb.getTable(tableName, mDbConfig.getUdbBypassSpec());
Table newTable = Table.create(thisDb, udbTable, previousTable);
if (newTable != null) {
// table was created or was updated
alluxio.proto.journal.Table.AddTableEntry addTableEntry = newTable.getTableJournalProto();
Journal.JournalEntry entry = Journal.JournalEntry.newBuilder().setAddTable(addTableEntry).build();
applyAndJournal(context, entry);
// separate the possible big table entry into multiple smaller table partitions entry
newTable.getTablePartitionsJournalProto().forEach((partitionsEntry) -> {
applyAndJournal(context, Journal.JournalEntry.newBuilder().setAddTablePartitions(partitionsEntry).build());
});
synchronized (builder) {
builder.addTablesUpdated(tableName);
}
} else {
// a null result from Table.create is reported as "unchanged"
synchronized (builder) {
builder.addTablesUnchanged(tableName);
}
}
} catch (Exception e) {
LOG.error(String.format("Sync thread failed for %s.%s", thisDb.mName, tableName), e);
synchronized (builder) {
builder.putTablesErrors(tableName, e.toString());
}
} finally {
// Progress accounting runs for every table, whether it succeeded or failed.
int syncedTables = tablesSynced.incrementAndGet();
// -1 means "do not log for this table"
int percentage = -1;
// Only log at regular intervals, or when complete
if (syncedTables % progressBatch == 0) {
// compute percentage, cap at 99%
percentage = Math.min(Math.round(100.0f * syncedTables / udbTableNames.size()), 99);
}
if (syncedTables == udbTableNames.size()) {
percentage = 100;
}
if (percentage != -1) {
LOG.info("Syncing db {} progress: completed {} of {} tables ({}%)", mName, syncedTables, udbTableNames.size(), percentage);
}
}
return null;
});
}
// create a thread pool to parallelize the sync
int threads;
try {
threads = Integer.parseInt(mConfig.get(CatalogProperty.DB_SYNC_THREADS));
} catch (NumberFormatException e) {
// an unparsable value falls back to the default thread count
LOG.warn("Catalog property {} with value {} cannot be parsed as an int", CatalogProperty.DB_SYNC_THREADS.getName(), mConfig.get(CatalogProperty.DB_SYNC_THREADS));
threads = CatalogProperty.DEFAULT_DB_SYNC_THREADS;
}
if (threads < 1) {
// if invalid, set to the default
threads = CatalogProperty.DEFAULT_DB_SYNC_THREADS;
}
ExecutorService service = ExecutorServiceFactories.fixedThreadPool(String.format("Catalog-Sync-%s", mName), threads).create();
try {
// run all table tasks, passing mUdbSyncTimeoutMs as the timeout
CommonUtils.invokeAll(service, tasks, mUdbSyncTimeoutMs);
} catch (Exception e) {
throw new IOException("Failed to sync database " + mName + ". error: " + e.toString(), e);
} finally {
// shutdown the thread pool
service.shutdownNow();
String errorMessage = String.format("waiting for db-sync thread pool to shut down. db: %s", mName);
try {
// wait up to 5 seconds for the pool to terminate; a timeout is only logged
if (!service.awaitTermination(5, TimeUnit.SECONDS)) {
LOG.warn("Timed out " + errorMessage);
}
} catch (InterruptedException e) {
// restore the interrupt flag for the caller
Thread.currentThread().interrupt();
LOG.warn("Interrupted while " + errorMessage);
}
}
// Remove tables that are still tracked locally but were not listed by the UDB.
for (Table existingTable : mTables.values()) {
if (!udbTableNames.contains(existingTable.getName())) {
// this table no longer exists in udb
alluxio.proto.journal.Table.RemoveTableEntry removeTableEntry = alluxio.proto.journal.Table.RemoveTableEntry.newBuilder().setDbName(mName).setTableName(existingTable.getName()).setVersion(existingTable.getVersion()).build();
Journal.JournalEntry entry = Journal.JournalEntry.newBuilder().setRemoveTable(removeTableEntry).build();
applyAndJournal(context, entry);
builder.addTablesRemoved(existingTable.getName());
}
}
return builder.build();
}
use of alluxio.table.common.udb.UdbTable in project alluxio by Alluxio.
the class AlluxioCatalogTest method createMockUdbTable.
/**
 * Builds a Mockito mock of {@link UdbTable} with the given name and schema.
 *
 * The mock reports a single mocked partition (whose spec is the table name), no partition
 * columns, statistics produced by {@code createRandomStatsForSchema}, and a Hive layout built
 * from the default {@link PartitionInfo} instance.
 *
 * @param name the name returned by the mocked table
 * @param schema the schema returned by the mocked table
 * @return the mocked table
 */
UdbTable createMockUdbTable(String name, Schema schema) throws IOException {
  // the only partition of the table
  HiveLayout partitionLayout =
      new HiveLayout(PartitionInfo.getDefaultInstance(), Collections.emptyList());
  UdbPartition mockPartition = Mockito.mock(UdbPartition.class);
  Mockito.when(mockPartition.getSpec()).thenReturn(name);
  Mockito.when(mockPartition.getLayout()).thenReturn(partitionLayout);

  // the table itself
  UdbTable mockTable = Mockito.mock(UdbTable.class);
  Mockito.when(mockTable.getName()).thenReturn(name);
  Mockito.when(mockTable.getSchema()).thenReturn(schema);
  Mockito.when(mockTable.getStatistics()).thenReturn(createRandomStatsForSchema(schema));
  Mockito.when(mockTable.getPartitions()).thenReturn(Arrays.asList(mockPartition));
  Mockito.when(mockTable.getPartitionCols()).thenReturn(Collections.emptyList());
  Mockito.when(mockTable.getLayout()).thenReturn(
      new HiveLayout(PartitionInfo.getDefaultInstance(), Collections.emptyList()).toProto());
  return mockTable;
}
use of alluxio.table.common.udb.UdbTable in project alluxio by Alluxio.
the class AlluxioCatalogTest method testGetColumnStats.
/**
 * Checks that column statistics lookups return one entry per requested column that exists, and
 * nothing for unknown columns or an empty column list.
 */
@Test
public void testGetColumnStats() throws Exception {
  // the catalog lookups below use "test" for both of their name arguments
  final String dbName = "test";
  final String tableName = "test";

  // setup: register a mocked table with columns c1, c2, c3 in a mocked database
  Schema schema = schemaFromColNames("c1", "c2", "c3");
  UdbTable mockTable = createMockUdbTable("test", schema);
  Database database = createMockDatabase("noop", "test", Collections.emptyList());
  addTableToDb(database, Table.create(database, mockTable, null));
  addDbToCatalog(database);

  // basic, filter on each col
  for (String column : new String[] {"c1", "c2", "c3"}) {
    assertEquals(1,
        mCatalog.getTableColumnStatistics(dbName, tableName, Lists.newArrayList(column)).size());
  }

  // two columns at once, in both orders
  assertEquals(2,
      mCatalog.getTableColumnStatistics(dbName, tableName, Lists.newArrayList("c1", "c2")).size());
  assertEquals(2,
      mCatalog.getTableColumnStatistics(dbName, tableName, Lists.newArrayList("c2", "c1")).size());

  // a column that does not exist yields no statistics
  assertEquals(0,
      mCatalog.getTableColumnStatistics(dbName, tableName, Lists.newArrayList("doesnotexist"))
          .size());

  // an empty column list yields no statistics
  assertEquals(0,
      mCatalog.getTableColumnStatistics(dbName, tableName, Lists.newArrayList()).size());
}
use of alluxio.table.common.udb.UdbTable in project alluxio by Alluxio.
the class GlueDatabase method getTable.
/**
 * Fetches a table and its partitions from Glue and converts them into a {@link UdbTable}.
 *
 * The table and partitions are handed to {@code mountAlluxioPaths} to obtain the path translator
 * used when converting storage descriptors. Table-level and partition-level column statistics
 * are only requested when the corresponding configuration properties are enabled.
 *
 * @param tableName the name of the table to fetch
 * @param bypassSpec the bypass spec forwarded to {@code mountAlluxioPaths}
 * @return the converted table
 * @throws IOException if the table does not exist (as a {@code NotFoundException}), or if Glue
 *         reports a validation or encryption error
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
  Table glueTable;
  List<Partition> gluePartitions;
  try {
    GetTableRequest request = new GetTableRequest()
        .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
        .withDatabaseName(mGlueDbName)
        .withName(tableName);
    glueTable = getClient().getTable(request).getTable();
    gluePartitions = batchGetPartitions(getClient(), tableName);
    PathTranslator translator = mountAlluxioPaths(glueTable, gluePartitions, bypassSpec);

    // a table without partition keys is handled as unpartitioned
    List<Column> partitionKeys = glueTable.getPartitionKeys() == null
        ? Collections.emptyList()
        : glueTable.getPartitionKeys();

    // Get table parameters
    Map<String, String> tableParams;
    if (glueTable.getParameters() == null) {
      tableParams = Collections.emptyMap();
    } else {
      tableParams = glueTable.getParameters();
    }

    // Get column statistics info for table
    List<String> dataColumnNames = new ArrayList<>();
    for (Column column : glueTable.getStorageDescriptor().getColumns()) {
      dataColumnNames.add(column.getName());
    }
    GetColumnStatisticsForTableRequest tableStatsRequest = new GetColumnStatisticsForTableRequest()
        .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
        .withDatabaseName(mGlueDbName)
        .withTableName(tableName)
        .withColumnNames(dataColumnNames);
    List<ColumnStatisticsInfo> tableStats = new ArrayList<>();
    if (mGlueConfiguration.getBoolean(Property.TABLE_COLUMN_STATISTICS_ENABLE)) {
      tableStats = getTableColumnStatistics(mGlueDbName, tableName, tableStatsRequest);
    }

    // Get column statistics info for partitions
    // potential expensive call: one request is issued per partition
    Map<String, List<ColumnStatisticsInfo>> partitionStats = new HashMap<>();
    if (mGlueConfiguration.getBoolean(Property.PARTITION_COLUMN_STATISTICS_ENABLE)) {
      for (Partition gluePartition : gluePartitions) {
        List<String> values = gluePartition.getValues();
        if (values == null) {
          // partitions without values are skipped
          continue;
        }
        GetColumnStatisticsForPartitionRequest partitionStatsRequest =
            new GetColumnStatisticsForPartitionRequest()
                .withCatalogId(mGlueConfiguration.get(Property.CATALOG_ID))
                .withDatabaseName(mGlueDbName)
                .withTableName(tableName)
                .withColumnNames(dataColumnNames)
                .withPartitionValues(values);
        String statsKey = GlueUtils.makePartitionName(partitionKeys, gluePartition.getValues());
        partitionStats.put(statsKey,
            getPartitionColumnStatistics(mGlueDbName, tableName, partitionStatsRequest));
      }
    }

    // table-level layout
    PartitionInfo tableInfo = PartitionInfo.newBuilder()
        .setDbName(mGlueDbName)
        .setTableName(tableName)
        .addAllDataCols(GlueUtils.toProto(glueTable.getStorageDescriptor().getColumns()))
        .setStorage(GlueUtils.toProto(glueTable.getStorageDescriptor(), translator))
        .putAllParameters(tableParams)
        .build();
    Layout tableLayout = Layout.newBuilder()
        .setLayoutType(HiveLayout.TYPE)
        .setLayoutData(tableInfo.toByteString())
        .build();

    List<UdbPartition> convertedPartitions = new ArrayList<>();
    if (!partitionKeys.isEmpty()) {
      for (Partition gluePartition : gluePartitions) {
        String partitionName =
            GlueUtils.makePartitionName(partitionKeys, gluePartition.getValues());
        PartitionInfo.Builder infoBuilder = PartitionInfo.newBuilder()
            .setDbName(mGlueDbName)
            .setTableName(tableName)
            .addAllDataCols(GlueUtils.toProto(gluePartition.getStorageDescriptor().getColumns()))
            .setStorage(GlueUtils.toProto(gluePartition.getStorageDescriptor(), translator))
            .setPartitionName(partitionName)
            .putAllParameters(gluePartition.getParameters() == null
                ? Collections.emptyMap()
                : gluePartition.getParameters());
        if (gluePartition.getValues() != null) {
          infoBuilder.addAllValues(gluePartition.getValues());
        }
        HiveLayout partitionLayout = new HiveLayout(infoBuilder.build(),
            partitionStats.getOrDefault(partitionName, Collections.emptyList()));
        convertedPartitions.add(new GluePartition(partitionLayout));
      }
    } else {
      // unpartitioned table: expose a single partition named after the table
      PartitionInfo.Builder infoBuilder = PartitionInfo.newBuilder()
          .setDbName(mGlueDbName)
          .setTableName(tableName)
          .addAllDataCols(GlueUtils.toProto(glueTable.getStorageDescriptor().getColumns()))
          .setStorage(GlueUtils.toProto(glueTable.getStorageDescriptor(), translator))
          .setPartitionName(tableName)
          .putAllParameters(tableParams);
      HiveLayout partitionLayout = new HiveLayout(infoBuilder.build(), Collections.emptyList());
      convertedPartitions.add(new GluePartition(partitionLayout));
    }

    return new GlueTable(
        this,
        translator,
        tableName,
        GlueUtils.toProtoSchema(glueTable.getStorageDescriptor().getColumns()),
        tableStats,
        // Get FieldSchema from partition keys
        GlueUtils.toProto(glueTable.getPartitionKeys()),
        convertedPartitions,
        tableLayout,
        glueTable);
  } catch (EntityNotFoundException e) {
    throw new NotFoundException("Table " + tableName + " does not exist in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + ".", e);
  } catch (ValidationException e) {
    throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " with validation error: " + e.getMessage(), e);
  } catch (GlueEncryptionException e) {
    throw new IOException("Failed to get table: " + tableName + " in Database: " + mGlueDbName + "; Catalog ID: " + mGlueConfiguration.get(Property.CATALOG_ID) + " error: " + e.getMessage(), e);
  }
}
use of alluxio.table.common.udb.UdbTable in project alluxio by Alluxio.
the class HiveDatabase method getTable.
/**
 * Fetches a table, its partitions and their column statistics from the Hive metastore and
 * converts them into a {@link UdbTable}.
 *
 * All metastore calls are made while holding a pooled client, which is released before the
 * results are converted. Partition column statistics are requested in batches of at most
 * {@code MAX_PARTITION_COLUMN_STATISTICS} partition names.
 *
 * Missing (null) table or partition parameter maps are treated as empty, matching the handling
 * in the Glue implementation of this method.
 *
 * @param tableName the name of the table to fetch
 * @param bypassSpec the bypass spec forwarded to {@code mountAlluxioPaths}
 * @return the converted table
 * @throws IOException if the table does not exist (as a {@code NotFoundException}), or if any
 *         metastore call fails
 */
@Override
public UdbTable getTable(String tableName, UdbBypassSpec bypassSpec) throws IOException {
  try {
    Table table;
    List<Partition> partitions;
    List<ColumnStatisticsObj> columnStats;
    List<String> partitionColumns;
    Map<String, List<ColumnStatisticsInfo>> statsMap = new HashMap<>();
    // perform all the hive client operations, and release the client early.
    try (CloseableResource<IMetaStoreClient> client = mClientPool.acquireClientResource()) {
      table = client.get().getTable(mHiveDbName, tableName);
      // Potentially expensive call
      partitions = client.get().listPartitions(mHiveDbName, table.getTableName(), (short) -1);
      // the data column names are needed for both the table and the partition statistics
      List<String> dataColumns = table.getSd().getCols().stream()
          .map(org.apache.hadoop.hive.metastore.api.FieldSchema::getName)
          .collect(Collectors.toList());
      columnStats = client.get().getTableColumnStatistics(mHiveDbName, tableName, dataColumns);
      // construct the partition statistics
      partitionColumns = table.getPartitionKeys().stream()
          .map(org.apache.hadoop.hive.metastore.api.FieldSchema::getName)
          .collect(Collectors.toList());
      List<String> partitionNames = partitions.stream()
          .map(partition -> FileUtils.makePartName(partitionColumns, partition.getValues()))
          .collect(Collectors.toList());
      for (List<String> partialPartitionNames
          : Lists.partition(partitionNames, MAX_PARTITION_COLUMN_STATISTICS)) {
        statsMap.putAll(client.get()
            .getPartitionColumnStatistics(mHiveDbName, tableName, partialPartitionNames,
                dataColumns)
            .entrySet().stream()
            .collect(Collectors.toMap(
                Map.Entry::getKey,
                e -> e.getValue().stream().map(HiveUtils::toProto).collect(Collectors.toList()),
                // on a duplicate key, keep the later entry
                (e1, e2) -> e2)));
      }
    }
    PathTranslator pathTranslator = mountAlluxioPaths(table, partitions, bypassSpec);
    List<ColumnStatisticsInfo> colStats =
        columnStats.stream().map(HiveUtils::toProto).collect(Collectors.toList());
    // putAllParameters rejects null, so a missing parameter map is replaced with an empty one
    Map<String, String> tableParameters =
        table.getParameters() == null ? Collections.emptyMap() : table.getParameters();
    // construct table layout
    PartitionInfo partitionInfo = PartitionInfo.newBuilder()
        .setDbName(getUdbContext().getDbName())
        .setTableName(tableName)
        .addAllDataCols(HiveUtils.toProto(table.getSd().getCols()))
        .setStorage(HiveUtils.toProto(table.getSd(), pathTranslator))
        .putAllParameters(tableParameters)
        .build();
    Layout layout = Layout.newBuilder()
        .setLayoutType(HiveLayout.TYPE)
        .setLayoutData(partitionInfo.toByteString())
        .build();
    // create udb partitions info
    List<UdbPartition> udbPartitions = new ArrayList<>();
    if (partitionColumns.isEmpty()) {
      // unpartitioned table, generate a partition
      PartitionInfo.Builder pib = PartitionInfo.newBuilder()
          .setDbName(getUdbContext().getDbName())
          .setTableName(tableName)
          .addAllDataCols(HiveUtils.toProto(table.getSd().getCols()))
          .setStorage(HiveUtils.toProto(table.getSd(), pathTranslator))
          .setPartitionName(tableName)
          .putAllParameters(tableParameters);
      udbPartitions.add(new HivePartition(new HiveLayout(pib.build(), Collections.emptyList())));
    } else {
      for (Partition partition : partitions) {
        String partName = FileUtils.makePartName(partitionColumns, partition.getValues());
        PartitionInfo.Builder pib = PartitionInfo.newBuilder()
            .setDbName(getUdbContext().getDbName())
            .setTableName(tableName)
            .addAllDataCols(HiveUtils.toProto(partition.getSd().getCols()))
            .setStorage(HiveUtils.toProto(partition.getSd(), pathTranslator))
            .setPartitionName(partName)
            .putAllParameters(partition.getParameters() == null
                ? Collections.emptyMap()
                : partition.getParameters());
        if (partition.getValues() != null) {
          pib.addAllValues(partition.getValues());
        }
        udbPartitions.add(new HivePartition(new HiveLayout(pib.build(),
            statsMap.getOrDefault(partName, Collections.emptyList()))));
      }
    }
    return new HiveTable(tableName, HiveUtils.toProtoSchema(table.getSd().getCols()), colStats,
        HiveUtils.toProto(table.getPartitionKeys()), udbPartitions, layout, table);
  } catch (NoSuchObjectException e) {
    throw new NotFoundException("Table " + tableName + " does not exist.", e);
  } catch (TException e) {
    throw new IOException("Failed to get table: " + tableName + " error: " + e.getMessage(), e);
  }
}
Aggregations