use of com.netflix.metacat.common.server.connectors.model.StorageInfo in project metacat by Netflix.
the class HiveConnectorFastPartitionService method getpartitions.
private List<PartitionInfo> getpartitions(@Nonnull @NonNull final String databaseName, @Nonnull @NonNull final String tableName, @Nullable final List<String> partitionIds, final String filterExpression, final Sort sort, final Pageable pageable, final boolean includePartitionDetails) {
final FilterPartition filter = new FilterPartition();
// true when the filter expression references the batch id field
final boolean isBatched = !Strings.isNullOrEmpty(filterExpression) && filterExpression.contains(FIELD_BATCHID);
final boolean hasDateCreated = !Strings.isNullOrEmpty(filterExpression) && filterExpression.contains(FIELD_DATE_CREATED);
// Handler for reading the result set
final ResultSetHandler<List<PartitionDetail>> handler = rs -> {
final List<PartitionDetail> result = Lists.newArrayList();
while (rs.next()) {
final String name = rs.getString("name");
final String uri = rs.getString("uri");
final long createdDate = rs.getLong(FIELD_DATE_CREATED);
Map<String, String> values = null;
if (hasDateCreated) {
values = Maps.newHashMap();
values.put(FIELD_DATE_CREATED, createdDate + "");
}
if (Strings.isNullOrEmpty(filterExpression) || filter.evaluatePartitionExpression(filterExpression, name, uri, isBatched, values)) {
final Long id = rs.getLong("id");
final Long sdId = rs.getLong("sd_id");
final Long serdeId = rs.getLong("serde_id");
final String inputFormat = rs.getString("input_format");
final String outputFormat = rs.getString("output_format");
final String serializationLib = rs.getString("slib");
final StorageInfo storageInfo = new StorageInfo();
storageInfo.setUri(uri);
storageInfo.setInputFormat(inputFormat);
storageInfo.setOutputFormat(outputFormat);
storageInfo.setSerializationLib(serializationLib);
final AuditInfo auditInfo = new AuditInfo();
auditInfo.setCreatedDate(Date.from(Instant.ofEpochSecond(createdDate)));
auditInfo.setLastModifiedDate(Date.from(Instant.ofEpochSecond(createdDate)));
result.add(new PartitionDetail(id, sdId, serdeId, PartitionInfo.builder().name(QualifiedName.ofPartition(catalogName, databaseName, tableName, name)).auditInfo(auditInfo).serde(storageInfo).build()));
}
}
return result;
};
final List<PartitionInfo> partitionInfos = new ArrayList<>();
final List<PartitionDetail> partitions = getHandlerResults(databaseName, tableName, filterExpression, partitionIds, SQL_GET_PARTITIONS, handler, sort, pageable);
if (includePartitionDetails && !partitions.isEmpty()) {
final List<Long> partIds = Lists.newArrayListWithCapacity(partitions.size());
final List<Long> sdIds = Lists.newArrayListWithCapacity(partitions.size());
final List<Long> serdeIds = Lists.newArrayListWithCapacity(partitions.size());
for (PartitionDetail partitionDetail : partitions) {
partIds.add(partitionDetail.getId());
sdIds.add(partitionDetail.getSdId());
serdeIds.add(partitionDetail.getSerdeId());
}
final List<ListenableFuture<Void>> futures = Lists.newArrayList();
final Map<Long, Map<String, String>> partitionParams = Maps.newHashMap();
futures.add(threadServiceManager.getExecutor().submit(() -> populateParameters(partIds, SQL_GET_PARTITION_PARAMS, "part_id", partitionParams)));
final Map<Long, Map<String, String>> sdParams = Maps.newHashMap();
if (!sdIds.isEmpty()) {
futures.add(threadServiceManager.getExecutor().submit(() -> populateParameters(sdIds, SQL_GET_SD_PARAMS, "sd_id", sdParams)));
}
final Map<Long, Map<String, String>> serdeParams = Maps.newHashMap();
if (!serdeIds.isEmpty()) {
futures.add(threadServiceManager.getExecutor().submit(() -> populateParameters(serdeIds, SQL_GET_SERDE_PARAMS, "serde_id", serdeParams)));
}
try {
Futures.transform(Futures.successfulAsList(futures), Functions.constant(null)).get(1, TimeUnit.HOURS);
} catch (Exception e) {
Throwables.propagate(e);
}
for (PartitionDetail partitionDetail : partitions) {
partitionDetail.getPartitionInfo().setMetadata(partitionParams.get(partitionDetail.getId()));
partitionDetail.getPartitionInfo().getSerde().setParameters(sdParams.get(partitionDetail.getSdId()));
partitionDetail.getPartitionInfo().getSerde().setSerdeInfoParameters(serdeParams.get(partitionDetail.getSerdeId()));
}
}
for (PartitionDetail partitionDetail : partitions) {
partitionInfos.add(partitionDetail.getPartitionInfo());
}
return partitionInfos;
}
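The populateParameters helper submitted to the executor above is not shown on this page. Below is a minimal, hypothetical sketch of what such a helper could look like, assuming a javax.sql.DataSource field named dataSource and a parameters table with id/key/value columns; the SQL shape, column names, and field names are assumptions for illustration, not the Metacat implementation. Returning Void (null) is what lets the call sites submit it as a Callable<Void> and collect ListenableFuture<Void> results.

// Hypothetical sketch only: load key/value parameters for a batch of ids and
// group them by id into the supplied map. Table and column names are assumed.
private Void populateParameters(final List<Long> ids, final String sql, final String idName,
                                final Map<Long, Map<String, String>> result) throws SQLException {
    // e.g. sql = "select part_id, param_key, param_value from PARTITION_PARAMS where part_id in (%s)"
    final String placeholders = String.join(",", Collections.nCopies(ids.size(), "?"));
    try (Connection connection = dataSource.getConnection();
         PreparedStatement ps = connection.prepareStatement(String.format(sql, placeholders))) {
        for (int i = 0; i < ids.size(); i++) {
            ps.setLong(i + 1, ids.get(i));
        }
        try (ResultSet rs = ps.executeQuery()) {
            while (rs.next()) {
                final long id = rs.getLong(idName);
                result.computeIfAbsent(id, key -> Maps.newHashMap())
                      .put(rs.getString("param_key"), rs.getString("param_value"));
            }
        }
    }
    // Returning null (of type Void) keeps the signature compatible with Callable<Void>.
    return null;
}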
use of com.netflix.metacat.common.server.connectors.model.StorageInfo in project metacat by Netflix.
the class S3ConnectorInfoConverter method toPartitionInfo.
@Override
public PartitionInfo toPartitionInfo(final TableInfo tableInfo, final Partition partition) {
final QualifiedName tableName = tableInfo.getName();
final StorageInfo storageInfo = tableInfo.getSerde();
storageInfo.setUri(partition.getUri());
final AuditInfo auditInfo = AuditInfo.builder().createdDate(partition.getCreatedDate()).lastModifiedDate(partition.getLastUpdatedDate()).build();
final AuditInfo tableAuditInfo = tableInfo.getAudit();
if (tableAuditInfo != null) {
auditInfo.setCreatedBy(tableAuditInfo.getCreatedBy());
auditInfo.setLastModifiedBy(tableAuditInfo.getLastModifiedBy());
}
return PartitionInfo.builder().name(QualifiedName.ofPartition(tableName.getCatalogName(), tableName.getDatabaseName(), tableName.getTableName(), partition.getName())).serde(storageInfo).auditInfo(auditInfo).build();
}
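Note that this overload reuses the StorageInfo instance held by tableInfo and sets the partition URI on it in place, so the table-level serde object is mutated. If that sharing is not intended, a copy can be taken first. A minimal sketch, using only the StorageInfo setters and getters that appear elsewhere on this page (getParameters() is assumed to exist alongside setParameters()):

// Sketch: copy the table-level StorageInfo before overriding the URI so the
// shared TableInfo instance is not mutated. Accessors beyond those shown on
// this page are assumed.
private StorageInfo copyOf(final StorageInfo source) {
    final StorageInfo copy = new StorageInfo();
    copy.setUri(source.getUri());
    copy.setInputFormat(source.getInputFormat());
    copy.setOutputFormat(source.getOutputFormat());
    copy.setSerializationLib(source.getSerializationLib());
    copy.setParameters(source.getParameters());
    copy.setSerdeInfoParameters(source.getSerdeInfoParameters());
    return copy;
}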
use of com.netflix.metacat.common.server.connectors.model.StorageInfo in project metacat by Netflix.
the class S3ConnectorInfoConverter method toPartitionInfo.
/**
* Returns a partition info.
* @param tableName table name
* @param table s3 table
* @param partition partition
* @return partition info
*/
PartitionInfo toPartitionInfo(final QualifiedName tableName, final Table table, final Partition partition) {
final StorageInfo storageInfo = toStorageInfo(table);
storageInfo.setUri(partition.getUri());
final AuditInfo auditInfo = AuditInfo.builder().createdDate(partition.getCreatedDate()).lastModifiedDate(partition.getLastUpdatedDate()).build();
final AuditInfo tableAuditInfo = toAuditInfo(table);
if (tableAuditInfo != null) {
auditInfo.setCreatedBy(tableAuditInfo.getCreatedBy());
auditInfo.setLastModifiedBy(tableAuditInfo.getLastModifiedBy());
}
return PartitionInfo.builder().name(QualifiedName.ofPartition(tableName.getCatalogName(), tableName.getDatabaseName(), tableName.getTableName(), partition.getName())).serde(storageInfo).auditInfo(auditInfo).build();
}
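The toStorageInfo(table) helper referenced above is not included on this page. A rough sketch of what such a conversion could look like, assuming the S3 model Table exposes a Location with a URI and a nested Info object carrying the serde details; every Location/Info accessor below is an assumption for illustration, not confirmed by this page.

// Hypothetical sketch: map the S3 model Table to the connector StorageInfo.
// Only the StorageInfo setters are taken from the snippets on this page.
private StorageInfo toStorageInfo(final Table table) {
    final StorageInfo result = new StorageInfo();
    final Location location = table.getLocation();   // assumed accessor
    if (location != null) {
        result.setUri(location.getUri());             // assumed accessor
        final Info info = location.getInfo();         // assumed accessor
        if (info != null) {
            result.setInputFormat(info.getInputFormat());
            result.setOutputFormat(info.getOutputFormat());
            result.setSerializationLib(info.getSerializationLib());
            result.setParameters(info.getParameters());
        }
    }
    return result;
}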
use of com.netflix.metacat.common.server.connectors.model.StorageInfo in project metacat by Netflix.
the class DirectSqlGetPartition method getPartitions.
private List<PartitionHolder> getPartitions(final String databaseName, final String tableName, @Nullable final List<String> partitionIds, @Nullable final String filterExpression, @Nullable final Sort sort, @Nullable final Pageable pageable, final boolean includePartitionDetails, final boolean forceDisableAudit) {
final FilterPartition filter = config.escapePartitionNameOnFilter() ? new HiveFilterPartition() : new FilterPartition();
// true when the filter expression references the batch id field
final boolean isBatched = !Strings.isNullOrEmpty(filterExpression) && filterExpression.contains(FIELD_BATCHID);
final boolean hasDateCreated = !Strings.isNullOrEmpty(filterExpression) && filterExpression.contains(FIELD_DATE_CREATED);
// Handler for reading the result set
final ResultSetExtractor<List<PartitionHolder>> handler = rs -> {
final List<PartitionHolder> result = Lists.newArrayList();
final QualifiedName tableQName = QualifiedName.ofTable(catalogName, databaseName, tableName);
int noOfRows = 0;
while (rs.next()) {
noOfRows++;
final String name = rs.getString("name");
final String uri = rs.getString("uri");
final long createdDate = rs.getLong(FIELD_DATE_CREATED);
Map<String, String> values = null;
if (hasDateCreated) {
values = Maps.newHashMap();
values.put(FIELD_DATE_CREATED, createdDate + "");
}
if (Strings.isNullOrEmpty(filterExpression) || filter.evaluatePartitionExpression(filterExpression, name, uri, isBatched, values)) {
final Long id = rs.getLong("id");
final Long sdId = rs.getLong("sd_id");
final Long serdeId = rs.getLong("serde_id");
final String inputFormat = rs.getString("input_format");
final String outputFormat = rs.getString("output_format");
final String serializationLib = rs.getString("slib");
final StorageInfo storageInfo = new StorageInfo();
storageInfo.setUri(uri);
storageInfo.setInputFormat(inputFormat);
storageInfo.setOutputFormat(outputFormat);
storageInfo.setSerializationLib(serializationLib);
final AuditInfo auditInfo = new AuditInfo();
auditInfo.setCreatedDate(Date.from(Instant.ofEpochSecond(createdDate)));
auditInfo.setLastModifiedDate(Date.from(Instant.ofEpochSecond(createdDate)));
result.add(new PartitionHolder(id, sdId, serdeId, PartitionInfo.builder().name(QualifiedName.ofPartition(catalogName, databaseName, tableName, name)).auditInfo(auditInfo).serde(storageInfo).build()));
}
// Fail if the number of partitions exceeds the threshold limit.
if (result.size() > config.getMaxPartitionsThreshold()) {
registry.counter(registry.createId(HiveMetrics.CounterHiveGetPartitionsExceedThresholdFailure.getMetricName()).withTags(tableQName.parts())).increment();
final String message = String.format("Number of partitions queried for table %s exceeded the threshold %d", tableQName, config.getMaxPartitionsThreshold());
log.warn(message);
throw new IllegalArgumentException(message);
}
}
registry.gauge(registry.createId(HiveMetrics.GaugePreExpressionFilterGetPartitionsCount.getMetricName()).withTags(tableQName.parts())).set(noOfRows);
return result;
};
final List<PartitionHolder> partitions = this.getHandlerResults(databaseName, tableName, filterExpression, partitionIds, SQL.SQL_GET_PARTITIONS, handler, sort, pageable, forceDisableAudit);
if (includePartitionDetails && !partitions.isEmpty()) {
final List<Long> partIds = Lists.newArrayListWithCapacity(partitions.size());
final List<Long> sdIds = Lists.newArrayListWithCapacity(partitions.size());
final List<Long> serdeIds = Lists.newArrayListWithCapacity(partitions.size());
for (PartitionHolder partitionHolder : partitions) {
partIds.add(partitionHolder.getId());
sdIds.add(partitionHolder.getSdId());
serdeIds.add(partitionHolder.getSerdeId());
}
final List<ListenableFuture<Void>> futures = Lists.newArrayList();
final Map<Long, Map<String, String>> partitionParams = Maps.newHashMap();
futures.add(threadServiceManager.getExecutor().submit(() -> populateParameters(partIds, SQL.SQL_GET_PARTITION_PARAMS, "part_id", partitionParams)));
final Map<Long, Map<String, String>> sdParams = Maps.newHashMap();
if (!sdIds.isEmpty()) {
futures.add(threadServiceManager.getExecutor().submit(() -> populateParameters(sdIds, SQL.SQL_GET_SD_PARAMS, "sd_id", sdParams)));
}
final Map<Long, Map<String, String>> serdeParams = Maps.newHashMap();
if (!serdeIds.isEmpty()) {
futures.add(threadServiceManager.getExecutor().submit(() -> populateParameters(serdeIds, SQL.SQL_GET_SERDE_PARAMS, "serde_id", serdeParams)));
}
ListenableFuture<List<Void>> future = null;
try {
future = Futures.allAsList(futures);
final int getPartitionsDetailsTimeout = Integer.parseInt(configuration.getOrDefault(HiveConfigConstants.GET_PARTITION_DETAILS_TIMEOUT, "120"));
future.get(getPartitionsDetailsTimeout, TimeUnit.SECONDS);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
try {
if (future != null) {
future.cancel(true);
}
} catch (Exception ignored) {
log.warn("Failed cancelling the task that gets the partition details.");
}
Throwables.propagate(e);
}
for (PartitionHolder partitionHolder : partitions) {
partitionHolder.getPartitionInfo().setMetadata(partitionParams.get(partitionHolder.getId()));
partitionHolder.getPartitionInfo().getSerde().setParameters(sdParams.get(partitionHolder.getSdId()));
partitionHolder.getPartitionInfo().getSerde().setSerdeInfoParameters(serdeParams.get(partitionHolder.getSerdeId()));
}
}
return partitions;
}
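When includePartitionDetails is set, the three parameter lookups above run on the ThreadServiceManager executor, are joined with Futures.allAsList, waited on for the configurable GET_PARTITION_DETAILS_TIMEOUT, and cancelled if the wait fails. A condensed, self-contained illustration of that Guava pattern; the executor wiring and the no-op task here are placeholders standing in for populateParameters, not the Metacat wiring.

import com.google.common.collect.Lists;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class BoundedDetailLookupExample {
    public static void main(final String[] args) {
        final ListeningExecutorService executor =
            MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(3));
        // Placeholder task standing in for populateParameters(ids, sql, idName, map).
        final Callable<Void> lookup = () -> null;
        final List<ListenableFuture<Void>> futures = Lists.newArrayList();
        futures.add(executor.submit(lookup));
        futures.add(executor.submit(lookup));
        futures.add(executor.submit(lookup));
        final ListenableFuture<List<Void>> combined = Futures.allAsList(futures);
        try {
            // Bound the wait, as the configurable timeout does above.
            combined.get(120, TimeUnit.SECONDS);
        } catch (InterruptedException | ExecutionException | TimeoutException e) {
            combined.cancel(true); // give up on any lookups that are still running
            throw new RuntimeException(e);
        } finally {
            executor.shutdown();
        }
    }
}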
use of com.netflix.metacat.common.server.connectors.model.StorageInfo in project metacat by Netflix.
the class DirectSqlSavePartition method _insert.
@SuppressWarnings("checkstyle:methodname")
private void _insert(final QualifiedName tableQName, final Table table, final TableSequenceIds tableSequenceIds, final PartitionSequenceIds partitionSequenceIds, final List<PartitionInfo> partitions, final long currentTimeInEpoch, final int index) {
final List<Object[]> serdesValues = Lists.newArrayList();
final List<Object[]> serdeParamsValues = Lists.newArrayList();
final List<Object[]> sdsValues = Lists.newArrayList();
final List<Object[]> partitionsValues = Lists.newArrayList();
final List<Object[]> partitionParamsValues = Lists.newArrayList();
final List<Object[]> partitionKeyValsValues = Lists.newArrayList();
final List<String> partitionNames = Lists.newArrayList();
int currentIndex = index;
for (PartitionInfo partition : partitions) {
final StorageInfo storageInfo = partition.getSerde();
final long partId = partitionSequenceIds.getPartId() + currentIndex;
final long sdsId = partitionSequenceIds.getSdsId() + currentIndex;
final long serdeId = partitionSequenceIds.getSerdeId() + currentIndex;
final String partitionName = partition.getName().getPartitionName();
final List<String> partValues = PartitionUtil.getPartValuesFromPartName(tableQName, table, partitionName);
final String escapedPartName = PartitionUtil.makePartName(table.getPartitionKeys(), partValues);
partitionsValues.add(new Object[] { 0, tableSequenceIds.getTableId(), currentTimeInEpoch, sdsId, escapedPartName, partId });
for (int i = 0; i < partValues.size(); i++) {
partitionKeyValsValues.add(new Object[] { partId, partValues.get(i), i });
}
// Partition parameters
final Map<String, String> parameters = partition.getMetadata();
if (parameters != null) {
parameters.forEach((key, value) -> partitionParamsValues.add(new Object[] { value, partId, key }));
}
partitionParamsValues.add(new Object[] { currentTimeInEpoch, partId, PARAM_LAST_DDL_TIME });
if (storageInfo != null) {
serdesValues.add(new Object[] { null, storageInfo.getSerializationLib(), serdeId });
final Map<String, String> serdeInfoParameters = storageInfo.getSerdeInfoParameters();
if (serdeInfoParameters != null) {
serdeInfoParameters.forEach((key, value) -> serdeParamsValues.add(new Object[] { value, serdeId, key }));
}
sdsValues.add(new Object[] { storageInfo.getOutputFormat(), false, tableSequenceIds.getCdId(), false, serdeId, storageInfo.getUri(), storageInfo.getInputFormat(), 0, sdsId });
}
partitionNames.add(partitionName);
currentIndex++;
}
try {
jdbcTemplate.batchUpdate(SQL.SERDES_INSERT, serdesValues, new int[] { Types.VARCHAR, Types.VARCHAR, Types.BIGINT });
jdbcTemplate.batchUpdate(SQL.SERDE_PARAMS_INSERT, serdeParamsValues, new int[] { Types.VARCHAR, Types.BIGINT, Types.VARCHAR });
jdbcTemplate.batchUpdate(SQL.SDS_INSERT, sdsValues, new int[] { Types.VARCHAR, Types.BOOLEAN, Types.BIGINT, Types.BOOLEAN, Types.BIGINT, Types.VARCHAR, Types.VARCHAR, Types.INTEGER, Types.BIGINT });
jdbcTemplate.batchUpdate(SQL.PARTITIONS_INSERT, partitionsValues, new int[] { Types.INTEGER, Types.BIGINT, Types.INTEGER, Types.BIGINT, Types.VARCHAR, Types.BIGINT });
jdbcTemplate.batchUpdate(SQL.PARTITION_PARAMS_INSERT, partitionParamsValues, new int[] { Types.VARCHAR, Types.BIGINT, Types.VARCHAR });
jdbcTemplate.batchUpdate(SQL.PARTITION_KEY_VALS_INSERT, partitionKeyValsValues, new int[] { Types.BIGINT, Types.VARCHAR, Types.INTEGER });
} catch (DuplicateKeyException e) {
throw new PartitionAlreadyExistsException(tableQName, partitionNames, e);
} catch (Exception e) {
throw new ConnectorException(String.format("Failed inserting partitions %s for table %s", partitionNames, tableQName), e);
}
}
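Each jdbcTemplate.batchUpdate call above pairs one insert statement with a list of Object[] argument rows and an int[] of java.sql.Types, one type per placeholder. A minimal standalone example of that Spring JdbcTemplate pattern; the SQL text, table, and column names here are invented for illustration and are not the real SQL.SERDES_INSERT statement or the Hive metastore DDL.

import com.google.common.collect.Lists;
import org.springframework.jdbc.core.JdbcTemplate;

import java.sql.Types;
import java.util.List;

public final class BatchInsertExample {
    // Assumes a JdbcTemplate already wired to the metastore data source.
    public void insertSerdes(final JdbcTemplate jdbcTemplate) {
        // Illustrative statement only; the real insert SQL is not shown on this page.
        final String sql = "INSERT INTO SERDES (NAME, SLIB, SERDE_ID) VALUES (?, ?, ?)";
        final List<Object[]> rows = Lists.newArrayList();
        rows.add(new Object[] {null, "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", 101L});
        rows.add(new Object[] {null, "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", 102L});
        // One java.sql.Types entry per "?", in order, matching each Object[] position.
        jdbcTemplate.batchUpdate(sql, rows, new int[] {Types.VARCHAR, Types.VARCHAR, Types.BIGINT});
    }
}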