Use of org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset in project incubator-gobblin by apache.
The class HiveQueryExecutionWriter, method addPropsForPublisher:
/**
* Method to add properties needed by publisher to preserve partition params
*/
private void addPropsForPublisher(QueryBasedHiveConversionEntity hiveConversionEntity) {
  if (!hiveConversionEntity.getPartition().isPresent()) {
    return;
  }
  ConvertibleHiveDataset convertibleHiveDataset = hiveConversionEntity.getConvertibleHiveDataset();
  for (String format : convertibleHiveDataset.getDestFormats()) {
    Optional<ConvertibleHiveDataset.ConversionConfig> conversionConfigForFormat =
        convertibleHiveDataset.getConversionConfigForFormat(format);
    if (!conversionConfigForFormat.isPresent()) {
      continue;
    }
    SchemaAwareHivePartition sourcePartition = hiveConversionEntity.getHivePartition().get();
    // Get complete source partition name dbName@tableName@partitionName
    String completeSourcePartitionName = StringUtils.join(
        Arrays.asList(sourcePartition.getTable().getDbName(), sourcePartition.getTable().getTableName(),
            sourcePartition.getName()), AT_CHAR);
    ConvertibleHiveDataset.ConversionConfig config = conversionConfigForFormat.get();
    // Get complete destination partition name dbName@tableName@partitionName
    String completeDestPartitionName = StringUtils.join(
        Arrays.asList(config.getDestinationDbName(), config.getDestinationTableName(), sourcePartition.getName()),
        AT_CHAR);
    workUnit.setProp(HiveConvertPublisher.COMPLETE_SOURCE_PARTITION_NAME, completeSourcePartitionName);
    workUnit.setProp(HiveConvertPublisher.COMPLETE_DEST_PARTITION_NAME, completeDestPartitionName);
  }
}
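A minimal sketch of how a publisher could read these two work-unit properties back on the other side. The state parameter, the helper class, and the splitting of the dbName@tableName@partitionName value are illustrative assumptions, not HiveConvertPublisher's actual logic.

import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.data.management.conversion.hive.publisher.HiveConvertPublisher;

public class PartitionNamePropsSketch {

  // Same separator convention used above; assumed here for illustration.
  private static final String AT_CHAR = "@";

  static void logPartitionNames(WorkUnitState state) {
    // Properties written by addPropsForPublisher, if the work unit carried a partition.
    String source = state.getProp(HiveConvertPublisher.COMPLETE_SOURCE_PARTITION_NAME);
    String dest = state.getProp(HiveConvertPublisher.COMPLETE_DEST_PARTITION_NAME);
    if (source == null || dest == null) {
      return;
    }
    // Each value has the form dbName@tableName@partitionName, as built above.
    String[] sourceParts = source.split(AT_CHAR);
    System.out.println("Source partition: db=" + sourceParts[0] + ", table=" + sourceParts[1]
        + ", partition=" + sourceParts[2]);
    System.out.println("Destination partition: " + dest);
  }
}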
Use of org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset in project incubator-gobblin by apache.
The class Avro2OrcStaleDatasetCleaner, method run:
@Override
public void run() throws Exception {
  Iterator<HiveDataset> iterator = this.datasetFinder.getDatasetsIterator();
  while (iterator.hasNext()) {
    ConvertibleHiveDataset hiveDataset = (ConvertibleHiveDataset) iterator.next();
    try (AutoReturnableObject<IMetaStoreClient> client = hiveDataset.getClientPool().getClient()) {
      Set<Partition> sourcePartitions =
          new HashSet<>(HiveUtils.getPartitions(client.get(), hiveDataset.getTable(), Optional.<String>absent()));
      sourcePartitions.parallelStream()
          .filter(partition -> isUnixTimeStamp(partition.getDataLocation().getName()))
          .forEach(partition -> {
            Arrays.stream(listFiles(partition.getDataLocation().getParent()))
                .filter(fileStatus -> !fileStatus.getPath().toString()
                    .equalsIgnoreCase(partition.getDataLocation().toString()))
                .forEach(fileStatus -> {
                  deletePath(fileStatus, this.graceTimeInMillis, true);
                });
          });
    }
  }
}
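The cleaner above relies on private helpers (isUnixTimeStamp, listFiles, deletePath) that are not shown here. A rough sketch, assuming a Hadoop FileSystem handle and a grace period in milliseconds, of how a comparable stale-sibling check could be expressed directly against the FileSystem API; the modification-time cutoff stands in for the real deletePath logic.

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class StaleSiblingSketch {

  // Deletes siblings of the current partition directory that are older than the grace period.
  static void deleteStaleSiblings(FileSystem fs, Path currentPartitionDir, long graceTimeInMillis)
      throws IOException {
    long cutoff = System.currentTimeMillis() - graceTimeInMillis;
    for (FileStatus sibling : fs.listStatus(currentPartitionDir.getParent())) {
      boolean isCurrent = sibling.getPath().toString().equalsIgnoreCase(currentPartitionDir.toString());
      if (!isCurrent && sibling.getModificationTime() < cutoff) {
        // Recursive delete, mirroring the deletePath(fileStatus, graceTimeInMillis, true) call above.
        fs.delete(sibling.getPath(), true);
      }
    }
  }
}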
Use of org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset in project incubator-gobblin by apache.
The class ValidationJob, method runCountValidation:
private void runCountValidation() throws InterruptedException {
  try {
    // Validation results
    this.successfulConversions = Maps.newConcurrentMap();
    this.failedConversions = Maps.newConcurrentMap();
    this.warnConversions = Maps.newConcurrentMap();
    this.dataValidationFailed = Maps.newConcurrentMap();
    this.dataValidationSuccessful = Maps.newConcurrentMap();
    // Find datasets to validate
    Iterator<HiveDataset> iterator = this.datasetFinder.getDatasetsIterator();
    EventSubmitter.submit(Optional.of(this.eventSubmitter), EventConstants.VALIDATION_FIND_HIVE_TABLES_EVENT);
    while (iterator.hasNext()) {
      ConvertibleHiveDataset hiveDataset = (ConvertibleHiveDataset) iterator.next();
      try (AutoReturnableObject<IMetaStoreClient> client = hiveDataset.getClientPool().getClient()) {
        // Validate dataset
        log.info(String.format("Validating dataset: %s", hiveDataset));
        if (HiveUtils.isPartitioned(hiveDataset.getTable())) {
          processPartitionedTable(hiveDataset, client);
        } else {
          processNonPartitionedTable(hiveDataset);
        }
      }
    }
    // Wait for all validation queries to finish
    log.info(String.format("Waiting for %d futures to complete", this.futures.size()));
    this.exec.shutdown();
    this.exec.awaitTermination(4, TimeUnit.HOURS);
    boolean oneFutureFailure = false;
    // Check if there were any exceptions
    for (Future<Void> future : this.futures) {
      try {
        future.get();
      } catch (Throwable t) {
        log.error("getValidationOutputFromHive failed", t);
        oneFutureFailure = true;
      }
    }
    // These are then converted into log lines in the Azkaban logs as done below
    for (Map.Entry<String, String> successfulConversion : this.successfulConversions.entrySet()) {
      log.info(String.format("Successful conversion: %s [%s]", successfulConversion.getKey(),
          successfulConversion.getValue()));
    }
    for (Map.Entry<String, String> warnConversion : this.warnConversions.entrySet()) {
      log.warn(String.format("No conversion found for: %s [%s]", warnConversion.getKey(), warnConversion.getValue()));
    }
    for (Map.Entry<String, String> failedConversion : this.failedConversions.entrySet()) {
      log.error(String.format("Failed conversion: %s [%s]", failedConversion.getKey(), failedConversion.getValue()));
    }
    for (Map.Entry<String, String> success : this.dataValidationSuccessful.entrySet()) {
      log.info(String.format("Data validation successful: %s [%s]", success.getKey(), success.getValue()));
    }
    for (Map.Entry<String, String> failed : this.dataValidationFailed.entrySet()) {
      log.error(String.format("Data validation failed: %s [%s]", failed.getKey(), failed.getValue()));
    }
    if (!this.failedConversions.isEmpty() || !this.dataValidationFailed.isEmpty()) {
      throw new RuntimeException(String.format(
          "Validation failed for %s conversions. See previous logs for exact validation failures",
          failedConversions.size()));
    }
    if (oneFutureFailure) {
      throw new RuntimeException("At least one hive ddl failed. Check previous logs");
    }
  } catch (IOException e) {
    Throwables.propagate(e);
  }
}
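The validation queries themselves are submitted elsewhere; runCountValidation only drains this.futures. A self-contained sketch of the submit/shutdown/awaitTermination/get pattern it relies on; the fixed thread pool, the task count, and the placeholder task body are assumptions.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class FutureCollectionSketch {

  public static void main(String[] args) throws InterruptedException {
    ExecutorService exec = Executors.newFixedThreadPool(4);
    List<Future<Void>> futures = new ArrayList<>();

    // Submit one placeholder "validation" task per dataset.
    for (int i = 0; i < 10; i++) {
      Callable<Void> task = () -> {
        // Real code would run the validation queries here.
        return null;
      };
      futures.add(exec.submit(task));
    }

    // Stop accepting new work and wait, as runCountValidation does.
    exec.shutdown();
    exec.awaitTermination(4, TimeUnit.HOURS);

    // Surface failures only after every task has had a chance to run.
    boolean oneFutureFailure = false;
    for (Future<Void> future : futures) {
      try {
        future.get();
      } catch (ExecutionException e) {
        oneFutureFailure = true;
      }
    }
    if (oneFutureFailure) {
      throw new RuntimeException("At least one validation task failed");
    }
  }
}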