Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
Class SerialCompactor, method compact().
@Override
public void compact() throws IOException {
  checkSchemaCompatibility();
  Closer closer = Closer.create();
  try {
    this.conn = closer.register(HiveJdbcConnector.newConnectorWithProps(CompactionRunner.properties));
    setHiveParameters();
    createTables();
    HiveTable mergedDelta = mergeDeltas();
    HiveManagedTable notUpdated = getNotUpdatedRecords(this.snapshot, mergedDelta);
    unionNotUpdatedRecordsAndDeltas(notUpdated, mergedDelta);
  } catch (SQLException e) {
    LOG.error("SQLException during compaction: " + e.getMessage());
    throw new RuntimeException(e);
  } catch (IOException e) {
    LOG.error("IOException during compaction: " + e.getMessage());
    throw new RuntimeException(e);
  } catch (RuntimeException e) {
    LOG.error("Runtime Exception during compaction: " + e.getMessage());
    throw e;
  } finally {
    try {
      deleteTmpFiles();
    } finally {
      closer.close();
    }
  }
}
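A note on the import: although the index above lists the Flink-shaded path org.apache.flink.shaded.guava30.com.google.common.io.Closer, the Gobblin classes shown here use the same Guava Closer API, so the plain com.google.common.io.Closer import is enough for experimenting with the idiom. Below is a rough, self-contained sketch of the shape used in compact() — register the resource, run extra cleanup in a nested finally, and let closer.close() run last. The openConnector() and cleanup() helpers are hypothetical stand-ins for HiveJdbcConnector and deleteTmpFiles(), not Gobblin APIs.

import com.google.common.io.Closer;

import java.io.Closeable;
import java.io.IOException;

public class CloserCleanupSketch {

  // Hypothetical stand-ins for HiveJdbcConnector and deleteTmpFiles().
  static Closeable openConnector() {
    return () -> System.out.println("connector closed");
  }

  static void cleanup() {
    System.out.println("temp files deleted");
  }

  public static void run() throws IOException {
    Closer closer = Closer.create();
    try {
      Closeable conn = closer.register(openConnector());
      // ... use conn ...
    } finally {
      try {
        cleanup();        // extra cleanup runs first, even if the work above failed
      } finally {
        closer.close();   // registered resources are always closed last
      }
    }
  }

  public static void main(String[] args) throws IOException {
    run();
  }
}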
Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
Class HiveSerDeTest, method testAvroOrcSerDes().
/**
 * This test uses Avro SerDe to deserialize data from Avro files, and uses ORC SerDe
 * to serialize them into ORC files.
 */
@Test(groups = { "gobblin.serde" })
public void testAvroOrcSerDes()
    throws IOException, DataRecordException, DataConversionException, URISyntaxException {
  Properties properties = new Properties();
  properties.load(HiveSerDeTest.class.getClassLoader().getResourceAsStream("serde/serde.properties"));
  SourceState sourceState = new SourceState(new State(properties), ImmutableList.<WorkUnitState>of());

  File schemaFile = new File(HiveSerDeTest.class.getClassLoader().getResource("serde/serde.avsc").toURI());
  sourceState.setProp("avro.schema.url", schemaFile.getAbsolutePath());

  OldApiWritableFileSource source = new OldApiWritableFileSource();
  List<WorkUnit> workUnits = source.getWorkunits(sourceState);
  Assert.assertEquals(workUnits.size(), 1);

  WorkUnitState wus = new WorkUnitState(workUnits.get(0));
  wus.addAll(sourceState);

  Closer closer = Closer.create();
  HiveWritableHdfsDataWriter writer = null;
  try {
    OldApiWritableFileExtractor extractor = closer.register((OldApiWritableFileExtractor) source.getExtractor(wus));
    HiveSerDeConverter converter = closer.register(new HiveSerDeConverter());
    writer = closer.register((HiveWritableHdfsDataWriter) new HiveWritableHdfsDataWriterBuilder<>()
        .withBranches(1)
        .withWriterId("0")
        .writeTo(Destination.of(DestinationType.HDFS, sourceState))
        .withAttemptId("0-0")
        .writeInFormat(WriterOutputFormat.ORC)
        .build());

    Assert.assertTrue(writer.isSpeculativeAttemptSafe());
    converter.init(wus);

    Writable record;
    while ((record = extractor.readRecord(null)) != null) {
      Iterable<Writable> convertedRecordIterable = converter.convertRecordImpl(null, record, wus);
      Assert.assertEquals(Iterators.size(convertedRecordIterable.iterator()), 1);
      writer.write(convertedRecordIterable.iterator().next());
    }
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
    if (writer != null) {
      writer.commit();
    }
    Assert.assertTrue(this.fs.exists(new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR),
        sourceState.getProp(ConfigurationKeys.WRITER_FILE_NAME))));
    HadoopUtils.deletePath(this.fs, new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)), true);
  }
}
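The try/catch/finally shape in this test is the pattern recommended by the Guava Closer documentation: register each resource as it is opened, route any Throwable through closer.rethrow(t) so the original failure is preserved, and close everything in the finally block in reverse registration order. The following is a minimal, self-contained sketch of that pattern with an ordinary reader/writer pair; it is illustrative only, not the Gobblin extractor, converter, and writer.

import com.google.common.io.Closer;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

public class CloserCopySketch {

  public static void copy(Path in, Path out) throws IOException {
    Closer closer = Closer.create();
    try {
      BufferedReader reader = closer.register(Files.newBufferedReader(in, StandardCharsets.UTF_8));
      BufferedWriter writer = closer.register(Files.newBufferedWriter(out, StandardCharsets.UTF_8));
      String line;
      while ((line = reader.readLine()) != null) {
        writer.write(line);
        writer.newLine();
      }
    } catch (Throwable t) {
      // Records t as the primary failure; exceptions thrown while closing are then suppressed.
      throw closer.rethrow(t);
    } finally {
      // Closes writer, then reader (reverse registration order).
      closer.close();
    }
  }

  public static void main(String[] args) throws IOException {
    Path in = Files.write(Files.createTempFile("closer-sketch", ".txt"),
        "hello\nworld\n".getBytes(StandardCharsets.UTF_8));
    Path out = Files.createTempFile("closer-sketch", ".out");
    copy(in, out);
    System.out.print(new String(Files.readAllBytes(out), StandardCharsets.UTF_8));
  }
}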
Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
Class OldApiHadoopFileInputSourceTest, method testGetWorkUnitsAndExtractor().
@Test
public void testGetWorkUnitsAndExtractor() throws IOException, DataRecordException {
  OldApiHadoopFileInputSource<String, Text, LongWritable, Text> fileInputSource = new TestHadoopFileInputSource();

  List<WorkUnit> workUnitList = fileInputSource.getWorkunits(this.sourceState);
  Assert.assertEquals(workUnitList.size(), 1);

  WorkUnitState workUnitState = new WorkUnitState(workUnitList.get(0));

  Closer closer = Closer.create();
  try {
    OldApiHadoopFileInputExtractor<String, Text, LongWritable, Text> extractor =
        (OldApiHadoopFileInputExtractor<String, Text, LongWritable, Text>) fileInputSource.getExtractor(workUnitState);
    Text text = extractor.readRecord(null);
    Assert.assertEquals(text.toString(), TEXT);
    Assert.assertNull(extractor.readRecord(null));
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
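In this test the Closer mainly supplies the rethrow() channel; the extractor itself is never registered, so closer.close() has nothing to close. When a method declares more than one checked exception, as this test does with IOException and DataRecordException, Closer also offers a two-argument rethrow(Throwable, Class) that declares the extra type. Below is a hedged sketch of that variant; RecordException and openSource() are hypothetical stand-ins, not Gobblin or Hadoop types.

import com.google.common.io.Closer;

import java.io.Closeable;
import java.io.IOException;

public class CloserRethrowSketch {

  // Hypothetical checked exception standing in for DataRecordException.
  static class RecordException extends Exception {
    RecordException(String message) {
      super(message);
    }
  }

  // Hypothetical record source; any Closeable works here.
  static Closeable openSource() {
    return () -> System.out.println("source closed");
  }

  public static void read() throws IOException, RecordException {
    Closer closer = Closer.create();
    try {
      Closeable source = closer.register(openSource());
      // ... read records; this may throw IOException or RecordException ...
    } catch (Throwable t) {
      // Rethrown as-is if t is an IOException, RecordException, RuntimeException or Error;
      // any other checked exception is wrapped in a RuntimeException.
      throw closer.rethrow(t, RecordException.class);
    } finally {
      closer.close();
    }
  }

  public static void main(String[] args) throws IOException, RecordException {
    read();
  }
}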
Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
Class HivePartitionFileSet, method generateCopyEntities().
@Override
protected Collection<CopyEntity> generateCopyEntities() throws IOException {
  try (Closer closer = Closer.create()) {
    MultiTimingEvent multiTimer = closer.register(new MultiTimingEvent(this.eventSubmitter, "PartitionCopy", true));
    int stepPriority = 0;
    String fileSet = HiveCopyEntityHelper.gson.toJson(this.partition.getValues());
    List<CopyEntity> copyEntities = Lists.newArrayList();
    stepPriority = hiveCopyEntityHelper.addSharedSteps(copyEntities, fileSet, stepPriority);

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.COMPUTE_TARGETS);
    Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getTargetFs(),
        this.partition.getDataLocation(), Optional.of(this.partition));
    Partition targetPartition = getTargetPartition(this.partition, targetPath);

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.EXISTING_PARTITION);
    if (this.existingTargetPartition.isPresent()) {
      hiveCopyEntityHelper.getTargetPartitions().remove(this.partition.getValues());
      try {
        checkPartitionCompatibility(targetPartition, this.existingTargetPartition.get());
      } catch (IOException ioe) {
        if (hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_PARTITIONS
            && hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_TABLE_AND_PARTITIONS) {
          log.error("Source and target partitions are not compatible. Aborting copy of partition " + this.partition, ioe);
          // Silence error and continue processing workunits if we allow partial success
          if (ConfigUtils.getString(hiveCopyEntityHelper.getConfiguration().getConfig(),
              ConfigurationKeys.JOB_COMMIT_POLICY_KEY, JobCommitPolicy.COMMIT_ON_FULL_SUCCESS.toString())
              .equals(JobCommitPolicy.COMMIT_SUCCESSFUL_TASKS.toString())) {
            return Lists.newArrayList();
          } else {
            throw ioe;
          }
        }
        log.warn("Source and target partitions are not compatible. Will override target partition: " + ioe.getMessage());
        log.debug("Incompatibility details: ", ioe);
        stepPriority = hiveCopyEntityHelper.addPartitionDeregisterSteps(copyEntities, fileSet, stepPriority,
            hiveCopyEntityHelper.getTargetTable(), this.existingTargetPartition.get());
        this.existingTargetPartition = Optional.absent();
      }
    }

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.PARTITION_SKIP_PREDICATE);
    if (hiveCopyEntityHelper.getFastPartitionSkip().isPresent()
        && hiveCopyEntityHelper.getFastPartitionSkip().get().apply(this)) {
      log.info(String.format("Skipping copy of partition %s due to fast partition skip predicate.",
          this.partition.getCompleteName()));
      return Lists.newArrayList();
    }

    HiveSpec partitionHiveSpec = new SimpleHiveSpec.Builder<>(targetPath)
        .withTable(HiveMetaStoreUtils.getHiveTable(hiveCopyEntityHelper.getTargetTable().getTTable()))
        .withPartition(Optional.of(HiveMetaStoreUtils.getHivePartition(targetPartition.getTPartition())))
        .build();
    HiveRegisterStep register = new HiveRegisterStep(hiveCopyEntityHelper.getTargetMetastoreURI(), partitionHiveSpec,
        hiveCopyEntityHelper.getHiveRegProps());
    copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(), register, stepPriority++));

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_LOCATIONS);
    HiveLocationDescriptor sourceLocation =
        HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties);
    HiveLocationDescriptor desiredTargetLocation =
        HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties);
    Optional<HiveLocationDescriptor> existingTargetLocation = this.existingTargetPartition.isPresent()
        ? Optional.of(HiveLocationDescriptor.forPartition(this.existingTargetPartition.get(),
            hiveCopyEntityHelper.getTargetFs(), this.properties))
        : Optional.<HiveLocationDescriptor>absent();

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.FULL_PATH_DIFF);
    HiveCopyEntityHelper.DiffPathSet diffPathSet = HiveCopyEntityHelper.fullPathDiff(sourceLocation,
        desiredTargetLocation, existingTargetLocation, Optional.<Partition>absent(), multiTimer, hiveCopyEntityHelper);

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_DELETE_UNITS);
    if (diffPathSet.pathsToDelete.size() > 0) {
      DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(),
          diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties);
      copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, stepPriority++));
    }

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_COPY_UNITS);
    for (CopyableFile.Builder builder : hiveCopyEntityHelper.getCopyableFilesFromPaths(diffPathSet.filesToCopy,
        hiveCopyEntityHelper.getConfiguration(), Optional.of(this.partition))) {
      CopyableFile fileEntity = builder.fileSet(fileSet)
          .checksum(new byte[0])
          .datasetOutputPath(desiredTargetLocation.location.toString())
          .build();

      DatasetDescriptor sourceDataset = this.hiveCopyEntityHelper.getSourceDataset();
      PartitionDescriptor source = new PartitionDescriptor(partition.getName(), sourceDataset);
      fileEntity.setSourceData(source);

      DatasetDescriptor destinationDataset = this.hiveCopyEntityHelper.getDestinationDataset();
      Partition destinationPartition =
          this.existingTargetPartition.isPresent() ? this.existingTargetPartition.get() : partition;
      PartitionDescriptor destination = new PartitionDescriptor(destinationPartition.getName(), destinationDataset);
      fileEntity.setDestinationData(destination);

      copyEntities.add(fileEntity);
    }

    log.info("Created {} copy entities for partition {}", copyEntities.size(), this.partition.getCompleteName());
    return copyEntities;
  }
}
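Unlike the previous snippets, generateCopyEntities() places the Closer itself in a try-with-resources statement. Closer implements Closeable, so closer.close() runs automatically on every exit path, including both early returns, which in turn closes the registered MultiTimingEvent. Below is a small sketch of that shape; the Timer class is a hypothetical stand-in for MultiTimingEvent, not a Gobblin class.

import com.google.common.io.Closer;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.List;

public class CloserTryWithResourcesSketch {

  // Hypothetical stand-in for MultiTimingEvent: a Closeable "scope" object.
  static class Timer implements Closeable {
    private final String name;

    Timer(String name) {
      this.name = name;
      System.out.println("started " + name);
    }

    void nextStage(String stage) {
      System.out.println(name + ": " + stage);
    }

    @Override
    public void close() {
      System.out.println("finished " + name);
    }
  }

  public static List<String> generate(boolean skip) throws IOException {
    // Closer implements Closeable, so try-with-resources closes it (and every
    // registered resource) on normal return, early return, or exception.
    try (Closer closer = Closer.create()) {
      Timer timer = closer.register(new Timer("PartitionCopy"));
      timer.nextStage("COMPUTE_TARGETS");
      if (skip) {
        return Collections.emptyList();   // timer is still closed on this early return
      }
      timer.nextStage("CREATE_COPY_UNITS");
      return Collections.singletonList("copy-entity");
    }
  }

  public static void main(String[] args) throws IOException {
    System.out.println(generate(false));
    System.out.println(generate(true));
  }
}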
Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
Class WikipediaExtractor, method performHttpQuery().
private JsonElement performHttpQuery(String rootUrl, Map<String, String> query)
    throws URISyntaxException, IOException {
  if (null == this.httpClient) {
    this.httpClient = createHttpClient();
  }
  HttpUriRequest req = createHttpRequest(rootUrl, query);

  Closer closer = Closer.create();
  StringBuilder sb = new StringBuilder();
  try {
    HttpResponse response = sendHttpRequest(req, this.httpClient);
    if (response instanceof CloseableHttpResponse) {
      closer.register((CloseableHttpResponse) response);
    }
    BufferedReader br = closer.register(new BufferedReader(
        new InputStreamReader(response.getEntity().getContent(), ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
    String line;
    while ((line = br.readLine()) != null) {
      sb.append(line + "\n");
    }
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    try {
      closer.close();
    } catch (IOException e) {
      LOG.error("IOException in Closer.close() while performing query " + req + ": " + e, e);
    }
  }

  if (Strings.isNullOrEmpty(sb.toString())) {
    LOG.warn("Received empty response for query: " + req);
    return new JsonObject();
  }
  JsonElement jsonElement = GSON.fromJson(sb.toString(), JsonElement.class);
  return jsonElement;
}
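Two details of this Closer usage are worth noting: the HTTP response is registered only when it is actually a CloseableHttpResponse, and the IOException that closer.close() can throw in the finally block is caught and logged so a close failure does not mask a response that was already read. The sketch below mirrors both details with generic types in place of the HttpClient classes; readAll() and its parameters are illustrative assumptions, not the Wikipedia extractor's API.

import com.google.common.io.Closer;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

public class CloserConditionalRegisterSketch {

  /**
   * Reads the whole stream into a String. The response handle is registered only when it is
   * actually Closeable, and a failure inside closer.close() is logged instead of masking a
   * response body that was already read.
   */
  public static String readAll(Object responseHandle, InputStream content) throws IOException {
    Closer closer = Closer.create();
    StringBuilder sb = new StringBuilder();
    try {
      if (responseHandle instanceof Closeable) {
        closer.register((Closeable) responseHandle);
      }
      BufferedReader br = closer.register(
          new BufferedReader(new InputStreamReader(content, StandardCharsets.UTF_8)));
      String line;
      while ((line = br.readLine()) != null) {
        sb.append(line).append('\n');
      }
    } catch (Throwable t) {
      throw closer.rethrow(t);
    } finally {
      try {
        closer.close();
      } catch (IOException e) {
        // Log and keep going: the data has already been read at this point.
        System.err.println("Ignoring failure while closing the response: " + e);
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) throws IOException {
    InputStream content = new ByteArrayInputStream("{\"ok\":true}".getBytes(StandardCharsets.UTF_8));
    System.out.print(readAll(null, content));
  }
}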