Example 81 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class SerialCompactor method compact.

@Override
public void compact() throws IOException {
    checkSchemaCompatibility();
    Closer closer = Closer.create();
    try {
        this.conn = closer.register(HiveJdbcConnector.newConnectorWithProps(CompactionRunner.properties));
        setHiveParameters();
        createTables();
        HiveTable mergedDelta = mergeDeltas();
        HiveManagedTable notUpdated = getNotUpdatedRecords(this.snapshot, mergedDelta);
        unionNotUpdatedRecordsAndDeltas(notUpdated, mergedDelta);
    } catch (SQLException e) {
        LOG.error("SQLException during compaction: " + e.getMessage());
        throw new RuntimeException(e);
    } catch (IOException e) {
        LOG.error("IOException during compaction: " + e.getMessage());
        throw new RuntimeException(e);
    } catch (RuntimeException e) {
        LOG.error("Runtime Exception during compaction: " + e.getMessage());
        throw e;
    } finally {
        try {
            deleteTmpFiles();
        } finally {
            closer.close();
        }
    }
}
Also used : Closer(com.google.common.io.Closer) SQLException(java.sql.SQLException) IOException(java.io.IOException)
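For comparison, the canonical Guava Closer idiom (as described in the Closer javadoc) catches Throwable and routes it through closer.rethrow(...), so that an exception thrown by close() cannot mask the original failure; the compact() method above instead wraps checked exceptions in RuntimeException. A minimal, illustrative sketch of the documented pattern (the class name and file path below are hypothetical):

import com.google.common.io.Closer;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

public class CloserIdiomSketch {

    // Counts the bytes in a file, closing the stream through Closer.
    public static long countBytes(String path) throws IOException {
        Closer closer = Closer.create();
        try {
            // register() returns its argument, so the resource can be used inline.
            InputStream in = closer.register(Files.newInputStream(Paths.get(path)));
            long total = 0;
            while (in.read() != -1) {
                total++;
            }
            return total;
        } catch (Throwable t) {
            // rethrow() records t as the primary exception; an exception thrown later
            // by close() is suppressed instead of masking the original failure.
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}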

Example 82 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class HiveSerDeTest method testAvroOrcSerDes.

/**
 * This test uses the Avro SerDe to deserialize data from Avro files, and the ORC SerDe
 * to serialize them into ORC files.
 */
@Test(groups = { "gobblin.serde" })
public void testAvroOrcSerDes() throws IOException, DataRecordException, DataConversionException, URISyntaxException {
    Properties properties = new Properties();
    properties.load(HiveSerDeTest.class.getClassLoader().getResourceAsStream("serde/serde.properties"));
    SourceState sourceState = new SourceState(new State(properties), ImmutableList.<WorkUnitState>of());
    File schemaFile = new File(HiveSerDeTest.class.getClassLoader().getResource("serde/serde.avsc").toURI());
    sourceState.setProp("avro.schema.url", schemaFile.getAbsolutePath());
    OldApiWritableFileSource source = new OldApiWritableFileSource();
    List<WorkUnit> workUnits = source.getWorkunits(sourceState);
    Assert.assertEquals(workUnits.size(), 1);
    WorkUnitState wus = new WorkUnitState(workUnits.get(0));
    wus.addAll(sourceState);
    Closer closer = Closer.create();
    HiveWritableHdfsDataWriter writer = null;
    try {
        OldApiWritableFileExtractor extractor = closer.register((OldApiWritableFileExtractor) source.getExtractor(wus));
        HiveSerDeConverter converter = closer.register(new HiveSerDeConverter());
        writer = closer.register((HiveWritableHdfsDataWriter) new HiveWritableHdfsDataWriterBuilder<>().withBranches(1).withWriterId("0").writeTo(Destination.of(DestinationType.HDFS, sourceState)).withAttemptId("0-0").writeInFormat(WriterOutputFormat.ORC).build());
        Assert.assertTrue(writer.isSpeculativeAttemptSafe());
        converter.init(wus);
        Writable record;
        while ((record = extractor.readRecord(null)) != null) {
            Iterable<Writable> convertedRecordIterable = converter.convertRecordImpl(null, record, wus);
            Assert.assertEquals(Iterators.size(convertedRecordIterable.iterator()), 1);
            writer.write(convertedRecordIterable.iterator().next());
        }
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
        if (writer != null) {
            writer.commit();
        }
        Assert.assertTrue(this.fs.exists(new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), sourceState.getProp(ConfigurationKeys.WRITER_FILE_NAME))));
        HadoopUtils.deletePath(this.fs, new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)), true);
    }
}
Also used : Closer(com.google.common.io.Closer) Path(org.apache.hadoop.fs.Path) SourceState(org.apache.gobblin.configuration.SourceState) OldApiWritableFileExtractor(org.apache.gobblin.source.extractor.hadoop.OldApiWritableFileExtractor) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) HiveSerDeConverter(org.apache.gobblin.converter.serde.HiveSerDeConverter) Writable(org.apache.hadoop.io.Writable) Properties(java.util.Properties) HiveWritableHdfsDataWriter(org.apache.gobblin.writer.HiveWritableHdfsDataWriter) State(org.apache.gobblin.configuration.State) OldApiWritableFileSource(org.apache.gobblin.source.extractor.hadoop.OldApiWritableFileSource) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) File(java.io.File) Test(org.testng.annotations.Test)

Example 83 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class OldApiHadoopFileInputSourceTest method testGetWorkUnitsAndExtractor.

@Test
public void testGetWorkUnitsAndExtractor() throws IOException, DataRecordException {
    OldApiHadoopFileInputSource<String, Text, LongWritable, Text> fileInputSource = new TestHadoopFileInputSource();
    List<WorkUnit> workUnitList = fileInputSource.getWorkunits(this.sourceState);
    Assert.assertEquals(workUnitList.size(), 1);
    WorkUnitState workUnitState = new WorkUnitState(workUnitList.get(0));
    Closer closer = Closer.create();
    try {
        OldApiHadoopFileInputExtractor<String, Text, LongWritable, Text> extractor = (OldApiHadoopFileInputExtractor<String, Text, LongWritable, Text>) fileInputSource.getExtractor(workUnitState);
        Text text = extractor.readRecord(null);
        Assert.assertEquals(text.toString(), TEXT);
        Assert.assertNull(extractor.readRecord(null));
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : Closer(com.google.common.io.Closer) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 84 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class HivePartitionFileSet method generateCopyEntities.

@Override
protected Collection<CopyEntity> generateCopyEntities() throws IOException {
    try (Closer closer = Closer.create()) {
        MultiTimingEvent multiTimer = closer.register(new MultiTimingEvent(this.eventSubmitter, "PartitionCopy", true));
        int stepPriority = 0;
        String fileSet = HiveCopyEntityHelper.gson.toJson(this.partition.getValues());
        List<CopyEntity> copyEntities = Lists.newArrayList();
        stepPriority = hiveCopyEntityHelper.addSharedSteps(copyEntities, fileSet, stepPriority);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.COMPUTE_TARGETS);
        Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getTargetFs(), this.partition.getDataLocation(), Optional.of(this.partition));
        Partition targetPartition = getTargetPartition(this.partition, targetPath);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.EXISTING_PARTITION);
        if (this.existingTargetPartition.isPresent()) {
            hiveCopyEntityHelper.getTargetPartitions().remove(this.partition.getValues());
            try {
                checkPartitionCompatibility(targetPartition, this.existingTargetPartition.get());
            } catch (IOException ioe) {
                if (hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_PARTITIONS && hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_TABLE_AND_PARTITIONS) {
                    log.error("Source and target partitions are not compatible. Aborting copy of partition " + this.partition, ioe);
                    // Silence error and continue processing workunits if we allow partial success
                    if (ConfigUtils.getString(hiveCopyEntityHelper.getConfiguration().getConfig(), ConfigurationKeys.JOB_COMMIT_POLICY_KEY, JobCommitPolicy.COMMIT_ON_FULL_SUCCESS.toString()).equals(JobCommitPolicy.COMMIT_SUCCESSFUL_TASKS.toString())) {
                        return Lists.newArrayList();
                    } else {
                        throw ioe;
                    }
                }
                log.warn("Source and target partitions are not compatible. Will override target partition: " + ioe.getMessage());
                log.debug("Incompatibility details: ", ioe);
                stepPriority = hiveCopyEntityHelper.addPartitionDeregisterSteps(copyEntities, fileSet, stepPriority, hiveCopyEntityHelper.getTargetTable(), this.existingTargetPartition.get());
                this.existingTargetPartition = Optional.absent();
            }
        }
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.PARTITION_SKIP_PREDICATE);
        if (hiveCopyEntityHelper.getFastPartitionSkip().isPresent() && hiveCopyEntityHelper.getFastPartitionSkip().get().apply(this)) {
            log.info(String.format("Skipping copy of partition %s due to fast partition skip predicate.", this.partition.getCompleteName()));
            return Lists.newArrayList();
        }
        HiveSpec partitionHiveSpec = new SimpleHiveSpec.Builder<>(targetPath).withTable(HiveMetaStoreUtils.getHiveTable(hiveCopyEntityHelper.getTargetTable().getTTable())).withPartition(Optional.of(HiveMetaStoreUtils.getHivePartition(targetPartition.getTPartition()))).build();
        HiveRegisterStep register = new HiveRegisterStep(hiveCopyEntityHelper.getTargetMetastoreURI(), partitionHiveSpec, hiveCopyEntityHelper.getHiveRegProps());
        copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(), register, stepPriority++));
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_LOCATIONS);
        HiveLocationDescriptor sourceLocation = HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties);
        HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties);
        Optional<HiveLocationDescriptor> existingTargetLocation = this.existingTargetPartition.isPresent() ? Optional.of(HiveLocationDescriptor.forPartition(this.existingTargetPartition.get(), hiveCopyEntityHelper.getTargetFs(), this.properties)) : Optional.<HiveLocationDescriptor>absent();
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.FULL_PATH_DIFF);
        HiveCopyEntityHelper.DiffPathSet diffPathSet = HiveCopyEntityHelper.fullPathDiff(sourceLocation, desiredTargetLocation, existingTargetLocation, Optional.<Partition>absent(), multiTimer, hiveCopyEntityHelper);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_DELETE_UNITS);
        if (diffPathSet.pathsToDelete.size() > 0) {
            DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(), diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties);
            copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, stepPriority++));
        }
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_COPY_UNITS);
        for (CopyableFile.Builder builder : hiveCopyEntityHelper.getCopyableFilesFromPaths(diffPathSet.filesToCopy, hiveCopyEntityHelper.getConfiguration(), Optional.of(this.partition))) {
            CopyableFile fileEntity = builder.fileSet(fileSet).checksum(new byte[0]).datasetOutputPath(desiredTargetLocation.location.toString()).build();
            DatasetDescriptor sourceDataset = this.hiveCopyEntityHelper.getSourceDataset();
            PartitionDescriptor source = new PartitionDescriptor(partition.getName(), sourceDataset);
            fileEntity.setSourceData(source);
            DatasetDescriptor destinationDataset = this.hiveCopyEntityHelper.getDestinationDataset();
            Partition destinationPartition = this.existingTargetPartition.isPresent() ? this.existingTargetPartition.get() : partition;
            PartitionDescriptor destination = new PartitionDescriptor(destinationPartition.getName(), destinationDataset);
            fileEntity.setDestinationData(destination);
            copyEntities.add(fileEntity);
        }
        log.info("Created {} copy entities for partition {}", copyEntities.size(), this.partition.getCompleteName());
        return copyEntities;
    }
}
Also used : Closer(com.google.common.io.Closer) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) DatasetDescriptor(org.apache.gobblin.dataset.DatasetDescriptor) CopyEntity(org.apache.gobblin.data.management.copy.CopyEntity) PostPublishStep(org.apache.gobblin.data.management.copy.entities.PostPublishStep) MultiTimingEvent(org.apache.gobblin.metrics.event.MultiTimingEvent) IOException(java.io.IOException) DeleteFileCommitStep(org.apache.gobblin.util.commit.DeleteFileCommitStep) HiveRegisterStep(org.apache.gobblin.hive.HiveRegisterStep) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) PartitionDescriptor(org.apache.gobblin.dataset.PartitionDescriptor) PrePublishStep(org.apache.gobblin.data.management.copy.entities.PrePublishStep) HiveSpec(org.apache.gobblin.hive.spec.HiveSpec)
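Note that Example 84 manages the Closer with try-with-resources rather than an explicit finally block; this works because Closer itself implements Closeable, and closing it closes every registered resource in reverse registration order. A minimal, illustrative sketch of that variant (the class name and file path are hypothetical):

import com.google.common.io.Closer;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

public class TryWithResourcesCloserSketch {

    // try-with-resources calls closer.close() on exit, which closes the registered
    // reader; exceptions from close() become suppressed exceptions automatically.
    public static String readFirstLine(String path) throws IOException {
        try (Closer closer = Closer.create()) {
            BufferedReader reader = closer.register(Files.newBufferedReader(Paths.get(path)));
            return reader.readLine();
        }
    }
}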

Example 85 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class WikipediaExtractor method performHttpQuery.

private JsonElement performHttpQuery(String rootUrl, Map<String, String> query) throws URISyntaxException, IOException {
    if (null == this.httpClient) {
        this.httpClient = createHttpClient();
    }
    HttpUriRequest req = createHttpRequest(rootUrl, query);
    Closer closer = Closer.create();
    StringBuilder sb = new StringBuilder();
    try {
        HttpResponse response = sendHttpRequest(req, this.httpClient);
        if (response instanceof CloseableHttpResponse) {
            closer.register((CloseableHttpResponse) response);
        }
        BufferedReader br = closer.register(new BufferedReader(new InputStreamReader(response.getEntity().getContent(), ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line + "\n");
        }
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        try {
            closer.close();
        } catch (IOException e) {
            LOG.error("IOException in Closer.close() while performing query " + req + ": " + e, e);
        }
    }
    if (Strings.isNullOrEmpty(sb.toString())) {
        LOG.warn("Received empty response for query: " + req);
        return new JsonObject();
    }
    JsonElement jsonElement = GSON.fromJson(sb.toString(), JsonElement.class);
    return jsonElement;
}
Also used : HttpUriRequest(org.apache.http.client.methods.HttpUriRequest) Closer(com.google.common.io.Closer) InputStreamReader(java.io.InputStreamReader) JsonElement(com.google.gson.JsonElement) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) BufferedReader(java.io.BufferedReader) HttpResponse(org.apache.http.HttpResponse) JsonObject(com.google.gson.JsonObject) IOException(java.io.IOException)
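As a design note, the manual readLine loop in performHttpQuery could be replaced by Guava's CharStreams.toString(Readable), which drains the reader in one call while the Closer still guarantees the response and reader are closed. A minimal sketch of just the response-reading step, assuming the response comes from sendHttpRequest as in Example 85 and substituting StandardCharsets.UTF_8 for the project's ConfigurationKeys.DEFAULT_CHARSET_ENCODING constant (the class and method names here are hypothetical):

import com.google.common.io.CharStreams;
import com.google.common.io.Closer;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;

public class HttpResponseBodySketch {

    // Reads the full entity body of an already-received HTTP response as a String.
    public static String readBody(HttpResponse response) throws IOException {
        Closer closer = Closer.create();
        try {
            if (response instanceof CloseableHttpResponse) {
                // Only CloseableHttpResponse is Closeable, so register it conditionally.
                closer.register((CloseableHttpResponse) response);
            }
            InputStreamReader reader = closer.register(
                new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8));
            return CharStreams.toString(reader);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}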

Aggregations

Closer (com.google.common.io.Closer) 213
IOException (java.io.IOException) 95
File (java.io.File) 26
Test (org.testng.annotations.Test) 21
Path (org.apache.hadoop.fs.Path) 18
Test (org.junit.Test) 18
Properties (java.util.Properties) 16
Closer (org.apache.flink.shaded.guava30.com.google.common.io.Closer) 16
FileOutputStream (java.io.FileOutputStream) 15
ArrayList (java.util.ArrayList) 15
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit) 13
FileInputStream (java.io.FileInputStream) 12
InputStream (java.io.InputStream) 12
OutputStream (java.io.OutputStream) 12
Map (java.util.Map) 12
ByteArrayInputStream (java.io.ByteArrayInputStream) 10
DataInputStream (java.io.DataInputStream) 10
UncheckedIOException (java.io.UncheckedIOException) 10
Configuration (org.apache.hadoop.conf.Configuration) 10
Text (org.apache.hadoop.io.Text) 9