Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
The class WikipediaExtractor, method performHttpQuery.
private JsonElement performHttpQuery(String rootUrl, Map<String, String> query) throws URISyntaxException, IOException {
  if (null == this.httpClient) {
    this.httpClient = createHttpClient();
  }
  HttpUriRequest req = createHttpRequest(rootUrl, query);
  Closer closer = Closer.create();
  StringBuilder sb = new StringBuilder();
  try {
    HttpResponse response = sendHttpRequest(req, this.httpClient);
    if (response instanceof CloseableHttpResponse) {
      closer.register((CloseableHttpResponse) response);
    }
    BufferedReader br = closer.register(new BufferedReader(
        new InputStreamReader(response.getEntity().getContent(), ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
    String line;
    while ((line = br.readLine()) != null) {
      sb.append(line).append('\n');
    }
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    try {
      closer.close();
    } catch (IOException e) {
      LOG.error("IOException in Closer.close() while performing query " + req + ": " + e, e);
    }
  }
  if (Strings.isNullOrEmpty(sb.toString())) {
    LOG.warn("Received empty response for query: " + req);
    return new JsonObject();
  }
  return GSON.fromJson(sb.toString(), JsonElement.class);
}
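For reference, this follows the standard Closer idiom from the Guava documentation: register each resource as it is opened, funnel any failure through rethrow so it becomes the primary exception, and close everything in finally. Below is a minimal self-contained sketch of that pattern, using in-memory streams so it runs as-is; the shaded import path matches the snippets on this page.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.flink.shaded.guava30.com.google.common.io.Closer;

public final class CloserIdiom {
  // Copies bytes between two registered streams using the canonical Closer
  // pattern: register, rethrow the primary exception, close in finally.
  public static byte[] copy(byte[] input) throws IOException {
    Closer closer = Closer.create();
    try {
      ByteArrayInputStream in = closer.register(new ByteArrayInputStream(input));
      ByteArrayOutputStream out = closer.register(new ByteArrayOutputStream());
      int b;
      while ((b = in.read()) != -1) {
        out.write(b);
      }
      return out.toByteArray();
    } catch (Throwable t) {
      // rethrow() records t as the primary exception; exceptions thrown by
      // close() afterwards are suppressed in its favor rather than masking it.
      throw closer.rethrow(t);
    } finally {
      closer.close();
    }
  }
}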
Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
The class EventUtils, method deserializeReportFromAvroSerialization.
/**
 * Parses a {@link org.apache.gobblin.metrics.GobblinTrackingEvent} from a byte array Avro serialization.
 * @param reuse GobblinTrackingEvent to reuse.
 * @param bytes Input bytes.
 * @return GobblinTrackingEvent.
 * @throws java.io.IOException if the schema version does not match or decoding fails.
 */
public static synchronized GobblinTrackingEvent deserializeReportFromAvroSerialization(GobblinTrackingEvent reuse, byte[] bytes) throws IOException {
  if (!reader.isPresent()) {
    reader = Optional.of(new SpecificDatumReader<>(GobblinTrackingEvent.class));
  }
  Closer closer = Closer.create();
  try {
    DataInputStream inputStream = closer.register(new DataInputStream(new ByteArrayInputStream(bytes)));
    // Check the schema version (written as a 4-byte int) before decoding
    int versionNumber = inputStream.readInt();
    if (versionNumber != SCHEMA_VERSION) {
      throw new IOException(String.format("GobblinTrackingEvent schema version not recognized. Found version %d, expected %d.", versionNumber, SCHEMA_VERSION));
    }
    // Decode the rest of the payload as Avro binary
    Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
    return reader.get().read(reuse, decoder);
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
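The read side above expects a 4-byte schema-version prefix followed by the Avro binary encoding. A hypothetical write-side counterpart could look like the sketch below; serializeEvent and its schemaVersion parameter are illustrative, not the actual Gobblin API.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.flink.shaded.guava30.com.google.common.io.Closer;
import org.apache.gobblin.metrics.GobblinTrackingEvent;

// Hypothetical counterpart: write the schema-version int, then the event as
// Avro binary, mirroring the framing the reader checks above.
public static byte[] serializeEvent(GobblinTrackingEvent event, int schemaVersion) throws IOException {
  Closer closer = Closer.create();
  try {
    ByteArrayOutputStream bytes = closer.register(new ByteArrayOutputStream());
    DataOutputStream out = closer.register(new DataOutputStream(bytes));
    out.writeInt(schemaVersion);
    Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new SpecificDatumWriter<>(GobblinTrackingEvent.class).write(event, encoder);
    // binaryEncoder() buffers internally, so flush before grabbing the bytes
    encoder.flush();
    return bytes.toByteArray();
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}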
Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
The class ValidationJob, method getValidationOutputFromHive.
/**
* Execute Hive queries using {@link HiveJdbcConnector} and validate results.
* @param queries Queries to execute.
*/
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE", justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {
  if (null == queries || queries.isEmpty()) {
    log.warn("No queries specified to be executed");
    return Collections.emptyList();
  }
  List<Long> rowCounts = Lists.newArrayList();
  Closer closer = Closer.create();
  try {
    HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
    for (String query : queries) {
      String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
      Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
      query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
      log.info("Executing query: " + query);
      try {
        if (!this.hiveSettings.isEmpty()) {
          hiveJdbcConnector.executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()]));
        }
        hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false", "SET hive.auto.convert.join=false", query);
        FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
        List<FileStatus> files = new ArrayList<>();
        for (FileStatus fileStatus : fileStatusList) {
          if (fileStatus.isFile()) {
            files.add(fileStatus);
          }
        }
        if (files.size() > 1) {
          log.warn("Found more than one output file. Should have been one.");
        } else if (files.isEmpty()) {
          log.warn("Found no output file. Should have been one.");
        } else {
          // Close the reader when done rather than leaking it
          String theString;
          try (InputStreamReader reader = new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8)) {
            theString = IOUtils.toString(reader);
          }
          log.info("Found row count: " + theString.trim());
          if (StringUtils.isBlank(theString.trim())) {
            rowCounts.add(0L);
          } else {
            try {
              rowCounts.add(Long.parseLong(theString.trim()));
            } catch (NumberFormatException e) {
              throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
            }
          }
        }
      } finally {
        if (this.fs.exists(hiveTempDir)) {
          log.debug("Deleting temp dir: " + hiveTempDir);
          this.fs.delete(hiveTempDir, true);
        }
      }
    }
  } catch (SQLException e) {
    log.warn("Execution failed for query set " + queries.toString(), e);
  } finally {
    try {
      closer.close();
    } catch (Exception e) {
      log.warn("Could not close HiveJdbcConnector", e);
    }
  }
  return rowCounts;
}
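Since HiveJdbcConnector is registered with Closer here, it must implement Closeable, which means the same lifecycle can also be written with try-with-resources on Java 7+. A minimal sketch under that assumption follows; executeWithAutoClose is a hypothetical helper, and props and log are the same class members used above.

// Sketch only: try-with-resources replaces the Closer bookkeeping and makes
// close() ordering automatic; error handling kept as in the original.
private void executeWithAutoClose(List<String> queries) throws IOException {
  try (HiveJdbcConnector hiveJdbcConnector = HiveJdbcConnector.newConnectorWithProps(props)) {
    for (String query : queries) {
      hiveJdbcConnector.executeStatements(query);
    }
  } catch (SQLException e) {
    log.warn("Execution failed for query set " + queries, e);
  }
}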
Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
The class ValidationJob, method getValidationOutputFromHiveJdbc.
/**
* Execute Hive queries using {@link HiveJdbcConnector} and validate results.
* @param queries Queries to execute.
*/
@SuppressWarnings("unused")
private List<Long> getValidationOutputFromHiveJdbc(List<String> queries) throws IOException {
  if (null == queries || queries.isEmpty()) {
    log.warn("No queries specified to be executed");
    return Collections.emptyList();
  }
  Statement statement = null;
  List<Long> rowCounts = Lists.newArrayList();
  Closer closer = Closer.create();
  try {
    // Register the connector so that closer.close() actually closes it
    HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
    statement = hiveJdbcConnector.getConnection().createStatement();
    for (String query : queries) {
      log.info("Executing query: " + query);
      boolean result = statement.execute(query);
      if (result) {
        ResultSet resultSet = statement.getResultSet();
        if (resultSet.next()) {
          rowCounts.add(resultSet.getLong(1));
        }
      } else {
        log.warn("Query returned no result set: " + query);
      }
    }
  } catch (SQLException e) {
    throw new RuntimeException(e);
  } finally {
    // Close the statement before the connection that created it
    if (null != statement) {
      try {
        statement.close();
      } catch (SQLException e) {
        log.warn("Could not close Hive statement", e);
      }
    }
    try {
      closer.close();
    } catch (Exception e) {
      log.warn("Could not close HiveJdbcConnector", e);
    }
  }
  return rowCounts;
}
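One wrinkle worth noting: java.sql.Statement is AutoCloseable but not java.io.Closeable, so it cannot be handed to Guava's Closer and has to be closed by hand above. A sketch of the same query loop with both JDBC resources managed by try-with-resources instead; the helper method name is hypothetical, and the collection logic mirrors the method above.

// Sketch: Statement and ResultSet are AutoCloseable, so try-with-resources
// can manage them even though Closer cannot (it only accepts Closeable).
private List<Long> rowCountsViaTryWithResources(HiveJdbcConnector hiveJdbcConnector, List<String> queries) throws SQLException {
  List<Long> rowCounts = Lists.newArrayList();
  try (Statement statement = hiveJdbcConnector.getConnection().createStatement()) {
    for (String query : queries) {
      if (statement.execute(query)) {
        try (ResultSet resultSet = statement.getResultSet()) {
          if (resultSet.next()) {
            rowCounts.add(resultSet.getLong(1));
          }
        }
      }
    }
  }
  return rowCounts;
}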
Use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
The class Kafka08DataWriterIntegrationTest, method testErrors.
@Test
public void testErrors() throws Exception {
  log.warn("Process id = " + ManagementFactory.getRuntimeMXBean().getName());
  int numRecordsPerExtract = 5;
  int numParallel = 2;
  int errorEvery = 2000;
  int totalRecords = numRecordsPerExtract * numParallel;
  int totalSuccessful = totalRecords / errorEvery + totalRecords % errorEvery;
  {
    Closer closer = Closer.create();
    try {
      kafkaTestHelper.provisionTopic(TOPIC);
      jobProps.setProperty("source.numRecordsPerExtract", "" + numRecordsPerExtract);
      jobProps.setProperty("source.numParallelism", "" + numParallel);
      jobProps.setProperty("writer.kafka.producerConfig.flaky.errorType", "regex");
      // all records from partition 0 will be dropped.
      jobProps.setProperty("writer.kafka.producerConfig.flaky.regexPattern", ":index:0.*");
      jobProps.setProperty("job.commit.policy", "partial");
      jobProps.setProperty("publish.at.job.level", "false");
      // number of records in partition 1
      totalSuccessful = 5;
      JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(gobblinProps, jobProps));
      jobLauncher.launchJob(null);
    } catch (Exception e) {
      log.error("Failed to run job with exception ", e);
      Assert.fail("Should not throw exception on running the job");
    } finally {
      closer.close();
    }
    // test records written
    testRecordsWritten(totalSuccessful, TOPIC);
  }
  boolean trySecond = true;
  if (trySecond) {
    Closer closer = Closer.create();
    try {
      jobProps.setProperty("source.numRecordsPerExtract", "" + numRecordsPerExtract);
      jobProps.setProperty("source.numParallelism", "" + numParallel);
      jobProps.setProperty("writer.kafka.producerConfig.flaky.errorType", "nth");
      jobProps.setProperty("writer.kafka.producerConfig.flaky.errorEvery", "" + errorEvery);
      JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(gobblinProps, jobProps));
      jobLauncher.launchJob(null);
      totalSuccessful = totalRecords / errorEvery + totalRecords % errorEvery;
    } catch (Exception e) {
      log.error("Failed to run job with exception ", e);
      Assert.fail("Should not throw exception on running the job");
    } finally {
      closer.close();
    }
  }
  // test records written
  testRecordsWritten(totalSuccessful, TOPIC);
}