Use of org.apache.gobblin.util.HiveJdbcConnector in the project incubator-gobblin by Apache.
From the class ValidationJob, method getValidationOutputFromHiveJdbc.
/**
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 *
 * <p>Each query is expected to produce a result set whose first column of the first
 * row is a numeric row count; that value is collected per query. Queries that do not
 * produce a result set are logged and skipped.</p>
 *
 * @param queries Queries to execute; may be {@code null} or empty.
 * @return one row count per query that produced a result set; empty list if
 *         {@code queries} is {@code null} or empty.
 * @throws RuntimeException wrapping any {@link SQLException} raised during execution.
 */
@SuppressWarnings("unused")
private List<Long> getValidationOutputFromHiveJdbc(List<String> queries) throws IOException {
  if (null == queries || queries.isEmpty()) {
    log.warn("No queries specified to be executed");
    return Collections.emptyList();
  }
  List<Long> rowCounts = Lists.newArrayList();
  Closer closer = Closer.create();
  try {
    // Register the connector with the closer so closer.close() actually releases the
    // underlying JDBC connection. Previously the connector was never registered, so the
    // Closer closed nothing and the connection leaked (the sibling method
    // getValidationOutputFromHive registers it correctly).
    HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
    // try-with-resources closes the Statement (and below, each ResultSet) even on error,
    // replacing the manual null-checked close in the finally block.
    try (Statement statement = hiveJdbcConnector.getConnection().createStatement()) {
      for (String query : queries) {
        log.info("Executing query: " + query);
        boolean result = statement.execute(query);
        if (result) {
          try (ResultSet resultSet = statement.getResultSet()) {
            if (resultSet.next()) {
              rowCounts.add(resultSet.getLong(1));
            }
          }
        } else {
          log.warn("Query output for: " + query + " : " + result);
        }
      }
    }
  } catch (SQLException e) {
    throw new RuntimeException(e);
  } finally {
    try {
      closer.close();
    } catch (Exception e) {
      log.warn("Could not close HiveJdbcConnector", e);
    }
  }
  return rowCounts;
}
Use of org.apache.gobblin.util.HiveJdbcConnector in the project incubator-gobblin by Apache.
From the class ValidationJob, method getValidationOutputFromHive.
/**
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 *
 * <p>Instead of reading a JDBC result set, each query is wrapped in an
 * {@code INSERT OVERWRITE DIRECTORY} statement so Hive writes its output to a unique
 * temp directory under {@code /tmp} on this job's filesystem; the single output file
 * is then read back and parsed as a row count. The temp directory is deleted after
 * each query, whether it succeeded or not.</p>
 *
 * @param queries Queries to execute; each is expected to produce a single numeric value.
 * @return one parsed count per query whose output file could be read; empty list if
 *         {@code queries} is null or empty. On {@link SQLException} the error is logged
 *         and whatever counts were collected so far are returned (best-effort; no rethrow).
 * @throws IOException on filesystem errors while reading or cleaning up query output.
 * @throws RuntimeException if a query's output cannot be parsed as a long.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE", justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {
if (null == queries || queries.size() == 0) {
log.warn("No queries specified to be executed");
return Collections.emptyList();
}
List<Long> rowCounts = Lists.newArrayList();
Closer closer = Closer.create();
try {
// Register the connector so closer.close() releases the JDBC connection in the finally block.
HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
for (String query : queries) {
// Unique output location per query so concurrent validation runs don't collide.
String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
// Redirect the query's output to the temp dir instead of returning it over JDBC.
query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
log.info("Executing query: " + query);
try {
// Apply any job-configured Hive settings before each query.
if (this.hiveSettings.size() > 0) {
hiveJdbcConnector.executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()]));
}
// Disable output compression (so the file is plain text) and auto map-join
// conversion, then run the wrapped query.
hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false", "SET hive.auto.convert.join=false", query);
// Keep only regular files from the output dir; presumably Hive may also create
// subdirectories or marker entries there -- TODO confirm.
FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
List<FileStatus> files = new ArrayList<>();
for (FileStatus fileStatus : fileStatusList) {
if (fileStatus.isFile()) {
files.add(fileStatus);
}
}
// Exactly one output file is expected; anything else is logged and the query's
// count is silently omitted from the result.
if (files.size() > 1) {
log.warn("Found more than one output file. Should have been one.");
} else if (files.size() == 0) {
log.warn("Found no output file. Should have been one.");
} else {
String theString = IOUtils.toString(new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
log.info("Found row count: " + theString.trim());
// An empty/whitespace-only output file is treated as a count of zero.
if (StringUtils.isBlank(theString.trim())) {
rowCounts.add(0l);
} else {
try {
rowCounts.add(Long.parseLong(theString.trim()));
} catch (NumberFormatException e) {
throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
}
}
}
} finally {
// Always clean up the temp output dir, even if the query or parsing failed.
if (this.fs.exists(hiveTempDir)) {
log.debug("Deleting temp dir: " + hiveTempDir);
this.fs.delete(hiveTempDir, true);
}
}
}
} catch (SQLException e) {
// Deliberate best-effort: log the failure and fall through, returning the counts
// collected before the error rather than failing the whole validation.
log.warn("Execution failed for query set " + queries.toString(), e);
} finally {
try {
closer.close();
} catch (Exception e) {
log.warn("Could not close HiveJdbcConnector", e);
}
}
return rowCounts;
}
Aggregations