use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class GobblinMCEPublisherTest method setUp.
/**
 * Builds the fixtures shared by the tests in this class.
 *
 * <p>Creates a temporary directory tree for {@code data.avro}, invokes {@code writeRecord()},
 * builds the partition schema, and writes the records parsed from {@code publisherTest/data.json}
 * to an ORC file through a {@link GobblinOrcWriter}, finally asserting that the ORC file exists.
 *
 * @throws Exception if any fixture cannot be created or the ORC file cannot be written
 */
@BeforeClass
public void setUp() throws Exception {
  tmpDir = Files.createTempDir();
  datasetDir = new File(tmpDir, "/data/tracking/testTable");
  dataFile = new File(datasetDir, "/hourly/2020/03/17/08/data.avro");
  Files.createParentDirs(dataFile);
  dataDir = new File(dataFile.getParent());
  Assert.assertTrue(dataDir.exists());
  writeRecord();
  _avroPartitionSchema = SchemaBuilder.record("partitionTest").fields().name("ds").type().optional().stringType().endRecord();

  // Write ORC file for test
  Schema schema = new Schema.Parser().parse(this.getClass().getClassLoader().getResourceAsStream("publisherTest/schema.avsc"));
  orcSchema = schema.toString();
  List<GenericRecord> recordList = deserializeAvroRecords(this.getClass(), schema, "publisherTest/data.json");

  // Mock WriterBuilder, bunch of mocking behaviors to work-around precondition checks in writer builder
  FsDataWriterBuilder<Schema, GenericRecord> mockBuilder = (FsDataWriterBuilder<Schema, GenericRecord>) Mockito.mock(FsDataWriterBuilder.class);
  when(mockBuilder.getSchema()).thenReturn(schema);

  State dummyState = new WorkUnit();
  String stagingDir = new File(tmpDir, "/orc/staging").getAbsolutePath();
  String outputDir = new File(tmpDir, "/orc/output").getAbsolutePath();
  // Each writer property is set exactly once (the staging dir used to be set twice).
  dummyState.setProp(ConfigurationKeys.WRITER_STAGING_DIR, stagingDir);
  dummyState.setProp(ConfigurationKeys.WRITER_FILE_PATH, "simple");
  dummyState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, outputDir);
  when(mockBuilder.getFileName(dummyState)).thenReturn("file.orc");
  orcFilePath = new Path(outputDir, "simple/file.orc");

  // Having a closer to manage the life-cycle of the writer object.
  // Will verify if scenarios like double-close could survive.
  Closer closer = Closer.create();
  try {
    GobblinOrcWriter orcWriter = closer.register(new GobblinOrcWriter(mockBuilder, dummyState));
    for (GenericRecord record : recordList) {
      orcWriter.write(record);
    }
    orcWriter.commit();
    orcWriter.close();
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    // On the success path the writer is already closed, so this is the second close
    // (the double-close scenario). On failure it releases the writer.
    closer.close();
  }

  // Verify that the ORC file was produced at the expected location.
  FileSystem fs = FileSystem.getLocal(new Configuration());
  Assert.assertTrue(fs.exists(orcFilePath));
}
use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class EventUtils method deserializeEventFromAvroSerialization.
/**
 * Decodes a {@link org.apache.gobblin.metrics.GobblinTrackingEvent} from its Avro byte-array serialization.
 *
 * <p>The leading part of the input is verified first: against {@code schemaId} when one is supplied,
 * otherwise through the schema-version check. Whatever follows is decoded as the event itself.
 *
 * @param reuse GobblinTrackingEvent instance passed to the datum reader for reuse.
 * @param bytes Serialized input.
 * @param schemaId Expected schemaId, or {@code null} to verify the schema version instead.
 * @return The GobblinTrackingEvent produced by the datum reader.
 * @throws java.io.IOException if verifying or decoding the input fails with an I/O error.
 */
public static synchronized GobblinTrackingEvent deserializeEventFromAvroSerialization(GobblinTrackingEvent reuse, byte[] bytes, @Nullable String schemaId) throws IOException {
  // The datum reader is created on first use and then kept for later calls.
  if (!reader.isPresent()) {
    reader = Optional.of(new SpecificDatumReader<>(GobblinTrackingEvent.class));
  }

  final Closer resources = Closer.create();
  try {
    final DataInputStream in = resources.register(new DataInputStream(new ByteArrayInputStream(bytes)));

    // Consume and check the header before handing the stream to the decoder.
    if (schemaId == null) {
      MetricReportUtils.readAndVerifySchemaVersion(in);
    } else {
      MetricReportUtils.readAndVerifySchemaId(in, schemaId);
    }

    // Everything left in the stream is the Avro-encoded event.
    final Decoder avroDecoder = DecoderFactory.get().binaryDecoder(in, null);
    return reader.get().read(reuse, avroDecoder);
  } catch (Throwable failure) {
    throw resources.rethrow(failure);
  } finally {
    resources.close();
  }
}
use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class MetricReportUtils method deserializeReportFromAvroSerialization.
/**
 * Decodes a {@link org.apache.gobblin.metrics.MetricReport} from its Avro byte-array serialization.
 *
 * <p>The leading part of the input is verified first: against {@code schemaId} when one is supplied,
 * otherwise through the schema-version check. Whatever follows is decoded as the report itself.
 *
 * @param reuse MetricReport instance passed to the datum reader for reuse.
 * @param bytes Serialized input.
 * @param schemaId Expected schemaId, or {@code null} to verify the schema version instead.
 * @return The MetricReport produced by the datum reader.
 * @throws java.io.IOException if verifying or decoding the input fails with an I/O error.
 */
public static synchronized MetricReport deserializeReportFromAvroSerialization(MetricReport reuse, byte[] bytes, @Nullable String schemaId) throws IOException {
  // The datum reader is created on first use and then kept for later calls.
  if (!READER.isPresent()) {
    READER = Optional.of(new SpecificDatumReader<>(MetricReport.class));
  }

  final Closer resources = Closer.create();
  try {
    final DataInputStream in = resources.register(new DataInputStream(new ByteArrayInputStream(bytes)));

    // Consume and check the header before handing the stream to the decoder.
    if (schemaId == null) {
      readAndVerifySchemaVersion(in);
    } else {
      readAndVerifySchemaId(in, schemaId);
    }

    // Everything left in the stream is the Avro-encoded report.
    final Decoder avroDecoder = DecoderFactory.get().binaryDecoder(in, null);
    return READER.get().read(reuse, avroDecoder);
  } catch (Throwable failure) {
    throw resources.rethrow(failure);
  } finally {
    resources.close();
  }
}
use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class EmbeddedGobblinYarnAppLauncher method setup.
/**
 * Starts the external components needed by the embedded launcher and writes their configuration to disk.
 *
 * <p>In order: takes the ZooKeeper connection string from {@code args[0]}, sets {@code JAVA_HOME},
 * starts a {@link MiniYARNCluster}, waits until the configured Resource Manager address no longer
 * contains {@code ":0"}, writes the dynamic configuration file at {@code DYNAMIC_CONF_PATH}, and
 * dumps the YARN configuration to {@code YARN_SITE_XML_PATH}.
 *
 * @param args must contain exactly one element, the ZooKeeper connection string
 * @throws Exception if the argument check, the cluster start-up, the wait, or any file write fails
 */
private static void setup(String[] args) throws Exception {
  // Parsing zk-string
  Preconditions.checkArgument(args.length == 1);
  zkString = args[0];

  // Initialize necessary external components: Yarn and Helix
  // NOTE(review): this closer is never closed in this method; presumably the mini cluster is
  // meant to keep running after setup returns — confirm.
  Closer closer = Closer.create();

  // Set java home in environment since it isn't set on some systems
  String javaHome = System.getProperty("java.home");
  setEnv("JAVA_HOME", javaHome);

  final YarnConfiguration clusterConf = new YarnConfiguration();
  clusterConf.set("yarn.resourcemanager.connect.max-wait.ms", "10000");
  clusterConf.set("yarn.nodemanager.resource.memory-mb", "512");
  clusterConf.set("yarn.scheduler.maximum-allocation-mb", "1024");

  MiniYARNCluster miniYARNCluster = closer.register(new MiniYARNCluster("TestCluster", 1, 1, 1));
  miniYARNCluster.init(clusterConf);
  miniYARNCluster.start();

  // YARN client should not be started before the Resource Manager is up
  AssertWithBackoff.create().logger(log).timeoutMs(10000).assertTrue(new Predicate<Void>() {
    @Override
    public boolean apply(Void input) {
      return !clusterConf.get(YarnConfiguration.RM_ADDRESS).contains(":0");
    }
  }, "Waiting for RM");

  try (PrintWriter pw = new PrintWriter(DYNAMIC_CONF_PATH, "UTF-8")) {
    File dir = new File("target/dummydir");
    // dummy directory specified in configuration.
    // A directory left over from an earlier run is acceptable, and missing parents are created,
    // so the error is logged only when the directory really is unavailable.
    if (!dir.isDirectory() && !dir.mkdirs()) {
      log.error("The dummy folder's creation is not successful");
    }
    dir.deleteOnExit();
    pw.println("gobblin.cluster.zk.connection.string=\"" + zkString + "\"");
    pw.println("jobconf.fullyQualifiedPath=\"" + dir.getAbsolutePath() + "\"");
  }

  // YARN config is dynamic and needs to be passed to other processes
  try (OutputStream os = new FileOutputStream(new File(YARN_SITE_XML_PATH))) {
    clusterConf.writeXml(os);
  }

  // Have to pass the same yarn-site.xml to the GobblinYarnAppLauncher to initialize Yarn Client.
  fileAddress = new File(YARN_SITE_XML_PATH).getAbsolutePath();
}
use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.
the class ValidationJob method getValidationOutputFromHiveJdbc.
/**
 * Execute Hive queries using {@link HiveJdbcConnector} and collect one count per query.
 *
 * <p>For every query that produces a result set with at least one row, the first column of the
 * first row is read as a {@code long} and appended to the returned list. Queries that produce no
 * result set are logged and contribute nothing.
 *
 * @param queries Queries to execute.
 * @return The collected values in query order; an empty list when {@code queries} is null or empty.
 * @throws IOException declared by the signature; failures while closing resources are logged, not propagated.
 * @throws RuntimeException wrapping any {@link SQLException} raised while connecting or executing a query.
 */
@SuppressWarnings("unused")
private List<Long> getValidationOutputFromHiveJdbc(List<String> queries) throws IOException {
  if (null == queries || queries.isEmpty()) {
    log.warn("No queries specified to be executed");
    return Collections.emptyList();
  }
  Statement statement = null;
  List<Long> rowCounts = Lists.newArrayList();
  Closer closer = Closer.create();
  try {
    // Register the connector so that closer.close() below actually releases it.
    HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
    statement = hiveJdbcConnector.getConnection().createStatement();
    for (String query : queries) {
      log.info("Executing query: " + query);
      boolean result = statement.execute(query);
      if (result) {
        ResultSet resultSet = statement.getResultSet();
        if (resultSet.next()) {
          rowCounts.add(resultSet.getLong(1));
        }
      } else {
        log.warn("Query output for: " + query + " : " + result);
      }
    }
  } catch (SQLException e) {
    throw new RuntimeException(e);
  } finally {
    // Close the statement first, then the connector that owns its connection.
    if (null != statement) {
      try {
        statement.close();
      } catch (SQLException e) {
        log.warn("Could not close Hive statement", e);
      }
    }
    try {
      closer.close();
    } catch (Exception e) {
      log.warn("Could not close HiveJdbcConnector", e);
    }
  }
  return rowCounts;
}
Aggregations