Search in sources :

Example 86 with Closer

use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.

In class GobblinMCEPublisherTest, the method setUp:

@BeforeClass
public void setUp() throws Exception {
    tmpDir = Files.createTempDir();
    datasetDir = new File(tmpDir, "/data/tracking/testTable");
    dataFile = new File(datasetDir, "/hourly/2020/03/17/08/data.avro");
    Files.createParentDirs(dataFile);
    dataDir = new File(dataFile.getParent());
    Assert.assertTrue(dataDir.exists());
    writeRecord();
    _avroPartitionSchema = SchemaBuilder.record("partitionTest").fields().name("ds").type().optional().stringType().endRecord();
    // Write an ORC file whose contents the publisher tests will verify.
    Schema schema = new Schema.Parser().parse(this.getClass().getClassLoader().getResourceAsStream("publisherTest/schema.avsc"));
    orcSchema = schema.toString();
    List<GenericRecord> recordList = deserializeAvroRecords(this.getClass(), schema, "publisherTest/data.json");
    // Mock WriterBuilder; stub just enough behavior to work around precondition checks in the writer builder.
    FsDataWriterBuilder<Schema, GenericRecord> mockBuilder = (FsDataWriterBuilder<Schema, GenericRecord>) Mockito.mock(FsDataWriterBuilder.class);
    when(mockBuilder.getSchema()).thenReturn(schema);
    State dummyState = new WorkUnit();
    String stagingDir = new File(tmpDir, "/orc/staging").getAbsolutePath();
    String outputDir = new File(tmpDir, "/orc/output").getAbsolutePath();
    // Note: the original set WRITER_STAGING_DIR twice; the duplicate is removed.
    dummyState.setProp(ConfigurationKeys.WRITER_STAGING_DIR, stagingDir);
    dummyState.setProp(ConfigurationKeys.WRITER_FILE_PATH, "simple");
    dummyState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, outputDir);
    when(mockBuilder.getFileName(dummyState)).thenReturn("file.orc");
    orcFilePath = new Path(outputDir, "simple/file.orc");
    // Having a closer to manage the life-cycle of the writer object.
    // Will verify if scenarios like double-close could survive: the writer is
    // closed explicitly below, and closer.close() in the finally block closes
    // it a second time. (The original never called closer.close(), which both
    // leaked the registered writer on failure and skipped the double-close check.)
    Closer closer = Closer.create();
    try {
        GobblinOrcWriter orcWriter = closer.register(new GobblinOrcWriter(mockBuilder, dummyState));
        for (GenericRecord record : recordList) {
            orcWriter.write(record);
        }
        orcWriter.commit();
        orcWriter.close();
    } finally {
        closer.close();
    }
    // Verify ORC file contains correct records.
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Assert.assertTrue(fs.exists(orcFilePath));
}
Also used : Path(org.apache.hadoop.fs.Path) Closer(com.google.common.io.Closer) FsDataWriterBuilder(org.apache.gobblin.writer.FsDataWriterBuilder) Configuration(org.apache.hadoop.conf.Configuration) GobblinOrcWriter(org.apache.gobblin.writer.GobblinOrcWriter) Schema(org.apache.avro.Schema) WorkUnitState(gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) FileSystem(org.apache.hadoop.fs.FileSystem) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 87 with Closer

use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.

In class EventUtils, the method deserializeEventFromAvroSerialization:

/**
 * Deserializes a {@link org.apache.gobblin.metrics.GobblinTrackingEvent} from its
 * byte-array Avro serialization.
 * @param reuse GobblinTrackingEvent instance to reuse when decoding.
 * @param bytes serialized input bytes.
 * @param schemaId expected schema id, or {@code null} to verify the schema version instead.
 * @return the deserialized GobblinTrackingEvent.
 * @throws java.io.IOException if the stream cannot be read or the schema check fails.
 */
public static synchronized GobblinTrackingEvent deserializeEventFromAvroSerialization(GobblinTrackingEvent reuse, byte[] bytes, @Nullable String schemaId) throws IOException {
    // Lazily initialize the shared datum reader on first use.
    if (!reader.isPresent()) {
        reader = Optional.of(new SpecificDatumReader<>(GobblinTrackingEvent.class));
    }
    Closer resourceCloser = Closer.create();
    try {
        DataInputStream dataIn = resourceCloser.register(new DataInputStream(new ByteArrayInputStream(bytes)));
        // The payload is prefixed by either a schema id or a schema version; verify whichever applies.
        if (schemaId == null) {
            MetricReportUtils.readAndVerifySchemaVersion(dataIn);
        } else {
            MetricReportUtils.readAndVerifySchemaId(dataIn, schemaId);
        }
        // Decode the remainder of the stream as the event record.
        Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(dataIn, null);
        return reader.get().read(reuse, binaryDecoder);
    } catch (Throwable t) {
        throw resourceCloser.rethrow(t);
    } finally {
        resourceCloser.close();
    }
}
Also used : Closer(com.google.common.io.Closer) ByteArrayInputStream(java.io.ByteArrayInputStream) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) DataInputStream(java.io.DataInputStream) Decoder(org.apache.avro.io.Decoder)

Example 88 with Closer

use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.

In class MetricReportUtils, the method deserializeReportFromAvroSerialization:

/**
 * Deserializes a {@link org.apache.gobblin.metrics.MetricReport} from its byte-array
 * Avro serialization.
 * @param reuse MetricReport instance to reuse when decoding.
 * @param bytes serialized input bytes.
 * @param schemaId expected schema id, or {@code null} to verify the schema version instead.
 * @return the deserialized MetricReport.
 * @throws java.io.IOException if the stream cannot be read or the schema check fails.
 */
public static synchronized MetricReport deserializeReportFromAvroSerialization(MetricReport reuse, byte[] bytes, @Nullable String schemaId) throws IOException {
    // Lazily initialize the shared datum reader on first use.
    if (!READER.isPresent()) {
        READER = Optional.of(new SpecificDatumReader<>(MetricReport.class));
    }
    Closer resourceCloser = Closer.create();
    try {
        DataInputStream dataIn = resourceCloser.register(new DataInputStream(new ByteArrayInputStream(bytes)));
        // The payload is prefixed by either a schema id or a schema version; verify whichever applies.
        if (schemaId == null) {
            readAndVerifySchemaVersion(dataIn);
        } else {
            readAndVerifySchemaId(dataIn, schemaId);
        }
        // Decode the remainder of the stream as the report record.
        Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(dataIn, null);
        return READER.get().read(reuse, binaryDecoder);
    } catch (Throwable t) {
        throw resourceCloser.rethrow(t);
    } finally {
        resourceCloser.close();
    }
}
Also used : Closer(com.google.common.io.Closer) ByteArrayInputStream(java.io.ByteArrayInputStream) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) DataInputStream(java.io.DataInputStream) Decoder(org.apache.avro.io.Decoder)

Example 89 with Closer

use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.

In class EmbeddedGobblinYarnAppLauncher, the method setup:

private static void setup(String[] args) throws Exception {
    // Parsing zk-string: the single required argument is the ZooKeeper connection string.
    Preconditions.checkArgument(args.length == 1);
    zkString = args[0];
    // Initialize necessary external components: Yarn and Helix
    // NOTE(review): this Closer is never closed in this method, so the MiniYARNCluster
    // registered below is never released through it. Presumably the cluster must outlive
    // setup() for the test to run — confirm, and either close the Closer on teardown or
    // remove it in favor of an explicit shutdown.
    Closer closer = Closer.create();
    // Set java home in environment since it isn't set on some systems
    String javaHome = System.getProperty("java.home");
    setEnv("JAVA_HOME", javaHome);
    final YarnConfiguration clusterConf = new YarnConfiguration();
    // Keep the mini-cluster small and its RM connection timeout short for tests.
    clusterConf.set("yarn.resourcemanager.connect.max-wait.ms", "10000");
    clusterConf.set("yarn.nodemanager.resource.memory-mb", "512");
    clusterConf.set("yarn.scheduler.maximum-allocation-mb", "1024");
    // Single-node mini YARN cluster: 1 node manager, 1 local dir, 1 log dir.
    MiniYARNCluster miniYARNCluster = closer.register(new MiniYARNCluster("TestCluster", 1, 1, 1));
    miniYARNCluster.init(clusterConf);
    miniYARNCluster.start();
    // YARN client should not be started before the Resource Manager is up.
    // An RM address still containing ":0" means it has not yet bound a real port.
    AssertWithBackoff.create().logger(log).timeoutMs(10000).assertTrue(new Predicate<Void>() {

        @Override
        public boolean apply(Void input) {
            return !clusterConf.get(YarnConfiguration.RM_ADDRESS).contains(":0");
        }
    }, "Waiting for RM");
    // Write the dynamic configuration consumed by the Gobblin cluster processes.
    try (PrintWriter pw = new PrintWriter(DYNAMIC_CONF_PATH, "UTF-8")) {
        File dir = new File("target/dummydir");
        // dummy directory specified in configuration
        if (!dir.mkdir()) {
            log.error("The dummy folder's creation is not successful");
        }
        dir.deleteOnExit();
        pw.println("gobblin.cluster.zk.connection.string=\"" + zkString + "\"");
        pw.println("jobconf.fullyQualifiedPath=\"" + dir.getAbsolutePath() + "\"");
    }
    // YARN config is dynamic and needs to be passed to other processes
    try (OutputStream os = new FileOutputStream(new File(YARN_SITE_XML_PATH))) {
        clusterConf.writeXml(os);
    }
    /**
     * Have to pass the same yarn-site.xml to the GobblinYarnAppLauncher to initialize Yarn Client.
     */
    fileAddress = new File(YARN_SITE_XML_PATH).getAbsolutePath();
}
Also used : Closer(com.google.common.io.Closer) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) MiniYARNCluster(org.apache.hadoop.yarn.server.MiniYARNCluster) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 90 with Closer

use of org.apache.flink.shaded.guava30.com.google.common.io.Closer in project incubator-gobblin by apache.

In class ValidationJob, the method getValidationOutputFromHiveJdbc:

/**
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 *
 * For each query that produces a result set, the first column of its first row is
 * collected as a row count; queries with no result set are logged with a warning.
 *
 * @param queries Queries to execute.
 * @return one row count per query that produced a result set; an empty list when
 *         {@code queries} is null or empty.
 * @throws IOException declared for interface compatibility; SQL failures are wrapped
 *         in {@link RuntimeException}.
 */
@SuppressWarnings("unused")
private List<Long> getValidationOutputFromHiveJdbc(List<String> queries) throws IOException {
    if (null == queries || queries.isEmpty()) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }
    Statement statement = null;
    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();
    try {
        // Register the connector so closer.close() in the finally block actually releases it.
        // (Previously the connector was never registered, so the Closer closed nothing and
        // the JDBC connection leaked — which is what the warn message below was meant for.)
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        statement = hiveJdbcConnector.getConnection().createStatement();
        for (String query : queries) {
            log.info("Executing query: " + query);
            boolean result = statement.execute(query);
            if (result) {
                ResultSet resultSet = statement.getResultSet();
                try {
                    if (resultSet.next()) {
                        rowCounts.add(resultSet.getLong(1));
                    }
                } finally {
                    // Close each result set; the original left them open until the
                    // statement was closed.
                    resultSet.close();
                }
            } else {
                log.warn("Query output for: " + query + " : " + result);
            }
        }
    } catch (SQLException e) {
        throw new RuntimeException(e);
    } finally {
        try {
            closer.close();
        } catch (Exception e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
        if (null != statement) {
            try {
                statement.close();
            } catch (SQLException e) {
                log.warn("Could not close Hive statement", e);
            }
        }
    }
    return rowCounts;
}
Also used : Closer(com.google.common.io.Closer) HiveJdbcConnector(org.apache.gobblin.util.HiveJdbcConnector) SQLException(java.sql.SQLException) Statement(java.sql.Statement) ResultSet(java.sql.ResultSet) ParseException(java.text.ParseException) SQLException(java.sql.SQLException) UpdateNotFoundException(org.apache.gobblin.data.management.conversion.hive.provider.UpdateNotFoundException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) IOException(java.io.IOException)

Aggregations

Closer (com.google.common.io.Closer)213 IOException (java.io.IOException)95 File (java.io.File)26 Test (org.testng.annotations.Test)21 Path (org.apache.hadoop.fs.Path)18 Test (org.junit.Test)18 Properties (java.util.Properties)16 Closer (org.apache.flink.shaded.guava30.com.google.common.io.Closer)16 FileOutputStream (java.io.FileOutputStream)15 ArrayList (java.util.ArrayList)15 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)13 FileInputStream (java.io.FileInputStream)12 InputStream (java.io.InputStream)12 OutputStream (java.io.OutputStream)12 Map (java.util.Map)12 ByteArrayInputStream (java.io.ByteArrayInputStream)10 DataInputStream (java.io.DataInputStream)10 UncheckedIOException (java.io.UncheckedIOException)10 Configuration (org.apache.hadoop.conf.Configuration)10 Text (org.apache.hadoop.io.Text)9