
Example 26 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class WikipediaExtractor method performHttpQuery.

private JsonElement performHttpQuery(String rootUrl, Map<String, String> query) throws URISyntaxException, IOException {
    // Lazily create the shared HTTP client on first use.
    if (null == this.httpClient) {
        this.httpClient = createHttpClient();
    }
    HttpUriRequest req = createHttpRequest(rootUrl, query);
    Closer closer = Closer.create();
    StringBuilder sb = new StringBuilder();
    try {
        HttpResponse response = sendHttpRequest(req, this.httpClient);
        // Only a CloseableHttpResponse needs releasing; register it if applicable.
        if (response instanceof CloseableHttpResponse) {
            closer.register((CloseableHttpResponse) response);
        }
        BufferedReader br = closer.register(new BufferedReader(new InputStreamReader(response.getEntity().getContent(), ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line).append('\n');
        }
    } catch (Throwable t) {
        // Record t as the primary exception so failures in close() are suppressed.
        throw closer.rethrow(t);
    } finally {
        try {
            closer.close();
        } catch (IOException e) {
            LOG.error("IOException in Closer.close() while performing query " + req + ": " + e, e);
        }
    }
    String body = sb.toString();
    if (Strings.isNullOrEmpty(body)) {
        LOG.warn("Received empty response for query: " + req);
        return new JsonObject();
    }
    return GSON.fromJson(body, JsonElement.class);
}
Also used : HttpUriRequest(org.apache.http.client.methods.HttpUriRequest) Closer(com.google.common.io.Closer) InputStreamReader(java.io.InputStreamReader) JsonElement(com.google.gson.JsonElement) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) BufferedReader(java.io.BufferedReader) HttpResponse(org.apache.http.HttpResponse) JsonObject(com.google.gson.JsonObject) IOException(java.io.IOException)
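
Every example in this set follows the same Guava Closer idiom: register each resource as it is opened, route any failure through rethrow() so it becomes the primary exception, and close everything in the finally block. Below is a minimal, self-contained sketch of that pattern; the file-reading task, class name, and method name are illustrative, not taken from Gobblin.

import com.google.common.io.Closer;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

public class CloserIdiomSketch {

    // Reads the first line of a file, mirroring the register / rethrow / close
    // structure used by performHttpQuery above.
    static String firstLine(String path) throws IOException {
        Closer closer = Closer.create();
        try {
            // register() returns its argument, so a resource can be declared
            // and tracked in a single expression.
            BufferedReader reader = closer.register(new BufferedReader(new FileReader(path)));
            return reader.readLine();
        } catch (Throwable t) {
            // rethrow() records t as the primary exception; if close() below
            // also fails, that failure is suppressed instead of masking t.
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}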

Example 27 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class EventUtils method deserializeReportFromAvroSerialization.

/**
 * Parses a {@link org.apache.gobblin.metrics.GobblinTrackingEvent} from a byte array Avro serialization.
 * @param reuse GobblinTrackingEvent to reuse.
 * @param bytes Input bytes.
 * @return GobblinTrackingEvent.
 * @throws java.io.IOException if the version byte does not match or decoding fails.
 */
public static synchronized GobblinTrackingEvent deserializeReportFromAvroSerialization(GobblinTrackingEvent reuse, byte[] bytes) throws IOException {
    if (!reader.isPresent()) {
        reader = Optional.of(new SpecificDatumReader<>(GobblinTrackingEvent.class));
    }
    Closer closer = Closer.create();
    try {
        DataInputStream inputStream = closer.register(new DataInputStream(new ByteArrayInputStream(bytes)));
        // Check version byte
        int versionNumber = inputStream.readInt();
        if (versionNumber != SCHEMA_VERSION) {
            throw new IOException(String.format("GobblinTrackingEvent schema version not recognized. Found version %d, expected %d.", versionNumber, SCHEMA_VERSION));
        }
        // Decode the rest
        Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
        return reader.get().read(reuse, decoder);
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : Closer(com.google.common.io.Closer) ByteArrayInputStream(java.io.ByteArrayInputStream) IOException(java.io.IOException) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) DataInputStream(java.io.DataInputStream) Decoder(org.apache.avro.io.Decoder)
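
The reader above expects a four-byte version header written with DataOutputStream.writeInt, followed by an Avro binary body. As a rough sketch of a write side that would produce compatible bytes — the SCHEMA_VERSION value of 1 and the class name are assumptions for illustration, not Gobblin code:

import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.gobblin.metrics.GobblinTrackingEvent;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class EventSerializerSketch {

    // Assumed to match EventUtils.SCHEMA_VERSION; the real constant lives in Gobblin.
    private static final int SCHEMA_VERSION = 1;

    static byte[] serialize(GobblinTrackingEvent event) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(out);
        // Version header first; deserializeReportFromAvroSerialization reads it back with readInt().
        dataOut.writeInt(SCHEMA_VERSION);
        // Then the Avro binary body.
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(dataOut, null);
        new SpecificDatumWriter<>(GobblinTrackingEvent.class).write(event, encoder);
        encoder.flush();
        return out.toByteArray();
    }
}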

Example 28 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class ValidationJob method getValidationOutputFromHive.

/**
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE", justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {
    if (null == queries || queries.isEmpty()) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }
    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();
    try {
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        for (String query : queries) {
            String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
            Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
            query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
            log.info("Executing query: " + query);
            try {
                if (this.hiveSettings.size() > 0) {
                    hiveJdbcConnector.executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()]));
                }
                hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false", "SET hive.auto.convert.join=false", query);
                FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
                List<FileStatus> files = new ArrayList<>();
                for (FileStatus fileStatus : fileStatusList) {
                    if (fileStatus.isFile()) {
                        files.add(fileStatus);
                    }
                }
                if (files.size() > 1) {
                    log.warn("Found more than one output file. Should have been one.");
                } else if (files.isEmpty()) {
                    log.warn("Found no output file. Should have been one.");
                } else {
                    // Read the single output file and close the reader as soon as it is consumed.
                    String rowCount;
                    try (InputStreamReader reader = new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8)) {
                        rowCount = IOUtils.toString(reader).trim();
                    }
                    log.info("Found row count: " + rowCount);
                    if (StringUtils.isBlank(rowCount)) {
                        rowCounts.add(0L);
                    } else {
                        try {
                            rowCounts.add(Long.parseLong(rowCount));
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("Could not parse Hive output: " + rowCount, e);
                        }
                    }
                }
            } finally {
                if (this.fs.exists(hiveTempDir)) {
                    log.debug("Deleting temp dir: " + hiveTempDir);
                    this.fs.delete(hiveTempDir, true);
                }
            }
        }
    } catch (SQLException e) {
        log.warn("Execution failed for query set " + queries.toString(), e);
    } finally {
        try {
            closer.close();
        } catch (Exception e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
    }
    return rowCounts;
}
Also used : Closer(com.google.common.io.Closer) Path(org.apache.hadoop.fs.Path) HiveJdbcConnector(org.apache.gobblin.util.HiveJdbcConnector) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) SQLException(java.sql.SQLException) ArrayList(java.util.ArrayList) ParseException(java.text.ParseException) UpdateNotFoundException(org.apache.gobblin.data.management.conversion.hive.provider.UpdateNotFoundException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) IOException(java.io.IOException)
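
The per-query read of the single INSERT OVERWRITE DIRECTORY output file can also stand alone. Here is a sketch using try-with-resources over the Hadoop FileSystem API; readRowCount and its signature are an illustrative helper, not part of ValidationJob.

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

public class HiveOutputReaderSketch {

    // Reads the single-file output of an INSERT OVERWRITE DIRECTORY query and
    // parses it as a row count, treating a blank file as zero rows.
    static long readRowCount(FileSystem fs, Path outputFile) throws IOException {
        try (InputStreamReader reader =
                new InputStreamReader(fs.open(outputFile), StandardCharsets.UTF_8)) {
            String content = IOUtils.toString(reader).trim();
            return content.isEmpty() ? 0L : Long.parseLong(content);
        }
    }
}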

Example 29 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class ValidationJob method getValidationOutputFromHiveJdbc.

/**
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute.
 */
@SuppressWarnings("unused")
private List<Long> getValidationOutputFromHiveJdbc(List<String> queries) throws IOException {
    if (null == queries || queries.isEmpty()) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }
    Statement statement = null;
    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();
    try {
        // Register the connector with the Closer so the finally block below actually closes it.
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        statement = hiveJdbcConnector.getConnection().createStatement();
        for (String query : queries) {
            log.info("Executing query: " + query);
            boolean result = statement.execute(query);
            if (result) {
                ResultSet resultSet = statement.getResultSet();
                if (resultSet.next()) {
                    rowCounts.add(resultSet.getLong(1));
                }
            } else {
                log.warn("Query returned no result set: " + query);
            }
        }
    } catch (SQLException e) {
        throw new RuntimeException(e);
    } finally {
        try {
            closer.close();
        } catch (Exception e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
        if (null != statement) {
            try {
                statement.close();
            } catch (SQLException e) {
                log.warn("Could not close Hive statement", e);
            }
        }
    }
    return rowCounts;
}
Also used : Closer(com.google.common.io.Closer) HiveJdbcConnector(org.apache.gobblin.util.HiveJdbcConnector) SQLException(java.sql.SQLException) Statement(java.sql.Statement) ResultSet(java.sql.ResultSet) ParseException(java.text.ParseException) UpdateNotFoundException(org.apache.gobblin.data.management.conversion.hive.provider.UpdateNotFoundException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) IOException(java.io.IOException)
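
Since Connection, Statement, and ResultSet are all AutoCloseable, the same logic can be written with try-with-resources instead of a Closer plus a manual statement cleanup block. A sketch under that assumption — connection acquisition is left to the caller, and the queries are assumed to be COUNT-style statements that return a result set:

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

public class JdbcRowCountSketch {

    // Collects the first column of the first row of each query's result set.
    // Resources declared in the try header are closed in reverse order even
    // when a query throws.
    static List<Long> rowCounts(Connection connection, List<String> queries) throws SQLException {
        List<Long> counts = new ArrayList<>();
        for (String query : queries) {
            try (Statement statement = connection.createStatement();
                 ResultSet resultSet = statement.executeQuery(query)) {
                if (resultSet.next()) {
                    counts.add(resultSet.getLong(1));
                }
            }
        }
        return counts;
    }
}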

Example 30 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

the class Kafka08DataWriterIntegrationTest method testErrors.

@Test
public void testErrors() throws Exception {
    log.warn("Process id = " + ManagementFactory.getRuntimeMXBean().getName());
    int numRecordsPerExtract = 5;
    int numParallel = 2;
    int errorEvery = 2000;
    int totalRecords = numRecordsPerExtract * numParallel;
    int totalSuccessful = totalRecords / errorEvery + totalRecords % errorEvery;
    {
        Closer closer = Closer.create();
        try {
            kafkaTestHelper.provisionTopic(TOPIC);
            jobProps.setProperty("source.numRecordsPerExtract", "" + numRecordsPerExtract);
            jobProps.setProperty("source.numParallelism", "" + numParallel);
            jobProps.setProperty("writer.kafka.producerConfig.flaky.errorType", "regex");
            // all records from partition 0 will be dropped.
            jobProps.setProperty("writer.kafka.producerConfig.flaky.regexPattern", ":index:0.*");
            jobProps.setProperty("job.commit.policy", "partial");
            jobProps.setProperty("publish.at.job.level", "false");
            // number of records in partition 1
            totalSuccessful = 5;
            JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(gobblinProps, jobProps));
            jobLauncher.launchJob(null);
        } catch (Exception e) {
            log.error("Failed to run job with exception ", e);
            Assert.fail("Should not throw exception on running the job");
        } finally {
            closer.close();
        }
        // test records written
        testRecordsWritten(totalSuccessful, TOPIC);
    }
    boolean trySecond = true;
    if (trySecond) {
        Closer closer = Closer.create();
        try {
            jobProps.setProperty("source.numRecordsPerExtract", "" + numRecordsPerExtract);
            jobProps.setProperty("source.numParallelism", "" + numParallel);
            jobProps.setProperty("writer.kafka.producerConfig.flaky.errorType", "nth");
            jobProps.setProperty("writer.kafka.producerConfig.flaky.errorEvery", "" + errorEvery);
            JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(gobblinProps, jobProps));
            jobLauncher.launchJob(null);
            totalSuccessful = totalRecords / errorEvery + totalRecords % errorEvery;
        } catch (Exception e) {
            log.error("Failed to run job with exception ", e);
            Assert.fail("Should not throw exception on running the job");
        } finally {
            closer.close();
        }
    }
    // test records written
    testRecordsWritten(totalSuccessful, TOPIC);
}
Also used : Closer(com.google.common.io.Closer) JobLauncher(org.apache.gobblin.runtime.JobLauncher) IOException(java.io.IOException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Test(org.testng.annotations.Test)

Aggregations

Closer (com.google.common.io.Closer) 200
IOException (java.io.IOException) 94
File (java.io.File) 22
Test (org.junit.Test) 18
Test (org.testng.annotations.Test) 18
Closer (org.apache.flink.shaded.guava30.com.google.common.io.Closer) 16
Path (org.apache.hadoop.fs.Path) 16
ArrayList (java.util.ArrayList) 15
Properties (java.util.Properties) 14
FileOutputStream (java.io.FileOutputStream) 13
FileInputStream (java.io.FileInputStream) 12
InputStream (java.io.InputStream) 12
Map (java.util.Map) 12
OutputStream (java.io.OutputStream) 11
UncheckedIOException (java.io.UncheckedIOException) 10
NettyShuffleEnvironment (org.apache.flink.runtime.io.network.NettyShuffleEnvironment) 9
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit) 9
AlluxioException (alluxio.exception.AlluxioException) 8
InputStreamReader (java.io.InputStreamReader) 8
HashMap (java.util.HashMap) 8
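
Guava's own Javadoc positions Closer as a stand-in for try-with-resources on Java 6, which is why every example above carries the register / rethrow / close scaffolding. On Java 7 and later the same guarantees — reverse-order closing and suppressed secondary exceptions — come from a single try statement, as in this minimal sketch:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

public class TryWithResourcesSketch {

    // Equivalent of the Closer idiom: the reader is closed automatically, and
    // an exception thrown by close() is attached to the primary exception as
    // a suppressed exception rather than replacing it.
    static String firstLine(String path) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            return reader.readLine();
        }
    }
}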