Search in sources :

Example 1 with TimestampedMessageParser

Use of org.apache.oozie.util.TimestampedMessageParser in the Apache Oozie project.

From the class ZKXLogStreamingService, method collateLogs.

/**
 * Contacts each of the other Oozie servers, gets their logs for the job, collates them, and sends them to the user via the
 * Writer.  It will make sure to not read all of the log messages into memory at the same time to not use up the heap.  If there
 * is a problem talking to one of the other servers, it will ignore that server and prepend a message to the Writer about it.
 * For getting the logs from this server, it won't use the REST API and instead get them directly to be more efficient.
 * <p>
 * Messages are merged in ascending order of the timestamp string reported by each parser.  When several servers currently
 * hold a message with the same timestamp, all of them are kept and emitted in first-in, first-out order; no server's
 * messages are discarded because of a timestamp tie.
 *
 * @param logStreamer the XLogStreamer
 * @param startTime the job start time
 * @param endTime the job end time
 * @param writer the writer
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void collateLogs(XLogStreamer logStreamer, Date startTime, Date endTime, Writer writer) throws IOException {
    List<String> badOozies = new ArrayList<String>();
    List<ServiceInstance<Map>> oozies = null;
    try {
        oozies = zk.getAllMetaData();
    } catch (Exception ex) {
        throw new IOException("Issue communicating with ZooKeeper: " + ex.getMessage(), ex);
    }
    List<TimestampedMessageParser> parsers = new ArrayList<TimestampedMessageParser>(oozies.size());
    try {
        // Create a BufferedReader for getting the logs of each server and put them in a TimestampedMessageParser
        for (ServiceInstance<Map> oozie : oozies) {
            Map<String, String> oozieMeta = oozie.getPayload();
            String otherId = oozieMeta.get(ZKUtils.ZKMetadataKeys.OOZIE_ID);
            // If it's this server, we can just get them directly
            if (otherId.equals(zk.getZKId())) {
                BufferedReader reader = logStreamer.makeReader(startTime, endTime);
                parsers.add(new TimestampedMessageParser(reader, logStreamer.getXLogFilter()));
            } else // If it's another server, we'll have to use the REST API
            {
                String otherUrl = oozieMeta.get(ZKUtils.ZKMetadataKeys.OOZIE_URL);
                String jobId = logStreamer.getXLogFilter().getFilterParams().get(DagXLogInfoService.JOB);
                try {
                    // It's important that we specify ALL_SERVERS_PARAM=false in the GET request to prevent the other Oozie
                    // Server from trying aggregate logs from the other Oozie servers (and creating an infinite recursion)
                    final String url = otherUrl + "/v" + OozieClient.WS_PROTOCOL_VERSION + "/" + RestConstants.JOB + "/" + jobId + "?" + RestConstants.JOB_SHOW_PARAM + "=" + logStreamer.getLogType() + "&" + RestConstants.ALL_SERVER_REQUEST + "=false" + AuthUrlClient.getQueryParamString(logStreamer.getRequestParam());
                    // remove doAs from url to avoid failure while fetching
                    // logs in case of HA mode
                    String key = "doAs";
                    String[] value = null;
                    if (logStreamer.getRequestParam() != null) {
                        value = logStreamer.getRequestParam().get(key);
                    }
                    String urlWithoutdoAs = null;
                    if (value != null && value.length > 0 && value[0] != null && value[0].length() > 0) {
                        urlWithoutdoAs = url.replace("&" + key + "=" + URLEncoder.encode(value[0], "UTF-8"), "");
                    } else {
                        urlWithoutdoAs = url;
                    }
                    BufferedReader reader = AuthUrlClient.callServer(urlWithoutdoAs);
                    parsers.add(new SimpleTimestampedMessageParser(reader, logStreamer.getXLogFilter()));
                } catch (IOException ioe) {
                    // Best effort: remember the unreachable server so the user is told the output may be incomplete
                    log.warn("Failed to retrieve logs for job [" + jobId + "] from Oozie server with ID [" + otherId + "] at [" + otherUrl + "]; log information may be incomplete", ioe);
                    badOozies.add(otherId);
                }
            }
        }
        // If the filter produced a truncation notice, write it ahead of the log content
        if (!StringUtils.isEmpty(logStreamer.getXLogFilter().getTruncatedMessage())) {
            writer.write(logStreamer.getXLogFilter().getTruncatedMessage());
        }
        // If log param debug is set, we need to write the filter's debug message to the output
        if (logStreamer.getXLogFilter().isDebugMode()) {
            writer.write(logStreamer.getXLogFilter().getDebugMessage());
        }
        // Add a message about any servers we couldn't contact
        if (!badOozies.isEmpty()) {
            writer.write("Unable to contact the following Oozie Servers for logs (log information may be incomplete):\n");
            for (String badOozie : badOozies) {
                writer.write("     ");
                writer.write(badOozie);
                writer.write("\n");
            }
            writer.write("\n");
            writer.flush();
        }
        // If there is only one parser, then we don't need to do any more processing and can just copy it directly
        if (parsers.size() == 1) {
            TimestampedMessageParser parser = parsers.get(0);
            parser.processRemaining(writer, logStreamer);
        } else {
            // Now that we have a Reader for each server to get the logs from that server, we have to collate them.  Within each
            // server, the logs should already be in the correct order, so we can take advantage of that.  We'll use the
            // BufferedReaders to read the messages from the logs of each server and put them in order without having to bring
            // every message into memory at the same time.
            //
            // Each timestamp maps to a FIFO bucket of parsers rather than a single parser: two servers can be positioned on
            // messages with the same timestamp, and a plain timestamp -> parser map would silently replace (and lose) one.
            TreeMap<String, List<TimestampedMessageParser>> timestampMap =
                    new TreeMap<String, List<TimestampedMessageParser>>();
            // Number of parsers currently held in timestampMap (the map's size only counts distinct timestamps)
            int pendingParsers = 0;
            // populate timestampMap with initial values
            for (TimestampedMessageParser parser : parsers) {
                if (parser.increment()) {
                    enqueueByTimestamp(timestampMap, parser);
                    pendingParsers++;
                }
            }
            while (pendingParsers > 1) {
                // The first entry holds the parsers with the earliest timestamp; take the one that has waited longest
                Map.Entry<String, List<TimestampedMessageParser>> earliestEntry = timestampMap.firstEntry();
                List<TimestampedMessageParser> sameTimestamp = earliestEntry.getValue();
                TimestampedMessageParser earliestParser = sameTimestamp.remove(0);
                if (sameTimestamp.isEmpty()) {
                    timestampMap.remove(earliestEntry.getKey());
                }
                // Write the message from that parser at that timestamp
                String message = earliestParser.getLastMessage();
                writer.write(message);
                if (logStreamer.shouldFlushOutput(message.length())) {
                    writer.flush();
                }
                // Increment that parser to read the next message
                if (earliestParser.increment()) {
                    // If it still has messages left, put it back in the map with the new last timestamp for it
                    enqueueByTimestamp(timestampMap, earliestParser);
                } else {
                    pendingParsers--;
                }
            }
            // If there's only one parser left in the map, then we can simply copy the rest of its lines directly to be faster
            if (pendingParsers == 1) {
                TimestampedMessageParser parser = timestampMap.firstEntry().getValue().get(0);
                // don't forget the last message read by the parser
                writer.write(parser.getLastMessage());
                parser.processRemaining(writer, logStreamer);
            }
        }
    } finally {
        for (TimestampedMessageParser parser : parsers) {
            parser.closeReader();
        }
    }
}

/**
 * Adds the parser to the bucket for its current last timestamp, creating the bucket if needed.  Parsers sharing a
 * timestamp are kept in insertion order.
 *
 * @param timestampMap the timestamp-ordered map of parser buckets
 * @param parser the parser to enqueue; it must already be positioned on a message
 */
private void enqueueByTimestamp(TreeMap<String, List<TimestampedMessageParser>> timestampMap,
        TimestampedMessageParser parser) {
    String timestamp = parser.getLastTimestamp();
    List<TimestampedMessageParser> bucket = timestampMap.get(timestamp);
    if (bucket == null) {
        bucket = new ArrayList<TimestampedMessageParser>();
        timestampMap.put(timestamp, bucket);
    }
    bucket.add(parser);
}
Also used : SimpleTimestampedMessageParser(org.apache.oozie.util.SimpleTimestampedMessageParser) TimestampedMessageParser(org.apache.oozie.util.TimestampedMessageParser) ArrayList(java.util.ArrayList) ServiceInstance(org.apache.curator.x.discovery.ServiceInstance) IOException(java.io.IOException) TreeMap(java.util.TreeMap) BufferedReader(java.io.BufferedReader) Map(java.util.Map)

Aggregations

BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Map (java.util.Map)1 TreeMap (java.util.TreeMap)1 ServiceInstance (org.apache.curator.x.discovery.ServiceInstance)1 SimpleTimestampedMessageParser (org.apache.oozie.util.SimpleTimestampedMessageParser)1 TimestampedMessageParser (org.apache.oozie.util.TimestampedMessageParser)1