Search in sources :

Example 1 with PigStats

use of org.apache.pig.tools.pigstats.PigStats in project oozie by apache.

the class OoziePigStats method toJSON.

/**
 * Collects the statistics exposed by the PigStats API and returns them as a JSON string.
 *
 * <p>The JSON object contains the current action type, the script-level summary values
 * (numeric values are stored as decimal strings), one entry per job of the job graph keyed
 * by the job id, and a {@code JOB_GRAPH} entry holding all job ids as a comma separated
 * string in job graph iteration order.
 *
 * @return a JSON string
 */
@SuppressWarnings("unchecked")
@Override
public String toJSON() {
    JSONObject pigStatsGroup = new JSONObject();
    pigStatsGroup.put("ACTION_TYPE", getCurrentActionType().toString());
    // pig summary related counters
    pigStatsGroup.put("BYTES_WRITTEN", Long.toString(pigStats.getBytesWritten()));
    pigStatsGroup.put("DURATION", Long.toString(pigStats.getDuration()));
    pigStatsGroup.put("ERROR_CODE", Long.toString(pigStats.getErrorCode()));
    pigStatsGroup.put("ERROR_MESSAGE", pigStats.getErrorMessage());
    pigStatsGroup.put("FEATURES", pigStats.getFeatures());
    pigStatsGroup.put("HADOOP_VERSION", pigStats.getHadoopVersion());
    pigStatsGroup.put("NUMBER_JOBS", Long.toString(pigStats.getNumberJobs()));
    pigStatsGroup.put("PIG_VERSION", pigStats.getPigVersion());
    pigStatsGroup.put("PROACTIVE_SPILL_COUNT_OBJECTS", Long.toString(pigStats.getProactiveSpillCountObjects()));
    pigStatsGroup.put("PROACTIVE_SPILL_COUNT_RECORDS", Long.toString(pigStats.getProactiveSpillCountRecords()));
    pigStatsGroup.put("RECORD_WRITTEN", Long.toString(pigStats.getRecordWritten()));
    pigStatsGroup.put("RETURN_CODE", Long.toString(pigStats.getReturnCode()));
    pigStatsGroup.put("SCRIPT_ID", pigStats.getScriptId());
    pigStatsGroup.put("SMM_SPILL_COUNT", Long.toString(pigStats.getSMMSpillCount()));
    PigStats.JobGraph jobGraph = pigStats.getJobGraph();
    // The accumulator never leaves this method, so the unsynchronized StringBuilder is
    // sufficient (StringBuffer would only add locking overhead).
    StringBuilder jobIds = new StringBuilder();
    String separator = ",";
    for (JobStats jobStats : jobGraph) {
        // Get all the HadoopIds and put them as comma separated string for JOB_GRAPH
        String hadoopId = jobStats.getJobId();
        if (jobIds.length() > 0) {
            jobIds.append(separator);
        }
        jobIds.append(hadoopId);
        // Hadoop Counters for pig created MR job
        pigStatsGroup.put(hadoopId, toJSONFromJobStats(jobStats));
    }
    pigStatsGroup.put("JOB_GRAPH", jobIds.toString());
    return pigStatsGroup.toJSONString();
}
Also used : JSONObject(org.json.simple.JSONObject) PigStats(org.apache.pig.tools.pigstats.PigStats) JobStats(org.apache.pig.tools.pigstats.JobStats)

Example 2 with PigStats

use of org.apache.pig.tools.pigstats.PigStats in project vespa by vespa-engine.

the class VespaStorageTest method assertAllDocumentsOk.

/**
 * Executes the given Pig script as a batch and checks the Vespa counters of every job in
 * the job graph of the first executed job: 10 documents sent, 0 failed and 10 ok.
 *
 * @param script the Pig script handed to {@code setup}
 * @param conf the configuration handed to {@code setup}
 * @throws Exception if setting up or executing the script fails
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer pigServer = setup(script, conf);
    List<ExecJob> executedJobs = pigServer.executeBatch();
    // Only the statistics of the first job of the batch are inspected.
    ExecJob firstJob = executedJobs.get(0);
    PigStats pigStats = firstJob.getStatistics();
    for (JobStats jobStats : pigStats.getJobGraph()) {
        MRJobStats mrJobStats = (MRJobStats) jobStats;
        Counters hadoopCounters = mrJobStats.getHadoopCounters();
        assertNotNull(hadoopCounters);
        VespaCounters vespaCounters = VespaCounters.get(hadoopCounters);
        assertEquals(10, vespaCounters.getDocumentsSent());
        assertEquals(0, vespaCounters.getDocumentsFailed());
        assertEquals(10, vespaCounters.getDocumentsOk());
    }
}
Also used : PigServer(org.apache.pig.PigServer) PigStats(org.apache.pig.tools.pigstats.PigStats) MRJobStats(org.apache.pig.tools.pigstats.mapreduce.MRJobStats) VespaCounters(com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters) Counters(org.apache.hadoop.mapred.Counters) ExecJob(org.apache.pig.backend.executionengine.ExecJob) JobStats(org.apache.pig.tools.pigstats.JobStats)

Example 3 with PigStats

use of org.apache.pig.tools.pigstats.PigStats in project zeppelin by apache.

the class PigQueryInterpreter method interpret.

/**
 * Runs a Pig query and renders the content of its last statement as a {@code %table} result.
 *
 * <p>The last line of {@code st} is assigned to an alias derived from the paragraph id, the
 * resulting lines are written to a temporary script file and registered with the Pig server.
 * The header row comes from the alias schema when it is known, otherwise generic column
 * names are generated from the first tuple. At most {@code maxResult} rows are emitted; when
 * more rows are available a message saying so is appended.
 *
 * @param st the Pig query text, one statement per line
 * @param context the interpreter context; its paragraph id names the alias and keys the
 *     script listener registration
 * @return a SUCCESS result holding the table text, or an ERROR result describing the failure
 */
@Override
public InterpreterResult interpret(String st, InterpreterContext context) {
    // '-' is invalid for pig alias
    String alias = "paragraph_" + context.getParagraphId().replace("-", "_");
    String[] lines = st.split("\n");
    List<String> queries = new ArrayList<>();
    for (int i = 0; i < lines.length; ++i) {
        if (i == lines.length - 1) {
            // bind the last statement to the alias so that it can be dumped below
            lines[i] = alias + " = " + lines[i];
        }
        queries.add(lines[i]);
    }
    StringBuilder resultBuilder = new StringBuilder("%table ");
    // Declared outside the try block so that the finally block can delete the file.
    File tmpScriptFile = null;
    try {
        pigServer.setJobName(createJobName(st, context));
        tmpScriptFile = PigUtils.createTempPigScript(queries);
        // each thread should have its own ScriptState & PigStats
        ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
        // reset PigStats, otherwise you may get the PigStats of last job in the same thread
        // because PigStats is ThreadLocal variable
        PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
        PigScriptListener scriptListener = new PigScriptListener();
        ScriptState.get().registerListener(scriptListener);
        listenerMap.put(context.getParagraphId(), scriptListener);
        pigServer.registerScript(tmpScriptFile.getAbsolutePath());
        Schema schema = pigServer.dumpSchema(alias);
        boolean schemaKnown = (schema != null);
        if (schemaKnown) {
            // header row from the schema; fields without an alias get a positional name
            for (int i = 0; i < schema.size(); ++i) {
                Schema.FieldSchema field = schema.getField(i);
                resultBuilder.append(field.alias != null ? field.alias : "col_" + i);
                if (i != schema.size() - 1) {
                    resultBuilder.append("\t");
                }
            }
            resultBuilder.append("\n");
        }
        Iterator<Tuple> iter = pigServer.openIterator(alias);
        boolean firstRow = true;
        int index = 0;
        while (iter.hasNext() && index < maxResult) {
            index++;
            Tuple tuple = iter.next();
            if (firstRow && !schemaKnown) {
                // no schema available: derive a generic header from the first tuple's width
                for (int i = 0; i < tuple.size(); ++i) {
                    resultBuilder.append("c_" + i + "\t");
                }
                resultBuilder.append("\n");
                firstRow = false;
            }
            resultBuilder.append(StringUtils.join(tuple.iterator(), "\t"));
            resultBuilder.append("\n");
        }
        if (index >= maxResult && iter.hasNext()) {
            // rows were cut off at maxResult; tell the user about it
            resultBuilder.append("\n");
            resultBuilder.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, MAX_RESULTS));
        }
    } catch (IOException e) {
        // A FrontendException whose message lacks the "Backend error :" marker is reported
        // with its full stack trace.
        if (e instanceof FrontendException) {
            FrontendException fe = (FrontendException) e;
            if (!fe.getMessage().contains("Backend error :")) {
                LOGGER.error("Fail to run pig query.", e);
                return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
            }
        }
        // Failures caused by a parse error are reported by message only.
        if (e.getCause() instanceof ParseException) {
            return new InterpreterResult(Code.ERROR, e.getMessage());
        }
        // Otherwise prefer the display string of the current PigStats, if there is one.
        PigStats stats = PigStats.get();
        if (stats != null) {
            String errorMsg = stats.getDisplayString();
            if (errorMsg != null) {
                return new InterpreterResult(Code.ERROR, errorMsg);
            }
        }
        // Other errors.
        LOGGER.error("Fail to run pig query.", e);
        return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
    } finally {
        listenerMap.remove(context.getParagraphId());
        // Remove the temporary script so that repeated runs do not pile up files.
        if (tmpScriptFile != null && !tmpScriptFile.delete()) {
            LOGGER.warn("Fail to delete temporary pig script file: " + tmpScriptFile.getAbsolutePath());
        }
    }
    return new InterpreterResult(Code.SUCCESS, resultBuilder.toString());
}
Also used : PigStats(org.apache.pig.tools.pigstats.PigStats) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) ArrayList(java.util.ArrayList) InterpreterResult(org.apache.zeppelin.interpreter.InterpreterResult) IOException(java.io.IOException) ParseException(org.apache.pig.tools.pigscript.parser.ParseException) File(java.io.File) Tuple(org.apache.pig.data.Tuple) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)

Example 4 with PigStats

use of org.apache.pig.tools.pigstats.PigStats in project zeppelin by apache.

the class PigInterpreter method interpret.

/**
 * Runs a Pig script and returns everything it printed to stdout, optionally preceded by the
 * job statistics.
 *
 * <p>While the script is registered, {@code System.out} is replaced by an in-memory stream;
 * the original stream is restored and the temporary script file is deleted in all cases.
 *
 * @param cmd the Pig script text
 * @param contextInterpreter the interpreter context; its paragraph id keys the script
 *     listener registration
 * @return a SUCCESS result with the captured output, or an ERROR result describing the failure
 */
@Override
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
    // Remember the original stdout: it is redirected below in order to capture the
    // pig dump output and has to be restored afterwards.
    PrintStream savedStdOut = System.out;
    ByteArrayOutputStream capturedOutput = new ByteArrayOutputStream();
    File scriptFile = null;
    try {
        pigServer.setJobName(createJobName(cmd, contextInterpreter));
        scriptFile = PigUtils.createTempPigScript(cmd);
        System.setOut(new PrintStream(capturedOutput));
        // each thread should have its own ScriptState & PigStats
        ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
        // reset PigStats, otherwise the PigStats of the last job run in the same thread may
        // be returned, because PigStats is a ThreadLocal variable
        PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
        PigScriptListener listener = new PigScriptListener();
        ScriptState.get().registerListener(listener);
        listenerMap.put(contextInterpreter.getParagraphId(), listener);
        pigServer.registerScript(scriptFile.getAbsolutePath());
    } catch (IOException e) {
        // A FrontendException carrying "Backend error :" in its message originates from the
        // backend; any other FrontendException is reported right away with its stack trace.
        boolean frontendFailure = e instanceof FrontendException
                && !e.getMessage().contains("Backend error :");
        if (frontendFailure) {
            LOGGER.error("Fail to run pig script.", e);
            return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
        }
        Throwable cause = e.getCause();
        if (cause instanceof ParseException) {
            return new InterpreterResult(Code.ERROR, cause.getMessage());
        }
        // Prefer the display string of the current PigStats when one is available.
        PigStats failedStats = PigStats.get();
        String errorMsg = (failedStats != null) ? failedStats.getDisplayString() : null;
        if (errorMsg != null) {
            LOGGER.error("Fail to run pig script, " + errorMsg);
            return new InterpreterResult(Code.ERROR, errorMsg);
        }
        // Other errors.
        LOGGER.error("Fail to run pig script.", e);
        return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
    } finally {
        System.setOut(savedStdOut);
        listenerMap.remove(contextInterpreter.getParagraphId());
        if (scriptFile != null) {
            scriptFile.delete();
        }
    }
    StringBuilder output = new StringBuilder();
    PigStats finishedStats = PigStats.get();
    if (includeJobStats && finishedStats != null) {
        String jobStats = finishedStats.getDisplayString();
        if (jobStats != null) {
            output.append(jobStats);
        }
    }
    // The captured stdout always follows the (optional) job statistics.
    output.append(capturedOutput.toString());
    return new InterpreterResult(Code.SUCCESS, output.toString());
}
Also used : PrintStream(java.io.PrintStream) PigStats(org.apache.pig.tools.pigstats.PigStats) InterpreterResult(org.apache.zeppelin.interpreter.InterpreterResult) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) IOException(java.io.IOException) ParseException(org.apache.pig.tools.pigscript.parser.ParseException) File(java.io.File) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)

Example 5 with PigStats

use of org.apache.pig.tools.pigstats.PigStats in project hive by apache.

the class AbstractHCatLoaderTest method testColumnarStorePushdown.

/**
 * Runs a Pig script in local mode that loads {@code COMPLEX_TABLE} through HCatLoader,
 * projects {@code name} and {@code studentid}, filters out null names and stores the result,
 * then checks that the run succeeded and that the configuration of the first output carries
 * the expected read column ids ({@code "0,1"}).
 *
 * @throws Exception if writing the script, running Pig or cleaning up the output fails
 */
@Test
public void testColumnarStorePushdown() throws Exception {
    String pigOutputDir = TEST_DATA_DIR + "/colpushdownop";
    String pigFile = "test.pig";
    String expectedCols = "0,1";
    try {
        // try-with-resources closes the writer even when writing the script fails; creating
        // the file inside the outer try guarantees that the finally block removes it.
        try (PrintWriter w = new PrintWriter(new FileWriter(pigFile))) {
            w.println("A = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
            w.println("B = foreach A generate name,studentid;");
            w.println("C = filter B by name is not null;");
            w.println("store C into '" + pigOutputDir + "' using PigStorage();");
        }
        String[] args = { "-x", "local", pigFile };
        PigStats stats = PigRunner.run(args, null);
        // Pig script was successful
        assertTrue(stats.isSuccessful());
        // Single MapReduce job is launched
        OutputStats outstats = stats.getOutputStats().get(0);
        assertTrue(outstats != null);
        assertEquals(expectedCols, outstats.getConf().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));
        // remove the output directory written by the script
        FileSystem fs = FileSystem.get(outstats.getConf());
        if (fs.exists(new Path(pigOutputDir))) {
            fs.delete(new Path(pigOutputDir), true);
        }
    } finally {
        new File(pigFile).delete();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) OutputStats(org.apache.pig.tools.pigstats.OutputStats) PigStats(org.apache.pig.tools.pigstats.PigStats) FileWriter(java.io.FileWriter) FileSystem(org.apache.hadoop.fs.FileSystem) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) PrintWriter(java.io.PrintWriter) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Aggregations

PigStats (org.apache.pig.tools.pigstats.PigStats)8 File (java.io.File)4 JobStats (org.apache.pig.tools.pigstats.JobStats)3 IOException (java.io.IOException)2 FrontendException (org.apache.pig.impl.logicalLayer.FrontendException)2 ParseException (org.apache.pig.tools.pigscript.parser.ParseException)2 InterpreterResult (org.apache.zeppelin.interpreter.InterpreterResult)2 VespaCounters (com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters)1 FileWriter (java.io.FileWriter)1 PrintStream (java.io.PrintStream)1 PrintWriter (java.io.PrintWriter)1 RandomAccessFile (java.io.RandomAccessFile)1 Method (java.lang.reflect.Method)1 ArrayList (java.util.ArrayList)1 ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 Counters (org.apache.hadoop.mapred.Counters)1 HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)1 PigServer (org.apache.pig.PigServer)1