use of org.apache.pig.tools.pigstats.PigStats in project oozie by apache.
the class OoziePigStats method toJSON.
/**
 * Collects the statistics exposed by the PigStats API and returns them as a JSON string.
 * <p>
 * The resulting object contains the action type, the Pig summary values (numeric values are
 * stored as strings), one entry per job in the job graph keyed by the job id, and a
 * {@code JOB_GRAPH} entry holding all job ids as a comma separated string.
 *
 * @return a JSON string
 */
@SuppressWarnings("unchecked")
@Override
public String toJSON() {
    JSONObject pigStatsGroup = new JSONObject();
    pigStatsGroup.put("ACTION_TYPE", getCurrentActionType().toString());
    // pig summary related counters
    pigStatsGroup.put("BYTES_WRITTEN", Long.toString(pigStats.getBytesWritten()));
    pigStatsGroup.put("DURATION", Long.toString(pigStats.getDuration()));
    pigStatsGroup.put("ERROR_CODE", Long.toString(pigStats.getErrorCode()));
    pigStatsGroup.put("ERROR_MESSAGE", pigStats.getErrorMessage());
    pigStatsGroup.put("FEATURES", pigStats.getFeatures());
    pigStatsGroup.put("HADOOP_VERSION", pigStats.getHadoopVersion());
    pigStatsGroup.put("NUMBER_JOBS", Long.toString(pigStats.getNumberJobs()));
    pigStatsGroup.put("PIG_VERSION", pigStats.getPigVersion());
    pigStatsGroup.put("PROACTIVE_SPILL_COUNT_OBJECTS", Long.toString(pigStats.getProactiveSpillCountObjects()));
    pigStatsGroup.put("PROACTIVE_SPILL_COUNT_RECORDS", Long.toString(pigStats.getProactiveSpillCountRecords()));
    pigStatsGroup.put("RECORD_WRITTEN", Long.toString(pigStats.getRecordWritten()));
    pigStatsGroup.put("RETURN_CODE", Long.toString(pigStats.getReturnCode()));
    pigStatsGroup.put("SCRIPT_ID", pigStats.getScriptId());
    pigStatsGroup.put("SMM_SPILL_COUNT", Long.toString(pigStats.getSMMSpillCount()));

    PigStats.JobGraph jobGraph = pigStats.getJobGraph();
    // The buffer is local to this call, so the unsynchronized StringBuilder is sufficient.
    StringBuilder jobIds = new StringBuilder();
    String separator = ",";
    for (JobStats jobStats : jobGraph) {
        // Get all the HadoopIds and put them as comma separated string for JOB_GRAPH
        String hadoopId = jobStats.getJobId();
        if (jobIds.length() > 0) {
            jobIds.append(separator);
        }
        jobIds.append(hadoopId);
        // Hadoop Counters for pig created MR job
        pigStatsGroup.put(hadoopId, toJSONFromJobStats(jobStats));
    }
    pigStatsGroup.put("JOB_GRAPH", jobIds.toString());
    return pigStatsGroup.toJSONString();
}
use of org.apache.pig.tools.pigstats.PigStats in project vespa by vespa-engine.
the class VespaStorageTest method assertAllDocumentsOk.
/**
 * Runs the given Pig script and, for every job in the job graph of the first executed
 * batch job, asserts that the Vespa counters report 10 documents sent, 0 failed and 10 ok.
 *
 * @param script the Pig script passed on to {@code setup}
 * @param conf the configuration passed on to {@code setup}
 * @throws Exception if setting up or executing the script fails
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer pigServer = setup(script, conf);
    List<ExecJob> executedJobs = pigServer.executeBatch();
    ExecJob firstJob = executedJobs.get(0);
    PigStats pigStats = firstJob.getStatistics();
    for (JobStats jobStats : pigStats.getJobGraph()) {
        // The Hadoop counters are only reachable through the MRJobStats subtype.
        MRJobStats mrJobStats = (MRJobStats) jobStats;
        Counters rawCounters = mrJobStats.getHadoopCounters();
        assertNotNull(rawCounters);

        VespaCounters vespaCounters = VespaCounters.get(rawCounters);
        assertEquals(10, vespaCounters.getDocumentsSent());
        assertEquals(0, vespaCounters.getDocumentsFailed());
        assertEquals(10, vespaCounters.getDocumentsOk());
    }
}
use of org.apache.pig.tools.pigstats.PigStats in project zeppelin by apache.
the class PigQueryInterpreter method interpret.
/**
 * Runs a Pig query and renders the relation produced by its last statement as a
 * {@code %table} result.
 * <p>
 * The last line of {@code st} is turned into an assignment to a paragraph specific alias,
 * the statements are written to a temporary script which is registered with the Pig server,
 * and at most {@code maxResult} tuples of that alias are appended to the output, tab separated.
 * The temporary script file is removed before returning, whether or not the query succeeded.
 *
 * @param st the Pig statements to run, separated by newlines
 * @param context the interpreter context; its paragraph id is used for the alias name and
 *        as the key under which the script listener is registered
 * @return a SUCCESS result holding the table text, or an ERROR result describing the failure
 */
@Override
public InterpreterResult interpret(String st, InterpreterContext context) {
    // '-' is invalid for pig alias
    String alias = "paragraph_" + context.getParagraphId().replace("-", "_");
    String[] lines = st.split("\n");
    List<String> queries = new ArrayList<>();
    for (int i = 0; i < lines.length; ++i) {
        if (i == lines.length - 1) {
            // Bind the last statement to the alias so its result can be read back below.
            lines[i] = alias + " = " + lines[i];
        }
        queries.add(lines[i]);
    }
    StringBuilder resultBuilder = new StringBuilder("%table ");
    // Declared outside the try block so the finally block can clean it up.
    File tmpScriptFile = null;
    try {
        pigServer.setJobName(createJobName(st, context));
        tmpScriptFile = PigUtils.createTempPigScript(queries);
        // each thread should have its own ScriptState & PigStats
        ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
        // reset PigStats, otherwise you may get the PigStats of last job in the same thread
        // because PigStats is ThreadLocal variable
        PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
        PigScriptListener scriptListener = new PigScriptListener();
        ScriptState.get().registerListener(scriptListener);
        listenerMap.put(context.getParagraphId(), scriptListener);
        pigServer.registerScript(tmpScriptFile.getAbsolutePath());

        // Header row: use the schema's field aliases when the schema is known.
        Schema schema = pigServer.dumpSchema(alias);
        boolean schemaKnown = (schema != null);
        if (schemaKnown) {
            for (int i = 0; i < schema.size(); ++i) {
                Schema.FieldSchema field = schema.getField(i);
                resultBuilder.append(field.alias != null ? field.alias : "col_" + i);
                if (i != schema.size() - 1) {
                    resultBuilder.append("\t");
                }
            }
            resultBuilder.append("\n");
        }

        Iterator<Tuple> iter = pigServer.openIterator(alias);
        boolean firstRow = true;
        int index = 0;
        while (iter.hasNext() && index < maxResult) {
            index++;
            Tuple tuple = iter.next();
            if (firstRow && !schemaKnown) {
                // Without a schema, synthesize a header from the width of the first tuple.
                for (int i = 0; i < tuple.size(); ++i) {
                    resultBuilder.append("c_" + i + "\t");
                }
                resultBuilder.append("\n");
                firstRow = false;
            }
            resultBuilder.append(StringUtils.join(tuple.iterator(), "\t"));
            resultBuilder.append("\n");
        }
        if (index >= maxResult && iter.hasNext()) {
            // More rows are available than were rendered; tell the user about the limit.
            resultBuilder.append("\n");
            resultBuilder.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, MAX_RESULTS));
        }
    } catch (IOException e) {
        // A FrontendException whose message does not mention a backend error is reported
        // with its full stack trace.
        if (e instanceof FrontendException) {
            FrontendException fe = (FrontendException) e;
            if (!fe.getMessage().contains("Backend error :")) {
                LOGGER.error("Fail to run pig query.", e);
                return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
            }
        }
        if (e.getCause() instanceof ParseException) {
            return new InterpreterResult(Code.ERROR, e.getMessage());
        }
        // Otherwise prefer the display string of the current PigStats, if there is one.
        PigStats stats = PigStats.get();
        if (stats != null) {
            String errorMsg = stats.getDisplayString();
            if (errorMsg != null) {
                return new InterpreterResult(Code.ERROR, errorMsg);
            }
        }
        LOGGER.error("Fail to run pig query.", e);
        return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
    } finally {
        listenerMap.remove(context.getParagraphId());
        // Best-effort removal of the temporary script so repeated queries do not
        // accumulate files.
        if (tmpScriptFile != null) {
            tmpScriptFile.delete();
        }
    }
    return new InterpreterResult(Code.SUCCESS, resultBuilder.toString());
}
use of org.apache.pig.tools.pigstats.PigStats in project zeppelin by apache.
the class PigInterpreter method interpret.
/**
 * Runs a Pig script and returns whatever it printed to standard output, optionally
 * preceded by the display string of the current PigStats.
 * <p>
 * {@code System.out} is redirected to an in-memory buffer for the duration of the run and
 * restored afterwards; the temporary script file is deleted before returning.
 *
 * @param cmd the Pig script text to execute
 * @param contextInterpreter the interpreter context; its paragraph id is the key under
 *        which the script listener is registered
 * @return a SUCCESS result holding the captured output, or an ERROR result describing the failure
 */
@Override
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
    // Keep a handle on the real stdout: it is swapped out below to capture the
    // pig dump output and must be put back afterwards.
    PrintStream savedStdOut = System.out;
    ByteArrayOutputStream capturedOutput = new ByteArrayOutputStream();
    File scriptFile = null;
    try {
        pigServer.setJobName(createJobName(cmd, contextInterpreter));
        scriptFile = PigUtils.createTempPigScript(cmd);
        System.setOut(new PrintStream(capturedOutput));
        // Every thread needs its own ScriptState & PigStats.
        ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
        // PigStats is a ThreadLocal variable; start a fresh one so the stats of a previous
        // job run on this thread are not picked up.
        PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
        PigScriptListener listener = new PigScriptListener();
        ScriptState.get().registerListener(listener);
        listenerMap.put(contextInterpreter.getParagraphId(), listener);
        pigServer.registerScript(scriptFile.getAbsolutePath());
    } catch (IOException e) {
        // A FrontendException whose message lacks "Backend error :" is reported with
        // its stack trace.
        boolean frontendOnly =
            e instanceof FrontendException && !e.getMessage().contains("Backend error :");
        if (frontendOnly) {
            LOGGER.error("Fail to run pig script.", e);
            return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
        }

        Throwable cause = e.getCause();
        if (cause instanceof ParseException) {
            return new InterpreterResult(Code.ERROR, cause.getMessage());
        }

        // Prefer the display string of the current PigStats when one is available.
        PigStats failedStats = PigStats.get();
        String statsMessage = (failedStats == null) ? null : failedStats.getDisplayString();
        if (statsMessage != null) {
            LOGGER.error("Fail to run pig script, " + statsMessage);
            return new InterpreterResult(Code.ERROR, statsMessage);
        }

        LOGGER.error("Fail to run pig script.", e);
        return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
    } finally {
        System.setOut(savedStdOut);
        listenerMap.remove(contextInterpreter.getParagraphId());
        if (scriptFile != null) {
            scriptFile.delete();
        }
    }

    StringBuilder result = new StringBuilder();
    PigStats finishedStats = PigStats.get();
    String jobSummary =
        (finishedStats != null && includeJobStats) ? finishedStats.getDisplayString() : null;
    if (jobSummary != null) {
        result.append(jobSummary);
    }
    result.append(capturedOutput.toString());
    return new InterpreterResult(Code.SUCCESS, result.toString());
}
use of org.apache.pig.tools.pigstats.PigStats in project hive by apache.
the class AbstractHCatLoaderTest method testColumnarStorePushdown.
/**
 * Writes a small Pig script that loads {@code COMPLEX_TABLE} through HCatLoader, projects
 * {@code name} and {@code studentid}, filters out null names and stores the result, then
 * runs it in local mode and checks that the configuration of the first output reports
 * {@code "0,1"} under {@code ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR}.
 * <p>
 * The script file is deleted afterwards, including when writing or running it fails.
 *
 * @throws Exception if writing the script, running Pig or accessing the file system fails
 */
@Test
public void testColumnarStorePushdown() throws Exception {
    String pigOutputDir = TEST_DATA_DIR + "/colpushdownop";
    String pigFile = "test.pig";
    String expectedCols = "0,1";
    try {
        // try-with-resources closes the writer even if one of the writes throws; creating
        // the file inside the outer try lets the finally block below remove it.
        try (PrintWriter w = new PrintWriter(new FileWriter(pigFile))) {
            w.println("A = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
            w.println("B = foreach A generate name,studentid;");
            w.println("C = filter B by name is not null;");
            w.println("store C into '" + pigOutputDir + "' using PigStorage();");
        }
        String[] args = { "-x", "local", pigFile };
        PigStats stats = PigRunner.run(args, null);
        // Pig script was successful
        assertTrue(stats.isSuccessful());
        // Only the first output is inspected
        OutputStats outstats = stats.getOutputStats().get(0);
        assertTrue(outstats != null);
        assertEquals(expectedCols, outstats.getConf().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));
        // remove the output directory produced by the script
        FileSystem fs = FileSystem.get(outstats.getConf());
        Path outputPath = new Path(pigOutputDir);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
    } finally {
        new File(pigFile).delete();
    }
}
Aggregations