Search in sources :

Example 1 with OutputStats

use of org.apache.pig.tools.pigstats.OutputStats in project zeppelin by apache.

the class PigUtils method extractFromSimplePigStats.

/**
 * Builds a plain-text statistics report for a finished Pig run described by a
 * {@code SimplePigStats}.
 *
 * <p>The {@code userId}, {@code startTime}, {@code endTime} and {@code jobPlan} values are read
 * reflectively from fields declared on {@code PigStats}.
 *
 * @param stats statistics object of the Pig run to describe
 * @return the report, starting with {@code "Script Statistics: "}; {@code null} when the return
 *     code is {@code UNKNOWN} or the stats carry no {@code PigContext}; or an error text
 *     followed by the stack trace when anything throws while building the report
 */
public static String extractFromSimplePigStats(SimplePigStats stats) {
    try {
        // Reject unusable stats first so no reflection is attempted for runs that
        // cannot be rendered anyway.
        if (stats.getReturnCode() == PigRunner.ReturnCode.UNKNOWN) {
            LOGGER.warn("unknown return code, can't display the results");
            return null;
        }
        if (stats.getPigContext() == null) {
            LOGGER.warn("unknown exec type, don't display the results");
            return null;
        }

        // These values are only reachable through reflection on PigStats' declared fields.
        Field userIdField = PigStats.class.getDeclaredField("userId");
        userIdField.setAccessible(true);
        String userId = (String) userIdField.get(stats);

        Field startTimeField = PigStats.class.getDeclaredField("startTime");
        startTimeField.setAccessible(true);
        long startTime = (Long) startTimeField.get(stats);

        Field endTimeField = PigStats.class.getDeclaredField("endTime");
        endTimeField.setAccessible(true);
        long endTime = (Long) endTimeField.get(stats);

        Field jobPlanField = PigStats.class.getDeclaredField("jobPlan");
        jobPlanField.setAccessible(true);
        PigStats.JobGraph jobPlan = (PigStats.JobGraph) jobPlanField.get(stats);

        boolean succeeded = stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS;
        boolean partiallyFailed = stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE;
        boolean failed = stats.getReturnCode() == PigRunner.ReturnCode.FAILURE;

        SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
        StringBuilder sb = new StringBuilder("Script Statistics: \n");

        // Header row followed by a single tab-separated value row.
        sb.append("\nHadoopVersion\tPigVersion\tUserId\tStartedAt\tFinishedAt\tFeatures\n");
        sb.append(stats.getHadoopVersion()).append("\t")
            .append(stats.getPigVersion()).append("\t")
            .append(userId).append("\t")
            .append(sdf.format(new Date(startTime))).append("\t")
            .append(sdf.format(new Date(endTime))).append("\t")
            .append(stats.getFeatures()).append("\n");
        sb.append("\n");

        // Overall outcome line; any code other than SUCCESS / PARTIAL_FAILURE reads as failed.
        if (succeeded) {
            sb.append("Success!\n");
        } else if (partiallyFailed) {
            sb.append("Some jobs have failed! Stop running all dependent jobs\n");
        } else {
            sb.append("Failed!\n");
        }
        sb.append("\n");

        if (succeeded || partiallyFailed) {
            sb.append("Job Stats (time in seconds):\n");
            sb.append(MRJobStats.SUCCESS_HEADER).append("\n");
            List<JobStats> successfulJobs = jobPlan.getSuccessfulJobs();
            for (JobStats js : successfulJobs) {
                sb.append(js.getDisplayString());
            }
            sb.append("\n");
        }
        if (failed || partiallyFailed) {
            sb.append("Failed Jobs:\n");
            sb.append(MRJobStats.FAILURE_HEADER).append("\n");
            List<JobStats> failedJobs = jobPlan.getFailedJobs();
            for (JobStats js : failedJobs) {
                sb.append(js.getDisplayString());
            }
            sb.append("\n");
        }

        sb.append("Input(s):\n");
        for (InputStats is : stats.getInputStats()) {
            sb.append(is.getDisplayString());
        }
        sb.append("\n");
        sb.append("Output(s):\n");
        for (OutputStats ds : stats.getOutputStats()) {
            sb.append(ds.getDisplayString());
        }

        sb.append("\nCounters:\n");
        sb.append("Total records written : ").append(stats.getRecordWritten()).append("\n");
        sb.append("Total bytes written : ").append(stats.getBytesWritten()).append("\n");
        sb.append("Spillable Memory Manager spill count : ").append(stats.getSMMSpillCount()).append("\n");
        sb.append("Total bags proactively spilled: ").append(stats.getProactiveSpillCountObjects()).append("\n");
        sb.append("Total records proactively spilled: ").append(stats.getProactiveSpillCountRecords()).append("\n");
        sb.append("\nJob DAG:\n").append(jobPlan.toString());
        return sb.toString();
    } catch (Exception e) {
        // Best effort: report the failure as text instead of propagating it to the caller.
        LOGGER.error("Can not extract message from SimplePigStats", e);
        return "Can not extract message from SimplePigStats," + ExceptionUtils.getStackTrace(e);
    }
}
Also used : OutputStats(org.apache.pig.tools.pigstats.OutputStats) PigStats(org.apache.pig.tools.pigstats.PigStats) SimplePigStats(org.apache.pig.tools.pigstats.mapreduce.SimplePigStats) Date(java.util.Date) IOException(java.io.IOException) JobStats(org.apache.pig.tools.pigstats.JobStats) MRJobStats(org.apache.pig.tools.pigstats.mapreduce.MRJobStats) Field(java.lang.reflect.Field) InputStats(org.apache.pig.tools.pigstats.InputStats) SimpleDateFormat(java.text.SimpleDateFormat)

Example 2 with OutputStats

use of org.apache.pig.tools.pigstats.OutputStats in project zeppelin by apache.

the class PigUtils method extractFromTezPigStats.

/**
 * Builds a plain-text statistics report for a finished Pig run described by a
 * {@code TezPigScriptStats}.
 *
 * <p>The {@code userId}, {@code startTime} and {@code endTime} values are read reflectively from
 * fields declared on {@code PigStats}; the per-DAG statistics come from the
 * {@code tezDAGStatsMap} field declared on {@code TezPigScriptStats}.
 *
 * @param stats statistics object of the Pig run to describe
 * @return the report, starting with {@code "Script Statistics:"}; {@code null} when the return
 *     code is {@code UNKNOWN} or the stats carry no {@code PigContext}; or an error text
 *     followed by the stack trace when anything throws while building the report
 */
private static String extractFromTezPigStats(TezPigScriptStats stats) {
    try {
        if (stats.getReturnCode() == PigRunner.ReturnCode.UNKNOWN) {
            LOGGER.warn("unknown return code, can't display the results");
            return null;
        }
        if (stats.getPigContext() == null) {
            LOGGER.warn("unknown exec type, don't display the results");
            return null;
        }

        // These values are only reachable through reflection on PigStats' declared fields.
        Field userIdField = PigStats.class.getDeclaredField("userId");
        userIdField.setAccessible(true);
        String userId = (String) userIdField.get(stats);

        Field startTimeField = PigStats.class.getDeclaredField("startTime");
        startTimeField.setAccessible(true);
        long startTime = (Long) startTimeField.get(stats);

        Field endTimeField = PigStats.class.getDeclaredField("endTime");
        endTimeField.setAccessible(true);
        long endTime = (Long) endTimeField.get(stats);

        boolean succeeded = stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS;
        boolean partiallyFailed = stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE;
        boolean failed = stats.getReturnCode() == PigRunner.ReturnCode.FAILURE;

        // One "label: value" row: label right-aligned to 20 columns, value left-aligned to 100.
        final String rowFormat = "%1$20s: %2$-100s%n";

        SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
        StringBuilder sb = new StringBuilder("Script Statistics:\n");
        sb.append("\n");
        sb.append(String.format(rowFormat, "HadoopVersion", stats.getHadoopVersion()));
        sb.append(String.format(rowFormat, "PigVersion", stats.getPigVersion()));
        sb.append(String.format(rowFormat, "TezVersion", TezExecType.getTezVersion()));
        sb.append(String.format(rowFormat, "UserId", userId));
        sb.append(String.format(rowFormat, "FileName", stats.getFileName()));
        sb.append(String.format(rowFormat, "StartedAt", sdf.format(new Date(startTime))));
        sb.append(String.format(rowFormat, "FinishedAt", sdf.format(new Date(endTime))));
        sb.append(String.format(rowFormat, "Features", stats.getFeatures()));
        sb.append("\n");

        // Overall outcome line; any code other than SUCCESS / PARTIAL_FAILURE reads as failed.
        if (succeeded) {
            sb.append("Success!\n");
        } else if (partiallyFailed) {
            sb.append("Some tasks have failed! Stop running all dependent tasks\n");
        } else {
            sb.append("Failed!\n");
        }
        sb.append("\n");

        // Print diagnostic info in case of failure
        if (failed || partiallyFailed) {
            String errorMessage = stats.getErrorMessage();
            if (errorMessage != null) {
                String[] lines = errorMessage.split("\n");
                for (int i = 0; i < lines.length; i++) {
                    String line = lines[i].trim();
                    // The first line always gets the "ErrorMessage" label; blank
                    // continuation lines are dropped.
                    if (i == 0 || !line.isEmpty()) {
                        sb.append(String.format(rowFormat, i == 0 ? "ErrorMessage" : "", line));
                    }
                }
                sb.append("\n");
            }
        }

        Field tezDAGStatsMapField = TezPigScriptStats.class.getDeclaredField("tezDAGStatsMap");
        tezDAGStatsMapField.setAccessible(true);
        // Reflection only yields Object, so the generic type cannot be checked at runtime.
        @SuppressWarnings("unchecked")
        Map<String, TezDAGStats> tezDAGStatsMap =
            (Map<String, TezDAGStats>) tezDAGStatsMapField.get(stats);
        int count = 0;
        for (TezDAGStats dagStats : tezDAGStatsMap.values()) {
            sb.append("\n");
            sb.append("DAG ").append(count++).append(":\n");
            sb.append(dagStats.getDisplayString());
            sb.append("\n");
        }

        sb.append("Input(s):\n");
        for (InputStats is : stats.getInputStats()) {
            sb.append(is.getDisplayString().trim()).append("\n");
        }
        sb.append("\n");
        sb.append("Output(s):\n");
        for (OutputStats os : stats.getOutputStats()) {
            sb.append(os.getDisplayString().trim()).append("\n");
        }
        return sb.toString();
    } catch (Exception e) {
        // Best effort: report the failure as text instead of propagating it to the caller.
        LOGGER.error("Can not extract message from TezPigScriptStats", e);
        return "Can not extract message from TezPigScriptStats," + ExceptionUtils.getStackTrace(e);
    }
}
Also used : OutputStats(org.apache.pig.tools.pigstats.OutputStats) TezDAGStats(org.apache.pig.tools.pigstats.tez.TezDAGStats) Date(java.util.Date) IOException(java.io.IOException) Field(java.lang.reflect.Field) InputStats(org.apache.pig.tools.pigstats.InputStats) SimpleDateFormat(java.text.SimpleDateFormat) Map(java.util.Map)

Example 3 with OutputStats

use of org.apache.pig.tools.pigstats.OutputStats in project hive by apache.

the class AbstractHCatLoaderTest method testColumnarStorePushdown.

/**
 * Runs a small Pig script in local mode that loads {@code COMPLEX_TABLE} through HCatLoader and
 * projects {@code name} and {@code studentid}, then checks that the configuration attached to
 * the first output records the read-column ids {@code "0,1"}.
 *
 * @throws Exception if writing the script, running Pig, or cleaning up the output fails
 */
@Test
public void testColumnarStorePushdown() throws Exception {
    String pigOutputDir = TEST_DATA_DIR + "/colpushdownop";
    String pigFile = "test.pig";
    String expectedCols = "0,1";
    try {
        // Write the script inside the cleanup scope, and close the writer even if a write
        // fails, so neither the file handle nor the script file can be left behind.
        try (PrintWriter w = new PrintWriter(new FileWriter(pigFile))) {
            w.println("A = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
            w.println("B = foreach A generate name,studentid;");
            w.println("C = filter B by name is not null;");
            w.println("store C into '" + pigOutputDir + "' using PigStorage();");
        }

        String[] args = { "-x", "local", pigFile };
        PigStats stats = PigRunner.run(args, null);
        // Pig script was successful
        assertTrue(stats.isSuccessful());
        // Inspect the stats of the first (index 0) output of the script
        OutputStats outstats = stats.getOutputStats().get(0);
        assertTrue(outstats != null);
        assertEquals(expectedCols, outstats.getConf().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));
        // Remove the output directory produced by the store statement
        FileSystem fs = FileSystem.get(outstats.getConf());
        Path outputPath = new Path(pigOutputDir);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
    } finally {
        new File(pigFile).delete();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) OutputStats(org.apache.pig.tools.pigstats.OutputStats) PigStats(org.apache.pig.tools.pigstats.PigStats) FileWriter(java.io.FileWriter) FileSystem(org.apache.hadoop.fs.FileSystem) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) PrintWriter(java.io.PrintWriter) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Aggregations

OutputStats (org.apache.pig.tools.pigstats.OutputStats)3 IOException (java.io.IOException)2 Field (java.lang.reflect.Field)2 SimpleDateFormat (java.text.SimpleDateFormat)2 Date (java.util.Date)2 InputStats (org.apache.pig.tools.pigstats.InputStats)2 PigStats (org.apache.pig.tools.pigstats.PigStats)2 File (java.io.File)1 FileWriter (java.io.FileWriter)1 PrintWriter (java.io.PrintWriter)1 RandomAccessFile (java.io.RandomAccessFile)1 Map (java.util.Map)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)1 JobStats (org.apache.pig.tools.pigstats.JobStats)1 MRJobStats (org.apache.pig.tools.pigstats.mapreduce.MRJobStats)1 SimplePigStats (org.apache.pig.tools.pigstats.mapreduce.SimplePigStats)1 TezDAGStats (org.apache.pig.tools.pigstats.tez.TezDAGStats)1 Test (org.junit.Test)1