
Example 16 with FrontendException

Use of org.apache.pig.impl.logicalLayer.FrontendException in project zeppelin by apache.

The class PigInterpreter, method interpret.

@Override
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
    // remember the original stdout, because we will redirect stdout to capture
    // the pig dump output.
    PrintStream originalStdOut = System.out;
    ByteArrayOutputStream bytesOutput = new ByteArrayOutputStream();
    File tmpFile = null;
    try {
        pigServer.setJobName(createJobName(cmd, contextInterpreter));
        tmpFile = PigUtils.createTempPigScript(cmd);
        System.setOut(new PrintStream(bytesOutput));
        // each thread should have its own ScriptState & PigStats
        ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
        // reset PigStats, otherwise you may get the PigStats of the last job in the same thread,
        // because PigStats is a ThreadLocal variable
        PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
        PigScriptListener scriptListener = new PigScriptListener();
        ScriptState.get().registerListener(scriptListener);
        listenerMap.put(contextInterpreter.getParagraphId(), scriptListener);
        pigServer.registerScript(tmpFile.getAbsolutePath());
    } catch (IOException e) {
        // Errors are handled in order:
        // 1. FrontendException: compilation error from the frontend (unless it wraps a backend error)
        // 2. ParseException: syntax error
        // 3. PigStats: execution error reported by the backend
        // 4. Anything else: return the full stack trace
        if (e instanceof FrontendException) {
            FrontendException fe = (FrontendException) e;
            if (!fe.getMessage().contains("Backend error :")) {
                // A message containing "Backend error :" means the exception originated in the
                // backend and is reported via PigStats below; only pure frontend (compilation)
                // errors are returned here.
                LOGGER.error("Fail to run pig script.", e);
                return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
            }
        }
        if (e.getCause() instanceof ParseException) {
            return new InterpreterResult(Code.ERROR, e.getCause().getMessage());
        }
        PigStats stats = PigStats.get();
        if (stats != null) {
            String errorMsg = stats.getDisplayString();
            if (errorMsg != null) {
                LOGGER.error("Fail to run pig script, " + errorMsg);
                return new InterpreterResult(Code.ERROR, errorMsg);
            }
        }
        LOGGER.error("Fail to run pig script.", e);
        return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
    } finally {
        System.setOut(originalStdOut);
        listenerMap.remove(contextInterpreter.getParagraphId());
        if (tmpFile != null) {
            tmpFile.delete();
        }
    }
    StringBuilder outputBuilder = new StringBuilder();
    PigStats stats = PigStats.get();
    if (stats != null && includeJobStats) {
        String jobStats = stats.getDisplayString();
        if (jobStats != null) {
            outputBuilder.append(jobStats);
        }
    }
    outputBuilder.append(bytesOutput.toString());
    return new InterpreterResult(Code.SUCCESS, outputBuilder.toString());
}
Also used : PrintStream(java.io.PrintStream) PigStats(org.apache.pig.tools.pigstats.PigStats) InterpreterResult(org.apache.zeppelin.interpreter.InterpreterResult) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) IOException(java.io.IOException) ParseException(org.apache.pig.tools.pigscript.parser.ParseException) File(java.io.File) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)
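The interpret method above hinges on a stdout-redirection trick: Pig's DUMP output goes to System.out, so the interpreter swaps in a capturing stream and restores the original in the finally block. A minimal standalone sketch of that pattern in plain Java (the printed string is just a stand-in for the pig dump output):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

public class StdoutCapture {
    public static void main(String[] args) {
        PrintStream originalStdOut = System.out;
        ByteArrayOutputStream captured = new ByteArrayOutputStream();
        try {
            // redirect stdout so anything printed lands in the buffer
            System.setOut(new PrintStream(captured));
            System.out.println("dump output would appear here");
        } finally {
            // always restore the real stdout, even if the body throws
            System.setOut(originalStdOut);
        }
        originalStdOut.println("captured: " + captured.toString());
    }
}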

Example 17 with FrontendException

Use of org.apache.pig.impl.logicalLayer.FrontendException in project hive by apache.

The class HCatBaseStorer, method convertPigSchemaToHCatSchema.

/**
 * Constructs an HCatSchema from the given pigSchema. The passed tableSchema is the
 * existing schema of the table in the metastore.
 */
protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")");
    }
    List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size());
    for (FieldSchema fSchema : pigSchema.getFields()) {
        try {
            HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema);
            // if writing to a partitioned table, then pigSchema will have more columns than tableSchema
            // partition columns are not part of tableSchema... e.g. TestHCatStorer#testPartColsInData()
            // HCatUtil.assertNotNull(hcatFieldSchema, "Nothing matching '" + fSchema.alias + "' found " +
            // "in target table schema", LOG);
            fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema));
        } catch (HCatException he) {
            throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
    }
    HCatSchema s = new HCatSchema(fieldSchemas);
    LOG.debug("convertPigSchemaToHCatSchema(computed)=(" + s + ")");
    return s;
}
Also used : HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema) HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList (java.util.ArrayList) HCatException (org.apache.hive.hcatalog.common.HCatException) FrontendException (org.apache.pig.impl.logicalLayer.FrontendException)
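For context, a hedged sketch of how the pigSchema argument is typically built; Utils.getSchemaFromString is Pig's standard parser for schema literals (the schema string itself is illustrative):

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;

public class PigSchemaDemo {
    public static void main(String[] args) throws Exception {
        // Parse a Pig schema literal; each FieldSchema carries the alias and
        // DataType byte that convertPigSchemaToHCatSchema maps to an HCatFieldSchema.
        Schema pigSchema = Utils.getSchemaFromString("name:chararray, age:int, tags:{t:(tag:chararray)}");
        for (Schema.FieldSchema fs : pigSchema.getFields()) {
            System.out.println(fs.alias + " -> " + DataType.findTypeName(fs.type));
        }
    }
}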

Example 18 with FrontendException

Use of org.apache.pig.impl.logicalLayer.FrontendException in project hive by apache.

The class HCatBaseStorer, method getHCatFSFromPigFS.

/**
 * Here we process the HCat table schema as derived from the metastore; it should
 * therefore have information about all fields/sub-fields, but not about partition columns
 */
private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema, Schema pigSchema, HCatSchema tableSchema) throws FrontendException, HCatException {
    if (hcatFieldSchema == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("hcatFieldSchema is null for fSchema '" + fSchema.alias + "'");
        // throw new IllegalArgumentException("hcatFiledSchema is null; fSchema=" + fSchema + " " +
        // "(pigSchema, tableSchema)=(" + pigSchema + "," + tableSchema + ")");
        }
    }
    byte type = fSchema.type;
    switch(type) {
        case DataType.CHARARRAY:
        case DataType.BIGCHARARRAY:
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, null);
        case DataType.INTEGER:
            if (hcatFieldSchema != null) {
                if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) {
                    throw new FrontendException("Unsupported type: " + type + "  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
                }
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.intTypeInfo, null);
        case DataType.LONG:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.longTypeInfo, null);
        case DataType.FLOAT:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.floatTypeInfo, null);
        case DataType.DOUBLE:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.doubleTypeInfo, null);
        case DataType.BYTEARRAY:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.binaryTypeInfo, null);
        case DataType.BOOLEAN:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.booleanTypeInfo, null);
        case DataType.DATETIME:
            // whether this maps to Hive DATE or TIMESTAMP is controlled by the Hive
            // target table information; default to timestamp when the column is new
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.timestampTypeInfo, null);
        case DataType.BIGDECIMAL:
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.decimalTypeInfo, null);
        case DataType.BAG:
            Schema bagSchema = fSchema.schema;
            FieldSchema field;
            // Find out if we need to throw away the tuple or not.
            if (removeTupleFromBag(hcatFieldSchema, fSchema)) {
                field = bagSchema.getField(0).schema.getField(0);
            } else {
                field = bagSchema.getField(0);
            }
            List<HCatFieldSchema> arrFields = Collections.singletonList(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0), pigSchema, tableSchema));
            return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");
        case DataType.TUPLE:
            List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
            HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema();
            List<FieldSchema> fields = fSchema.schema.getFields();
            for (int i = 0; i < fields.size(); i++) {
                FieldSchema fieldSchema = fields.get(i);
                hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i), pigSchema, tableSchema));
            }
            return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");
        case DataType.MAP:
            {
                if (hcatFieldSchema != null) {
                    return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, hcatFieldSchema.getMapKeyTypeInfo(), hcatFieldSchema.getMapValueSchema(), "");
                }
                // Column not found in target table: it's a new column, so its schema is map<string,string>.
                List<HCatFieldSchema> valFSList = Collections.singletonList(new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, ""));
                return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, new HCatSchema(valFSList), "");
            }
        case DataType.BIGINTEGER:
        // fall through; doesn't map to Hive/Hcat type; here for completeness
        default:
            throw new FrontendException("Unsupported type: " + type + "  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
    }
}
Also used : HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema) HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema (org.apache.pig.impl.logicalLayer.schema.Schema) ResourceSchema (org.apache.pig.ResourceSchema) FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList (java.util.ArrayList) List (java.util.List) FrontendException (org.apache.pig.impl.logicalLayer.FrontendException)
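The switch above interleaves the type mapping with null checks and recursion. As a reading aid, here is a minimal sketch restating just its scalar branches as a lookup table; this is illustrative only, since the real method also honors an existing hcatFieldSchema and handles BAG/TUPLE/MAP recursively:

import java.util.HashMap;
import java.util.Map;
import org.apache.pig.data.DataType;

class PigToHiveScalarDefaults {
    // Default Hive type per Pig scalar when the target column does not exist yet,
    // mirroring the TypeInfoFactory fallbacks in the switch above.
    static final Map<Byte, String> DEFAULTS = new HashMap<>();
    static {
        DEFAULTS.put(DataType.CHARARRAY, "string");
        DEFAULTS.put(DataType.BIGCHARARRAY, "string");
        DEFAULTS.put(DataType.INTEGER, "int");
        DEFAULTS.put(DataType.LONG, "bigint");
        DEFAULTS.put(DataType.FLOAT, "float");
        DEFAULTS.put(DataType.DOUBLE, "double");
        DEFAULTS.put(DataType.BYTEARRAY, "binary");
        DEFAULTS.put(DataType.BOOLEAN, "boolean");
        DEFAULTS.put(DataType.DATETIME, "timestamp");
        DEFAULTS.put(DataType.BIGDECIMAL, "decimal");
    }
}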

Example 19 with FrontendException

Use of org.apache.pig.impl.logicalLayer.FrontendException in project hive by apache.

The class HCatBaseStorer, method doSchemaValidations.

protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException {
    // Iterate through all the elements in the Pig schema and validate them as
    // dictated by the semantics, consulting the table's HCatSchema when needed.
    // columnPos helps with debug messages
    int columnPos = 0;
    for (FieldSchema pigField : pigSchema.getFields()) {
        HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema);
        validateSchema(pigField, hcatField, pigSchema, tblSchema, columnPos++);
    }
    try {
        PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema);
    } catch (IOException e) {
        throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e);
    }
}
Also used : HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) IOException (java.io.IOException) FrontendException (org.apache.pig.impl.logicalLayer.FrontendException)
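doSchemaValidations shows the wrap-and-rethrow idiom used throughout this storer: low-level IOExceptions are converted to a FrontendException that carries a Pig error code plus the original cause. A minimal sketch of the idiom; checkCompatibility is a hypothetical stand-in for the validation call, and 1115 is assumed to match PigHCatUtil.PIG_EXCEPTION_CODE:

import java.io.IOException;
import org.apache.pig.impl.logicalLayer.FrontendException;

class ValidationSketch {
    // assumption: mirrors PigHCatUtil.PIG_EXCEPTION_CODE
    static final int PIG_EXCEPTION_CODE = 1115;

    static void validate() throws FrontendException {
        try {
            checkCompatibility();  // hypothetical validation that may throw IOException
        } catch (IOException e) {
            // preserve both the message and the cause so Pig can surface them to the user
            throw new FrontendException("Schema is not compatible: " + e.getMessage(), PIG_EXCEPTION_CODE, e);
        }
    }

    static void checkCompatibility() throws IOException {
        throw new IOException("map key must be string");
    }
}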

Example 20 with FrontendException

Use of org.apache.pig.impl.logicalLayer.FrontendException in project hive by apache.

The class AbstractHCatStorerTest, method pigValueRangeTest.

/**
 * Tests how Pig values of various data types that are out of range for the Hive target
 * column are handled. Currently the options are to raise an error or to write NULL.
 * 1. create a data file with 1 column, 1 row
 * 2. load it into Pig
 * 3. use Pig to store into a Hive table
 * 4. read back from the Hive table using Pig
 * 5. check that the value read is what is expected
 *
 * @param tblName Hive table name to create
 * @param hiveType datatype to use for the single column in table
 * @param pigType corresponding Pig type when loading file into Pig
 * @param goal how out-of-range values from Pig are handled by HCat, may be {@code null}
 * @param inputValue written to file which is read by Pig, thus must be something Pig can read
 *          (e.g. DateTime.toString(), rather than java.sql.Date)
 * @param expectedValue what Pig should see when reading Hive table
 * @param format date format to use for comparison of values since default DateTime.toString()
 *          includes TZ which is meaningless for Hive DATE type
 */
void pigValueRangeTest(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue, String format) throws Exception {
    AbstractHCatLoaderTest.dropTable(tblName, driver);
    final String field = "f1";
    AbstractHCatLoaderTest.createTableDefaultDB(tblName, field + " " + hiveType, null, driver, storageFormat);
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { inputValue });
    LOG.debug("File=" + INPUT_FILE_NAME);
    dumpFile(INPUT_FILE_NAME);
    PigServer server = createPigServer(true);
    int queryNumber = 1;
    logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (" + field + ":" + pigType + ");", queryNumber++);
    if (goal == null) {
        logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++);
    } else {
        FrontendException fe = null;
        try {
            logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "('','','-" + HCatStorer.ON_OOR_VALUE_OPT + " " + goal + "');", queryNumber++);
        } catch (FrontendException e) {
            fe = e;
        }
        switch(goal) {
            case Null:
                // do nothing, fall through and verify the data
                break;
            case Throw:
                assertTrue("Expected a FrontendException", fe != null);
                assertEquals("Expected a different FrontendException.", fe.getMessage(), "Unable to store alias A");
                // this test is done
                return;
            default:
                assertFalse("Unexpected goal: " + goal, 1 == 1);
        }
    }
    logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber);
    try {
        driver.run("select * from " + tblName);
    } catch (CommandProcessorException e) {
        LOG.debug("cpr.respCode=" + e.getResponseCode() + " cpr.errMsg=" + e.getMessage() + " for table " + tblName);
    }
    List l = new ArrayList();
    driver.getResults(l);
    LOG.debug("Dumping rows via SQL from " + tblName);
    for (Object t : l) {
        LOG.debug(t == null ? null : t.toString() + " t.class=" + t.getClass());
    }
    Iterator<Tuple> itr = server.openIterator("B");
    int numRowsRead = 0;
    while (itr.hasNext()) {
        Tuple t = itr.next();
        if ("date".equals(hiveType)) {
            DateTime dateTime = (DateTime) t.get(0);
            assertTrue(format != null);
            assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, dateTime == null ? null : dateTime.toString(format));
        } else {
            assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, t.isNull(0) ? null : t.get(0).toString());
        }
        // see comment at "Dumping rows via SQL..." for why this doesn't work
        // assertEquals("Comparing Pig to Hive", t.get(0), l.get(0));
        numRowsRead++;
    }
    assertEquals("Expected " + 1 + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME + "; table " + tblName, 1, numRowsRead);
    /*
     * Misc notes: unfortunately Timestamp.toString() adjusts the value for the local TZ,
     * and 't' is a String, so the timestamp in 't' doesn't match rawData.
     */
}
Also used : CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) List(java.util.List) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException) Tuple(org.apache.pig.data.Tuple)
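A hypothetical invocation of the helper, mirroring how a concrete storer test might exercise an out-of-range case (the table name and values are made up): storing a Pig int that overflows a Hive tinyint column under the Null policy should read back as NULL, so expectedValue is null and no date format is needed.

pigValueRangeTest("junit_tinyint_oor", "tinyint", "int",
        HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, "300", null, null);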

Aggregations

FrontendException (org.apache.pig.impl.logicalLayer.FrontendException): 36 uses
Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 27 uses
FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 11 uses
ArrayList (java.util.ArrayList): 6 uses
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 5 uses
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 5 uses
ResourceSchema (org.apache.pig.ResourceSchema): 4 uses
IOException (java.io.IOException): 3 uses
OriginalType (org.apache.parquet.schema.OriginalType): 3 uses
PigServer (org.apache.pig.PigServer): 3 uses
DataType (org.apache.pig.data.DataType): 3 uses
Tuple (org.apache.pig.data.Tuple): 3 uses
File (java.io.File): 2 uses
List (java.util.List): 2 uses
HCatException (org.apache.hive.hcatalog.common.HCatException): 2 uses
PigSchemaConverter.parsePigSchema (org.apache.parquet.pig.PigSchemaConverter.parsePigSchema): 2 uses
GroupType (org.apache.parquet.schema.GroupType): 2 uses
MessageType (org.apache.parquet.schema.MessageType): 2 uses
PrimitiveType (org.apache.parquet.schema.PrimitiveType): 2 uses
Type (org.apache.parquet.schema.Type): 2 uses