use of org.apache.pig.impl.logicalLayer.FrontendException in project zeppelin by apache.
the class PigInterpreter method interpret.
@Override
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
  // remember the original stdout, because we will redirect stdout to capture
  // the pig dump output.
  PrintStream originalStdOut = System.out;
  ByteArrayOutputStream bytesOutput = new ByteArrayOutputStream();
  File tmpFile = null;
  try {
    pigServer.setJobName(createJobName(cmd, contextInterpreter));
    tmpFile = PigUtils.createTempPigScript(cmd);
    System.setOut(new PrintStream(bytesOutput));
    // each thread should have its own ScriptState & PigStats
    ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
    // reset PigStats, otherwise you may get the PigStats of the last job in the same thread,
    // because PigStats is a ThreadLocal variable
    PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
    PigScriptListener scriptListener = new PigScriptListener();
    ScriptState.get().registerListener(scriptListener);
    listenerMap.put(contextInterpreter.getParagraphId(), scriptListener);
    pigServer.registerScript(tmpFile.getAbsolutePath());
  } catch (IOException e) {
    if (e instanceof FrontendException) {
      FrontendException fe = (FrontendException) e;
      // If the error message contains "Backend error :", the exception comes from the
      // backend and is reported via PigStats below instead.
      if (!fe.getMessage().contains("Backend error :")) {
        LOGGER.error("Fail to run pig script.", e);
        return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
      }
    }
    if (e.getCause() instanceof ParseException) {
      return new InterpreterResult(Code.ERROR, e.getCause().getMessage());
    }
    PigStats stats = PigStats.get();
    if (stats != null) {
      String errorMsg = stats.getDisplayString();
      if (errorMsg != null) {
        LOGGER.error("Fail to run pig script, " + errorMsg);
        return new InterpreterResult(Code.ERROR, errorMsg);
      }
    }
    // Other errors.
    LOGGER.error("Fail to run pig script.", e);
    return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
  } finally {
    System.setOut(originalStdOut);
    listenerMap.remove(contextInterpreter.getParagraphId());
    if (tmpFile != null) {
      tmpFile.delete();
    }
  }
  StringBuilder outputBuilder = new StringBuilder();
  PigStats stats = PigStats.get();
  if (stats != null && includeJobStats) {
    String jobStats = stats.getDisplayString();
    if (jobStats != null) {
      outputBuilder.append(jobStats);
    }
  }
  outputBuilder.append(bytesOutput.toString());
  return new InterpreterResult(Code.SUCCESS, outputBuilder.toString());
}
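The method above combines two per-call concerns: redirecting stdout so Pig's dump output can be captured, and resetting the ThreadLocal ScriptState/PigStats so one paragraph's stats do not leak into another. The redirect-and-restore part can be read in isolation; a minimal sketch follows (the StdoutCapture class and its Runnable parameter are illustrative, not part of Zeppelin):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

public final class StdoutCapture {
  /**
   * Runs the given work with System.out redirected into a buffer and always
   * restores the original stream, mirroring the try/finally structure of
   * interpret() above.
   */
  public static String capture(Runnable work) {
    PrintStream originalStdOut = System.out;
    ByteArrayOutputStream bytesOutput = new ByteArrayOutputStream();
    System.setOut(new PrintStream(bytesOutput));
    try {
      work.run();                      // e.g. the pigServer.registerScript(...) call
    } finally {
      System.setOut(originalStdOut);   // restore even if the work throws
    }
    return bytesOutput.toString();
  }
}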
use of org.apache.pig.impl.logicalLayer.FrontendException in project hive by apache.
the class HCatBaseStorer method convertPigSchemaToHCatSchema.
/**
 * Constructs HCatSchema from pigSchema. Passed tableSchema is the existing
 * schema of the table in metastore.
 */
protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")");
  }
  List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size());
  for (FieldSchema fSchema : pigSchema.getFields()) {
    try {
      HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema);
      // if writing to a partitioned table, then pigSchema will have more columns than tableSchema;
      // partition columns are not part of tableSchema... e.g. TestHCatStorer#testPartColsInData()
      // HCatUtil.assertNotNull(hcatFieldSchema, "Nothing matching '" + fSchema.alias + "' found " +
      // "in target table schema", LOG);
      fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema));
    } catch (HCatException he) {
      throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
    }
  }
  HCatSchema s = new HCatSchema(fieldSchemas);
  LOG.debug("convertPigSchemaToHCatSchema(computed)=(" + s + ")");
  return s;
}
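Both this method and doSchemaValidations further below follow the same convention: checked storage-layer exceptions are rethrown as FrontendException carrying Pig's error code, so Pig reports the failure at plan-building time. A minimal self-contained sketch of that convention (the class name, the doLookup() helper, and the literal error-code value are illustrative; the real code uses PigHCatUtil.PIG_EXCEPTION_CODE):

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.pig.impl.logicalLayer.FrontendException;

public final class WrapAsFrontendException {
  // Illustrative value; the real constant lives in PigHCatUtil.PIG_EXCEPTION_CODE.
  private static final int PIG_EXCEPTION_CODE = 1115;

  static void resolveColumnOrRethrow() throws FrontendException {
    try {
      doLookup();
    } catch (HCatException he) {
      // Keep the original message and cause so Pig surfaces the real failure.
      throw new FrontendException(he.getMessage(), PIG_EXCEPTION_CODE, he);
    }
  }

  // Stand-in for the getColFromSchema(...) / getHCatFSFromPigFS(...) work above.
  private static void doLookup() throws HCatException {
  }
}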
use of org.apache.pig.impl.logicalLayer.FrontendException in project hive by apache.
the class HCatBaseStorer method getHCatFSFromPigFS.
/**
 * Here we are processing the HCat table schema as derived from the metastore,
 * thus it should have information about all fields/sub-fields, but not for partition columns.
 */
private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema, Schema pigSchema, HCatSchema tableSchema) throws FrontendException, HCatException {
  if (hcatFieldSchema == null) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("hcatFieldSchema is null for fSchema '" + fSchema.alias + "'");
      // throw new IllegalArgumentException("hcatFiledSchema is null; fSchema=" + fSchema + " " +
      // "(pigSchema, tableSchema)=(" + pigSchema + "," + tableSchema + ")");
    }
  }
  byte type = fSchema.type;
  switch (type) {
    case DataType.CHARARRAY:
    case DataType.BIGCHARARRAY:
      if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
        return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
      }
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, null);
    case DataType.INTEGER:
      if (hcatFieldSchema != null) {
        if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) {
          throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
      }
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.intTypeInfo, null);
    case DataType.LONG:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.longTypeInfo, null);
    case DataType.FLOAT:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.floatTypeInfo, null);
    case DataType.DOUBLE:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.doubleTypeInfo, null);
    case DataType.BYTEARRAY:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.binaryTypeInfo, null);
    case DataType.BOOLEAN:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.booleanTypeInfo, null);
    case DataType.DATETIME:
      // the actual Hive type is controlled by the target table information
      if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
        return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
      }
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.timestampTypeInfo, null);
    case DataType.BIGDECIMAL:
      if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
        return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
      }
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.decimalTypeInfo, null);
    case DataType.BAG:
      Schema bagSchema = fSchema.schema;
      FieldSchema field;
      // Find out if we need to throw away the tuple or not.
      if (removeTupleFromBag(hcatFieldSchema, fSchema)) {
        field = bagSchema.getField(0).schema.getField(0);
      } else {
        field = bagSchema.getField(0);
      }
      List<HCatFieldSchema> arrFields = Collections.singletonList(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0), pigSchema, tableSchema));
      return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");
    case DataType.TUPLE:
      List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
      HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema();
      List<FieldSchema> fields = fSchema.schema.getFields();
      for (int i = 0; i < fields.size(); i++) {
        FieldSchema fieldSchema = fields.get(i);
        hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i), pigSchema, tableSchema));
      }
      return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");
    case DataType.MAP:
      {
        if (hcatFieldSchema != null) {
          return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, hcatFieldSchema.getMapKeyTypeInfo(), hcatFieldSchema.getMapValueSchema(), "");
        }
        // Column not found in target table, i.e. it is a new column. Its schema is map<string,string>.
        List<HCatFieldSchema> valFSList = Collections.singletonList(new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, ""));
        return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, new HCatSchema(valFSList), "");
      }
    case DataType.BIGINTEGER:
      // fall through; doesn't map to a Hive/HCat type; here for completeness
    default:
      throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
  }
}
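For quick reference, the scalar defaults applied above when a Pig field has no matching column in the target table can be collected into a small lookup table. This is only a sketch (the DefaultPigToHiveTypes class is illustrative; the DataType and TypeInfoFactory constants are the ones used in the method; complex types BAG, TUPLE, and MAP are handled recursively in the real code):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.pig.data.DataType;

public final class DefaultPigToHiveTypes {
  // Default Hive TypeInfo chosen per Pig scalar type when the column is new.
  static final Map<Byte, TypeInfo> DEFAULTS = new HashMap<>();
  static {
    DEFAULTS.put(DataType.CHARARRAY, TypeInfoFactory.stringTypeInfo);
    DEFAULTS.put(DataType.INTEGER, TypeInfoFactory.intTypeInfo);
    DEFAULTS.put(DataType.LONG, TypeInfoFactory.longTypeInfo);
    DEFAULTS.put(DataType.FLOAT, TypeInfoFactory.floatTypeInfo);
    DEFAULTS.put(DataType.DOUBLE, TypeInfoFactory.doubleTypeInfo);
    DEFAULTS.put(DataType.BYTEARRAY, TypeInfoFactory.binaryTypeInfo);
    DEFAULTS.put(DataType.BOOLEAN, TypeInfoFactory.booleanTypeInfo);
    DEFAULTS.put(DataType.DATETIME, TypeInfoFactory.timestampTypeInfo);
    DEFAULTS.put(DataType.BIGDECIMAL, TypeInfoFactory.decimalTypeInfo);
  }

  private DefaultPigToHiveTypes() {
  }
}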
use of org.apache.pig.impl.logicalLayer.FrontendException in project hive by apache.
the class HCatBaseStorer method doSchemaValidations.
protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException {
  // Iterate through all the elements in the Pig schema and do validations as
  // dictated by semantics, consulting the HCatSchema of the table when need be.
  // columnPos helps with debug messages
  int columnPos = 0;
  for (FieldSchema pigField : pigSchema.getFields()) {
    HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema);
    validateSchema(pigField, hcatField, pigSchema, tblSchema, columnPos++);
  }
  try {
    PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema);
  } catch (IOException e) {
    throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e);
  }
}
use of org.apache.pig.impl.logicalLayer.FrontendException in project hive by apache.
the class AbstractHCatStorerTest method pigValueRangeTest.
/**
 * This is used to test how Pig values of various data types which are out of range for the Hive
 * target column are handled. Currently the options are to raise an error or write NULL.
 * 1. create a data file with 1 column, 1 row
 * 2. load it into Pig
 * 3. use Pig to store into a Hive table
 * 4. read from the Hive table using Pig
 * 5. check that the read value is what is expected
 *
 * @param tblName Hive table name to create
 * @param hiveType datatype to use for the single column in the table
 * @param pigType corresponding Pig type when loading the file into Pig
 * @param goal how out-of-range values from Pig are handled by HCat, may be {@code null}
 * @param inputValue written to the file which is read by Pig, thus must be something Pig can read
 *          (e.g. DateTime.toString(), rather than java.sql.Date)
 * @param expectedValue what Pig should see when reading the Hive table
 * @param format date format to use for comparison of values, since the default DateTime.toString()
 *          includes a TZ which is meaningless for the Hive DATE type
 */
void pigValueRangeTest(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue, String format) throws Exception {
  AbstractHCatLoaderTest.dropTable(tblName, driver);
  final String field = "f1";
  AbstractHCatLoaderTest.createTableDefaultDB(tblName, field + " " + hiveType, null, driver, storageFormat);
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { inputValue });
  LOG.debug("File=" + INPUT_FILE_NAME);
  dumpFile(INPUT_FILE_NAME);
  PigServer server = createPigServer(true);
  int queryNumber = 1;
  logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (" + field + ":" + pigType + ");", queryNumber++);
  if (goal == null) {
    logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++);
  } else {
    FrontendException fe = null;
    try {
      logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "('','','-" + HCatStorer.ON_OOR_VALUE_OPT + " " + goal + "');", queryNumber++);
    } catch (FrontendException e) {
      fe = e;
    }
    switch (goal) {
      case Null:
        // do nothing, fall through and verify the data
        break;
      case Throw:
        assertTrue("Expected a FrontendException", fe != null);
        assertEquals("Expected a different FrontendException.", fe.getMessage(), "Unable to store alias A");
        // this test is done
        return;
      default:
        assertFalse("Unexpected goal: " + goal, 1 == 1);
    }
  }
  logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber);
  try {
    driver.run("select * from " + tblName);
  } catch (CommandProcessorException e) {
    LOG.debug("cpr.respCode=" + e.getResponseCode() + " cpr.errMsg=" + e.getMessage() + " for table " + tblName);
  }
  List l = new ArrayList();
  driver.getResults(l);
  LOG.debug("Dumping rows via SQL from " + tblName);
  for (Object t : l) {
    LOG.debug(t == null ? null : t.toString() + " t.class=" + t.getClass());
  }
  Iterator<Tuple> itr = server.openIterator("B");
  int numRowsRead = 0;
  while (itr.hasNext()) {
    Tuple t = itr.next();
    if ("date".equals(hiveType)) {
      DateTime dateTime = (DateTime) t.get(0);
      assertTrue(format != null);
      assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, dateTime == null ? null : dateTime.toString(format));
    } else {
      assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, t.isNull(0) ? null : t.get(0).toString());
    }
    // see comment at "Dumping rows via SQL..." for why this doesn't work
    // assertEquals("Comparing Pig to Hive", t.get(0), l.get(0));
    numRowsRead++;
  }
  assertEquals("Expected " + 1 + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME + "; table " + tblName, 1, numRowsRead);
  /*
   * Misc notes: Unfortunately Timestamp.toString() adjusts the value for the local TZ, and 't' is a
   * String, thus the timestamp in 't' doesn't match rawData
   */
}
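A hypothetical caller, to show how the two out-of-range policies exercise this harness (the method name, table names, and the literal value 400 are made up for illustration; the concrete tests in the suite pass their own values, and this would live in a subclass of AbstractHCatStorerTest annotated with JUnit's @Test):

@Test
public void testTinyintOutOfRangeSketch() throws Exception {
  // goal = Null: the out-of-range value is written as NULL, so the
  // read-back check expects null.
  pigValueRangeTest("pig_oor_null", "tinyint", "int",
      HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, "400", null, null);
  // goal = Throw: the store fails and pigValueRangeTest asserts the
  // FrontendException ("Unable to store alias A") instead of reading back.
  pigValueRangeTest("pig_oor_throw", "tinyint", "int",
      HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, "400", null, null);
}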