use of org.apache.pig.impl.util.UDFContext in project hive by apache.
the class HCatLoader method getHCatComparisonString.
private String getHCatComparisonString(Expression expr) {
    if (expr instanceof BinaryExpression) {
        // call getHCatComparisonString on lhs and rhs, and join the
        // results with the OpType string
        // we can just use OpType.toString() on all Expression types except
        // Equal, NotEqual since Equal has '==' in toString() and
        // we need '='
        String opStr = null;
        switch (expr.getOpType()) {
            case OP_EQ:
                opStr = " = ";
                break;
            default:
                opStr = expr.getOpType().toString();
        }
        BinaryExpression be = (BinaryExpression) expr;
        if (be.getRhs() instanceof Const) {
            // If the expr is column op const, try to cast the const to string
            // according to the data type of the column
            UDFContext udfContext = UDFContext.getUDFContext();
            Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
            HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
            HCatFieldSchema fs = null;
            try {
                fs = hcatTableSchema.get(be.getLhs().toString());
            } catch (HCatException e) {
                // Shall never happen
            }
            if (fs != null) {
                return "(" + getHCatComparisonString(be.getLhs()) + opStr + getHCatConstString((Const) be.getRhs(), fs.getType()) + ")";
            }
        }
        return "(" + getHCatComparisonString(be.getLhs()) + opStr + getHCatComparisonString(be.getRhs()) + ")";
    } else {
        // should be a constant or column
        return expr.toString();
    }
}
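The properties read above are keyed by loader class plus a signature string, which is how Pig keeps per-instance state alive between the frontend (where filter pushdown happens) and the backend tasks. A minimal sketch of the storing side of that pattern, using a hypothetical loader class; the property key string stands in for HCatConstants.HCAT_TABLE_SCHEMA, and only the UDFContext calls mirror the snippet:

import java.util.Properties;
import org.apache.pig.LoadFunc;
import org.apache.pig.impl.util.UDFContext;

// Hypothetical loader showing where the schema read above would be stored.
public abstract class SchemaStashingLoader extends LoadFunc {

    private String signature;

    @Override
    public void setUDFContextSignature(String signature) {
        // Pig supplies a signature unique to this loader instance; keeping
        // it lets us scope UDFContext properties to this exact usage.
        this.signature = signature;
    }

    protected void stashTableSchema(Object tableSchema) {
        // Properties keyed by (class, signature) are carried from the
        // Pig frontend to the backend tasks, so a schema stored here is
        // visible later in methods like getHCatComparisonString.
        Properties props = UDFContext.getUDFContext()
            .getUDFProperties(getClass(), new String[] { signature });
        props.put("hcat.table.schema", tableSchema);  // illustrative key
    }
}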
use of org.apache.pig.impl.util.UDFContext in project wonderdog by infochimps-labs.
the class ElasticSearchIndex method putNext.
/**
 * Map a tuple object into a map-writable object for elasticsearch.
 */
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple t) throws IOException {
    UDFContext context = UDFContext.getUDFContext();
    Properties property = context.getUDFProperties(ResourceSchema.class);
    MapWritable record = new MapWritable();
    String[] fieldNames = property.getProperty(PIG_ES_FIELD_NAMES).split(COMMA);
    for (int i = 0; i < t.size(); i++) {
        if (i < fieldNames.length) {
            try {
                record.put(new Text(fieldNames[i]), new Text(t.get(i).toString()));
            } catch (NullPointerException e) {
                // LOG.info("Increment null field counter.");
            }
        }
    }
    try {
        writer.write(NullWritable.get(), record);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
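The writer that putNext hands the record to is the Hadoop RecordWriter Pig passes to every StoreFunc before the first tuple arrives. A minimal sketch of that wiring, with an illustrative class name; only prepareToWrite is a real Pig callback here:

import java.io.IOException;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.pig.StoreFunc;

// Illustrative skeleton of how a StoreFunc obtains the writer used above.
public abstract class WriterWiringSketch extends StoreFunc {

    protected RecordWriter<NullWritable, MapWritable> writer;

    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    public void prepareToWrite(RecordWriter writer) throws IOException {
        // Pig calls this once per task, before any putNext; the writer
        // comes from the OutputFormat this StoreFunc returns elsewhere.
        this.writer = writer;
    }
}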
use of org.apache.pig.impl.util.UDFContext in project wonderdog by infochimps-labs.
the class ElasticSearchStorage method checkSchema.
/**
 * Here we set the field names for a given tuple so that putNext
 * can look them up later from the UDFContext.
 */
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    UDFContext context = UDFContext.getUDFContext();
    Properties property = context.getUDFProperties(ResourceSchema.class);
    String fieldNames = "";
    for (String field : s.fieldNames()) {
        fieldNames += field;
        fieldNames += COMMA;
    }
    property.setProperty(PIG_ES_FIELD_NAMES, fieldNames);
}
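One detail worth noting: the loop above leaves a trailing COMMA on the stored string, which is harmless because String.split discards trailing empty strings. A standalone check of that round trip, assuming COMMA is ",":

// Round trip of the field-name property built in checkSchema above.
public class FieldNamesRoundTrip {
    public static void main(String[] args) {
        String fieldNames = "";
        for (String field : new String[] { "user", "tweet", "ts" }) {
            fieldNames += field;
            fieldNames += ",";
        }
        // fieldNames is "user,tweet,ts," -- split() drops the trailing
        // empty string, so putNext still sees exactly three field names.
        String[] parts = fieldNames.split(",");
        System.out.println(parts.length);  // prints 3
    }
}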
use of org.apache.pig.impl.util.UDFContext in project wonderdog by infochimps-labs.
the class ElasticSearchStorage method putNext.
/**
 * Here we handle both the delimited record case and the json case.
 */
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple t) throws IOException {
    UDFContext context = UDFContext.getUDFContext();
    Properties property = context.getUDFProperties(ResourceSchema.class);
    MapWritable record = new MapWritable();
    String isJson = property.getProperty(ES_IS_JSON);
    // Handle delimited records (i.e. isJson == false)
    if (isJson != null && isJson.equals("false")) {
        String[] fieldNames = property.getProperty(PIG_ES_FIELD_NAMES).split(COMMA);
        for (int i = 0; i < t.size(); i++) {
            if (i < fieldNames.length) {
                try {
                    record.put(new Text(fieldNames[i]), new Text(t.get(i).toString()));
                } catch (NullPointerException e) {
                    // LOG.info("Increment null field counter.");
                }
            }
        }
    } else {
        if (!t.isNull(0)) {
            String jsonData = t.get(0).toString();
            // parse json data and put into mapwritable record
            try {
                HashMap<String, Object> data = mapper.readValue(jsonData, HashMap.class);
                record = (MapWritable) toWritable(data);
            } catch (JsonParseException e) {
                e.printStackTrace();
            } catch (JsonMappingException e) {
                e.printStackTrace();
            }
        }
    }
    try {
        writer.write(NullWritable.get(), record);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
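The JSON branch relies on a toWritable helper to turn the Jackson-parsed map into Hadoop writables. A minimal sketch of what such a conversion could look like; this is an assumption about the helper, not wonderdog's actual implementation, and it only handles nulls, nested maps, and scalars:

import java.util.Map;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ToWritableSketch {

    // Recursively convert a Jackson-parsed value into a Writable tree.
    public static Writable toWritable(Object value) {
        if (value == null) {
            return NullWritable.get();
        }
        if (value instanceof Map) {
            MapWritable result = new MapWritable();
            for (Map.Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
                result.put(new Text(entry.getKey().toString()),
                           toWritable(entry.getValue()));
            }
            return result;
        }
        // Fall back to the string form for scalars (numbers, booleans, strings).
        return new Text(value.toString());
    }
}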
use of org.apache.pig.impl.util.UDFContext in project wonderdog by infochimps-labs.
the class ElasticSearchStorage method elasticSearchSetup.
/**
 * Pull out the elasticsearch setup code
 */
private void elasticSearchSetup(String location, Job job) {
    // Need to use the uri parsing library here to pull out everything
    try {
        // Parse the passed in location URI, pulling out the arguments as well
        URI parsedLocation = new URI(location);
        HashMap<String, String> query = parseURIQuery(parsedLocation.getQuery());
        String esHost = location.substring(5).split("/")[0];
        if (esHost == null) {
            throw new RuntimeException("Missing elasticsearch index name, URI must be formatted as es://<index_name>/<object_type>?<params>");
        }
        if (parsedLocation.getPath() == null) {
            throw new RuntimeException("Missing elasticsearch object type, URI must be formatted as es://<index_name>/<object_type>?<params>");
        }
        Configuration conf = job.getConfiguration();
        if (conf.get(ES_INDEX_NAME) == null) {
            // Set elasticsearch index and object type in the Hadoop configuration
            job.getConfiguration().set(ES_INDEX_NAME, esHost);
            job.getConfiguration().set(ES_OBJECT_TYPE, parsedLocation.getPath().replaceAll("/", ""));
            // Set the request size in the Hadoop configuration
            String requestSize = query.get("size");
            if (requestSize == null)
                requestSize = DEFAULT_BULK;
            job.getConfiguration().set(ES_BULK_SIZE, requestSize);
            job.getConfiguration().set(ES_REQUEST_SIZE, requestSize);
            // Set the id field name in the Hadoop configuration
            String idFieldName = query.get("id");
            if (idFieldName == null)
                idFieldName = "-1";
            job.getConfiguration().set(ES_ID_FIELD_NAME, idFieldName);
            String queryString = query.get("q");
            if (queryString == null)
                queryString = "*";
            job.getConfiguration().set(ES_QUERY_STRING, queryString);
            String numTasks = query.get("tasks");
            if (numTasks == null)
                numTasks = "100";
            job.getConfiguration().set(ES_NUM_SPLITS, numTasks);
            // Adds the elasticsearch.yml file (esConfig) and the plugins directory (esPlugins) to the distributed cache
            try {
                Path hdfsConfigPath = new Path(ES_CONFIG_HDFS_PATH);
                Path hdfsPluginsPath = new Path(ES_PLUGINS_HDFS_PATH);
                HadoopUtils.uploadLocalFileIfChanged(new Path(LOCAL_SCHEME + esConfig), hdfsConfigPath, job.getConfiguration());
                HadoopUtils.shipFileIfNotShipped(hdfsConfigPath, job.getConfiguration());
                HadoopUtils.uploadLocalFileIfChanged(new Path(LOCAL_SCHEME + esPlugins), hdfsPluginsPath, job.getConfiguration());
                HadoopUtils.shipArchiveIfNotShipped(hdfsPluginsPath, job.getConfiguration());
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            //
            // This gets set even when loading data from elasticsearch
            //
            String isJson = query.get("json");
            if (isJson == null || isJson.equals("false")) {
                // We're dealing with delimited records
                UDFContext context = UDFContext.getUDFContext();
                Properties property = context.getUDFProperties(ResourceSchema.class);
                property.setProperty(ES_IS_JSON, "false");
            }
            // Need to set this to start the local instance of elasticsearch
            job.getConfiguration().set(ES_CONFIG, esConfig);
            job.getConfiguration().set(ES_PLUGINS, esPlugins);
        }
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }
}
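parseURIQuery, used near the top, splits the location URI's query string into a key/value map. A minimal sketch of such a helper, assuming plain k=v&k=v queries with no URL decoding (wonderdog's actual helper may differ):

import java.util.HashMap;

public class ParseURIQuerySketch {

    // Split a query string like "json=true&size=1000" into a map.
    public static HashMap<String, String> parseURIQuery(String query) {
        HashMap<String, String> result = new HashMap<String, String>();
        if (query == null) {
            return result;  // the URI had no '?' section
        }
        for (String pair : query.split("&")) {
            String[] kv = pair.split("=", 2);
            if (kv.length == 2) {
                result.put(kv[0], kv[1]);
            }
        }
        return result;
    }
}

Given a location like es://tweets/tweet?json=true&size=1000, the method above would set ES_INDEX_NAME to tweets, ES_OBJECT_TYPE to tweet, and the bulk size to 1000.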