Use of org.apache.pig.data.DataByteArray in project phoenix by apache.
The class TypeUtil, method transformToTuple.
/**
 * Transforms a PhoenixRecordWritable into a Pig {@link Tuple}.
 *
 * @param record the Phoenix record to convert
 * @param projectedColumns the Pig field schemas of the projected columns, in column order
 * @return the populated Tuple, or null when the record and projected columns do not line up
 * @throws IOException if the conversion fails
 */
public static Tuple transformToTuple(final PhoenixRecordWritable record, final ResourceFieldSchema[] projectedColumns) throws IOException {
    Map<String, Object> columnValues = record.getResultMap();
    if (columnValues == null || columnValues.size() == 0 || projectedColumns == null || projectedColumns.length != columnValues.size()) {
        return null;
    }
    int numColumns = columnValues.size();
    Tuple tuple = TUPLE_FACTORY.newTuple(numColumns);
    try {
        int i = 0;
        for (Map.Entry<String, Object> entry : columnValues.entrySet()) {
            final ResourceFieldSchema fieldSchema = projectedColumns[i];
            Object object = entry.getValue();
            if (object == null) {
                tuple.set(i++, null);
                continue;
            }
            switch (fieldSchema.getType()) {
                case DataType.BYTEARRAY:
                    byte[] bytes = PDataType.fromTypeId(PBinary.INSTANCE.getSqlType()).toBytes(object);
                    tuple.set(i, new DataByteArray(bytes, 0, bytes.length));
                    break;
                case DataType.CHARARRAY:
                    tuple.set(i, DataType.toString(object));
                    break;
                case DataType.DOUBLE:
                    tuple.set(i, DataType.toDouble(object));
                    break;
                case DataType.FLOAT:
                    tuple.set(i, DataType.toFloat(object));
                    break;
                case DataType.INTEGER:
                    tuple.set(i, DataType.toInteger(object));
                    break;
                case DataType.LONG:
                    tuple.set(i, DataType.toLong(object));
                    break;
                case DataType.BOOLEAN:
                    tuple.set(i, DataType.toBoolean(object));
                    break;
                case DataType.DATETIME:
                    // java.sql.Timestamp carries its own millis, so unwrap it explicitly.
                    if (object instanceof java.sql.Timestamp) {
                        tuple.set(i, new DateTime(((java.sql.Timestamp) object).getTime()));
                    } else {
                        tuple.set(i, new DateTime(object));
                    }
                    break;
                case DataType.BIGDECIMAL:
                    tuple.set(i, DataType.toBigDecimal(object));
                    break;
                case DataType.BIGINTEGER:
                    tuple.set(i, DataType.toBigInteger(object));
                    break;
                case DataType.TUPLE: {
                    // A Phoenix array maps onto a nested Pig tuple, element by element.
                    PhoenixArray array = (PhoenixArray) object;
                    Tuple t = TUPLE_FACTORY.newTuple(array.getDimensions());
                    for (int j = 0; j < array.getDimensions(); j++) {
                        t.set(j, array.getElement(j));
                    }
                    tuple.set(i, t);
                    break;
                }
                default:
                    throw new RuntimeException(String.format("Unsupported Pig type [%s]", fieldSchema));
            }
            i++;
        }
    } catch (Exception ex) {
        final String errorMsg = String.format("Error transforming PhoenixRecord to Tuple: %s", ex.getMessage());
        LOG.error(errorMsg, ex);
        throw new PigException(errorMsg);
    }
    return tuple;
}
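For context, here is a minimal sketch of how a Pig LoadFunc might drive this conversion, assuming a RecordReader that yields PhoenixRecordWritable values; the reader and schema fields are illustrative names, not part of the snippet above.
// Hypothetical getNext() for a loader backed by a Phoenix RecordReader.
// `reader` and `schema` are assumed fields, named here for illustration only.
@Override
public Tuple getNext() throws IOException {
    try {
        if (!reader.nextKeyValue()) {
            return null; // no more records
        }
        PhoenixRecordWritable record = (PhoenixRecordWritable) reader.getCurrentValue();
        return TypeUtil.transformToTuple(record, schema);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}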
Use of org.apache.pig.data.DataByteArray in project hive by apache.
The class PigHCatUtil, method extractPigObject.
/**
 * Converts an object from Hive's value system to Pig's value system;
 * see HCatBaseStorer#getJavaObj() for the Pig-to-Hive direction.
 *
 * @param o object from the Hive value system
 * @param hfs the HCatFieldSchema describing the field
 * @return the equivalent object in the Pig value system
 */
public static Object extractPigObject(Object o, HCatFieldSchema hfs) throws Exception {
    /* Note that HCatRecordSerDe.serializePrimitiveField() will be called before this, thus some
     * type promotion/conversion may occur (e.g. Short to Integer). We should refactor this so
     * that it's happening in one place per module/product that we are integrating with.
     * All Pig conversion should be done here, etc. */
    if (o == null) {
        return null;
    }
    Object result;
    Type itemType = hfs.getType();
    switch (itemType) {
        case BINARY:
            result = new DataByteArray((byte[]) o);
            break;
        case STRUCT:
            result = transformToTuple((List<?>) o, hfs);
            break;
        case ARRAY:
            result = transformToBag((List<?>) o, hfs);
            break;
        case MAP:
            result = transformToPigMap((Map<?, ?>) o, hfs);
            break;
        case DECIMAL:
            result = ((HiveDecimal) o).bigDecimalValue();
            break;
        case CHAR:
            result = ((HiveChar) o).getValue();
            break;
        case VARCHAR:
            result = ((HiveVarchar) o).getValue();
            break;
        case DATE:
            /* java.sql.Date is weird. It automatically adjusts its millis value to be in the local TZ,
             * e.g. d = new java.sql.Date(System.currentTimeMillis()).toString(), so if you do this just
             * after midnight in Palo Alto, you'll get yesterday's date printed out. */
            Date d = (Date) o;
            // uses local TZ
            result = new DateTime(d.getYear() + 1900, d.getMonth() + 1, d.getDate(), 0, 0);
            break;
        case TIMESTAMP:
            /* DATA TRUNCATION!
             * Timestamp may have nanos; we strip those away and create a Joda DateTime in the
             * local TZ. This is arbitrary, since the Hive value doesn't have any TZ notion, but
             * we need to set something for TZ. Timestamp is consistently in GMT (unless you call
             * toString() on it), so we use millis. */
            // uses local TZ
            result = new DateTime(((Timestamp) o).getTime());
            break;
        default:
            result = o;
            break;
    }
    return result;
}
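As a rough illustration of how this is typically driven, here is a hypothetical helper, loosely modeled on what HCatBaseLoader does, that converts a whole HCatRecord to a Pig Tuple; transformRecord is not part of the snippet above.
// Hypothetical helper: convert an entire HCatRecord to a Pig Tuple by
// running every field through extractPigObject(). Names are illustrative.
public static Tuple transformRecord(HCatRecord record, HCatSchema schema) throws Exception {
    List<Object> fields = record.getAll();
    Tuple t = TupleFactory.getInstance().newTuple(fields.size());
    for (int i = 0; i < fields.size(); i++) {
        t.set(i, PigHCatUtil.extractPigObject(fields.get(i), schema.get(i)));
    }
    return t;
}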
Use of org.apache.pig.data.DataByteArray in project wonderdog by infochimps-labs.
The class ElasticSearchStorage, method getNext.
@Override
public Tuple getNext() throws IOException {
    try {
        if (reader.nextKeyValue()) {
            Text docId = (Text) reader.getCurrentKey();
            Text docContent = (Text) reader.getCurrentValue();
            // Emit each document as a (docId, docContent) pair of byte arrays.
            Tuple tuple = TupleFactory.getInstance().newTuple(2);
            tuple.set(0, new DataByteArray(docId.toString()));
            tuple.set(1, new DataByteArray(docContent.toString()));
            return tuple;
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    return null;
}
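A small sketch of what a downstream consumer of these tuples might look like, assuming `storage` is an initialized ElasticSearchStorage whose reader is already prepared (setup omitted; names are illustrative).
// Hypothetical consumer: drain the loader and print each (docId, source) pair.
public void dumpDocuments(ElasticSearchStorage storage) throws IOException {
    Tuple t = storage.getNext();
    while (t != null) {
        String docId = ((DataByteArray) t.get(0)).toString();
        String docJson = ((DataByteArray) t.get(1)).toString();
        System.out.println(docId + "\t" + docJson);
        t = storage.getNext();
    }
}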
Use of org.apache.pig.data.DataByteArray in project akela by mozilla-metrics.
The class RegExLoader, method getNext.
@Override
public Tuple getNext() throws IOException {
    Tuple t = null;
    boolean tryNext = true;
    while (tryNext && reader.nextKeyValue()) {
        Text val = reader.getCurrentValue();
        if (val != null) {
            String line = val.toString();
            // Strip a trailing carriage return left over from CRLF line endings.
            if (line.length() > 0 && line.charAt(line.length() - 1) == '\r') {
                line = line.substring(0, line.length() - 1);
            }
            Matcher m = getPattern().matcher(line);
            if (m.find()) {
                // Emit one field per capture group; non-matching lines are skipped.
                tryNext = false;
                t = TupleFactory.getInstance().newTuple();
                for (int i = 1; i <= m.groupCount(); i++) {
                    t.append(new DataByteArray(m.group(i)));
                }
            } else if (LOG.isDebugEnabled()) {
                LOG.debug("Failed to match line: " + line);
            }
        }
    }
    return t;
}
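Since getNext() delegates to getPattern(), a concrete loader only needs to supply the regular expression. A minimal illustrative subclass follows, assuming getPattern() is the abstract hook the call above suggests; the class name and pattern are made up for the example.
// Hypothetical subclass: loads Apache-style access logs by feeding a pattern
// to the RegExLoader machinery above. The pattern is illustrative only.
public class ApacheAccessLogLoader extends RegExLoader {
    private static final Pattern LOG_PATTERN = Pattern.compile(
        "^(\\S+) \\S+ \\S+ \\[([^\\]]+)\\] \"([^\"]*)\" (\\d{3}) (\\S+)");

    @Override
    public Pattern getPattern() {
        return LOG_PATTERN;
    }
}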
Use of org.apache.pig.data.DataByteArray in project akela by mozilla-metrics.
The class BytesSize, method exec.
public Long exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return 0L;
    }
    long bytesSize = 0L;
    switch (input.getType(0)) {
        case DataType.BYTEARRAY:
            DataByteArray dba = (DataByteArray) input.get(0);
            bytesSize = dba.size();
            break;
        case DataType.CHARARRAY:
            // Note: getBytes() uses the platform default charset, so the byte
            // count for a chararray can vary across JVM configurations.
            String str = (String) input.get(0);
            bytesSize = str.getBytes().length;
            break;
        default:
            break;
    }
    return bytesSize;
}
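A quick local check of the UDF's behavior outside Pig; the main method is just for illustration and assumes BytesSize and the Pig jars are on the classpath.
// Illustrative driver: build a one-field tuple and measure it with BytesSize.
public static void main(String[] args) throws IOException {
    Tuple t = TupleFactory.getInstance().newTuple(1);
    t.set(0, new DataByteArray(new byte[] { 1, 2, 3 }));
    System.out.println(new BytesSize().exec(t)); // prints 3

    t.set(0, "hello"); // chararray path
    System.out.println(new BytesSize().exec(t)); // prints 5 (ASCII bytes)
}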