Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.
The class PigletMetadataProvider, method getScannerRuntime:
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<String> dataSource,
        List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
        List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema,
        IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig)
        throws AlgebricksException {
    PigletFileDataSource ds = (PigletFileDataSource) dataSource;
    FileSplit[] fileSplits = ds.getFileSplits();
    // one scan partition per file split, pinned to the split's node
    String[] locations = new String[fileSplits.length];
    for (int i = 0; i < fileSplits.length; ++i) {
        locations[i] = fileSplits[i].getNodeName();
    }
    IFileSplitProvider fsp = new ConstantFileSplitProvider(fileSplits);
    Object[] colTypes = ds.getSchemaTypes();
    IValueParserFactory[] vpfs = new IValueParserFactory[colTypes.length];
    ISerializerDeserializer[] serDesers = new ISerializerDeserializer[colTypes.length];
    // map each Piglet column type to a text parser and a binary serde
    for (int i = 0; i < colTypes.length; ++i) {
        Type colType = (Type) colTypes[i];
        IValueParserFactory vpf;
        ISerializerDeserializer serDeser;
        switch (colType.getTag()) {
            case INTEGER:
                vpf = IntegerParserFactory.INSTANCE;
                serDeser = IntegerSerializerDeserializer.INSTANCE;
                break;
            case CHAR_ARRAY:
                vpf = UTF8StringParserFactory.INSTANCE;
                serDeser = new UTF8StringSerializerDeserializer();
                break;
            case FLOAT:
                vpf = FloatParserFactory.INSTANCE;
                serDeser = FloatSerializerDeserializer.INSTANCE;
                break;
            default:
                throw new UnsupportedOperationException();
        }
        vpfs[i] = vpf;
        serDesers[i] = serDeser;
    }
    ITupleParserFactory tpf = new DelimitedDataTupleParserFactory(vpfs, ',');
    RecordDescriptor rDesc = new RecordDescriptor(serDesers);
    IOperatorDescriptor scanner = new FileScanOperatorDescriptor(jobSpec, fsp, tpf, rDesc);
    AlgebricksAbsolutePartitionConstraint constraint = new AlgebricksAbsolutePartitionConstraint(locations);
    return new Pair<>(scanner, constraint);
}
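For a concrete schema the per-column mapping above collapses to a few array literals. A minimal sketch, assuming a three-column (int, string, float) file (the schema is an illustration, not from the source):

// Illustrative (int, string, float) layout: the parser factories decode the
// ','-delimited text fields, while the serdes define the binary tuple format
// that the RecordDescriptor advertises to downstream operators.
IValueParserFactory[] vpfs = { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
        FloatParserFactory.INSTANCE };
ISerializerDeserializer[] serDesers = { IntegerSerializerDeserializer.INSTANCE,
        new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE };
ITupleParserFactory tpf = new DelimitedDataTupleParserFactory(vpfs, ',');
RecordDescriptor rDesc = new RecordDescriptor(serDesers);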
Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.
The class JobGenHelper, method mkRecordDescriptor:
@SuppressWarnings("rawtypes")
public static RecordDescriptor mkRecordDescriptor(IVariableTypeEnvironment env, IOperatorSchema opSchema,
        JobGenContext context) throws AlgebricksException {
    ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema.getSize()];
    ITypeTraits[] typeTraits = new ITypeTraits[opSchema.getSize()];
    ISerializerDeserializerProvider sdp = context.getSerializerDeserializerProvider();
    ITypeTraitProvider ttp = context.getTypeTraitProvider();
    int i = 0;
    for (LogicalVariable var : opSchema) {
        Object t = env.getVarType(var);
        if (t == null) {
            LOGGER.warning("No type for variable " + var);
        }
        fields[i] = sdp.getSerializerDeserializer(t);
        typeTraits[i] = ttp.getTypeTrait(t);
        i++;
    }
    return new RecordDescriptor(fields, typeTraits);
}
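In job generation this helper is the bridge from a logical schema to the physical frame layout: each variable's type is resolved through the type environment, then mapped to a serde and type traits. A hedged usage sketch (env, opSchema, context, and the sort parameters are stand-ins for values the surrounding job-gen code provides):

// Sketch: derive an operator's output record format, then hand it to the
// Hyracks operator being instantiated, e.g. an external sort.
RecordDescriptor recDesc = JobGenHelper.mkRecordDescriptor(env, opSchema, context);
ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, framesLimit, sortFields,
        comparatorFactories, recDesc);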
Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.
The class SecondaryIndexSearchExample, method createJob:
private static JobSpecification createJob(Options options) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples coming out of secondary index
    RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // comparators for sort fields and BTree fields
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // comparator for the primary index
    IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // schema of tuples coming out of primary index
    RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for the BTree search: we only need a comparator for the non-unique key,
    // i.e. we will have a range condition on the first field only (implying
    // [-infinity, +infinity] for the second field)
    IBinaryComparatorFactory[] searchComparatorFactories = new IBinaryComparatorFactory[1];
    searchComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    // build a tuple containing the low and high search keys
    ArrayTupleBuilder tb = new ArrayTupleBuilder(searchComparatorFactories.length * 2); // low and high key
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    new UTF8StringSerializerDeserializer().serialize("0", dos); // low key
    tb.addFieldEndOffset();
    new UTF8StringSerializerDeserializer().serialize("f", dos); // high key
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
            keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
    // low key is in field 0, high key in field 1, of the tuples going into the
    // secondary index search op
    int[] secondaryLowKeyFields = { 0 };
    int[] secondaryHighKeyFields = { 1 };
    IFileSplitProvider secondarySplitProvider =
            JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    BTreeSearchOperatorDescriptor secondarySearchOp = new BTreeSearchOperatorDescriptor(spec, secondaryRecDesc,
            secondaryLowKeyFields, secondaryHighKeyFields, true, true, secondaryHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, secondarySearchOp, splitNCs);
    // the secondary index outputs tuples with [UTF8String, Integer]; the Integer field
    // is the key into the primary index of the source data records, so both the low and
    // high keys are in field 1 of the tuples going into the primary index search op
    int[] primaryLowKeyFields = { 1 };
    int[] primaryHighKeyFields = { 1 };
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
            primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, primarySearchOp, splitNCs);
    // have each node print the results of its respective B-Tree
    PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, printer, splitNCs);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, primarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
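A job built this way is submitted through the Hyracks client API. A minimal sketch, assuming the cluster controller's host and client port are known (the hard-coded "localhost" and 1098 values are illustrative, not from the source):

// Sketch: submit the job to the cluster controller and block until it finishes.
IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098);
JobSpecification job = createJob(options);
JobId jobId = hcc.startJob(job);
hcc.waitForCompletion(jobId);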
Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.
The class PrefixLenDescriptor, method createEvaluatorFactory:
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                private final IPointable inputVal = new VoidPointable();
                private final IScalarEvaluator evalLen = args[0].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalSimilarity = args[1].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalThreshold = args[2].createScalarEvaluator(ctx);
                private final SimilarityFiltersCache similarityFiltersCache = new SimilarityFiltersCache();
                // result
                private final AMutableInt32 res = new AMutableInt32(0);
                @SuppressWarnings("unchecked")
                private final ISerializerDeserializer<AInt32> int32Serde =
                        SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32);

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();
                    // length
                    evalLen.evaluate(tuple, inputVal);
                    byte[] data = inputVal.getByteArray();
                    int offset = inputVal.getStartOffset();
                    if (data[offset] != ATypeTag.SERIALIZED_INT32_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 0, data[offset],
                                ATypeTag.SERIALIZED_INT32_TYPE_TAG);
                    }
                    int length = IntegerPointable.getInteger(data, offset + 1);
                    // similarity threshold
                    evalThreshold.evaluate(tuple, inputVal);
                    data = inputVal.getByteArray();
                    offset = inputVal.getStartOffset();
                    if (data[offset] != ATypeTag.SERIALIZED_DOUBLE_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 1, data[offset],
                                ATypeTag.SERIALIZED_DOUBLE_TYPE_TAG);
                    }
                    float similarityThreshold = (float) ADoubleSerializerDeserializer.getDouble(data, offset + 1);
                    // similarity name
                    evalSimilarity.evaluate(tuple, inputVal);
                    data = inputVal.getByteArray();
                    offset = inputVal.getStartOffset();
                    int len = inputVal.getLength();
                    if (data[offset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 2, data[offset],
                                ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                    }
                    SimilarityFilters similarityFilters =
                            similarityFiltersCache.get(similarityThreshold, data, offset, len);
                    int prefixLength = similarityFilters.getPrefixLength(length);
                    res.setValue(prefixLength);
                    try {
                        int32Serde.serialize(res, out);
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                    result.set(resultStorage);
                }
            };
        }
    };
}
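Every argument arrives as a tagged serialized value: byte 0 is the ATypeTag and the payload starts at offset + 1, which is why each branch checks the tag before reading. The write path follows the same convention; a minimal standalone sketch of it (hedged, not part of the descriptor):

// Sketch: serialize an AInt32 into reusable storage; the serde writes the
// type tag first, then the 4-byte integer payload.
ArrayBackedValueStorage storage = new ArrayBackedValueStorage();
AMutableInt32 value = new AMutableInt32(0);
@SuppressWarnings("unchecked")
ISerializerDeserializer<AInt32> serde =
        SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32);
storage.reset();
value.setValue(42);
serde.serialize(value, storage.getDataOutput());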
Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.
The class PrefixLenJaccardDescriptor, method createEvaluatorFactory:
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                private final IPointable lenPtr = new VoidPointable();
                private final IPointable thresholdPtr = new VoidPointable();
                private final IScalarEvaluator evalLen = args[0].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalThreshold = args[1].createScalarEvaluator(ctx);
                private float similarityThresholdCache;
                private SimilarityFiltersJaccard similarityFilters;
                // result
                private final AMutableInt32 res = new AMutableInt32(0);
                @SuppressWarnings("unchecked")
                private final ISerializerDeserializer<AInt32> int32Serde =
                        SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32);

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();
                    evalLen.evaluate(tuple, lenPtr);
                    evalThreshold.evaluate(tuple, thresholdPtr);
                    // length
                    int length = ATypeHierarchy.getIntegerValue(getIdentifier().getName(), 0, lenPtr.getByteArray(),
                            lenPtr.getStartOffset());
                    // similarity threshold
                    byte[] data = thresholdPtr.getByteArray();
                    int offset = thresholdPtr.getStartOffset();
                    if (data[offset] != ATypeTag.SERIALIZED_FLOAT_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 1, data[offset],
                                ATypeTag.SERIALIZED_FLOAT_TYPE_TAG);
                    }
                    float similarityThreshold = AFloatSerializerDeserializer.getFloat(data, offset + 1);
                    // rebuild the filters only when the threshold changes; remember the
                    // threshold so the cache check actually takes effect on later calls
                    if (similarityThreshold != similarityThresholdCache || similarityFilters == null) {
                        similarityFilters = new SimilarityFiltersJaccard(similarityThreshold);
                        similarityThresholdCache = similarityThreshold;
                    }
                    int prefixLength = similarityFilters.getPrefixLength(length);
                    res.setValue(prefixLength);
                    try {
                        int32Serde.serialize(res, out);
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                    result.set(resultStorage);
                }
            };
        }
    };
}
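Because evaluate runs once per tuple, the evaluator memoizes the SimilarityFiltersJaccard instance and rebuilds it only when the threshold changes. The same pattern in isolation (a hedged sketch; the helper name is illustrative):

// Sketch: rebuild an expensive helper only when its parameter changes.
private float cachedThreshold;
private SimilarityFiltersJaccard filters;

private SimilarityFiltersJaccard filtersFor(float threshold) {
    if (filters == null || threshold != cachedThreshold) {
        filters = new SimilarityFiltersJaccard(threshold);
        cachedThreshold = threshold;
    }
    return filters;
}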