Use of cascading.tuple.Tuple in project parquet-mr by Apache.
The class ParquetValueScheme, method source.
/**
 * Reads the next record from the call's input and publishes it as the incoming tuple.
 *
 * <p>A fresh value container is obtained from the reader via {@code createValue()},
 * filled by {@code next(null, container)}, and its payload is wrapped in a new
 * {@link Tuple} that is set on the incoming entry.
 *
 * @param fp the flow process (not used by this method)
 * @param sc the source call supplying the record reader and the incoming entry
 * @return {@code false} when the reader reports no further records; {@code true} otherwise
 * @throws IOException declared for failures while reading from the input
 */
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<? extends JobConf> fp, SourceCall<Object[], RecordReader> sc) throws IOException {
    final RecordReader reader = sc.getInput();
    final Container<T> holder = (Container<T>) reader.createValue();

    if (!reader.next(null, holder)) {
        return false;
    }

    // Skip nulls: a null container leaves the incoming entry untouched but still
    // reports success.
    // NOTE(review): this tests the container itself, not holder.get() - confirm
    // whether a null payload was the case meant to be skipped.
    if (holder != null) {
        sc.getIncomingEntry().setTuple(new Tuple(holder.get()));
    }
    return true;
}
Use of cascading.tuple.Tuple in project Impatient by Cascading.
The class ScrubFunction, method operate.
/**
 * Scrubs the text in argument position 1 and, when the scrubbed token is non-empty,
 * emits a two-element tuple of (argument 0, scrubbed token) to the output collector.
 * Nothing is emitted when the scrubbed token has length zero.
 *
 * @param flowProcess the flow process (not used by this method)
 * @param functionCall supplies the argument entry and the output collector
 */
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
    final TupleEntry args = functionCall.getArguments();
    final String docId = args.getString(0);
    final String scrubbed = scrubText(args.getString(1));

    // Tokens that scrub down to nothing are dropped.
    if (scrubbed.length() <= 0) {
        return;
    }

    final Tuple output = new Tuple();
    output.add(docId);
    output.add(scrubbed);
    functionCall.getOutputCollector().add(output);
}
Use of cascading.tuple.Tuple in project Impatient by Cascading.
The class ScrubTest, method testScrub.
/**
 * Runs {@code ScrubFunction} over three input tuples and checks that exactly the
 * two expected (doc_id, token) tuples are collected.
 */
@Test
public void testScrub() {
    Function scrub = new ScrubFunction(new Fields("doc_id", "token"));

    // Three inputs are fed to the function; only two outputs are expected.
    Tuple[] inputs = new Tuple[] {
        new Tuple("doc_1", "FoO"),
        new Tuple("doc_1", " BAR "),
        new Tuple("doc_1", " ")
    };

    ArrayList<Tuple> expected = new ArrayList<Tuple>();
    expected.add(new Tuple("doc_1", "foo"));
    expected.add(new Tuple("doc_1", "bar"));

    TupleListCollector collector = invokeFunction(scrub, inputs, Fields.ALL);

    // Drain the collector into a list so it can be compared with the expectation.
    ArrayList<Tuple> actual = new ArrayList<Tuple>();
    for (Iterator<Tuple> cursor = collector.iterator(); cursor.hasNext();) {
        actual.add(cursor.next());
    }

    assertEquals("Scrub result is not expected", expected, actual);
}
Use of cascading.tuple.Tuple in project SpyGlass by ParallelAI.
The class HBaseScheme, method sink.
/**
 * Converts the outgoing tuple entry into a {@code Put} and hands it to the output collector.
 *
 * <p>The row key is element 0 of the tuple selected by {@code keyField}; when
 * {@code useSalt} is set it is first passed through {@code HBaseSalter.addSaltPrefix}.
 * For each entry of {@code valueFields}, every non-null value is added to the
 * {@code Put} under the family name at the same index of {@code familyNames},
 * using the field name as the qualifier.
 *
 * @param flowProcess the flow process (not used by this method)
 * @param sinkCall supplies the outgoing entry and the output collector
 * @throws IOException declared for failures while collecting the output
 */
@Override
public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    TupleEntry outgoing = sinkCall.getOutgoingEntry();
    OutputCollector collector = sinkCall.getOutput();

    Tuple keyTuple = outgoing.selectTuple(keyField);
    ImmutableBytesWritable rowKey = (ImmutableBytesWritable) keyTuple.getObject(0);
    if (useSalt) {
        rowKey = HBaseSalter.addSaltPrefix(rowKey);
    }

    // A timestamp of 0 selects the Put constructor that takes no explicit timestamp.
    Put put = (this.timeStamp == 0L)
        ? new Put(rowKey.get())
        : new Put(rowKey.get(), this.timeStamp);

    for (int familyIdx = 0; familyIdx < valueFields.length; familyIdx++) {
        TupleEntry selected = outgoing.selectEntry(valueFields[familyIdx]);
        Fields columns = selected.getFields();
        Tuple cells = selected.getTuple();

        for (int col = 0; col < columns.size(); col++) {
            ImmutableBytesWritable cell = (ImmutableBytesWritable) cells.getObject(col);
            // Null values are not written to the Put.
            if (cell == null) {
                continue;
            }
            put.add(Bytes.toBytes(familyNames[familyIdx]), Bytes.toBytes((String) columns.get(col)), cell.get());
        }
    }

    collector.collect(null, put);
}
Use of cascading.tuple.Tuple in project SpyGlass by ParallelAI.
The class JDBCScheme, method source.
/**
 * Reads the next key/value pair from the call's input and publishes the value's
 * tuple as the incoming entry.
 *
 * <p>The key and value holders are taken from slots 0 and 1 of the source call
 * context; after a successful read the value is cast to {@code TupleRecord} and
 * its tuple is set on the incoming entry.
 *
 * @param flowProcess the flow process (not used by this method)
 * @param sourceCall supplies the context holders, the record reader and the incoming entry
 * @return {@code false} when the reader reports no further records; {@code true} otherwise
 * @throws IOException declared for failures while reading from the input
 */
@Override
public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException {
    Object[] context = sourceCall.getContext();
    Object keyHolder = context[0];
    Object valueHolder = context[1];

    if (!sourceCall.getInput().next(keyHolder, valueHolder)) {
        return false;
    }

    TupleRecord record = (TupleRecord) valueHolder;
    sourceCall.getIncomingEntry().setTuple(record.getTuple());
    return true;
}
Aggregations