use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class TPCHCustomerOrderNestedLoopJoinTest method customerOrderCIDOuterJoinMulti.
@Test
public void customerOrderCIDOuterJoinMulti() throws Exception {
JobSpecification spec = new JobSpecification();
FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part2.tbl") };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
IMissingWriterFactory[] nonMatchWriterFactories = new IMissingWriterFactory[ordersDesc.getFieldCount()];
for (int j = 0; j < nonMatchWriterFactories.length; j++) {
nonMatchWriterFactories[j] = NoopMissingWriterFactory.INSTANCE;
}
NestedLoopJoinOperatorDescriptor join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 5, true, nonMatchWriterFactories);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
ResultSetId rsId = new ResultSetId(1);
spec.addResultSetId(rsId);
IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
spec.connect(ordJoinConn, ordScanner, 0, join, 0);
IConnectorDescriptor custJoinConn = new MToNBroadcastConnectorDescriptor(spec);
spec.connect(custJoinConn, custScanner, 0, join, 1);
IConnectorDescriptor joinPrinterConn = new MToNBroadcastConnectorDescriptor(spec);
spec.connect(joinPrinterConn, join, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class FrameFileWriterOperatorDescriptor method createPushRuntime.
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
final FileSplit[] splits = fileSplitProvider.getFileSplits();
final IIOManager ioManager = ctx.getIoManager();
return new AbstractUnaryInputSinkOperatorNodePushable() {
private OutputStream out;
@Override
public void open() throws HyracksDataException {
try {
out = new FileOutputStream(splits[partition].getFile(ioManager));
} catch (FileNotFoundException e) {
throw new HyracksDataException(e);
}
}
@Override
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
try {
out.write(buffer.array());
} catch (IOException e) {
throw new HyracksDataException(e);
}
}
@Override
public void fail() throws HyracksDataException {
}
@Override
public void close() throws HyracksDataException {
try {
out.close();
} catch (IOException e) {
throw new HyracksDataException(e);
}
}
};
}
use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class FileRemoveOperatorDescriptor method createPushRuntime.
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
final FileSplit split = fileSplitProvider.getFileSplits()[partition];
final IIOManager ioManager = ctx.getIoManager();
return new AbstractOperatorNodePushable() {
@Override
public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
throw new IllegalStateException();
}
@Override
public void initialize() throws HyracksDataException {
// will only work for files inside the io devices
File f = split.getFile(ioManager);
if (quietly) {
FileUtils.deleteQuietly(f);
} else {
try {
FileUtils.deleteDirectory(f);
} catch (IOException e) {
throw new HyracksDataException(e);
}
}
}
@Override
public IFrameWriter getInputFrameWriter(int index) {
throw new IllegalStateException();
}
@Override
public int getInputArity() {
return 0;
}
@Override
public void deinitialize() throws HyracksDataException {
}
};
}
use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class FileScanOperatorDescriptor method createPushRuntime.
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
final FileSplit split = fileSplitProvider.getFileSplits()[partition];
final ITupleParser tp = tupleParserFactory.createTupleParser(ctx);
final IIOManager ioManager = ctx.getIoManager();
return new AbstractUnaryOutputSourceOperatorNodePushable() {
@Override
public void initialize() throws HyracksDataException {
File f = split.getFile(ioManager);
try {
writer.open();
InputStream in;
try {
in = new FileInputStream(f);
} catch (FileNotFoundException e) {
writer.fail();
throw new HyracksDataException(e);
}
tp.parse(in, writer);
} catch (Throwable th) {
writer.fail();
throw new HyracksDataException(th);
} finally {
writer.close();
}
}
};
}
use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class FilePartition method formResponseObject.
private void formResponseObject(ObjectNode jsonResponse, FileSplit[] fileSplits, ARecordType recordType, String primaryKeys, boolean temp, Map<String, NodeControllerInfo> nodeMap) {
ObjectMapper om = new ObjectMapper();
ArrayNode partititons = om.createArrayNode();
// Whether the dataset is temp or not
jsonResponse.put("temp", temp);
// Adds a primary key.
jsonResponse.put("keys", primaryKeys);
// Adds record type.
jsonResponse.set("type", recordType.toJSON());
// Generates file partitions.
for (FileSplit split : fileSplits) {
String ipAddress = nodeMap.get(split.getNodeName()).getNetworkAddress().getAddress();
String path = split.getPath();
FilePartition partition = new FilePartition(ipAddress, path);
partititons.add(partition.toObjectNode());
}
// Generates the response object which contains the splits.
jsonResponse.set("splits", partititons);
}
Aggregations