Search in sources:

Example 61 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

From the class DatasetUtil, method createDatasetJobSpec.

/**
 * Builds a job specification whose single root operator is an
 * {@link IndexCreateOperatorDescriptor} for the primary index of the given dataset.
 *
 * @param dataset the dataset whose primary index is looked up and passed to the resource factory
 * @param metadataProvider source of the record types, file splits, partition constraints,
 *        storage manager and metadata transaction context used to assemble the job
 * @return the assembled job specification, with the index-create operator registered as root
 * @throws AlgebricksException declared by the method; raised by the metadata lookups it performs
 */
public static JobSpecification createDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider) throws AlgebricksException {
    Index primaryIndex = IndexUtil.getPrimaryIndex(dataset);
    ARecordType recordType = (ARecordType) metadataProvider.findType(dataset);
    // The meta record type is resolved only when the dataset has a meta part; otherwise it stays null.
    ARecordType metaRecordType = dataset.hasMetaPart() ? (ARecordType) metadataProvider.findMetaType(dataset) : null;
    JobSpecification jobSpec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());

    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
    IFileSplitProvider splitProvider = splitsAndConstraint.first;
    AlgebricksPartitionConstraint partitionConstraint = splitsAndConstraint.second;

    // Log every file split, each followed by a single space.
    StringBuilder splitList = new StringBuilder();
    for (FileSplit fileSplit : splitProvider.getFileSplits()) {
        splitList.append(fileSplit).append(" ");
    }
    LOGGER.info("CREATING File Splits: " + splitList);

    Pair<ILSMMergePolicyFactory, Map<String, String>> mergePolicy = DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    ILSMMergePolicyFactory mergePolicyFactory = mergePolicy.first;
    Map<String, String> mergePolicyProperties = mergePolicy.second;

    // Prepare the resource factory (local resource metadata to be stored in the NC's local resource repository).
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, primaryIndex, recordType, metaRecordType, mergePolicyFactory, mergePolicyProperties);
    IndexBuilderFactory builderFactory = new IndexBuilderFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitProvider, resourceFactory, !dataset.isTemp());

    IndexCreateOperatorDescriptor createOp = new IndexCreateOperatorDescriptor(jobSpec, builderFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, createOp, partitionConstraint);
    jobSpec.addRoot(createOp);
    return jobSpec;
}
Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IndexBuilderFactory(org.apache.hyracks.storage.am.common.build.IndexBuilderFactory) Index(org.apache.asterix.metadata.entities.Index) FileSplit(org.apache.hyracks.api.io.FileSplit) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) ARecordType(org.apache.asterix.om.types.ARecordType) Map(java.util.Map) IResourceFactory(org.apache.hyracks.storage.common.IResourceFactory) ILSMMergePolicyFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory) IndexCreateOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.IndexCreateOperatorDescriptor)

Example 62 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

From the class FileRemoveOperatorDescriptor, method createPushRuntime.

/**
 * Creates a source-less runtime (input arity 0) that deletes the file behind this
 * partition's file split when it is initialized.
 *
 * @param ctx task context; supplies the IO manager used to resolve the split to a file
 * @param recordDescProvider not used by this operator
 * @param partition index into the file splits selecting the split to remove
 * @param nPartitions not used by this operator
 * @return a pushable whose {@code initialize()} performs the deletion
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FileSplit targetSplit = fileSplitProvider.getFileSplits()[partition];
    final IIOManager io = ctx.getIoManager();
    return new AbstractOperatorNodePushable() {

        @Override
        public int getInputArity() {
            return 0;
        }

        @Override
        public void initialize() throws HyracksDataException {
            // will only work for files inside the io devices
            File target = targetSplit.getFile(io);
            if (!quietly) {
                // Strict mode: a failed delete is reported to the caller.
                try {
                    FileUtils.deleteDirectory(target);
                } catch (IOException e) {
                    throw new HyracksDataException(e);
                }
                return;
            }
            // Quiet mode: best-effort delete.
            FileUtils.deleteQuietly(target);
        }

        @Override
        public void deinitialize() throws HyracksDataException {
        }

        @Override
        public IFrameWriter getInputFrameWriter(int index) {
            // This operator has no inputs.
            throw new IllegalStateException();
        }

        @Override
        public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
            // This operator does not accept an output writer.
            throw new IllegalStateException();
        }
    };
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) AbstractOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractOperatorNodePushable) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOException(java.io.IOException) FileSplit(org.apache.hyracks.api.io.FileSplit) IIOManager(org.apache.hyracks.api.io.IIOManager) File(java.io.File) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 63 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

From the class FileScanOperatorDescriptor, method createPushRuntime.

/**
 * Creates a source runtime that opens the file behind this partition's file split and
 * feeds its contents through the tuple parser into the output writer.
 *
 * <p>The writer is opened before the file, so a missing file is reported through the
 * writer's fail/close sequence. Any failure calls {@code writer.fail()} exactly once and
 * is rethrown wrapped in a {@link HyracksDataException}; the writer is always closed.
 *
 * @param ctx task context; supplies the tuple parser's context and the IO manager
 * @param recordDescProvider not used by this operator
 * @param partition index into the file splits selecting the split to scan
 * @param nPartitions not used by this operator
 * @return a pushable whose {@code initialize()} performs the scan
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FileSplit split = fileSplitProvider.getFileSplits()[partition];
    final ITupleParser tp = tupleParserFactory.createTupleParser(ctx);
    final IIOManager ioManager = ctx.getIoManager();
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        @Override
        public void initialize() throws HyracksDataException {
            File f = split.getFile(ioManager);
            try {
                writer.open();
                // try-with-resources releases the file descriptor on success and on failure;
                // previously the stream was never closed.
                try (InputStream in = new FileInputStream(f)) {
                    tp.parse(in, writer);
                }
            } catch (Throwable th) {
                // Single failure path: a missing file lands here too, so fail() is
                // called once instead of twice and the cause is wrapped only once.
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
            }
        }
    };
}
Also used : AbstractUnaryOutputSourceOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) FileSplit(org.apache.hyracks.api.io.FileSplit) IIOManager(org.apache.hyracks.api.io.IIOManager) File(java.io.File) FileInputStream(java.io.FileInputStream) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Aggregations

FileSplit (org.apache.hyracks.api.io.FileSplit)63 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)43 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)42 JobSpecification (org.apache.hyracks.api.job.JobSpecification)40 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)39 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)39 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)39 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)38 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)33 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)33 Test (org.junit.Test)33 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)32 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)28 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)23 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)21 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)21 File (java.io.File)18 MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor)18 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)14 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)10