Search in sources :

Example 41 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

In class SplitsAndConstraintsUtil, the method getIndexSplits:

/**
 * Computes the file splits for the given index of a dataset across the given nodes,
 * one split per cluster partition hosted on each node.
 *
 * @param dataset   the dataset owning the index
 * @param indexName the name of the index to locate on disk
 * @param nodes     the node controller ids to create splits for
 * @return one {@link FileSplit} per (node, partition) pair
 */
public static FileSplit[] getIndexSplits(Dataset dataset, String indexName, List<String> nodes) {
    File relPathFile = new File(StoragePathUtil.prepareDataverseIndexName(dataset.getDataverseName(),
            dataset.getDatasetName(), indexName, dataset.getRebalanceCount()));
    String storageDirName = ClusterProperties.INSTANCE.getStorageDirectoryName();
    // Loop-invariant: datasets in the metadata node group always use a single partition.
    // Currently this case is never executed since the metadata node group doesn't exist.
    boolean isMetadataGroup = MetadataConstants.METADATA_NODEGROUP_NAME.equals(dataset.getNodeGroupName());
    List<FileSplit> splits = new ArrayList<>();
    for (String node : nodes) {
        ClusterPartition[] nodePartitions = ClusterStateManager.INSTANCE.getNodePartitions(node);
        int numPartitions = isMetadataGroup ? 1 : ClusterStateManager.INSTANCE.getNodePartitionsCount(node);
        for (int k = 0; k < numPartitions; k++) {
            // Format: 'storage dir name'/partition_#/dataverse/dataset_idx_index
            // Temporary datasets get an extra folder level under the partition directory.
            String tempFolder =
                    dataset.isTemp() ? File.separator + StoragePathUtil.TEMP_DATASETS_STORAGE_FOLDER : "";
            File f = new File(StoragePathUtil.prepareStoragePartitionPath(storageDirName,
                    nodePartitions[k].getPartitionId()) + tempFolder + File.separator + relPathFile);
            splits.add(StoragePathUtil.getFileSplitForClusterPartition(nodePartitions[k], f.getPath()));
        }
    }
    return splits.toArray(new FileSplit[0]);
}
Also used : ArrayList(java.util.ArrayList) FileSplit(org.apache.hyracks.api.io.FileSplit) File(java.io.File) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 42 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

In class IndexDataflowHelperFactory, the method create:

/**
 * Creates an index dataflow helper for the given task partition by resolving
 * that partition's file split against the node-local I/O manager.
 */
@Override
public IIndexDataflowHelper create(IHyracksTaskContext ctx, int partition) throws HyracksDataException {
    // Each partition owns exactly one split; resolve it to a local file reference.
    FileReference resourceRef =
            fileSplitProvider.getFileSplits()[partition].getFileReference(ctx.getIoManager());
    return new IndexDataflowHelper(ctx.getJobletContext().getServiceContext(), storageMgr, resourceRef);
}
Also used : FileSplit(org.apache.hyracks.api.io.FileSplit) FileReference(org.apache.hyracks.api.io.FileReference) IIndexDataflowHelper(org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper)

Example 43 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

In class PigletMetadataProvider, the method getWriteFileRuntime:

/**
 * Builds the sink-writer runtime for a Piglet file data sink, pinned to the
 * nodes that host the sink's file splits.
 *
 * @throws AlgebricksException if resolving the output file fails
 */
@Override
public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriteFileRuntime(IDataSink sink, int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc) throws AlgebricksException {
    // Piglet sinks are always file-backed.
    PigletFileDataSink dataSink = (PigletFileDataSink) sink;
    FileSplit[] splits = dataSink.getFileSplits();
    // One location constraint entry per split: the node hosting that split.
    String[] locations = new String[splits.length];
    int idx = 0;
    for (FileSplit split : splits) {
        locations[idx++] = split.getNodeName();
    }
    try {
        IPushRuntimeFactory writerFactory = new SinkWriterRuntimeFactory(printColumns, printerFactories, splits[0].getFile(null), PrinterBasedWriterFactory.INSTANCE, inputDesc);
        return new Pair<>(writerFactory, new AlgebricksAbsolutePartitionConstraint(locations));
    } catch (HyracksDataException e) {
        // Surface file-resolution failures as compiler-level exceptions.
        throw new AlgebricksException(e);
    }
}
Also used : SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) FileSplit(org.apache.hyracks.api.io.FileSplit) IPushRuntimeFactory(org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 44 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

In class AbstractBTreeOperatorTest, the method setup:

/**
 * Initializes the test helper and wires up split providers and dataflow helper
 * factories for the primary and secondary indexes, both hosted on NC1.
 */
@Before
public void setup() throws Exception {
    testHelper = createTestHelper();
    // Primary index: a single managed split on NC1.
    primarySplitProvider = new ConstantFileSplitProvider(
            new FileSplit[] { new ManagedFileSplit(NC1_ID, testHelper.getPrimaryIndexName()) });
    primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    // Secondary index: same single-split layout on NC1.
    secondarySplitProvider = new ConstantFileSplitProvider(
            new FileSplit[] { new ManagedFileSplit(NC1_ID, testHelper.getSecondaryIndexName()) });
    secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
}
Also used : ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) Before(org.junit.Before)

Example 45 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

In class JobHelper, the method createFileSplitProvider:

/**
 * Creates a constant split provider with one managed file split per node
 * controller, naming each split's file {@code btreeFileName + "." + nc}.
 *
 * @param splitNCs      node controller ids to place splits on
 * @param btreeFileName base file name for the B-tree
 * @return a provider over the per-node splits
 */
public static IFileSplitProvider createFileSplitProvider(String[] splitNCs, String btreeFileName) {
    FileSplit[] splits = new FileSplit[splitNCs.length];
    int idx = 0;
    for (String nc : splitNCs) {
        // Suffix the file name with the node id so each NC writes a distinct file.
        splits[idx++] = new ManagedFileSplit(nc, btreeFileName + "." + nc);
    }
    return new ConstantFileSplitProvider(splits);
}
Also used : ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit)

Aggregations

FileSplit (org.apache.hyracks.api.io.FileSplit)63 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)43 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)42 JobSpecification (org.apache.hyracks.api.job.JobSpecification)40 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)39 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)39 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)39 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)38 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)33 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)33 Test (org.junit.Test)33 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)32 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)28 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)23 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)21 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)21 File (java.io.File)18 MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor)18 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)14 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)10