Use of org.apache.hadoop.io.Text in project deeplearning4j by deeplearning4j.
From the class TestDataVecDataSetFunctions, method testDataVecSequencePairDataSetFunctionVariableLength.
/**
 * Same sort of test as testDataVecSequencePairDataSetFunction(), but with variable length time
 * series (labels shorter than features).
 *
 * <p>Feature/label file pairs are combined into a Hadoop SequenceFile, read back through Spark,
 * and mapped to DataSets with {@link DataVecSequencePairDataSetFunction}. The result is compared
 * against DataSets produced locally by {@link SequenceRecordReaderDataSetIterator}, first with
 * ALIGN_END and then with ALIGN_START alignment.
 */
@Test
public void testDataVecSequencePairDataSetFunctionVariableLength() throws Exception {
    //Convert data to a SequenceFile:
    File f = new File("src/test/resources/csvsequence/csvsequence_0.txt");
    String pathFeatures = f.getAbsolutePath();
    //17 == "csvsequence_0.txt".length(): strip the file name to obtain the folder
    String folderFeatures = pathFeatures.substring(0, pathFeatures.length() - 17);
    pathFeatures = folderFeatures + "*";

    File f2 = new File("src/test/resources/csvsequencelabels/csvsequencelabelsShort_0.txt");
    String pathLabels = f2.getPath();
    //28 == "csvsequencelabelsShort_0.txt".length(): strip the file name to obtain the folder
    String folderLabels = pathLabels.substring(0, pathLabels.length() - 28);
    pathLabels = folderLabels + "*";

    //Extract a number from the file name
    PathToKeyConverter pathConverter = new PathToKeyConverterNumber();
    JavaPairRDD<Text, BytesPairWritable> toWrite =
            DataVecSparkUtil.combineFilesForSequenceFile(sc, pathFeatures, pathLabels, pathConverter);

    Path p = Files.createTempDirectory("dl4j_testSeqPairFnVarLength");
    p.toFile().deleteOnExit();
    String outPath = p.toString() + "/out";
    new File(outPath).deleteOnExit();
    toWrite.saveAsNewAPIHadoopFile(outPath, Text.class, BytesPairWritable.class, SequenceFileOutputFormat.class);

    //Load from sequence file:
    JavaPairRDD<Text, BytesPairWritable> fromSeq = sc.sequenceFile(outPath, Text.class, BytesPairWritable.class);
    SequenceRecordReader srr1 = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader srr2 = new CSVSequenceRecordReader(1, ",");
    PairSequenceRecordReaderBytesFunction psrbf = new PairSequenceRecordReaderBytesFunction(srr1, srr2);
    JavaRDD<Tuple2<List<List<Writable>>, List<List<Writable>>>> writables = fromSeq.map(psrbf);

    //Map to DataSet:
    DataVecSequencePairDataSetFunction pairFn = new DataVecSequencePairDataSetFunction(4, false,
            DataVecSequencePairDataSetFunction.AlignmentMode.ALIGN_END);
    JavaRDD<DataSet> data = writables.map(pairFn);
    List<DataSet> sparkData = data.collect();

    //Now: do the same thing locally (SequenceRecordReaderDataSetIterator) and compare
    String featuresPath = f.getPath().replaceAll("0", "%d");
    String labelsPath = f2.getPath().replaceAll("0", "%d");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader,
            labelReader, 1, 4, false, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_END);
    List<DataSet> localData = new ArrayList<>(3);
    while (iter.hasNext()) {
        localData.add(iter.next());
    }

    assertEquals(3, sparkData.size());
    assertEquals(3, localData.size());

    //1 example, 3 values, 4 time steps
    int[] fShapeExp = new int[] { 1, 3, 4 };
    //1 example, 4 values/classes, 4 time steps (after padding)
    int[] lShapeExp = new int[] { 1, 4, 4 };

    assertSequencePairShapes(sparkData, localData, fShapeExp, lShapeExp);
    assertSameDataSetsIgnoringOrder(sparkData, localData);

    //-------------------------------------------------
    //NOW: test same thing, but for align start...
    DataVecSequencePairDataSetFunction pairFnAlignStart = new DataVecSequencePairDataSetFunction(4, false,
            DataVecSequencePairDataSetFunction.AlignmentMode.ALIGN_START);
    JavaRDD<DataSet> rddDataAlignStart = writables.map(pairFnAlignStart);
    List<DataSet> sparkDataAlignStart = rddDataAlignStart.collect();

    //re-initialize to reset
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iterAlignStart = new SequenceRecordReaderDataSetIterator(featureReader,
            labelReader, 1, 4, false, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_START);
    List<DataSet> localDataAlignStart = new ArrayList<>(3);
    while (iterAlignStart.hasNext()) {
        localDataAlignStart.add(iterAlignStart.next());
    }

    assertEquals(3, sparkDataAlignStart.size());
    assertEquals(3, localDataAlignStart.size());

    assertSequencePairShapes(sparkDataAlignStart, localDataAlignStart, fShapeExp, lShapeExp);
    //Compare the ALIGN_START results with each other (previously the ALIGN_END lists were re-compared here)
    assertSameDataSetsIgnoringOrder(sparkDataAlignStart, localDataAlignStart);
}

/**
 * Checks, index by index, that every Spark DataSet carries a labels mask array and that the
 * feature and label arrays of both lists have the expected shapes. The two lists may be in a
 * different order; only shapes are compared here, not contents.
 */
private static void assertSequencePairShapes(List<DataSet> sparkSets, List<DataSet> localSets,
        int[] fShapeExp, int[] lShapeExp) {
    for (int i = 0; i < sparkSets.size(); i++) {
        DataSet dsSpark = sparkSets.get(i);
        DataSet dsLocal = localSets.get(i);

        //Expect mask array for labels
        assertNotNull(dsSpark.getLabelsMaskArray());

        INDArray fSpark = dsSpark.getFeatureMatrix();
        INDArray fLocal = dsLocal.getFeatureMatrix();
        INDArray lSpark = dsSpark.getLabels();
        INDArray lLocal = dsLocal.getLabels();

        assertArrayEquals(fShapeExp, fSpark.shape());
        assertArrayEquals(fShapeExp, fLocal.shape());
        assertArrayEquals(lShapeExp, lSpark.shape());
        assertArrayEquals(lShapeExp, lLocal.shape());
    }
}

/**
 * Checks that the Spark and local DataSets match one-to-one, order notwithstanding: each Spark
 * DataSet must equal exactly one local DataSet, and every local DataSet must be matched.
 */
private static void assertSameDataSetsIgnoringOrder(List<DataSet> sparkSets, List<DataSet> localSets) {
    boolean[] found = new boolean[localSets.size()];
    for (DataSet ds : sparkSets) {
        int foundIndex = -1;
        for (int j = 0; j < localSets.size(); j++) {
            if (ds.equals(localSets.get(j))) {
                if (foundIndex != -1) {
                    //Already found this value -> this spark value equals two or more local values
                    fail("Spark DataSet equals more than one local DataSet");
                }
                foundIndex = j;
                if (found[foundIndex]) {
                    //Another spark value already matched this one -> duplicates in Spark list
                    fail("Local DataSet " + foundIndex + " matched by more than one Spark DataSet");
                }
                //mark this one as seen before
                found[foundIndex] = true;
            }
        }
    }

    int count = 0;
    for (boolean b : found) {
        if (b) {
            count++;
        }
    }
    //Expect every local DataSet to have exactly one match in the Spark list
    assertEquals(localSets.size(), count);
}
Use of org.apache.hadoop.io.Text in project deeplearning4j by deeplearning4j.
From the class TestDataVecDataSetFunctions, method testDataVecSequencePairDataSetFunction.
/**
 * Writes the csvsequence files to a Hadoop SequenceFile (using the same files as both features
 * and labels), maps them to DataSets through Spark with the default
 * {@link DataVecSequencePairDataSetFunction}, and checks that the result matches what
 * {@link SequenceRecordReaderDataSetIterator} produces locally, ignoring order.
 */
@Test
public void testDataVecSequencePairDataSetFunction() throws Exception {
    JavaSparkContext sc = getContext();

    // Build a wildcard path covering every file in the csvsequence folder
    File firstSequenceFile = new File("src/test/resources/csvsequence/csvsequence_0.txt");
    String relativePath = firstSequenceFile.getPath();
    String parentFolder = relativePath.substring(0, relativePath.length() - 17);
    String wildcardPath = parentFolder + "*";

    PathToKeyConverter keyConverter = new PathToKeyConverterFilename();
    JavaPairRDD<Text, BytesPairWritable> pairsToWrite =
            DataVecSparkUtil.combineFilesForSequenceFile(sc, wildcardPath, wildcardPath, keyConverter);

    // Persist the pairs as a SequenceFile inside a temporary directory
    Path tempDir = Files.createTempDirectory("dl4j_testSeqPairFn");
    tempDir.toFile().deleteOnExit();
    String outPath = tempDir.toString() + "/out";
    new File(outPath).deleteOnExit();
    pairsToWrite.saveAsNewAPIHadoopFile(outPath, Text.class, BytesPairWritable.class, SequenceFileOutputFormat.class);

    // Read the SequenceFile back and parse both halves of each pair as CSV sequences
    JavaPairRDD<Text, BytesPairWritable> loadedPairs = sc.sequenceFile(outPath, Text.class, BytesPairWritable.class);
    SequenceRecordReader firstReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader secondReader = new CSVSequenceRecordReader(1, ",");
    PairSequenceRecordReaderBytesFunction bytesToSequences =
            new PairSequenceRecordReaderBytesFunction(firstReader, secondReader);
    JavaRDD<Tuple2<List<List<Writable>>, List<List<Writable>>>> sequencePairs = loadedPairs.map(bytesToSequences);

    // Spark side: sequence pairs -> DataSets
    JavaRDD<DataSet> sparkRdd = sequencePairs.map(new DataVecSequencePairDataSetFunction());
    List<DataSet> sparkData = sparkRdd.collect();

    // Local side: the same files through SequenceRecordReaderDataSetIterator
    String numberedPath = firstSequenceFile.getAbsolutePath().replaceAll("0", "%d");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(numberedPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(numberedPath, 0, 2));
    SequenceRecordReaderDataSetIterator localIterator =
            new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, -1, true);
    List<DataSet> localData = new ArrayList<>(3);
    while (localIterator.hasNext()) {
        localData.add(localIterator.next());
    }

    assertEquals(3, sparkData.size());
    assertEquals(3, localData.size());

    // Expected shape: 1 example, 3 values, 4 time steps
    int[] expectedShape = new int[] { 1, 3, 4 };
    for (int idx = 0; idx < 3; idx++) {
        // Only shapes are compared index-by-index; ordering may differ for spark vs. local
        DataSet fromSpark = sparkData.get(idx);
        DataSet fromLocal = localData.get(idx);

        assertNull(fromSpark.getFeaturesMaskArray());
        assertNull(fromSpark.getLabelsMaskArray());

        assertArrayEquals(expectedShape, fromSpark.getFeatureMatrix().shape());
        assertArrayEquals(expectedShape, fromLocal.getFeatureMatrix().shape());
        assertArrayEquals(expectedShape, fromSpark.getLabels().shape());
        assertArrayEquals(expectedShape, fromLocal.getLabels().shape());
    }

    // Content check, order notwithstanding: every spark DataSet must match exactly one local DataSet
    boolean[] localMatched = new boolean[3];
    for (int sparkIdx = 0; sparkIdx < 3; sparkIdx++) {
        DataSet candidate = sparkData.get(sparkIdx);
        boolean alreadyMatched = false;
        for (int localIdx = 0; localIdx < 3; localIdx++) {
            if (!candidate.equals(localData.get(localIdx))) {
                continue;
            }
            if (alreadyMatched) {
                // This spark value equals two or more local values (shouldn't happen)
                fail();
            }
            alreadyMatched = true;
            if (localMatched[localIdx]) {
                // An earlier spark value matched the same local value -> duplicates in Spark list
                fail();
            }
            localMatched[localIdx] = true;
        }
    }

    int matches = 0;
    for (int k = 0; k < localMatched.length; k++) {
        if (localMatched[k]) {
            matches++;
        }
    }
    // Expect all 3 and exactly 3 pairwise matches between spark and local versions
    assertEquals(3, matches);
}
Use of org.apache.hadoop.io.Text in project hive by apache.
From the class TestDBTokenStore, method testDBTokenStore.
/**
 * Exercises the master-key and delegation-token operations of {@link DBTokenStore}: adding,
 * updating, reading and removing master keys, then adding, reading, listing and removing a
 * delegation token. The store is closed in a {@code finally} block so it is released even when
 * an assertion fails.
 */
public void testDBTokenStore() throws TokenStoreException, MetaException, IOException {
    DelegationTokenStore ts = new DBTokenStore();
    ts.init(new HMSHandler("Test handler"), ServerMode.METASTORE);
    try {
        // master keys: store starts empty, unknown keys cannot be removed or updated
        assertEquals(0, ts.getMasterKeys().length);
        assertFalse(ts.removeMasterKey(-1));
        try {
            ts.updateMasterKey(-1, "non-existent-key");
            fail("Updated non-existent key.");
        } catch (TokenStoreException e) {
            assertTrue(e.getCause() instanceof NoSuchObjectException);
        }

        int keySeq = ts.addMasterKey("key1Data");
        int keySeq2 = ts.addMasterKey("key2Data");
        // identical key data is still stored under a new sequence number
        int keySeq2same = ts.addMasterKey("key2Data");
        assertEquals("keys sequential", keySeq + 1, keySeq2);
        assertEquals("keys sequential", keySeq + 2, keySeq2same);
        assertEquals("expected number of keys", 3, ts.getMasterKeys().length);

        assertTrue(ts.removeMasterKey(keySeq));
        assertTrue(ts.removeMasterKey(keySeq2same));
        assertEquals("expected number of keys", 1, ts.getMasterKeys().length);
        assertEquals("key2Data", ts.getMasterKeys()[0]);

        ts.updateMasterKey(keySeq2, "updatedData");
        assertEquals("updatedData", ts.getMasterKeys()[0]);
        assertTrue(ts.removeMasterKey(keySeq2));

        // tokens
        assertEquals(0, ts.getAllDelegationTokenIdentifiers().size());
        DelegationTokenIdentifier tokenId =
                new DelegationTokenIdentifier(new Text("owner"), new Text("renewer"), new Text("realUser"));
        assertNull(ts.getToken(tokenId));
        assertFalse(ts.removeToken(tokenId));

        // explicit charset: the no-arg getBytes() depends on the platform default
        DelegationTokenInformation tokenInfo = new DelegationTokenInformation(99,
                "password".getBytes(java.nio.charset.StandardCharsets.UTF_8));
        assertTrue(ts.addToken(tokenId, tokenInfo));
        // adding the same token id a second time is rejected
        assertFalse(ts.addToken(tokenId, tokenInfo));

        DelegationTokenInformation tokenInfoRead = ts.getToken(tokenId);
        assertEquals(tokenInfo.getRenewDate(), tokenInfoRead.getRenewDate());
        assertNotSame(tokenInfo, tokenInfoRead);
        Assert.assertArrayEquals(HiveDelegationTokenSupport.encodeDelegationTokenInformation(tokenInfo),
                HiveDelegationTokenSupport.encodeDelegationTokenInformation(tokenInfoRead));

        List<DelegationTokenIdentifier> allIds = ts.getAllDelegationTokenIdentifiers();
        assertEquals(1, allIds.size());
        Assert.assertEquals(TokenStoreDelegationTokenSecretManager.encodeWritable(tokenId),
                TokenStoreDelegationTokenSecretManager.encodeWritable(allIds.get(0)));

        assertTrue(ts.removeToken(tokenId));
        assertEquals(0, ts.getAllDelegationTokenIdentifiers().size());
        assertNull(ts.getToken(tokenId));
    } finally {
        ts.close();
    }
}
Use of org.apache.hadoop.io.Text in project hadoop by apache.
From the class TestTokenClientRMService, method checkTokenRenewal.
/**
 * Builds an RM delegation token owned by {@code owner} with {@code renewer} as its renewer
 * (no real user), wraps it in a {@link RenewDelegationTokenRequest}, and submits it to a
 * {@link ClientRMService} backed by a mocked {@link RMContext} and the {@code dtsm} secret manager.
 *
 * @param owner   user whose name is used as the token owner
 * @param renewer user whose name is used as the token renewer
 * @throws IOException   propagated from the token or renewal calls
 * @throws YarnException propagated from the renewal call
 */
private void checkTokenRenewal(UserGroupInformation owner, UserGroupInformation renewer) throws IOException, YarnException {
    Text ownerName = new Text(owner.getUserName());
    Text renewerName = new Text(renewer.getUserName());
    RMDelegationTokenIdentifier identifier = new RMDelegationTokenIdentifier(ownerName, renewerName, null);
    Token<?> securityToken = new Token<RMDelegationTokenIdentifier>(identifier, dtsm);

    // Convert the security token into the YARN record form expected by the request
    byte[] identifierBytes = securityToken.getIdentifier();
    String kind = securityToken.getKind().toString();
    byte[] password = securityToken.getPassword();
    String service = securityToken.getService().toString();
    org.apache.hadoop.yarn.api.records.Token delegationToken =
            BuilderUtils.newDelegationToken(identifierBytes, kind, password, service);

    RenewDelegationTokenRequest renewRequest = Records.newRecord(RenewDelegationTokenRequest.class);
    renewRequest.setDelegationToken(delegationToken);

    RMContext mockedContext = mock(RMContext.class);
    ClientRMService clientService = new ClientRMService(mockedContext, null, null, null, null, dtsm);
    clientService.renewDelegationToken(renewRequest);
}
Use of org.apache.hadoop.io.Text in project hadoop by apache.
From the class TestZKRMStateStore, method testFencedState.
/**
 * Moves the state store into the fenced state and then runs each store operation in turn
 * (application, attempt, delegation token, master key and AMRM token calls), checking after
 * every call that {@code isFencedState()} still reports {@code true}. The store is closed in a
 * {@code finally} block so it is released even when an assertion fails.
 */
@Test
public void testFencedState() throws Exception {
    TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester();
    RMStateStore store = zkTester.getRMStateStore();
    try {
        // Move state to FENCED from ACTIVE
        store.updateFencedState();
        assertStoreStillFenced(store);

        long submitTime = System.currentTimeMillis();
        long startTime = submitTime + 1000;

        // Add a new app
        RMApp mockApp = mock(RMApp.class);
        ApplicationSubmissionContext context = new ApplicationSubmissionContextPBImpl();
        when(mockApp.getSubmitTime()).thenReturn(submitTime);
        when(mockApp.getStartTime()).thenReturn(startTime);
        when(mockApp.getApplicationSubmissionContext()).thenReturn(context);
        when(mockApp.getUser()).thenReturn("test");
        store.storeNewApplication(mockApp);
        assertStoreStillFenced(store);

        // Add a new attempt
        ClientToAMTokenSecretManagerInRM clientToAMTokenMgr = new ClientToAMTokenSecretManagerInRM();
        ApplicationAttemptId attemptId = ApplicationAttemptId.fromString("appattempt_1234567894321_0001_000001");
        SecretKey clientTokenMasterKey = clientToAMTokenMgr.createMasterKey(attemptId);
        RMAppAttemptMetrics mockRmAppAttemptMetrics = mock(RMAppAttemptMetrics.class);
        Container container = new ContainerPBImpl();
        container.setId(ContainerId.fromString("container_1234567891234_0001_01_000001"));
        RMAppAttempt mockAttempt = mock(RMAppAttempt.class);
        when(mockAttempt.getAppAttemptId()).thenReturn(attemptId);
        when(mockAttempt.getMasterContainer()).thenReturn(container);
        when(mockAttempt.getClientTokenMasterKey()).thenReturn(clientTokenMasterKey);
        when(mockAttempt.getRMAppAttemptMetrics()).thenReturn(mockRmAppAttemptMetrics);
        when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()).thenReturn(new AggregateAppResourceUsage(0, 0));
        store.storeNewApplicationAttempt(mockAttempt);
        assertStoreStillFenced(store);

        long finishTime = submitTime + 1000;

        // Update attempt
        ApplicationAttemptStateData newAttemptState = ApplicationAttemptStateData.newInstance(attemptId, container,
                store.getCredentialsFromAppAttempt(mockAttempt), startTime, RMAppAttemptState.FINISHED, "testUrl",
                "test", FinalApplicationStatus.SUCCEEDED, 100, finishTime, 0, 0, 0, 0);
        store.updateApplicationAttemptState(newAttemptState);
        assertStoreStillFenced(store);

        // Update app
        ApplicationStateData appState = ApplicationStateData.newInstance(submitTime, startTime, context, "test");
        store.updateApplicationState(appState);
        assertStoreStillFenced(store);

        // Remove app
        store.removeApplication(mockApp);
        assertStoreStillFenced(store);

        // store RM delegation token;
        RMDelegationTokenIdentifier dtId1 =
                new RMDelegationTokenIdentifier(new Text("owner1"), new Text("renewer1"), new Text("realuser1"));
        // Long.valueOf replaces the deprecated Long(long) constructor
        Long renewDate1 = Long.valueOf(System.currentTimeMillis());
        dtId1.setSequenceNumber(1111);
        store.storeRMDelegationToken(dtId1, renewDate1);
        assertStoreStillFenced(store);

        store.updateRMDelegationToken(dtId1, renewDate1);
        assertStoreStillFenced(store);

        // remove delegation token;
        store.removeRMDelegationToken(dtId1);
        assertStoreStillFenced(store);

        // store delegation master key; explicit charset avoids depending on the platform default
        DelegationKey key = new DelegationKey(1234, 4321,
                "keyBytes".getBytes(java.nio.charset.StandardCharsets.UTF_8));
        store.storeRMDTMasterKey(key);
        assertStoreStillFenced(store);

        // remove delegation master key;
        store.removeRMDTMasterKey(key);
        assertStoreStillFenced(store);

        // store or update AMRMToken;
        store.storeOrUpdateAMRMTokenSecretManager(null, false);
        assertStoreStillFenced(store);
    } finally {
        store.close();
    }
}

/** Asserts that the given store reports itself as being in the fenced state. */
private static void assertStoreStillFenced(RMStateStore store) {
    assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
}
Aggregations