use of org.marc4j.marc.Record in project RecordManager2 by moravianlibrary.
the class CosmotronHarvestJobTest method testNew996.
/**
 * Runs the Cosmotron harvest job for configuration 328 against three mocked
 * HTTP responses (Identify, the first ListRecords page and the page fetched
 * with resumption token 12345), then checks that the job completed, that the
 * expected harvested and Cosmotron records exist, and that the stored parent
 * record carries five 996 data fields.
 */
@Test
public void testNew996() throws Exception {
    reset(httpClient);

    // Canned responses served by the mocked HTTP client.
    InputStream identifyResponse = getClass().getResourceAsStream("/sample/Identify.xml");
    InputStream firstPageResponse = getClass().getResourceAsStream("/sample/cosmotron/New9961.xml");
    InputStream secondPageResponse = getClass().getResourceAsStream("/sample/cosmotron/New9962.xml");
    expect(httpClient.executeGet("http://katalog.cbvk.cz/i2/i2.ws.oai.cls?verb=Identify"))
            .andReturn(identifyResponse);
    expect(httpClient.executeGet("http://katalog.cbvk.cz/i2/i2.ws.oai.cls?verb=ListRecords&metadataPrefix=oai_marcxml_cpk"))
            .andReturn(firstPageResponse);
    expect(httpClient.executeGet("http://katalog.cbvk.cz/i2/i2.ws.oai.cls?verb=ListRecords&resumptionToken=12345"))
            .andReturn(secondPageResponse);
    replay(httpClient);

    // Launch the harvest job for the configuration under test.
    final Long configurationId = 328L;
    Map<String, JobParameter> jobParams = new HashMap<>();
    jobParams.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(configurationId));
    JobExecution execution = jobExecutor.execute(Constants.JOB_ID_HARVEST_COSMOTRON, new JobParameters(jobParams));
    Assert.assertEquals(execution.getExitStatus(), ExitStatus.COMPLETED);

    // Verify which records were stored.
    OAIHarvestConfiguration harvestConfig = configDao.get(configurationId);
    String idPrefix = "CbvkUsCat" + Constants.COSMOTRON_RECORD_ID_CHAR;
    HarvestedRecord parent = recordDao.findByIdAndHarvestConfiguration(idPrefix + "m0000002", harvestConfig);
    Assert.assertNotNull(parent);
    Assert.assertNotNull(cosmotronDao.findByIdAndHarvestConfiguration(idPrefix + "0000003", harvestConfig));
    Assert.assertNotNull(cosmotronDao.findByIdAndHarvestConfiguration(idPrefix + "0000004", harvestConfig));

    // Parse the stored raw record and count its 996 fields.
    InputStream rawRecordStream = new ByteArrayInputStream(parent.getRawRecord());
    Record parsed = marcXmlParser.parseUnderlyingRecord(rawRecordStream);
    MarcRecord parentMarc = new MarcRecordImpl(parsed);
    Assert.assertEquals(parentMarc.getDataFields("996").size(), 5);
}
use of org.marc4j.marc.Record in project OpenRefine by OpenRefine.
the class MarcImporter method createParserUIInitializationData.
/**
 * Converts the first uploaded MARC file to a MARC XML file stored next to it
 * (same path with ".xml" appended) and then delegates to the superclass to
 * build the parser UI options.
 *
 * <p>If the conversion produced a non-empty XML file, the "location" entry of
 * the first file record is updated to point at it; an empty result file is
 * deleted and the file record is left untouched. I/O failures are logged and
 * do not prevent the superclass options from being returned.
 *
 * @param job         the importing job the files belong to
 * @param fileRecords JSON records describing the uploaded files; only the
 *                    first one is converted
 * @param format      the format identifier, passed through to the superclass
 * @return the options object produced by the superclass
 */
@Override
public ObjectNode createParserUIInitializationData(ImportingJob job, List<ObjectNode> fileRecords, String format) {
    if (!fileRecords.isEmpty()) {
        ObjectNode firstFileRecord = fileRecords.get(0);
        File file = ImportingUtilities.getFile(job, firstFileRecord);
        File tempFile = new File(file.getAbsolutePath() + ".xml");
        // Tracks whether both streams were opened, so the result check below
        // never looks at a temp file this call did not create.
        boolean streamsOpened = false;
        // try-with-resources closes each stream independently, even when
        // opening the second stream or closing the first one fails.
        try (InputStream inputStream = new FileInputStream(file);
                OutputStream outputStream = new FileOutputStream(tempFile)) {
            streamsOpened = true;
            MarcWriter writer = new MarcXmlWriter(outputStream, true);
            MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(inputStream, true, true);
            while (reader.hasNext()) {
                Record record = reader.next();
                writer.write(record);
            }
            writer.close();
        } catch (IOException e) {
            logger.error("Failed to create temporary XML file from MARC file", e);
        } finally {
            // Runs after the streams have been closed.
            if (streamsOpened) {
                if (tempFile.length() == 0) {
                    // write failed. Most of time because of wrong Marc format
                    tempFile.delete();
                } else {
                    // only set json if write the temp file successfully:
                    JSONUtilities.safePut(firstFileRecord, "location",
                            JSONUtilities.getString(firstFileRecord, "location", "") + ".xml");
                }
            }
        }
    }
    return super.createParserUIInitializationData(job, fileRecords, format);
}
use of org.marc4j.marc.Record in project RecordManager2 by moravianlibrary.
the class MarcRecordFactory method recordFactory.
/**
 * Builds a MARC record from a list of text lines, one field per line.
 *
 * <p>Every line starts with a 3 character tag. The character at index 3 is
 * treated as a separator and skipped; the rest of the line is the field
 * content. Lines shorter than 3 characters are ignored. The record always
 * starts with a control field {@code 001} with value {@code "0"}.
 *
 * <ul>
 * <li><b>Leader</b> - tag {@code 000}; the content is cut or space-padded to
 *     exactly 24 characters.<br>
 *     example: {@code "000 00759cam a2200229 a 4500"}</li>
 * <li><b>ControlField</b> - tag {@code 00[1-9]}; the content is the value.<br>
 *     example: {@code "001 001445152"}</li>
 * <li><b>DataField</b> - any other tag made of three word characters.
 *     The content may start with zero, one or two indicator characters
 *     followed by subfields written as {@code $}, a one character code and
 *     the value. Missing indicators are set to a space. Empty content yields
 *     a data field with blank indicators and no subfields; content consisting
 *     of a single character is skipped.<br>
 *     example: {@code "991 00$b201503$cNOV$gg$en"}<br>
 *     example - indicators set to ' ': {@code "991 $b201503$cNOV$gg$en"}</li>
 * </ul>
 *
 * @param data the field lines in the format described above
 * @return a record wrapper containing the parsed leader and fields
 * @throws Exception declared by the signature; nothing in this method throws
 *         a checked exception explicitly
 */
public static MarcRecordImpl recordFactory(List<String> data) throws Exception {
    // Compiled once up front instead of once per input line.
    final Pattern controlTagPattern = Pattern.compile("00[1-9]");
    final Pattern dataTagPattern = Pattern.compile("\\w{3}");
    final Pattern subfieldPattern = Pattern.compile("\\$([a-zA-Z0-9])([^$]*)(.*)");

    MarcFactory factory = MarcFactoryImpl.newInstance();
    Record result = factory.newRecord();
    result.addVariableField(factory.newControlField("001", "0"));

    for (String line : data) {
        if (line.length() < 3) {
            continue;
        }
        String tag = line.substring(0, 3);
        // Index 3 is the separator between tag and content.
        String content = line.length() > 3 ? line.substring(4) : "";

        // Leader
        if ("000".equals(tag)) {
            String leaderText = content.length() > 24
                    ? content.substring(0, 24)
                    : String.format("%-24s", content);
            result.setLeader(factory.newLeader(leaderText));
            continue;
        }

        // ControlField
        if (controlTagPattern.matcher(tag).matches()) {
            result.addVariableField(factory.newControlField(tag, content));
            continue;
        }

        // DataField
        if (!dataTagPattern.matcher(tag).matches()) {
            continue;
        }
        if (content.isEmpty()) {
            result.addVariableField(factory.newDataField(tag, ' ', ' '));
            continue;
        }
        if (content.length() < 2) {
            // A single character of content is neither indicators nor a subfield.
            continue;
        }

        // Indicators: a '$' in either of the first two positions means the
        // indicator at that position (and any after it) is blank.
        char indicator1 = ' ';
        char indicator2 = ' ';
        if (content.charAt(0) != '$') {
            indicator1 = content.charAt(0);
            if (content.charAt(1) != '$') {
                indicator2 = content.charAt(1);
            }
        }
        DataField dataField = factory.newDataField(tag, indicator1, indicator2);

        // Subfields: take the first match, then continue on the remainder
        // captured by the third group.
        if (content.length() > 2) {
            Matcher subfieldMatcher = subfieldPattern.matcher(content);
            while (subfieldMatcher.find()) {
                char code = subfieldMatcher.group(1).charAt(0);
                String subfieldValue = subfieldMatcher.group(2);
                dataField.addSubfield(factory.newSubfield(code, subfieldValue));
                subfieldMatcher = subfieldPattern.matcher(subfieldMatcher.group(3));
            }
        }
        result.addVariableField(dataField);
    }
    return new MarcRecordImpl(result);
}
use of org.marc4j.marc.Record in project RecordManager2 by moravianlibrary.
the class ImportRecordsWriter method writeInner.
/**
 * Stores every MARC record from the given batches as a harvested record.
 *
 * <p>Each record is serialized to MARC XML, its unique id is read from the
 * parsed metadata (and passed through the regexp extractor when one is set),
 * the content is optionally run through an interceptor, and the result is
 * persisted. Dedup keys are parsed after the first persist; when filtering is
 * enabled and the record should not be processed it is marked deleted before
 * the second persist.
 *
 * @param items batches of MARC records to import
 * @throws Exception any failure while processing a record; it is logged and
 *         rethrown, which stops processing of the remaining records
 */
protected void writeInner(List<? extends List<Record>> items) throws Exception {
    for (List<Record> batch : items) {
        for (Record marc : batch) {
            try {
                // Serialize the record to MARC XML.
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                MarcWriter xmlWriter = new MarcXmlWriter(buffer, true);
                xmlWriter.setConverter(ISOCharConvertor.INSTANCE);
                xmlWriter.write(marc);
                xmlWriter.close();
                byte[] rawXml = buffer.toByteArray();

                // The record id is needed before interception.
                MetadataRecord parsedMetadata = parseMetadata(rawXml);
                String id = parsedMetadata.getUniqueId();
                if (regexpExtractor != null) {
                    id = regexpExtractor.extract(id);
                }

                MarcRecordInterceptor interceptor = null;
                if (harvestConfiguration.isInterceptionEnabled()) {
                    interceptor = marcInterceptorFactory.getInterceptor(harvestConfiguration, id, rawXml);
                }
                if (interceptor != null) {
                    byte[] interceptedXml = interceptor.intercept();
                    if (!Arrays.equals(rawXml, interceptedXml)) {
                        // Content changed: re-parse metadata and keep the new bytes.
                        parsedMetadata = parseMetadata(interceptedXml);
                        rawXml = interceptedXml;
                    }
                }

                // Load the existing harvested record or create a new one.
                HarvestedRecord harvested = harvestedRecordDao.findByIdAndHarvestConfiguration(id, configurationId);
                if (harvested == null) {
                    harvested = new HarvestedRecord(new HarvestedRecordUniqueId(harvestConfiguration, id));
                    // TODO detect format
                    harvested.setFormat("marc21-xml");
                    harvested.setHarvestedFrom(harvestConfiguration);
                }
                harvested.setUpdated(new Date());
                harvested.setDeleted(null);
                harvested.setRawRecord(rawXml);
                harvestedRecordDao.persist(harvested);

                dedupKeysParser.parse(harvested, parsedMetadata);
                boolean filteredOut = harvestConfiguration.isFilteringEnabled() && !harvested.getShouldBeProcessed();
                if (filteredOut) {
                    logger.debug("Filtered record: " + harvested.getUniqueId());
                    harvested.setDeleted(new Date());
                }
                harvestedRecordDao.persist(harvested);
                progress.incrementAndLogProgress();
            } catch (Exception e) {
                logger.warn("Error occured in processing record");
                throw e;
            }
        }
    }
}
use of org.marc4j.marc.Record in project RecordManager2 by moravianlibrary.
the class KkvyMarcInterceptor method intercept.
/**
 * Rebuilds the intercepted record and returns it as MARC XML bytes.
 *
 * <p>The leader and all control fields are copied as they are. Data fields
 * are added in ascending tag order; 996 fields whose subfield {@code l}
 * equals "VF" after trimming are dropped, and every remaining data field is
 * passed to {@code processField996} before it is added.
 *
 * @return the rebuilt record as UTF-8 encoded MARC XML, or an empty array
 *         when there is no record to process
 */
@Override
public byte[] intercept() {
    Record source = super.getRecord();
    if (source == null) {
        return new byte[0];
    }

    MarcRecord sourceMarc = new MarcRecordImpl(source);
    Record rebuilt = new RecordImpl();
    rebuilt.setLeader(getRecord().getLeader());
    for (ControlField controlField : source.getControlFields()) {
        rebuilt.addVariableField(controlField);
    }

    Map<String, List<DataField>> fieldsByTag = sourceMarc.getAllFields();
    // A TreeSet iterates the tags in sorted order.
    TreeSet<String> sortedTags = new TreeSet<String>(fieldsByTag.keySet());
    for (String tag : sortedTags) {
        for (DataField field : fieldsByTag.get(tag)) {
            // kill fields 996l = VF
            boolean dropped = field.getTag().equals("996")
                    && field.getSubfield('l') != null
                    && field.getSubfield('l').getData().trim().equals("VF");
            if (dropped) {
                continue;
            }
            processField996(field);
            rebuilt.addVariableField(field);
        }
    }

    String xml = new MarcRecordImpl(rebuilt).export(IOFormat.XML_MARC);
    return xml.getBytes(StandardCharsets.UTF_8);
}
Aggregations