Search in sources :

Example 16 with Record

use of org.marc4j.marc.Record in project RecordManager2 by moravianlibrary.

the class CosmotronHarvestJobTest method testNew996.

/**
 * Runs the Cosmotron harvest job against three mocked HTTP responses
 * (Identify, the first ListRecords page and the page for resumption token 12345)
 * and checks that the parent record ends up with five 996 data fields while
 * the two other records are stored through {@code cosmotronDao}.
 */
@Test
public void testNew996() throws Exception {
    reset(httpClient);
    // Canned responses, served in the order the harvester is expected to request them.
    InputStream identifyResponse = this.getClass().getResourceAsStream("/sample/Identify.xml");
    InputStream firstPage = this.getClass().getResourceAsStream("/sample/cosmotron/New9961.xml");
    InputStream secondPage = this.getClass().getResourceAsStream("/sample/cosmotron/New9962.xml");
    expect(httpClient.executeGet("http://katalog.cbvk.cz/i2/i2.ws.oai.cls?verb=Identify")).andReturn(identifyResponse);
    expect(httpClient.executeGet("http://katalog.cbvk.cz/i2/i2.ws.oai.cls?verb=ListRecords&metadataPrefix=oai_marcxml_cpk")).andReturn(firstPage);
    expect(httpClient.executeGet("http://katalog.cbvk.cz/i2/i2.ws.oai.cls?verb=ListRecords&resumptionToken=12345")).andReturn(secondPage);
    replay(httpClient);

    // Launch the harvest job for the configuration under test.
    final Long confID = 328L;
    Map<String, JobParameter> jobParams = new HashMap<>();
    jobParams.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(confID));
    JobExecution execution = jobExecutor.execute(Constants.JOB_ID_HARVEST_COSMOTRON, new JobParameters(jobParams));
    Assert.assertEquals(execution.getExitStatus(), ExitStatus.COMPLETED);

    // All three harvested records must be retrievable under the same id prefix.
    OAIHarvestConfiguration harvestConfig = configDao.get(confID);
    String idPrefix = "CbvkUsCat" + Constants.COSMOTRON_RECORD_ID_CHAR;
    HarvestedRecord parent = recordDao.findByIdAndHarvestConfiguration(idPrefix + "m0000002", harvestConfig);
    Assert.assertNotNull(parent);
    Assert.assertNotNull(cosmotronDao.findByIdAndHarvestConfiguration(idPrefix + "0000003", harvestConfig));
    Assert.assertNotNull(cosmotronDao.findByIdAndHarvestConfiguration(idPrefix + "0000004", harvestConfig));

    // Parse the stored raw record and count its 996 fields.
    Record parsed = marcXmlParser.parseUnderlyingRecord(new ByteArrayInputStream(parent.getRawRecord()));
    MarcRecord parentMarc = new MarcRecordImpl(parsed);
    Assert.assertEquals(parentMarc.getDataFields("996").size(), 5);
}
Also used : OAIHarvestConfiguration(cz.mzk.recordmanager.server.model.OAIHarvestConfiguration) HashMap(java.util.HashMap) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) JobExecution(org.springframework.batch.core.JobExecution) MarcRecordImpl(cz.mzk.recordmanager.server.marc.MarcRecordImpl) ByteArrayInputStream(java.io.ByteArrayInputStream) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) Record(org.marc4j.marc.Record) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) JobParameters(org.springframework.batch.core.JobParameters) JobParameter(org.springframework.batch.core.JobParameter) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 17 with Record

use of org.marc4j.marc.Record in project OpenRefine by OpenRefine.

the class MarcImporter method createParserUIInitializationData.

/**
 * Converts the first uploaded file from binary MARC to MARC XML before
 * delegating to the superclass.
 *
 * <p>The converted copy is written next to the original as
 * {@code <original path>.xml}. If the conversion leaves a non-empty file, the
 * {@code "location"} entry of the first file record gets {@code ".xml"} appended
 * so that it refers to the converted copy. An empty result is treated as a
 * failed conversion (typically malformed MARC input): the temporary file is
 * deleted and the file record is left untouched.
 *
 * @param job         the importing job the files belong to
 * @param fileRecords file records of the job; only the first one is converted
 * @param format      format identifier, passed through to the superclass
 * @return the options object produced by the superclass implementation
 */
@Override
public ObjectNode createParserUIInitializationData(ImportingJob job, List<ObjectNode> fileRecords, String format) {
    if (!fileRecords.isEmpty()) {
        ObjectNode firstFileRecord = fileRecords.get(0);
        File file = ImportingUtilities.getFile(job, firstFileRecord);
        File tempFile = new File(file.getAbsolutePath() + ".xml");
        // Only inspect the temp file if both streams were opened, i.e. the
        // conversion was actually attempted and tempFile was (re)created by us.
        boolean streamsOpened = false;
        // try-with-resources closes both streams even if opening the second one
        // fails or one of the close() calls throws.
        try (InputStream inputStream = new FileInputStream(file);
                OutputStream outputStream = new FileOutputStream(tempFile)) {
            streamsOpened = true;
            MarcWriter writer = new MarcXmlWriter(outputStream, true);
            MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(inputStream, true, true);
            while (reader.hasNext()) {
                Record record = reader.next();
                writer.write(record);
            }
            // Closed on the success path only: after a failed read the file is
            // meant to stay empty so that the check below can detect the failure.
            writer.close();
        } catch (IOException e) {
            logger.error("Failed to create temporary XML file from MARC file", e);
        } finally {
            // Runs after the streams are closed, including when the reader
            // throws an unchecked exception on malformed input.
            if (streamsOpened) {
                if (tempFile.length() == 0) {
                    // write failed. Most of time because of wrong Marc format
                    tempFile.delete();
                } else {
                    // only set json if write the temp file successfully:
                    JSONUtilities.safePut(firstFileRecord, "location",
                            JSONUtilities.getString(firstFileRecord, "location", "") + ".xml");
                }
            }
        }
    }
    return super.createParserUIInitializationData(job, fileRecords, format);
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) MarcPermissiveStreamReader(org.marc4j.MarcPermissiveStreamReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) MarcWriter(org.marc4j.MarcWriter) Record(org.marc4j.marc.Record) IOException(java.io.IOException) File(java.io.File) FileInputStream(java.io.FileInputStream) MarcXmlWriter(org.marc4j.MarcXmlWriter)

Example 18 with Record

use of org.marc4j.marc.Record in project RecordManager2 by moravianlibrary.

the class MarcRecordFactory method recordFactory.

/**
 * Builds a {@link MarcRecordImpl} from a list of textual field descriptions.
 *
 * <p>A control field {@code 001} with value {@code "0"} is always added first;
 * after that each entry of {@code data} is interpreted by its first three
 * characters (the tag). The character at index 3 is skipped as a separator and
 * the rest of the entry is the value. Entries shorter than three characters are
 * ignored.
 *
 * <pre>
 * Leader:
 *   3 character tag 000
 *   space
 *   24 character tag's value (longer values are cut, shorter ones space-padded)
 *   example: "000 00759cam a2200229 a 4500"
 *
 * ControlField:
 *   3 character tag 00[1-9]
 *   space
 *   tag's value
 *   example: "001 001445152"
 *
 * DataField:
 *   3 character tag (three word characters)
 *   space
 *   indicator1
 *   indicator2
 *   tag's value, perhaps with internal Subfields: 1 character $
 *                                                 1 character tag
 *                                                 value
 *                                                 example: "$aCambridge"
 *   example: "991 00$b201503$cNOV$gg$en"
 *
 *   or
 *
 *   3 character tag
 *   space
 *   tag's value, perhaps with internal Subfields
 *   example - indicators set on ' ': "991 $b201503$cNOV$gg$en"
 * </pre>
 *
 * <p>A data field with an empty value is added without subfields and with blank
 * indicators. A data field whose value is exactly one character long is skipped.
 *
 * @param data field descriptions in the format described above
 * @return a record wrapping the parsed leader, control fields and data fields
 * @throws Exception declared for callers; the visible code throws no checked exception itself
 */
public static MarcRecordImpl recordFactory(List<String> data) throws Exception {
    MarcFactory marcFactory = MarcFactoryImpl.newInstance();
    Record record = marcFactory.newRecord();
    record.addVariableField(marcFactory.newControlField("001", "0"));
    // Compiled once per call instead of once per field.
    Pattern controlFieldTag = Pattern.compile("00[1-9]");
    Pattern dataFieldTag = Pattern.compile("\\w{3}");
    // One subfield: '$', a single alphanumeric code, then data up to the next '$'.
    Pattern subfieldPattern = Pattern.compile("\\$([a-zA-Z0-9])([^$]*)");
    for (String field : data) {
        if (field.length() < 3) {
            continue;
        }
        String key = field.substring(0, 3);
        // Index 3 is the separator between tag and value.
        String value = (field.length() > 3) ? field.substring(4) : "";
        if ("000".equals(key)) {
            // Leader: force the value to exactly 24 characters.
            if (value.length() > 24) {
                value = value.substring(0, 24);
            } else {
                value = String.format("%-24s", value);
            }
            record.setLeader(marcFactory.newLeader(value));
        } else if (controlFieldTag.matcher(key).matches()) {
            // ControlField
            record.addVariableField(marcFactory.newControlField(key, value));
        } else if (dataFieldTag.matcher(key).matches()) {
            // DataField
            if (value.isEmpty()) {
                record.addVariableField(marcFactory.newDataField(key, ' ', ' '));
            } else if (value.length() >= 2) {
                // Indicators: a '$' in either of the first two positions means the
                // subfields start there and the missing indicator(s) are blank.
                DataField dataField;
                if (value.charAt(0) == '$') {
                    dataField = marcFactory.newDataField(key, ' ', ' ');
                } else if (value.charAt(1) == '$') {
                    dataField = marcFactory.newDataField(key, value.charAt(0), ' ');
                } else {
                    dataField = marcFactory.newDataField(key, value.charAt(0), value.charAt(1));
                }
                // Subfields: scan the value once. Subfield data may contain line
                // terminators without truncating it or losing later subfields.
                if (value.length() > 2) {
                    Matcher matcher = subfieldPattern.matcher(value);
                    while (matcher.find()) {
                        dataField.addSubfield(marcFactory.newSubfield(matcher.group(1).charAt(0), matcher.group(2)));
                    }
                }
                record.addVariableField(dataField);
            }
        }
    }
    return new MarcRecordImpl(record);
}
Also used : Pattern(java.util.regex.Pattern) DataField(org.marc4j.marc.DataField) Matcher(java.util.regex.Matcher) MarcFactory(org.marc4j.marc.MarcFactory) Record(org.marc4j.marc.Record)

Example 19 with Record

use of org.marc4j.marc.Record in project RecordManager2 by moravianlibrary.

the class ImportRecordsWriter method writeInner.

/**
 * Stores every record of every batch in {@code items} as a harvested record.
 *
 * <p>For each record: serialize it to MARC XML, derive the record id from the
 * parsed metadata (optionally passed through {@code regexpExtractor}), let the
 * configured interceptor rewrite the content if interception is enabled, then
 * create or update the matching {@link HarvestedRecord}, compute its dedup keys
 * and mark it deleted when filtering is enabled and it should not be processed.
 *
 * @param items batches of MARC records to store
 * @throws Exception any failure while processing a record; it is logged with a
 *                   warning and rethrown unchanged
 */
protected void writeInner(List<? extends List<Record>> items) throws Exception {
    for (List<Record> batch : items) {
        for (Record marc : batch) {
            try {
                // Serialize the record to MARC XML.
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                MarcWriter xmlWriter = new MarcXmlWriter(buffer, true);
                xmlWriter.setConverter(ISOCharConvertor.INSTANCE);
                xmlWriter.write(marc);
                xmlWriter.close();
                byte[] rawXml = buffer.toByteArray();

                // The record id is needed before interception.
                MetadataRecord parsedMetadata = parseMetadata(rawXml);
                String id = parsedMetadata.getUniqueId();
                if (regexpExtractor != null) {
                    id = regexpExtractor.extract(id);
                }

                // Interception may replace the content; metadata is re-parsed only on change.
                MarcRecordInterceptor interceptor = harvestConfiguration.isInterceptionEnabled()
                        ? marcInterceptorFactory.getInterceptor(harvestConfiguration, id, rawXml)
                        : null;
                if (interceptor != null) {
                    byte[] interceptedXml = interceptor.intercept();
                    if (!Arrays.equals(rawXml, interceptedXml)) {
                        parsedMetadata = parseMetadata(interceptedXml);
                        rawXml = interceptedXml;
                    }
                }

                // Reuse the stored record when there is one, otherwise create it.
                HarvestedRecord harvested = harvestedRecordDao.findByIdAndHarvestConfiguration(id, configurationId);
                if (harvested == null) {
                    harvested = new HarvestedRecord(new HarvestedRecordUniqueId(harvestConfiguration, id));
                    // TODO detect format
                    harvested.setFormat("marc21-xml");
                    harvested.setHarvestedFrom(harvestConfiguration);
                }
                harvested.setUpdated(new Date());
                harvested.setDeleted(null);
                harvested.setRawRecord(rawXml);
                harvestedRecordDao.persist(harvested);

                dedupKeysParser.parse(harvested, parsedMetadata);
                if (harvestConfiguration.isFilteringEnabled() && !harvested.getShouldBeProcessed()) {
                    logger.debug("Filtered record: " + harvested.getUniqueId());
                    harvested.setDeleted(new Date());
                }
                harvestedRecordDao.persist(harvested);
                progress.incrementAndLogProgress();
            } catch (Exception failure) {
                logger.warn("Error occured in processing record");
                throw failure;
            }
        }
    }
}
Also used : MarcRecordInterceptor(cz.mzk.recordmanager.server.marc.intercepting.MarcRecordInterceptor) HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) ByteArrayOutputStream(java.io.ByteArrayOutputStream) MarcXmlWriter(org.marc4j.MarcXmlWriter) Date(java.util.Date) MarcWriter(org.marc4j.MarcWriter) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) Record(org.marc4j.marc.Record) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 20 with Record

use of org.marc4j.marc.Record in project RecordManager2 by moravianlibrary.

the class KkvyMarcInterceptor method intercept.

/**
 * Rebuilds the intercepted record and returns it as UTF-8 encoded MARC XML.
 *
 * <p>The leader and control fields are copied as they are. Data fields are
 * added in ascending tag order; 996 fields whose subfield {@code l} equals
 * {@code "VF"} after trimming are left out, and every remaining data field is
 * passed to {@code processField996} before being added.
 *
 * @return the exported record bytes, or an empty array when there is no record
 */
@Override
public byte[] intercept() {
    if (super.getRecord() == null) {
        return new byte[0];
    }
    MarcRecord source = new MarcRecordImpl(super.getRecord());
    Record rebuilt = new RecordImpl();
    rebuilt.setLeader(getRecord().getLeader());
    for (ControlField controlField : super.getRecord().getControlFields()) {
        rebuilt.addVariableField(controlField);
    }
    Map<String, List<DataField>> fieldsByTag = source.getAllFields();
    // A TreeSet gives the tags in sorted order.
    TreeSet<String> sortedTags = new TreeSet<String>(fieldsByTag.keySet());
    for (String tag : sortedTags) {
        for (DataField field : fieldsByTag.get(tag)) {
            // kill fields 996l = VF
            boolean discarded = field.getTag().equals("996")
                    && field.getSubfield('l') != null
                    && field.getSubfield('l').getData().trim().equals("VF");
            if (!discarded) {
                processField996(field);
                rebuilt.addVariableField(field);
            }
        }
    }
    return new MarcRecordImpl(rebuilt).export(IOFormat.XML_MARC).getBytes(StandardCharsets.UTF_8);
}
Also used : MarcRecordImpl(cz.mzk.recordmanager.server.marc.MarcRecordImpl) ControlField(org.marc4j.marc.ControlField) DataField(org.marc4j.marc.DataField) TreeSet(java.util.TreeSet) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) Record(org.marc4j.marc.Record) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) List(java.util.List) MarcRecordImpl(cz.mzk.recordmanager.server.marc.MarcRecordImpl) RecordImpl(cz.mzk.recordmanager.server.marc.marc4j.RecordImpl)

Aggregations

Record (org.marc4j.marc.Record)32 MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl)21 MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)20 DataField (org.marc4j.marc.DataField)13 ControlField (org.marc4j.marc.ControlField)12 RecordImpl (cz.mzk.recordmanager.server.marc.marc4j.RecordImpl)11 HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)10 List (java.util.List)10 TreeSet (java.util.TreeSet)9 InputStream (java.io.InputStream)8 MarcFactory (org.marc4j.marc.MarcFactory)8 ByteArrayInputStream (java.io.ByteArrayInputStream)7 MarcFactoryImpl (cz.mzk.recordmanager.server.marc.marc4j.MarcFactoryImpl)6 Date (java.util.Date)6 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)5 HashMap (java.util.HashMap)5 Matcher (java.util.regex.Matcher)5 MarcWriter (org.marc4j.MarcWriter)5 MarcXmlWriter (org.marc4j.MarcXmlWriter)5 JobExecution (org.springframework.batch.core.JobExecution)5