Skip to content

Commit

Permalink
Issues #1021, #1024, #1023, #1025, #1022
Browse files Browse the repository at this point in the history
pavel-stastny committed Dec 4, 2023
1 parent 744cd8a commit e124fb6
Showing 32 changed files with 2,440 additions and 125 deletions.
11 changes: 11 additions & 0 deletions processes/collections-backup/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
description "Collections backup/restore"


dependencies {
implementation project(':shared:common')
implementation project(':processes:import')

implementation 'org.json:json:20140107'

}

Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
/*
* Copyright (C) Nov 29, 2023 Pavel Stastny
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.inovatika.collections;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;

//import static cz.incad.kramerius.utils.XMLUtils.LOGGER;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;

import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;

import cz.incad.kramerius.FedoraNamespaces;
import cz.incad.kramerius.statistics.StatisticReport;
import cz.incad.kramerius.utils.RESTHelper;
import cz.incad.kramerius.utils.XMLUtils;
import cz.incad.kramerius.utils.conf.KConfiguration;


public class Backup {

public static final Logger LOGGER = Logger.getLogger(Backup.class.getName());

public static void main(String[] args) throws TransformerException, ParserConfigurationException, SAXException, IOException {
LOGGER.log(Level.INFO, "Process parameters: " + Arrays.asList(args).toString());
if (args.length > 2) {
Client client = Client.create();

String target = args[1];
String nameOfBackup = args[2];
String tmpDirPath = System.getProperty("java.io.tmpdir");

String subdirectoryPath = tmpDirPath + File.separator + nameOfBackup;
FileUtils.forceMkdir(new File(subdirectoryPath));


for (String pid : extractPids(target)) {
List<String> collectionProcessed = new ArrayList<>();
Stack<String> processingStack = new Stack<>();
processingStack.add(pid);
while(!processingStack.isEmpty()) {
String processingPid = processingStack.pop();
if (collectionProcessed.contains(processingPid)) {
LOGGER.warning(String.format("Found cycle on %s", processingPid));
continue;
}
collectionProcessed.add(processingPid);
if (head(client, processingPid) == 200) {
Document parsed = foxml(client, processingPid);
StringWriter writer = new StringWriter();
XMLUtils.print(parsed, writer);
LOGGER.info(String.format("Writing to %s", new File(new File(subdirectoryPath), processingPid.replace(":", "_")).getAbsolutePath()));
FileUtils.writeByteArrayToFile(new File(new File(subdirectoryPath), processingPid.replace(":", "_")+".xml"), writer.toString().getBytes("UTF-8"));
List<Element> recursiveElements = XMLUtils.getElementsRecursive(parsed.getDocumentElement(), new XMLUtils.ElementsFilter() {

@Override
public boolean acceptElement(Element element) {
boolean equals = element.getLocalName().equals("contains");
return equals;
}
});


List<String> pids = recursiveElements.stream().map(elm-> {
String attributeNS = elm.getAttributeNS(FedoraNamespaces.RDF_NAMESPACE_URI, "resource");
if (attributeNS.contains("info:fedora/")) {
String containsPid = attributeNS.substring("info:fedora/".length());
return containsPid;
} else return null;
}).filter(Objects::nonNull).collect(Collectors.toList());


if (pids.size() > 0) {

int batchSize = 40;
int numberOfIteration = pids.size() / batchSize;
if (pids.size() % batchSize != 0) {
numberOfIteration = numberOfIteration + 1;
}
for (int iteration = 0; iteration < numberOfIteration; iteration++) {
int start = iteration* batchSize;
int stop = Math.min((iteration+1)*batchSize, pids.size());
List<String> subPids = pids.subList(start, stop);

String query = subPids.stream().map(it-> {return '"' + it +'"';}).collect(Collectors.joining(" OR "));
String encodedCondition = URLEncoder.encode(" AND pid:(" + query + ")", "UTF-8");


String solrSearchHost = KConfiguration.getInstance().getSolrSearchHost()+String.format("/select?fq=model:collection%s&q=*&fl=pid&wt=json", encodedCondition);

InputStream inputStream = RESTHelper.inputStream(solrSearchHost, "", "");
String string = IOUtils.toString(inputStream, "UTF-8");
JSONObject object = new JSONObject(string);
JSONObject response = object.getJSONObject("response");
JSONArray docs = response.getJSONArray("docs");
for (int i = 0; i < docs.length(); i++) {
JSONObject doc = docs.getJSONObject(i);
String collectionPid = doc.optString("pid");
processingStack.push(collectionPid);
}

}

}
} else {
LOGGER.warning(String.format("Pid %s doesnt exists",processingPid));
}
}
}

File tmpDir = new File(subdirectoryPath);
File[] listFiles = tmpDir.listFiles();
if (listFiles != null) {
String parentZipFolder = KConfiguration.getInstance().getConfiguration().getString("collections.backup.folder");
if (parentZipFolder == null) throw new IllegalStateException("configuration property 'collections.backup.folder' must be set ");
FileUtils.forceMkdir(new File(parentZipFolder));

String zipFile = parentZipFolder + File.separator + nameOfBackup+".zip";
try {
FileOutputStream fos = new FileOutputStream(zipFile);
ZipOutputStream zos = new ZipOutputStream(fos);

for (File lF : listFiles) {
addFileToZip("", lF, zos);
}

zos.close();
fos.close();
} catch (IOException e) {
LOGGER.log(Level.SEVERE,e.getMessage(),e);
}
}
}
}


private static List<String> extractPids(String target) {
if (target.startsWith("pid:")) {
String pid = target.substring("pid:".length());
List<String> result = new ArrayList<>();
result.add(pid);
return result;
} else if (target.startsWith("pidlist:")) {
List<String> pids = Arrays.stream(target.substring("pidlist:".length()).split(";")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
return pids;
} else if (target.startsWith("pidlist_file:")) {
String filePath = target.substring("pidlist_file:".length());
File file = new File(filePath);
if (file.exists()) {
try {
return IOUtils.readLines(new FileInputStream(file), Charset.forName("UTF-8"));
} catch (IOException e) {
throw new RuntimeException("IOException " + e.getMessage());
}
} else {
throw new RuntimeException("file " + file.getAbsolutePath() + " doesnt exist ");
}
} else {
throw new RuntimeException("invalid target " + target);
}
}

private static int head(Client client, String pid) {
String url = KConfiguration.getInstance().getConfiguration().getString("api.client.point") + (KConfiguration.getInstance().getConfiguration().getString("api.point").endsWith("/") ? "" : "/") + String.format("items/%s/metadata/mods", pid);
LOGGER.info(String.format("Url %s", url));

WebResource r = client.resource(url);

WebResource.Builder builder = r.accept(MediaType.APPLICATION_XML);
ClientResponse head = builder.head();
int status = head.getStatus();
return status;
}


private static Document foxml(Client client, String processingPid)
throws ParserConfigurationException, SAXException, IOException {
String url = KConfiguration.getInstance().getConfiguration().getString("api.client.point") + (KConfiguration.getInstance().getConfiguration().getString("api.point").endsWith("/") ? "" : "/") + String.format("items/%s/foxml", processingPid);
LOGGER.info(String.format( "Requesting url is %s", url));
WebResource r = client.resource(url);

WebResource.Builder builder = r.accept(MediaType.APPLICATION_XML);
InputStream clientResponse = builder.get(InputStream.class);
Document parsed = XMLUtils.parseDocument(clientResponse, true);
return parsed;
}


private static void addFileToZip(String path, File srcFile, ZipOutputStream zipOut) throws IOException {
FileInputStream fis = new FileInputStream(srcFile);
ZipEntry zipEntry = new ZipEntry(path + "/" + srcFile.getName());
zipOut.putNextEntry(zipEntry);

byte[] bytes = new byte[1024];
int length;
while ((length = fis.read(bytes)) >= 0) {
zipOut.write(bytes, 0, length);
}
fis.close();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright (C) Nov 29, 2023 Pavel Stastny
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.inovatika.collections;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import javax.xml.bind.JAXBException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.xml.sax.SAXException;

import com.sun.jersey.api.client.Client;

import cz.incad.kramerius.processes.new_api.ProcessScheduler;
import cz.incad.kramerius.utils.conf.KConfiguration;
import cz.inovatika.collections.migrations.FromK5Instance;

public class Restore {

public static final Logger LOGGER = Logger.getLogger(Restore.class.getName());

public static void main(String[] args) throws TransformerException, ParserConfigurationException, SAXException, IOException, JAXBException, InterruptedException, SolrServerException {
LOGGER.log(Level.INFO, "Process parameters: " + Arrays.asList(args).toString());
if (args.length > 1) {
String authToken = args[0];
String target = args[1];

String parentZipFolder = KConfiguration.getInstance().getConfiguration().getString("collections.backup.folder");
if (parentZipFolder == null) throw new IllegalStateException("configuration property 'collections.backup.folder' must be set ");
String zipFile = parentZipFolder + File.separator + target;



String tmpDirPath = System.getProperty("java.io.tmpdir");
String subdirectoryPath = tmpDirPath + File.separator + target;
FileUtils.forceMkdir(new File(subdirectoryPath));
unzip(zipFile, subdirectoryPath);


LOGGER.info("Scheduling import "+subdirectoryPath);
FromK5Instance.importTmpDir(subdirectoryPath, true, authToken);
} else {
throw new IllegalArgumentException("expecting 2 arguments (authtoken, zipfile)");
}
}


public static void unzip(String zipFile, String outputFolder) throws IOException {
LOGGER.info("Unzipping file to "+outputFolder);

byte[] buffer = new byte[1024];

try (ZipInputStream zis = new ZipInputStream(new FileInputStream(zipFile))) {
ZipEntry zipEntry = zis.getNextEntry();
while (zipEntry != null) {
String fileName = zipEntry.getName();
File newFile = new File(outputFolder + File.separator + fileName);

// Vytvoření nadřazeného adresáře pro soubor, pokud neexistuje
new File(newFile.getParent()).mkdirs();

try (FileOutputStream fos = new FileOutputStream(newFile)) {
int len;
while ((len = zis.read(buffer)) > 0) {
fos.write(buffer, 0, len);
}
}

zipEntry = zis.getNextEntry();
}

zis.closeEntry();
}
}
}
Loading

0 comments on commit e124fb6

Please sign in to comment.