Skip to content

Commit

Permalink
Merge pull request #23 from stempler/master
Browse files Browse the repository at this point in the history
prepare 3.1.0 release
  • Loading branch information
stempler authored Dec 20, 2016
2 parents db71df5 + 34aae75 commit 8561036
Show file tree
Hide file tree
Showing 16 changed files with 1,437 additions and 46 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ See the [change log guidelines](http://keepachangelog.com/) for information on h

## [Unreleased]

## [3.1.0]

### Added
- Added command to split an XML/GML data set, respecting local XLink references and leaving them intact (`hale data split`)
- Added experimental commands for migrating hale projects to a different schema (`hale project migrate`)

## 3.0.0

Initial release based on hale studio 3.0.0.
Expand All @@ -17,4 +23,5 @@ Initial release based on hale studio 3.0.0.
- Command to generate JSON representations of hale alignments (`hale project alignment export-json`)
- Experimental command to create a copy of a hale project with a filtered alignment (`hale project alignment filter`)

[Unreleased]: https://github.com/halestudio/hale/compare/3.0.0...HEAD
[Unreleased]: https://github.com/halestudio/hale/compare/v3.1.0...HEAD
[3.1.0]: https://github.com/halestudio/hale/compare/v3.0.0...v3.1.0
5 changes: 3 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ sourceCompatibility = '1.8'
group = 'to.wetransform'

project.ext {
haleVersion = '3.0.0'
haleVersion = '3.1.0'
}

version = '3.0.0'
version = '3.1.0'
//TODO define versioning strategy, specifically in respect to the hale version used

/*
Expand Down Expand Up @@ -70,6 +70,7 @@ dependencies {
// core
compile 'eu.esdihumboldt.unpuzzled:org.eclipse.equinox.nonosgi.registry:1.0.0'
compile "eu.esdihumboldt.hale:eu.esdihumboldt.hale.common.core:$haleVersion"
compile "eu.esdihumboldt.hale:eu.esdihumboldt.hale.common.align.groovy:$haleVersion"

// transformation
compile "eu.esdihumboldt.hale:eu.esdihumboldt.hale.app.transform:$haleVersion"
Expand Down
227 changes: 227 additions & 0 deletions src/main/groovy/to/wetransform/halecli/data/SplitCommand.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/*
* Copyright (c) 2016 wetransform GmbH
*
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the License,
* or (at your option) any later version.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* wetransform GmbH <http://www.wetransform.to>
*/

package to.wetransform.halecli.data

import static eu.esdihumboldt.hale.app.transform.ExecUtil.fail

import java.io.File;
import java.util.Iterator;
import java.util.List
import java.util.concurrent.atomic.AtomicBoolean;

import org.eclipse.core.runtime.NullProgressMonitor;
import org.eclipse.core.runtime.jobs.IJobChangeEvent
import org.eclipse.core.runtime.jobs.IJobChangeListener;
import org.eclipse.core.runtime.jobs.Job;
import org.eclipse.core.runtime.jobs.JobChangeAdapter;

import com.google.common.io.Files

import eu.esdihumboldt.hale.app.transform.ConsoleProgressMonitor;
import eu.esdihumboldt.hale.common.core.io.HaleIO
import eu.esdihumboldt.hale.common.core.io.Value;
import eu.esdihumboldt.hale.common.core.io.report.IOReport;
import eu.esdihumboldt.hale.common.core.io.supplier.FileIOSupplier
import eu.esdihumboldt.hale.common.headless.transform.AbstractTransformationJob;
import eu.esdihumboldt.hale.common.instance.graph.reference.ReferenceGraph;
import eu.esdihumboldt.hale.common.instance.graph.reference.impl.XMLInspector
import eu.esdihumboldt.hale.common.instance.io.InstanceWriter
import eu.esdihumboldt.hale.common.instance.model.DataSet;
import eu.esdihumboldt.hale.common.instance.model.Filter;
import eu.esdihumboldt.hale.common.instance.model.Instance;
import eu.esdihumboldt.hale.common.instance.model.InstanceCollection
import eu.esdihumboldt.hale.common.instance.model.impl.FilteredInstanceCollection;
import eu.esdihumboldt.hale.common.instance.orient.OInstance;
import eu.esdihumboldt.hale.common.instance.orient.storage.BrowseOrientInstanceCollection;
import eu.esdihumboldt.hale.common.instance.orient.storage.LocalOrientDB
import eu.esdihumboldt.hale.common.instance.orient.storage.StoreInstancesJob;
import eu.esdihumboldt.hale.common.schema.model.Schema;
import eu.esdihumboldt.hale.common.schema.model.TypeIndex
import eu.esdihumboldt.hale.common.schema.model.impl.DefaultSchemaSpace;
import eu.esdihumboldt.util.cli.Command
import eu.esdihumboldt.util.cli.CommandContext
import groovy.transform.CompileStatic
import to.wetransform.halecli.util.InstanceCLI
import to.wetransform.halecli.util.SchemaCLI

/**
* @author simon
*
*/
/**
 * Command that splits an XML/GML data set into multiple parts while keeping
 * local XLink references between instances intact within each part. The
 * source is first copied into a temporary OrientDB database because the
 * original sources may resolve instance references slowly (e.g. XML/GML).
 *
 * @author simon
 */
class SplitCommand implements Command {

  /**
   * Run the split command.
   *
   * @param args the remaining command line arguments
   * @param context the command context (provides the base command for usage info)
   * @return the exit code, 0 on success
   */
  @Override
  public int run(List<String> args, CommandContext context) {
    CliBuilder cli = new CliBuilder(usage: "${context.baseCommand} [options] [...]")

    cli._(longOpt: 'help', 'Show this help')

    // threshold for splitting instances
    cli._(longOpt: 'threshold', args: 1, argName: 'max-number',
      'The maximum number of instances to put in a part (if possible)')

    // options for schema
    SchemaCLI.loadSchemaOptions(cli)

    // options for source data
    InstanceCLI.loadOptions(cli)

    // options for target data
    cli._(longOpt: 'target', args: 1, required: true, argName: 'target-folder',
      'The target folder to write the parts to')
    //TODO more options

    OptionAccessor options = cli.parse(args)

    if (options.help) {
      cli.usage()
      return 0
    }

    // handle schema
    Schema schema = SchemaCLI.loadSchema(options)
    assert schema

    // handle source data
    InstanceCollection source = InstanceCLI.load(options, schema)
    assert source

    // store in temporary database
    //XXX reason is that sources may have slow InstanceReference resolving (e.g. XML/GML)
    LocalOrientDB db = loadTempDatabase(source, schema)
    try {
      // replace source with database
      source = new BrowseOrientInstanceCollection(db, schema, DataSet.SOURCE)
      // Note: It is important that OrientDB caches are disabled
      // via system properties to have a decent performance

      println "Building reference graph..."

      // create a reference graph of the XLink references between instances
      ReferenceGraph<String> rg = new ReferenceGraph<String>(new XMLInspector(), source)

      // partition the graph into parts of (at most, if possible) threshold instances
      int threshold = (options.threshold ?: 10000) as int
      Iterator<InstanceCollection> parts = rg.partition(threshold)

      // determine and prepare the target folder
      def target = options.target as File
      if (!target) {
        throw new IllegalStateException('Please provide a target folder')
      }
      if (target.exists()) {
        if (!target.isDirectory()) {
          throw new IllegalStateException('Target is not a folder')
        }
      }
      else {
        target.mkdirs()
      }

      int partCount = 0
      while (parts.hasNext()) {
        partCount++

        def instances = parts.next()

        //FIXME right now only GML as target supported, with default settings
        File targetFile = new File(target, "part_${partCount}.gml")

        // a negative size means the collection size is unknown
        def size = instances.size()
        if (size >= 0) {
          println "Writing part with $size instances to $targetFile"
        }
        else {
          println "Writing part with undefined size to $targetFile"
        }

        saveGml(instances, targetFile, schema)
      }
      println "Total $partCount parts"
    } finally {
      // discard the temporary database
      db.delete()
    }

    return 0
  }

  /**
   * Save an instance collection as GML using the hale GML instance writer
   * with default settings (pretty printing enabled).
   *
   * @param instances the instances to write
   * @param targetFile the file to write to
   * @param schema the target schema of the instances
   * @throws IllegalStateException if writing the target file fails
   */
  @CompileStatic
  private void saveGml(InstanceCollection instances, File targetFile, Schema schema) {
    def target = new FileIOSupplier(targetFile)

    // create I/O provider - currently fixed to the GML instance writer
    InstanceWriter instanceWriter = null
    String customProvider = 'eu.esdihumboldt.hale.io.gml.writer'
    if (customProvider != null) {
      // use specified provider
      instanceWriter = HaleIO.createIOProvider(InstanceWriter, null, customProvider)
      if (instanceWriter == null) {
        fail("Could not find instance writer with ID " + customProvider)
      }
    }
    if (instanceWriter == null) {
      // fall back to finding an applicable writer based on the target file
      instanceWriter = HaleIO.findIOProvider(InstanceWriter, target, targetFile.name)
    }
    if (instanceWriter == null) {
      throw fail("Could not determine instance writer to use for target data")
    }

    //FIXME apply custom settings
    instanceWriter.setParameter('xml.pretty', Value.of((Boolean) true))

    DefaultSchemaSpace schemaSpace = new DefaultSchemaSpace()
    schemaSpace.addSchema(schema)
    instanceWriter.targetSchema = schemaSpace
    instanceWriter.target = target
    instanceWriter.instances = instances

    IOReport report = instanceWriter.execute(null)
    //TODO report?

    if (!report.isSuccess()) {
      //TODO common way to deal with reports
      throw new IllegalStateException('Writing target file failed: ' + report.summary)
    }
  }

  /**
   * Copy the given instances into a temporary local OrientDB database.
   * The caller is responsible for calling {@code delete()} on the result.
   *
   * @param instances the instances to store
   * @param schema the schema of the instances
   * @return the populated temporary database
   */
  @CompileStatic
  private LocalOrientDB loadTempDatabase(InstanceCollection instances, TypeIndex schema) {
    // create db in a temporary folder
    File tmpDir = Files.createTempDir()
    LocalOrientDB db = new LocalOrientDB(tmpDir)
    // NOTE(review): deleteOnExit does not remove non-empty folders; cleanup
    // relies on the caller invoking db.delete()
    tmpDir.deleteOnExit()

    // run store instance job first...
    Job storeJob = new StoreInstancesJob("Load source instances into temporary database",
        db, instances, null) {

      @Override
      protected void onComplete() {
        // do nothing
      }

    }

    // run synchronously, reporting progress on the console
    storeJob.run(new ConsoleProgressMonitor())

    return db
  }

  final String shortDescription = 'Split a source file into portions'

  final boolean experimental = true

}
Original file line number Diff line number Diff line change
Expand Up @@ -105,52 +105,10 @@ abstract class AbstractDeriveProjectCommand extends AbstractProjectEnvironmentCo
File projectOut = new File(projectFile.parentFile, fileName)
def output = new FileIOSupplier(projectOut)

saveProject(project, result.alignment, projectEnv.sourceSchema,
ProjectHelper.saveProject(project, result.alignment, projectEnv.sourceSchema,
projectEnv.targetSchema, output, reports, extension)

true
}

/**
 * Write a hale project (with the given alignment and schemas) to the given
 * output, using the project writer matching the file extension.
 *
 * @param project the project to save
 * @param alignment the project's alignment
 * @param sourceSchema the source schema space
 * @param targetSchema the target schema space
 * @param output supplier for the output stream to write to
 * @param reports the report handler (currently unused here — see the
 *   commented-out HeadlessIO call below)
 * @param extension the file extension used to determine the project writer
 *   (e.g. "halex")
 * @throws IllegalStateException if the writer cannot be created or writing fails
 */
@CompileStatic
void saveProject(Project project, Alignment alignment, SchemaSpace sourceSchema,
SchemaSpace targetSchema, LocatableOutputSupplier<? extends OutputStream> output,
ReportHandler reports, String extension) {

// write project
// determine content type and provider factory from a synthetic file name
// carrying the requested extension
IContentType projectType = HaleIO.findContentType(
ProjectWriter.class, null, "project.$extension")
IOProviderDescriptor factory = HaleIO.findIOProviderFactory(
ProjectWriter.class, projectType, null);
ProjectWriter projectWriter
try {
projectWriter = (ProjectWriter) factory.createExtensionObject()
} catch (Exception e1) {
throw new IllegalStateException("Failed to create project writer", e1)
}
projectWriter.setTarget(output)

// store (incomplete) save configuration
// the configuration is attached to the project so it knows how it was saved
IOConfiguration saveConf = new IOConfiguration()
projectWriter.storeConfiguration(saveConf.getProviderConfiguration())
saveConf.setProviderId(factory.getIdentifier())
project.setSaveConfiguration(saveConf)

// the advisor wires project, alignment and schemas into the writer
SaveProjectAdvisor advisor = new SaveProjectAdvisor(project, alignment, sourceSchema,
targetSchema);
advisor.prepareProvider(projectWriter)
advisor.updateConfiguration(projectWriter)
// HeadlessIO.executeProvider(projectWriter, advisor, null, reports);
// executed directly (no progress monitor) instead of via HeadlessIO
IOReport report
try {
report = projectWriter.execute(null)
} catch (Exception e) {
throw new IllegalStateException("Error writing project file.", e)
}
if (report != null) {
// treat any reported error as failure, even if the report claims success
if (!report.isSuccess() || report.errors) {
throw new IllegalStateException("Error writing project file.")
}
}
}

}
Loading

0 comments on commit 8561036

Please sign in to comment.