AbstractEdgeModeler.java
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.ostrichemulators.semtool.rdf.engine.edgemodelers;
import com.ostrichemulators.semtool.model.vocabulary.SEMONTO;
import com.ostrichemulators.semtool.poi.main.ImportData;
import com.ostrichemulators.semtool.poi.main.ImportMetadata;
import com.ostrichemulators.semtool.poi.main.ImportValidationException;
import com.ostrichemulators.semtool.poi.main.ImportValidationException.ErrorType;
import com.ostrichemulators.semtool.poi.main.LoadingSheetData;
import com.ostrichemulators.semtool.rdf.engine.util.QaChecker;
import com.ostrichemulators.semtool.rdf.engine.util.QaChecker.RelationCacheKey;
import com.ostrichemulators.semtool.rdf.engine.util.SemtoolStructureManagerImpl;
import static com.ostrichemulators.semtool.util.RDFDatatypeTools.URISTARTPATTERN;
import static com.ostrichemulators.semtool.util.RDFDatatypeTools.getRDFStringValue;
import static com.ostrichemulators.semtool.util.RDFDatatypeTools.getUriFromRawString;
import com.ostrichemulators.semtool.util.UriBuilder;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import org.apache.log4j.Logger;
import org.openrdf.model.Model;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.TreeModel;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
/**
 * Base class for {@link EdgeModeler} implementations. It provides node and
 * property creation, metamodel generation, and thin wrappers around the
 * caches of a {@link QaChecker}.
 *
 * @author ryan
 */
public abstract class AbstractEdgeModeler implements EdgeModeler {

  private static final Logger log = Logger.getLogger( AbstractEdgeModeler.class );
  /**
   * URIs that are already in use. Initialized from
   * {@link QaChecker#getKnownUris()} in the constructor and extended by every
   * <code>cache*</code> method of this class.
   */
  private final Set<URI> duplicates;
  private QaChecker qaer;

  /**
   * Creates a modeler backed by the given checker.
   *
   * @param qa the checker whose caches and known URIs are used
   */
  public AbstractEdgeModeler( QaChecker qa ) {
    qaer = qa;
    duplicates = qaer.getKnownUris();
  }

  /**
   * Decides whether a raw string denotes a URI: either it matches
   * {@link com.ostrichemulators.semtool.util.RDFDatatypeTools#URISTARTPATTERN}
   * (after stripping one pair of enclosing angle brackets), or it has the form
   * <code>prefix:local</code> where <code>prefix</code> maps to a non-blank
   * namespace.
   *
   * @param raw the string to check
   * @param namespaces prefix-to-namespace mappings; <code>null</code> is
   * treated as "no prefixes defined"
   * @return true, if the string looks like a full or prefixed URI
   */
  public static boolean isUri( String raw, Map<String, String> namespaces ) {
    if ( raw.startsWith( "<" ) && raw.endsWith( ">" ) ) {
      raw = raw.substring( 1, raw.length() - 1 );
    }

    Matcher m = URISTARTPATTERN.matcher( raw );
    if ( m.matches() ) {
      return true;
    }

    // a string without a colon splits into a single piece, so no separate
    // contains(":") check is needed
    String[] pieces = raw.split( ":" );
    if ( 2 != pieces.length || null == namespaces ) {
      return false;
    }

    String namespace = namespaces.get( pieces[0] );
    return !( null == namespace || namespace.trim().isEmpty() );
  }

  /**
   * Checks that the given ImportMetadata is valid for importing data
   * (basically, does it have a data builder set).
   *
   * @param metas the data to check; <code>null</code> is considered invalid
   * @return true, if the ImportMetadata can be used for importing data
   */
  public static boolean isValidMetadata( ImportMetadata metas ) {
    return ( null != metas && null != metas.getDataBuilder() );
  }

  /**
   * Same as
   * {@link #isValidMetadata(com.ostrichemulators.semtool.poi.main.ImportMetadata)},
   * but throws an exception instead of returning <code>false</code>.
   *
   * @param metas the data to check
   * @throws ImportValidationException with {@link ErrorType#MISSING_DATA} if
   * the metadata is not valid
   */
  public static void isValidMetadataEx( ImportMetadata metas ) throws ImportValidationException {
    if ( !isValidMetadata( metas ) ) {
      throw new ImportValidationException( ErrorType.MISSING_DATA,
          "Invalid metadata" );
    }
  }

  /**
   * Adds just a node to the dataset (no properties, nothing else). The node's
   * URI is cached under (typename, rawlabel) and an <code>rdf:type</code>
   * statement is added to the connection.
   *
   * @param typename the node's type name, used as the cache key for the
   * instance class
   * @param rawlabel the node's label, or a full/prefixed URI
   * @param namespaces prefix-to-namespace mappings
   * @param metas supplies the data builder used to mint new URIs
   * @param myrc the connection to add the type statement to
   * @param checkCacheFirst if true, an already-cached instance is reused
   * instead of minting a new URI (only matters when the label is not a URI)
   * @return the node's URI
   * @throws org.openrdf.repository.RepositoryException
   */
  protected URI addSimpleNode( String typename, String rawlabel, Map<String, String> namespaces,
      ImportMetadata metas, RepositoryConnection myrc, boolean checkCacheFirst )
      throws RepositoryException {
    if ( isUri( rawlabel, namespaces ) ) {
      // the label already names a URI, so use it verbatim (no uniqueness check)
      cacheInstance( getUriFromRawString( rawlabel, namespaces ), typename, rawlabel );
    }
    else if ( !checkCacheFirst || !hasCachedInstance( typename, rawlabel ) ) {
      // mint a fresh URI from the label, and make sure nobody else has it
      URI minted = metas.getDataBuilder().add( rawlabel ).build();
      cacheInstance( ensureUnique( minted ), typename, rawlabel );
    }

    URI subject = getCachedInstance( typename, rawlabel );
    myrc.add( subject, RDF.TYPE, qaer.getCachedInstanceClass( typename ) );
    return subject;
  }

  /**
   * Replaces a URI that is already in use with a new one from the same
   * namespace. The replacement is recorded as in-use; a URI that is returned
   * unchanged is not recorded by this function.
   *
   * @param uri the candidate URI
   * @return the argument if it is not yet known, else a URI generated by
   * {@link UriBuilder#uniqueUri()}
   */
  protected URI ensureUnique( URI uri ) {
    if ( duplicates.contains( uri ) ) {
      UriBuilder dupefixer = UriBuilder.getBuilder( uri.getNamespace() );
      uri = dupefixer.uniqueUri();
      duplicates.add( uri );
    }
    return uri;
  }

  /**
   * Adds a node, optionally its <code>rdfs:label</code>, and its properties.
   * The label statement is only written when the metadata asks for an
   * auto-created metamodel, the raw label does not resolve to a URI, and the
   * node carries no explicit label property.
   *
   * @param nap the node and its property values
   * @param namespaces prefix-to-namespace mappings
   * @param sheet the sheet the node comes from
   * @param metas the import metadata
   * @param myrc the connection to add statements to
   * @return the node's URI
   * @throws RepositoryException
   */
  @Override
  public URI addNode( LoadingSheetData.LoadingNodeAndPropertyValues nap,
      Map<String, String> namespaces, LoadingSheetData sheet, ImportMetadata metas,
      RepositoryConnection myrc ) throws RepositoryException {
    String typename = nap.getSubjectType();
    String rawlabel = nap.getSubject();

    URI subject = addSimpleNode( typename, rawlabel, namespaces, metas, myrc, true );

    ValueFactory vf = myrc.getValueFactory();
    boolean savelabel = metas.isAutocreateMetamodel();
    if ( rawlabel.contains( ":" ) ) {
      // we have something with a colon in it, so we need to figure out if it's
      // a namespace-prefixed string, or just a string with a colon in it
      Value val = getRDFStringValue( rawlabel, namespaces, vf );

      // a label that resolves to a URI never gets a label statement
      URI u = getUriFromRawString( rawlabel, namespaces );
      savelabel = ( savelabel && null == u );
      rawlabel = val.stringValue();
    }

    // if we have a label property, skip this label-making
    // (it'll get handled in the addProperties function later)
    if ( savelabel && !nap.hasProperty( RDFS.LABEL, namespaces ) ) {
      myrc.add( subject, RDFS.LABEL, vf.createLiteral( rawlabel ) );
    }

    addProperties( subject, nap, namespaces, sheet, metas, myrc );
    return subject;
  }

  /**
   * Adds one statement per property to the connection. A property that the
   * sheet reports as a link is treated as the label of another node: that node
   * is added (or reused from the cache) and connected with the cached relation
   * class instead of the property class.
   *
   * @param subject the subject of every added statement
   * @param properties property name to value
   * @param namespaces prefix-to-namespace mappings
   * @param sheet the sheet the properties come from
   * @param metas the import metadata
   * @param myrc the connection to add statements to
   * @throws RepositoryException
   */
  @Override
  public void addProperties( URI subject, Map<String, Value> properties,
      Map<String, String> namespaces, LoadingSheetData sheet,
      ImportMetadata metas, RepositoryConnection myrc )
      throws RepositoryException {
    for ( Map.Entry<String, Value> entry : properties.entrySet() ) {
      String propname = entry.getKey();
      Value value = entry.getValue();
      URI predicate;

      if ( sheet.isLink( propname ) ) {
        // our "value" is really the label of another node, so find that node
        value = addSimpleNode( propname, value.stringValue(), namespaces,
            metas, myrc, true );
        // same key that createMetamodel uses when it caches the link relation
        predicate = getCachedRelationClass( sheet.getSubjectType()
            + sheet.getObjectType() + propname );
      }
      else {
        predicate = getCachedPropertyClass( propname );
      }

      myrc.add( subject, predicate, value );
    }
  }

  /**
   * Builds the metamodel statements (classes, relations, properties, and
   * structure data) for all sheets of the import, caching every class URI it
   * resolves or creates along the way.
   *
   * @param alldata the data to build the metamodel for
   * @param namespaces prefix-to-namespace mappings
   * @param vf the factory for literals; if <code>null</code>, a
   * {@link ValueFactoryImpl} is created
   * @return the metamodel statements
   * @throws RepositoryException
   */
  @Override
  public Model createMetamodel( ImportData alldata, Map<String, String> namespaces,
      ValueFactory vf ) throws RepositoryException {
    if ( null == vf ) {
      vf = new ValueFactoryImpl();
    }

    Model model = new TreeModel();
    ImportMetadata metas = alldata.getMetadata();
    UriBuilder schema = metas.getSchemaBuilder();
    Map<String, URI> structurelkp = new HashMap<>();

    // first pass: node classes, relation classes, and edge structures
    for ( LoadingSheetData sheet : alldata.getSheets() ) {
      String stype = sheet.getSubjectType();
      ensureInstanceClass( stype, namespaces, schema, model, vf );

      if ( sheet.isRel() ) {
        String otype = sheet.getObjectType();
        String rellabel = sheet.getRelname();
        ensureInstanceClass( otype, namespaces, schema, model, vf );
        ensureRelationClass( rellabel, namespaces, schema, model, vf );

        // save the structure data
        Collection<Statement> structures
            = SemtoolStructureManagerImpl.getEdgeStructure(
                getCachedRelationClass( rellabel ),
                getCachedInstanceClass( stype ),
                getCachedInstanceClass( otype ), schema, structurelkp,
                stype + "_" + rellabel + "_" + otype );
        model.addAll( structures );
      }
    }

    // second pass: properties (which may really be links to other nodes)
    for ( LoadingSheetData sheet : alldata.getSheets() ) {
      URI subtype = getCachedInstanceClass( sheet.getSubjectType() );
      URI edgetype = ( sheet.isRel()
          ? getCachedRelationClass( sheet.getRelname() )
          : null );

      for ( String propname : sheet.getProperties() ) {
        // check to see if we're actually a link to some
        // other node (and not really a new property)
        if ( sheet.isLink( propname ) || hasCachedInstanceClass( propname ) ) {
          if ( log.isDebugEnabled() ) {
            log.debug( "linking " + propname + " as a " + SEMONTO.has
                + " relationship to " + getCachedInstanceClass( propname ) );
          }

          // addProperties looks the relation up under this same key
          cacheRelationClass( SEMONTO.has,
              sheet.getSubjectType() + sheet.getObjectType() + propname );

          // keep the ontology info for posterity
          Collection<Statement> structures;
          if ( sheet.isRel() ) {
            structures = SemtoolStructureManagerImpl.getEdgeStructure( SEMONTO.has,
                edgetype, getCachedInstanceClass( propname ),
                schema, structurelkp,
                sheet.getRelname() + "_has_" + propname );
          }
          else {
            structures = SemtoolStructureManagerImpl.getEdgeStructure( SEMONTO.has,
                subtype, getCachedInstanceClass( propname ),
                schema, structurelkp,
                sheet.getSubjectType() + "_has_" + propname );
          }
          model.addAll( structures );
          continue;
        }

        boolean alreadyMadeProp = isUri( propname, namespaces );
        if ( !hasCachedPropertyClass( propname ) ) {
          URI created = ( alreadyMadeProp
              ? getUriFromRawString( propname, namespaces )
              : schema.build( propname ) );
          cachePropertyClass( created, propname );
        }
        URI predicate = getCachedPropertyClass( propname );

        // save the ontology info for querying db structure
        Collection<Statement> stmts;
        if ( sheet.isRel() ) {
          stmts = SemtoolStructureManagerImpl.getPropStructure( edgetype, subtype,
              getCachedInstanceClass( sheet.getObjectType() ), predicate,
              schema, structurelkp,
              sheet.getSubjectType() + "_" + sheet.getRelname()
              + "_" + sheet.getObjectType() );
        }
        else {
          stmts = SemtoolStructureManagerImpl.getPropStructure( predicate, subtype,
              schema, structurelkp, sheet.getSubjectType() + "_" + propname );
        }
        model.addAll( stmts );

        // properties given as URIs are not described again here
        if ( !alreadyMadeProp ) {
          model.add( predicate, RDFS.LABEL, vf.createLiteral( propname ) );
          model.add( predicate, RDF.TYPE, OWL.DATATYPEPROPERTY );
        }
      }
    }

    return model;
  }

  /**
   * Caches the instance class for a type name if it is not cached yet, and
   * (unless the name is itself a URI) describes it in the model as an
   * owl:Class.
   */
  private void ensureInstanceClass( String typename, Map<String, String> namespaces,
      UriBuilder schema, Model model, ValueFactory vf ) {
    if ( hasCachedInstanceClass( typename ) ) {
      return;
    }

    boolean nodeAlreadyMade = isUri( typename, namespaces );
    URI type = ( nodeAlreadyMade
        ? getUriFromRawString( typename, namespaces )
        : schema.build( typename ) );
    cacheInstanceClass( type, typename );

    if ( !nodeAlreadyMade ) {
      model.add( type, RDF.TYPE, OWL.CLASS );
      model.add( type, RDFS.LABEL, vf.createLiteral( typename ) );
      model.add( type, RDFS.SUBCLASSOF, schema.getConceptUri().build() );
    }
  }

  /**
   * Caches the relation class for a relation name if it is not cached yet, and
   * (unless the name is itself a URI) describes it in the model as an
   * owl:ObjectProperty.
   */
  private void ensureRelationClass( String rellabel, Map<String, String> namespaces,
      UriBuilder schema, Model model, ValueFactory vf ) {
    if ( hasCachedRelationClass( rellabel ) ) {
      return;
    }

    boolean relationAlreadyMade = isUri( rellabel, namespaces );
    URI reltype = ( relationAlreadyMade
        ? getUriFromRawString( rellabel, namespaces )
        : schema.build( rellabel ) );
    cacheRelationClass( reltype, rellabel );

    if ( !relationAlreadyMade ) {
      model.add( reltype, RDFS.LABEL, vf.createLiteral( rellabel ) );
      model.add( reltype, RDF.TYPE, OWL.OBJECTPROPERTY );
      model.add( reltype, RDFS.SUBPROPERTYOF, schema.getRelationUri().build() );
    }
  }

  /**
   * Replaces the checker that all cache lookups are delegated to.
   *
   * @param q the new checker
   */
  @Override
  public void setQaChecker( QaChecker q ) {
    // NOTE(review): the in-use URI set captured in the constructor is not
    // refreshed from the new checker -- confirm that this is intended
    qaer = q;
  }

  /** Delegates to {@link QaChecker#getCachedRelation(RelationCacheKey)}. */
  public URI getCachedRelation( RelationCacheKey key ) {
    return qaer.getCachedRelation( key );
  }

  /** Delegates to {@link QaChecker#getCachedInstance(String, String)}. */
  public URI getCachedInstance( String typename, String rawlabel ) {
    return qaer.getCachedInstance( typename, rawlabel );
  }

  /** Delegates to {@link QaChecker#getCachedInstanceClass(String)}. */
  public URI getCachedInstanceClass( String name ) {
    return qaer.getCachedInstanceClass( name );
  }

  /** Delegates to {@link QaChecker#getCachedRelationClass(String)}. */
  public URI getCachedRelationClass( String rel ) {
    return qaer.getCachedRelationClass( rel );
  }

  /** Delegates to {@link QaChecker#getCachedPropertyClass(String)}. */
  public URI getCachedPropertyClass( String name ) {
    return qaer.getCachedPropertyClass( name );
  }

  /** Delegates to {@link QaChecker#hasCachedPropertyClass(String)}. */
  public boolean hasCachedPropertyClass( String name ) {
    return qaer.hasCachedPropertyClass( name );
  }

  /** Delegates to {@link QaChecker#hasCachedRelationClass(String)}. */
  public boolean hasCachedRelationClass( String rel ) {
    return qaer.hasCachedRelationClass( rel );
  }

  /** Delegates to the five-argument <code>hasCachedRelation</code> of the checker. */
  public boolean hasCachedRelation( String stype, String otype, String relname,
      String slabel, String olabel ) {
    return qaer.hasCachedRelation( stype, otype, relname, slabel, olabel );
  }

  /** Delegates to {@link QaChecker#hasCachedRelation(RelationCacheKey)}. */
  public boolean hasCachedRelation( RelationCacheKey key ) {
    return qaer.hasCachedRelation( key );
  }

  /** Delegates to {@link QaChecker#hasCachedInstance(String, String)}. */
  public boolean hasCachedInstance( String typename, String rawlabel ) {
    return qaer.hasCachedInstance( typename, rawlabel );
  }

  /** Delegates to {@link QaChecker#hasCachedInstanceClass(String)}. */
  public boolean hasCachedInstanceClass( String name ) {
    return qaer.hasCachedInstanceClass( name );
  }

  /** Caches an instance class in the checker and records its URI as in-use. */
  public void cacheInstanceClass( URI uri, String label ) {
    qaer.cacheInstanceClass( uri, label );
    duplicates.add( uri );
  }

  /** Caches a relation node in the checker and records its URI as in-use. */
  public void cacheRelationNode( URI uri, String stype, String otype,
      String relname, String slabel, String olabel ) {
    qaer.cacheRelationNode( uri, stype, otype, relname, slabel, olabel );
    duplicates.add( uri );
  }

  /** Caches a relation node in the checker and records its URI as in-use. */
  public void cacheRelationNode( URI uri, RelationCacheKey key ) {
    qaer.cacheRelationNode( uri, key );
    duplicates.add( uri );
  }

  /** Caches a relation class in the checker and records its URI as in-use. */
  public void cacheRelationClass( URI uri, String rel ) {
    qaer.cacheRelationClass( uri, rel );
    duplicates.add( uri );
  }

  /** Caches an instance in the checker and records its URI as in-use. */
  public void cacheInstance( URI uri, String typelabel, String rawlabel ) {
    qaer.cacheInstance( uri, typelabel, rawlabel );
    duplicates.add( uri );
  }

  /** Caches a property class in the checker and records its URI as in-use. */
  public void cachePropertyClass( URI uri, String name ) {
    qaer.cachePropertyClass( uri, name );
    duplicates.add( uri );
  }
}