QaChecker.java
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.ostrichemulators.semtool.rdf.engine.util;
import com.ostrichemulators.semtool.poi.main.ImportData;
import com.ostrichemulators.semtool.poi.main.LoadingSheetData;
import com.ostrichemulators.semtool.poi.main.LoadingSheetData.DataIterator;
import com.ostrichemulators.semtool.poi.main.LoadingSheetData.LoadingNodeAndPropertyValues;
import com.ostrichemulators.semtool.rdf.engine.api.IEngine;
import com.ostrichemulators.semtool.rdf.engine.api.ReificationStyle;
import static com.ostrichemulators.semtool.rdf.engine.util.EngineLoader.cleanValue;
import com.ostrichemulators.semtool.rdf.query.util.impl.ListQueryAdapter;
import com.ostrichemulators.semtool.rdf.query.util.impl.VoidQueryAdapter;
import com.ostrichemulators.semtool.util.MultiMap;
import com.ostrichemulators.semtool.util.UriBuilder;
import com.ostrichemulators.semtool.util.Utility;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Consumer;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.mapdb.Serializer;
import org.openrdf.model.Model;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.query.BindingSet;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.repository.RepositoryException;
/**
* A class to check for QA errors given a Loading Sheet and an Engine.
* Basically, this class is just a set of caches with accessor methods. It was
* lifted from the EngineLoader class to simplify that class.
*
* @author ryan
*/
public class QaChecker {
/** Class-wide logger. */
private static final Logger log = Logger.getLogger( QaChecker.class );
/** Concept instance URIs, keyed by (concept type label, instance label). */
private final Map<ConceptInstanceCacheKey, URI> dataNodes;
/** Concept class URIs, keyed by label (see {@link CacheType#CONCEPTCLASS}). */
private final Map<String, URI> instanceClassCache;
/** Relation class URIs, keyed by label (see {@link CacheType#RELATIONCLASS}). */
private final Map<String, URI> relationBaseClassCache;
/** Relation instance URIs, keyed by endpoint types, relation name, and endpoint labels. */
private final Map<RelationCacheKey, URI> relationCache;
/** Property class URIs, keyed by name (see {@link CacheType#PROPERTYCLASS}). */
private final Map<String, URI> propertyClassCache;
/** Temp file backing the MapDB store; null when the caches are plain in-memory maps. */
private final File backingfile;
/** MapDB handle owning the file-backed caches; null when the caches are plain in-memory maps. */
private final DB db;
/**
 * Identifies which of the label-to-URI class caches an operation targets.
 */
public static enum CacheType {
  /** Concept (node type) classes. */
  CONCEPTCLASS,
  /** Property classes. */
  PROPERTYCLASS,
  /** Relationship classes. */
  RELATIONCLASS
}
/**
 * Creates a checker with empty caches. The caches live in a MapDB store
 * backed by a temp file; if that file cannot be created, plain in-memory
 * maps are used instead.
 */
public QaChecker() {
  File tmp;
  try {
    tmp = File.createTempFile( "qachecker-", ".maps" );
  }
  catch ( IOException ioe ) {
    log.error( "cannot make backing store...will use in-memory caches", ioe );
    tmp = null;
  }

  if ( null != tmp ) {
    // file-backed caches
    backingfile = tmp;
    log.debug( "QA backing file is: " + backingfile );
    db = DBMaker.fileDB( tmp )
        .deleteFilesAfterClose()
        .fileMmapEnable()
        .transactionDisable()
        .asyncWriteEnable()
        .make();

    dataNodes = db.treeMapCreate( "datanodes" ).counterEnable().make();
    relationCache = db.treeMapCreate( "relations" ).counterEnable().make();
    instanceClassCache = db.treeMapCreate( "instances" )
        .keySerializer( Serializer.STRING ).counterEnable().make();
    relationBaseClassCache = db.treeMapCreate( "relationclasses" )
        .keySerializer( Serializer.STRING ).counterEnable().make();
    propertyClassCache = db.treeMapCreate( "propclasses" )
        .keySerializer( Serializer.STRING ).counterEnable().make();
  }
  else {
    // no temp file available: fall back to heap-only caches
    db = null;
    backingfile = null;
    dataNodes = new HashMap<>();
    relationCache = new HashMap<>();
    instanceClassCache = new HashMap<>();
    relationBaseClassCache = new HashMap<>();
    propertyClassCache = new HashMap<>();
  }
}
/**
 * Creates a checker and immediately fills its caches from the given engine.
 *
 * @param eng the engine handed to {@link #loadCaches(IEngine)}
 */
public QaChecker( IEngine eng ) {
this();
loadCaches( eng );
}
/**
 * Closes the backing store (if one is in use) and removes its temp file.
 * Does nothing when the caches are plain in-memory maps.
 */
public void release() {
  if ( null == db ) {
    return;
  }

  db.close();
  FileUtils.deleteQuietly( backingfile );
}
/**
 * Collects every URI currently held in any of the caches.
 *
 * @return a new set containing the values of all five caches
 */
public Set<URI> getKnownUris() {
  int expected = instanceClassCache.size()
      + relationBaseClassCache.size()
      + relationCache.size()
      + propertyClassCache.size()
      + dataNodes.size();

  Set<URI> known = new HashSet<>( expected );
  known.addAll( instanceClassCache.values() );
  known.addAll( relationBaseClassCache.values() );
  known.addAll( relationCache.values() );
  known.addAll( propertyClassCache.values() );
  known.addAll( dataNodes.values() );
  return known;
}
/**
 * Moves non-conforming rows out of the loading data. For every sheet with at
 * least one failing row, a sheet with the same headers and properties is
 * added to <code>errors</code>, the failing rows are added to it, and those
 * rows are removed from the original sheet.
 *
 * @param data the data to check for errors
 * @param errors where to put non-conforming data. If null, this function does
 * nothing
 * @param engine the engine to check against
 */
public void separateConformanceErrors( ImportData data, ImportData errors,
    IEngine engine ) {
  if ( null == errors ) {
    return;
  }

  for ( LoadingSheetData sheet : data.getSheets() ) {
    List<LoadingSheetData.LoadingNodeAndPropertyValues> failed
        = checkConformance( sheet, engine, false );
    if ( failed.isEmpty() ) {
      continue;
    }

    LoadingSheetData errsheet = LoadingSheetData.copyHeadersOf( sheet );
    errsheet.setProperties( sheet.getPropertiesAndDataTypes() );
    errors.add( errsheet );

    Set<LoadingSheetData.LoadingNodeAndPropertyValues> toremove = new HashSet<>();
    for ( LoadingSheetData.LoadingNodeAndPropertyValues row : failed ) {
      toremove.add( row );
      errsheet.add( row );
    }
    sheet.removeAll( toremove );
  }
}
/**
 * Flags the parts of a loading sheet's model that are missing from the
 * caches: the subject type, and for relationship sheets the object type and
 * relation name, plus every property name.
 *
 * @param data the data to check
 * @return the same loading sheet as the <code>data</code> arg
 */
public LoadingSheetData checkModelConformance( LoadingSheetData data ) {
  boolean subjectKnown = instanceClassCache.containsKey( data.getSubjectType() );
  data.setSubjectTypeIsError( !subjectKnown );

  if ( data.isRel() ) {
    boolean objectKnown = instanceClassCache.containsKey( data.getObjectType() );
    data.setObjectTypeIsError( !objectKnown );

    boolean relationKnown = hasCachedRelationClass( data.getRelname() );
    data.setRelationIsError( !relationKnown );
  }

  for ( Map.Entry<String, URI> prop : data.getPropertiesAndDataTypes().entrySet() ) {
    String propname = prop.getKey();
    boolean propKnown = propertyClassCache.containsKey( propname );
    data.setPropertyIsError( propname, !propKnown );
  }

  return data;
}
/**
 * Fills the caches from the given engine, using the legacy loader when the
 * engine's reification style is {@link ReificationStyle#LEGACY} and the
 * standard loader otherwise. A missing schema or data builder is logged as
 * an error, but loading is still attempted.
 *
 * @param engine the engine to read from
 */
public void loadCaches( IEngine engine ) {
  boolean missingBuilder
      = ( null == engine.getSchemaBuilder() || null == engine.getDataBuilder() );
  if ( missingBuilder ) {
    log.error( "this engine does not have a schema or data URI defined" );
  }

  ReificationStyle style = EngineUtil2.getReificationStyle( engine );
  if ( ReificationStyle.LEGACY != style ) {
    load( engine );
  }
  else {
    loadLegacy( engine );
  }
}
/**
 * Checks every row of the given sheet against the cached concept instances.
 * Each row's subject is looked up under the sheet's subject type; for
 * relationship sheets the object is also looked up under the object type.
 * The error flags on each row are updated in place.
 *
 * @param data the data to check
 * @param eng the engine to check against. Can be null if
 * <code>loadcaches</code> is false
 * @param loadcaches call {@link #loadCaches(IEngine)} first
 * @return a list of all {@link LoadingNodeAndPropertyValues} that fail the
 * check
 */
public List<LoadingNodeAndPropertyValues> checkConformance( LoadingSheetData data,
    IEngine eng, boolean loadcaches ) {
  if ( loadcaches ) {
    loadCaches( eng );
  }

  final List<LoadingNodeAndPropertyValues> failures = new ArrayList<>();
  final String subjectType = data.getSubjectType();
  final String objectType = data.getObjectType();

  for ( DataIterator rows = data.iterator(); rows.hasNext(); ) {
    LoadingNodeAndPropertyValues row = rows.next();

    boolean subjectKnown = dataNodes.containsKey(
        new ConceptInstanceCacheKey( subjectType, row.getSubject() ) );
    row.setSubjectIsError( !subjectKnown );

    if ( data.isRel() ) {
      boolean objectKnown = dataNodes.containsKey(
          new ConceptInstanceCacheKey( objectType, row.getObject() ) );
      row.setObjectIsError( !objectKnown );
    }

    if ( row.hasError() ) {
      failures.add( row );
    }
  }

  return failures;
}
/**
 * Checks for a cached instance of the given type and label. The caches must
 * have been populated (for example via {@link #loadCaches(IEngine)}) for
 * this to have any hope of returning true.
 *
 * @param type the concept type label
 * @param label the instance label
 * @return true, if the type/label matches a cached value
 */
public boolean instanceExists( String type, String label ) {
  ConceptInstanceCacheKey key = new ConceptInstanceCacheKey( type, label );
  return dataNodes.containsKey( key );
}
/**
 * Adds all of the given label-to-URI mappings to the cache selected by
 * <code>type</code>.
 *
 * @param type which class cache to add to
 * @param newtocache the mappings to add
 * @throws IllegalArgumentException if <code>type</code> is null or unhandled
 */
public void cacheUris( CacheType type, Map<String, URI> newtocache ) {
  if ( null == type ) {
    throw new IllegalArgumentException( "cache type cannot be null" );
  }

  final Map<String, URI> target;
  switch ( type ) {
    case CONCEPTCLASS:
      target = instanceClassCache;
      break;
    case PROPERTYCLASS:
      target = propertyClassCache;
      break;
    case RELATIONCLASS:
      target = relationBaseClassCache;
      break;
    default:
      throw new IllegalArgumentException( "unhandled cache type: " + type );
  }

  target.putAll( newtocache );
}
/**
 * Gets a snapshot of the cache selected by <code>type</code>.
 *
 * @param type which class cache to copy
 * @return a new map holding the cache's current contents; changes to it do
 * not affect this checker
 * @throws IllegalArgumentException if <code>type</code> is unhandled
 */
public Map<String, URI> getCache( CacheType type ) {
  final Map<String, URI> source;
  switch ( type ) {
    case CONCEPTCLASS:
      source = instanceClassCache;
      break;
    case PROPERTYCLASS:
      source = propertyClassCache;
      break;
    case RELATIONCLASS:
      source = relationBaseClassCache;
      break;
    default:
      throw new IllegalArgumentException( "unhandled cache type: " + type );
  }

  return new HashMap<>( source );
}
/**
 * Caches a batch of concept instances that all share one type label.
 *
 * @param instances instance label-to-URI mappings
 * @param typelabel the concept type label to file every instance under
 */
public void cacheConceptInstances( Map<String, URI> instances, String typelabel ) {
  instances.forEach( ( label, uri )
      -> dataNodes.put( new ConceptInstanceCacheKey( typelabel, label ), uri ) );
}
/**
 * Empties all five caches.
 */
public void clear() {
  for ( Map<?, URI> cache : Arrays.asList( instanceClassCache, dataNodes,
      relationBaseClassCache, relationCache, propertyClassCache ) ) {
    cache.clear();
  }
}
/**
 * Resets the caches to these values. All existing entries are dropped first,
 * then the contents of each argument are copied in.
 *
 * @param schemaNodes concept classes, keyed by label
 * @param dataNodes concept instances
 * @param relationClassCache relation classes, keyed by label
 * @param relationCache relation instances
 * @param propertyClassCache property classes, keyed by name
 */
public void setCaches( Map<String, URI> schemaNodes,
    Map<ConceptInstanceCacheKey, URI> dataNodes,
    Map<String, URI> relationClassCache,
    Map<RelationCacheKey, URI> relationCache, Map<String, URI> propertyClassCache ) {
  clear();

  // "this." is required wherever a parameter shadows the field it fills
  instanceClassCache.putAll( schemaNodes );
  this.dataNodes.putAll( dataNodes );
  relationBaseClassCache.putAll( relationClassCache );
  this.relationCache.putAll( relationCache );
  this.propertyClassCache.putAll( propertyClassCache );
}
/**
 * Looks up a relation class.
 *
 * @param key the relation class label
 * @return the cached URI, or null if the label is not cached
 */
public URI getCachedRelationClass( String key ) {
  final URI cached = relationBaseClassCache.get( key );
  return cached;
}
/**
 * Looks up a property class.
 *
 * @param name the property name
 * @return the cached URI, or null if the name is not cached
 */
public URI getCachedPropertyClass( String name ) {
  final URI cached = propertyClassCache.get( name );
  return cached;
}
/**
 * Looks up a relation instance.
 *
 * @param key the relation's cache key
 * @return the cached URI, or null if the key is not cached
 */
public URI getCachedRelation( RelationCacheKey key ) {
  final URI cached = relationCache.get( key );
  return cached;
}
/**
 * Looks up a concept instance.
 *
 * @param typename the concept type label
 * @param rawlabel the instance label
 * @return the cached URI, or null if the pair is not cached
 */
public URI getCachedInstance( String typename, String rawlabel ) {
  ConceptInstanceCacheKey key = new ConceptInstanceCacheKey( typename, rawlabel );
  return dataNodes.get( key );
}
/**
 * Looks up a concept class.
 *
 * @param name the concept class label
 * @return the cached URI, or null if the label is not cached
 */
public URI getCachedInstanceClass( String name ) {
  final URI cached = instanceClassCache.get( name );
  return cached;
}
/**
 * Checks whether a relation class is cached.
 *
 * @param key the relation class label
 * @return true, if the label is a key of the relation class cache
 */
public boolean hasCachedRelationClass( String key ) {
  final boolean present = relationBaseClassCache.containsKey( key );
  return present;
}
/**
 * Checks whether a property class is cached.
 *
 * @param name the property name
 * @return true, if the name is a key of the property class cache
 */
public boolean hasCachedPropertyClass( String name ) {
  final boolean present = propertyClassCache.containsKey( name );
  return present;
}
/**
 * Checks whether a relation instance is cached, building the cache key from
 * its parts.
 *
 * @param stype the subject type label
 * @param otype the object type label
 * @param relname the relation name
 * @param slabel the subject instance label
 * @param olabel the object instance label
 * @return true, if the resulting key is in the relation cache
 */
public boolean hasCachedRelation( String stype, String otype, String relname,
    String slabel, String olabel ) {
  RelationCacheKey key
      = new RelationCacheKey( stype, otype, relname, slabel, olabel );
  return hasCachedRelation( key );
}
/**
 * Checks whether a relation instance is cached.
 *
 * @param key the relation's cache key
 * @return true, if the key is in the relation cache
 */
public boolean hasCachedRelation( RelationCacheKey key ) {
  final boolean present = relationCache.containsKey( key );
  return present;
}
/**
 * Checks whether a concept instance is cached, building the cache key from
 * its parts.
 *
 * @param typename the concept type label
 * @param rawlabel the instance label
 * @return true, if the resulting key is in the instance cache
 */
public boolean hasCachedInstance( String typename, String rawlabel ) {
  ConceptInstanceCacheKey key = new ConceptInstanceCacheKey( typename, rawlabel );
  return hasCachedInstance( key );
}
/**
 * Checks whether a concept instance is cached.
 *
 * @param key the instance's cache key
 * @return true, if the key is in the instance cache
 */
public boolean hasCachedInstance( ConceptInstanceCacheKey key ) {
  final boolean present = dataNodes.containsKey( key );
  return present;
}
/**
 * Checks whether a concept class is cached.
 *
 * @param name the concept class label
 * @return true, if the label is a key of the concept class cache
 */
public boolean hasCachedInstanceClass( String name ) {
  final boolean present = instanceClassCache.containsKey( name );
  return present;
}
/**
 * Caches one property class, replacing any previous entry for the name.
 *
 * @param uri the property class URI
 * @param name the property name to file it under
 */
public void cachePropertyClass( URI uri, String name ) {
  this.propertyClassCache.put( name, uri );
}
/**
 * Caches one concept class, replacing any previous entry for the label.
 *
 * @param uri the concept class URI
 * @param label the label to file it under
 */
public void cacheInstanceClass( URI uri, String label ) {
  this.instanceClassCache.put( label, uri );
}
/**
 * Caches one relation instance, building the cache key from its parts.
 *
 * @param uri the relation URI
 * @param stype the subject type label
 * @param otype the object type label
 * @param relname the relation name
 * @param slabel the subject instance label
 * @param olabel the object instance label
 */
public void cacheRelationNode( URI uri, String stype, String otype,
    String relname, String slabel, String olabel ) {
  RelationCacheKey key
      = new RelationCacheKey( stype, otype, relname, slabel, olabel );
  cacheRelationNode( uri, key );
}
/**
 * Caches one relation instance, replacing any previous entry for the key.
 *
 * @param uri the relation URI
 * @param key the key to file it under
 */
public void cacheRelationNode( URI uri, RelationCacheKey key ) {
  this.relationCache.put( key, uri );
}
/**
 * Caches one relation class, replacing any previous entry for the label.
 *
 * @param uri the relation class URI
 * @param key the label to file it under
 */
public void cacheRelationClass( URI uri, String key ) {
  this.relationBaseClassCache.put( key, uri );
}
/**
 * Caches one concept instance, replacing any previous entry for the same
 * type/label pair.
 *
 * @param uri the instance URI
 * @param typelabel the concept type label
 * @param rawlabel the instance label
 */
public void cacheInstance( URI uri, String typelabel, String rawlabel ) {
  ConceptInstanceCacheKey key = new ConceptInstanceCacheKey( typelabel, rawlabel );
  dataNodes.put( key, uri );
}
/**
 * Fills the caches from an engine that uses legacy reification. One reusable
 * label/URI query is run several times with different bindings, then a second
 * query collects relation instances. Repository and query exceptions are
 * logged at warn level and not rethrown, so the caches may be left partially
 * filled.
 *
 * NOTE(review): nothing on this path populates the RELATIONCLASS cache; the
 * results bound to the schema's relation URI are stored as PROPERTYCLASS --
 * confirm this is intended.
 *
 * @param engine the engine to read from
 */
private void loadLegacy( IEngine engine ) {
// scratch map shared by every run of the label query below
final Map<String, URI> map = new HashMap<>();
String subpropq = "SELECT ?uri ?label WHERE { ?uri rdfs:label ?label . ?uri ?isa ?type }";
VoidQueryAdapter vqa = new VoidQueryAdapter( subpropq ) {
@Override
public void handleTuple( BindingSet set, ValueFactory fac ) {
// label -> cleaned URI
map.put( set.getValue( "label" ).stringValue(),
URI.class.cast( cleanValue( set.getValue( "uri" ), fac ) ) );
}
@Override
public void start( List<String> bnames ) {
super.start( bnames );
// each query run starts with an empty scratch map
map.clear();
}
};
vqa.useInferred( true );
UriBuilder owlb = engine.getSchemaBuilder();
try {
// pass 1: sub-properties of the schema's relation URI -> property class cache
URI type = owlb.getRelationUri().build();
vqa.bind( "type", type );
vqa.bind( "isa", RDFS.SUBPROPERTYOF );
engine.query( vqa );
Map<String, URI> props = new HashMap<>();
for ( Map.Entry<String, URI> en : map.entrySet() ) {
props.put( en.getKey(), en.getValue() );
}
cacheUris( CacheType.PROPERTYCLASS, props );
// pass 2: subclasses of the schema's concept URI -> concept class cache
vqa.bind( "isa", RDFS.SUBCLASSOF );
type = owlb.getConceptUri().build();
vqa.bind( "type", type );
engine.query( vqa );
cacheUris( CacheType.CONCEPTCLASS, map );
// pass 3: for every concept class, its rdf:type instances -> instance cache
vqa.bind( "isa", RDF.TYPE );
// copy first, because the scratch map is cleared by each query in the loop
Map<String, URI> concepts = new HashMap<>( map );
for ( Map.Entry<String, URI> en : concepts.entrySet() ) {
vqa.bind( "type", en.getValue() );
engine.query( vqa );
cacheConceptInstances( map, en.getKey() );
}
// every URI seen by the relation query; labels are fetched for all of them at once
Set<URI> needlabels = new HashSet<>();
String relq = "SELECT DISTINCT * WHERE {"
+ " ?left a ?lefttype ."
+ " ?lefttype a owl:Class ."
+ " ?right a ?righttype ."
+ " ?righttype a owl:Class ."
+ " ?left ?specrel ?right ."
+ " ?specrel rdfs:subPropertyOf ?reltype ."
+ "}";
ListQueryAdapter<URI[]> vqa2 = new ListQueryAdapter<URI[]>( relq ) {
@Override
public void handleTuple( BindingSet set, ValueFactory fac ) {
URI reltype = URI.class.cast( set.getValue( "reltype" ) );
URI lefttype = URI.class.cast( set.getValue( "lefttype" ) );
URI righttype = URI.class.cast( set.getValue( "righttype" ) );
URI left = URI.class.cast( set.getValue( "left" ) );
URI right = URI.class.cast( set.getValue( "right" ) );
URI specrel = URI.class.cast( set.getValue( "specrel" ) );
// array layout: [0]=lefttype [1]=righttype [2]=reltype [3]=left [4]=right [5]=specrel
URI[] uris = new URI[]{ lefttype, righttype, reltype, left, right, specrel };
needlabels.addAll( Arrays.asList( uris ) );
add( uris );
}
};
vqa2.useInferred( false );
List<URI[]> data = engine.query( vqa2 );
Map<URI, String> labels = Utility.getInstanceLabels( needlabels, engine );
// cache each specific relation URI under the labels of its types and endpoints
for ( URI[] uris : data ) {
cacheRelationNode( uris[5], labels.get( uris[0] ),
labels.get( uris[1] ), labels.get( uris[2] ),
labels.get( uris[3] ), labels.get( uris[4] ) );
}
}
catch ( RepositoryException | MalformedQueryException | QueryEvaluationException e ) {
log.warn( e, e );
}
}
/**
 * Fills the caches from an engine that does not use legacy reification,
 * working from the structure model produced by the engine's
 * {@link StructureManager}.
 *
 * @param engine the engine to read from
 */
private void load( IEngine engine ) {
  StructureManager sm = StructureManagerFactory.getStructureManager( engine );
  Model model = sm.rebuild( false );

  // property classes
  Set<URI> datatypeProps = new HashSet<>();
  for ( Statement s : model.filter( null, OWL.DATATYPEPROPERTY, null ) ) {
    datatypeProps.add( URI.class.cast( s.getObject() ) );
  }
  Map<URI, String> dtlabels = Utility.getInstanceLabels( datatypeProps, engine );
  cacheUris( CacheType.PROPERTYCLASS, MultiMap.lossyflip( dtlabels ) );

  // relation classes
  Set<URI> rels = new HashSet<>();
  for ( Value v : model.filter( null, RDF.PREDICATE, null ).objects() ) {
    rels.add( URI.class.cast( v ) );
  }
  Map<URI, String> rellabels = Utility.getInstanceLabels( rels, engine );
  cacheUris( CacheType.RELATIONCLASS, MultiMap.lossyflip( rellabels ) );

  // concept classes
  Map<URI, String> cpclabels = Utility.getInstanceLabels( sm.getTopLevelConcepts(), engine );
  cacheUris( CacheType.CONCEPTCLASS, MultiMap.lossyflip( cpclabels ) );

  // cache concept instances: these must be enumerated per CONCEPT class (not
  // per relation) so that each instance is filed under its concept's label,
  // which is the key used by the instance lookups
  for ( Map.Entry<URI, String> en : cpclabels.entrySet() ) {
    List<URI> instances
        = NodeDerivationTools.createInstanceList( en.getKey(), engine );
    Map<URI, String> names = Utility.getInstanceLabels( instances, engine );
    for ( Map.Entry<URI, String> en2 : names.entrySet() ) {
      cacheInstance( en2.getKey(), en.getValue(), en2.getValue() );
    }
  }

  // cache relation instances
  Model preds = model.filter( null, RDF.PREDICATE, null );
  for ( Statement s : preds ) {
    URI pred = URI.class.cast( s.getObject() );
    String relname = rellabels.get( pred );

    // get the subject from the RDFS.DOMAIN statements
    // and the object from the RDFS.RANGE statements
    for ( Statement t : model.filter( s.getSubject(), RDFS.DOMAIN, null ) ) {
      URI stype = URI.class.cast( t.getObject() );
      String stypelabel = cpclabels.get( stype );

      for ( Statement u : model.filter( s.getSubject(), RDFS.RANGE, null ) ) {
        URI otype = URI.class.cast( u.getObject() );
        String otypelabel = cpclabels.get( otype );

        Model instancemodel = NodeDerivationTools.getInstances( stype, pred,
            otype, null, engine );

        // labels are needed for both ends of every instance statement
        List<Resource> objs = new ArrayList<>();
        for ( Value v : instancemodel.objects() ) {
          objs.add( Resource.class.cast( v ) );
        }
        Map<Resource, String> lkp
            = Utility.getInstanceLabels( instancemodel.subjects(), engine );
        lkp.putAll( Utility.getInstanceLabels( objs, engine ) );

        for ( Statement instance : instancemodel ) {
          RelationCacheKey rck = new RelationCacheKey(
              stypelabel,
              otypelabel,
              relname,
              lkp.get( instance.getSubject() ),
              lkp.get( Resource.class.cast( instance.getObject() ) ) );
          cacheRelationNode( instance.getPredicate(), rck );
        }
      }
    }
  }
}
/**
 * Cache key for a concept instance: the label of its concept type plus the
 * instance's own label. Either label may be null; equality, hashing, and
 * ordering all tolerate nulls, with null ordered before any non-null label.
 */
public static class ConceptInstanceCacheKey implements Serializable,
    Comparable<ConceptInstanceCacheKey> {

  private final String typelabel;
  private final String rawlabel;

  /**
   * @param typelabel the concept type label
   * @param conceptlabel the instance label
   */
  public ConceptInstanceCacheKey( String typelabel, String conceptlabel ) {
    this.typelabel = typelabel;
    this.rawlabel = conceptlabel;
  }

  /**
   * @return the concept type label
   */
  public String getTypeLabel() {
    return typelabel;
  }

  /**
   * @return the instance label
   */
  public String getConceptLabel() {
    return rawlabel;
  }

  @Override
  public String toString() {
    return "instance " + typelabel + "<->" + rawlabel;
  }

  @Override
  public int hashCode() {
    int hash = 7;
    hash = 89 * hash + Objects.hashCode( this.typelabel );
    hash = 89 * hash + Objects.hashCode( this.rawlabel );
    return hash;
  }

  @Override
  public boolean equals( Object obj ) {
    if ( obj == null ) {
      return false;
    }
    if ( getClass() != obj.getClass() ) {
      return false;
    }
    final ConceptInstanceCacheKey other = (ConceptInstanceCacheKey) obj;
    return Objects.equals( this.typelabel, other.typelabel )
        && Objects.equals( this.rawlabel, other.rawlabel );
  }

  /**
   * Orders by type label, then by instance label. Unlike a bare
   * {@link String#compareTo(String)}, this accepts null labels, so keys that
   * are legal for {@link #equals(Object)} are also legal in a sorted map.
   */
  @Override
  public int compareTo( ConceptInstanceCacheKey o ) {
    int diff = compareNullable( typelabel, o.typelabel );
    if ( 0 == diff ) {
      diff = compareNullable( rawlabel, o.rawlabel );
    }
    return diff;
  }

  /** Null-safe string comparison; null sorts before every non-null value. */
  private static int compareNullable( String a, String b ) {
    if ( null == a ) {
      return ( null == b ? 0 : -1 );
    }
    if ( null == b ) {
      return 1;
    }
    return a.compareTo( b );
  }
}
/**
 * Cache key for a relation instance: subject and object type labels, the
 * relation name, and the subject and object instance labels. Any component
 * may be null.
 */
public static class RelationCacheKey implements Serializable,
    Comparable<RelationCacheKey> {

  private final String s;
  private final String o;
  private final String relname;
  private final String stype;
  private final String otype;

  /**
   * @param stype the subject type label
   * @param otype the object type label
   * @param relname the relation name
   * @param s the subject instance label
   * @param o the object instance label
   */
  public RelationCacheKey( String stype, String otype, String relname,
      String s, String o ) {
    this.s = s;
    this.o = o;
    this.relname = relname;
    this.stype = stype;
    this.otype = otype;
  }

  @Override
  public int hashCode() {
    int hash = 3;
    hash = 97 * hash + Objects.hashCode( this.s );
    hash = 97 * hash + Objects.hashCode( this.o );
    hash = 97 * hash + Objects.hashCode( this.relname );
    hash = 97 * hash + Objects.hashCode( this.stype );
    hash = 97 * hash + Objects.hashCode( this.otype );
    return hash;
  }

  @Override
  public boolean equals( Object obj ) {
    if ( obj == null ) {
      return false;
    }
    if ( getClass() != obj.getClass() ) {
      return false;
    }
    final RelationCacheKey other = (RelationCacheKey) obj;
    return Objects.equals( this.s, other.s )
        && Objects.equals( this.o, other.o )
        && Objects.equals( this.relname, other.relname )
        && Objects.equals( this.stype, other.stype )
        && Objects.equals( this.otype, other.otype );
  }

  @Override
  public String toString() {
    return String.format( "%s%s%s%s%s", stype, otype, s, o, relname );
  }

  /**
   * Orders keys component by component (subject type, object type, subject
   * label, object label, relation name). Comparing the concatenated
   * {@link #toString()} instead would make distinct keys compare as equal
   * whenever their components concatenate to the same text (for example
   * "ab"+"c" and "a"+"bc"), and a sorted map would then treat them as the
   * same key. This ordering returns 0 exactly when {@link #equals(Object)}
   * is true.
   */
  @Override
  public int compareTo( RelationCacheKey other ) {
    int diff = compareNullable( stype, other.stype );
    if ( 0 == diff ) {
      diff = compareNullable( otype, other.otype );
    }
    if ( 0 == diff ) {
      diff = compareNullable( s, other.s );
    }
    if ( 0 == diff ) {
      diff = compareNullable( o, other.o );
    }
    if ( 0 == diff ) {
      diff = compareNullable( relname, other.relname );
    }
    return diff;
  }

  /** Null-safe string comparison; null sorts before every non-null value. */
  private static int compareNullable( String a, String b ) {
    if ( null == a ) {
      return ( null == b ? 0 : -1 );
    }
    if ( null == b ) {
      return 1;
    }
    return a.compareTo( b );
  }
}
}