MetadataTabXmlHandler.java

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.ostrichemulators.semtool.poi.main.xlsxml;

import com.ostrichemulators.semtool.poi.main.ImportMetadata;
import com.ostrichemulators.semtool.poi.main.ImportValidationException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.poi.ss.usermodel.Cell;
import org.openrdf.model.impl.URIImpl;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

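/**
 * SAX event handler that reads the rows of a workbook's Metadata sheet and
 * records the base URI, namespaces, and custom triples it finds in an
 * {@link ImportMetadata}.
 *
 * @author ryan
 */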
public class MetadataTabXmlHandler extends XlsXmlBase {

	private static final Logger log = Logger.getLogger( MetadataTabXmlHandler.class );
	/** Maps a cell element's "t" attribute to the matching POI cell type. */
	private static final Map<String, Integer> formats = new HashMap<>();

	/** Values of the row currently being parsed, keyed by column number. */
	private final Map<Integer, String> currentrowdata = new LinkedHashMap<>();
	private final ImportMetadata metas;
	/** Custom triples (three strings each), held until {@link #endDocument()}. */
	private final List<String[]> triples = new ArrayList<>();
	private final Map<String, String> namespaces;

	private int rownum;
	private int colnum;
	private int celltype;
	private String datanamespace = null;
	private String schemanamespace = null;
	private String baseuri = null;

	static {
		formats.put( "s", Cell.CELL_TYPE_STRING );
		formats.put( "n", Cell.CELL_TYPE_NUMERIC );
		formats.put( "b", Cell.CELL_TYPE_BOOLEAN );
	}

	/**
	 * Creates a handler that fills in the given metadata object.
	 *
	 * @param sst handed to the superclass unchanged (presumably the workbook's
	 * shared strings -- confirm against XlsXmlBase)
	 * @param metadata the object to populate; its current namespaces are used as
	 * the starting set of prefixes
	 */
	public MetadataTabXmlHandler( List<String> sst, ImportMetadata metadata ) {
		super( sst );
		metas = metadata;
		namespaces = metas.getNamespaces();
	}

	/**
	 * Gets the metadata object this handler populates.
	 *
	 * @return the same instance given to the constructor
	 */
	public ImportMetadata getMetadata() {
		return metas;
	}

	/**
	 * Tracks the current row and cell position, and starts collecting character
	 * data when a cell value begins.
	 *
	 * @param uri unused
	 * @param localName unused
	 * @param name the qualified element name; only "row", "c", and "v" are
	 * handled
	 * @param attributes the element's attributes
	 * @throws SAXException declared by the overridden signature
	 */
	@Override
	public void startElement( String uri, String localName, String name,
			Attributes attributes ) throws SAXException {
		if ( null != name ) {
			switch ( name ) {
				case "row":
					// the "r" attribute is 1-based; rownum is kept 0-based
					rownum = Integer.parseInt( attributes.getValue( "r" ) ) - 1;
					currentrowdata.clear();
					break;
				case "c": // c is a new cell
					// any cell type we don't know about is treated as blank
					Integer fmt = formats.get( attributes.getValue( "t" ) );
					celltype = ( null == fmt ? Cell.CELL_TYPE_BLANK : fmt );

					// the cell reference ends with the 1-based row number; what
					// precedes it is the column name
					String colname = attributes.getValue( "r" );
					colnum = LoadingSheetXmlHandler.getColNum( colname.substring( 0,
							colname.lastIndexOf( Integer.toString( rownum + 1 ) ) ) );
					break;
				case "v": // new value for a cell
					setReading( true );
					resetContents();
					break;
			}
		}
	}

	/**
	 * Stores a finished cell value in the current row, and processes the row
	 * once its closing tag is reached.
	 *
	 * @param uri unused
	 * @param localName unused
	 * @param name the qualified element name
	 * @throws SAXException declared by the overridden signature
	 */
	@Override
	public void endElement( String uri, String localName, String name )
			throws SAXException {

		if ( "row".equals( name ) ) {
			// column 0 never carries metadata; fillInMetadata reads columns 1-3
			currentrowdata.remove( 0 );
			fillInMetadata( currentrowdata );
		}

		if ( isReading() ) {
			// If we've fully read the data, add it to our row mapping
			if ( Cell.CELL_TYPE_STRING == celltype ) {
				String strval = getStringFromContentsInt();
				if ( !strval.isEmpty() ) {
					currentrowdata.put( colnum, strval );
				}
			}
			else {
				currentrowdata.put( colnum, getContents() );
			}

			setReading( false );
		}
	}

	/**
	 * Resets all state gathered from a previous parse, including the contents
	 * of the metadata object.
	 *
	 * @throws SAXException if the superclass throws it
	 */
	@Override
	public void startDocument() throws SAXException {
		super.startDocument();
		triples.clear();
		metas.clear();
		datanamespace = null;
		schemanamespace = null;
		baseuri = null;
	}

	/**
	 * Pushes everything gathered during the parse into the metadata object, in
	 * this order: base URI, data namespace, schema namespace, prefixes, and
	 * finally custom triples.
	 *
	 * @throws SAXException if the superclass throws it
	 * @throws ImportValidationException if a custom triple cannot be added
	 */
	@Override
	public void endDocument() throws SAXException {
		super.endDocument();

		// now set the data
		if ( null != baseuri ) {
			log.debug( "setting base uri to " + baseuri );
			metas.setBase( new URIImpl( baseuri ) );
		}
		if ( null != datanamespace ) {
			log.debug( "setting data namespace to " + datanamespace );
			metas.setDataBuilder( datanamespace );
		}
		if ( null != schemanamespace ) {
			log.debug( "setting schema namespace to " + schemanamespace );
			metas.setSchemaBuilder( schemanamespace );
		}

		for ( Map.Entry<String, String> en : namespaces.entrySet() ) {
			log.debug( "registering namespace: "
					+ en.getKey() + " => " + en.getValue() );
			metas.setNamespace( en.getKey(), en.getValue() );
		}

		for ( String[] triple : triples ) {
			log.debug( "adding custom triple: "
					+ triple[0] + " => " + triple[1] + " => " + triple[2] );

			try {
				metas.add( triple[0], triple[1], triple[2] );
			}
			catch ( Exception e ) {
				throw new ImportValidationException( ImportValidationException.ErrorType.INVALID_DATA, e );
			}
		}

		namespaces.clear();
	}

	/**
	 * Handles a row of metadata from the event parser. Column 1 is the property
	 * name (or the <code>@base</code>/<code>@prefix</code> directive), column 2
	 * the prefix or predicate, and column 3 the value. Nothing is written to the
	 * metadata object's URIs, namespaces, or triples here; the row is only
	 * recorded so {@link #endDocument()} can apply everything in order.
	 *
	 * @param rowdata the row's values keyed by column number; comments are
	 * stripped from this map in place
	 * @throws ImportValidationException if the row is missing data, duplicates
	 * a directive that may only appear once, or has a directive value that is
	 * not wrapped in angle brackets
	 */
	private void fillInMetadata( Map<Integer, String> rowdata ) {

		metas.setLegacyMode( false );
		// we want to load the base uri first, data-namespace, schema-namespace,
		// prefixes, and finally triples. so read everything first, and load later

		removeComments( rowdata );

		if ( rowdata.isEmpty() ) {
			return;
		}

		String propName = rowdata.get( 1 );
		String propertyMiddleColumn = rowdata.get( 2 );
		if ( null == propertyMiddleColumn ) {
			propertyMiddleColumn = "";
		}
		String propValue = rowdata.get( 3 );

		if ( "@base".equals( propName ) ) {
			// a repeated @base is reported before its value is even looked at
			if ( null != baseuri ) {
				throw new ImportValidationException( ImportValidationException.ErrorType.TOO_MUCH_DATA,
						"Multiple @base lines in Metadata sheet" );
			}
			baseuri = unwrapUri( "@base", propValue );
		}
		else if ( "@prefix".equals( propName ) ) {
			propValue = unwrapUri( "@prefix", propValue );

			if ( ":schema".equals( propertyMiddleColumn ) ) {
				if ( null != schemanamespace ) {
					throw new ImportValidationException( ImportValidationException.ErrorType.TOO_MUCH_DATA,
							"Multiple :schema lines in Metadata sheet" );
				}
				schemanamespace = propValue;
			}
			else if ( ":data".equals( propertyMiddleColumn ) ) {
				if ( null != datanamespace ) {
					throw new ImportValidationException( ImportValidationException.ErrorType.TOO_MUCH_DATA,
							"Multiple :data lines in Metadata sheet" );
				}
				datanamespace = propValue;
			}
			else if ( ":".equals( propertyMiddleColumn ) ) {
				/*
				 * The default namespace, ":", applies to all un-prefixed data elements.
				 * Specifically setting the schema or data namespace will override the
				 * default namespace.
				 */
				if ( null == schemanamespace ) {
					schemanamespace = propValue;
				}
				if ( null == datanamespace ) {
					datanamespace = propValue;
				}
				// we may still need to set the default namespace to handle RDF exports. keep an eye on this.
			}
			else {
				// prefixes are stored without their trailing colon
				namespaces.put( propertyMiddleColumn.replaceAll( ":$", "" ), propValue );
			}
		}
		else {
			if ( null == propName || null == propValue || propertyMiddleColumn.isEmpty() ) {
				throw new ImportValidationException( ImportValidationException.ErrorType.MISSING_DATA,
						"Not enough data for a custom triple: " + propName + ","
						+ propertyMiddleColumn + "," + propValue );
			}
			else {
				triples.add( new String[]{ propName, propertyMiddleColumn, propValue } );
			}
		}
	}

	/**
	 * Validates that a directive's value is wrapped in angle brackets and
	 * returns the text between them.
	 *
	 * @param directive the directive being processed, used in error messages
	 * @param value the raw cell value; may be null if the cell was absent
	 * @return the value without its leading "&lt;" and trailing "&gt;"
	 * @throws ImportValidationException if the value is missing or not wrapped
	 * in angle brackets
	 */
	private static String unwrapUri( String directive, String value ) {
		if ( null == value ) {
			// without this check a row with no value cell would die with an NPE
			throw new ImportValidationException( ImportValidationException.ErrorType.MISSING_DATA,
					directive + " line is missing its value" );
		}

		if ( !isBracketed( value ) ) {
			throw new ImportValidationException( ImportValidationException.ErrorType.INVALID_DATA,
					directive + " value does not appear to be a URI: \"" + value + "\"" );
		}

		return value.substring( 1, value.length() - 1 );
	}

	/**
	 * Checks whether a value is written in angle brackets.
	 *
	 * @param val the value to check
	 * @return true, if the value starts with "&lt;" and ends with "&gt;"
	 */
	private static boolean isBracketed( String val ) {
		return ( val.startsWith( "<" ) && val.endsWith( ">" ) );
	}

	/**
	 * Removes any comments from the given mapping. A comment starts at the first
	 * "#" found in the left-most value that is not wrapped in angle brackets, and
	 * runs to the end of the row: the rest of that value is cut off (or the
	 * whole value is removed, if it starts with "#"), and every column after it
	 * is removed.
	 *
	 * @param rowdata the data to remove comments from
	 */
	private static void removeComments( Map<Integer, String> rowdata ) {
		// first, find the left-most column where a comment begins
		int commentcol = Integer.MAX_VALUE;
		for ( Map.Entry<Integer, String> en : rowdata.entrySet() ) {
			String val = en.getValue();

			// a "#" inside <...> is part of a URI, not the start of a comment
			if ( !isBracketed( val ) && val.contains( "#" ) ) {
				commentcol = Math.min( commentcol, en.getKey() );
			}
		}

		if ( Integer.MAX_VALUE == commentcol ) {
			return; // no comments on this row
		}

		// keys can't be removed while iterating the entry set, so collect them
		List<Integer> removers = new ArrayList<>();
		for ( Map.Entry<Integer, String> en : rowdata.entrySet() ) {
			int col = en.getKey();
			String val = en.getValue();

			if ( col > commentcol ) {
				// everything to the right of the comment is part of the comment
				removers.add( col );
			}
			else if ( col == commentcol ) {
				// if we start with a comment, we need to remove the whole value
				// but if we have a comment inside our text, only remove the commented part
				if ( val.startsWith( "#" ) ) {
					removers.add( col );
				}
				else {
					en.setValue( val.substring( 0, val.indexOf( "#" ) ) );
				}
			}
		}

		for ( Integer col : removers ) {
			rowdata.remove( col );
		}
	}
}