LoadingSheetXmlHandler.java
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.ostrichemulators.semtool.poi.main.xlsxml;
import com.ostrichemulators.semtool.poi.main.LoadingSheetData;
import static com.ostrichemulators.semtool.util.RDFDatatypeTools.getRDFStringValue;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
/**
*
* @author ryan
*/
public class LoadingSheetXmlHandler extends XlsXmlBase {
private static final Logger log = Logger.getLogger( LoadingSheetXmlHandler.class );
private static final Map<String, Integer> formats = new HashMap<>();
private static final ValueFactory vf = new ValueFactoryImpl();
private final Map<Integer, Value> currentrowdata = new LinkedHashMap<>();
private final Map<Integer, String> proplkp = new HashMap<>();
private final Map<String, String> namespaces;
private final LoadingSheetData loadingsheet;
private final StylesTable styles;
private boolean isdate = false;
private int rownum;
private int colnum;
private int celltype;
static {
formats.put( "s", Cell.CELL_TYPE_STRING );
formats.put( "n", Cell.CELL_TYPE_NUMERIC );
formats.put( "b", Cell.CELL_TYPE_BOOLEAN );
}
public static int getColNum( String colname ) {
int sum = 0;
for ( int i = 0; i < colname.length(); i++ ) {
sum *= 26;
char charat = colname.charAt( i );
sum += ( charat - 'A' + 1 );
}
return sum - 1;
}
public LoadingSheetXmlHandler( List<String> sst, StylesTable styles,
String sheetname, Map<String, String> ns, boolean lsInMem ) {
super( sst );
this.styles = styles;
namespaces = ns;
// this will automatically convert to a relationship sheet if needed
loadingsheet = LoadingSheetData.nodesheet( sheetname, "", lsInMem );
}
public LoadingSheetData getSheet() {
return loadingsheet;
}
@Override
public void startDocument() throws SAXException {
log.debug( "Processing sheet " + loadingsheet.getName() );
super.startDocument();
}
@Override
public void startElement( String uri, String localName, String name,
Attributes attributes ) throws SAXException {
if ( null != name ) {
switch ( name ) {
case "row":
rownum = Integer.parseInt( attributes.getValue( "r" ) ) - 1;
currentrowdata.clear();
break;
case "c": // c is a new cell
String celltypestr = attributes.getValue( "t" );
celltype = ( formats.containsKey( celltypestr )
? formats.get( celltypestr ) : Cell.CELL_TYPE_BLANK );
// dates don't always have a type attribute
if ( Cell.CELL_TYPE_NUMERIC == celltype || null == celltypestr ) {
celltype = Cell.CELL_TYPE_NUMERIC;
// check if it's a date
String styleidstr = attributes.getValue( "s" );
int styleid = ( null == styleidstr ? 0
: Integer.parseInt( styleidstr ) );
XSSFCellStyle style = styles.getStyleAt( styleid );
int formatIndex = style.getDataFormat();
String formatString = style.getDataFormatString();
isdate = DateUtil.isADateFormat( formatIndex, formatString );
}
String colname = attributes.getValue( "r" );
colnum = getColNum( colname.substring( 0,
colname.lastIndexOf( Integer.toString( rownum + 1 ) ) ) );
break;
case "v": // new value for a cell
setReading( true );
resetContents();
break;
}
}
}
@Override
public void endElement( String uri, String localName, String name )
throws SAXException {
if ( "row".equals( name ) ) {
if ( 0 == rownum ) {
proplkp.clear();
String sheettype = currentrowdata.remove( 0 ).stringValue();
String subjtype = currentrowdata.remove( 1 ).stringValue();
loadingsheet.setSubjectType( subjtype );
if ( "relation".equalsIgnoreCase( sheettype ) ) {
String objtype = currentrowdata.remove( 2 ).stringValue();
loadingsheet.setObjectType( objtype );
}
for ( Map.Entry<Integer, Value> en : currentrowdata.entrySet() ) {
loadingsheet.addProperty( en.getValue().stringValue() );
proplkp.put( en.getKey(), en.getValue().stringValue() );
}
return;
}
else if ( 1 == rownum ) {
if ( loadingsheet.isRel() ) {
loadingsheet.setRelname( currentrowdata.get( 0 ).stringValue() );
}
}
fillInRow( currentrowdata, loadingsheet, proplkp );
}
if ( isReading() ) {
// If we've fully read the data, add it to our row mapping
switch ( celltype ) {
case Cell.CELL_TYPE_STRING:
String strval = this.getStringFromContentsInt();
if ( !strval.isEmpty() ) {
currentrowdata.put( colnum, getRDFStringValue( strval, namespaces, vf ) );
}
break;
case Cell.CELL_TYPE_BLANK:
break;
case Cell.CELL_TYPE_BOOLEAN:
currentrowdata.put( colnum,
vf.createLiteral( "1".equals( getContents() ) ) );
break;
case Cell.CELL_TYPE_NUMERIC:
if ( isdate ) {
currentrowdata.put( colnum,
vf.createLiteral( DateUtil.getJavaDate( Double.parseDouble( getContents() ) ) ) );
}
else {
// see if we can use an integer instead of a double
// (excel doesn't distinguish between the two, but I can't figure
// out how to get exactly what's shown in the cell via the cell style)
String contents = getContents();
Value val = ( contents.endsWith( ".0" )
? vf.createLiteral( Integer.parseInt( contents.substring( 0, contents.length() - 2 ) ) )
: vf.createLiteral( Double.parseDouble( contents ) ) );
currentrowdata.put( colnum, val );
}
break;
case Cell.CELL_TYPE_ERROR:
log.warn( "unhandled cell type: CELL_TYPE_ERROR" );
break;
case Cell.CELL_TYPE_FORMULA:
log.warn( "unhandled cell type: CELL_TYPE_FORMULA" );
break;
default:
log.warn( "unhandled cell type: " + celltype );
}
// if ( null != currentcell ) {
// log.debug( sheet.getSheetName() + "(" + currentrow.getRowNum() + ","
// + currentcell.getColumnIndex() + ") " + currentcell.getStringCellValue() );
// }
setReading( false );
}
}
private static void fillInRow( Map<Integer, Value> rowdata,
LoadingSheetData sheet, Map<Integer, String> proplkp ) {
removeComments( rowdata );
if ( rowdata.isEmpty() || !rowdata.containsKey( 1 ) ) {
return;
}
rowdata.remove( 0 );
String slabel = rowdata.remove( 1 ).stringValue();
String olabel = null;
if ( sheet.isRel() && rowdata.containsKey( 2 ) ) {
olabel = rowdata.remove( 2 ).stringValue();
}
Map<String, Value> props = new HashMap<>();
for ( Map.Entry<Integer, Value> en : rowdata.entrySet() ) {
props.put( proplkp.get( en.getKey() ), en.getValue() );
}
sheet.add( slabel, olabel, props );
}
/**
* Removes any comments from the given mapping. Any column index after the
* comment is likewise removed
*
* @param rowdata the data to remove comments from
*/
private static void removeComments( Map<Integer, Value> rowdata ) {
int commentcol = Integer.MAX_VALUE;
List<Integer> removers = new ArrayList<>();
for ( Map.Entry<Integer, Value> en : rowdata.entrySet() ) {
int col = en.getKey();
String val = en.getValue().stringValue();
// already seen a comment cell
if ( col > commentcol ) {
removers.add( col );
}
if ( val.startsWith( "#" ) ) {
commentcol = col;
removers.add( col );
}
}
for ( int col : removers ) {
rowdata.remove( col );
}
}
@Override
public void endDocument() throws SAXException {
super.endDocument();
loadingsheet.finishLoading();
if ( log.isDebugEnabled() ) {
log.debug( "Loading sheet " + loadingsheet.getName()
+ " processed. properties: "
+ Arrays.toString( loadingsheet.getProperties().toArray() ) );
log.debug( "Created " + loadingsheet.rows()
+ ( loadingsheet.isRel() ? " relationships" : " entities" ) );
}
}
}