// POIReader.java
/**
* *****************************************************************************
* Copyright 2013 SEMOSS.ORG
*
* This file is part of SEMOSS.
*
* SEMOSS is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SEMOSS is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* SEMOSS. If not, see <http://www.gnu.org/licenses/>.
* ****************************************************************************
*/
package com.ostrichemulators.semtool.poi.main;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.openrdf.model.ValueFactory;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.openrdf.model.Value;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.model.impl.ValueFactoryImpl;
/**
* Loading data into SEMOSS using Microsoft Excel Loading Sheet files
*/
public class POIReader implements ImportFileReader {

  private static final Logger logger = Logger.getLogger( POIReader.class );
  private boolean keepLoadInMemory = false;

  /**
   * Reads an arbitrary (non-loading-sheet) XLSX file and records the file's
   * URI as the source of the returned data.
   *
   * @param file the spreadsheet to read
   * @return the data read from every non-empty sheet of the workbook
   * @throws IOException if the file cannot be opened or parsed
   */
  public static ImportData readNonloadingSheet( File file ) throws IOException {
    // the stream we open is our responsibility: close it on every path,
    // including when the workbook cannot be parsed. The sheets are completely
    // read before the try block exits, so nothing needs the stream afterwards
    try ( FileInputStream stream = new FileInputStream( file ) ) {
      ImportData d = readNonloadingSheet( new XSSFWorkbook( stream ) );
      d.getMetadata().setSourceOfData( new URIImpl( file.toURI().toString() ) );
      return d;
    }
  }

  /**
   * Reads every sheet of an arbitrary workbook as plain data. For each row,
   * the text of the first cell (column 0) is added as the row's label, and
   * every other non-empty cell becomes a literal-valued property keyed by its
   * column index ("1", "2", ...). Sheets that end up with no data are skipped.
   *
   * @param workbook the workbook to read
   * @return the data from all non-empty sheets
   */
  public static ImportData readNonloadingSheet( Workbook workbook ) {
    ImportData id = new ImportData();
    // one factory is enough for the whole workbook
    ValueFactory vf = new ValueFactoryImpl();

    int sheets = workbook.getNumberOfSheets();
    for ( int sheetnum = 0; sheetnum < sheets; sheetnum++ ) {
      Sheet sheet = workbook.getSheetAt( sheetnum );
      String sheetname = workbook.getSheetName( sheetnum );

      // we need to shoehorn the arbitrary data from a spreadsheet into our
      // ImportData class, which has restrictions on the data...we're going
      // to do it by figuring out the row with the most columns, and then
      // naming every column after the first with its index: "1", "2", "3"...
      // then load everything as if it was plain data
      int rows = sheet.getLastRowNum();
      int maxcols = countColumns( sheet, rows );

      // make "properties" for each column except the first
      LoadingSheetData nlsd = new LoadingSheetData( sheetname, "A" );
      for ( int c = 1; c < maxcols; c++ ) {
        nlsd.addProperty( Integer.toString( c ) );
      }

      // lastly, fill the sheet data
      for ( int r = 0; r <= rows; r++ ) {
        Row row = sheet.getRow( r );
        if ( null == row ) {
          continue;
        }

        Map<String, Value> propmap = new HashMap<>();
        // getLastCellNum() is the last cell index PLUS ONE (or -1 for a row
        // without cells), so it is an exclusive upper bound
        int cellcount = row.getLastCellNum();
        for ( int c = 1; c < cellcount; c++ ) {
          String val = getString( row.getCell( c ) );
          if ( !val.isEmpty() ) {
            propmap.put( Integer.toString( c ), vf.createLiteral( val ) );
          }
        }

        nlsd.add( getString( row.getCell( 0 ) ), propmap );
      }

      if ( !nlsd.isEmpty() ) {
        id.add( nlsd );
      }
    }

    return id;
  }

  /**
   * Finds the widest row of a sheet.
   *
   * @param sheet the sheet to scan
   * @param lastrow the index of the last row to check (inclusive)
   * @return the largest {@code getLastCellNum()} of any existing row, or
   * {@link Integer#MIN_VALUE} if the scanned range contains no rows
   */
  private static int countColumns( Sheet sheet, int lastrow ) {
    int maxcols = Integer.MIN_VALUE;
    for ( int r = 0; r <= lastrow; r++ ) {
      Row row = sheet.getRow( r );
      if ( null != row ) {
        maxcols = Math.max( maxcols, (int) row.getLastCellNum() );
      }
    }
    return maxcols;
  }

  /**
   * Reads only the metadata of a loading-sheet file, and records the file's
   * URI as the source of the data. The underlying reader is always released.
   *
   * @param file the file to inspect
   * @return the metadata reported by the reader
   * @throws IOException
   * @throws ImportValidationException
   */
  @Override
  public ImportMetadata getMetadata( File file ) throws IOException, ImportValidationException {
    logger.debug( "getting metadata from file: " + file );
    LowMemXlsReader reader = null;
    try {
      reader = new LowMemXlsReader( file );
      ImportMetadata data = reader.getMetadata();
      data.setSourceOfData( new URIImpl( file.toURI().toString() ) );
      return data;
    }
    finally {
      if ( null != reader ) {
        reader.release();
      }
    }
  }

  /**
   * Reads the data of a loading-sheet file, and records the file's URI as the
   * source of the data. The underlying reader is always released.
   *
   * @param file the file to load
   * @return the data reported by the reader
   * @throws IOException
   * @throws ImportValidationException
   */
  @Override
  public ImportData readOneFile( File file ) throws IOException, ImportValidationException {
    logger.debug( "loading data from file: " + file );
    LowMemXlsReader rdr = null;
    try {
      rdr = new LowMemXlsReader( file );
      rdr.keepSheetDataInMemory( keepLoadInMemory );
      ImportData d = rdr.getData();
      d.getMetadata().setSourceOfData( new URIImpl( file.toURI().toString() ) );
      logger.debug( "finished reading file: " + file );
      return d;
    }
    finally {
      if ( null != rdr ) {
        rdr.release();
      }
    }
  }

  /**
   * Sets the flag that {@link #readOneFile(java.io.File)} forwards to the
   * reader's {@code keepSheetDataInMemory} setting.
   *
   * @param b the new flag value
   */
  @Override
  public void keepLoadInMemory( boolean b ) {
    keepLoadInMemory = b;
  }

  /**
   * Always return a non-null string (will be "" for null cells). Numeric and
   * boolean cells are converted with {@link Double#toString(double)} and
   * {@link Boolean#toString(boolean)}; formula cells yield the formula text,
   * not its result.
   *
   * @param cell the cell to convert (may be null)
   * @return the cell's content as text
   */
  private static String getString( Cell cell ) {
    if ( null == cell ) {
      return "";
    }

    switch ( cell.getCellType() ) {
      case Cell.CELL_TYPE_NUMERIC:
        return Double.toString( cell.getNumericCellValue() );
      case Cell.CELL_TYPE_BOOLEAN:
        return Boolean.toString( cell.getBooleanCellValue() );
      case Cell.CELL_TYPE_FORMULA:
        return cell.getCellFormula();
      case Cell.CELL_TYPE_ERROR:
        // getStringCellValue() throws IllegalStateException for error cells,
        // which would abort the whole import; use the cell's own text
        // rendering of the error instead
        return cell.toString();
      default:
        return cell.getStringCellValue();
    }
  }
}