DefaultHandler를 가지고 Handler 구성 읽은 엑셀파일 내용을 List<String []> rows 에 저장

import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Created by shea on 2018/10/12.
 */
public class MyHandler extends DefaultHandler {
    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }
    /**
     * Table with styles
     */
    private StylesTable stylesTable;

    private SharedStringsTable sharedStringsTable;
    private int minColumnCount;

    private boolean vIsOpen;

    private xssfDataType nextDataType;

    private short formatIndex;
    private String formatString;
    private DataFormatter formatter;

    private int thisColumn = -1;
    // The last column printed to the output stream
    private int lastColumnNumber = -1;

    // Gathers characters as they are seen.
    private StringBuffer value;
    private String[] record;
    private List<String[]> rows = new ArrayList<String[]>();
    private boolean isCellNull = false;
    private SimpleDateFormat sdf = null;
    private static DecimalFormat df = new DecimalFormat("###########");


    //构造器
    public MyHandler(StylesTable styles,
                              SharedStringsTable strings, int cols) {
        this.stylesTable = styles;
        this.sharedStringsTable = strings;
        this.minColumnCount = cols;
        this.value = new StringBuffer();
        this.nextDataType = xssfDataType.NUMBER;
        this.formatter = new DataFormatter();
        record = new String[this.minColumnCount];
        rows.clear();
    }

    public void startElement(String uri, String localName, String name,
                             Attributes attributes) throws SAXException {

        if ("inlineStr".equals(name) || "v".equals(name)) {
            vIsOpen = true;
            value.setLength(0);
        }
        // c => cell
        else if ("c".equals(name)) {
            // Get the cell reference
            String r = attributes.getValue("r");
            int firstDigit = -1;
            for (int c = 0; c < r.length(); ++c) {
                if (Character.isDigit(r.charAt(c))) {
                    firstDigit = c;
                    break;
                }
            }
            thisColumn = nameToColumn(r.substring(0, firstDigit));

            // Set up defaults.
            this.nextDataType = xssfDataType.NUMBER;
            this.formatIndex = -1;
            this.formatString = null;
            String cellType = attributes.getValue("t");
            String cellStyleStr = attributes.getValue("s");
            if ("b".equals(cellType))
                nextDataType = xssfDataType.BOOL;
            else if ("e".equals(cellType))
                nextDataType = xssfDataType.ERROR;
            else if ("inlineStr".equals(cellType))
                nextDataType = xssfDataType.INLINESTR;
            else if ("s".equals(cellType))
                nextDataType = xssfDataType.SSTINDEX;
            else if ("str".equals(cellType))
                nextDataType = xssfDataType.FORMULA;
            else if (cellStyleStr != null) {
                // It's a number, but almost certainly one
                // with a special style or format
                int styleIndex = Integer.parseInt(cellStyleStr);
                XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                this.formatIndex = style.getDataFormat();
                this.formatString = style.getDataFormatString();
                if (this.formatString == null)
                    this.formatString = BuiltinFormats
                            .getBuiltinFormat(this.formatIndex);
            }
        }

    }

    public void endElement(String uri, String localName, String name)
            throws SAXException {

        String thisStr = null;

        // v => contents of a cell
        if ("v".equals(name)) {
            // Process the value contents as required.
            // Do now, as characters() may be called more than once
            switch (nextDataType) {

                case BOOL:
                    char first = value.charAt(0);
                    thisStr = first == '0' ? "FALSE" : "TRUE";
                    break;

                case ERROR:
                    thisStr = "\"ERROR:" + value.toString() + '"';
                    break;

                case FORMULA:
                    // A formula could result in a string value,
                    // so always add double-quote characters.
                    thisStr = value.toString();
                    break;

                case INLINESTR:
                    // TODO: have seen an example of this, so it's untested.
                    XSSFRichTextString rtsi = new XSSFRichTextString(
                            value.toString());
                    thisStr =rtsi.toString();
                    break;

                case SSTINDEX:
                    String sstIndex = value.toString();
                    try {
                        int idx = Integer.parseInt(sstIndex);
                        XSSFRichTextString rtss = new XSSFRichTextString(
                                sharedStringsTable.getEntryAt(idx));
                        thisStr = rtss.toString();
                    } catch (NumberFormatException ex) {
                        System.out.println("Failed to parse SST index '" + sstIndex
                                + "': " + ex.toString());
                    }
                    break;

                case NUMBER:
                    String n = value.toString();
                    if (formatIndex == 14 || formatIndex == 31 || formatIndex == 57 || formatIndex == 58
                            || (176<=formatIndex && formatIndex<=178) || (182<=formatIndex && formatIndex<=196)
                            || (210<=formatIndex && formatIndex<=213) || (208==formatIndex ) ) {// 日期
                        sdf = new SimpleDateFormat("yyyy-MM-dd");
                        Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(n));
                        thisStr=sdf.format(date);
                    } else if (formatIndex == 20 || formatIndex == 32 || formatIndex==183 || (200<=formatIndex && formatIndex<=209)) {//时间
                        sdf = new SimpleDateFormat("HH:mm");
                        Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(n));
                        thisStr = sdf.format(date);
                    }
                    else{
                         if(n.contains("E")){
                             String[] split = n.split("\\+");
                             String e = split[0].replaceAll("E|e", "");
                             thisStr=e.replace(".","");
                        }else {
                             thisStr = n;
                         }
                    }
                    break;
                default:
                    thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                    break;
            }
            if (lastColumnNumber == -1) {
                lastColumnNumber = 0;
            }
            if (thisStr == null || "".equals(isCellNull)) {
                isCellNull = true;
            }
            record[thisColumn] = thisStr;
            if (thisColumn > -1)
                lastColumnNumber = thisColumn;
        } else if ("row".equals(name)) {
            // Print out any missing commas if needed
            if (minColumnCount > 0) {
                // Columns are 0 based
                if (lastColumnNumber == -1) {
                    lastColumnNumber = 0;
                }
                if(record!=null &&record.length!=0){
                    rows.add(record.clone());
                    isCellNull = false;
                    for (int i = 0; i < record.length; i++) {
                        record[i] = null;
                    }
                }
            }
            lastColumnNumber = -1;
        }

    }

    public List<String[]> getRows() {
        return rows;
    }

    public void setRows(List<String[]> rows) {
        this.rows = rows;
    }

    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (vIsOpen)
            value.append(ch, start, length);
    }

    private int nameToColumn(String name) {
        int column = -1;
        for (int i = 0; i < name.length(); ++i) {
            int c = name.charAt(i);
            column = (column + 1) * 26 + c - 'A';
        }
        return column;
    }
}

 

저장해둔 핸들러 rows의 데이터를 받아와서 처리

 

		FileInputStream fis= new FileInputStream(fullFilePath);
            
    		OPCPackage opc = null;
    		opc = OPCPackage.open(fis);
    		XSSFReader xssfReader = null;

    		xssfReader = new XSSFReader(opc);
    		SharedStringsTable sst = xssfReader.getSharedStringsTable();
    		XMLReader parser = fetchSheetParser(sst);
    		XSSFReader.SheetIterator itr = (SheetIterator) xssfReader.getSheetsData();
    		StylesTable styles = xssfReader.getStylesTable();
    		ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opc);
    		
    		
    		//MyHandler handler = new MyHandler(styles, sst, minColumns);
    		
    		MyHandler handler = new MyHandler(styles,sst,70);
    		
    		while(itr.hasNext()){

    			InputStream sheetStream = itr.next();
    			
    			String shtName = itr.getSheetName();

    			if(shtName.equals("리스트")){
    				InputSource sheetSource = new InputSource(sheetStream);
    	    		
    	    		SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    				SAXParser saxParser = null;;
    				saxParser = saxFactory.newSAXParser();
    	    		
    	    		parser.setContentHandler(handler);
    				
    				
    				//SheetHandler sheetListHanler = new SheetHandler(sst,styles);
    				//saxParser.parse(sheetSource, sheetListHanler);
    				
    				parser.parse(sheetSource);
    				
    				sheetStream.close();
    				
    				
    			}
    		}
    		
    		
//    		List<NewVO> list = inputModVo(dataList);
    		List<NewVO> list = new ArrayList<NewVO>();
    		
    		List<String[]> dataList = handler.getRows();

Posted by 샤린냥

java에서는 엑셀파일을 읽을때 대용량 엑셀 같은 경우에는 메모리 이슈(out of memory)가 발생하는 경우가 많다

그래서 이번에는 대용량 엑셀파일 처리하는 걸 올릴려고 한다.


여기서 부터 실제 코딩 


//엑셀파일 읽어서 저장할 List

List<String[]> dataList = null;

OPCPackage opc = null;


opc = OPCPackage.open(file);

XSSFReader xssfReader = null;

xssfReader = new XSSFReader(opc);

XSSFReader.SheetIterator itr = (SheetIterator) xssfReader.getSheetsData();

StylesTable styles = xssfReader.getStylesTable();

ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opc);

dataList = new ArrayList<String[]>();

while(itr.hasNext()){

InputStream sheetStream = itr.next();

InputSource sheetSource = new InputSource(sheetStream);

//Sheet2ListHandler은 엑셀 data를 가져와서 SheetContentHandler(Interface)를 재정의 해서 만든 Class

        //String[] 배열을 몇개 사용할지 숫자

Sheet2ListHandler sheet2ListHandler = new Sheet2ListHandler(dataList, 22);     //

ContentHandler handler = new XSSFSheetXMLHandler(styles, strings, sheet2ListHandler, true);

SAXParserFactory saxFactory = SAXParserFactory.newInstance();

SAXParser saxParser = null;;

saxParser = saxFactory.newSAXParser();

//sax parser 방식의 xmlReader를 생성

XMLReader sheetParser = saxParser.getXMLReader();

//xml reader에 row와 cell 이벤트를 생성하는 핸들러를 설정한 후.

sheetParser.setContentHandler(handler);

//위에서 Sheet 별로 생성한 inputSource를 parsing합니다.

//이 과정에서 handler는 row와 cell이벤트를 생성하고 생성된 이벤트는 sheet2ListHandler 가 받아서

처리합니다.

sheetParser.parse(sheetSource);

  sheetStream.close();

}



//Sheet2ListHandler     

public class Sheet2ListHandler implements SheetContentsHandler {


private boolean firstCellOfRow = false;

private int currentCol = -1;

//collection 객체

private List<String[]> rows;


//collection에 추가될 객체 startRow에서 초기화함

private String[] row;

//collection 내 객체를 String[]로 잡았기 때문에 배열의 길이를 생성시 받도록 설계

private int columnCnt;

//cell 이벤트 처리 시 해당 cell의 데이터가 배열 어디에 저장되야 할지 가리키는 pointer

private int currColNum = 0;

//외부 collection 과 배열 size를 받기 위해 추가한 부분입니다.

public Sheet2ListHandler(List<String[]> rows, int columnsCnt) {

this.rows = rows;

this.columnCnt = columnsCnt;

}

//Row의 시작 부분에서 발생하는 이벤트를 처리하는 method

@Override

public void startRow(int rowNum) {

this.row = new String[columnCnt];

currColNum = 0;

}


//Row의 끝에서 발생하는 이벤트를 처리하는 method

@Override

public void endRow(int rowNum) {

//cell 이벤트에서 담아놓은 row String[]를 collection에 추가

//데이터가 하나도 없는 row는 collection 추가하지 않도록 조건 추가

boolean addFlag = false;

for(String data:row){

if(!"".equals(data)){

addFlag = true;

}

}

if(addFlag)rows.add(row);

}


//cell 이벤트 발생 시 해당 cell의 주소와 값을 받아옴.

@Override

public void cell(String cellReference, String formattedValue, XSSFComment comment) {

// 엑셀 내용중 비어있는 셀은 제외하고 쓰여지기 때문에 아래와 같이 체크후 빈셀은 ""로 채워서 자리를 채워줌

int thisCol = (new CellReference(cellReference)).getCol();

int missedCols = thisCol - currentCol - 1;

   for (int i=0; i<missedCols; i++) {

       row[currColNum++] = "";

   }

   currentCol = thisCol;

  row[currColNum++] = formattedValue == null ? "":formattedValue;

    }

@Override

public void headerFooter(String text, boolean isHeader, String tagName) {

}


'Java' 카테고리의 다른 글

JAVA SPRING + MYBATIS + ORACLE TXT 파일 BLOB 형태로 DB에 INSERT  (0) 2016.11.01
java 배열 내 문자열 확인  (0) 2016.10.04
[Tomcat] UTF-8 한글 처리  (0) 2016.04.08
이클립스 jdk 경로 지정  (0) 2016.04.07
이클립스 SVN 설치  (0) 2013.07.24
Posted by 샤린냥
이전버튼 1 이전버튼