Java 讀取本地文件內容(支援 .doc、.docx、.txt、.xls、.xlsx 格式)
阿新 • • 發佈:2019-01-07
友情提示:為了方便日後使用,這裡整理了讀取本地文件內容的功能,支援標題中列出的幾種文件格式,以備不時之需。
一、所需jar包
這裡只介紹在 pom 檔案中引入所需 jar 包的配置,如下:
<!-- Apache POI artifacts (poi, poi-scratchpad, poi-ooxml, poi-ooxml-schemas), all pinned to version 3.17. -->
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.17</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.17</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.17</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.17</version> </dependency>
二、測試程式碼
package com.cdvcloud.thread.file;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.springframework.util.StringUtils;

/**
 * Extracts plain text from local files in the .txt, .doc, .docx, .xls and .xlsx formats.
 *
 * <p>All readers are best-effort: failures are printed to standard error and reported to the
 * caller through the return value instead of an exception.
 */
public class FileImport {

    /** Charset used for .txt files when the caller does not supply one. */
    private static final String DEFAULT_CHARSET = "GB2312";

    /**
     * Demo entry point: reads one sample file of every supported format and prints its text.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String path = "C:/Users/test/Desktop/ceshi2.docx";
        String path2 = "C:/Users/test/Desktop/test2.xlsx";
        String path3 = "C:/Users/test/Desktop/test.xls";
        String path4 = "C:/Users/test/Desktop/test.txt";
        String path5 = "C:/Users/test/Desktop/ceshi.doc";

        String string = importFile(path, null);
        String string2 = importFile(path2, "GB2312");
        String string3 = importFile(path3, null);
        String string4 = importFile(path4, null);
        String string5 = importFile(path5, null);

        System.out.println("docx===>>" + string);
        System.out.println("xlsx===>>" + string2);
        System.out.println("xls===>>" + string3);
        System.out.println("txt===>>" + string4);
        System.out.println("doc===>>" + string5);
    }

    /**
     * Single entry point: dispatches to the matching reader based on the file extension.
     *
     * <p>The extension is compared case-insensitively, so {@code report.DOCX} is handled the same
     * way as {@code report.docx}.
     *
     * @param path    path of the file to read
     * @param charset charset name used for .txt files; {@code null} or empty selects the default
     * @return the extracted text, or {@code null} when {@code path} is empty, the extension is not
     *         supported, or the selected reader reports a failure with {@code null}
     */
    static String importFile(String path, String charset) {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        String resTextString = null;
        try {
            // With no '.' in the path, lastIndexOf yields -1 and the whole path becomes the
            // "type", which matches none of the branches below.
            String type = path.substring(path.lastIndexOf(".") + 1);
            if ("txt".equalsIgnoreCase(type)) {
                resTextString = importTxt(path, charset);
            } else if ("doc".equalsIgnoreCase(type)) {
                resTextString = importWord3(path);
            } else if ("docx".equalsIgnoreCase(type)) {
                resTextString = importWord7(path);
            } else if ("xls".equalsIgnoreCase(type) || "xlsx".equalsIgnoreCase(type)) {
                resTextString = importExcel(path, charset);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return resTextString;
    }

    /**
     * Reads the text of a Word 97-2003 (.doc) document.
     *
     * @param path path of the .doc file
     * @return the document text, or {@code null} if the file cannot be read
     */
    static String importWord3(String path) {
        try (FileInputStream inputStream = new FileInputStream(path)) {
            HWPFDocument doc = new HWPFDocument(inputStream);
            return doc.getText().toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the text of a Word 2007+ (.docx) document.
     *
     * @param path path of the .docx file
     * @return the document text, or {@code null} if the file cannot be read
     */
    static String importWord7(String path) {
        String text = null;
        OPCPackage openPackage = null;
        try {
            openPackage = POIXMLDocument.openPackage(path);
            XWPFWordExtractor word = new XWPFWordExtractor(openPackage);
            text = word.getText();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (openPackage != null) {
                // revert() releases the package without saving. close() on a package opened
                // from a path is a save operation and could rewrite the source file.
                openPackage.revert();
            }
        }
        return text;
    }

    /**
     * Reads a plain-text file, joining its lines with {@code '\n'}.
     *
     * @param path    path of the .txt file
     * @param charset charset name of the file; {@code null} or empty selects GB2312
     * @return the file content (an empty string for an empty file), or {@code null} if the file
     *         cannot be read or the charset name is not supported
     */
    static String importTxt(String path, String charset) {
        String charsetName = StringUtils.isEmpty(charset) ? DEFAULT_CHARSET : charset;
        StringBuilder content = new StringBuilder();
        // The stream is a resource of its own so it is closed even when the reader cannot be
        // created because of an unsupported charset name.
        try (InputStream in = new FileInputStream(path);
             BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName))) {
            String line;
            boolean firstLine = true;
            while ((line = br.readLine()) != null) {
                if (!firstLine) {
                    content.append('\n');
                }
                content.append(line);
                firstLine = false;
            }
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        return content.toString();
    }

    /**
     * Reads every sheet of an Excel workbook (.xls or .xlsx) as text.
     *
     * @param path    path of the workbook; the extension (case-insensitive) selects the format
     * @param charset not used by this reader; kept so the signature matches the other readers
     * @return the workbook text as produced by {@link #getExcelVal(Workbook)}, or an empty string
     *         when the path is empty, the extension is not an Excel one, or reading fails
     */
    static String importExcel(String path, String charset) {
        if (StringUtils.isEmpty(path)) {
            return "";
        }
        String type = path.substring(path.lastIndexOf(".") + 1);
        boolean legacyFormat = "xls".equalsIgnoreCase(type);
        if (!legacyFormat && !"xlsx".equalsIgnoreCase(type)) {
            return "";
        }
        try (InputStream is = new FileInputStream(path);
             Workbook wb = openWorkbook(is, legacyFormat)) {
            return getExcelVal(wb);
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
    }

    /** Opens the stream as a binary (.xls) workbook when {@code legacyFormat} is set, else as .xlsx. */
    private static Workbook openWorkbook(InputStream is, boolean legacyFormat) throws IOException {
        if (legacyFormat) {
            return new HSSFWorkbook(is);
        }
        return new XSSFWorkbook(is);
    }

    /**
     * Renders all sheets of a workbook as text: cells of a row are separated by a single space and
     * every row ends with {@code '\n'}.
     *
     * <p>The number of columns of a sheet is taken from its first row (index 0); a sheet without
     * that row is skipped. Missing rows inside a sheet are skipped instead of ending the sheet.
     *
     * @param wb the workbook to read
     * @return the concatenated text of all sheets, possibly empty
     */
    public static String getExcelVal(Workbook wb) {
        StringBuilder result = new StringBuilder();
        int sheetNum = wb.getNumberOfSheets();
        for (int m = 0; m < sheetNum; m++) {
            Sheet sheet = wb.getSheetAt(m);
            Row firstRow = sheet.getRow(0);
            if (firstRow == null) {
                continue;
            }
            // getLastCellNum() also counts blank gaps between cells; it is -1 for a row that
            // has no cells, in which case nothing is emitted for this sheet.
            int colnum = firstRow.getLastCellNum();
            int lastRowIndex = sheet.getLastRowNum();
            for (int i = 0; i <= lastRowIndex; i++) {
                Row row = sheet.getRow(i);
                if (row == null) {
                    continue;
                }
                for (int j = 0; j < colnum; j++) {
                    result.append(String.valueOf(getCellFormatValue(row.getCell(j))));
                    result.append(j < colnum - 1 ? " " : "\n");
                }
            }
        }
        return result.toString();
    }

    /**
     * Converts a cell to a printable value according to its type.
     *
     * <p>Formula cells are evaluated through their cached result type, so a formula yielding text
     * or a boolean is read with the matching accessor.
     *
     * @param cell the cell to convert; may be {@code null}
     * @return a {@link java.util.Date} for date-formatted numeric cells, otherwise a
     *         {@code String}; the empty string for {@code null}, blank, error and other cells
     */
    @SuppressWarnings("deprecation")
    public static Object getCellFormatValue(Cell cell) {
        if (cell == null) {
            return "";
        }
        int cellType = cell.getCellType();
        if (cellType == Cell.CELL_TYPE_FORMULA) {
            cellType = cell.getCachedFormulaResultType();
        }
        switch (cellType) {
            case Cell.CELL_TYPE_NUMERIC:
                // Dates are stored as numbers; only the cell format tells them apart.
                if (DateUtil.isCellDateFormatted(cell)) {
                    return cell.getDateCellValue();
                }
                return String.valueOf(cell.getNumericCellValue());
            case Cell.CELL_TYPE_STRING:
                return cell.getRichStringCellValue().getString();
            case Cell.CELL_TYPE_BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            default:
                return "";
        }
    }
}
【注】讀取 txt 文件時如果出現亂碼,請將 charset 引數設定為該文件實際使用的編碼格式;未指定時程式預設使用 GB2312(txt 文件的編碼一般為 GB2312 或 GBK)。