org.apache.poi.hwpf.HWPFDocument类的使用及代码示例

x33g5p2x  于2022-01-20 转载在 其他  
字(11.8k)|赞(0)|评价(0)|浏览(2293)

本文整理了Java中org.apache.poi.hwpf.HWPFDocument类的一些代码示例,展示了HWPFDocument类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。HWPFDocument类的具体详情如下:
包路径:org.apache.poi.hwpf.HWPFDocument
类名称:HWPFDocument

HWPFDocument介绍

[英]This class acts as the bucket that we throw all of the Word data structures into.
[中]这个类充当我们将所有Word数据结构放入的存储桶。

代码示例

代码示例来源:origin: stackoverflow.com

FileInputStream fis = new FileInputStream(file.getAbsolutePath());
HWPFDocument document = new HWPFDocument(fis);
extractor = new WordExtractor(document);
String[] fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)

代码示例来源:origin: zhangyd-c/springboot-learning

/**
 * Fills a Word template with values and hands the rendered bytes to the {@code write} helper.
 *
 * <p>Every occurrence of {@code ${key}} in the document's range is replaced with the string
 * form of the value mapped to {@code key}.
 *
 * @param tmpFile the template document to read
 * @param contentMap placeholder names mapped to their replacement values; a {@code null} value
 *     is rendered as an empty string
 * @param exportFile destination forwarded to the {@code write} helper
 * @throws Exception if reading the template or writing the result fails
 */
public static void build(File tmpFile, Map<String, Object> contentMap, String exportFile) throws Exception {
  // try-with-resources releases the stream and the document even when a step below throws.
  try (FileInputStream tempFileInputStream = new FileInputStream(tmpFile);
      HWPFDocument document = new HWPFDocument(tempFileInputStream)) {
    // Read the text content
    Range bodyRange = document.getRange();
    // Replace the placeholders
    for (Map.Entry<String, Object> entry : contentMap.entrySet()) {
      Object value = entry.getValue();
      // A null value would otherwise fail on toString(); substitute an empty string instead.
      String replacement = (value == null) ? "" : value.toString();
      bodyRange.replaceText("${" + entry.getKey() + "}", replacement);
    }
    // Export to file
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    document.write(byteArrayOutputStream);
    write(exportFile, byteArrayOutputStream);
  }
}

代码示例来源:origin: apache/tika

HWPFDocument document;
try {
  document = new HWPFDocument(root);
} catch (org.apache.poi.EncryptedDocumentException e) {
    throw new EncryptedDocumentException(e);
    new org.apache.poi.hwpf.extractor.WordExtractor(document);
PicturesTable pictureTable = document.getPicturesTable();
PicturesSource pictures = new PicturesSource(document);
HeaderStories headerFooter = null;
Range r = document.getRange();
ListManager listManager = new ListManager(document);
for (int i = 0; i < r.numParagraphs(); i++) {
  Paragraph p = r.getParagraph(i);
  i += handleParagraph(p, 0, r, document, FieldsDocumentPart.MAIN, pictures, pictureTable, listManager, xhtml);
  for (String paragraph : wordExtractor.getMainTextboxText()) {
    xhtml.element("p", paragraph);
for (String paragraph : wordExtractor.getFootnoteText()) {
  xhtml.element("p", paragraph);

代码示例来源:origin: apache/tika

/**
 * Indexes all pictures of the document by start offset and determines which of them are not
 * attached to a character run of the document's range.
 *
 * @param doc the document whose pictures table and range are scanned
 */
private PicturesSource(HWPFDocument doc) {
  picturesTable = doc.getPicturesTable();
  all = picturesTable.getAllPictures();

  // Build the Offset-Picture lookup map
  lookup = new HashMap<Integer, Picture>();
  for (Picture p : all) {
    lookup.put(p.getStartOffset(), p);
  }

  // Work out which Pictures aren't referenced by
  //  a \u0001 in the main text
  // These are \u0008 escher floating ones, ones
  //  found outside the normal text, and who
  //  knows what else...
  nonU1based = new ArrayList<Picture>(all);
  Range r = doc.getRange();
  for (int i = 0; i < r.numCharacterRuns(); i++) {
    CharacterRun cr = r.getCharacterRun(i);
    if (picturesTable.hasPicture(cr)) {
      Picture p = getFor(cr);
      int at = nonU1based.indexOf(p);
      // indexOf yields -1 when the picture is not in the list;
      // set(-1, ...) would throw IndexOutOfBoundsException, so skip that case.
      if (at >= 0) {
        nonU1based.set(at, null);
      }
    }
  }
}

代码示例来源:origin: org.apache.poi/poi-examples

/**
 * Opens the Word document named by {@code args[0]} and passes it, together with an output
 * stream for {@code test.xml}, to a new {@code Word2Forrest}.
 *
 * @param args command-line arguments; the first one is the input file path
 * @throws IOException if opening or processing either file fails
 */
public static void main(String[] args) throws IOException {
  try (InputStream wordInput = new FileInputStream(args[0]);
      OutputStream xmlOutput = new FileOutputStream("test.xml")) {
    HWPFDocument wordDocument = new HWPFDocument(wordInput);
    new Word2Forrest(wordDocument, xmlOutput);
  }
}
}

代码示例来源:origin: stackoverflow.com

in = new FileInputStream("wto.doc");
doc = new HWPFDocument(in);
Range range = doc.getRange();
japan.write(outJapan);
in.close();
outUs.close();
outJapan.close();

代码示例来源:origin: stackoverflow.com

boolean isHidden = false;
 try {
   fs = new POIFSFileSystem(new FileInputStream(filesname));
   HWPFDocument doc = new HWPFDocument(fs);
   WordExtractor we = new WordExtractor(doc);
   String[] paragraphs = we.getParagraphText();
   System.out.println("Word Document has " + paragraphs.length
       + " paragraphs");
   Range range = doc.getRange();
   for (int k = 0; k < range.numParagraphs(); k++) {
     org.apache.poi.hwpf.usermodel.Paragraph paragraph = range
         .getParagraph(k);
     paragraph.text().trim();
     paragraph.text().replaceAll("\\cM?\r?\n", "");
     for (int j = 0; j < paragraph.numCharacterRuns(); j++) {
       org.apache.poi.hwpf.usermodel.CharacterRun cr = paragraph
           .getCharacterRun(j);
       if (cr.isVanished()) {
         // it is hidden
         System.out.println("text is hidden ");
         isHidden = true;
         break;
       }
     }

代码示例来源:origin: stackoverflow.com

FileInputStream fis = new FileInputStream(file.getAbsolutePath());
HWPFDocument docs = new HWPFDocument(fis);
extractor = new WordExtractor(docs);
String[] fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)
    data+=fileData[i];
fis.close();
file = new File("file2.doc");
fis = new FileInputStream(file.getAbsolutePath());
docs = new HWPFDocument(fis);
extractor = new WordExtractor(docs);
fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)

代码示例来源:origin: org.apache.servicemix.bundles/org.apache.servicemix.bundles.poi

/**
 * Prints the text of every character run of a Word document, ending the output line after
 * each paragraph.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the document to read
 * @throws IOException if reading the document fails
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the stream and the document even if printing fails midway.
    try (FileInputStream in = new FileInputStream(args[0]);
         HWPFDocument doc = new HWPFDocument(in)) {
        Range r = doc.getRange();

        System.out.println("Example you supplied:");
        System.out.println("---------------------");
        for (int x = 0; x < r.numSections(); x++) {
            Section s = r.getSection(x);
            for (int y = 0; y < s.numParagraphs(); y++) {
                Paragraph p = s.getParagraph(y);
                for (int z = 0; z < p.numCharacterRuns(); z++) {
                    // character run
                    CharacterRun run = p.getCharacterRun(z);
                    // show us the character run text
                    System.out.print(run.text());
                }
                // use a new line at the paragraph break
                System.out.println();
            }
        }
    }
}
}

代码示例来源:origin: stackoverflow.com

import java.io.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;

/**
 * Loads an existing Word document, inserts a bold run of text after the document's range and
 * saves the result to a second file.
 */
public class test {
  /**
   * Runs the example against the hard-coded source and result paths.
   *
   * @param args unused
   * @throws Exception if reading the source or writing the result fails
   */
  public static void main(String[] args) throws Exception {
    // POI apparently can't create a document from scratch,
    // so we need an existing empty dummy document
    HWPFDocument doc;
    // Close the input stream as soon as the document has been constructed from it.
    try (InputStream in = new FileInputStream("D:\\src.doc")) {
      doc = new HWPFDocument(in);
    }

    Range range = doc.getRange();
    CharacterRun run = range
        .insertAfter("Text After copied file contents!");
    run.setBold(true);

    // try-with-resources closes the output stream even when write() fails.
    try (OutputStream out = new FileOutputStream("D:\\result.doc")) {
      doc.write(out);
    }
  }
}

代码示例来源:origin: stackoverflow.com

import org.apache.poi.hwpf.HWPFDocument;
...
File fin = new File(yourFilePath);
FileInputStream fis = new FileInputStream(fin);
HWPFDocument doc = new HWPFDocument(fis);
String text = doc.getDocumentText();
System.out.println(text);
...

代码示例来源:origin: org.apache.poi/poi-examples

FileInputStream is = new FileInputStream(args[0]);
HSLFSlideShow ppt = new HSLFSlideShow(is);
is.close();
        HWPFDocument doc = new HWPFDocument(data.getInputStream());
        Range r = doc.getRange();
        for(int k = 0; k < r.numParagraphs(); k++) {
          Paragraph p = r.getParagraph(k);
          System.out.println(p.text());
        doc.write(out);
        out.close();
        doc.close();
       }  else {
        FileOutputStream out = new FileOutputStream(ole.getProgId() + "-"+(oleIdx+1)+".dat");
         out.write(chunk,0,count);
        is.close();
        out.close();

代码示例来源:origin: stackoverflow.com

FileInputStream fis = new FileInputStream(file.getAbsolutePath());
 HWPFDocument document = new HWPFDocument(fis);
 WordExtractor extractor = new WordExtractor(document);
 String rawText = extractor.getText();
 String displayText = extractor.stripFields(rawText);

代码示例来源:origin: stackoverflow.com

// Prints every non-empty paragraph found in the document's comments range.
File file = null;
FileInputStream fis = null;
HWPFDocument document = null;
Range commentRange = null;
try {
  file = new File(fileName);
  fis = new FileInputStream(file);
  document = new HWPFDocument(fis);
  commentRange = document.getCommentsRange();
  int numComments = commentRange.numParagraphs();
  for (int i = 0; i < numComments; i++) {
    String comments = commentRange.getParagraph(i).text();
    // Drop the paragraph terminator characters before trimming.
    comments = comments.replaceAll("\\cM?\r?\n", "").trim();
    if (!comments.isEmpty()) {
      System.out.println("comment :-  " + comments);
    }
  }
} catch (Exception e) {
  e.printStackTrace();
} finally {
  // Always release the file handle, whether or not reading succeeded.
  if (fis != null) {
    try {
      fis.close();
    } catch (java.io.IOException ignored) {
      // Nothing useful can be done if closing a read-only stream fails.
    }
  }
}

代码示例来源:origin: stackoverflow.com

// Use org.apache.poi.hwpf.extractor.WordExtractor to get the text.
String fileName = "example.doc";
HWPFDocument wordDoc;
// Close the input stream once the document has been constructed from it.
try (FileInputStream in = new FileInputStream(fileName)) {
  wordDoc = new HWPFDocument(in);
}
WordExtractor extractor = new WordExtractor(wordDoc);
String[] text = extractor.getParagraphText();
// Strings are immutable: String.concat returns a new string and leaves the receiver
// untouched, so the text has to be accumulated explicitly.
StringBuilder articleBuilder = new StringBuilder();
for (String paragraph : text) {
  String paragraphStr = paragraph.replaceAll("\r\n", "").replaceAll("\n", "").trim();
  if (!paragraphStr.isEmpty()) {
    articleBuilder.append(paragraphStr);
  }
}
String articleStr = articleBuilder.toString(); // Holds the text collected from the document.

// Use org.apache.poi.hwpf.usermodel.Picture to get the images.
List<Picture> picturesList = wordDoc.getPicturesTable().getAllPictures();
for (int i = 0; i < picturesList.size(); ++i) {
  Picture pic = picturesList.get(i);
  BufferedImage image = ImageIO.read(new ByteArrayInputStream(pic.getContent()));
  // ImageIO.read returns null when no registered reader can decode the data.
  if (image != null) {
    System.out.println("Image["+i+"]"+" ImageWidth:"+image.getWidth()+" ImageHeight:"+image.getHeight()+" Suggest Image Format:"+pic.suggestFileExtension());
  }
}

代码示例来源:origin: stackoverflow.com

fis = new FileInputStream(new File(FilePath));
XWPFDocument doc = new XWPFDocument(fis);
XWPFWordExtractor extract = new XWPFWordExtractor(doc);
  fis = new FileInputStream(new File(FilePath));
  HWPFDocument doc = new HWPFDocument(fis);
  WordExtractor extractor = new WordExtractor(doc);
  System.out.println(extractor.getText());
} catch (IOException e) {
  e.printStackTrace();

代码示例来源:origin: ekoz/kbase-doc

/**
 * Adds a watermark to a Word file in place and returns the bytes of the rewritten file.
 *
 * <p>Files whose lower-cased name ends with {@code docx} are handled as {@code XWPFDocument},
 * those ending with {@code doc} as {@code HWPFDocument}.
 *
 * @param originFile the file to watermark; it is overwritten with the result
 * @param watermark the watermark text; {@code DEFAULT_WATERMARK} is used when blank
 * @param color the font color; {@code DEFAULT_FONT_COLOR} is used when blank
 * @return the content of the rewritten file, or {@code null} when the file name matches
 *     neither extension
 * @throws IOException if reading or writing the file fails
 */
@Override
public byte[] handle(File originFile, String watermark, String color) throws IOException {
  watermark = StringUtils.isBlank(watermark) ? DEFAULT_WATERMARK : watermark;
  color = StringUtils.isBlank(color) ? DEFAULT_FONT_COLOR : color;
  String lowerName = originFile.getName().toLowerCase();

  // In both branches the document is loaded from the input stream before the same file is
  // reopened for writing; try-with-resources closes everything even when a step fails.
  if (lowerName.endsWith("docx")) {
    try (InputStream in = new FileInputStream(originFile);
        XWPFDocument doc = new XWPFDocument(in)) {
      addWaterMark(doc, watermark, color);
      try (OutputStream out = new FileOutputStream(originFile)) {
        doc.write(out);
      }
    }
  } else if (lowerName.endsWith("doc")) {
    try (InputStream in = new FileInputStream(originFile);
        HWPFDocument doc = new HWPFDocument(in)) {
      addWaterMark(doc, watermark, color);
      try (OutputStream out = new FileOutputStream(originFile)) {
        doc.write(out);
      }
    }
  } else {
    return null;
  }

  // Read the result back through a stream that is reliably closed afterwards.
  try (InputStream result = new FileInputStream(originFile)) {
    return IOUtils.toByteArray(result);
  }
}

代码示例来源:origin: stackoverflow.com

// The lower-cased copy is used ONLY to match the extension. Files are opened through the
// original filePath, because a lower-cased path does not resolve for mixed-case names on
// case-sensitive file systems.
String lowerFilePath = filePath.toLowerCase();
if (lowerFilePath.endsWith(".xls")) {
  try (FileInputStream in = new FileInputStream(filePath)) {
    HSSFWorkbook workbook = new HSSFWorkbook(in);
    int sheetNums = workbook.getNumberOfSheets();
    if (sheetNums > 0) {
      // One page more than the number of row breaks on the first sheet.
      return workbook.getSheetAt(0).getRowBreaks().length + 1;
    }
  }
} else if (lowerFilePath.endsWith(".xlsx")) {
  XSSFWorkbook xwb = new XSSFWorkbook(filePath);
  int sheetNums = xwb.getNumberOfSheets();
  if (sheetNums > 0) {
    return xwb.getSheetAt(0).getRowBreaks().length + 1;
  }
} else if (lowerFilePath.endsWith(".docx")) {
  XWPFDocument docx = new XWPFDocument(POIXMLDocument.openPackage(filePath));
  return docx.getProperties().getExtendedProperties().getUnderlyingProperties().getPages();
} else if (lowerFilePath.endsWith(".doc")) {
  try (FileInputStream in = new FileInputStream(filePath)) {
    HWPFDocument wordDoc = new HWPFDocument(in);
    return wordDoc.getSummaryInformation().getPageCount();
  }
} else if (lowerFilePath.endsWith(".ppt")) {
  try (FileInputStream in = new FileInputStream(filePath)) {
    HSLFSlideShow document = new HSLFSlideShow(in);
    SlideShow slideShow = new SlideShow(document);
    return slideShow.getSlides().length;
  }
} else if (lowerFilePath.endsWith(".pptx")) {
  XSLFSlideShow xdocument = new XSLFSlideShow(filePath);
  XMLSlideShow xslideShow = new XMLSlideShow(xdocument);
  return xslideShow.getSlides().length;
}

代码示例来源:origin: stackoverflow.com

// Walk every table in the document and print the first paragraph of each cell.
InputStream fis = new FileInputStream(fileName);
POIFSFileSystem fs = new POIFSFileSystem(fis);
HWPFDocument doc = new HWPFDocument(fs);
Range range = doc.getRange();
for (TableIterator itr = new TableIterator(range); itr.hasNext(); ) {
  Table table = itr.next();
  // Rows first, then the cells of each row, in document order.
  for (int rowIndex = 0; rowIndex < table.numRows(); rowIndex++) {
    TableRow row = table.getRow(rowIndex);
    for (int colIndex = 0; colIndex < row.numCells(); colIndex++) {
      String firstParagraphText = row.getCell(colIndex).getParagraph(0).text();
      System.out.println(firstParagraphText);
    }
  }
}

代码示例来源:origin: stackoverflow.com

POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(filename));
HWPFDocument doc = new HWPFDocument(fs);
ListTables listtables = doc.getListTables();
Paragraph para;
Range range = doc.getRange();
for(int x=0; x<range.numParagraphs(); x++) {
  para = range.getParagraph(x);

相关文章