本文整理了Java中org.apache.poi.hwpf.HWPFDocument类的一些代码示例,展示了HWPFDocument类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。HWPFDocument类的具体详情如下:
包路径:org.apache.poi.hwpf.HWPFDocument
类名称:HWPFDocument
[英]This class acts as the bucket that we throw all of the Word data structures into.
[中]这个类充当我们将所有Word数据结构放入的存储桶。
代码示例来源:origin: stackoverflow.com
FileInputStream fis = new FileInputStream(file.getAbsolutePath());
HWPFDocument document = new HWPFDocument(fis);
extractor = new WordExtractor(document);
String[] fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)
代码示例来源:origin: zhangyd-c/springboot-learning
/**
 * Fills a Word (.doc) template and hands the rendered bytes to the {@code write} helper.
 *
 * <p>Each {@code ${key}} placeholder in the document's main range is replaced with the
 * string form of the matching map value; a {@code null} value is rendered as an empty string.
 *
 * @param tmpFile    template document to read
 * @param contentMap placeholder names mapped to their replacement values
 * @param exportFile destination forwarded unchanged to {@code write}
 * @throws Exception if the template cannot be read or the result cannot be written
 */
public static void build(File tmpFile, Map<String, Object> contentMap, String exportFile) throws Exception {
    // try-with-resources releases the stream and the document even when
    // replacing text or exporting fails part-way through.
    try (FileInputStream tempFileInputStream = new FileInputStream(tmpFile);
         HWPFDocument document = new HWPFDocument(tempFileInputStream)) {
        // Read the text content
        Range bodyRange = document.getRange();
        // Replace the placeholders
        for (Map.Entry<String, Object> entry : contentMap.entrySet()) {
            // Objects.toString avoids a NullPointerException for entries without a value.
            String replacement = java.util.Objects.toString(entry.getValue(), "");
            bodyRange.replaceText("${" + entry.getKey() + "}", replacement);
        }
        // Export to file
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        document.write(byteArrayOutputStream);
        write(exportFile, byteArrayOutputStream);
    }
}
代码示例来源:origin: apache/tika
HWPFDocument document;
try {
document = new HWPFDocument(root);
} catch (org.apache.poi.EncryptedDocumentException e) {
throw new EncryptedDocumentException(e);
new org.apache.poi.hwpf.extractor.WordExtractor(document);
PicturesTable pictureTable = document.getPicturesTable();
PicturesSource pictures = new PicturesSource(document);
HeaderStories headerFooter = null;
Range r = document.getRange();
ListManager listManager = new ListManager(document);
for (int i = 0; i < r.numParagraphs(); i++) {
Paragraph p = r.getParagraph(i);
i += handleParagraph(p, 0, r, document, FieldsDocumentPart.MAIN, pictures, pictureTable, listManager, xhtml);
for (String paragraph : wordExtractor.getMainTextboxText()) {
xhtml.element("p", paragraph);
for (String paragraph : wordExtractor.getFootnoteText()) {
xhtml.element("p", paragraph);
代码示例来源:origin: apache/tika
/**
 * Collects the pictures of a document, indexes them by start offset, and
 * tracks which of them are not referenced by a character run of the main range.
 *
 * @param doc document whose pictures table and main range are scanned
 */
private PicturesSource(HWPFDocument doc) {
    picturesTable = doc.getPicturesTable();
    all = picturesTable.getAllPictures();

    // Build the Offset-Picture lookup map
    lookup = new HashMap<Integer, Picture>();
    for (Picture p : all) {
        lookup.put(p.getStartOffset(), p);
    }

    // Work out which Pictures aren't referenced by
    // a \u0001 in the main text
    // These are \u0008 escher floating ones, ones
    // found outside the normal text, and who
    // knows what else...
    nonU1based = new ArrayList<Picture>(all);
    Range r = doc.getRange();
    for (int i = 0; i < r.numCharacterRuns(); i++) {
        CharacterRun cr = r.getCharacterRun(i);
        if (picturesTable.hasPicture(cr)) {
            Picture p = getFor(cr);
            int at = nonU1based.indexOf(p);
            // indexOf returns -1 when the picture is not in the list (for example
            // because an earlier run already cleared its slot); set(-1, ...) would
            // throw IndexOutOfBoundsException, so only clear a slot that exists.
            if (at >= 0) {
                nonU1based.set(at, null);
            }
        }
    }
}
代码示例来源:origin: org.apache.poi/poi-examples
/**
 * Opens the Word file named by {@code args[0]} and passes the parsed document,
 * together with an output stream on {@code test.xml}, to a new {@code Word2Forrest}.
 *
 * @param args command-line arguments; the first one is the input file path
 * @throws IOException if either file cannot be opened or processed
 */
public static void main(String[] args) throws IOException {
    try (InputStream wordInput = new FileInputStream(args[0]);
         OutputStream xmlOutput = new FileOutputStream("test.xml")) {
        HWPFDocument wordDocument = new HWPFDocument(wordInput);
        // The constructor call is kept for its side effects; the instance itself is not used.
        new Word2Forrest(wordDocument, xmlOutput);
    }
}
}
代码示例来源:origin: stackoverflow.com
in = new FileInputStream("wto.doc");
doc = new HWPFDocument(in);
Range range = doc.getRange();
japan.write(outJapan);
in.close();
outUs.close();
outJapan.close();
代码示例来源:origin: stackoverflow.com
boolean isHidden = false;
try {
fs = new POIFSFileSystem(new FileInputStream(filesname));
HWPFDocument doc = new HWPFDocument(fs);
WordExtractor we = new WordExtractor(doc);
String[] paragraphs = we.getParagraphText();
System.out.println("Word Document has " + paragraphs.length
+ " paragraphs");
Range range = doc.getRange();
for (int k = 0; k < range.numParagraphs(); k++) {
org.apache.poi.hwpf.usermodel.Paragraph paragraph = range
.getParagraph(k);
paragraph.text().trim();
paragraph.text().replaceAll("\\cM?\r?\n", "");
for (int j = 0; j < paragraph.numCharacterRuns(); j++) {
org.apache.poi.hwpf.usermodel.CharacterRun cr = paragraph
.getCharacterRun(j);
if (cr.isVanished()) {
// it is hidden
System.out.println("text is hidden ");
isHidden = true;
break;
}
}
代码示例来源:origin: stackoverflow.com
FileInputStream fis = new FileInputStream(file.getAbsolutePath());
HWPFDocument docs = new HWPFDocument(fis);
extractor = new WordExtractor(docs);
String[] fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)
data+=fileData[i];
fis.close();
file = new File("file2.doc");
fis = new FileInputStream(file.getAbsolutePath());
docs = new HWPFDocument(fis);
extractor = new WordExtractor(docs);
fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)
代码示例来源:origin: org.apache.servicemix.bundles/org.apache.servicemix.bundles.poi
/**
 * Prints the text of the Word file named by {@code args[0]}: every character run
 * of every paragraph of every section, with a line break after each paragraph.
 *
 * @param args command-line arguments; the first one is the input file path
 * @throws IOException if the file cannot be opened or parsed
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the stream and the document on every exit path;
    // previously the stream was never closed and the document only on success.
    try (FileInputStream in = new FileInputStream(args[0]);
         HWPFDocument doc = new HWPFDocument(in)) {
        Range r = doc.getRange();

        System.out.println("Example you supplied:");
        System.out.println("---------------------");
        for (int x = 0; x < r.numSections(); x++) {
            Section s = r.getSection(x);
            for (int y = 0; y < s.numParagraphs(); y++) {
                Paragraph p = s.getParagraph(y);
                for (int z = 0; z < p.numCharacterRuns(); z++) {
                    // character run
                    CharacterRun run = p.getCharacterRun(z);
                    // character run text
                    String text = run.text();
                    // show us the text
                    System.out.print(text);
                }
                // use a new line at the paragraph break
                System.out.println();
            }
        }
    }
}
}
代码示例来源:origin: stackoverflow.com
import java.io.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;
/**
 * Example: loads an existing Word document, appends a bold text run after the
 * document range, and saves the result under a new file name.
 */
public class test {
    /**
     * Reads {@code D:\src.doc}, appends bold text and writes {@code D:\result.doc}.
     *
     * @param args unused
     * @throws Exception if the source cannot be read or the result cannot be written
     */
    public static void main(String[] args) throws Exception {
        // POI apparently can't create a document from scratch,
        // so we need an existing empty dummy document
        HWPFDocument doc;
        // Close the source stream as soon as the document has been constructed from it.
        try (InputStream in = new FileInputStream("D:\\src.doc")) {
            doc = new HWPFDocument(in);
        }

        Range range = doc.getRange();
        CharacterRun run = range
                .insertAfter("Text After copied file contents!");
        run.setBold(true);

        // try-with-resources closes the output even if write() throws.
        try (OutputStream out = new FileOutputStream("D:\\result.doc")) {
            doc.write(out);
            out.flush();
        }
    }
}
代码示例来源:origin: stackoverflow.com
import org.apache.poi.hwpf.HWPFDocument;
...
// Open the Word file located at yourFilePath as a byte stream.
File fin = new File(yourFilePath);
FileInputStream fis = new FileInputStream(fin);
// Build the document object from the stream.
HWPFDocument doc = new HWPFDocument(fis);
// Fetch the document text as a single string and print it to standard output.
String text = doc.getDocumentText();
System.out.println(text);
// NOTE(review): fis is not closed anywhere within this excerpt.
...
代码示例来源:origin: org.apache.poi/poi-examples
FileInputStream is = new FileInputStream(args[0]);
HSLFSlideShow ppt = new HSLFSlideShow(is);
is.close();
HWPFDocument doc = new HWPFDocument(data.getInputStream());
Range r = doc.getRange();
for(int k = 0; k < r.numParagraphs(); k++) {
Paragraph p = r.getParagraph(k);
System.out.println(p.text());
doc.write(out);
out.close();
doc.close();
} else {
FileOutputStream out = new FileOutputStream(ole.getProgId() + "-"+(oleIdx+1)+".dat");
out.write(chunk,0,count);
is.close();
out.close();
代码示例来源:origin: stackoverflow.com
// Open the file by its absolute path and build a document from the stream.
FileInputStream fis = new FileInputStream(file.getAbsolutePath());
HWPFDocument document = new HWPFDocument(fis);
// Wrap the document in an extractor and pull out its text.
WordExtractor extractor = new WordExtractor(document);
String rawText = extractor.getText();
// Post-process the raw text with stripFields; presumably this removes Word
// field markup so only the displayed text remains — confirm against the POI docs.
String displayText = extractor.stripFields(rawText);
// NOTE(review): fis is not closed anywhere within this excerpt.
代码示例来源:origin: stackoverflow.com
// Prints every non-empty comment paragraph of the Word document named by fileName.
// The locals are declared before the try block, so they stay in scope after it.
File file = null;
FileInputStream fis = null;
HWPFDocument document = null;
Range commentRange = null;
try {
file = new File(fileName);
fis = new FileInputStream(file);
document = new HWPFDocument(fis);
// The comments range is walked paragraph by paragraph.
commentRange = document.getCommentsRange();
int numComments = commentRange.numParagraphs();
for (int i = 0; i < numComments; i++) {
String comments = commentRange.getParagraph(i).text();
// Remove line terminators (an optional control-M, an optional \r, then \n)
// and trim the surrounding whitespace.
comments = comments.replaceAll("\\cM?\r?\n", "").trim();
// Paragraphs that end up empty are not printed.
if (!comments.equals("")) {
System.out.println("comment :- " + comments);
}
}
} catch (Exception e) {
// Any failure is only reported as a stack trace; execution continues after the block.
// NOTE(review): fis is not closed within this excerpt — confirm whether a finally block follows.
e.printStackTrace();
}
代码示例来源:origin: stackoverflow.com
//you can use the org.apache.poi.hwpf.extractor.WordExtractor to get the text
// Extract the paragraph text of a Word document into articleStr and report the
// dimensions of every embedded picture that ImageIO can decode.
String fileName = "example.doc";
HWPFDocument wordDoc;
// Close the input stream once the document has been constructed from it.
try (FileInputStream wordStream = new FileInputStream(fileName)) {
    wordDoc = new HWPFDocument(wordStream);
}
WordExtractor extractor = new WordExtractor(wordDoc);
String[] text = extractor.getParagraphText();
int lineCounter = text.length;
String articleStr = ""; // This string object use to store text from the word document.
for (int index = 0; index < lineCounter; ++index) {
    String paragraphStr = text[index].replaceAll("\r\n", "").replaceAll("\n", "").trim();
    int paragraphLength = paragraphStr.length();
    if (paragraphLength != 0) {
        // Strings are immutable: concat() returns a new String, so the result has to
        // be assigned back. Without the assignment articleStr stayed empty.
        articleStr = articleStr.concat(paragraphStr);
    }
}
//you can use the org.apache.poi.hwpf.usermodel.Picture to get the image
List<Picture> picturesList = wordDoc.getPicturesTable().getAllPictures();
for (int i = 0; i < picturesList.size(); ++i) {
    Picture pic = picturesList.get(i);
    // ImageIO.read returns null when no registered reader understands the data.
    BufferedImage image = ImageIO.read(new ByteArrayInputStream(pic.getContent()));
    if (image != null) {
        System.out.println("Image["+i+"]"+" ImageWidth:"+image.getWidth()+" ImageHeight:"+image.getHeight()+" Suggest Image Format:"+pic.suggestFileExtension());
    }
}
代码示例来源:origin: stackoverflow.com
fis = new FileInputStream(new File(FilePath));
XWPFDocument doc = new XWPFDocument(fis);
XWPFWordExtractor extract = new XWPFWordExtractor(doc);
fis = new FileInputStream(new File(FilePath));
HWPFDocument doc = new HWPFDocument(fis);
WordExtractor extractor = new WordExtractor(doc);
System.out.println(extractor.getText());
} catch (IOException e) {
e.printStackTrace();
代码示例来源:origin: ekoz/kbase-doc
/**
 * Adds a watermark to a Word file in place and returns the rewritten file's bytes.
 *
 * <p>Files whose lower-cased name ends with {@code docx} are handled as
 * {@code XWPFDocument}, those ending with {@code doc} as {@code HWPFDocument}.
 * Blank {@code watermark} or {@code color} values fall back to the defaults.
 *
 * @param originFile file to watermark; it is overwritten with the result
 * @param watermark  watermark text, or blank for {@code DEFAULT_WATERMARK}
 * @param color      font color, or blank for {@code DEFAULT_FONT_COLOR}
 * @return the bytes of the rewritten file, or {@code null} for any other file name
 * @throws IOException if the file cannot be read or written
 */
@Override
public byte[] handle(File originFile, String watermark, String color) throws IOException {
    watermark = StringUtils.isBlank(watermark) ? DEFAULT_WATERMARK : watermark;
    color = StringUtils.isBlank(color) ? DEFAULT_FONT_COLOR : color;
    String lowerName = originFile.getName().toLowerCase();
    if (lowerName.endsWith("docx")) {
        // Declaring the document as a resource closes it even when
        // watermarking or writing throws.
        try (InputStream in = new FileInputStream(originFile);
             XWPFDocument doc = new XWPFDocument(in)) {
            addWaterMark(doc, watermark, color);
            // The document has already been constructed from `in` when the
            // same file is reopened for writing.
            try (OutputStream out = new FileOutputStream(originFile)) {
                doc.write(out);
            }
        }
    } else if (lowerName.endsWith("doc")) {
        try (InputStream in = new FileInputStream(originFile);
             HWPFDocument doc = new HWPFDocument(in)) {
            addWaterMark(doc, watermark, color);
            try (OutputStream out = new FileOutputStream(originFile)) {
                doc.write(out);
            }
        }
    } else {
        return null;
    }
    // Read the result back through a managed stream; the stream previously handed
    // to IOUtils.toByteArray was never closed.
    try (InputStream result = new FileInputStream(originFile)) {
        return IOUtils.toByteArray(result);
    }
}
代码示例来源:origin: stackoverflow.com
// Determine a page/slide count based on the file extension.
// The lower-cased copy is used ONLY for the extension checks. Files are opened with
// the original filePath, because opening the lower-cased path fails on
// case-sensitive file systems whenever the real path contains upper-case letters.
String lowerFilePath = filePath.toLowerCase();
if (lowerFilePath.endsWith(".xls")) {
    // Streams are closed once the document has been constructed from them.
    try (FileInputStream xlsStream = new FileInputStream(filePath)) {
        HSSFWorkbook workbook = new HSSFWorkbook(xlsStream);
        int sheetNums = workbook.getNumberOfSheets();
        if (sheetNums > 0) {
            // Counted as the row breaks of the first sheet plus one.
            return workbook.getSheetAt(0).getRowBreaks().length + 1;
        }
    }
} else if (lowerFilePath.endsWith(".xlsx")) {
    XSSFWorkbook xwb = new XSSFWorkbook(filePath);
    int sheetNums = xwb.getNumberOfSheets();
    if (sheetNums > 0) {
        return xwb.getSheetAt(0).getRowBreaks().length + 1;
    }
} else if (lowerFilePath.endsWith(".docx")) {
    XWPFDocument docx = new XWPFDocument(POIXMLDocument.openPackage(filePath));
    return docx.getProperties().getExtendedProperties().getUnderlyingProperties().getPages();
} else if (lowerFilePath.endsWith(".doc")) {
    try (FileInputStream docStream = new FileInputStream(filePath)) {
        HWPFDocument wordDoc = new HWPFDocument(docStream);
        return wordDoc.getSummaryInformation().getPageCount();
    }
} else if (lowerFilePath.endsWith(".ppt")) {
    try (FileInputStream pptStream = new FileInputStream(filePath)) {
        HSLFSlideShow document = new HSLFSlideShow(pptStream);
        SlideShow slideShow = new SlideShow(document);
        return slideShow.getSlides().length;
    }
} else if (lowerFilePath.endsWith(".pptx")) {
    XSLFSlideShow xdocument = new XSLFSlideShow(filePath);
    XMLSlideShow xslideShow = new XMLSlideShow(xdocument);
    return xslideShow.getSlides().length;
}
代码示例来源:origin: stackoverflow.com
// Prints the text of the first paragraph of every table cell in the document.
InputStream fis = new FileInputStream(fileName);
// The document is built from a POIFS file system wrapped around the stream.
POIFSFileSystem fs = new POIFSFileSystem(fis);
HWPFDocument doc = new HWPFDocument(fs);
Range range = doc.getRange();
// Iterate over the tables of the document range.
TableIterator itr = new TableIterator(range);
while(itr.hasNext()){
Table table = itr.next();
for(int rowIndex = 0; rowIndex < table.numRows(); rowIndex++){
TableRow row = table.getRow(rowIndex);
for(int colIndex = 0; colIndex < row.numCells(); colIndex++){
TableCell cell = row.getCell(colIndex);
// Only paragraph 0 of the cell is printed; any further paragraphs are not read here.
System.out.println(cell.getParagraph(0).text());
}
}
}
// NOTE(review): fis is not closed anywhere within this excerpt.
代码示例来源:origin: stackoverflow.com
POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(filename));
HWPFDocument doc = new HWPFDocument(fs);
ListTables listtables = doc.getListTables();
Paragraph para;
Range range = doc.getRange();
for(int x=0; x<range.numParagraphs(); x++) {
para = range.getParagraph(x);
内容来源于网络,如有侵权,请联系作者删除!