首行开头String无法转Int问题

本文讨论了在文件读取过程中,如何正确处理字符串转换为整数时出现的错误,通过添加正则表达式过滤来解决问题,并提供了具体代码示例。

一个很奇怪的问题:在读取文件时,如果把文件第一行开头的字符串(String)转换为 int,会抛出 NumberFormatException。

如:

   // Read the input line by line; each line is split on tabs and its first field is parsed as an int.
   while ((lineStr = br.readLine()) != null) {
    tmpStr = lineStr.split("\t");
    try {
     // First tab-separated field of the line, with leading/trailing whitespace trimmed.
     String s1 = tmpStr[0].trim();
     System.out.println("s1:" + s1);
     // NOTE(review): for the first line of the file this call throws even though s1 prints as a
     // normal-looking number — presumably an invisible leading character (e.g. a byte-order mark)
     // that trim() does not strip; confirm against the actual input file.
     int s2 = Integer.parseInt(s1);
     System.out.println("s2:" + s2);
    } catch (NumberFormatException e) {
     // A failed parse is only reported; the loop then continues with the next line.
     e.printStackTrace();
    }
   }

s1 能打印出看起来正常的数字,而用 Integer.parseInt 转换成 s2 时却报错!

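原因:文件头信息问题——文件开头带有不可见的标记字符(例如 UTF-8 的 BOM,即 \uFEFF),它会被读进第一行的第一个字段。该字符打印出来看不见,trim() 也不会把它去掉,所以 Integer.parseInt 解析失败。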

解决方法:加个正则表达式过滤一下

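代码中修改成:String s1 = tmpStr[0].replaceAll("[^0-9.]", "").trim();即可。注意:这个正则会保留小数点并去掉负号;如果字段是整数且可能为负数,可以改用 "[^0-9-]";如果只想去掉 BOM,也可以直接用 tmpStr[0].replace("\uFEFF", "").trim()。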

 

参考:http://stackoverflow.com/questions/4599061/unable-to-parse-as-integer

错误信息:文档检查过程中发生错误: The supplied data appears to be in the OLE2 Format. You are calling the part of POI that deals with OOXML (Office Open XML) Documents. You need to call a different part of POI to process this data (eg HSSF instead of XSSF) service代码: public DocumentCheckResult checkDocument(MultipartFile file) { DocumentCheckResult result = new DocumentCheckResult(); try { XWPFDocument document = new XWPFDocument(file.getInputStream()); // 检查文档内容 checkDocumentContent(document, result); document.close(); } catch (IOException e) { result.addError("文件读取失败: " + e.getMessage()); } catch (Exception e) { result.addError("文档检查过程中发生错误: " + e.getMessage()); } return result; } private void checkDocumentContent(XWPFDocument document, DocumentCheckResult result) { List<XWPFParagraph> paragraphs = document.getParagraphs(); // 检查各个部分 checkRangeSection(paragraphs, result); checkReferenceSection(paragraphs, result); checkTermDefinitionSection(paragraphs, result); checkOtherSections(paragraphs, result); checkFiguresAndTables(document, result); checkAppendixSection(document, result); } private void checkRangeSection(List<XWPFParagraph> paragraphs, DocumentCheckResult result) { boolean inRangeSection = false; boolean foundRangeTitle = false; int rangeStartIndex = -1; for (int i = 0; i < paragraphs.size(); i++) { XWPFParagraph paragraph = paragraphs.get(i); String text = paragraph.getText(); if (text != null && text.startsWith("1 范围")) { foundRangeTitle = true; rangeStartIndex = i; inRangeSection = true; // 检查标题格式 checkHeading1Format(paragraph, "1 范围", result); continue; } if (inRangeSection) { // 检查是否进入下一节 if (text != null && (text.matches("\\d+ .*") || text.startsWith("附录"))) { if (!text.startsWith("1 ")) { // 不是1开头的其他章节 inRangeSection = false; break; } } // 检查正文格式 if (!paragraph.getStyle().contains("Heading")) { checkRangeContentFormat(paragraph, result); } } } if (!foundRangeTitle) { result.addError("未找到'1 范围'章节标题"); } } private void checkHeading1Format(XWPFParagraph paragraph, 
String expectedText, DocumentCheckResult result) { // 检查字体 String fontName = getParagraphFontName(paragraph); if (!"黑体".equals(fontName)) { result.addError("'" + expectedText + "'标题字体应为黑体,当前为: " + fontName); } // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("'" + expectedText + "'标题字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式 checkHeading1ParagraphFormat(paragraph, expectedText, result); } private void checkHeading1ParagraphFormat(XWPFParagraph paragraph, String sectionName, DocumentCheckResult result) { // 检查段前段后间距 int spacingBefore = paragraph.getSpacingBefore(); int spacingAfter = paragraph.getSpacingAfter(); // 0.5行大约等于15磅(基于默认行距) if (spacingBefore != 15) { result.addError("'" + sectionName + "'标题段前间距应为0.5行,当前为: " + (spacingBefore == 0 ? "0" : (spacingBefore/30.0)) + "行"); } if (spacingAfter != 15) { result.addError("'" + sectionName + "'标题段后间距应为0.5行,当前为: " + (spacingAfter == 0 ? "0" : (spacingAfter/30.0)) + "行"); } // 检查行距 if (paragraph.getSpacingLineRule() != LineSpacingRule.AUTO || Math.abs(paragraph.getSpacingBetween() - 1.5) > 0.01) { result.addError("'" + sectionName + "'标题行距应为1.5倍行距"); } } private void checkRangeContentFormat(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查字体 String asianFontName = getParagraphAsianFontName(paragraph); String latinFontName = getParagraphLatinFontName(paragraph); if (!"宋体".equals(asianFontName)) { result.addError("'范围'章节正文字体(中文)应为宋体,当前为: " + asianFontName); } if (!"Times New Roman".equals(latinFontName)) { result.addError("'范围'章节正文字体(英文/数字)应为Times New Roman,当前为: " + latinFontName); } // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("'范围'章节正文字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式 checkNormalParagraphFormat(paragraph, "'范围'章节正文", 1.5, result); // 1.5倍行距 } private void checkReferenceSection(List<XWPFParagraph> paragraphs, DocumentCheckResult result) { boolean inReferenceSection = 
false; boolean foundReferenceTitle = false; boolean foundDefaultText = false; String defaultText = "下列文件对于本文件的应用是必不可少的。凡是注日期的引用文件,仅注日期的版本是用与本文件。凡是不注日期的引用文件,其最新版本(包括所有的修改单)适用于本文件。"; for (int i = 0; i < paragraphs.size(); i++) { XWPFParagraph paragraph = paragraphs.get(i); String text = paragraph.getText(); if (text != null && text.startsWith("2 规范性引用文件")) { foundReferenceTitle = true; inReferenceSection = true; // 检查标题格式 checkHeading1Format(paragraph, "2 规范性引用文件", result); continue; } if (inReferenceSection) { // 检查是否进入下一节 if (text != null && (text.matches("\\d+ .*") || text.startsWith("附录"))) { if (!text.startsWith("2 ")) { // 不是2开头的其他章节 inReferenceSection = false; break; } } // 检查默认文本 if (text != null && text.contains(defaultText)) { foundDefaultText = true; // 检查段落格式(特殊要求) checkReferenceDefaultTextFormat(paragraph, result); } // 检查正文格式 if (!paragraph.getStyle().contains("Heading") && !foundDefaultText) { checkReferenceContentFormat(paragraph, result); } } } if (!foundReferenceTitle) { result.addError("未找到'2 规范性引用文件'章节标题"); } if (!foundDefaultText) { result.addError("未找到规范性引用文件章节的默认文本"); } } private void checkReferenceDefaultTextFormat(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查段落格式(特殊要求) // 首行缩进2字符 if (paragraph.getIndentationFirstLine() != 420) { result.addError("'规范性引用文件'默认文本应首行缩进2字符"); } // 段前段后为0行距 int spacingBefore = paragraph.getSpacingBefore(); int spacingAfter = paragraph.getSpacingAfter(); if (spacingBefore != 0) { result.addError("'规范性引用文件'默认文本段前间距应为0行,当前为: " + (spacingBefore/30.0) + "行"); } if (spacingAfter != 0) { result.addError("'规范性引用文件'默认文本段后间距应为0行,当前为: " + (spacingAfter/30.0) + "行"); } // 行距为1.5倍行距 if (paragraph.getSpacingLineRule() != LineSpacingRule.AUTO || Math.abs(paragraph.getSpacingBetween() - 1.5) > 0.01) { result.addError("'规范性引用文件'默认文本行距应为1.5倍行距"); } } private void checkReferenceContentFormat(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查字体 String asianFontName = getParagraphAsianFontName(paragraph); String 
latinFontName = getParagraphLatinFontName(paragraph); if (!"宋体".equals(asianFontName)) { result.addError("'规范性引用文件'章节正文字体(中文)应为宋体,当前为: " + asianFontName); } if (!"Times New Roman".equals(latinFontName)) { result.addError("'规范性引用文件'章节正文字体(英文/数字)应为Times New Roman,当前为: " + latinFontName); } // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("'规范性引用文件'章节正文字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式(同第一部分) checkNormalParagraphFormat(paragraph, "'规范性引用文件'章节正文", 1.5, result); } private void checkTermDefinitionSection(List<XWPFParagraph> paragraphs, DocumentCheckResult result) { boolean inTermSection = false; boolean foundTermTitle = false; int termLevel = 0; // 0:不在术语章节, 1:在术语章节标题, 2:在术语条目 for (int i = 0; i < paragraphs.size(); i++) { XWPFParagraph paragraph = paragraphs.get(i); String text = paragraph.getText(); if (text != null && text.startsWith("3 术语和定义")) { foundTermTitle = true; inTermSection = true; termLevel = 1; // 检查标题格式 checkHeading1Format(paragraph, "3 术语和定义", result); continue; } if (inTermSection) { // 检查是否进入下一节 if (text != null && (text.matches("\\d+ .*") || text.startsWith("附录"))) { if (!text.startsWith("3 ")) { // 不是3开头的其他章节 inTermSection = false; break; } } // 检查术语条目 (3.1, 3.2等) if (text != null && text.matches("3\\.\\d+ .*")) { termLevel = 2; // 检查术语条目标题格式 checkTermItemHeadingFormat(paragraph, result); continue; } // 检查术语条目正文 if (termLevel == 2 && !paragraph.getStyle().contains("Heading")) { checkTermItemContentFormat(paragraph, result); } // 检查普通正文 if (termLevel == 1 && !paragraph.getStyle().contains("Heading")) { checkTermContentFormat(paragraph, result); } } } if (!foundTermTitle) { result.addError("未找到'3 术语和定义'章节标题"); } } private void checkTermItemHeadingFormat(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查字体 String fontName = getParagraphFontName(paragraph); if (!"黑体".equals(fontName)) { result.addError("术语条目标题字体应为黑体,当前为: " + fontName); } // 检查字号 int fontSize = 
getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("术语条目标题字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式 // 段前段后为0 int spacingBefore = paragraph.getSpacingBefore(); int spacingAfter = paragraph.getSpacingAfter(); if (spacingBefore != 0) { result.addError("术语条目标题段前间距应为0行,当前为: " + (spacingBefore/30.0) + "行"); } if (spacingAfter != 0) { result.addError("术语条目标题段后间距应为0行,当前为: " + (spacingAfter/30.0) + "行"); } // 行距为1.5倍行距 if (paragraph.getSpacingLineRule() != LineSpacingRule.AUTO || Math.abs(paragraph.getSpacingBetween() - 1.5) > 0.01) { result.addError("术语条目标题行距应为1.5倍行距"); } } private void checkTermItemContentFormat(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查第一段格式(黑体五号) // 注意:这里简化处理,实际应该跟踪是否是术语条目的第一段 String asianFontName = getParagraphAsianFontName(paragraph); String latinFontName = getParagraphLatinFontName(paragraph); // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("术语条目正文字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式 checkNormalParagraphFormat(paragraph, "术语条目正文", 1.5, result); } private void checkTermContentFormat(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查字体 String asianFontName = getParagraphAsianFontName(paragraph); String latinFontName = getParagraphLatinFontName(paragraph); if (!"宋体".equals(asianFontName)) { result.addError("'术语和定义'章节正文字体(中文)应为宋体,当前为: " + asianFontName); } if (!"Times New Roman".equals(latinFontName)) { result.addError("'术语和定义'章节正文字体(英文/数字)应为Times New Roman,当前为: " + latinFontName); } // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("'术语和定义'章节正文字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式(同第一部分) checkNormalParagraphFormat(paragraph, "'术语和定义'章节正文", 1.5, result); } private void checkOtherSections(List<XWPFParagraph> paragraphs, DocumentCheckResult result) { boolean inOtherSection = false; int currentMainSection = 0; int currentSubSection = 0; int 
currentSubSubSection = 0; for (int i = 0; i < paragraphs.size(); i++) { XWPFParagraph paragraph = paragraphs.get(i); String text = paragraph.getText(); // 检查4、5、6等主章节标题 if (text != null && text.matches("[4-9]\\d* .*")) { inOtherSection = true; currentMainSection = Integer.parseInt(text.split(" ")[0]); currentSubSection = 0; currentSubSubSection = 0; // 检查标题1格式 checkOtherHeading1Format(paragraph, result); continue; } // 检查子章节标题 (如 4.1, 4.2) if (inOtherSection && text != null && text.matches(currentMainSection + "\\.\\d+ .*")) { currentSubSection = Integer.parseInt(text.split("\\.")[1].split(" ")[0]); currentSubSubSection = 0; // 检查标题2格式 checkOtherHeading2Format(paragraph, result); continue; } // 检查子子章节标题 (如 4.1.1, 4.1.2) if (inOtherSection && text != null && text.matches(currentMainSection + "\\." + currentSubSection + "\\.\\d+ .*")) { currentSubSubSection = Integer.parseInt(text.split("\\.")[2].split(" ")[0]); // 检查标题3格式 checkOtherHeading3Format(paragraph, result); continue; } // 检查正文格式 if (inOtherSection && !paragraph.getStyle().contains("Heading")) { checkOtherContentFormat(paragraph, result); } } } private void checkOtherHeading1Format(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查字体 String fontName = getParagraphFontName(paragraph); if (!"黑体".equals(fontName)) { result.addError("章节标题字体应为黑体,当前为: " + fontName); } // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("章节标题字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式 checkHeading1ParagraphFormat(paragraph, "章节标题", result); } private void checkOtherHeading2Format(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查字体 String fontName = getParagraphFontName(paragraph); if (!"黑体".equals(fontName)) { result.addError("子章节标题字体应为黑体,当前为: " + fontName); } // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("子章节标题字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式 (标题2及以后为单倍行距) 
checkHeading2PlusParagraphFormat(paragraph, "子章节标题", result); } private void checkOtherHeading3Format(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查字体 String fontName = getParagraphFontName(paragraph); if (!"黑体".equals(fontName)) { result.addError("子子章节标题字体应为黑体,当前为: " + fontName); } // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("子子章节标题字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式 (标题2及以后为单倍行距) checkHeading2PlusParagraphFormat(paragraph, "子子章节标题", result); } private void checkHeading2PlusParagraphFormat(XWPFParagraph paragraph, String sectionName, DocumentCheckResult result) { // 检查段前段后间距 int spacingBefore = paragraph.getSpacingBefore(); int spacingAfter = paragraph.getSpacingAfter(); // 0.5行大约等于15磅(基于默认行距) if (spacingBefore != 15) { result.addError("'" + sectionName + "'段前间距应为0.5行,当前为: " + (spacingBefore/30.0) + "行"); } if (spacingAfter != 15) { result.addError("'" + sectionName + "'段后间距应为0.5行,当前为: " + (spacingAfter/30.0) + "行"); } // 检查行距 (单倍行距) if (paragraph.getSpacingLineRule() != LineSpacingRule.AUTO || Math.abs(paragraph.getSpacingBetween() - 1.0) > 0.01) { result.addError("'" + sectionName + "'行距应为单倍行距"); } } private void checkOtherContentFormat(XWPFParagraph paragraph, DocumentCheckResult result) { // 检查字体 String asianFontName = getParagraphAsianFontName(paragraph); String latinFontName = getParagraphLatinFontName(paragraph); if (!"宋体".equals(asianFontName)) { result.addError("正文字体(中文)应为宋体,当前为: " + asianFontName); } if (!"Times New Roman".equals(latinFontName)) { result.addError("正文字体(英文/数字)应为Times New Roman,当前为: " + latinFontName); } // 检查字号 int fontSize = getParagraphFontSize(paragraph); if (fontSize != 10) { // 五号字对应10磅 result.addError("正文字号应为五号(10磅),当前为: " + fontSize + "磅"); } // 检查段落格式 (首行缩进2字符,段前段后为0行,单倍行距) checkOtherNormalParagraphFormat(paragraph, result); } private void checkOtherNormalParagraphFormat(XWPFParagraph paragraph, DocumentCheckResult result) { // 首行缩进2字符 
(约420缇) int indentationFirstLine = paragraph.getIndentationFirstLine(); if (indentationFirstLine != 420) { result.addError("正文应首行缩进2字符"); } // 段前段后为0行距 int spacingBefore = paragraph.getSpacingBefore(); int spacingAfter = paragraph.getSpacingAfter(); if (spacingBefore != 0) { result.addError("正文段前间距应为0行,当前为: " + (spacingBefore/30.0) + "行"); } if (spacingAfter != 0) { result.addError("正文段后间距应为0行,当前为: " + (spacingAfter/30.0) + "行"); } // 行距为单倍行距 if (paragraph.getSpacingLineRule() != LineSpacingRule.AUTO || Math.abs(paragraph.getSpacingBetween() - 1.0) > 0.01) { result.addError("正文行距应为单倍行距"); } } private void checkNormalParagraphFormat(XWPFParagraph paragraph, String sectionName, double lineSpacing, DocumentCheckResult result) { // 首行缩进2字符 (约420缇) int indentationFirstLine = paragraph.getIndentationFirstLine(); if (indentationFirstLine != 420) { result.addError(sectionName + "应首行缩进2字符"); } // 段前段后为0.5行距 int spacingBefore = paragraph.getSpacingBefore(); int spacingAfter = paragraph.getSpacingAfter(); // 0.5行大约等于15磅(基于默认行距) if (spacingBefore != 15) { result.addError(sectionName + "段前间距应为0.5行,当前为: " + (spacingBefore/30.0) + "行"); } if (spacingAfter != 15) { result.addError(sectionName + "段后间距应为0.5行,当前为: " + (spacingAfter/30.0) + "行"); } // 行距 if (paragraph.getSpacingLineRule() != LineSpacingRule.AUTO || Math.abs(paragraph.getSpacingBetween() - lineSpacing) > 0.01) { result.addError(sectionName + "行距应为" + lineSpacing + "倍行距"); } } private void checkFiguresAndTables(XWPFDocument document, DocumentCheckResult result) { // 检查图片 List<XWPFPictureData> pictures = document.getAllPictures(); List<XWPFParagraph> paragraphs = document.getParagraphs(); int figureCount = 1; for (XWPFParagraph paragraph : paragraphs) { // 查找包含图片的段落 if (paragraph.getRuns().size() > 0) { for (XWPFRun run : paragraph.getRuns()) { if (run.getEmbeddedPictures().size() > 0) { // 检查图片下方是否有图序号和图名称 // 这里简化处理,实际应该检查下一个段落 result.addWarning("请检查图片下方是否有'图" + figureCount + "'标识和图名称,字体应为黑体五号"); figureCount++; break; } 
} } } // 检查表格 List<XWPFTable> tables = document.getTables(); int tableCount = 1; for (XWPFTable table : tables) { // 检查表格上方是否有表序号和表名称 // 这里简化处理,实际应该检查前一个段落 result.addWarning("请检查表格上方是否有'表" + tableCount + "'标识和表名称,字体应为黑体五号"); tableCount++; } } private void checkAppendixSection(XWPFDocument document, DocumentCheckResult result) { List<XWPFParagraph> paragraphs = document.getParagraphs(); boolean inAppendixSection = false; for (XWPFParagraph paragraph : paragraphs) { String text = paragraph.getText(); if (text != null && text.startsWith("附录")) { inAppendixSection = true; // 检查附录格式 // 这里简化处理,实际应该更详细检查 result.addWarning("请确保附录部分在新页开始"); continue; } if (inAppendixSection) { // 检查附录中的条文编号格式 if (text != null && text.matches("附录[ABC]\\.\\d+ .*")) { // 检查字体 String fontName = getParagraphFontName(paragraph); if (!"黑体".equals(fontName)) { result.addError("附录条文标题字体应为黑体,当前为: " + fontName); } } } } } private String getParagraphFontName(XWPFParagraph paragraph) { if (paragraph.getRuns().size() > 0) { XWPFRun run = paragraph.getRuns().get(0); String fontName = run.getFontFamily(); return fontName != null ? fontName : "未知"; } return "未知"; } private String getParagraphAsianFontName(XWPFParagraph paragraph) { if (paragraph.getRuns().size() > 0) { XWPFRun run = paragraph.getRuns().get(0); String fontName = run.getFontFamily(); return fontName != null ? fontName : "未知"; } return "未知"; } private String getParagraphLatinFontName(XWPFParagraph paragraph) { if (paragraph.getRuns().size() > 0) { XWPFRun run = paragraph.getRuns().get(0); String fontName = run.getFontFamily(); return fontName != null ? 
fontName : "未知"; } return "未知"; } private int getParagraphFontSize(XWPFParagraph paragraph) { if (paragraph.getRuns().size() > 0) { XWPFRun run = paragraph.getRuns().get(0); if (run.getFontSize() != -1) { return run.getFontSize(); } } return -1; } controller代码: @PostMapping("/check") public ResponseEntity<DocumentCheckResult> checkDocumentFormat( @RequestParam("file") MultipartFile file) { try { if (file.isEmpty()) { return ResponseEntity.badRequest().build(); } DocumentCheckResult result = documentCheckService.checkDocument(file); return ResponseEntity.ok(result); } catch (Exception e) { DocumentCheckResult errorResult = new DocumentCheckResult(); errorResult.setValid(false); errorResult.addError("系统错误: " + e.getMessage()); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorResult); } } 怎么修改
10-12
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值