Export the selection of a Word document as an image file

本文介绍了一种在Word 2002和2003版本中为图片添加圆角的方法,通过VBA宏实现图片导出及再利用,支持多种格式转换,并保持原有位置不变,适用于页面、表格中的图片编辑。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 原文地址: http://www.spotlight-wissen.de/archiv/message/1665077.html
Option Explicit

' (c) Désirée und Wolfram, 3/2005
' Modifiziert: 11/2007 - Bilder mit runden Ecken versehen
' Bilder aus Winword im Originalformat exportieren.
' Nur für WD2002 und WD2003 unter Win2000/XP/2003/Vista.
'
' Änderung  3.11.2007: RundeEcken Shape Seitenverhältnis sperren
' Änderung  4.11.2007: Table Pictures Contextmenu hinzugefügt
' Änderung  4.11.2007: Inlineshape Position wird erhalten, Section Delete
' Änderung  5.11.2007: Bilder in Header/Footer unterstützen

' --- Win32 API imports used by this module ---------------------------------
' Clipboard access (user32): used by GetImage to clear the clipboard and to
' fetch an enhanced-metafile handle from it.
Private Declare Function EmptyClipboard Lib "user32" () As Long
Private Declare Function OpenClipboard Lib "user32" _
  (ByVal hwnd As Long) As Long
Private Declare Function CloseClipboard Lib "user32" () As Long
Private Declare Function GetClipboardData Lib "user32" _
  (ByVal wFormat As Long) As Long
' Copies the bits of an enhanced metafile into a byte buffer. GetImage calls it
' once with a null buffer to obtain the size and once more to fetch the data.
Private Declare Function GetEnhMetaFileBits Lib "gdi32" _
  (ByVal hEMF As Long, ByVal cbBuffer As Long, lpbBuffer As Byte) As Long
' Raw memory copy; used throughout to read fields out of the emf byte array.
Private Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" _
  (pDest As Any, pSource As Any, ByVal cbLength As Long)

' Clipboard format id requested from GetClipboardData (enhanced metafile).
Private Const CF_ENHMETAFILE = 14
' Module-level buffers shared by the functions below:
'   emf     - metafile stream of the selection, filled by GetImage
'   imgData - image bytes extracted by ExportEMFPlusImageData and written to
'             disk by SaveRawImageData
Private emf() As Byte, imgData() As Byte

' First 8 bytes of every record in the EMF stream, as read by
' ExportEMFPlusImageData: record id followed by the record length in bytes.
Private Type EmfRecord ' private emf-type
  id As Long
  len As Long
End Type

' The 12 bytes following the record header of a comment record (id 70), as read
' by ExportEMFPlusImageData: data length, comment identifier and first data word.
Private Type GDI_Comment ' private GDI type
  len As Long
  Type As Long
  data As Long
End Type

Function ExportSelectedPicture(Filename As String) As String
  ' Exports the picture in the current Selection to "<Filename>.<ext>" in its
  ' original format and returns the full name of the file that was written.
  '
  ' Filename: target path without extension; the extension is derived from the
  '           first two bytes (signature) of the extracted image data.
  ' Returns:  the file name including extension, or an empty string when the
  '           picture could not be extracted or the file could not be written.
  '           Callers delete the original picture when the result is non-empty,
  '           so a name must only be returned for a file that really exists.
  Dim pBMI As Long, pDIB As Long, ext As String, picType As Integer, s As String

  ' Best-effort: individual failures are detected through the checks below.
  On Error Resume Next
  Erase imgData: Erase emf
  GetImage Selection
  
  If ExportEMFPlusImageData(pBMI, pDIB) Then
    ' Read the 16-bit signature at the start of the image data.
    CopyMemory picType, imgData(0), 2
    Select Case picType
      Case &HD8FF: ext = "jpg"
      Case &H4947: ext = "gif"
      Case &H5089: ext = "png"
      Case &H1:    ext = "emf"
      Case &HCDD7: ext = "wmf"
      Case &H4D42: ext = "bmp"
      Case &H4949: ext = "tif"   ' "II" little-endian TIFF
      Case &H4D4D: ext = "tif"   ' "MM" big-endian TIFF
      Case &H50A:  ext = "pcx"
      Case &H100:  ext = "tga"
      Case &HD0C5:  ext = "eps"
      Case &H2100:  ext = "cgm"
      Case Else:   ext = "bmp"
    End Select
    
    s = Filename & "." & ext
    If Len(Dir(s)) Then Kill s
    ' Clear stale errors so that Err reflects only the outcome of the save.
    Err.Clear
    SaveRawImageData s
    If Err.Number = 0 Then
      ExportSelectedPicture = s
    Else
      ' Writing failed: report it and return "" so the caller keeps the picture.
      MsgBox "Fehler beim Export des selektierten Bildes"
    End If
  Else
    MsgBox "Fehler beim Export des selektierten Bildes"
  End If
End Function

Function GetImage(ByVal r)
  ' Fills the module-level emf() buffer with the metafile stream of r.
  '
  ' r: object representing the picture (the caller passes Selection). It must
  '    expose EnhMetaFileBits (application version >= 11) or CopyAsPicture
  '    (older versions).
  ' No value is returned; emf() stays unallocated when nothing could be read.
  Dim hEMF As Long, n As Long
  
  If Val(Application.Version) >= 11 Then
    ' EnhMetaFileBits returns the raw EMF stream in Office 11.
    ' Bug workaround: the clipboard has to be emptied beforehand.
    If OpenClipboard(0&) Then
      EmptyClipboard
      CloseClipboard
    End If
    emf = CallByName(r, "EnhMetaFileBits", VbGet): DoEvents
  Else
    ' Office <= 10: go through the clipboard instead. Caution: in Office 11
    ' CopyAsPicture only yields an EMF copy, not the raw stream.
    r.CopyAsPicture
    If OpenClipboard(0&) Then
      hEMF = GetClipboardData(CF_ENHMETAFILE)
      ' The handle is owned by the clipboard, so copy the bits while the
      ' clipboard is still open; after CloseClipboard it may become invalid.
      If hEMF Then
        n = GetEnhMetaFileBits(hEMF, 0, ByVal 0&)   ' query required size
        If n Then
          ReDim emf(n - 1)
          GetEnhMetaFileBits hEMF, n, emf(0)
        End If
      End If
      CloseClipboard
    End If
  End If
End Function

Function ExportEMFPlusImageData(pBMI As Long, pDIB As Long) As Boolean
  ' Extracts the GDI+ (EMF+) image data from the EMF stream in emf() into
  ' imgData().
  '
  ' The stream is walked record by record with a small state machine:
  '   state 0 - expect the EMF header record (id 1)
  '   state 1 - wait for the GDI comment record that begins a group
  '   state 2 - inside the group: scan EMF+ comment records for an Image
  '             command and copy its payload; also remember the first
  '             StretchDIBits record (id 81)
  ' If no embedded image is found, the whole EMF stream is copied to imgData().
  '
  ' pBMI, pDIB: out parameters; when an image was found they receive the offsets
  '             (within emf) of the bitmap header and bits of the first
  '             StretchDIBits record, or 0 if there was none.
  ' Returns:    True when imgData() was filled; False when emf() is missing,
  '             shorter than 8 bytes or does not start with a header record.
  
  Dim pEMF As Long, n As Long, state As Long, pNext As Long
  Dim recEMF As EmfRecord, recEMFplus As GDI_Comment
  Dim nextblock As Boolean, pCmd As Long, imgtype As Long, toff As Long
  Dim WMFhdr As Long, WMFhsz As Integer, misalign As Boolean, big As Boolean
  Dim dib As Boolean, dibits As Long, bmi As Long, imgend As Boolean
  
  On Error Resume Next
  n = UBound(emf)     ' raises (and leaves Err set) when emf() is unallocated
  If n < 7 Or Err <> 0 Then Exit Function
  Do
    CopyMemory recEMF, emf(pEMF), 8
    'Debug.Print Hex$(pEMF), Hex$(recEMF.id), Hex$(recEMF.len)
    Select Case state
      Case 0: ' header
        If recEMF.id <> 1 Or recEMF.len = 0 Then Exit Function ' wrong header
        state = 1
      Case 1: ' wait for GDI_COMMENT Begin Group
        If recEMF.id = 70 And recEMF.len > 23 Then
          CopyMemory recEMFplus, emf(pEMF + 8), 12
          If recEMFplus.type = &H43494447 And recEMFplus.data = 2 Then ' GDIC
            state = 2
          End If
        End If
      Case 2: ' wait for GDI_COMMENT EMF+ (GDI+) records
        If recEMF.id = 70 And recEMF.len >= 20 Then
          CopyMemory recEMFplus, emf(pEMF + 8), 12
          'Debug.Print "+", Hex$(recEMFplus.type), Hex$(recEMFplus.data)
          If (recEMFplus.type = &H2B464D45) And (Not imgend) Then ' GDI+ record
            pNext = pEMF + 16
            pCmd = recEMFplus.data
            Do While (pCmd And &HFFFF&) <> &H4008  ' wait for cmd Image
              CopyMemory n, emf(pNext + 4), 4  ' len of command
              ' A non-positive length would never advance pNext: stop scanning
              ' this record instead of looping forever on malformed data.
              If n <= 0 Then Exit Do
              pNext = pNext + n
              If pNext >= pEMF + recEMF.len Then Exit Do
              CopyMemory pCmd, emf(pNext), 4   ' next command
            Loop
            If (pCmd And &HFFFFFFF) = &H5004008 Then  ' cmd Image + Flags
              ' High bit set: the image continues in following records.
              big = (pCmd And &H80000000) = &H80000000
              toff = IIf(big, pNext + 20, pNext + 16)
              If Not (big And nextblock) Then
                ' First (or only) block of the image.
                CopyMemory imgtype, emf(toff), 4
                If imgtype = 1 Then            ' bitmap
                  ReDim imgData(recEMF.len - toff - 24 + pEMF - 1)
                  CopyMemory imgData(0), emf(toff + 24), recEMF.len - toff - 24 + pEMF
                ElseIf imgtype = 2 Then        ' metafile
                  ReDim imgData(recEMF.len - toff - 12 + pEMF - 1): misalign = False
                  CopyMemory WMFhdr, emf(toff + 12), 4
                  CopyMemory WMFhsz, emf(toff + 12 + 22 + 2), 2
                  If WMFhdr = &H9AC6CDD7 Then  ' WMF APM Header?
                    misalign = WMFhsz <> 9     ' check std WMF header misalignment
                  End If
                  If misalign Then             ' correct GDI+ misalign-bug
                    ' Drop the 2 stray bytes between the 22-byte APM header and
                    ' the standard WMF header, then shrink the buffer by 2.
                    CopyMemory imgData(0), emf(toff + 12), 22  ' APM header
                    CopyMemory imgData(22), emf(toff + 12 + 22 + 2), recEMF.len - toff - 12 + pEMF - 22 - 2
                    ReDim Preserve imgData(UBound(imgData) - 2)
                  Else
                    CopyMemory imgData(0), emf(toff + 12), recEMF.len - toff - 12 + pEMF
                  End If
                Else
                  Exit Do                            ' unknown type
                End If  ' imgtype
                If big Then nextblock = True Else imgend = True
              Else
                ' Continuation block: append its payload to imgData.
                n = UBound(imgData)
                ReDim Preserve imgData(n + recEMF.len - &H20)
                CopyMemory imgData(n + 1), emf(pEMF + &H20), recEMF.len - &H20
              End If  ' not (big and next)
            End If ' cmd image
          ElseIf recEMFplus.type = &H43494447 And recEMFplus.data = 3 Then ' GDIC end
            Exit Do ' EMF+ group end
          End If
        ElseIf recEMF.id = 81 And recEMF.len >= 88 And (Not dib) Then ' EMR_StretchDIBits
          dib = True
          CopyMemory n, emf(pEMF + 48), 4      ' BMIoffset (0x50)
          bmi = pEMF + n                       ' BIHdr
          CopyMemory n, emf(pEMF + 56), 4      '
          dibits = pEMF + n                    ' DIBits
        End If
    End Select
    ' A record length <= 0 would stall or rewind the walk: treat the stream as
    ' ended rather than looping forever.
    If recEMF.len <= 0 Then Exit Do
    pEMF = pEMF + recEMF.len
    ' Stop as soon as fewer than 8 bytes (one record header) remain, so the
    ' next header read cannot run past the end of emf().
  Loop Until pEMF + 7 > UBound(emf)
  ' UBound raises when imgData() was never allocated; n then keeps the value 0.
  n = 0: n = UBound(imgData)
  If n = 0 Then  ' if image not found, copy enh metafile bits
    ReDim imgData(UBound(emf)): CopyMemory imgData(0), emf(0), UBound(emf) + 1
  Else
    pDIB = dibits: pBMI = bmi
  End If
  ExportEMFPlusImageData = True
End Function

Function SaveRawImageData(ByVal Filename As String)
  ' Writes the module-level imgData() byte array to Filename as a binary file.
  '
  ' Filename: full path of the file to write.
  ' Raises:   the original run-time error of Open/Put (for example path not
  '           found or access denied) after the file number has been released.
  Dim f As Integer
  Dim errNum As Long, errSrc As String, errDesc As String
  
  f = FreeFile
  On Error GoTo Failed
  Open Filename For Binary Access Write As #f
  Put #f, 1, imgData
  Close #f
  Exit Function
  
Failed:
  ' Remember the error, release the file number so it does not leak, then
  ' pass the same error on to the caller.
  errNum = Err.Number: errSrc = Err.Source: errDesc = Err.Description
  Close #f
  Err.Raise errNum, errSrc, errDesc
End Function

Sub GrafikMitRundenEcken()
  ' Replaces the selected picture with a rounded-rectangle shape that is filled
  ' with the same picture ("picture with rounded corners").
  '
  ' The selected picture is exported to a temporary file through
  ' ExportSelectedPicture, the original is deleted, and a rounded rectangle of
  ' the same size is inserted and filled with the exported file. Handles inline
  ' shapes and floating shapes, in the document body and in headers/footers.
  ' Nothing is changed when the export returns an empty file name.
  Dim ils As InlineShape, fBaseName As String, fName As String
  Dim w As Single, h As Single, sh As Shape, sh1 As Shape
  Dim sr As Single, sa As Range, sl As Single, st As Single
  Dim sla As Long, srh As Long, srv As Long, szp As Long
  Dim swo As Long, sdb As Single, sdl As Single, sdr As Single, hf As HeaderFooter
  Dim sdt As Single, ssi As Long, swt As Long, n As Long, r As Range, s As Long
      
  ' Base name (without extension) of the temporary export file.
  fBaseName = Options.DefaultFilePath(wdTempFilePath) & "/~temppic"
  
  ' When the selection is inside a header or footer story, resolve the matching
  ' HeaderFooter object of the current section; new shapes are added to it.
  s = Selection.Information(wdActiveEndSectionNumber)
  Select Case Selection.StoryType ' HeaderFooter Shapes
    Case wdEvenPagesHeaderStory ' 6
      Set hf = ActiveDocument.Sections(s).Headers(wdHeaderFooterEvenPages)
    Case wdPrimaryHeaderStory ' 7
      Set hf = ActiveDocument.Sections(s).Headers(wdHeaderFooterPrimary)
    Case wdEvenPagesFooterStory ' 8
      Set hf = ActiveDocument.Sections(s).Footers(wdHeaderFooterEvenPages)
    Case wdPrimaryFooterStory '  9
      Set hf = ActiveDocument.Sections(s).Footers(wdHeaderFooterPrimary)
    Case wdFirstPageHeaderStory ' 10
      Set hf = ActiveDocument.Sections(s).Headers(wdHeaderFooterFirstPage)
    Case wdFirstPageFooterStory ' 11
      Set hf = ActiveDocument.Sections(s).Footers(wdHeaderFooterFirstPage)
  End Select
  
  Select Case Selection.type
    Case wdSelectionInlineShape
      ' --- inline picture: keep size and position within the paragraph ---
      Set ils = Selection.InlineShapes(1)
      w = ils.Width
      h = ils.Height
      fName = ExportSelectedPicture(fBaseName)
      If Len(fName) Then
        'n = Selection.Start - Selection.Paragraphs(1).Range.Start
        Selection.Delete
        ' Story types 6..11 are the header/footer stories resolved above.
        If Selection.StoryType >= 6 And Selection.StoryType <= 11 Then
          Set sh = hf.Shapes.AddShape(msoShapeRoundedRectangle, 0, 0, w, h, Selection.Range)
          ' n = number of characters between paragraph start and the picture
          Set r = hf.Range
          r.SetRange Selection.Paragraphs(1).Range.Start, Selection.Start
          n = r.Characters.Count
        Else
          Set sh = ActiveDocument.Shapes.AddShape(msoShapeRoundedRectangle, 0, 0, w, h, Selection.Range)
          n = ActiveDocument.Range(Selection.Paragraphs(1).Range.Start, Selection.Start).Characters.Count
        End If
        sh.Fill.UserPicture fName
        sh.Line.Visible = msoFalse
        sh.LockAspectRatio = msoTrue
        sh.Select
        CommandBars.FindControl(id:=5934).Execute  ' substitute for ConvertToInlineShape
        If n Then  ' the inline shape was not at the start of the paragraph
          Selection.Cut
          Selection.MoveRight wdCharacter, n ' move to the previous position
          Selection.Paste
        End If
      End If
      
    Case wdSelectionShape
      ' --- floating picture: remember geometry, anchor and wrap settings ---
      Set sh1 = Selection.ShapeRange(1)
      w = sh1.Width
      h = sh1.Height
      sr = sh1.Rotation
      Set sa = sh1.Anchor
      sl = sh1.Left
      st = sh1.Top
      sla = sh1.LockAnchor
      srh = sh1.RelativeHorizontalPosition
      srv = sh1.RelativeVerticalPosition
      szp = sh1.ZOrderPosition   ' NOTE(review): captured but never applied below
      swo = sh1.WrapFormat.AllowOverlap
      sdb = sh1.WrapFormat.DistanceBottom
      sdl = sh1.WrapFormat.DistanceLeft
      sdr = sh1.WrapFormat.DistanceRight
      sdt = sh1.WrapFormat.DistanceTop
      ssi = sh1.WrapFormat.Side
      swt = sh1.WrapFormat.type
      
      fName = ExportSelectedPicture(fBaseName)
      If Len(fName) Then
        sh1.Delete
        ' Re-create the shape at the same anchor, position and size.
        If Selection.StoryType >= 6 And Selection.StoryType <= 11 Then
          Set sh = hf.Shapes.AddShape(msoShapeRoundedRectangle, sl, st, w, h, sa)
        Else
          Set sh = ActiveDocument.Shapes.AddShape(msoShapeRoundedRectangle, sl, st, w, h, sa)
        End If
        sh.Fill.UserPicture fName
        sh.Line.Visible = msoFalse
        sh.LockAspectRatio = msoTrue
        ' Restore the settings remembered from the original shape.
        sh.Rotation = sr
        sh.LockAnchor = sla
        sh.RelativeHorizontalPosition = srh
        sh.RelativeVerticalPosition = srv
        sh.WrapFormat.AllowOverlap = swo
        sh.WrapFormat.DistanceBottom = sdb
        sh.WrapFormat.DistanceLeft = sdl
        sh.WrapFormat.DistanceRight = sdr
        sh.WrapFormat.DistanceTop = sdt
        sh.WrapFormat.Side = ssi
        sh.WrapFormat.type = swt
      End If
  End Select
End Sub

Sub AddContextMenu1()
  ' Installs the "Grafik mit runden Ecken" button in the picture context menus
  ' of this document. For each menu, the first existing control carrying the
  ' same tag is removed before the button is added at position 4, so running
  ' the macro again does not pile up duplicates.
  Const myId = "RundeEckenGrafik"
  Dim menuNames As Variant, menuName As Variant
  Dim bar As CommandBar, ctl As CommandBarControl
  
  CustomizationContext = ThisDocument
  menuNames = Array("Inline Picture", "Floating Picture", "Table Pictures")
  
  For Each menuName In menuNames
    Set bar = Application.CommandBars(menuName)
    
    ' Remove a previously installed button, if any.
    For Each ctl In bar.Controls
      If ctl.Tag = myId Then
        ctl.Delete
        Exit For
      End If
    Next ctl
    
    ' Add the button that runs GrafikMitRundenEcken.
    With bar.Controls.Add(msoControlButton, , , 4)
      .Caption = "Grafik mit runden Ecken"
      .OnAction = "GrafikMitRundenEcken"
      .Tag = myId
    End With
  Next menuName
End Sub



Grüße
Wolfram
import os import tkinter as tk from tkinter import filedialog, ttk, messagebox, simpledialog import fitz # PyMuPDF import pandas as pd from PIL import Image, ImageTk import io class PDFExtractorApp: def __init__(self, root): self.root = root self.root.title("PDF文本提取器") self.root.geometry("1200x800") self.pdf_document = None self.current_page = 0 self.total_pages = 0 self.thumbnails = [] self.selection_start = None self.selection_end = None self.extracted_data = [] self.table_data = None # 用于存储表格数据 self.all_pages_table_data = None # 用于存储所有页面的表格数据 # 滚动和拖拽相关变量 self.is_dragging = False self.drag_start_x = 0 self.drag_start_y = 0 # 缩放和显示相关变量 self.zoom_factor = 1.0 self.zoom_step = 0.1 # 减小缩放步长,使缩放更平滑 self.min_zoom = 0.2 # 最小缩放比例 self.max_zoom = 5.0 # 最大缩放比例 self.auto_fit_mode = "page" # "page", "width", "none" # Ctrl键状态跟踪 self.ctrl_pressed = False # 选区相关变量 self.is_selecting = False self.selection_rect = None self.page_bbox = None # 页面边界框 # 选区坐标显示变量 self.coord_display = None self.coord_text_id = None # 选区辅助线条 self.helper_lines = [] # 表格识别参数 self.column_tolerance = 10 # 列容差,用于确定文本是否在同一列 self.row_tolerance = 10 # 行容差,用于确定文本是否在同一行 self.setup_ui() def setup_ui(self): # 创建主框架 main_frame = ttk.Frame(self.root) main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 左侧缩略图区域 left_frame = ttk.LabelFrame(main_frame, text="页面预览") left_frame.pack(side=tk.LEFT, fill=tk.Y, padx=5, pady=5) # 页码跳转区域 nav_frame = ttk.Frame(left_frame) nav_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(nav_frame, text="跳转至:").pack(side=tk.LEFT, padx=2) self.page_entry = ttk.Entry(nav_frame, width=6) self.page_entry.pack(side=tk.LEFT, padx=2) ttk.Button(nav_frame, text="跳转", command=self.jump_to_page).pack(side=tk.LEFT, padx=2) # 缩略图滚动区域 scrollbar = ttk.Scrollbar(left_frame) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) self.thumbnail_canvas = tk.Canvas(left_frame, width=150, yscrollcommand=scrollbar.set) self.thumbnail_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) 
scrollbar.config(command=self.thumbnail_canvas.yview) # 创建缩略图框架,用于容纳所有缩略图 self.thumbnails_frame = ttk.Frame(self.thumbnail_canvas) self.thumbnail_window = self.thumbnail_canvas.create_window((0, 0), window=self.thumbnails_frame, anchor=tk.NW) # 中间PDF预览区域 center_frame = ttk.LabelFrame(main_frame, text="PDF预览") center_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=5, pady=5) # 创建一个框架用于放置滚动条 canvas_frame = ttk.Frame(center_frame) canvas_frame.pack(fill=tk.BOTH, expand=True) # 创建水平和垂直滚动条 self.xscrollbar = ttk.Scrollbar(canvas_frame, orient=tk.HORIZONTAL) self.xscrollbar.pack(side=tk.BOTTOM, fill=tk.X) self.yscrollbar = ttk.Scrollbar(canvas_frame, orient=tk.VERTICAL) self.yscrollbar.pack(side=tk.RIGHT, fill=tk.Y) # 创建PDF画布 self.pdf_canvas = tk.Canvas(canvas_frame, bg="white", xscrollcommand=self.xscrollbar.set, yscrollcommand=self.yscrollbar.set) self.pdf_canvas.pack(fill=tk.BOTH, expand=True) # 配置滚动条 self.xscrollbar.config(command=self.pdf_canvas.xview) self.yscrollbar.config(command=self.pdf_canvas.yview) # 绑定鼠标事件用于选区 self.pdf_canvas.bind("<ButtonPress-1>", self.on_mouse_down) self.pdf_canvas.bind("<B1-Motion>", self.on_mouse_drag) self.pdf_canvas.bind("<ButtonRelease-1>", self.on_mouse_up) # 绑定鼠标滚轮和右键拖拽事件 self.pdf_canvas.bind("<MouseWheel>", self.on_mouse_wheel) self.pdf_canvas.bind("<ButtonPress-3>", self.on_right_mouse_down) self.pdf_canvas.bind("<B3-Motion>", self.on_right_mouse_drag) self.pdf_canvas.bind("<ButtonRelease-3>", self.on_right_mouse_up) # Ctrl键状态监听 self.root.bind("<Control-KeyPress>", self.on_ctrl_press) self.root.bind("<Control-KeyRelease>", self.on_ctrl_release) # 右侧控制区域 right_frame = ttk.LabelFrame(main_frame, text="控制区") right_frame.pack(side=tk.RIGHT, fill=tk.Y, padx=5, pady=5) # 文件选择 ttk.Button(right_frame, text="打开PDF", command=self.open_pdf).pack(fill=tk.X, padx=5, pady=5) # 页面导航 nav_frame = ttk.Frame(right_frame) nav_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Button(nav_frame, text="上一页", command=self.prev_page).pack(side=tk.LEFT, 
padx=2) self.page_label = ttk.Label(nav_frame, text="0/0") self.page_label.pack(side=tk.LEFT, padx=5) ttk.Button(nav_frame, text="下一页", command=self.next_page).pack(side=tk.LEFT, padx=2) # 缩放控制 zoom_frame = ttk.Frame(right_frame) zoom_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Button(zoom_frame, text="放大", command=self.zoom_in).pack(side=tk.LEFT, padx=2) ttk.Button(zoom_frame, text="缩小", command=self.zoom_out).pack(side=tk.LEFT, padx=2) ttk.Button(zoom_frame, text="重置视图", command=self.reset_view).pack(side=tk.LEFT, padx=2) # 自适应模式选择 fit_frame = ttk.Frame(right_frame) fit_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(fit_frame, text="自适应:").pack(side=tk.LEFT, padx=2) self.fit_mode = tk.StringVar(value="page") ttk.Radiobutton(fit_frame, text="整页", variable=self.fit_mode, value="page", command=self.update_fit_mode).pack(side=tk.LEFT, padx=2) ttk.Radiobutton(fit_frame, text="宽度", variable=self.fit_mode, value="width", command=self.update_fit_mode).pack(side=tk.LEFT, padx=2) ttk.Radiobutton(fit_frame, text="实际大小", variable=self.fit_mode, value="none", command=self.update_fit_mode).pack(side=tk.LEFT, padx=2) # 提取选项 ttk.Label(right_frame, text="提取选项").pack(fill=tk.X, padx=5, pady=5) self.extract_type = tk.StringVar(value="selection") ttk.Radiobutton(right_frame, text="选区提取", variable=self.extract_type, value="selection").pack(anchor=tk.W, padx=20) ttk.Radiobutton(right_frame, text="整页提取", variable=self.extract_type, value="full_page").pack(anchor=tk.W, padx=20) # 表格识别选项 table_frame = ttk.LabelFrame(right_frame, text="表格识别选项") table_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(table_frame, text="列容差:").grid(row=0, column=0, padx=5, pady=2, sticky=tk.W) self.column_tolerance_var = tk.StringVar(value=str(self.column_tolerance)) ttk.Entry(table_frame, textvariable=self.column_tolerance_var, width=5).grid(row=0, column=1, padx=2, pady=2) ttk.Label(table_frame, text="行容差:").grid(row=1, column=0, padx=5, pady=2, sticky=tk.W) self.row_tolerance_var = 
tk.StringVar(value=str(self.row_tolerance)) ttk.Entry(table_frame, textvariable=self.row_tolerance_var, width=5).grid(row=1, column=1, padx=2, pady=2) ttk.Button(table_frame, text="应用设置", command=self.apply_table_settings).grid(row=0, column=2, rowspan=2, padx=5, pady=2) # 选区坐标显示 selection_frame = ttk.LabelFrame(right_frame, text="选区坐标") selection_frame.pack(fill=tk.X, padx=5, pady=5) # 左上角坐标 ttk.Label(selection_frame, text="左上角:").grid(row=0, column=0, padx=5, pady=2, sticky=tk.W) self.selection_x1_var = tk.StringVar(value="0") self.selection_y1_var = tk.StringVar(value="0") ttk.Entry(selection_frame, textvariable=self.selection_x1_var, width=8, state="readonly").grid(row=0, column=1, padx=2, pady=2) ttk.Label(selection_frame, text=",").grid(row=0, column=2, padx=0, pady=2) ttk.Entry(selection_frame, textvariable=self.selection_y1_var, width=8, state="readonly").grid(row=0, column=3, padx=2, pady=2) # 右下角坐标 ttk.Label(selection_frame, text="右下角:").grid(row=1, column=0, padx=5, pady=2, sticky=tk.W) self.selection_x2_var = tk.StringVar(value="0") self.selection_y2_var = tk.StringVar(value="0") ttk.Entry(selection_frame, textvariable=self.selection_x2_var, width=8, state="readonly").grid(row=1, column=1, padx=2, pady=2) ttk.Label(selection_frame, text=",").grid(row=1, column=2, padx=0, pady=2) ttk.Entry(selection_frame, textvariable=self.selection_y2_var, width=8, state="readonly").grid(row=1, column=3, padx=2, pady=2) # 选区尺寸 ttk.Label(selection_frame, text="尺寸:").grid(row=2, column=0, padx=5, pady=2, sticky=tk.W) self.selection_width_var = tk.StringVar(value="0") self.selection_height_var = tk.StringVar(value="0") ttk.Entry(selection_frame, textvariable=self.selection_width_var, width=8, state="readonly").grid(row=2, column=1, padx=2, pady=2) ttk.Label(selection_frame, text="×").grid(row=2, column=2, padx=0, pady=2) ttk.Entry(selection_frame, textvariable=self.selection_height_var, width=8, state="readonly").grid(row=2, column=3, padx=2, pady=2) # 选区重置按钮 
ttk.Button(right_frame, text="重置选区", command=self.reset_selection).pack(fill=tk.X, padx=5, pady=5) # 提取按钮 ttk.Button(right_frame, text="提取文本", command=self.extract_text).pack(fill=tk.X, padx=5, pady=5) # 提取所有页面相同区域按钮 ttk.Button(right_frame, text="提取所有页面相同区域", command=self.extract_all_pages_same_region).pack(fill=tk.X, padx=5, pady=5) # 导出按钮 ttk.Button(right_frame, text="导出到Excel", command=self.export_to_excel).pack(fill=tk.X, padx=5, pady=5) # 提取结果显示 result_frame = ttk.LabelFrame(right_frame, text="提取结果") result_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) self.result_text = tk.Text(result_frame, height=10) self.result_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) # 状态栏 self.status_var = tk.StringVar(value="就绪") self.status_bar = ttk.Label(self.root, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W) self.status_bar.pack(side=tk.BOTTOM, fill=tk.X) # 绑定回车键到页码跳转 self.page_entry.bind("<Return>", lambda event: self.jump_to_page()) # 绑定缩略图画布配置事件 self.thumbnails_frame.bind("<Configure>", self.on_thumbnails_configure) def apply_table_settings(self): """应用表格识别设置""" try: self.column_tolerance = float(self.column_tolerance_var.get()) self.row_tolerance = float(self.row_tolerance_var.get()) self.status_var.set(f"已应用表格识别设置: 列容差={self.column_tolerance}, 行容差={self.row_tolerance}") except ValueError: messagebox.showerror("输入错误", "请输入有效的数值") def on_thumbnails_configure(self, event): """更新缩略图画布的滚动区域""" self.thumbnail_canvas.configure(scrollregion=self.thumbnail_canvas.bbox("all")) def open_pdf(self): file_path = filedialog.askopenfilename(filetypes=[("PDF文件", "*.pdf")]) if file_path: try: self.pdf_document = fitz.open(file_path) self.total_pages = len(self.pdf_document) self.current_page = 0 self.extracted_data = [] self.table_data = None self.all_pages_table_data = None self.result_text.delete(1.0, tk.END) # 更新页面标签 self.page_label.config(text=f"{self.current_page + 1}/{self.total_pages}") # 生成缩略图 self.generate_thumbnails() # 显示当前页面 
self.display_current_page() self.status_var.set(f"已打开文件: {os.path.basename(file_path)}") except Exception as e: messagebox.showerror("错误", f"无法打开PDF文件: {str(e)}") def generate_thumbnails(self): """生成PDF页面缩略图""" # 清除现有缩略图 self.thumbnails = [] for widget in self.thumbnails_frame.winfo_children(): widget.destroy() y_offset = 5 for page_num in range(self.total_pages): # 创建缩略图容器 thumbnail_frame = ttk.Frame(self.thumbnails_frame) thumbnail_frame.pack(fill=tk.X, padx=5, pady=2) # 生成缩略图 page = self.pdf_document[page_num] pix = page.get_pixmap(matrix=fitz.Matrix(0.2, 0.2)) img = Image.open(io.BytesIO(pix.tobytes("ppm"))) photo = ImageTk.PhotoImage(img) # 存储引用以防止垃圾回收 self.thumbnails.append(photo) # 创建缩略图按钮 thumbnail_btn = tk.Button(thumbnail_frame, image=photo, command=lambda p=page_num: self.go_to_page(p), relief=tk.FLAT) thumbnail_btn.pack(side=tk.LEFT, padx=5, pady=5) # 添加页码标签 page_label = ttk.Label(thumbnail_frame, text=f"页面 {page_num + 1}") page_label.pack(side=tk.LEFT, padx=5) # 高亮显示当前页面 if page_num == self.current_page: thumbnail_frame.config(style="ActiveThumbnail.TFrame") page_label.config(style="ActiveThumbnail.TLabel") else: # 鼠标悬停效果 thumbnail_frame.bind("<Enter>", lambda e, f=thumbnail_frame: f.config(style="HoverThumbnail.TFrame")) thumbnail_frame.bind("<Leave>", lambda e, f=thumbnail_frame: f.config(style="")) page_label.bind("<Enter>", lambda e, l=page_label: l.config(style="HoverThumbnail.TLabel")) page_label.bind("<Leave>", lambda e, l=page_label: l.config(style="")) def display_current_page(self): if not self.pdf_document: return self.pdf_canvas.delete("all") self.selection_start = None self.selection_end = None self.is_selecting = False # 重置选区坐标显示 self.update_selection_coordinates(0, 0, 0, 0) page = self.pdf_document[self.current_page] # 根据自适应模式计算缩放比例 if self.auto_fit_mode == "page": # 自适应整页 self.adjust_zoom_to_fit_page() elif self.auto_fit_mode == "width": # 自适应宽度 self.adjust_zoom_to_fit_width() else: # 实际大小 pass # 使用当前zoom_factor # 使用计算的缩放比例渲染页面 matrix = 
fitz.Matrix(self.zoom_factor, self.zoom_factor) pix = page.get_pixmap(matrix=matrix) img = Image.open(io.BytesIO(pix.tobytes("ppm"))) photo = ImageTk.PhotoImage(img) # 存储引用以防止垃圾回收 self.current_image = photo # 显示图像 self.image_id = self.pdf_canvas.create_image(0, 0, anchor=tk.NW, image=photo) # 获取页面边界框(用于限制选区) self.page_bbox = self.pdf_canvas.bbox(self.image_id) # 设置画布滚动区域 self.pdf_canvas.config(scrollregion=self.pdf_canvas.bbox(tk.ALL)) # 居中显示 self.center_view() # 状态栏显示页面信息 self.status_var.set(f"页面 {self.current_page + 1}/{self.total_pages}, 缩放比例: {self.zoom_factor:.1f}x") # 更新缩略图高亮 self.generate_thumbnails() def adjust_zoom_to_fit_page(self): """调整缩放比例以适应整个页面""" if not self.pdf_document: return # 获取当前页面和画布尺寸 page = self.pdf_document[self.current_page] canvas_width = self.pdf_canvas.winfo_width() canvas_height = self.pdf_canvas.winfo_height() # 考虑滚动条宽度 scrollbar_width = 15 canvas_width -= scrollbar_width canvas_height -= scrollbar_width # 计算页面宽高 page_width = page.rect.width page_height = page.rect.height # 计算适应画布的缩放比例 width_factor = canvas_width / page_width height_factor = canvas_height / page_height # 取较小的缩放比例以确保整个页面可见 self.zoom_factor = min(width_factor, height_factor) def adjust_zoom_to_fit_width(self): """调整缩放比例以适应页面宽度""" if not self.pdf_document: return # 获取当前页面和画布宽度 page = self.pdf_document[self.current_page] canvas_width = self.pdf_canvas.winfo_width() # 考虑滚动条宽度 scrollbar_width = 15 canvas_width -= scrollbar_width # 计算页面宽度 page_width = page.rect.width # 计算适应画布宽度的缩放比例 self.zoom_factor = canvas_width / page_width def center_view(self): """居中显示PDF页面""" if not self.pdf_document: return # 获取画布和页面尺寸 canvas_width = self.pdf_canvas.winfo_width() canvas_height = self.pdf_canvas.winfo_height() page_width = self.pdf_canvas.bbox(tk.ALL)[2] page_height = self.pdf_canvas.bbox(tk.ALL)[3] # 计算居中位置 if page_width > canvas_width: # 页面宽度大于画布,使用滚动条 xview = 0 else: # 页面宽度小于画布,居中显示 xview = (canvas_width - page_width) / 2 / canvas_width if page_height > canvas_height: # 
页面高度大于画布,使用滚动条 yview = 0 else: # 页面高度小于画布,居中显示 yview = (canvas_height - page_height) / 2 / canvas_height # 设置视图位置 self.pdf_canvas.xview_moveto(xview) self.pdf_canvas.yview_moveto(yview) def go_to_page(self, page_num): """跳转到指定页面""" if 0 <= page_num < self.total_pages: self.current_page = page_num self.page_label.config(text=f"{self.current_page + 1}/{self.total_pages}") self.display_current_page() self.page_entry.delete(0, tk.END) self.page_entry.insert(0, str(page_num + 1)) def jump_to_page(self): """根据输入的页码跳转到指定页面""" if not self.pdf_document: return try: page_num = int(self.page_entry.get()) - 1 if 0 <= page_num < self.total_pages: self.go_to_page(page_num) else: messagebox.showwarning("页码错误", f"请输入1到{self.total_pages}之间的页码") self.page_entry.delete(0, tk.END) self.page_entry.insert(0, str(self.current_page + 1)) except ValueError: messagebox.showwarning("输入错误", "请输入有效的页码") self.page_entry.delete(0, tk.END) self.page_entry.insert(0, str(self.current_page + 1)) def prev_page(self): if self.current_page > 0: self.current_page -= 1 self.page_label.config(text=f"{self.current_page + 1}/{self.total_pages}") self.display_current_page() self.page_entry.delete(0, tk.END) self.page_entry.insert(0, str(self.current_page + 1)) def next_page(self): if self.current_page < self.total_pages - 1: self.current_page += 1 self.page_label.config(text=f"{self.current_page + 1}/{self.total_pages}") self.display_current_page() self.page_entry.delete(0, tk.END) self.page_entry.insert(0, str(self.current_page + 1)) def on_mouse_down(self, event): # 检查是否点击在页面内 if self.page_bbox and self.is_point_in_page(event.x, event.y): self.is_selecting = True self.selection_start = (event.x, event.y) # 限制起点在页面内 self.selection_start = self.clamp_point_to_page(self.selection_start[0], self.selection_start[1]) # 创建选区矩形 self.selection_rect = self.pdf_canvas.create_rectangle( self.selection_start[0], self.selection_start[1], self.selection_start[0], self.selection_start[1], outline="red", width=2, 
stipple="gray25", fill="#FF000033") # 更新选区坐标显示 self.update_selection_coordinates( int(self.selection_start[0]), int(self.selection_start[1]), int(self.selection_start[0]), int(self.selection_start[1]) ) # 状态栏显示选区信息 self.status_var.set(f"选区起点: ({int(self.selection_start[0])}, {int(self.selection_start[1])})") # 清除之前的辅助线条 for line in self.helper_lines: self.pdf_canvas.delete(line) self.helper_lines = [] def on_mouse_drag(self, event): if self.is_selecting and hasattr(self, 'selection_rect'): # 限制终点在页面内 end_x, end_y = self.clamp_point_to_page(event.x, event.y) # 更新选区矩形 self.pdf_canvas.coords(self.selection_rect, self.selection_start[0], self.selection_start[1], end_x, end_y) # 计算选区坐标 x1, y1 = self.selection_start x2, y2 = end_x, end_y # 确保坐标按左上右下排序 if x1 > x2: x1, x2 = x2, x1 if y1 > y2: y1, y2 = y2, y1 # 更新选区坐标显示 self.update_selection_coordinates(int(x1), int(y1), int(x2), int(y2)) # 计算选区尺寸 width = x2 - x1 height = y2 - y1 # 显示选区坐标和尺寸 coord_text = f"选区: ({int(x1)}, {int(y1)}) - ({int(x2)}, {int(y2)}) | 尺寸: {int(width)}×{int(height)} 像素" self.status_var.set(coord_text) # 更新辅助线条 for line in self.helper_lines: self.pdf_canvas.delete(line) self.helper_lines = [] # 绘制水平辅助线 h_line1 = self.pdf_canvas.create_line(x1, y1, x2, y1, fill="blue", dash=(4, 4)) h_line2 = self.pdf_canvas.create_line(x1, y2, x2, y2, fill="blue", dash=(4, 4)) # 绘制垂直辅助线 v_line1 = self.pdf_canvas.create_line(x1, y1, x1, y2, fill="blue", dash=(4, 4)) v_line2 = self.pdf_canvas.create_line(x2, y1, x2, y2, fill="blue", dash=(4, 4)) self.helper_lines.extend([h_line1, h_line2, v_line1, v_line2]) def on_mouse_up(self, event): if self.is_selecting: self.is_selecting = False # 限制终点在页面内 end_x, end_y = self.clamp_point_to_page(event.x, event.y) self.selection_end = (end_x, end_y) # 检查选区是否太小 width = abs(self.selection_end[0] - self.selection_start[0]) height = abs(self.selection_end[1] - self.selection_start[1]) if width < 10 or height < 10: # 选区太小,删除选区 self.pdf_canvas.delete(self.selection_rect) 
self.selection_start = None self.selection_end = None # 重置选区坐标显示 self.update_selection_coordinates(0, 0, 0, 0) self.status_var.set("选区太小,已重置") else: # 更新选区矩形 self.pdf_canvas.coords(self.selection_rect, self.selection_start[0], self.selection_start[1], self.selection_end[0], self.selection_end[1]) # 计算最终选区坐标 x1, y1 = self.selection_start x2, y2 = self.selection_end # 确保坐标按左上右下排序 if x1 > x2: x1, x2 = x2, x1 if y1 > y2: y1, y2 = y2, y1 # 更新选区坐标显示 self.update_selection_coordinates(int(x1), int(y1), int(x2), int(y2)) # 计算选区尺寸 width = x2 - x1 height = y2 - y1 # 显示最终选区坐标和尺寸 coord_text = f"选区已完成: ({int(x1)}, {int(y1)}) - ({int(x2)}, {int(y2)}) | 尺寸: {int(width)}×{int(height)} 像素" self.status_var.set(coord_text) # 清除辅助线条 for line in self.helper_lines: self.pdf_canvas.delete(line) self.helper_lines = [] def update_selection_coordinates(self, x1, y1, x2, y2): """更新选区坐标显示""" # 确保坐标按左上右下排序 if x1 > x2: x1, x2 = x2, x1 if y1 > y2: y1, y2 = y2, y1 # 更新坐标显示 self.selection_x1_var.set(str(x1)) self.selection_y1_var.set(str(y1)) self.selection_x2_var.set(str(x2)) self.selection_y2_var.set(str(y2)) # 计算并更新尺寸 width = x2 - x1 height = y2 - y1 self.selection_width_var.set(str(width)) self.selection_height_var.set(str(height)) def is_point_in_page(self, x, y): """检查点是否在页面内""" if not self.page_bbox: return False x1, y1, x2, y2 = self.page_bbox return x1 <= x <= x2 and y1 <= y <= y2 def clamp_point_to_page(self, x, y): """将点限制在页面范围内""" if not self.page_bbox: return (x, y) x1, y1, x2, y2 = self.page_bbox clamped_x = max(x1, min(x, x2)) clamped_y = max(y1, min(y, y2)) return (clamped_x, clamped_y) def extract_text(self): if not self.pdf_document: messagebox.showinfo("提示", "请先打开PDF文件") return if self.extract_type.get() == "selection": if not self.selection_start or not self.selection_end: messagebox.showinfo("提示", "请先选择文本区域") return # 确保选区坐标正确排序 x0, y0 = min(self.selection_start[0], self.selection_end[0]), min(self.selection_start[1], self.selection_end[1]) x1, y1 = max(self.selection_start[0], 
self.selection_end[0]), max(self.selection_start[1], self.selection_end[1]) # 计算选区在原始PDF中的比例 page = self.pdf_document[self.current_page] # 调整选区坐标为PDF坐标 pdf_x0 = x0 / self.zoom_factor pdf_y0 = y0 / self.zoom_factor pdf_x1 = x1 / self.zoom_factor pdf_y1 = y1 / self.zoom_factor # 创建选区矩形 rect = fitz.Rect(pdf_x0, pdf_y0, pdf_x1, pdf_y1) # 使用dict模式提取文本,获取更详细的布局信息 text_data = page.get_text("dict", clip=rect) else: # 整页提取 page = self.pdf_document[self.current_page] text_data = page.get_text("dict") if text_data and 'blocks' in text_data: # 分析文本块,识别表格结构 table_data = self.analyze_table_structure(text_data) # 添加页码到表格数据 if table_data: self.table_data = [[f"页面 {self.current_page + 1}"] + row for row in table_data] else: self.table_data = None # 更新结果显示 self.result_text.delete(1.0, tk.END) if self.table_data and len(self.table_data) > 0: # 显示表格预览 self.result_text.insert(tk.END, "已识别表格结构:\n\n") for row in self.table_data: row_text = " | ".join([cell if cell else " " for cell in row]) self.result_text.insert(tk.END, f"{row_text}\n") self.status_var.set(f
06-10
<think>好的,我现在需要帮助用户找到使用pywin32库将Word文档转换为图片格式的实现方法或代码示例。首先,我得回忆一下pywin32库的用途,它主要用于操作Windows的COM对象,可以控制Office应用程序如Word、Excel等。用户的需求是将Word文档转成图片,所以可能需要通过Word的API来实现。 首先,我应该考虑使用Word的另存为功能,但常规的另存为选项可能不支持直接保存为图片。可能需要逐页将文档内容复制为图片。或者,是否有其他方法,比如将每一页导出图片格式? 我记得在Word中,可以通过“文件”->“另存为”选择图片格式,但可能仅限于当前页。如果要批量处理所有页,可能需要使用VBA宏或者通过COM接口调用相关方法。由于用户使用的是pywin32,也就是通过Python调用Word的COM接口,所以需要找到对应的对象和方法。 接下来,我应该查找相关的VBA代码示例,然后将其转换为Python代码。例如,在VBA中,可以使用ExportAsFixedFormat方法将文档导出为PDF,但导出图片可能需要不同的方法。或者,使用Chart对象的Export方法,将文档内容复制为图表再导出图片? 另外,还有一种方法是将Word文档先转换为PDF,再将PDF转换为图片。这可能更可靠,但需要依赖额外的库,比如pdf2image。不过用户可能希望直接用pywin32完成,所以需要确认是否有直接的方法。 在引用中,用户提供的参考内容提到了使用StringIO和pyPdf处理PDF旋转,这可能与当前问题无关,但说明用户可能对处理PDF和图片转换有一定了解。不过当前问题聚焦于Word图片,可能需要不同的方法。 我需要考虑步骤:启动Word应用程序,打开文档,然后遍历每一页,将其导出图片。在VBA中,可能使用ActiveDocument.ExportAsFixedFormat到PDF,然后再处理PDF转图片,但用户希望直接转图片。或者,使用Selection.CopyAsPicture方法,然后将剪贴板中的图片保存下来。 可能的步骤: 1. 使用pywin32启动Word并打开文档。 2. 遍历文档的每一页。 3. 将每一页的内容复制为图片。 4. 从剪贴板中获取图片数据并保存为文件。 但如何确定每一页的范围呢?Word的页面划分可能比较复杂,特别是当文档有复杂的格式时。另一种方法是使用ExportAsFixedFormat方法导出图片格式,但需要确认Word是否支持直接导出图片。 查阅资料发现,WordExportAsFixedFormat方法主要用于导出为PDF或XPS。要导出图片,可能需要使用另存为功能,选择图片格式如PNG或JPEG。但每个文档只能保存当前页为图片,因此需要逐页处理。 所以可能的步骤是: 1. 打开Word文档。 2. 遍历每一页。 3. 将当前视图切换到该页。 4. 
使用ActiveDocument.SaveAs方法保存为图片格式,但可能需要设置参数。 或者,使用Selection对象,将每一页的内容选中,然后复制为图片,再保存剪贴板内容。这需要处理剪贴板操作,可能涉及PIL库或其他图像处理库来保存剪贴板中的图像。 例如,代码的大致流程可能是: - 启动Word应用。 - 打开文档。 - 获取文档的总页数。 - 循环每一页: - 跳转到该页。 - 选择该页内容。 - 复制为图片。 - 从剪贴板获取图像并保存。 - 关闭文档和应用。 但如何获取总页数?在Word中,可以通过ActiveDocument.Content.Information(wdActiveEndPageNumber)获取总页数。在Python中,可能需要访问相应的COM属性。 此外,复制为图片后,如何从剪贴板读取图像数据?Python中可以使用win32clipboard模块来处理剪贴板内容。然后使用PIL或pywin32的位图处理功能将剪贴板中的图像保存为文件。 可能的代码结构: ```python import win32com.client as win32 import win32clipboard from PIL import ImageGrab word = win32.Dispatch('Word.Application') doc = word.Documents.Open('document.docx') word.Visible = True # 可选,显示Word界面 for page in range(1, doc.ComputeStatistics(2) + 1): # 获取总页数 # 跳转到指定页 selection = word.Selection selection.GoTo(What=win32.constants.wdGoToPage, Which=win32.constants.wdGoToAbsolute, Count=page) # 选择当前页内容 selection.WholeStory() # 复制为图片 selection.CopyAsPicture() # 从剪贴板获取图片 win32clipboard.OpenClipboard() image = ImageGrab.grabclipboard() if image: image.save(f'page_{page}.png') win32clipboard.CloseClipboard() doc.Close() word.Quit() ``` 不过,上述代码可能需要调整,比如正确选择当前页的内容,以及处理剪贴板中的图像。此外,ComputeStatistics(2)可能返回的是页数,但需要确认参数是否正确。 另外,需要注意Word的页面跳转可能不总是准确,特别是在复杂文档中。因此,可能需要其他方法来确保正确截取每一页的内容。 如果这种方法不可行,可能需要考虑将文档先导出为PDF,再使用其他库将PDF转为图片。但用户希望使用pywin32直接处理,所以可能需要结合两种方法。 总结可能的解决方案: 1. 使用pywin32控制Word,逐页复制内容为图片,保存剪贴板中的图像。 2. 将Word文档导出为PDF,再使用pdf2image等库将PDF转为图片。 3. 使用Word的SaveAs方法,但可能只能保存当前页为图片。 用户的问题明确要求使用pywin32,所以优先考虑第一种方法。但需要处理剪贴板图像的问题,并且确保能够正确获取每一页的内容。 此外,可能需要处理Word中的视图设置,例如切换到“打印布局”视图,以确保页面分页正确。否则,页面跳转可能不准确。 最后,需要测试代码的可行性,并提供给用户参考,同时指出可能的注意事项,比如依赖的库(Pillow处理图像,pywin32处理COM接口),以及处理多页时的准确性。</think>以下是使用pywin32将Word文档转换为图片格式的代码实现方法: ### 分步实现方法 1. **创建Word应用程序对象** 2. **打开目标文档** 3. **遍历文档所有页面** 4. **逐页复制内容为图片** 5. 
**保存剪贴板中的图像**

```python
import win32com.client as win32
import win32clipboard
from PIL import Image
import io

def word_to_images(doc_path, output_folder):
    # 初始化Word应用
    word = win32.Dispatch("Word.Application")
    word.Visible = False  # 后台运行

    try:
        doc = word.Documents.Open(doc_path)
        total_pages = doc.ComputeStatistics(2)  # 获取总页数

        # 设置打印布局视图确保分页准确
        word.ActiveWindow.View.Type = 3  # wdPrintView

        for page in range(1, total_pages + 1):
            # 跳转到指定页
            selection = word.Selection
            selection.GoTo(
                What=win32.constants.wdGoToPage,
                Which=win32.constants.wdGoToAbsolute,
                Count=page
            )

            # 选择当前页内容
            selection.EndKey(Unit=win32.constants.wdStory)
            selection.MoveLeft(Unit=win32.constants.wdCharacter, Count=1)
            selection.GoTo(
                What=win32.constants.wdGoToPage,
                Which=win32.constants.wdGoToPrevious,
                Count=1
            )
            selection.MoveRight(Unit=win32.constants.wdCharacter, Count=1)

            # 复制为图片
            selection.CopyAsPicture()

            # 从剪贴板获取图像
            win32clipboard.OpenClipboard()
            if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB):
                data = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
                img = Image.open(io.BytesIO(data))
                img.save(f"{output_folder}/page_{page}.png")
            win32clipboard.CloseClipboard()

        doc.Close()
    finally:
        word.Quit()
```

### 注意事项

1. 需要安装依赖库:

   ```bash
   pip install pywin32 Pillow
   ```

2. 该方法使用Windows剪贴板传输图像数据,操作期间不要操作剪贴板
3. 复杂文档可能出现分页不准确的情况,建议先转换为PDF再处理[^1]
4. 需要完整安装Microsoft Word应用程序支持

### 替代方案(通过PDF中转)

```python
# 需要先实现Word转PDF(使用同个库)
doc.SaveAs(FileName="temp.pdf", FileFormat=17)

# 再使用pdf2image转换
from pdf2image import convert_from_path
images = convert_from_path("temp.pdf")
for i, image in enumerate(images):
    image.save(f'page_{i+1}.jpg', 'JPEG')
```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值