2016-09-15 10 views
0

PDFファイル内の単語を検索して置き換える必要があります。例えば、「wrong」を検索して「right」に置き換えます。iText v5.5.9を使用してテストプログラムを作成したところ(参考: http://www.codeguru.com/columns/vb/manipulating-pdf-files-with-itextsharp-and-vb.net-2012.htm)、正常に動作しました(ただし、置き換え後のテキストは元のテキストの上に重ねて描画されているようです)。iText 7ではこれがより適切に、あるいはより簡単に実現できるのでしょうか。すでに実装したことのある方がいれば、助けていただけるとありがたいです。(タイトル: iText7 .NET 検索/置換)

' Based on http://www.codeguru.com/columns/vb/manipulating-pdf-files-with-itextsharp-and-vb.net-2012.htm 

Imports System.IO 'Working With Files 
Imports System.Text 'Working With Text 
Imports System.Data.SqlClient 

Imports iTextSharp.text 'Core PDF Text Functionalities 
Imports iTextSharp.text.pdf 'PDF Content 
Imports iTextSharp.text.pdf.parser 'Content Parser 

Imports pdf_clr.LocTextExtraction 'Import LocationTextExtractionStrategy Capabilities 

Public Class Class1 

''' <summary>
''' Loads a PDF from the docstore table, replaces every occurrence of
''' <paramref name="strSearch"/> on every page with <paramref name="strReplace"/>
''' (via <see cref="DoSearchReplace"/>), re-encrypts the result, writes it to
''' <paramref name="strDest"/> and, when the id is numeric, stores it back in the database.
''' </summary>
''' <param name="strSource">Id of the docstore row to load. The same value is also used as the
''' password for opening the PDF and as the owner password of the written PDF.</param>
''' <param name="strDest">Path of the check file to write. When Nothing or empty, a random file
''' name under C:\tmp\ is used.</param>
''' <param name="iDocType">Currently unused; kept for interface compatibility.</param>
''' <param name="strSearch">Text to look for.</param>
''' <param name="strReplace">Text drawn in place of each match.</param>
''' <param name="bCase">True for a case-sensitive search, False to ignore case.</param>
Public Shared Sub ReplacePDFText(ByVal strSource As String, ByVal strDest As String, ByVal iDocType As SByte, ByVal strSearch As String, ByVal strReplace As String, ByVal bCase As Boolean) 
    Dim strSqlConnection As String = "Data Source=SERVER;Initial Catalog=DATABASE;Integrated Security=True"
    Dim strPassword As String = strSource
    Dim dbPDF As Byte() = Nothing 'PDF bytes: first the document from the database, later the rewritten one

    'Load the PDF. The id is passed as a parameter rather than concatenated into
    'the SQL text, so a crafted strSource cannot alter the query.
    'NOTE(review): assumes docstore.id is numeric so SQL Server converts the string
    'parameter implicitly - confirm against the table definition.
    Using connection As New SqlConnection(strSqlConnection)
        connection.Open()
        Using command As New SqlCommand("SELECT pdf FROM docstore WHERE id=@id", connection)
            command.Parameters.Add("@id", Data.SqlDbType.NVarChar).Value = strSource
            'TryCast yields Nothing for "no row" (Nothing) as well as for a NULL column (DBNull)
            dbPDF = TryCast(command.ExecuteScalar(), Byte())
        End Using
    End Using

    'Nothing to do when no document was found
    If dbPDF Is Nothing Then
        Return
    End If

    If String.IsNullOrEmpty(strDest) Then
        strDest = "C:\tmp\" & System.IO.Path.GetRandomFileName() & ".pdf"
    End If

    Dim passwordBytes As Byte() = Encoding.ASCII.GetBytes(strPassword)
    Dim scCase As StringComparison = If(bCase, StringComparison.CurrentCulture, StringComparison.CurrentCultureIgnoreCase)

    Dim pdfFileReader As New PdfReader(dbPDF, passwordBytes) 'Read PDF
    Try
        Using msPDF As New MemoryStream()
            'Plain local (not a Using variable) because DoSearchReplace takes it ByRef
            Dim psStamp As New PdfStamper(pdfFileReader, msPDF) 'MemoryStream as destination
            Try
                'Keep the MemoryStream open when the stamper closes so it can still be read
                psStamp.Writer.CloseStream = False

                'No user password; strSource-derived owner password; printing allowed
                psStamp.SetEncryption(Nothing, passwordBytes, PdfWriter.ALLOW_PRINTING, PdfWriter.DO_NOT_ENCRYPT_METADATA)

                For intCurrPage As Integer = 1 To pdfFileReader.NumberOfPages
                    Dim lteStrategy As New LocTextExtractionStrategy
                    Dim pcbContent As PdfContentByte = psStamp.GetUnderContent(intCurrPage)

                    'Hand the under-content spacing/scaling to the strategy
                    lteStrategy.UndercontentCharacterSpacing = pcbContent.CharacterSpacing
                    lteStrategy.UndercontentHorizontalScaling = pcbContent.HorizontalScaling

                    'Run the extraction; the returned text is not needed, the call is made
                    'so that the strategy receives the page's text before it is queried
                    PdfTextExtractor.GetTextFromPage(pdfFileReader, intCurrPage, lteStrategy)

                    DoSearchReplace(lteStrategy, pcbContent, psStamp, strSearch, strReplace, scCase, "SearchReplaceLayer")
                Next 'page
            Finally
                'Closing the stamper is what writes the finished document into msPDF
                psStamp.Close()
            End Try

            dbPDF = msPDF.ToArray()
        End Using
    Finally
        pdfFileReader.Close()
    End Try

    'Write file as check during testing
    File.WriteAllBytes(strDest, dbPDF)

    If IsNumeric(strSource) Then
        Using connection As New SqlConnection(strSqlConnection)
            Using cmd As New SqlCommand("sp_DOCSTORE_ADD_binary", connection) 'updates or inserts into db
                cmd.CommandType = Data.CommandType.StoredProcedure
                'stored procedure parameters as needed
                cmd.Parameters.Add("@FILE", Data.SqlDbType.VarBinary).Value = dbPDF
                cmd.Parameters.Add("@retvalue", Data.SqlDbType.Int).Direction = Data.ParameterDirection.ReturnValue

                connection.Open()
                cmd.ExecuteNonQuery()
            End Using
        End Using
    End If

End Sub 

''' <summary>
''' For every location at which <paramref name="strSearch"/> was found by the extraction
''' strategy, paints a white rectangle over that location and draws
''' <paramref name="strReplace"/> at the rectangle's lower-left corner inside a PDF layer.
''' </summary>
''' <param name="lteStrategy">Strategy that has already processed the page; queried for match rectangles.</param>
''' <param name="pcbContent">Content stream the rectangles and replacement text are written to.</param>
''' <param name="psStamp">Stamper whose writer owns the layer that is created.</param>
''' <param name="strSearch">Text to look for.</param>
''' <param name="strReplace">Text drawn at each match.</param>
''' <param name="scCase">Comparison mode passed to the strategy's search.</param>
''' <param name="strLayer">Name of the layer that receives the replacement text.</param>
Public Shared Sub DoSearchReplace(ByRef lteStrategy As LocTextExtractionStrategy, ByRef pcbContent As PdfContentByte, ByRef psStamp As PdfStamper, ByVal strSearch As String, ByVal strReplace As String, ByVal scCase As StringComparison, ByVal strLayer As String) 
    'Determine match(es)
    Dim lstMatches As List(Of iTextSharp.text.Rectangle) = lteStrategy.GetTextLocations(strSearch, scCase)
    Dim pdLayer As New PdfLayer(strLayer, psStamp.Writer) 'Layer that holds the replacement text

    If lstMatches Is Nothing OrElse lstMatches.Count = 0 Then
        Return
    End If

    'The font does not depend on the match, so create it once
    Dim bfReplace As BaseFont = BaseFont.CreateFont(BaseFont.HELVETICA_OBLIQUE, BaseFont.CP1252, BaseFont.NOT_EMBEDDED)

    For Each rctRect As Rectangle In lstMatches 'Loop through each match

        'Select white for EVERY match: the fill colour is left at black by the end of
        'the previous iteration, so setting it only once before the loop would paint
        'the second and later cover rectangles black.
        pcbContent.SetColorFill(BaseColor.WHITE)
        pcbContent.Rectangle(rctRect.Left, rctRect.Bottom, rctRect.Width, rctRect.Height) 'Cover rectangle
        pcbContent.Fill()

        pcbContent.BeginLayer(pdLayer)

        pcbContent.SetGState(New PdfGState())
        pcbContent.SetColorFill(BaseColor.BLACK) 'Colour of the replacement letters
        pcbContent.BeginText()
        pcbContent.SetTextMatrix(rctRect.Left, rctRect.Bottom) 'Start text at the match's lower-left corner
        pcbContent.SetFontAndSize(bfReplace, 12)
        pcbContent.ShowText(strReplace)
        pcbContent.EndText()

        pcbContent.EndLayer()

    Next 'rectangle
End Sub 

上記は、R/Wパスワードを使用してデータベースからPDFファイルを読み出し、その後R/Wパスワードを設定して書き込む、v5のテストコードです。よろしくお願いします。

+0

VBでソリューションをお探しですか?私はC#で私のアイデアを表現することができます... – mkl

答えて

0

基本的な考え方(擬似コード)は次のとおりです。

  • IEventListener/ITextExtractionStrategyを実装する
  • そのクラスを、ドキュメントのすべてのページについてPdfTextExtractorへの引数として使用する
  • これにより、あなたのクラスはドキュメント内のすべてのイベントについて通知を受ける。関心があるのはTextRenderInfoタイプのイベント(ページにテキストを描画するイベント)である

    1. TextRenderInfoイベントを集約し、(論理的な読み順で)並べ替えて、文書内のテキストの概要を取得する
    2. 正規表現を使用して、所望の特性に一致するすべてのテキストを検索する
    3. 見つかったテキストを、それが由来するTextRenderInfoオブジェクトにマッピングし直す

  • 収集したTextRenderInfoオブジェクトに基づいて、置き換えたい部分を置換しつつPDFドキュメントを再構築する