2010-07-13 11 views

Nous avons une application C# qui lit un document PDF à la recherche d'un code-barres ; si le code-barres est trouvé, elle téléverse le document dans IBM FileNet P8.

Vérifier si un document PDF est vide en C#

Si le code-barres n'est pas trouvé, il sera déplacé dans un dossier différent.

Nous avons maintenant une autre demande du client : vérifier si le PDF est vide, c'est-à-dire détecter le cas où un utilisateur scanne une page blanche et envoie le fichier PDF par e-mail.

Je dois vérifier si le document est vide et, si c'est le cas, en aviser l'utilisateur.

J'utilise iTextSharp.



Quelle est la question? Avez-vous essayé quelque chose et cela n'a pas fonctionné? Est-ce que cela aide? - > http://stackoverflow.com/questions/2550796/reading-pdf-content-with-itextsharp-dll-in-vb-net-or-c En résumé: analyser le contenu d'un fichier PDF N'EST PAS POSSIBLE avec iText. –


Cela a-t-il fonctionné pour vous acadia? –





On dirait que cela renverra 'outp.WriteLine("No text found on page " + pageNum);' pour toutes les pages, ou 0/1 :)

Je pense que cela peut vous aider ... j'espère.

namespace iTextSharp.text.pdf.parser{ 

    /**
    * Tool that parses the content of a PDF document.
    * @since 2.1.4
    */
    public class PdfContentReaderTool { 

     /**
      * Shows the detail of a dictionary, starting at nesting depth 0.
      * This is similar to the PdfLister functionality.
      * @param dic the dictionary of which you want the detail
      * @return a String representation of the dictionary
      */
     public static String GetDictionaryDetail(PdfDictionary dic){
         // Delegate to the depth-aware overload; 0 marks the outermost dictionary.
         return GetDictionaryDetail(dic, 0);
     }

     /**
      * Shows the detail of a dictionary.
      * Every entry is written as key=value inside one pair of parentheses; each
      * entry whose value is itself a dictionary is then expanded on its own line,
      * indented with one tab per nesting level.
      * @param dic the dictionary of which you want the detail
      * @param depth the depth of the current dictionary (for nested dictionaries)
      * @return a String representation of the dictionary
      */
     public static String GetDictionaryDetail(PdfDictionary dic, int depth){
         StringBuilder builder = new StringBuilder();
         builder.Append('(');
         IList<PdfName> subDictionaries = new List<PdfName>();
         bool hasEntries = false;
         foreach (PdfName key in dic.Keys) {
             PdfObject val = dic.GetDirectObject(key);
             // Remember nested dictionaries so they can be expanded after the flat listing.
             if (val != null && val.IsDictionary()) {
                 subDictionaries.Add(key);
             }
             builder.Append(key);
             builder.Append('=');
             builder.Append(val);
             builder.Append(", ");
             hasEntries = true;
         }
         // Drop the trailing ", " separator; skipped for an empty dictionary, where
         // shrinking the builder by two characters would throw.
         if (hasEntries) {
             builder.Length = builder.Length - 2;
         }
         builder.Append(')');
         foreach (PdfName pdfSubDictionaryName in subDictionaries) {
             builder.Append('\n');
             for (int i = 0; i < depth + 1; i++){
                 builder.Append('\t');
             }
             builder.Append("Subdictionary ");
             builder.Append(pdfSubDictionaryName);
             builder.Append(" = ");
             builder.Append(GetDictionaryDetail(dic.GetAsDict(pdfSubDictionaryName), depth + 1));
         }
         return builder.ToString();
     }

     /**
      * Displays a summary of the entries in the XObject dictionary for the stream.
      * For every XObject a header line with its name, subtype and length is written;
      * for XObjects that are not images the raw content bytes follow, terminated by
      * an "End of Content" line.
      * @param resourceDic the resource dictionary for the stream (may be null)
      * @return a string with the summary of the entries, or "No XObjects" when
      *         there is no XObject dictionary
      * @throws IOException
      * @since 5.0.2
      */
     public static String GetXObjectDetail(PdfDictionary resourceDic) {
         // A page without a /Resources entry hands us null; treat it like "no XObjects".
         if (resourceDic == null)
             return "No XObjects";

         PdfDictionary xobjects = resourceDic.GetAsDict(PdfName.XOBJECT);
         if (xobjects == null)
             return "No XObjects";

         StringBuilder sb = new StringBuilder();
         foreach (PdfName entryName in xobjects.Keys) {
             PdfStream xobjectStream = xobjects.GetAsStream(entryName);
             // Skip entries that are not streams instead of dereferencing null.
             if (xobjectStream == null)
                 continue;

             PdfObject subtype = xobjectStream.Get(PdfName.SUBTYPE);
             sb.Append("------ " + entryName + " - subtype = " + subtype + " = " + xobjectStream.GetAsNumber(PdfName.LENGTH) + " bytes ------\n");

             // Compare from the constant's side so a missing /Subtype cannot throw.
             if (!PdfName.IMAGE.Equals(subtype)){
                 byte[] contentBytes = ContentByteUtils.GetContentBytesFromContentObject(xobjectStream);

                 // Content bytes are emitted one char per byte, without decoding.
                 foreach (byte b in contentBytes) {
                     sb.Append((char)b);
                 }

                 sb.Append("------ " + entryName + " - subtype = " + subtype + "End of Content" + "------\n");
             }
         }

         return sb.ToString();
     }

     /**
      * Writes information about a specific page from PdfReader to the specified output stream.
      * Four sections are written: the page dictionary, a summary of its XObjects,
      * the raw content stream, and the text extracted from the page. When no text
      * can be extracted, "No text found on page N" is written instead.
      * @since 2.1.5
      * @param reader the PdfReader to read the page content from
      * @param pageNum the page number to read
      * @param outp the output stream to send the content to
      * @throws IOException
      */
     public static void ListContentStreamForPage(PdfReader reader, int pageNum, TextWriter outp) {
         outp.WriteLine("==============Page " + pageNum + "====================");
         outp.WriteLine("- - - - - Dictionary - - - - - -");
         PdfDictionary pageDictionary = reader.GetPageN(pageNum);
         outp.WriteLine(GetDictionaryDetail(pageDictionary));

         outp.WriteLine("- - - - - XObject Summary - - - - - -");
         // A page may have no /Resources entry; report that here rather than passing null on.
         PdfDictionary resources = pageDictionary.GetAsDict(PdfName.RESOURCES);
         outp.WriteLine(resources == null ? "No XObjects" : GetXObjectDetail(resources));

         outp.WriteLine("- - - - - Content Stream - - - - - -");
         RandomAccessFileOrArray f = reader.SafeFile;
         byte[] contentBytes;
         try {
             contentBytes = reader.GetPageContent(pageNum, f);
         } finally {
             // Release the file handle even when reading the page content fails.
             f.Close();
         }

         outp.Flush();

         // Content bytes are emitted one char per byte, without decoding.
         foreach (byte b in contentBytes) {
             outp.Write((char)b);
         }

         outp.Flush();

         outp.WriteLine("- - - - - Text Extraction - - - - - -");
         String extractedText = PdfTextExtractor.GetTextFromPage(reader, pageNum, new LocationTextExtractionStrategy());
         if (extractedText.Length != 0)
             outp.WriteLine(extractedText);
         else
             outp.WriteLine("No text found on page " + pageNum);

         // Blank line separates this page's report from the next one.
         outp.WriteLine();
     }



     /**
      * Writes information about each page in a PDF file to the specified output stream.
      * Pages are processed in order, from page 1 to the last page.
      * @since 2.1.5
      * @param pdfFile the path of the PDF file to read
      * @param outp the output stream to send the content to
      * @throws IOException
      */
     public static void ListContentStream(string pdfFile, TextWriter outp) {
         PdfReader reader = new PdfReader(pdfFile);
         try {
             int maxPageNum = reader.NumberOfPages;

             // PDF page numbers are 1-based.
             for (int pageNum = 1; pageNum <= maxPageNum; pageNum++){
                 ListContentStreamForPage(reader, pageNum, outp);
             }
         } finally {
             // Always release the reader, even when a page fails to parse.
             reader.Close();
         }
     }


     /**
      * Writes information about the specified page in a PDF file to the specified output stream.
      * @since 2.1.5
      * @param pdfFile the path of the PDF file to read
      * @param pageNum the page number to read
      * @param outp the output stream to send the content to
      * @throws IOException
      */
     public static void ListContentStream(string pdfFile, int pageNum, TextWriter outp) {
         PdfReader reader = new PdfReader(pdfFile);
         try {
             ListContentStreamForPage(reader, pageNum, outp);
         } finally {
             // Always release the reader, even when the page fails to parse.
             reader.Close();
         }
     }

     /**
      * Writes information about each page in a PDF file to the specified file, or to standard output.
      * @param args the PDF file, optionally followed by an output file (or "stdout") and a page number
      */
     public static void Main(String[] args) { 
       if (args.Length < 1 || args.Length > 3){ 
        Console.WriteLine("Usage: PdfContentReaderTool <pdf file> [<output file>|stdout] [<page num>]"); 

       TextWriter writer = Console.Out; 
       if (args.Length >= 2){ 
        if (!Util.EqualsIgnoreCase(args[1], "stdout")) { 
         Console.WriteLine("Writing PDF content to " + args[1]); 
         writer = new StreamWriter(args[1]); 

       int pageNum = -1; 
       if (args.Length >= 3){ 
        pageNum = int.Parse(args[2]); 

       if (pageNum == -1){ 
        ListContentStream(args[0], writer); 
       } else { 
        ListContentStream(args[0], pageNum, writer); 

       if (args.Length >= 2){ 
        Console.WriteLine("Finished writing content to " + args[1]); 
      } catch (Exception e){ 