Uncategorized

Automatically remove blank pages from pdf

I found myself in a situation where I had to automatically generate PDFs from spreadsheets and then automatically remove the blank pages from them. I found an unfinished (and badly formatted) solution here, so I took a few minutes to finish it:

non-blank-page-ranges.py

#! /usr/bin/python3
"""Read text input and print non-blank page ranges.

Pages should be separated by ^L (form feed) pagebreaks. The ranges are
printed on one line in the format used by pdftk (e.g. "1-3 5-8 10-10").
The exit status is 1 (and nothing is printed) when no non-blank page is
found.
"""

import sys

PAGE_BREAK = "\x0c"  # ^L, form feed


def find_nonblank_pages(lines):
    """Return the 1-based numbers of the pages that contain text.

    *lines* is any iterable of strings (e.g. an open text stream). A
    page counts as blank when it holds nothing but whitespace, so stray
    newlines or spaces between two pagebreaks do not keep a page alive.
    Text following the last pagebreak is treated as one more page, so
    input without a trailing pagebreak does not lose its final page.
    """
    nonblanks = []
    page = 1
    blank = True
    for line in lines:
        for char in line:
            # The pagebreak must be tested first: "\x0c".isspace() is True.
            if char == PAGE_BREAK:
                if not blank:
                    nonblanks.append(page)
                # new page
                page += 1
                blank = True
            elif not char.isspace():
                blank = False

    # Text after the final pagebreak (or input with no pagebreak at all).
    if not blank:
        nonblanks.append(page)
    return nonblanks


def format_ranges(pages):
    """Collapse ascending page numbers into pdftk-style ranges.

    Consecutive pages are merged into "first-last"; a single page is
    written as "n-n". Returns the ranges joined by single spaces, or an
    empty string when *pages* is empty.
    """
    ranges = []
    first = last = None
    for page in pages:
        if first is None:
            first = last = page
        elif page == last + 1:
            last = page
        else:
            # At least one page got skipped: close the current range
            # and start a new one.
            ranges.append("{}-{}".format(first, last))
            first = last = page
    if first is not None:
        ranges.append("{}-{}".format(first, last))
    return " ".join(ranges)


def main(stdin=None, stdout=None):
    """Read pages from *stdin*, print the ranges to *stdout*.

    Both streams default to the process's standard streams. Returns the
    exit status: 0 on success, 1 if no non-blank page was found.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    nonblanks = find_nonblank_pages(stdin)
    if not nonblanks:
        return 1

    print(format_ranges(nonblanks), file=stdout)
    return 0


if __name__ == "__main__":
    sys.exit(main())

pdf-remove-blank-pages

#! /bin/bash
# Remove blank pages from every PDF given on the command line.
#
# For each file the text is extracted with pdftotext and piped to
# non-blank-page-ranges.py, which prints the page ranges to keep. The
# original is renamed to "<file>.old" and pdftk writes a new PDF with
# only those ranges under the original name. Files are skipped (with a
# message on stderr) when no non-blank page is found, when "<file>.old"
# already exists, or when the rename fails.

for filename in "$@"; do
  # get non-blank ranges
  ranges="$(pdftotext "$filename" - | \
    "$HOME/.bin/non-blank-page-ranges.py")"

  if [ -z "$ranges" ]; then
    echo "no non-blank pages found in $filename" >&2
    continue
  fi

  # split the space-separated ranges into separate pdftk arguments
  # explicitly instead of relying on unquoted expansion
  read -r -a range_args <<< "$ranges"

  # rename pdf
  if [ -e "${filename}.old" ]; then
    echo "file exists: ${filename}.old" >&2
    continue
  fi

  mv -n -- "$filename" "${filename}.old"

  if [ -e "$filename" ] || [ ! -e "${filename}.old" ]; then
    echo "couldn't rename file $filename" >&2
    continue
  fi

  # create new pdf with non-blank pages only; if pdftk fails, put the
  # original back so the file does not vanish under its own name
  if ! pdftk "${filename}.old" cat "${range_args[@]}" output "$filename"; then
    echo "pdftk failed for $filename, restoring original" >&2
    rm -f -- "$filename"
    mv -- "${filename}.old" "$filename"
  fi
done

The export to pdf macro

The automatic PDF export is based on the macro from here, with this fix applied (i.e. the first argument of executeDispatch becomes document.getCurrentController().getFrame() instead of just document).

' Export the spreadsheet at path xlsFile to a PDF file.
'
' The document is loaded with the "Hidden" property set, exported through
' the ".uno:ExportToPDF" dispatch using the "calc_pdf_Export" filter, and
' closed again. The PDF path is derived from xlsFile (see below).
'
' xlsFile: file path of the spreadsheet; it is passed through ConvertToURL.
'
' NOTE(review): MakePropertyValue is not defined in this listing; presumably
' a helper that builds a com.sun.star.beans.PropertyValue - confirm it is
' available in the same library.
Sub exportPDF(xlsFile)

  xlsURL = ConvertToURL(xlsFile)

  ' Open the document.
  ' NOTE(review): the property array has a trailing comma after its only
  ' element - confirm this is accepted as written.
  xlsDoc = StarDesktop.loadComponentFromURL(xlsURL, "_blank", 0, Array(_
             MakePropertyValue("Hidden", True),_
             )_
           )

  ' Export PDF
  ' http://user.services.openoffice.org/en/forum/viewtopic.php?f=9&t=31957#p146050
  ' corereflection error: http://www.oooforum.org/forum/viewtopic.phtml?t=27661
  '
  ' The PDF path is xlsFile with its last three characters replaced by "pdf".
  ' NOTE(review): this only yields a clean name for three-letter extensions
  ' (e.g. "a.xls" -> "a.pdf"); "a.xlsx" would become "a.xpdf" - confirm the
  ' expected inputs.
  pdfFile = Left(xlsFile, Len(xlsFile)-3) + "pdf"
  pdfURL = ConvertToURL(pdfFile)

  ' The dispatch is executed on the frame of the document's current
  ' controller, not on the document itself. Each FilterData entry is an
  ' array of (name, 0, value, DIRECT_VALUE).
  ' NOTE(review): the final FilterData entry has an empty name and an omitted
  ' value; it looks like a terminator carried over from the forum code -
  ' confirm it is required.
  createUnoService("com.sun.star.frame.DispatchHelper").executeDispatch(_
    xlsDoc.getCurrentController().getFrame(), ".uno:ExportToPDF", "", 0, Array(_
      MakePropertyValue("URL", pdfURL),_
      MakePropertyValue("FilterName", "calc_pdf_Export"),_
      MakePropertyValue("FilterData",_
        Array(_
          Array("UseLosslessCompression",0,true,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("Quality",0,90,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("ReduceImageResolution",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("MaxImageResolution",0,300,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("UseTaggedPDF",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("SelectPdfVersion",0,0,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("ExportNotes",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("ExportBookmarks",0,true,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("OpenBookmarkLevels",0,-1,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("UseTransitionEffects",0,true,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("IsSkipEmptyPages",0,true,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("IsAddStream",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("FormsType",0,0,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("ExportFormFields",0,true,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("HideViewerToolbar",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("HideViewerMenubar",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("HideViewerWindowControls",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("ResizeWindowToInitialPage",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("CenterWindow",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("OpenInFullScreenMode",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("DisplayPDFDocumentTitle",0,true,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("InitialView",0,0,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("Magnification",0,0,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("Zoom",0,100,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("PageLayout",0,0,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("FirstPageOnLeft",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("InitialPage",0,1,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("Printing",0,2,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("Changes",0,4,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("EnableCopyingOfContent",0,true,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("EnableTextAccessForAccessibilityTools",0,true,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("ExportLinksRelativeFsys",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("PDFViewSelection",0,0,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("ConvertOOoTargetToPDFTarget",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("ExportBookmarksToPDFDestination",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("_OkButtonString",0,"",com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("EncryptFile",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("DocumentOpenPassword",0,"",com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("RestrictPermissions",0,false,com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("PermissionPassword",0,"",com.sun.star.beans.PropertyState.DIRECT_VALUE),_
          Array("",0,,com.sun.star.beans.PropertyState.DIRECT_VALUE)_
          )_
        )_
      )_
    )

  ' Close the document that was opened for the export.
  xlsDoc.Close(True)
End Sub

 

Advertisements
Standard