Source code for pympress.document

# -*- coding: utf-8 -*-
#
#       document.py
#
#       Copyright 2009, 2010 Thomas Jost <thomas.jost@gmail.com>
#       Copyright 2015 Cimbali <me@cimba.li>
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.
"""
:mod:`pympress.document` -- document handling
---------------------------------------------

This module contains several classes that are used for managing documents (only
PDF documents are supported at the moment, but other formats may be added in the
future).

An important point is that this module is *completely* independent from the GUI:
there should not be any GUI-related code here, except for page rendering (and
only rendering itself: the preparation of the target surface must be done
elsewhere).
"""

import logging
logger = logging.getLogger(__name__)

import math
import enum
import pathlib
import tempfile
import mimetypes
import webbrowser
import collections
from urllib.request import url2pathname
from urllib.parse import urlsplit

import gi
gi.require_version('Poppler', '0.18')
from gi.repository import Poppler

from pympress.util import fileopen


[docs] def get_extension(mime_type): """ Returns a valid filename extension (recognized by python) for a given mime type. Args: mime_type (`str`): The mime type for which to find an extension Returns: `str`: A file extension used for the given mimetype """ if not mimetypes.inited: mimetypes.init() for ext in mimetypes.types_map: if mimetypes.types_map[ext] == mime_type: return ext
[docs] class PdfPage(enum.IntEnum): """ Represents the part of a PDF page that we want to draw. """ #: No notes on PDF page, only falsy value NONE = 0 #: Full PDF page (without notes) FULL = 1 #: Bottom half of PDF page BOTTOM = 2 #: Top half of PDF page TOP = 3 #: Right half of PDF page RIGHT = 4 #: Left half of PDF page LEFT = 5 #: Full page + draw another page for notes, which is after the slides AFTER = 6 #: Complementary of AFTER: for a notes page, the slide page is BEFORE by half a document BEFORE = 7 #: Slides on even pages (0-indexed), notes on uneven pages ODD = 8 #: Complementary of ODD EVEN = 9 #: An arbitrary mapping of notes pages to slide pages MAP = 10 #: Reverse the arbitrary mapping MAP RMAP = 11
[docs] def complement(val): """ Return the enum value for the other part of the page. """ return PdfPage(val ^ 1)
[docs] def scale(val): """ Return the enum value that does only scaling not shifting. """ return PdfPage(val | 1)
[docs] def direction(val): """ Returns whether the pdf page/notes mode is horizontal or vertical. Returns: `str`: a string representing the direction that can be used as the key in the config section """ if val == PdfPage.LEFT or val == PdfPage.RIGHT: return 'horizontal' elif val == PdfPage.TOP or val == PdfPage.BOTTOM: return 'vertical' elif val == PdfPage.AFTER or val == PdfPage.BEFORE: return 'page number' elif val == PdfPage.EVEN or val == PdfPage.ODD: return 'page parity' elif val == PdfPage.MAP or val == PdfPage.RMAP: return 'page mapping' else: return None
[docs] def from_screen(val, x, y, x2 = None, y2 = None): """ Transform visible part of the page coordinates to full page coordinates. Pass 2 floats to transform coordinates, 4 to transform margins, i.e. the second pair of coordinates is taken from the opposite corner. Args: x (`float`): x coordinate on the screen, on a scale 0..1 y (`float`): y coordinate on the screen, on a scale 0..1 x2 (`float`): second x coordinate on the screen, from the other side, on a scale 0..1 y2 (`float`): second y coordinate on the screen, from the other side, on a scale 0..1 """ if val == PdfPage.RIGHT: page = ((1 + x) / 2., y) elif val == PdfPage.LEFT: page = (x / 2., y) elif val == PdfPage.BOTTOM: page = (x, (1 + y) / 2.) elif val == PdfPage.TOP: page = (x, y / 2.) else: page = (x, y) if x2 is None or y2 is None: return page else: return page + val.complement().from_screen(x2, y2)
[docs] def to_screen(val, x, y, x2 = None, y2 = None): """ Transform full page coordinates to visible part coordinates. Pass 2 floats to transform coordinates, 4 to transform margins, i.e. the second pair of coordinates is taken from the opposite corner. Args: x (`float`): x coordinate on the page, on a scale 0..1 y (`float`): y coordinate on the page, on a scale 0..1 x2 (`float`): optional second x coordinate on the page, on a scale 0..1 y2 (`float`): optional second y coordinate on the page, on a scale 0..1 """ if val == PdfPage.RIGHT: screen = (x * 2 - 1, y) elif val == PdfPage.LEFT: screen = (x * 2, y) elif val == PdfPage.BOTTOM: screen = (x, y * 2 - 1) elif val == PdfPage.TOP: screen = (x, y * 2) else: screen = (x, y) if x2 is None or y2 is None: return screen else: return screen + val.to_screen(x2, y2)
#: A class that holds all the properties for media files Media = collections.namedtuple('Media', ['x1', 'y1', 'x2', 'y2', 'filename', 'autoplay', 'repeat', 'poster', 'show_controls', 'type', 'start_pos', 'duration'], defaults=[False, False, False, False, '', 0., 0.])
[docs] class Page(object): """ Class representing a single page. It provides several methods used by the GUI for preparing windows for displaying pages, managing hyperlinks, etc. Args: doc (:class:`~Poppler.Page`): the poppler object around the page number (`int`): number of the page to fetch in the document parent (:class:`~pympress.document.Document`): the parent Document class """ #: Page handled by this class (instance of :class:`~Poppler.Page`) page = None #: `int`, number of the current page (starting from 0) page_nb = -1 #: `str` representing the page label page_label = None #: All the links in the page, as a `list` of :class:`~pympress.document.Link` instances links = [] #: All the media in the page, as a `list` of :class:`~pympress.document.Media` medias = [] #: `float`, page width pw = 0. #: `float`, page height ph = 0. #: All text annotations annotations = [] #: Instance of :class:`~pympress.document.Document` that contains this page. parent = None def __init__(self, page, number, parent): self.page = page self.page_nb = number self.parent = parent self.links = [] self.medias = [] self.annotations = [] if self.page is None: return # Get page label self.page_label = self.page.get_label() # Read page size self.pw, self.ph = self.page.get_size() # Read links on the page for link in self.page.get_link_mapping(): action = self.get_link_action(link.action.type, link.action) my_link = Link(link.area.x1, link.area.y1, link.area.x2, link.area.y2, action) self.links.append(my_link) # Read annotations, in particular those that indicate media for annotation in self.page.get_annot_mapping(): annot_type = annotation.annot.get_annot_type() if annot_type == Poppler.AnnotType.LINK: # just an Annot, not subclassed -- probably redundant with links continue elif annot_type == Poppler.AnnotType.MOVIE: movie = annotation.annot.get_movie() filepath = self.parent.get_full_path(movie.get_filename()) if not filepath: logger.error(_("Pympress can not find file ") + movie.get_filename()) continue media_rect = ( annotation.area.x1 / self.pw, # left 1 - annotation.area.y2 / self.ph, # top annotation.area.x2 / self.pw, # right 1 - annotation.area.y1 / self.ph, # bottom ) movie_options = {'show_controls': movie.show_controls(), 'poster': movie.need_poster()} try: movie_options['repeat'] = movie.get_play_mode() == Poppler.MoviePlayMode.REPEAT movie_options['start_pos'] = movie.get_start() / 1e9 movie_options['duration'] = movie.get_duration() / 1e9 # NB: autoplay not part of Poppler’s MovieActivationParameters struct except AttributeError: pass # Missing functions in pre-21.04 Poppler versions media = Media(*media_rect, filepath, **movie_options) self.medias.append(media) action = Link.build_closure(self.parent.play_media, hash(media)) elif annot_type == Poppler.AnnotType.SCREEN: action_obj = annotation.annot.get_action() if not action_obj: continue action = self.get_annot_action(action_obj.any.type, action_obj, annotation.area) if not action: continue elif annot_type == Poppler.AnnotType.FILE_ATTACHMENT: attachment = annotation.annot.get_attachment() filename = pathlib.Path(attachment.name) with tempfile.NamedTemporaryFile('wb', suffix=filename.suffix, prefix=filename.stem, delete=False) as f: # now the file name is shotgunned filename = pathlib.Path(f.name) self.parent.remove_on_exit(filename) if not attachment.save(str(filename)): logger.error(_("Pympress can not extract attached file")) continue action = Link.build_closure(fileopen, filename) elif annot_type in {Poppler.AnnotType.TEXT, Poppler.AnnotType.POPUP, Poppler.AnnotType.FREE_TEXT}: # text-only annotations, hide them from screen and show them in annotations popup content = annotation.annot.get_contents() if content: self.annotations.append(annotation.annot) self.page.remove_annot(annotation.annot) continue elif annot_type in {Poppler.AnnotType.STRIKE_OUT, Poppler.AnnotType.HIGHLIGHT, Poppler.AnnotType.UNDERLINE, Poppler.AnnotType.SQUIGGLY, Poppler.AnnotType.POLYGON, Poppler.AnnotType.POLY_LINE, Poppler.AnnotType.SQUARE, Poppler.AnnotType.CIRCLE, Poppler.AnnotType.CARET, Poppler.AnnotType.LINE, Poppler.AnnotType.STAMP, Poppler.AnnotType.INK}: # Poppler already renders annotation of these types, nothing more can be done # even though the rendering isn't always perfect. continue else: logger.warning(_("Pympress can not interpret annotation of type:") + " {} ".format(annot_type)) continue my_annotation = Link(annotation.area.x1, annotation.area.y1, annotation.area.x2, annotation.area.y2, action) self.links.append(my_annotation)
[docs] def get_annot_action(self, link_type, action, rect): """ Get the function to be called when the link is followed. Args: link_type (:class:`~Poppler.ActionType`): The link type action (:class:`~Poppler.Action`): The action to be performed when the link is clicked rect (:class:`~Poppler.Rectangle`): The region of the page where the link is Returns: `function`: The function to be called to follow the link """ if link_type == Poppler.ActionType.RENDITION: media = action.rendition.media if media.is_embedded(): ext = get_extension(media.get_mime_type()) with tempfile.NamedTemporaryFile('wb', suffix=ext, prefix='pdf_embed_', delete=False) as f: # now the file name is shotgunned filename = pathlib.Path(f.name) self.parent.remove_on_exit(filename) if not media.save(str(filename)): logger.error(_("Pympress can not extract embedded media")) return None else: filename = self.parent.get_full_path(media.get_filename()) if not filename: logger.error(_("Pympress can not find file ") + media.get_filename()) return None media_rect = ( rect.x1 / self.pw, # left 1 - rect.y2 / self.ph, # top rect.x2 / self.pw, # right 1 - rect.y1 / self.ph, # bottom ) media_options = {'type': media.get_mime_type()} try: media_options['autoplay'] = media.get_auto_play() media_options['show_controls'] = media.get_show_controls() media_options['repeat'] = media.get_repeat_count() - 1 # NB: no poster in Poppler’s MediaParameters except AttributeError: pass media = Media(*media_rect, filename, **media_options) self.medias.append(media) return Link.build_closure(self.parent.play_media, hash(media)) else: return self.get_link_action(link_type, action)
[docs] def number(self): """ Get the page number. """ return self.page_nb
[docs] def label(self): """ Get the page label. """ return self.page_label
[docs] def get_size(self, dtype=PdfPage.FULL): """ Get the page size. Args: dtype (:class:`~pympress.document.PdfPage`): the type of document to consider Returns: `(float, float)`: page size """ return dtype.scale().from_screen(self.pw, self.ph)
[docs] def get_aspect_ratio(self, dtype=PdfPage.FULL): """ Get the page aspect ratio. Args: dtype (:class:`~pympress.document.PdfPage`): the type of document to consider Returns: `float`: page aspect ratio """ w, h = self.get_size(dtype) return w / h
[docs] def get_annotations(self): """ Get the list of text annotations on this page. Returns: `list` of `str`: annotations on this page """ return self.annotations
[docs] def new_annotation(self, pos, rect=None, value=''): """ Add an annotation to this page Args: pos (`int`): The position in the list of annotations in which to insert this annotation rect (:class:`~Poppler.Rectangle`): A rectangle for the position of this annotation value (`str`): The contents of the annotation """ if self.parent.doc is None: return if pos < 0: pos = 0 if pos > len(self.annotations): pos = len(self.annotations) if rect is None: rect = Poppler.Rectangle() rect.x1 = self.pw - 20 rect.x2 = rect.x1 + 20 rect.y2 = self.ph - len(self.annotations) * 20 rect.y1 = rect.y2 - 20 new_annot = Poppler.AnnotText.new(self.parent.doc, rect) new_annot.set_icon(Poppler.ANNOT_TEXT_ICON_NOTE) new_annot.set_contents(value) self.annotations.insert(pos, new_annot) self.parent.made_changes()
[docs] def set_annotation(self, pos, value): """ Update an annotation on this page Args: pos (`int`): The number of the annotation value (`str`): The new contents of the annotation """ try: rect = self.annotations[pos].get_rectangle() except IndexError: # Often because no document is loaded logger.error(_("Pympress can not edit PDF annotation {}").format(pos)) else: self.remove_annotation(pos) self.new_annotation(pos, rect, value)
[docs] def remove_annotation(self, pos): """ Remove an annotation from this page Args: pos (`int`): The number of the annotation """ self.parent.made_changes() del self.annotations[pos]
[docs] def get_media(self): """ Get the list of medias this page might want to play. Returns: `list`: medias in this page """ return self.medias
[docs] def render_cairo(self, cr, ww, wh, dtype=PdfPage.FULL): """ Render the page on a Cairo surface. Args: cr (:class:`~Gdk.CairoContext`): target surface ww (`int`): target width in pixels wh (`int`): target height in pixels dtype (:class:`~pympress.document.PdfPage`): the type of document that should be rendered """ pw, ph = self.get_size(dtype) cr.set_source_rgb(1, 1, 1) # Scale scale = min(ww / pw, wh / ph) cr.scale(scale, scale) cr.rectangle(0, 0, pw, ph) cr.fill() # For "regular" pages, there is no problem: just render them. # For other pages (i.e. half of a page), the widget already has correct # dimensions so we don't need to deal with that. But for right and bottom # halves we must translate the output in order to only show the correct half. if dtype == PdfPage.RIGHT: cr.translate(-pw, 0) elif dtype == PdfPage.BOTTOM: cr.translate(0, -ph) self.page.render(cr)
[docs] def can_render(self): """ Informs that rendering *is* necessary (avoids checking the type). Returns: `bool`: `True`, do rendering """ return True
[docs] class Document(object): """ This is the main document handling class. The page numbering starts as 0 and is aware of notes (i.e. number of pages may change to account for note pages). The document page numbers are the same as in Poppler, and also start at 0 but do not depend on notes. Args: builder (:class:`pympress.builder.Builder`): A builder to load callbacks pop_doc (:class:`~pympress.Poppler.Document`): Instance of the Poppler document that this class will wrap uri (`str`): URI of the PDF file to open page (`int`): page number to which the file should be opened """ #: Current PDF document (:class:`~Poppler.Document` instance) doc = None #: `str` full path to pdf uri = None #: :class:`~pathlib.Path` to pdf if uri is a file: URI path = None #: Number of pages in the document nb_pages = -1 #: Pages cache (`dict` of :class:`~pympress.document.Page`). This makes #: navigation in the document faster by avoiding calls to Poppler when loading #: a page that has already been loaded. pages_cache = {} #: `set` of :class:`~pathlib.Path` representing the temporary files which need to be removed temp_files = set() #: History of pages we have visited, using note-aware page numbers history = [] #: Our position in the history hist_pos = -1 #: `list` of slide page labels, indexed on note-aware page numbers page_labels = [] #: `list` of all the page labels, indexed on document page numbers doc_page_labels = [] #: `list` of (slide's document page number, notes' document page number) tuples, or `None` if there are no notes notes_mapping = None #: `bool` indicating whether there were modifications to the document changes = False #: callback, to be connected to :func:`~pympress.extras.Media.play` play_media = lambda *args: None #: callback, to be connected to :func:`~pympress.editable_label.PageNumber.start_editing` start_editing_page_number = lambda *args: None #: callback, to be connected to :func:`~pympress.ui.UI.goto_page` navigate = lambda *args: None def __init__(self, builder, pop_doc, uri): if builder is not None: # Connect callbacks self.play_media = builder.get_callback_handler('medias.play') self.start_editing_page_number = builder.get_callback_handler('page_number.start_editing') self.goto_page = builder.get_callback_handler('goto_page') self.goto_next_hist = builder.get_callback_handler('doc_hist_next') self.goto_prev_hist = builder.get_callback_handler('doc_hist_prev') # Setup PDF file self.uri = uri self.doc = pop_doc self.changes = False if uri is not None: uri_parts = urlsplit(uri, scheme='file') self.path = pathlib.Path(url2pathname(uri_parts.path)) if uri_parts.scheme == 'file': self.path = pathlib.Path.cwd().joinpath(self.path.name) else: self.path = None # Pages numbers and labels self.nb_pages = 0 if pop_doc is None else self.doc.get_n_pages() self.doc_page_labels = [self.doc.get_page(n).get_label() for n in range(self.nb_pages)] self.page_labels = self.doc_page_labels # Pages cache self.pages_cache = {}
[docs] def get_structure(self, index_iter = None): """ Gets the structure of the document from its index. Recursive, pass the iterator. Args: index_iter (:class:`~Poppler.IndexIter` or `None`): the iterator for the child index to explore. Returns: `list`: A list of tuples (depth, page number, title) """ try: if index_iter is None: index_iter = Poppler.IndexIter(self.doc) except TypeError: return {} if index_iter is None: return {} index = {} while True: action = index_iter.get_action() title = '' page = None try: if action.type == Poppler.ActionType.GOTO_DEST: title = action.goto_dest.title if action.goto_dest.dest.type == Poppler.DestType.NAMED: page = self.doc.find_dest(action.goto_dest.dest.named_dest).page_num - 1 elif action.goto_dest.dest.type == Poppler.DestType.UNKNOWN: raise AssertionError('Unknown type of destination') else: page = action.goto_dest.dest.page_num - 1 else: raise AssertionError('Unexpected type of action') except Exception: logger.error(_('Unexpected action in index "{}"').format(action.type)) page = None new_entry = {'title': title} child = index_iter.get_child() if child: new_entry['children'] = self.get_structure(child) if page is None: page = min(new_entry['children']) # there should not be synonymous sections, correct the page here to a better guess if page in index: lower_bound = max(index.keys()) find = index[lower_bound] while 'children' in find: lower_bound = max(find['children'].keys()) find = find['children'][lower_bound] try: page = min(number for number, label in enumerate(self.page_labels) if label == self.page_labels[page] and number > lower_bound) except ValueError: # empty iterator page = lower_bound + 1 if page is not None: index[page] = new_entry if not index_iter.next(): break return index
[docs] @staticmethod def create(builder, uri): """ Initializes a Document by passing it a :class:`~Poppler.Document`. Args: builder (:class:`pympress.builder.Builder`): A builder to load callbacks uri (`str`): URI to the PDF file to open page (`int`): page number to which the file should be opened Returns: :class:`~pympress.document.Document`: The initialized document """ if uri is None: doc = EmptyDocument() else: poppler_doc = Poppler.Document.new_from_file(uri, None) doc = Document(builder, poppler_doc, uri) return doc
[docs] def made_changes(self): """ Notify the document that some changes were made (e.g. annotations edited) """ self.changes = True
[docs] def has_changes(self): """ Return whether that some changes were made (e.g. annotations edited) """ return self.changes
[docs] def save_changes(self, dest_uri=None): """ Save the changes Args: dest_uri (`str` or `None`): The URI where to save the file, or None to save in-place """ if self.doc is None: return for page in self.pages_cache.values(): for annot in page.get_annotations(): page.page.add_annot(annot) if dest_uri is not None and dest_uri != self.uri: if self.doc.save(dest_uri): self.changes = False else: # We can’t overwrite the current file directly, so create a temporary file and then overwrite with tempfile.NamedTemporaryFile('wb', suffix=self.path.suffix, prefix=self.path.stem, dir=self.path.parent, delete=False) as f: temp_path = pathlib.Path(f.name) if self.doc.save(temp_path.as_uri()): self.changes = False temp_path.replace(self.path) for page in self.pages_cache.values(): for annot in page.get_annotations(): page.page.remove_annot(annot)
[docs] def guess_notes(self, horizontal, vertical, current_page=0): """ Get our best guess for the document mode. Args: horizontal (`str`): A string representing the preference for horizontal slides vertical (`str`): A string representing the preference for vertical slides Returns: :class:`~pympress.document.PdfPage`: the notes mode """ if any(label.startswith('notes:') for label in self.page_labels): return PdfPage.MAP page = self.page(current_page) if page is None or not page.can_render(): return PdfPage.NONE ar = page.get_aspect_ratio() # Check whether we have N slides with one aspect ratio then N slides with a different aspect ratio # that is the sign if Libreoffice notes pages if self.nb_pages and self.nb_pages % 2 == 0: half_doc = self.nb_pages // 2 ar_slides = self.page(0).get_aspect_ratio() ar_notes = self.page(half_doc).get_aspect_ratio() if ar_slides != ar_notes and \ all(self.page(p).get_aspect_ratio() == ar_slides for p in range(1, half_doc)) and \ all(self.page(half_doc + p).get_aspect_ratio() == ar_notes for p in range(1, half_doc)): return PdfPage.AFTER # "Regular" slides will have an aspect ratio of 4/3, 16/9, 16/10... i.e. in the range [1..2] # So if the aspect ratio is >= 2, we can assume it is a document with notes on the side. if ar >= 2: try: return PdfPage[horizontal.upper()] except KeyError: return PdfPage.RIGHT # Make exception for classic american letter format and ISO (A4, B5, etc.) if abs(ar - 8.5 / 11) < 1e-3 or abs(ar - 1 / math.sqrt(2)) < 1e-3: return PdfPage.NONE # If the aspect ratio is < 1, we can assume it is a document with notes above or below. if ar < 1: try: return PdfPage[vertical.upper()] except KeyError: return PdfPage.BOTTOM return PdfPage.NONE
[docs] def set_notes_pos(self, notes_direction): """ Set whether where the notes pages are relative to normal pages Valid values are returned by :meth:`~pympress.document.PdfPage.direction` - page number (aka Libreoffice notes mode) - page parity (can not be detected automatically, where every other page contains notes) - page mapping (where labels of notes pages are corresponding slide labels prefixed with “notes:”) Args: notes_direction (`str`): Where the notes pages are """ if notes_direction == 'page number': self.notes_mapping = [(n, n + self.nb_pages // 2) for n in range(self.nb_pages // 2)] elif notes_direction == 'page parity': self.notes_mapping = [(n, n + 1) for n in range(0, self.nb_pages, 2)] elif notes_direction == 'page mapping': notes_mapping = collections.OrderedDict() for n, (label, prev_label) in enumerate(zip(self.doc_page_labels, [None, *self.doc_page_labels[:-1]])): # Here the condition (could be adjusted) is 2 successive pages labeled <label> and notes:<label>, # with the prior page not being a notes page. if n - 1 in notes_mapping and label == 'notes:' + prev_label: notes_mapping[n - 1] = n else: notes_mapping[n] = None self.notes_mapping = list(notes_mapping.items()) else: self.notes_mapping = None self.page_labels = self.doc_page_labels return self.page_labels = [self.doc_page_labels[page] for page, note in self.notes_mapping]
[docs] def page(self, number): """ Get the specified page. Args: number (`int`): number of the page to return Returns: :class:`~pympress.document.Page`: the wanted page, or `None` if it does not exist """ if number >= self.pages_number() or number < 0: return None if self.notes_mapping is not None: number = self.notes_mapping[number][0] if number < 0: return None if number not in self.pages_cache: self.pages_cache[number] = Page(self.doc.get_page(number), number, self) return self.pages_cache[number]
[docs] def notes_page(self, number): """ Get the specified page. Args: number (`int`): number of the page to return Returns: :class:`~pympress.document.Page`: the wanted page, or `None` if it does not exist """ if number >= self.pages_number() or number < 0: return None if self.notes_mapping is not None: number = self.notes_mapping[number][1] if number is None: return None if number not in self.pages_cache: self.pages_cache[number] = Page(self.doc.get_page(number), number, self) return self.pages_cache[number]
[docs] def pages_number(self): """ Get the number of pages in the document. Returns: `int`: the number of pages in the document """ return len(self.notes_mapping) if self.notes_mapping is not None else self.nb_pages
[docs] def has_labels(self): """ Return whether this document has useful labels. Returns: `bool`: False iff there are no labels or they are just the page numbers """ return self.doc_page_labels != [str(n + 1) for n in range(self.nb_pages)]
[docs] def get_last_label_pages(self): """ Return the last page number for each consecutively distinct page label In other words, squash together consecutive same labels """ last = None pages = [] for page, label in enumerate(self.page_labels): if label != last: pages.append(page) last = label return pages
[docs] def lookup_label(self, label, prefix_unique=True): """ Find a page from its label. Args: label (`str`): the label we are searching for prefix_unique (`bool`): whether a prefix match should be unique, e.g. when the user is still typing Returns: `int`: the page """ # somehow this always returns None: # page = self.doc.get_page_by_label(label).get_index() # make a shortlist: squash synonymous labels, keeping the last one compatible_labels = {l: n for n, l in enumerate(self.page_labels) if l.lower().startswith(label.lower())} if len(compatible_labels) == 1: return set(compatible_labels.values()).pop() # try exact match try: return compatible_labels[label] except KeyError: pass # try case-insensitive match, prefix case-sensitive match, prefix case-insensitive match (unless prefix_unique) filters = [lambda lbl: len(lbl) == len(label), lambda lbl: lbl.startswith(label), lambda lbl: not prefix_unique] for filtering in filters: try: found = next(label for label in compatible_labels if filtering(label)) except StopIteration: pass else: return compatible_labels[found] return None
[docs] def goto(self, number): """ Switch to another page. Validates the number and returns one in the correct range. Also updates history. Args: number (`int`): number of the destination page """ if number < 0: number = 0 if number >= self.pages_number(): number = self.pages_number() - 1 if 0 <= self.hist_pos < len(self.history) and self.history[self.hist_pos] == number: return number # chop off history where we were and append self.hist_pos = min(len(self.history), self.hist_pos + 1) del self.history[self.hist_pos:] self.history.append(number) return number
[docs] def label_after(self, page): """ Switch to the next page with different label. If we're within a set of pages with the same label we want to go to the last one. """ labels_after = enumerate(self.page_labels[page + 1:], page + 1) try: next_page, next_label = next(labels_after) except StopIteration: # we're already at the last page! return page # will stop as soon as next_page + 1 (aka following_page) is a different label or due to end of iterator for following_page, following_label in labels_after: if following_label == next_label: next_page = following_page else: break return next_page
[docs] def label_before(self, page): """ Switch to the previous page with different label. If we're within a set of pages with the same label we want to go *before* the first one. """ # will stop as soon as we find a different label or due to end of iterator for prev_page, prev_label in enumerate(reversed(self.page_labels[:page])): if prev_label != self.page_labels[page]: return page - 1 - prev_page return 0
[docs] def hist_next(self, *args): """ Switch to the page we viewed next. """ if self.hist_pos + 1 == len(self.history): return None self.hist_pos += 1 return self.history[self.hist_pos]
[docs] def hist_prev(self, *args): """ Switch to the page we viewed before. """ if self.hist_pos == 0: return None self.hist_pos -= 1 return self.history[self.hist_pos]
[docs] def get_uri(self): """ Gives access to the URI, rather than the path, of this document. Returns: `str`: the URI to the file currently opened. """ return self.uri
[docs] def get_full_path(self, filename): """ Returns full path, extrapolated from a path relative to this document or to the current directory. Args: filename (:class:`~pathlib.Path` or `str`): Name of the file or relative path to it Returns: :class:`~pathlib.Path`: the full path to the file or None if it doesn't exist """ filename = pathlib.Path(filename) if filename.is_absolute(): return filename.resolve() if filename.exists() else None for dirname in [pathlib.Path(url2pathname(urlsplit(self.uri).path)).parent, pathlib.Path.cwd()]: filepath = dirname.joinpath(filename) if filepath.exists(): return filepath
[docs] def remove_on_exit(self, filename): """ Remember a temporary file to delete later. Args: filename (:class:`~pathlib.Path`): The path to the file to delete """ self.temp_files.add(filename)
[docs] def cleanup_media_files(self): """ Removes all files that were extracted from the pdf into the filesystem. """ for f in self.temp_files: if f.exists(): f.unlink() self.temp_files.clear()
[docs] class EmptyPage(Page): """ A dummy page, placeholder for when there are no valid pages around. This page is a non-notes page with an aspect ratio of 1.3 and nothing else inside. Also, it has no "rendering" capability, and is made harmless by overriding its render function. """ def __init__(self, parent): super(EmptyPage, self).__init__(None, -1, parent) self.page_label = None # by default, anything that will have a 1.3 asapect ratio self.pw, self.ph = 1.3, 1.0
[docs] def render_cairo(self, cr, ww, wh, dtype=PdfPage.FULL): """ Overriding this purely for safety: make sure we do not accidentally try to render. Args: cr (:class:`~Gdk.CairoContext`): target surface ww (`int`): target width in pixels wh (`int`): target height in pixels dtype (:class:`~pympress.document.PdfPage`): the type of document that should be rendered """ pass
[docs] def can_render(self): """ Informs that rendering is *not* necessary (avoids checking the type). Returns: `bool`: `False`, no rendering """ return False
[docs] class EmptyDocument(Document): """ A dummy document, placeholder for when no document is open. """ def __init__(self): super(EmptyDocument, self).__init__(None, None, None) self.pages_cache[-1] = EmptyPage(self)
[docs] def page(self, number): """ Retrieve a page from the document. Args: number (`int`): page number to be retrieved Returns: :class:`~pympress.document.EmptyPage` or `None`: -1 returns the empty page so we can display something. """ return self.pages_cache[number] if number in self.pages_cache else None
[docs] def notes_page(self, number): """ Retrieve a page from the document. Args: number (`int`): page number to be retrieved Returns: :class:`~pympress.document.EmptyPage` or `None`: -1 returns the empty page so we can display something. """ return self.pages_cache[number] if number in self.pages_cache else None
## # Local Variables: # mode: python # indent-tabs-mode: nil # py-indent-offset: 4 # fill-column: 80 # end: