22
33from __future__ import annotations
44
5+ import copy
6+ from typing import TYPE_CHECKING
7+
58from docx .oxml .xmlchemy import BaseOxmlElement
9+ from docx .shared import lazyproperty
10+
11+ if TYPE_CHECKING :
12+ from docx .oxml .text .hyperlink import CT_Hyperlink
13+ from docx .oxml .text .paragraph import CT_P
614
715
816class CT_LastRenderedPageBreak (BaseOxmlElement ):
@@ -16,3 +24,151 @@ class CT_LastRenderedPageBreak(BaseOxmlElement):
1624 `w:lastRenderedPageBreak` maps to `CT_Empty`. This name was added to give it
1725 distinguished behavior. CT_Empty is used for many elements.
1826 """
27+
@property
def precedes_all_content(self) -> bool:
    """True when this `w:lastRenderedPageBreak` precedes all paragraph content.

    This is a common case; it occurs whenever the page breaks on an even paragraph
    boundary.

    The test is anchored on this element, not on the paragraph as a whole, so a
    later page-break in the same paragraph does not report True merely because an
    earlier one leads the paragraph. A run or hyperlink that comes before the run
    containing this break counts as preceding content.
    """
    # -- a page-break inside a hyperlink never meets these criteria because there
    # -- is always part of the hyperlink text before the page-break.
    if self._is_in_hyperlink:
        return False

    # -- the enclosing run must be a direct child of the paragraph and must not
    # -- be preceded by another run or by a hyperlink --
    in_leading_run = self.xpath(
        "./parent::w:r[parent::w:p]"
        "[not(preceding-sibling::*[self::w:r | self::w:hyperlink])]"
    )
    if not in_leading_run:
        return False

    # -- and no content-bearing element may come before this break in that run --
    return not self.xpath(
        f"./preceding-sibling::*[{self._run_inner_content_xpath}]"
    )
51+
@property
def preceding_fragment_p(self) -> CT_P:
    """Detached `CT_P` holding the paragraph content that comes before this break.

    The result is a "clone" (deepcopy) of the `w:p` ancestor of this page-break
    from which this `w:lastRenderedPageBreak` and the content following it have
    been removed. When the break is inside a hyperlink, that hyperlink is kept
    whole in the fragment.

    Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
    page-break in its paragraph.
    """
    first_lrpb = self._first_lrpb_in_p(self._enclosing_p)
    if self != first_lrpb:
        raise ValueError("only defined on first rendered page-break in paragraph")

    # -- a break inside a hyperlink is split differently than one in a plain run --
    if self._is_in_hyperlink:
        return self._preceding_frag_in_hlink
    return self._preceding_frag_in_run
72+
def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink:
    """Return the `w:hyperlink` grandparent of `lrpb`.

    The lookup is made on the `lrpb` argument, not on `self`.

    Raises `IndexError` when the run containing `lrpb` is not a child of a
    `w:hyperlink` (for example when `lrpb` has a `w:p` grandparent), so only call
    this for a break known to be inside a hyperlink.
    """
    hyperlinks = lrpb.xpath("./parent::w:r/parent::w:hyperlink")
    # -- indexing an empty result is what produces the documented IndexError --
    return hyperlinks[0]
80+
@property
def _enclosing_p(self) -> CT_P:
    """The nearest `w:p` ancestor of this `w:lastRenderedPageBreak`."""
    # -- `[1]` on the reverse `ancestor` axis selects the closest matching ancestor;
    # -- an empty result raises IndexError on the subscript below --
    nearest_p = self.xpath("./ancestor::w:p[1]")
    return nearest_p[0]
85+
def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
    """Return the first `w:lastRenderedPageBreak` element found in `p`.

    Only breaks in a run that is a direct child of `p`, or in a run directly inside
    a `w:hyperlink` child of `p`, are considered.

    Raises `ValueError` if there are no rendered page-breaks in `p`.
    """
    found = p.xpath(
        "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
    )
    if found:
        return found[0]
    raise ValueError("no rendered page-breaks in paragraph element")
97+
@lazyproperty
def _is_in_hyperlink(self) -> bool:
    """True when the run containing this page-break is a child of a `w:hyperlink`."""
    enclosing_hyperlinks = self.xpath("./parent::w:r/parent::w:hyperlink")
    # -- a non-empty match means the grandparent is a hyperlink --
    return True if enclosing_hyperlinks else False
102+
@lazyproperty
def _preceding_frag_in_hlink(self) -> CT_P:
    """Build the preceding `CT_P` fragment for a break located inside a hyperlink.

    Note this is a *partial-function*; it raises `ValueError` when this page-break
    is not inside a hyperlink.

    The fragment is a deep copy of the enclosing `w:p` in which every element after
    the hyperlink has been dropped and the page-break has been taken out of the
    hyperlink. The hyperlink itself is kept whole, so it is never "split" between
    fragments.
    """
    if not self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break in a hyperlink")

    # -- all edits below are made to a copy; the source `w:p` is not modified --
    fragment_p = copy.deepcopy(self._enclosing_p)

    # -- `self` belongs to the source tree, so locate its counterpart in the copy;
    # -- this takes the first rendered page-break found in the copied paragraph --
    cloned_lrpb = self._first_lrpb_in_p(fragment_p)
    cloned_hyperlink = cloned_lrpb._enclosing_hyperlink(cloned_lrpb)

    # -- take the page-break out of the run that holds it inside the hyperlink --
    cloned_lrpb.getparent().remove(cloned_lrpb)

    # -- drop every `w:p` child that comes after the hyperlink; the hyperlink and
    # -- whatever precedes it make up the fragment --
    for trailing in cloned_hyperlink.xpath("./following-sibling::*"):
        fragment_p.remove(trailing)

    return fragment_p
132+
@lazyproperty
def _preceding_frag_in_run(self) -> CT_P:
    """Build the preceding `CT_P` fragment for a break that is not in a hyperlink.

    Note this is a *partial-function*; it raises `ValueError` when this page-break
    is inside a hyperlink.

    The fragment is a deep copy of the enclosing `w:p` trimmed so that it ends just
    before the page-break: the break and everything after it in its run are
    removed, as is every element following that run in the paragraph.
    """
    if self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break not in a hyperlink")

    # -- all edits below are made to a copy; the source `w:p` is not modified --
    fragment_p = copy.deepcopy(self._enclosing_p)

    # -- `self` belongs to the source tree, so locate its counterpart in the copy;
    # -- this takes the first rendered page-break found in the copied paragraph --
    cloned_lrpb = self._first_lrpb_in_p(fragment_p)
    run = cloned_lrpb.xpath("./parent::w:r")[0]

    # -- inside the run, drop whatever follows the page-break and then the
    # -- page-break itself (siblings are collected before any removal happens) --
    for doomed in (*cloned_lrpb.xpath("./following-sibling::*"), cloned_lrpb):
        run.remove(doomed)

    # -- in the paragraph, drop every element that comes after that run --
    for trailing in run.xpath("./following-sibling::*"):
        fragment_p.remove(trailing)

    return fragment_p
162+
@lazyproperty
def _run_inner_content_xpath(self) -> str:
    """XPath fragment matching any run inner-content elements.

    The value is a union of `self::` node tests, one per content-bearing child
    element name, suitable for use inside an XPath predicate.
    """
    content_tags = ("br", "cr", "drawing", "noBreakHyphen", "ptab", "t", "tab")
    return " | ".join(f"self::w:{tag}" for tag in content_tags)
0 commit comments