55
66from .. import results , lists , zips
77from .document_xml import read_document_xml_element
8+ from .header_xml import (read_header_xml_element , read_footer_xml_element )
89from .content_types_xml import empty_content_types , read_content_types_xml_element
910from .relationships_xml import read_relationships_xml_element , Relationships
1011from .numbering_xml import read_numbering_xml_element , Numbering
@@ -27,12 +28,14 @@ def read(fileobj):
2728 zip_file ,
2829 part_paths = part_paths ,
2930 )
30-
31+
3132 return results .combine ([
3233 _read_notes (read_part_with_body , part_paths ),
3334 _read_comments (read_part_with_body , part_paths ),
35+ _read_headers (read_part_with_body , part_paths ),
36+ _read_footers (read_part_with_body , part_paths )
3437 ]).bind (lambda referents :
35- _read_document (zip_file , read_part_with_body , notes = referents [0 ], comments = referents [1 ], part_paths = part_paths )
38+ _read_document (zip_file , read_part_with_body , notes = referents [0 ], comments = referents [1 ], headers = referents [ 2 ], footers = referents [ 3 ], part_paths = part_paths )
3639 )
3740
3841
@@ -43,6 +46,8 @@ class _PartPaths(object):
4346 endnotes = cobble .field ()
4447 footnotes = cobble .field ()
4548 numbering = cobble .field ()
49+ headers = cobble .field ()
50+ footers = cobble .field ()
4651 styles = cobble .field ()
4752
4853
@@ -55,21 +60,24 @@ def _find_part_paths(zip_file):
5560 _find_relationships_path_for (document_filename ),
5661 )
5762
58- def find (name ):
63+ def find (name , multiple = False ):
5964 return _find_part_path (
6065 zip_file = zip_file ,
6166 relationships = document_relationships ,
6267 relationship_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/" + name ,
6368 fallback_path = "word/{0}.xml" .format (name ),
6469 base_path = zips .split_path (document_filename )[0 ],
70+ multiple = multiple
6571 )
66-
72+
6773 return _PartPaths (
6874 main_document = document_filename ,
6975 comments = find ("comments" ),
7076 endnotes = find ("endnotes" ),
7177 footnotes = find ("footnotes" ),
7278 numbering = find ("numbering" ),
79+ headers = find ("header" , multiple = True ),
80+ footers = find ("footer" , multiple = True ),
7381 styles = find ("styles" ),
7482 )
7583
@@ -88,7 +96,7 @@ def _find_document_filename(zip_file, relationships):
8896 raise IOError ("Could not find main document part. Are you sure this is a valid .docx file?" )
8997
9098
91- def _find_part_path (zip_file , relationships , relationship_type , base_path , fallback_path ):
99+ def _find_part_path (zip_file , relationships , relationship_type , base_path , fallback_path , multiple = False ):
92100 targets = [
93101 zips .join_path (base_path , target ).lstrip ("/" )
94102 for target in relationships .find_targets_by_type (relationship_type )
@@ -97,7 +105,7 @@ def _find_part_path(zip_file, relationships, relationship_type, base_path, fallb
97105 if len (valid_targets ) == 0 :
98106 return fallback_path
99107 else :
100- return valid_targets [0 ]
108+ return valid_targets if multiple else valid_targets [0 ]
101109
102110
103111def _read_notes (read_part_with_body , part_paths ):
@@ -111,7 +119,7 @@ def _read_notes(read_part_with_body, part_paths):
111119 lambda root , body_reader : read_endnotes_xml_element (root , body_reader = body_reader ),
112120 default = _empty_result ,
113121 )
114-
122+
115123 return results .combine ([footnotes , endnotes ]).map (lists .flatten )
116124
117125
@@ -122,14 +130,42 @@ def _read_comments(read_part_with_body, part_paths):
122130 default = _empty_result ,
123131 )
124132
def _read_headers(read_part_with_body, part_paths):
    """Read every header part and return the non-empty results.

    ``part_paths.headers`` may be either a single path string or a
    collection of paths; a lone string is wrapped in a list so that both
    shapes are processed the same way.

    :param read_part_with_body: callable invoked once per path as
        ``read_part_with_body(path, reader, default=...)``.
    :param part_paths: object whose ``headers`` attribute holds the header
        path(s).
    :returns: a list containing each object returned by
        ``read_part_with_body`` whose ``value`` attribute is not ``[]``.

    NOTE(review): this returns a plain list rather than a single result
    object -- confirm that callers combining it with other results expect
    that shape.
    """
    header_paths = part_paths.headers
    # isinstance() instead of an exact type comparison so that str subclasses
    # are also wrapped; a bare string would otherwise be iterated character
    # by character below.
    if isinstance(header_paths, str):
        header_paths = [header_paths]

    headers = [
        read_part_with_body(
            header_path,
            lambda root, body_reader: read_header_xml_element(root, body_reader=body_reader),
            default=_empty_result,
        )
        for header_path in header_paths
    ]
    # Discard parts that produced an empty value.
    return [header for header in headers if header.value != []]
144+
145+
def _read_footers(read_part_with_body, part_paths):
    """Read every footer part and return the non-empty results.

    ``part_paths.footers`` may be either a single path string or a
    collection of paths; a lone string is wrapped in a list so that both
    shapes are processed the same way.

    :param read_part_with_body: callable invoked once per path as
        ``read_part_with_body(path, reader, default=...)``.
    :param part_paths: object whose ``footers`` attribute holds the footer
        path(s).
    :returns: a list containing each object returned by
        ``read_part_with_body`` whose ``value`` attribute is not ``[]``.

    NOTE(review): this returns a plain list rather than a single result
    object -- confirm that callers combining it with other results expect
    that shape.
    """
    footer_paths = part_paths.footers
    # isinstance() instead of an exact type comparison so that str subclasses
    # are also wrapped; a bare string would otherwise be iterated character
    # by character below.
    if isinstance(footer_paths, str):
        footer_paths = [footer_paths]

    footers = [
        read_part_with_body(
            footer_path,
            lambda root, body_reader: read_footer_xml_element(root, body_reader=body_reader),
            default=_empty_result,
        )
        for footer_path in footer_paths
    ]
    # Discard parts that produced an empty value.
    return [footer for footer in footers if footer.value != []]
158+
125159
def _read_document(zip_file, read_part_with_body, notes, comments, headers, footers, part_paths):
    """Read the main document part.

    Binds ``notes``, ``comments``, ``headers`` and ``footers`` onto
    ``read_document_xml_element`` and hands the resulting callable to
    ``read_part_with_body`` together with ``part_paths.main_document``.

    ``zip_file`` is accepted but not used inside this function.

    :returns: whatever ``read_part_with_body`` returns for the main document.
    """
    document_reader = partial(
        read_document_xml_element,
        notes=notes,
        comments=comments,
        headers=headers,
        footers=footers,
    )
    return read_part_with_body(part_paths.main_document, document_reader)
135171
0 commit comments