Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
**v0.55.0**
* [[TeamMsgExtractor #465](https://github.com/TeamMsgExtractor/msg-extractor/issues/465)] Added missing `msg.close()` to `openMsg()`. Previously, if the file turned out to be a plain OLE file rather than an MSG file, it was left open instead of being closed before the error was raised.
* Adjusted the default value of `maxNameLength` for `MessageBase.save()` to 40 instead of 256.
* Adjusted exception handling for `MessageBase.save()` to properly report the reason a folder fails to be created.
* Simplified some of the code for `MessageBase.save()`.
* Fixed some typing information.

**v0.54.1**
* [[TeamMsgExtractor #462](https://github.com/TeamMsgExtractor/msg-extractor/issues/462)] Fixed a potential issue where a child MSG might have an encoding incompatible with its parent MSG when trying to grab a stream from the parent.
* Added code to attempt to significantly improve RTF deencapsulation times. This tries to strip away unneeded data before passing it to `RTFDE`. This shows improvements on all files that take more than one second to deencapsulate. It also fixes some files that previously produced incorrect output from `RTFDE` when deencapsulating the HTML body, specifically cases where non-breaking spaces sometimes did not transfer over.
Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ your access to the newest major version of extract-msg.
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt

.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.54.1-blue.svg
:target: https://pypi.org/project/extract-msg/0.54.1/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.55.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.55.0/

.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-3810/
Expand Down
4 changes: 2 additions & 2 deletions extract_msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = 'Destiny Peterson & Matthew Walker'
__date__ = '2025-04-10'
__version__ = '0.54.1'
__date__ = '2025-08-12'
__version__ = '0.55.0'

__all__ = [
# Modules:
Expand Down
8 changes: 5 additions & 3 deletions extract_msg/attachments/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import string
import zipfile

from typing import TYPE_CHECKING
from typing import Optional, TYPE_CHECKING

from .. import constants
from .attachment_base import AttachmentBase
Expand Down Expand Up @@ -72,7 +72,7 @@ def getFilename(self, **kwargs) -> str:

return filename

def regenerateRandomName(self) -> str:
def regenerateRandomName(self) -> None:
"""
Used to regenerate the random filename used if the attachment cannot
find a usable filename.
Expand Down Expand Up @@ -166,9 +166,11 @@ def save(self, **kwargs) -> constants.SAVE_TYPE:
_zip.close()

@property
def data(self) -> bytes:
def data(self) -> Optional[bytes]:
"""
The bytes making up the attachment data.

If the attachment data stream does not exist, returns None.
"""
return self.__data

Expand Down
33 changes: 12 additions & 21 deletions extract_msg/msg_classes/message_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,12 +726,12 @@ def save(self, **kwargs) -> constants.SAVE_TYPE:
pdf = kwargs.get('pdf', False)
allowFallback = kwargs.get('allowFallback', False)
_zip = kwargs.get('zip')
maxNameLength = kwargs.get('maxNameLength', 256)
maxNameLength = kwargs.get('maxNameLength', 40)

# Variables involved in the save location.
customFilename = kwargs.get('customFilename')
useMsgFilename = kwargs.get('useMsgFilename', False)
#maxPathLength = kwargs.get('maxPathLength', 255)
#maxPathLength = kwargs.get('maxPathLength', 255) # TODO

# Track if we are only saving the attachments.
attachOnly = kwargs.get('attachmentsOnly', False)
Expand All @@ -742,6 +742,8 @@ def save(self, **kwargs) -> constants.SAVE_TYPE:
# raising an exception.
skipBodyNotFound = kwargs.get('skipBodyNotFound', False)

fext = None

if pdf:
kwargs['preparedHtml'] = True

Expand All @@ -758,30 +760,22 @@ def save(self, **kwargs) -> constants.SAVE_TYPE:
if self.htmlBody:
useHtml = True
fext = 'html'
elif not allowFallback:
if skipBodyNotFound:
fext = None
else:
raise DataNotFoundError('Could not find the htmlBody.')
elif not allowFallback and not skipBodyNotFound:
raise DataNotFoundError('Could not find the htmlBody.')

if pdf:
if self.htmlBody:
usePdf = True
fext = 'pdf'
elif not allowFallback:
if skipBodyNotFound:
fext = None
else:
raise DataNotFoundError('Count not find the htmlBody to convert to pdf.')
elif not allowFallback and not skipBodyNotFound:
raise DataNotFoundError('Count not find the htmlBody to convert to pdf.')

if rtf or (html and not useHtml) or (pdf and not usePdf):
if self.rtfBody:
useRtf = True
fext = 'rtf'
elif not allowFallback:
if skipBodyNotFound:
fext = None
else:
if not skipBodyNotFound:
raise DataNotFoundError('Could not find the rtfBody.')
else:
# This was the last resort before plain text, so fall
Expand All @@ -794,10 +788,7 @@ def save(self, **kwargs) -> constants.SAVE_TYPE:
# We need to check if the plain text body was found. If it
# was found but was empty that is considered valid, so we
# specifically check against None.
if self.body is None:
if skipBodyNotFound:
fext = None
else:
if self.body is None and not skipBodyNotFound:
if allowFallback:
raise DataNotFoundError('Could not find a valid body using current options.')
else:
Expand Down Expand Up @@ -872,12 +863,12 @@ def save(self, **kwargs) -> constants.SAVE_TYPE:
if not _zip:
try:
os.makedirs(path)
except Exception:
except Exception as e:
newDirName = addNumToDir(path)
if newDirName:
path = newDirName
else:
raise OSError(f'Failed to create directory "{path}". Does it already exist?')
raise OSError(f'Failed to create directory "{path}". Reason: {e}')
else:
# In my testing I ended up with multiple files in a zip at the
# same location so let's try to handle that.
Expand Down
1 change: 1 addition & 0 deletions extract_msg/open_msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def openMsg(path, **kwargs) -> MSGFile:
# lower function. So let's make sure we got a good return first.
if not ct:
if kwargs.get('strict', True):
msg.close()
raise InvalidFileFormatError('File was confirmed to be an olefile, but was not an MSG file.')
else:
# If strict mode is off, we'll just return an MSGFile anyways.
Expand Down