Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions LICENSE.MD2DOCX
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
This license applies to the templateprocessor/md2docx.py file.

MIT License

Copyright (c) 2024 Shlok T

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
43 changes: 19 additions & 24 deletions examples/generate_ecss_demo.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,21 @@
#!/bin/bash
# Generates the ECSS demo documents into ./output.
#
# Every pair of commands below handles one template from
# ../data/ecss-template: template-processor instantiates the template using
# the demo project's interface view and deployment view (with TARGET=ASW),
# writing into the output directory, and pandoc then converts the resulting
# output/<template>.md into a PDF using the pdfroff engine.
#
# Relative paths are used throughout.
# NOTE(review): presumably meant to be run from the examples/ directory — confirm.

# Create the output directory if it does not exist yet.
mkdir -p output

# 4.1 Software static architecture
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_1_software_static_architecture.tmplt
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_1_software_static_architecture.pdf output/ecss-e-st-40c_4_1_software_static_architecture.md

# 4.2 Software dynamic architecture
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_2_software_dynamic_architecture.tmplt
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_2_software_dynamic_architecture.pdf output/ecss-e-st-40c_4_2_software_dynamic_architecture.md

# 4.4 Interfaces context
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_4_interfaces_context.tmplt
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_4_interfaces_context.pdf output/ecss-e-st-40c_4_4_interfaces_context.md

# 5.2 Overall architecture
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_2_overall_architecture.tmplt
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_2_overall_architecture.pdf output/ecss-e-st-40c_5_2_overall_architecture.md

# 5.3 Software components design
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_3_software_components_design.tmplt
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_3_software_components_design.pdf output/ecss-e-st-40c_5_3_software_components_design.md

# 5.4 Aspects of each component
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_4_aspects_of_each_component.tmplt
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_4_aspects_of_each_component.pdf output/ecss-e-st-40c_5_4_aspects_of_each_component.md

# 5.5 Internal interface design
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_5_internal_interface_design.tmplt
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_5_internal_interface_design.pdf output/ecss-e-st-40c_5_5_internal_interface_design.md

# 6 Requirement traceability
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_6_requirement_traceability.tmplt
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_6_requirement_traceability.pdf output/ecss-e-st-40c_6_requirement_traceability.md
# Names of the ECSS templates to process (file names without the .tmplt suffix).
templates=(
  "ecss-e-st-40c_4_1_software_static_architecture"
  "ecss-e-st-40c_4_2_software_dynamic_architecture"
  "ecss-e-st-40c_4_4_interfaces_context"
  "ecss-e-st-40c_5_2_overall_architecture"
  "ecss-e-st-40c_5_3_software_components_design"
  "ecss-e-st-40c_5_4_aspects_of_each_component"
  "ecss-e-st-40c_5_5_internal_interface_design"
  "ecss-e-st-40c_6_requirement_traceability"
)

# Arguments shared by every template-processor invocation. Kept in an array so
# each argument stays a separate word without relying on word splitting.
processor_args=(
  --verbosity info
  --value TARGET=ASW
  --iv demo-project/interfaceview.xml
  --dv demo-project/deploymentview.dv.xml
  -o output
)

# For each template: run template-processor once without postprocessing, once
# with md2docx and once with md2html, then let pandoc convert
# output/<template>.md to PDF with the pdfroff engine.
for template in "${templates[@]}"; do
  template_file="../data/ecss-template/${template}.tmplt"

  template-processor "${processor_args[@]}" -t "${template_file}"
  template-processor "${processor_args[@]}" -t "${template_file}" -p md2docx
  template-processor "${processor_args[@]}" -t "${template_file}" -p md2html
  pandoc --pdf-engine=pdfroff --output="output/${template}.pdf" "output/${template}.md"
done
3 changes: 2 additions & 1 deletion examples/generate_so_list.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Generates the system-object list example into ./output.

# Create the output directory if it does not exist yet.
mkdir -p output
# Instantiate so_list.tmplt with ../data/events.csv as the system-objects input.
template-processor --verbosity info --system-objects ../data/events.csv -o output -t so_list.tmplt
# Convert output/so_list.md to PDF using pandoc's pdfroff engine.
# NOTE(review): the pandoc command appears twice in this view; this looks like
# the old and new side of a diff hunk rather than an intended double run — confirm.
pandoc --pdf-engine=pdfroff --output=output/so_list.pdf output/so_list.md
pandoc --pdf-engine=pdfroff --output=output/so_list.pdf output/so_list.md
# Instantiate the same template again, this time with md2docx postprocessing.
template-processor --verbosity info --system-objects ../data/events.csv -o output -t so_list.tmplt -p md2docx
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@
pytest==7.4.2
black==24.3.0
mako==1.3.10

python-docx==1.2.0
bs4==0.0.2
markdown2==2.5.4
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@
include_package_data=True,
python_requires='>=3.8',
install_requires=[
"mako==1.3.10"
"mako==1.3.10",
"python-docx==1.2.0",
"beautifulsoup4==4.12.3",
"markdown2==2.5.4"
],
extras_require={
'dev': [
Expand Down
48 changes: 42 additions & 6 deletions templateprocessor/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@
from templateprocessor.soreader import SOReader
from templateprocessor.dvreader import DVReader
from templateprocessor.so import SystemObjectType
from templateprocessor.postprocessor import (
PostprocessorType,
Md2docxPostprocessor,
Md2HtmlPostprocessor,
PassthroughPostprocessor,
Postprocessor,
)


def parse_arguments() -> argparse.Namespace:
Expand Down Expand Up @@ -88,7 +95,7 @@ def parse_arguments() -> argparse.Namespace:
parser.add_argument(
"-p",
"--postprocess",
choices=["none", "md2docx"],
choices=["none", "md2docx", "md2html"],
help="Output postprocessing",
default="none",
)
Expand All @@ -107,6 +114,16 @@ def get_log_level(level_str: str) -> int:
return log_levels.get(level_str.lower(), logging.WARNING)


def get_postprocessor_type(type_str: str) -> PostprocessorType:
    """Translate a postprocessor name into its PostprocessorType member.

    The name is lower-cased and compared with the values of the NONE,
    MD2DOCX and MD2HTML members. PostprocessorType.NONE is returned when
    none of them matches.
    """
    wanted = type_str.lower()
    known_types = (
        PostprocessorType.NONE,
        PostprocessorType.MD2DOCX,
        PostprocessorType.MD2HTML,
    )
    for candidate in known_types:
        if candidate.value == wanted:
            return candidate
    return PostprocessorType.NONE


def get_values_dictionary(values: list[str]) -> dict[str, str]:
if not values or not isinstance(values, list):
return {}
Expand Down Expand Up @@ -143,8 +160,10 @@ def read_sots(file_names: list[str]) -> dict[str, SystemObjectType]:

def instantiate(
instantiator: TemplateInstantiator,
postprocessor: Postprocessor,
template_file: str,
module_directory: str,
postprocessor_type: PostprocessorType,
output_directory: str,
):
try:
Expand All @@ -157,10 +176,9 @@ def instantiate(
logging.debug(f"Instantiating template:\n {template}")
instantiated_template = instantiator.instantiate(template, module_directory)
logging.debug(f"Instantiation:\n {instantiated_template}")
output = Path(output_directory) / f"{name}.md"
logging.debug(f"Saving to {output}")
with open(output, "w") as f:
f.write(instantiated_template)
output = str(Path(output_directory) / f"{name}")
logging.debug(f"Postprocessing with {postprocessor_type}")
postprocessor.process(postprocessor_type, instantiated_template, output)
except FileNotFoundError as e:
logging.error(f"File not found: {e.filename}")
except Exception as e:
Expand All @@ -173,6 +191,7 @@ def main():
args = parse_arguments()
logging_level = get_log_level(args.verbosity)
logging.basicConfig(level=logging_level)
postprocessor_type = get_postprocessor_type(args.postprocess)

logging.info("Template Processor")
logging.debug(f"Interface View: {args.iv}")
Expand All @@ -182,6 +201,7 @@ def main():
logging.debug(f"Templates: {args.template}")
logging.debug(f"Output Directory: {args.output}")
logging.debug(f"Module directory: {args.module_directory}")
logging.debug(f"Postprocessing: {postprocessor_type.value}")

logging.info(f"Reading Interface View from {args.iv}")
iv = IVReader().read(args.iv) if args.iv else InterfaceView()
Expand All @@ -198,10 +218,26 @@ def main():
logging.info(f"Instantiating the TemplateInstantiator")
instantiator = TemplateInstantiator(iv, dv, sots, values)

logging.info(f"Instantiating the Postprocessor")
postprocessor = Postprocessor(
{
PostprocessorType.NONE: PassthroughPostprocessor(),
PostprocessorType.MD2DOCX: Md2docxPostprocessor(),
PostprocessorType.MD2HTML: Md2HtmlPostprocessor(),
}
)

if args.template:
logging.info(f"Instantiating templates")
for template_file in args.template:
instantiate(instantiator, template_file, args.module_directory, args.output)
instantiate(
instantiator,
postprocessor,
template_file,
args.module_directory,
postprocessor_type,
args.output,
)

return 0

Expand Down
116 changes: 116 additions & 0 deletions templateprocessor/md2docx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""

Markdown to DOCX conversion module extracted from md2docx-python project.

Project address: https://github.com/shloktech/md2docx-python/
Project LICENSE: LICENSE.MD2DOCX

The module was extracted so that its API and features can be adapted to the needs of this project.
Changes:
- input is text, not file
- markdown2 is used instead of markdown
- table support is added via markdown2 extras and additional HTML processing

"""

import markdown2
from docx import Document
from bs4 import BeautifulSoup, Tag


def get_element_text(element: Tag) -> str:
    """Return the whitespace-stripped text of ``element``.

    Objects that expose ``get_text`` are asked for their text with
    ``strip=True``; anything else is converted with ``str()`` and stripped.
    """
    if not hasattr(element, "get_text"):
        return str(element).strip()
    return element.get_text(strip=True)


def process_list_items(list_element: Tag, doc: Document, style_base: str, level=0):
    """Append the items of an HTML list to ``doc`` as styled list paragraphs.

    Args:
        list_element: The ``<ul>``/``<ol>`` element whose direct ``<li>``
            children are rendered.
        doc: Document that receives one paragraph per non-empty item.
        style_base: Base paragraph style name, e.g. "List Bullet" or
            "List Number".
        level: Zero-based nesting depth of ``list_element``.

    Nested ``<ul>``/``<ol>`` elements are rendered recursively, in document
    order, one level deeper than their parent item.
    """
    # Level 0 uses the base style, deeper levels use "<base> 2", "<base> 3", ...
    # A document only defines a limited number of those numbered styles, and
    # asking for a missing one makes add_paragraph fail, so fall back to the
    # deepest style that actually exists in this document.
    depth = level + 1
    while depth > 1 and f"{style_base} {depth}" not in doc.styles:
        depth -= 1
    style = style_base if depth == 1 else f"{style_base} {depth}"

    # Only direct <li> children belong to this list; deeper ones are handled
    # by the recursive calls below.
    for li in list_element.find_all("li", recursive=False):
        # Collect the item's own text, leaving nested lists out of it.
        text_parts = []
        for child in li.children:
            if child.name in ("ul", "ol"):
                continue
            part = get_element_text(child)
            # Whitespace-only nodes yield "", which would otherwise show up
            # as doubled spaces in the joined text.
            if part:
                text_parts.append(part)

        text = " ".join(text_parts)
        if text:
            doc.add_paragraph(text, style=style)

        # Render every nested list of this item, preserving document order.
        for nested in li.find_all(["ul", "ol"], recursive=False):
            nested_base = "List Bullet" if nested.name == "ul" else "List Number"
            process_list_items(nested, doc, nested_base, level + 1)


def markdown_to_word_file(markdown_source: str, word_file_path: str):
    """Convert Markdown text to a Word document and save it.

    Args:
        markdown_source: Markdown text to convert.
        word_file_path: Path the resulting document is saved to.
    """
    markdown_to_word_object(markdown_source).save(word_file_path)


def _add_inline_runs(paragraph, element: Tag) -> None:
    """Copy the inline children of ``element`` into ``paragraph`` as runs."""
    for child in element.children:
        if child.name == "strong":
            paragraph.add_run(child.text).bold = True
        elif child.name == "em":
            paragraph.add_run(child.text).italic = True
        elif child.name is not None:
            # Any other inline tag (link, inline code, ...) is reduced to its
            # text content; add_run expects text, not a parsed element.
            paragraph.add_run(child.get_text())
        else:
            # Plain text node.
            paragraph.add_run(str(child))


def _add_table(doc: Document, element: Tag) -> None:
    """Render an HTML ``<table>`` element as a "Table Grid" table in ``doc``."""
    rows_data = []
    for row in element.find_all("tr"):
        row_data = [cell.get_text(strip=True) for cell in row.find_all(["th", "td"])]
        if row_data:
            rows_data.append(row_data)

    if not rows_data:
        return

    # Size the table after the widest row so that no cell of a ragged table
    # is dropped.
    columns_count = max(len(row_data) for row_data in rows_data)
    table = doc.add_table(rows=len(rows_data), cols=columns_count)
    table.style = "Table Grid"

    for row_index, row_data in enumerate(rows_data):
        cells = table.rows[row_index].cells
        for column_index, cell_text in enumerate(row_data):
            cells[column_index].text = cell_text

    # Make the first row bold if it is a header
    first_row = element.find("tr")
    if first_row is not None and first_row.find("th") is not None:
        for cell in table.rows[0].cells:
            for paragraph in cell.paragraphs:
                for run in paragraph.runs:
                    run.bold = True


def markdown_to_word_object(markdown_source: str) -> Document:
    """Convert Markdown text into a python-docx document object.

    The Markdown is first rendered to HTML by markdown2 (with the "tables"
    and "wiki-tables" extras) and the top-level HTML elements are then
    mapped to document content:

    - ``h1`` .. ``h6`` become headings of the matching level,
    - ``p`` becomes a paragraph, with ``strong``/``em`` children rendered as
      bold/italic runs,
    - ``ul``/``ol`` become "List Bullet"/"List Number" paragraphs,
    - ``table`` becomes a "Table Grid" table whose first row is bold when it
      contains header cells.

    Other top-level elements are ignored.

    Args:
        markdown_source: Markdown text to convert.

    Returns:
        The newly created document.
    """
    # Converting Markdown to HTML
    html_content = markdown2.markdown(markdown_source, extras=["tables", "wiki-tables"])

    # Creating a new Word Document
    doc = Document()

    # Parsing the HTML so that its top-level elements can be walked
    soup = BeautifulSoup(html_content, "html.parser")

    heading_tags = ("h1", "h2", "h3", "h4", "h5", "h6")

    # Adding content to the Word Document
    for element in soup:
        name = element.name
        if name in heading_tags:
            # The digit in the tag name is the heading level.
            doc.add_heading(element.text, level=int(name[1]))
        elif name == "p":
            _add_inline_runs(doc.add_paragraph(), element)
        elif name == "ul":
            process_list_items(element, doc, "List Bullet")
        elif name == "ol":
            process_list_items(element, doc, "List Number")
        elif name == "table":
            _add_table(doc, element)

    return doc
Loading