Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions packtools/sps/models/base_text_node.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from packtools.sps.utils import xml_utils


class BaseTextNode:
    """Wrap an XML node and expose its text as plain text and as HTML.

    The four ``tags_to_*`` settings are stored unchanged and handed to
    ``xml_utils.process_subtags`` every time ``html_text`` is read.
    """

    def __init__(
        self,
        node,
        lang,
        tags_to_keep=None,
        tags_to_keep_with_content=None,
        tags_to_remove_with_content=None,
        tags_to_convert_to_html=None,
    ):
        """Keep ``node`` and ``lang`` and record the sub-tag settings."""
        self._node, self._lang = node, lang
        self.configure(
            tags_to_keep,
            tags_to_keep_with_content,
            tags_to_remove_with_content,
            tags_to_convert_to_html,
        )

    def configure(
        self,
        tags_to_keep,
        tags_to_keep_with_content,
        tags_to_remove_with_content,
        tags_to_convert_to_html,
    ):
        """Replace all four sub-tag settings used by ``html_text``."""
        (
            self.tags_to_keep,
            self.tags_to_keep_with_content,
            self.tags_to_remove_with_content,
            self.tags_to_convert_to_html,
        ) = (
            tags_to_keep,
            tags_to_keep_with_content,
            tags_to_remove_with_content,
            tags_to_convert_to_html,
        )

    @property
    def item(self):
        """Return a dict with the keys ``lang``, ``plain_text`` and ``html_text``."""
        return {
            "lang": self._lang,
            "plain_text": self.plain_text,
            "html_text": self.html_text,
        }

    @property
    def plain_text(self):
        """Return ``xml_utils.node_plain_text`` applied to the wrapped node."""
        wrapped = self._node
        return xml_utils.node_plain_text(wrapped)

    @property
    def html_text(self):
        """Return ``xml_utils.process_subtags`` applied to the wrapped node."""
        # To change the result, call configure() before reading html_text.
        settings = (
            self.tags_to_keep,
            self.tags_to_keep_with_content,
            self.tags_to_remove_with_content,
            self.tags_to_convert_to_html,
        )
        return xml_utils.process_subtags(self._node, *settings)
160 changes: 160 additions & 0 deletions packtools/sps/models/kwd_group.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from packtools.sps.utils import xml_utils
from packtools.sps.models.base_text_node import BaseTextNode


class KwdGroup:
Expand Down Expand Up @@ -235,3 +236,162 @@ def extract_kwd_data_with_lang_html_format(self,
resp["html_text"] = keyword_html_text

yield resp


class KwdTextNode(BaseTextNode):
    """Text node for one ``kwd`` element; adds nothing to ``BaseTextNode``."""


class KwdGroupTextNode(BaseTextNode):
    """Text node for a keyword group whose ``kwd`` children are the keywords."""

    @property
    def language(self):
        """Return the node's own ``xml:lang``, or the language given at init."""
        own_lang = self._node.get("{http://www.w3.org/XML/1998/namespace}lang")
        if own_lang:
            return own_lang
        return self._lang

    @property
    def items(self):
        """Yield the ``item`` dict of each ``kwd`` child of the wrapped node."""
        for kwd_node in self._node.xpath("kwd"):
            # Each keyword inherits this group's language and tag settings.
            yield KwdTextNode(
                node=kwd_node,
                lang=self.language,
                tags_to_keep=self.tags_to_keep,
                tags_to_keep_with_content=self.tags_to_keep_with_content,
                tags_to_remove_with_content=self.tags_to_remove_with_content,
                tags_to_convert_to_html=self.tags_to_convert_to_html,
            ).item

    @property
    def items_by_lang(self):
        """Return a one-entry dict mapping ``language`` to the list of ``items``."""
        group_lang = self.language
        return {group_lang: list(self.items)}


class ArticleKeywords:
    """Collect keyword data from an article and from its sub-articles.

    Keyword groups are looked up under every ``article-meta`` and
    ``sub-article`` element of the tree. How sub-tags are rendered in the
    ``html_text`` of each keyword is controlled through ``configure``.
    """

    def __init__(self, xmltree):
        """Store the tree; all sub-tag settings start as ``None``."""
        self._xmltree = xmltree
        # Give every setting a default so ``items`` and ``items_by_lang`` can
        # be read without a prior ``configure`` call instead of failing with
        # AttributeError.
        self.tags_to_keep = None
        self.tags_to_keep_with_content = None
        self.tags_to_remove_with_content = None
        self.tags_to_convert_to_html = None

    def configure(
        self,
        tags_to_keep,
        tags_to_keep_with_content,
        tags_to_remove_with_content,
        tags_to_convert_to_html,
    ):
        """
        Set how sub-tags are handled when keyword HTML text is produced.

        Params
        ------
        tags_to_keep (list or None): Tags to be preserved. Eg.:
            ['bold', 'p']
        tags_to_keep_with_content (list or None): Tags to be preserved with
            the respective content. Eg.: ['bold', 'p']
        tags_to_remove_with_content (list or None): Tags to be removed with
            their content. Eg.: ['bold', 'p']
        tags_to_convert_to_html (dict or None): Tags to be converted into
            HTML format. Eg.: {'bold': 'b'}
        """
        self.tags_to_keep = tags_to_keep
        self.tags_to_keep_with_content = tags_to_keep_with_content
        self.tags_to_remove_with_content = tags_to_remove_with_content
        self.tags_to_convert_to_html = tags_to_convert_to_html

    def _kwd_group_text_nodes(self):
        """Yield ``(parent_node, KwdGroupTextNode)`` for every keyword group."""
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"
        article_lang = self._xmltree.find(".").get(xml_lang)

        for node in self._xmltree.xpath('.//article-meta | .//sub-article'):
            # A parent without its own xml:lang falls back to the root's.
            parent_lang = node.get(xml_lang) or article_lang

            for kwd_group in node.xpath('.//kwd-group'):
                yield node, KwdGroupTextNode(
                    node=kwd_group,
                    lang=parent_lang,
                    tags_to_keep=self.tags_to_keep,
                    tags_to_keep_with_content=self.tags_to_keep_with_content,
                    tags_to_remove_with_content=self.tags_to_remove_with_content,
                    tags_to_convert_to_html=self.tags_to_convert_to_html,
                )

    @property
    def items(self):
        """
        Yield one dict per keyword, with language and parent information.

        The sub-tag handling applied to ``html_text`` is the one set through
        ``configure`` (all ``None`` when ``configure`` was never called).

        Yields
        ------
        dict: Keys ``lang``, ``plain_text``, ``html_text``, ``parent_name``
            and ``id``. ``parent_name`` is 'sub-article' when the keyword
            group was found under a sub-article and 'article' otherwise;
            ``id`` is the ``id`` attribute of that parent element (``None``
            when the attribute is absent).
        """
        for node, kwd_group_text_node in self._kwd_group_text_nodes():
            parent_name = node.tag if node.tag == "sub-article" else 'article'
            parent_id = node.get("id")
            for item in kwd_group_text_node.items:
                item["parent_name"] = parent_name
                item["id"] = parent_id
                yield item

    @property
    def items_by_lang(self):
        """
        Return the keywords grouped by language.

        The sub-tag handling applied to ``html_text`` is the one set through
        ``configure`` (all ``None`` when ``configure`` was never called).

        Returns
        -------
        dict: Language code mapped to the list of keyword dicts of that
            language, each with the keys ``lang``, ``plain_text`` and
            ``html_text``. When several keyword groups resolve to the same
            language, the last one processed replaces the earlier ones.
        """
        data = {}
        for _, kwd_group_text_node in self._kwd_group_text_nodes():
            # Merge the mapping itself: iterating it would yield only its
            # keys, and dict.update() rejects a bare language string.
            data.update(kwd_group_text_node.items_by_lang)

        return data