Skip to content

Commit 0df66df

Browse files
committed
NiFi: de-id script update.
1 parent 10788ba commit 0df66df

1 file changed

Lines changed: 24 additions & 4 deletions

File tree

nifi/user-scripts/anonymise_doc.py

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,38 @@
1-
from medcat.utils.ner.deid import DeIdModel
1+
from medcat.utils.ner import deid_text
22
import sys
33
import os
4+
import json
5+
6+
from medcat.cat import CAT
7+
8+
9+
def special_deid(cat, text, record):
10+
return record, deid_text(cat, text)
411

512
input_text = sys.stdin.read()
613

714
model_pack_path = os.environ.get("MODEL_PACK_PATH", "/opt/models/de_id_base.zip")
15+
text_field_name = "document"
16+
nproc = 100
817

918
for arg in sys.argv:
1019
_arg = arg.split("=", 1)
1120
if _arg[0] == "model_pack_path":
1221
model_pack_path = _arg[1]
22+
if _arg[0] == "text_field_name":
23+
text_field_name = _arg[1]
24+
if _arg[0] == "nproc":
25+
nproc = _arg[1]
26+
27+
28+
records = json.loads(str(input_text))
29+
final_records = []
1330

14-
cat = DeIdModel.load_model_pack(model_pack_path)
31+
cat = CAT.load_model_pack(model_pack_path)
1532

16-
anon_text = cat.deid_text(input_text, redact=True)
33+
for record in records:
34+
_anon_text = deid_text(cat, record[text_field_name])
35+
record[text_field_name] = _anon_text
36+
final_records.append(record)
1737

18-
sys.stdout.write(anon_text)
38+
sys.stdout.write(json.dumps(final_records))

0 commit comments

Comments (0)