-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathredactapp.py
132 lines (105 loc) · 3.23 KB
/
redactapp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Core Packages
import streamlit as st
import os
# import base64
#NLP Pkgs
import spacy
from spacy import displacy
# Load the spaCy pipeline registered under the name 'en' once, at import time.
# Every function below that needs NLP processing calls this shared `nlp` object.
nlp = spacy.load('en')
# # Time Pkg
# import time
# timestr = time.strftime("%Y%m%d-%H%M%S")
# Templates
# HTML container with a single `{}` slot; render_entities() fills the slot
# with displaCy markup through str.format().
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem">{}</div>"""
#Function to Redact
def sanitize_names(text):
    """Return ``text`` with every PERSON entity swapped for a placeholder.

    The text is processed by the module-level ``nlp`` pipeline, each detected
    entity span is merged into a single token, and the tokens are joined back
    together with ``" [REDACTED NAME] "`` standing in for every token whose
    entity type is ``PERSON``. All other tokens contribute ``token.string``.
    """
    placeholder = " [REDACTED NAME] "
    doc = nlp(text)
    # Merge each entity span into one token so that one entity yields one
    # placeholder in the output.
    for entity in doc.ents:
        entity.merge()
    pieces = [
        placeholder if tok.ent_type_ == 'PERSON' else tok.string
        for tok in doc
    ]
    return "".join(pieces)
def sanitize_places(text):
    """Return ``text`` with every GPE entity swapped for a placeholder.

    Runs ``text`` through the module-level ``nlp`` pipeline, merges each
    entity span into a single token, then rebuilds the string: tokens whose
    entity type is ``GPE`` become ``" [REDACTED PLACES] "`` and every other
    token contributes its ``token.string``.
    """
    doc = nlp(text)
    for span in doc.ents:
        # One token per entity, so each entity is replaced exactly once.
        span.merge()

    def _piece(tok):
        # Choose between the placeholder and the token's own text.
        if tok.ent_type_ == 'GPE':
            return " [REDACTED PLACES] "
        return tok.string

    return "".join(_piece(tok) for tok in doc)
def sanitize_dates(text):
    """Return ``text`` with every DATE entity replaced by a date placeholder.

    The text is processed by the module-level ``nlp`` pipeline and each
    detected entity span is merged into a single token. Tokens whose entity
    type is ``DATE`` are replaced with ``" [REDACTED DATE] "``; every other
    token contributes its ``token.string`` unchanged.

    Args:
        text: The raw text to redact.

    Returns:
        The redacted text as a single string.
    """
    # The placeholder previously read " [REDACTED PLACES] " (copied from
    # sanitize_places), which mislabelled redacted dates as places.
    placeholder = " [REDACTED DATE] "
    docx = nlp(text)
    # Merge each entity span into one token so that one entity yields one
    # placeholder in the output.
    # NOTE(review): Span.merge() and Token.string look like spaCy 2.x-era
    # APIs, in line with spacy.load('en') -- confirm before upgrading spaCy.
    for ent in docx.ents:
        ent.merge()
    return "".join(
        placeholder if token.ent_type_ == 'DATE' else token.string
        for token in docx
    )
def sanitize_org(text):
    """Return ``text`` with every ORG entity replaced by an org placeholder.

    The text is processed by the module-level ``nlp`` pipeline and each
    detected entity span is merged into a single token. Tokens whose entity
    type is ``ORG`` are replaced with ``" [REDACTED ORG] "``; every other
    token contributes its ``token.string`` unchanged.

    Args:
        text: The raw text to redact.

    Returns:
        The redacted text as a single string.
    """
    # The placeholder previously read " [REDACTED PLACES] " (copied from
    # sanitize_places), which mislabelled redacted organisations as places.
    placeholder = " [REDACTED ORG] "
    docx = nlp(text)
    # Merge each entity span into one token so that one entity yields one
    # placeholder in the output.
    # NOTE(review): Span.merge() and Token.string look like spaCy 2.x-era
    # APIs, in line with spacy.load('en') -- confirm before upgrading spaCy.
    for ent in docx.ents:
        ent.merge()
    return "".join(
        placeholder if token.ent_type_ == 'ORG' else token.string
        for token in docx
    )
#Function Display Entities
# @st.cache
def render_entities(rawtext):
    """Return displaCy entity markup for ``rawtext`` inside HTML_WRAPPER.

    ``rawtext`` is processed by the module-level ``nlp`` pipeline and rendered
    with ``displacy.render(..., style='ent')``. Doubled newlines in the markup
    are collapsed to single ones before it is inserted into ``HTML_WRAPPER``.
    """
    parsed = nlp(rawtext)
    markup = displacy.render(parsed, style='ent').replace("\n\n", "\n")
    return HTML_WRAPPER.format(markup)
# Function to write to a file
# def writetofile(text,file_name):
# with open(os.path.join("downloads",filename),"w") as f:
# filename = f.write(text)
# return filename
def main():
    """Build the Streamlit page: a redaction tool and an "About" screen.

    The sidebar select box picks the screen. On the redaction screen the user
    enters text, chooses which entity kind to censor, and presses Submit. The
    page then shows the entity-highlighted original followed by the redacted
    text.
    """
    st.title("Document Redactor Application")
    choice = st.sidebar.selectbox("Select Task", ["Redaction", "About"])

    if choice == "About":
        st.subheader("About")
        about_lines = (
            "Major Project - Document Redaction",
            "Usha Mittal Institute of Technology",
            "Guide: Amarpali ",
            "Members: Diksha Verma, Aishwarya Rao, Prachi Rane",
        )
        for line in about_lines:
            st.text(line)
        return

    if choice != "Redaction":
        return

    # Maps each option offered in the select box to its redaction function.
    sanitizers = {
        'names': sanitize_names,
        'places': sanitize_places,
        'dates': sanitize_dates,
        'org': sanitize_org,
    }

    st.subheader("Redaction of Terms")
    raw_text = st.text_area("Enter Text", "Type Here")
    redaction_choice = st.selectbox(
        "Select Term to Censor", ["names", "places", "org", "dates"]
    )

    if st.button("Submit"):
        result = sanitizers[redaction_choice](raw_text)

        st.subheader("Original Text")
        highlighted = render_entities(raw_text)
        st.write(highlighted, unsafe_allow_html=True)

        st.subheader("Redacted Text")
        st.write(result)
# Build the UI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()