GitHub Repository: drgnfrts/Singapore-Locations-NER
Path: blob/main/streamlit/model_demo.py
# This is the script to run the Streamlit mini-app.

import csv
import re

import spacy
import streamlit as st
from spacy import displacy


# Page title and icon for the browser tab
st.set_page_config(
    page_title="NER for SG Locations",
    page_icon="🇸🇬",
)


# Makes the app width the full length of the screen, unless it exceeds 1400px.
def _max_width_():
    max_width_str = "max-width: 1400px;"
    st.markdown(
        f"""
        <style>
        .main .block-container{{
            {max_width_str}
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )


# Call _max_width_()
_max_width_()


# Load the spaCy models and cache them so they are not reloaded every time the
# app reruns. The models are stored in a dictionary for easy access.
@st.cache(show_spinner=False, allow_output_mutation=True, suppress_st_warning=True)
def load_models():
    standard_model = spacy.load("en_core_web_md")
    er_model = spacy.load("./models/model_v2.1")
    doccano_model = spacy.load("./models/model_v3.0/model-best")
    v31_model = spacy.load("./models/model_v3.1/model-best")
    models = {"std": standard_model, "erl": er_model,
              "dcn": doccano_model, "v31": v31_model}
    return models


models = load_models()


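# Note (assumption, not stated in this file): the relative paths passed to
# spacy.load() above resolve against the current working directory, so the app
# is presumably launched with `streamlit run` from the folder that contains
# the ./models and ./data directories.
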
# Pull the list of abbreviations and their full forms.
abbreviation_dictionary = []
with open("./data/extracted_locations/sg_abbreviations.csv", "r") as csv_file:
    csv_rows = csv.reader(csv_file, delimiter=",")
    for row in csv_rows:
        abbreviation_dictionary.append(row)


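# Illustrative sketch of the expected CSV layout (the real file may differ):
# each row is assumed to hold four comma-separated columns, the first three
# being abbreviation variants and the last the full form, e.g.
#   AMK,Amk,amk,Ang Mo Kio
# lengthen_abbreviations() below compares every token against all four columns
# of a row and substitutes the full form stored at index 3 on a match.
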
# Utilises the abbreviation list to lengthen abbreviations found in the text.
def lengthen_abbreviations(text):
    # Split the text into words, punctuation, spaces and hyphens so it can be
    # reassembled without losing the original separators.
    split = re.findall(r"[\w']+|[.,!?;&] | |-", text)
    i = 0
    for word in split:
        for row in abbreviation_dictionary:
            check_column = 0
            # Compare the token against every column of the row; on a match,
            # substitute the full form stored in the last column.
            while check_column < 4:
                if word == "":
                    split[i] = ''
                elif word == row[check_column]:
                    split[i] = row[3]
                check_column += 1
        i += 1
    cleaned_text = ''.join(split)
    return cleaned_text


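# Rough usage sketch, assuming the CSV maps the hypothetical abbreviation
# "AMK" to "Ang Mo Kio":
#   lengthen_abbreviations("Meet me at AMK Hub")  ->  "Meet me at Ang Mo Kio Hub"

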
### ACTUAL START OF APP CONTENTS ###
st.title("Named Entity Recogniser for Singapore Locations 📍")
st.write("Compare the Standard English NLP Model with the Trained SG Location Names Model.")


# Clears the form by resetting the session state of the text input box, whose
# widget key is 1.
def clear_form():
    st.session_state[1] = ""


# Function to enable selection of any number of the NLP models. Note that it
# references the module-level `container` created inside the form further down.
def select_models(all_models_selected):
    if all_models_selected:
        selected_models = container.multiselect(
            "Choose one or more models to analyse text with:",
            ['Standard Model', 'Dictionary Model', 'NER Model 3.0', 'NER Model 3.1'],
            ['Standard Model', 'Dictionary Model', 'NER Model 3.0', 'NER Model 3.1'])
    else:
        selected_models = container.multiselect(
            "Choose one or more models to analyse text with:",
            ['Standard Model', 'Dictionary Model', 'NER Model 3.0', 'NER Model 3.1'])
    return selected_models


# Finds the entities in the text with the chosen model, lengthening abbreviations
# first if requested, then renders the entity spans with displaCy.
def find_ents(model, text, abr_lengthen):
    if abr_lengthen:
        doc = model(lengthen_abbreviations(text))
    else:
        doc = model(text)
    ent_html = displacy.render(doc, style="ent", jupyter=False)
    st.markdown(ent_html, unsafe_allow_html=True)
    st.write("")


# Dictionary for def display_models() to reference
model_choice = {"Standard Model": ("Pre-trained Standard English Model 💂", models["std"]),
                "Dictionary Model": ("Dictionary-centric Model for SG Locations 📖", models["erl"]),
                "NER Model 3.0": ("Enhanced NER-centric Model 3.0 for SG Locations 🦁", models["dcn"]),
                "NER Model 3.1": ("Enhanced NER-centric Model 3.1 for SG Locations 🆕", models["v31"])
                }


# Function to display the models and the text analysed with def find_ents()
def display_models(selected_models, text_input, abbreviation_status):
    for selected_model in selected_models:
        st.header(model_choice[selected_model][0])
        find_ents(model_choice[selected_model][1],
                  text_input, abbreviation_status)


# The actual form, with inputs for the model type, abbreviation lengthening and the text to be analysed.
with st.form("NER_form"):
    # First item is a container that will hold the model multiselect
    container = st.container()
    # Below the container are the checkboxes for selecting all models and for abbreviation lengthening
    c_all_model_selection, c_abbreviate_selection, c_selection_last = st.columns([1, 1, 3])
    with c_all_model_selection:
        all_models_selected = st.checkbox("Select all models")
    with c_abbreviate_selection:
        abbreviation_status = st.checkbox("Lengthen abbreviations")
    # Multiselect for the models, rendered inside the container above
    selected_models = select_models(all_models_selected)
    # Empty placeholder that holds the text input area
    text_input = st.empty()
    # The text area has the key 1 so that clear_form() can reset it through the session state.
    input = text_input.text_area('Text to analyze:', key=1)
    # Buttons to find the locations and clear the form
    c_submit, c_clear, c_last = st.columns([1, 1, 5])
    with c_submit:
        submitted = st.form_submit_button("Find Locations 🌎")
    with c_clear:
        click_clear = st.form_submit_button('Clear text input ⌫', on_click=clear_form)
    if submitted:
        display_models(selected_models, input, abbreviation_status)


# Drop-down "About" section
with st.expander("ℹ️ - About this app", expanded=False):
    st.write(
        """
- This *Named Entity Recognition model for Singapore Location Names* detects Singaporean addresses, place names and building names from text.
- It was made with [spaCy v3](https://spacy.io/), an open-source library for Natural Language Processing.
- Check out the source code on my [GitHub repo](https://github.com/drgnfrts/Singapore-Locations-NER)! :)
        """
    )
    st.markdown("")