Path: blob/main/streamlit/model_demo.py
744 views
# This is the script to run the Streamlit mini-app.
#
# It loads four spaCy pipelines, lets the user pick any of them in a form,
# optionally expands abbreviations in the submitted text using a CSV lookup,
# and renders the entities each selected pipeline finds with displaCy.

import csv
import re
# NOTE(review): `TRUE` (and `load` below) are not referenced anywhere in this
# script -- they look like accidental auto-imports. Confirm and remove.
from pickle import TRUE

import spacy
import streamlit as st
from spacy import displacy, load


# Labels offered in the model multiselect; they are also the keys of
# `model_choice` further down.
MODEL_NAMES = ['Standard Model', 'Dictionary Model',
               'NER Model 3.0', 'NER Model 3.1']

# Session-state key of the text area, shared by the widget and clear_form().
TEXT_INPUT_KEY = 1

# CSV whose rows hold abbreviation variants; lengthen_abbreviations() reads
# the first four cells of each row and treats the fourth as the long form.
ABBREVIATIONS_CSV = "./data/extracted_locations/sg_abbreviations.csv"
_EXPANSION_COLUMN = 3

# A "word" for abbreviation matching: letters, digits, underscore, apostrophe.
_WORD_PATTERN = re.compile(r"[\w']+")

ABOUT_TEXT = """
- This *Named Entity Recognition model for Singapore Location Names* detects Singaporean addresses, place names and building names from text.
- It was made with [spaCy v3](https://spacy.io/), an open-source library for Natural Language Processing.
- Check out the source code here on my [Github repo](https://github.com/drgnfrts/Singapore-Locations-NER)! :)
"""


# Page title and icon for the browser bar
st.set_page_config(
    page_title="NER for SG Locations",
    page_icon="🇸🇬",
)


def _max_width_(max_width_px=1400):
    """Inject CSS that caps the main block container's width.

    Args:
        max_width_px: Maximum width, in pixels, applied to
            ``.main .block-container``. Defaults to 1400.
    """
    css = (
        "\n<style>\n"
        ".main .block-container{\n"
        f"max-width: {max_width_px}px;\n"
        "}\n"
        "</style>\n"
    )
    st.markdown(css, unsafe_allow_html=True)


# Call _max_width_() function
_max_width_()


def _cache_models(func):
    """Wrap *func* with whichever Streamlit resource cache is available.

    Newer Streamlit releases deprecate ``st.cache`` in favour of
    ``st.cache_resource``; older releases only have ``st.cache``. Using
    whichever exists keeps the script runnable on both.
    """
    if hasattr(st, "cache_resource"):
        return st.cache_resource(show_spinner=False)(func)
    return st.cache(
        show_spinner=False,
        allow_output_mutation=True,
        suppress_st_warning=True,
    )(func)


@_cache_models
def load_models():
    """Load the four spaCy pipelines once and return them keyed by short name.

    The result is cached so that re-running the script does not reload the
    pipelines.

    Returns:
        dict mapping ``"std"``, ``"erl"``, ``"dcn"`` and ``"v31"`` to the
        loaded pipelines.
    """
    return {
        "std": spacy.load("en_core_web_md"),
        "erl": spacy.load("./models/model_v2.1"),
        "dcn": spacy.load("./models/model_v3.0/model-best"),
        "v31": spacy.load("./models/model_v3.1/model-best"),
    }


models = load_models()


# Pull the list of locations.
# newline="" is what the csv module asks for so that it can handle line
# endings (including ones embedded in quoted cells) itself.
# NOTE(review): no explicit encoding is given, so the platform default is
# used -- confirm the CSV's encoding and pin it.
with open(ABBREVIATIONS_CSV, "r", newline="") as csv_file:
    abbreviation_dictionary = list(csv.reader(csv_file, delimiter=","))


def _build_abbreviation_lookup(rows):
    """Map every variant in the first four cells of each row to its long form.

    Rows with fewer than four cells (e.g. blank lines in the CSV) are
    skipped instead of raising ``IndexError``. When the same variant appears
    more than once, the later row wins.
    """
    lookup = {}
    for row in rows:
        if len(row) <= _EXPANSION_COLUMN:
            continue
        expansion = row[_EXPANSION_COLUMN]
        for variant in row[:_EXPANSION_COLUMN + 1]:
            if variant:
                lookup[variant] = expansion
    return lookup


def lengthen_abbreviations(text):
    """Return *text* with known abbreviations replaced by their long form.

    Only whole word tokens (runs of word characters and apostrophes) are
    looked up; matching is exact and case-sensitive. Everything between the
    words -- spaces, newlines and all punctuation -- is passed through
    untouched, so no characters of the input are lost.

    Args:
        text: The raw text to process.

    Returns:
        The text with each matching word swapped for the fourth cell of the
        row in ``abbreviation_dictionary`` that lists it.
    """
    lookup = _build_abbreviation_lookup(abbreviation_dictionary)

    def expand(match):
        word = match.group(0)
        return lookup.get(word, word)

    return _WORD_PATTERN.sub(expand, text)


### ACTUAL START OF APP CONTENTS ###
# NOTE(review): the trailing glyphs in several labels below ("π", "π¦",
# "β«") are mis-decoded emoji whose original code points cannot be recovered
# from this copy of the file; they are kept as found. Restore them from the
# upstream source.
st.title("Named Entity Recogniser for Singapore Locations π")
st.write("Compare the Standard English NLP Model with the Trained SG Location Names Model.")


def clear_form():
    """Empty the text area by resetting its session-state entry."""
    st.session_state[TEXT_INPUT_KEY] = ""


def select_models(all_models_selected):
    """Render the model multiselect and return the chosen labels.

    The widget is drawn into the module-level ``container`` created inside
    the form, so this must be called after that container exists.

    Args:
        all_models_selected: When truthy, every model is pre-selected;
            otherwise the multiselect starts empty.

    Returns:
        The list of selected model labels.
    """
    preselected = MODEL_NAMES if all_models_selected else None
    return container.multiselect(
        "Choose one or more models to analyse text with:",
        MODEL_NAMES,
        preselected,
    )


def find_ents(model, input, abr_lengthen):
    """Run *model* over the text and render its entities on the page.

    Args:
        model: A loaded spaCy pipeline, called directly on the text.
        input: The text to analyse.
        abr_lengthen: When truthy, the text is first passed through
            lengthen_abbreviations().
    """
    text = lengthen_abbreviations(input) if abr_lengthen else input
    doc = model(text)
    ent_html = displacy.render(doc, style="ent", jupyter=False)
    st.markdown(ent_html, unsafe_allow_html=True)
    # Empty write adds vertical spacing after the rendered entities.
    st.write("")


# Dictionary for def display_models() to reference:
# multiselect label -> (section header, pipeline)
model_choice = {
    "Standard Model": ("Pre-trained Standard English Model π", models["std"]),
    "Dictionary Model": ("Dictionary-centric Model for SG Locations π", models["erl"]),
    "NER Model 3.0": ("Enhanced NER-centric Model 3.0 for SG Locations π¦", models["dcn"]),
    "NER Model 3.1": ("Enhanced NER-centric Model 3.1 for SG Locations π", models["v31"]),
}


def display_models(selected_models, text_input, abbreviation_status):
    """Show a header and the entity rendering for each selected model.

    Args:
        selected_models: Iterable of labels that are keys of ``model_choice``.
        text_input: The text to analyse.
        abbreviation_status: Forwarded to find_ents() to toggle abbreviation
            lengthening.
    """
    for selected_model in selected_models:
        header, model = model_choice[selected_model]
        st.header(header)
        find_ents(model, text_input, abbreviation_status)


# The actual form with inputs for model type, lengthening abbreviations and text to be analysed.
with st.form("NER_form"):
    # First item is a container that will display models to be used
    container = st.container()
    # Below the container are the checkboxes to enable selection of all models and abbreviations lengthening
    c_all_model_selection, c_abbreviate_selection, c_selection_last = st.columns(
        [1, 1, 3])
    with c_all_model_selection:
        all_models_selected = st.checkbox("Select all models")
    with c_abbreviate_selection:
        abbreviation_status = st.checkbox("Lengthen abbreviations")
    # Multiselect option for the container is below
    selected_models = select_models(all_models_selected)
    # Placeholder that the text area is drawn into
    text_input = st.empty()
    # The text area's key lets clear_form() reset it through the session state.
    input = text_input.text_area('Text to analyze:', key=TEXT_INPUT_KEY)
    # Buttons to find the locations and clear the form
    c_submit, c_clear, c_last = st.columns([1, 1, 5])
    with c_submit:
        submitted = st.form_submit_button("Find Locations π")
    with c_clear:
        click_clear = st.form_submit_button(
            'Clear text input β«', on_click=clear_form)

# NOTE(review): indentation was lost in this copy of the file, so it is not
# known whether the results were rendered inside the form or after it; they
# are rendered after it here. Move this under `with st.form(...)` if upstream
# did so.
if submitted:
    display_models(selected_models, input, abbreviation_status)


# Drop-down "About" section
with st.expander("ℹ️ - About this app", expanded=False):
    st.write(ABOUT_TEXT)
    st.markdown("")