Kernel: Python 3.8.9 ('venv': venv)
In [1]:
import spacy
from spacy import displacy
import re
import csv

# Name of the spaCy pipeline package handed to spacy.load() in the next cell.
path = "en_core_web_md"
In [2]:
# Load the spaCy pipeline identified by `path` once; later cells call `nlp(text)`
# to turn text into a Doc for entity rendering.
nlp = spacy.load(path)
In [3]:
# Abbreviation table, loaded once when this cell runs. Each entry is one CSV
# row (a list of strings); `lengthen_abbreviations` reads columns 0-3 of it.
abbreviation_dictionary = []
# newline="" is the mode the csv module documents for files given to csv.reader.
with open("../data/extracted_locations/sg_abbreviations.csv", "r", newline="") as csv_file:
    abbreviation_dictionary.extend(csv.reader(csv_file, delimiter=","))


def _build_abbreviation_lookup(rows):
    """Map every spelling in columns 0-3 of each row to that row's column 3.

    Rows with fewer than four columns (for example the empty list that
    csv.reader yields for a blank line) are skipped instead of raising
    IndexError. If a spelling occurs in several rows, the last row wins.
    """
    lookup = {}
    for row in rows:
        if len(row) < 4:
            continue
        replacement = row[3]
        for spelling in row[:4]:
            if spelling:
                lookup[spelling] = replacement
    return lookup


def lengthen_abbreviations(text, abbreviations=None):
    """Replace known abbreviations in `text` with their long form.

    A token is a maximal run of word characters and apostrophes (``[\\w']+``).
    A token is replaced when it equals, case-sensitively, any of the first
    four columns of a row; the replacement is that row's fourth column.
    Every other character of `text` is returned unchanged.

    Args:
        text: The string to process.
        abbreviations: Optional iterable of rows (sequences of strings) to use
            instead of the module-level `abbreviation_dictionary`.

    Returns:
        A new string with matching tokens replaced.
    """
    rows = abbreviation_dictionary if abbreviations is None else abbreviations
    lookup = _build_abbreviation_lookup(rows)

    def _expand(match):
        token = match.group(0)
        return lookup.get(token, token)

    # Substituting in place, rather than tokenising and re-joining, keeps all
    # punctuation and spacing intact, so "1.5" and a sentence-final "." survive.
    # A function replacement is used so the long form is inserted literally,
    # without backslash-escape processing.
    return re.sub(r"[\w']+", _expand, text)
In [8]:
# Sample sentence for comparing entity recognition on the raw text against
# the same text after abbreviation expansion.
test_sent = "Free parking is available at City Square Mall in Kitchener Road for the first 1.5 hours during the weekday lunchtime period between 12pm and 2pm, while IMM Building in Jurong East offers two hours of free parking upon entry every day."

# `doc` holds the pipeline output for the expanded sentence,
# `second_doc` the output for the untouched sentence.
expanded_sent = lengthen_abbreviations(test_sent)
doc = nlp(expanded_sent)
second_doc = nlp(test_sent)

# Only the raw-sentence entities are rendered here; pass `doc` instead of
# `second_doc` to inspect the entities found in the expanded text.
displacy.render(second_doc, style="ent")
Out[8]: