python sentence splitter

Solutions on MaxInterview for python sentence splitter by the best coders in the world

showing results for - "python sentence splitter"

02 Feb 2020

>>> from nltk import tokenize
>>> p = "Good morning Dr. Adams. The patient is waiting for you in room number 3."

>>> tokenize.sent_tokenize(p)
['Good morning Dr. Adams.', 'The patient is waiting for you in room number 3.']

source

Finn

09 May 2016

# -*- coding: utf-8 -*-
# Rule-based sentence splitter built on regular expressions.
import re

# Regex fragments used by split_into_sentences(). They are raw strings so that
# sequences such as ``\s`` reach the regex engine untouched instead of being
# treated as (invalid) Python string escapes.
alphabets = r"([A-Za-z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov)"
# A period with a digit on both sides (the decimal point in "3.14").
decimal_point = r"(?<=[0-9])[.](?=[0-9])"
# Two or more consecutive periods (an ellipsis).
multiple_dots = r"\.{2,}"


def split_into_sentences(text):
    """Split *text* into a list of sentences using hand-written rules.

    The function works in three phases:

    1. Periods that do not end a sentence (titles such as "Dr.", website
       suffixes, decimal points, initials, dotted acronyms, company
       suffixes) are replaced by the placeholder ``<prd>``.
    2. Every remaining ``.``, ``?`` and ``!`` is followed by the marker
       ``<stop>``.
    3. Placeholders are turned back into periods and the text is split on
       ``<stop>``.

    Args:
        text: The string to split. Newlines are treated as spaces.

    Returns:
        A list of sentences with surrounding whitespace stripped. Text that
        follows the last sentence terminator is returned as a final element
        rather than being discarded. An empty or whitespace-only input
        yields an empty list.

    Note:
        A ``.``, ``!`` or ``?`` immediately followed by a straight double
        quote is moved outside the quote, and ``.`` followed by a curly
        closing quote is treated the same way, so the returned sentences
        are not always verbatim slices of the input.
    """
    # Pad with spaces so that rules anchored on a neighbouring space also
    # apply to the first and last word.
    text = " " + text + "  "
    text = text.replace("\n", " ")

    # Phase 1: protect periods that are not sentence boundaries.
    text = re.sub(prefixes, r"\1<prd>", text)
    text = re.sub(websites, r"<prd>\1", text)
    text = re.sub(decimal_point, "<prd>", text)
    # Keep an ellipsis intact and treat its end as a sentence boundary.
    text = re.sub(
        multiple_dots,
        lambda match: "<prd>" * len(match.group(0)) + "<stop>",
        text,
    )
    text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    # A single letter followed by a period (an initial such as "J. Smith").
    text = re.sub(r"\s" + alphabets + "[.] ", r" \1<prd> ", text)
    # An acronym followed by a typical sentence opener does end a sentence.
    text = re.sub(acronyms + " " + starters, r"\1<stop> \2", text)
    text = re.sub(
        alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]",
        r"\1<prd>\2<prd>\3<prd>",
        text,
    )
    text = re.sub(alphabets + "[.]" + alphabets + "[.]", r"\1<prd>\2<prd>", text)
    # A company suffix followed by a sentence opener ends the sentence; the
    # period is kept (as <prd>) so it is not lost from the output.
    text = re.sub(" " + suffixes + "[.] " + starters, r" \1<prd><stop> \2", text)
    text = re.sub(" " + suffixes + "[.]", r" \1<prd>", text)
    text = re.sub(" " + alphabets + "[.]", r" \1<prd>", text)

    # Move terminators outside a closing quote so the quote stays with the
    # sentence it closes.
    text = text.replace(".”", "”.")
    text = text.replace(".\"", "\".")
    text = text.replace("!\"", "\"!")
    text = text.replace("?\"", "\"?")

    # Phase 2: mark every remaining terminator as a sentence boundary.
    text = text.replace(".", ".<stop>")
    text = text.replace("?", "?<stop>")
    text = text.replace("!", "!<stop>")

    # Phase 3: restore protected periods and split.
    text = text.replace("<prd>", ".")
    sentences = [chunk.strip() for chunk in text.split("<stop>")]
    # The padding leaves an empty trailing chunk when the text ends with a
    # terminator; drop only that, so an unterminated last sentence is kept.
    if sentences and not sentences[-1]:
        sentences.pop()
    return sentences

source