# NLP preprocessing setup: NLTK stopword list, punctuation table, lemmatizer.
import nltk
from string import punctuation

# Download every corpus this script actually uses: 'stopwords' for token
# filtering and 'wordnet' for WordNetLemmatizer.lemmatize (the original
# only fetched 'stopwords', so lemmatization failed on a fresh install).
nltk.download("stopwords")
nltk.download("wordnet")

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

english_stopwords = stopwords.words('english')
def remove_punct(text):
    """Return *text* with every ASCII punctuation character replaced by a space.

    Equivalent to the original hand-written 32-entry translation dict
    (codepoints 33-47, 58-64, 91-96, 123-126 — exactly string.punctuation),
    but built with str.maketrans so the mapping cannot drift out of sync
    with the punctuation set.
    """
    # One C-level pass: each punctuation character maps to a single space.
    table = str.maketrans(punctuation, ' ' * len(punctuation))
    return text.translate(table)
lemma = WordNetLemmatizer()

def _preprocess(text):
    """Lower-case, strip punctuation, drop stopwords, lemmatize *text*.

    Returns the surviving tokens re-joined with single spaces, ready for a
    TF-IDF vectorizer. Produces the same final column values as the original
    six chained .map() passes.
    """
    # str.split() with no argument discards the empty tokens that
    # remove_punct's inserted spaces produce; the original split(' ') only
    # dropped them via the accidental fact that '' is a substring of
    # string.punctuation, making `''.strip() in punctuation` True.
    tokens = remove_punct(text.lower()).split()
    # Stopword removal happens BEFORE lemmatization, as in the original
    # (so e.g. inflected forms of stopwords are NOT removed).
    tokens = [t for t in tokens
              if t not in english_stopwords
              and t.strip() not in punctuation]
    return ' '.join(lemma.lemmatize(t) for t in tokens)

# Single pass over the column instead of six full scans.
train['text'] = train['text'].map(_preprocess)
[text] Text preprocessing for the TF-IDF Vectorizer
Viewer
*** This page was generated with the meta tag "noindex, nofollow". This happened either because you selected this option before saving or because the system flagged the paste as spam. As a result, this page will never appear in search engines and search bots will not crawl it. There is nothing to worry about — you can still share it with anyone.
Editor
You can edit this paste and save it as a new one: