[python] Sentiment Analysis

Viewer

*** This page was generated with the meta tag "noindex, nofollow", either because you selected this option before saving or because the system flagged the page as spam. As a result, search engine bots will not crawl this page and it will never appear in search results. There is nothing to worry about; you can still share it with anyone.

copy download embed print
Name: Sentiment Analysis

from textblob import TextBlob
import matplotlib.pyplot as plt
from pyspark import SparkConf, SparkContext
import re
import string
##OTHER FUNCTIONS/CLASSES
def resolve_emoticon(line):
    """Replace text emoticons in *line* with descriptive English words.

    Longer emoticons are substituted before shorter ones, so an emoticon
    that contains another one (``':))'`` contains ``':)'``, ``'>:-)'``
    contains ``':-)'``) is replaced as a whole instead of being partially
    rewritten by the shorter match.

    Args:
        line: The text to process.

    Returns:
        A copy of *line* with every known emoticon replaced by its word.
    """
    # NOTE: the table previously listed ':)' twice ('sad' and 'happy'); in a
    # dict literal the last value wins, so only the effective 'happy' entry
    # is kept here.
    emoticon = {
        ':-)': 'smile',
        ':))': 'very happy',
        ':)': 'happy',
        ':((': 'very sad',
        ':(': 'sad',
        ':-P': 'tongue',
        ':-o': 'gasp',
        '>:-)': 'angry',
    }
    # Longest keys first, otherwise ':))' would become 'happy)' and '>:-)'
    # would become '>smile'.
    for key in sorted(emoticon, key=len, reverse=True):
        line = line.replace(key, emoticon[key])
    return line
def abb_bm(line):
    """Expand the short-form words of the ``bm`` table, then resolve emoticons.

    The line is split on whitespace; every token that exactly matches a key
    of the table is swapped for its expansion, and the tokens are rejoined
    with single spaces before being handed to ``resolve_emoticon``.

    Args:
        line: The text to process.

    Returns:
        The expanded text with emoticons replaced by words.
    """
    expansions = {
        'sy': 'saya',
        'sk': 'suka',
        'byk': 'banyak',
        'sgt': 'sangat',
        'mcm': 'macam',
        'bodo': 'bodoh',
        'kat': 'dekat',
    }
    tokens = line.split()
    # Unknown tokens pass through unchanged.
    expanded = [expansions.get(token, token) for token in tokens]
    return resolve_emoticon(' '.join(expanded))
def abb_en(line):
    """Expand the short-form words of the ``en`` table, then resolve emoticons.

    The line is split on whitespace; every token that exactly matches a key
    of the table is swapped for its expansion, and the tokens are rejoined
    with single spaces before being handed to ``resolve_emoticon``.

    Args:
        line: The text to process.

    Returns:
        The expanded text with emoticons replaced by words.
    """
    expansions = {
        'u': 'you',
        'thr': 'there',
        'asap': 'as soon as possible',
        'lv': 'love',
        'c': 'see',
    }
    tokens = line.split()
    # Unknown tokens pass through unchanged.
    expanded = [expansions.get(token, token) for token in tokens]
    return resolve_emoticon(' '.join(expanded))
def make_plot(pos, neg):
    """Display a two-bar chart of the positive and negative counts.

    Args:
        pos: Count for the "Positive" bar; converted with ``int()``.
        neg: Count for the "Negative" bar; converted with ``int()``.
    """
    tick_positions = [1, 2]
    tick_labels = ["Positive", "Negative"]
    bar_heights = [int(pos), int(neg)]

    # Chart decoration.
    plt.title('Sentiment Analysis - Lexical Based')
    plt.xlabel('Polarity')
    plt.ylabel('Count')
    plt.grid(True)

    # One bar per polarity, labelled on the x axis.
    plt.bar(tick_positions, bar_heights, align='center')
    plt.xticks(tick_positions, tick_labels)
    plt.show()
def remove_features(data_str):
    """Strip retweet markers, URLs, mentions/hashtags and digits from a line.

    After the removals, emoticons are replaced by words via
    ``resolve_emoticon`` and the result is lower-cased.

    Args:
        data_str: The raw line; it is converted with ``str()`` first.

    Returns:
        The cleaned, lower-cased text.
    """
    url_re = re.compile(r'https?://(\S+)')
    num_re = re.compile(r'(\d+)')
    mention_re = re.compile(r'(@|#)(\w+)')
    # The word boundary keeps "RT " from being cut out of the middle of an
    # upper-case word (e.g. "START now" used to become "STAnow").
    rt_re = re.compile(r'\bRT(\s+)')

    data_str = str(data_str)
    data_str = rt_re.sub('', data_str)       # remove the retweet marker
    data_str = url_re.sub('', data_str)      # remove hyperlinks
    data_str = mention_re.sub('', data_str)  # remove @mentions and #hashtags
    data_str = num_re.sub('', data_str)      # remove numerical digits
    # Emoticons are resolved before lower-casing because some keys
    # (e.g. ':-P') are case-sensitive.
    data_str = resolve_emoticon(data_str)
    return data_str.lower()
def main(sc, filename):
    """Count positive and negative lines of *filename* and plot the result.

    Pipeline:
      1. Read the file and clean every line with ``remove_features``.
      2. Detect each line's language once and keep the lines detected as
         ``'en'`` or ``'ms'``.
      3. Expand abbreviations (``abb_en`` / ``abb_bm``); ``'ms'`` lines are
         additionally translated to English.
      4. Count lines with polarity > 0 and < 0 and pass both counts to
         ``make_plot``. Lines with polarity exactly 0 are not counted.

    Args:
        sc: The SparkContext used to read the file.
        filename: Path of the text file, one sentence per line.
    """
    cleaned = sc.textFile(filename).map(remove_features)

    # Detect the language once per line and cache the tagged lines; the
    # two filters below would otherwise each repeat the detection.
    tagged = cleaned.map(
        lambda text: (TextBlob(text).detect_language(), text)
    ).cache()

    english = (
        tagged.filter(lambda pair: pair[0] == 'en')
        .map(lambda pair: abb_en(pair[1]))
    )
    translated = (
        tagged.filter(lambda pair: pair[0] == 'ms')
        .map(lambda pair: abb_bm(pair[1]))
        .map(lambda text: str(TextBlob(text).translate(to='en')))
    )

    # Two count() actions run on this RDD; caching keeps the second one
    # from recomputing the whole lineage (including the translation).
    sentences = english.union(translated).cache()

    positive_count = sentences.filter(
        lambda text: TextBlob(text).sentiment.polarity > 0
    ).count()
    negative_count = sentences.filter(
        lambda text: TextBlob(text).sentiment.polarity < 0
    ).count()

    make_plot(positive_count, negative_count)
if __name__ == "__main__":
    # Configure a local Spark environment ("local[*]" master).
    conf = SparkConf().setMaster("local[*]").setAppName("My Spark Application - Sentiment Analysis")
    sc = SparkContext(conf=conf)
    filename = "simple_sentences.txt"
    try:
        main(sc, filename)
    finally:
        # Always stop the context, even when the analysis raises.
        sc.stop()

Editor

You can edit this paste and save as new:

fullscreen copy clear

Syntax Highlighting

Title / Paste Name

Meta robots tag

reCaptcha

File Description

Sentiment Analysis
Paste Code
06 May-2021
3.19 Kb

You can Share it:

Latest Code Pastes

Full list

Tools

[python] Sentiment Analysis

Viewer

Editor