[python] Sentiment Analysis
Viewer
*** This page was generated with the meta tag "noindex, nofollow", either because you selected this option before saving or because the system flagged the paste as spam. As a result, search engines will not crawl or index this page. There is nothing to worry about: you can still share it with anyone.
# Patterns are compiled once at import time rather than on every call.
# The retweet marker is anchored with \b so that "RT " is only stripped when
# it starts a token, not when it ends an ordinary word (e.g. "SMART phone").
_RT_RE = re.compile(r'\bRT(\s+)')
_URL_RE = re.compile(r'https?://(\S+)')
_MENTION_RE = re.compile(r'(@|#)(\w+)')
_NUM_RE = re.compile(r'(\d+)')


def remove_features(data_str):
    """Strip tweet-specific noise from a piece of text and lower-case it.

    The input is coerced to ``str`` and then, in order:

    1. the retweet marker ``RT`` followed by whitespace is removed,
    2. ``http://`` / ``https://`` links are removed,
    3. ``@mentions`` and ``#hashtags`` (marker and word) are removed,
    4. runs of digits are removed,
    5. the text is passed through ``resolve_emoticon`` (defined elsewhere
       in this file; per the original comment it replaces emoji).

    Args:
        data_str: The raw text; any object accepted by ``str()``.

    Returns:
        The cleaned text, lower-cased.
    """
    data_str = str(data_str)
    data_str = _RT_RE.sub('', data_str)  # remove RT
    # URLs go before mentions/digits so that "#" fragments and numbers inside
    # a link are removed together with the link rather than leaving debris.
    data_str = _URL_RE.sub('', data_str)  # remove hyperlinks
    data_str = _MENTION_RE.sub('', data_str)  # remove @mentions and hashtags
    data_str = _NUM_RE.sub('', data_str)  # remove numerical digits
    data_str = resolve_emoticon(data_str)  # replace emoji
    return data_str.lower()
def main(sc, filename):
    """Count positive and negative texts in a file and plot the result.

    Each line of ``filename`` is cleaned with ``remove_features``. English
    lines are passed through ``abb_en``; Malay (``'ms'``) lines are passed
    through ``abb_bm`` and then translated to English with TextBlob. Lines in
    any other detected language are dropped. The sentiment polarity of every
    remaining line is computed, and the number of lines with polarity above
    zero and below zero is handed to ``make_plot``. Lines with a polarity of
    exactly zero are counted in neither group.

    Args:
        sc: The Spark context used to read the file (must provide
            ``textFile``).
        filename: Path or URI of the text file, one text per line.
    """
    cleaned = sc.textFile(filename).map(remove_features)  # remove and replace

    # NOTE(review): TextBlob's language detector is understood to raise on
    # strings shorter than 3 characters; cleaning can leave such lines behind
    # (e.g. a tweet that was only a link), so skip them instead of failing
    # the whole job. Confirm against the installed TextBlob version.
    detectable = cleaned.filter(lambda text: len(text) >= 3)

    # Detect the language once per text and keep it next to the text, so the
    # English and Malay branches do not each repeat the detection call.
    tagged = detectable.map(
        lambda text: (TextBlob(text).detect_language(), text)
    ).cache()

    # filter to english
    english = (
        tagged.filter(lambda pair: pair[0] == 'en')
        .map(lambda pair: abb_en(pair[1]))
    )
    # filter to bahasa and translate to en
    malay = (
        tagged.filter(lambda pair: pair[0] == 'ms')
        .map(lambda pair: abb_bm(pair[1]))
        .map(lambda text: str(TextBlob(text).translate(to='en')))
    )

    # Cache the polarities: two counts are taken below, and without caching
    # each count would re-run the entire pipeline, translation included.
    polarity = english.union(malay).map(
        lambda text: TextBlob(text).sentiment.polarity
    ).cache()

    positive_count = polarity.filter(lambda score: score > 0).count()
    negative_count = polarity.filter(lambda score: score < 0).count()

    # the cast is just to ensure the value is in integer data type
    make_plot(int(positive_count), int(negative_count))
Editor
You can edit this paste and save as new: