-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
128 lines (103 loc) · 5.18 KB
/
app.py
File metadata and controls
128 lines (103 loc) · 5.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import collections
import io
import os
import random

import pandas as pd
from flask import Flask, render_template, request, send_file, jsonify
from textblob import TextBlob
# Flask application instance; the @app.route decorators in this file register their view functions on it.
app = Flask(__name__)
def get_analysis(text):
    """Score ``text`` with TextBlob and bucket it into a sentiment label.

    Anything that is not already a ``str`` is converted with ``str()`` before
    being analysed.

    Returns:
        A ``(label, polarity, subjectivity)`` tuple. ``label`` is
        ``'Positive'`` when polarity is above 0.1, ``'Negative'`` when it is
        below -0.1, and ``'Neutral'`` for everything in between.
    """
    blob = TextBlob(text if isinstance(text, str) else str(text))
    polarity = blob.sentiment.polarity
    subjectivity = blob.sentiment.subjectivity

    # Scores inside the +/-0.1 band are too weak to call either way.
    if polarity > 0.1:
        return 'Positive', polarity, subjectivity
    if polarity < -0.1:
        return 'Negative', polarity, subjectivity
    return 'Neutral', polarity, subjectivity
@app.route('/')
def home():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
# Name fragments used to locate the review-text column and the timestamp column.
_TEXT_COLUMN_HINTS = ('review', 'text', 'comment', 'desc', 'body', 'content')
_DATE_COLUMN_HINTS = ('date', 'time')
# Columns added by the analysis itself. They must never be picked as the date
# column: "sentiment" contains the substring "time".
_DERIVED_COLUMNS = ('Sentiment', 'Score', 'Subjectivity')
_STOP_WORDS = frozenset({
    'the', 'and', 'is', 'it', 'to', 'of', 'in', 'for', 'with', 'on', 'that',
    'this', 'was', 'my', 'at', 'as', 'but', 'are', 'be', 'have', 'not', 'you',
    'very', 'good',
})


def _read_upload(file):
    """Parse the uploaded CSV (UTF-8, then Latin-1) or Excel file into a DataFrame."""
    # Compare case-insensitively so "DATA.CSV" is not sent to the Excel reader.
    filename = (file.filename or '').lower()
    if filename.endswith('.csv'):
        try:
            return pd.read_csv(file, encoding='utf-8')
        except UnicodeDecodeError:
            # Rewind: the failed attempt has already consumed part of the stream.
            file.seek(0)
            return pd.read_csv(file, encoding='latin1')
    return pd.read_excel(file)


def _find_column(columns, hints, exclude=()):
    """Return the first column whose lower-cased name contains a hint, or None."""
    for col in columns:
        # Headers read from spreadsheets may be ints or dates, not strings.
        name = str(col).lower()
        if any(hint in name for hint in hints) and col not in exclude:
            return col
    return None


def _build_timeline(df, date_col):
    """Return ``(frame_to_export, data, labels, has_date)`` for the score timeline.

    When ``date_col`` holds at least one parseable date, the exported frame is
    sorted by the parsed dates and the timeline is the daily mean score.
    Otherwise ``df`` is returned untouched and the timeline is a rolling mean
    over row order.
    """
    if date_col is not None:
        try:
            parsed = pd.to_datetime(df[date_col], errors='coerce')
            # Only commit to the date column if something actually parsed, so
            # a false-positive name match cannot blank out a column.
            if parsed.notna().any():
                dated = df.copy()
                dated[date_col] = parsed
                dated = dated.sort_values(by=date_col)
                daily = (
                    dated.dropna(subset=[date_col])
                    .set_index(date_col)
                    .resample('D')['Score']
                    .mean()
                    .fillna(0)
                )
                labels = daily.index.strftime('%Y-%m-%d').tolist()
                return dated, daily.tolist(), labels, True
        except Exception:
            # Best effort: a broken date column must not fail the whole
            # request, but the reason should be visible in the logs.
            app.logger.warning(
                "Could not build a date timeline from column %r", date_col,
                exc_info=True,
            )

    window = max(1, len(df) // 20)
    data = df['Score'].rolling(window=window).mean().fillna(0).tolist()
    return df, data, list(range(len(data))), False


@app.route('/analyze_file', methods=['POST'])
def analyze_file():
    """Run sentiment analysis over an uploaded CSV/Excel file of reviews.

    Expects a multipart upload under the form field ``file``. The first column
    whose name contains one of ``review``, ``text``, ``comment``, ``desc``,
    ``body`` or ``content`` is analysed row by row with ``get_analysis``.

    Side effect: the annotated table is written to ``analyzed_result.xlsx``
    inside ``app.root_path``. That is a single shared file, overwritten on
    every successful upload.

    Returns:
        200 with a JSON summary (label counts, averages, star rating, word
        frequencies, timeline and sample reviews); 400 when no file was sent,
        no text column was found, or the text column is empty; 500 with the
        exception message for any other failure.
    """
    file = request.files.get('file')
    if not file:
        return jsonify({"error": "No file uploaded"}), 400

    try:
        df = _read_upload(file)

        target_col = _find_column(df.columns, _TEXT_COLUMN_HINTS)
        if target_col is None:
            return jsonify({"error": f"Column not found. Available: {list(df.columns)}"}), 400

        # Pick the date column from the uploaded columns only, before the
        # derived ones are added, and never reuse the text column for it.
        date_col = _find_column(
            df.columns, _DATE_COLUMN_HINTS,
            exclude=(target_col,) + _DERIVED_COLUMNS,
        )

        df = df.dropna(subset=[target_col]).copy()
        if df.empty:
            # Without rows every average is NaN, which is not valid JSON.
            return jsonify({"error": f"Column '{target_col}' contains no text to analyze."}), 400
        df[target_col] = df[target_col].astype(str)

        results = df[target_col].apply(get_analysis)
        df['Sentiment'] = [r[0] for r in results]
        df['Score'] = [r[1] for r in results]
        df['Subjectivity'] = [r[2] for r in results]

        counts = df['Sentiment'].value_counts()
        summary = {
            label: int(counts.get(label, 0))
            for label in ('Positive', 'Neutral', 'Negative')
        }

        avg_score = df['Score'].mean()
        # Map polarity in [-1, 1] onto a 0-5 star scale.
        star_rating = round((avg_score + 1) * 2.5, 1)
        star_rating = max(0.0, min(5.0, star_rating))

        positives = df[df['Sentiment'] == 'Positive'].sort_values(by='Score', ascending=False)
        negatives = df[df['Sentiment'] == 'Negative'].sort_values(by='Score', ascending=True)
        neutrals = df[df['Sentiment'] == 'Neutral']
        pos_samples = positives[target_col].head(5).tolist() or ["No positive reviews found."]
        neg_samples = negatives[target_col].head(5).tolist() or ["No negative reviews found."]
        neu_samples = neutrals[target_col].head(5).tolist() or ["No neutral reviews found."]

        # "Edge cases": near-zero polarity but strongly subjective wording;
        # fall back to the most neutral rows when none qualify.
        edge_cases = df[(df['Score'].abs() < 0.1) & (df['Subjectivity'] > 0.5)]
        if edge_cases.empty:
            edge_cases = df[df['Score'].abs() < 0.05]
        mismatch = edge_cases[target_col].head(5).tolist() or ["No edge cases detected."]

        words = " ".join(df[target_col]).lower().split()
        common_words = collections.Counter(
            w for w in words
            if w.isalpha() and len(w) > 3 and w not in _STOP_WORDS
        ).most_common(50)
        word_cloud = [{"text": word, "value": count} for word, count in common_words]

        samples_text = df[target_col].tolist()
        flying_samples = random.sample(samples_text, min(len(samples_text), 30))

        df, timeline_data, timeline_labels, has_date = _build_timeline(df, date_col)

        # flask.send_file resolves relative paths against app.root_path, so
        # write there instead of the process's current working directory.
        df.to_excel(os.path.join(app.root_path, "analyzed_result.xlsx"), index=False)

        return jsonify({
            "status": "success",
            "summary": summary,
            "total": len(df),
            "avg": round(avg_score, 2),
            "subj": round(df['Subjectivity'].mean(), 2),
            "star_rating": star_rating,
            "word_cloud": word_cloud,
            "timeline_data": timeline_data,
            "timeline_labels": timeline_labels,
            "has_date": has_date,
            "flying_samples": flying_samples,
            "pos_samples": pos_samples,
            "neu_samples": neu_samples,
            "neg_samples": neg_samples,
            "mis_samples": mismatch,
        })
    except Exception as e:
        # Top-level boundary: log with traceback and report the message.
        app.logger.exception("Server Error: %s", e)
        return jsonify({"error": str(e)}), 500
@app.route('/download_ready')
def download_ready():
    """Send the analysis workbook ``analyzed_result.xlsx`` as an attachment.

    Returns:
        The file download, or a JSON error with status 404 when no workbook
        exists yet (for example, nothing has been analysed so far).
    """
    # flask.send_file resolves relative paths against app.root_path; build the
    # same absolute path so the existence check inspects the file it would send.
    result_path = os.path.join(app.root_path, "analyzed_result.xlsx")
    if not os.path.isfile(result_path):
        # Without this guard a missing file surfaces as an unhandled
        # FileNotFoundError, i.e. an opaque HTTP 500.
        return jsonify({"error": "No analyzed file available. Upload a file first."}), 404
    return send_file(result_path, as_attachment=True)
if __name__ == '__main__':
    # Direct execution starts Flask's built-in server with debug mode on.
    # NOTE(review): debug mode is meant for local development only.
    run_options = {"debug": True}
    app.run(**run_options)