-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
226 lines (176 loc) · 6.17 KB
/
app.py
File metadata and controls
226 lines (176 loc) · 6.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
from flask import Flask, jsonify, request
import nltk
from nltk.stem import WordNetLemmatizer
import pickle
import numpy as np
from tensorflow.keras.models import load_model
import json
import random
from flask_cors import CORS
import os
from dotenv import load_dotenv
from pymongo import MongoClient
from bson import json_util
import pymongo
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
class Config:
    """Base settings, used as-is when running in development."""

    # Development defaults: debugging is switched on.
    DEVELOPMENT = True
    DEBUG = True

    # Folder prefix from which the model and its companion files are loaded.
    MODEL_PATH = "./chatbot/"

    # Settings kept for reference but currently disabled:
    # SECRET_KEY = 'do-i-really-need-this'
    # FLASK_HTPASSWD_PATH = '/secret/.htpasswd'
    # FLASK_SECRET = SECRET_KEY
    # DB_HOST = 'database' # a docker link
class ProductionConfig(Config):
    """Production settings: same model folder, debugging turned off."""

    DEBUG = False
    DEVELOPMENT = False
    MODEL_PATH = "./chatbot/"
def connect_db(connection_string, verbose=False):
    """Open a MongoDB client and return its ``stackoverflow`` database.

    Args:
        connection_string: MongoDB URI handed to ``MongoClient``.
        verbose: When true, print the names of the database's collections.

    Returns:
        The ``stackoverflow`` database handle, or ``None`` when creating the
        client (or, with ``verbose``, listing the collections) raised.
    """
    try:
        # Exactly one client is created; an extra unconfigured MongoClient()
        # would only be discarded without ever being used.
        client = MongoClient(connection_string)
        database = client["stackoverflow"]
        if verbose:
            print("DB loaded with success, list of collections:\n",
                  database.list_collection_names())
        return database
    except Exception as err:
        # Best effort: report the problem and let the caller see ``None``
        # instead of aborting the import of the module.
        print(err)
        return None
def get_query(terms, verbose=False, limit=1):
    """Text-search the ``questions`` collection and return the hits as a list.

    Each returned document carries its text-search relevance under ``score``;
    the cursor is limited to ``limit`` documents and sorted by that score.
    With ``verbose`` set, the Id, Title and score of every hit are printed.
    """
    cursor = db.questions.find(
        {"$text": {"$search": terms}},
        {"score": {"$meta": "textScore"}},
    )
    cursor = cursor.limit(limit)
    cursor.sort([("score", {"$meta": "textScore"})])
    hits = list(cursor)
    if verbose:
        for hit in hits:
            print(hit["Id"], " -- ", hit["Title"], " ==> ", hit["score"])
    return hits
def filter_qs(ls_qs, threshold=2.0):
    """Keep only the questions whose ``score`` is at least ``threshold``."""
    kept = []
    for question in ls_qs:
        if question['score'] >= threshold:
            kept.append(question)
    return kept
def get_best_answer(id):
    """Return the answer with the highest ``stackoverflow_score`` for a question.

    Looks up every document in the ``answers`` collection whose ``ParentId``
    equals ``id``; returns ``None`` when there is none.
    """
    answers = list(db.answers.find({"ParentId": id}))
    # Highest score first; the sort is stable, so ties keep cursor order.
    answers.sort(key=lambda answer: answer['stackoverflow_score'], reverse=True)
    if not answers:
        return None
    return answers[0]
def return_thread_link(id):
    """Build the Stack Overflow URL of the question thread with this id."""
    thread_url = f'https://stackoverflow.com/questions/{id}/'
    return thread_url
def define_response(text):
    """Answer ``text`` with the best-scored answer of the best-matching question.

    Returns the answer document extended with ``link`` and ``status`` ("OK"),
    or a ``{"status": "Not OK", "Body": ...}`` dict when no question passes
    the score filter or the matched question has no answer.
    """
    candidates = filter_qs(get_query(terms=text, verbose=False))
    if len(candidates) < 1:
        return {"status": "Not OK",
                "Body": "I apologize. I do not know about your question."}

    question_id = candidates[0]['Id']
    best = get_best_answer(question_id)
    if not best:
        return {"status": "Not OK",
                "Body": "It seems there are no good answers for your question."}

    # Decorate the answer document in place before handing it back.
    best['link'] = return_thread_link(question_id)
    best['status'] = "OK"
    return best
def download_nltk(DEBUG=True):
    """Download the NLTK ``punkt`` and ``wordnet`` resources.

    Both resources are downloaded into ``./chatbot/nltk_data``. A failure is
    reported on stdout and otherwise ignored, so start-up continues.

    Args:
        DEBUG: When true, the download folder is also appended to
            ``nltk.data.path``.
    """
    data_dir = './chatbot/nltk_data'
    if DEBUG:
        nltk.data.path.append(data_dir)
    # NOTE(review): outside DEBUG the folder is not added to nltk.data.path;
    # confirm production locates the data some other way.
    try:
        for resource in ('punkt', 'wordnet'):
            nltk.download(resource, download_dir=data_dir)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # interpreter shutdown are not swallowed.
        print("Internet connection issue")
# --- Application bootstrap: everything below runs once, at import time ---

# Environment variables (DB_CONNECTION, ENV) may come from a local .env file.
load_dotenv(".env")
connection_string = os.environ.get("DB_CONNECTION")
db = connect_db(connection_string=connection_string, verbose=False)

# Production settings are the default; only ENV=development switches them.
ENV = os.environ.get("ENV")
print("ENV", ENV)
config = Config() if ENV == 'development' else ProductionConfig()

download_nltk(config.DEBUG)
lemmatizer = WordNetLemmatizer()

# Model and companion files all live under config.MODEL_PATH. Files are
# opened with ``with`` so the handles are closed once the data is loaded.
model = load_model(config.MODEL_PATH + 'chatbot_model.h5')
with open(config.MODEL_PATH + 'intents.json', encoding='utf-8') as _intents_file:
    intents = json.load(_intents_file)
# SECURITY NOTE: unpickling executes arbitrary code from the file; these two
# pickle files must only ever come from a trusted source.
with open(config.MODEL_PATH + 'words.pkl', 'rb') as _words_file:
    words = pickle.load(_words_file)
with open(config.MODEL_PATH + 'classes.pkl', 'rb') as _classes_file:
    classes = pickle.load(_classes_file)

app = Flask(__name__)
CORS(app)
# Keyword arguments only: newer Flask-Limiter releases take ``key_func`` as
# the first positional parameter, so passing ``app`` positionally breaks.
limiter = Limiter(
    key_func=get_remote_address,
    app=app,
    default_limits=["200 per day", "120 per hour"],
)
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its lower-cased, lemmatized tokens."""
    # Split the sentence into word tokens first ...
    tokens = nltk.word_tokenize(sentence)
    # ... then reduce every lower-cased token to its lemma.
    lemmas = []
    for token in tokens:
        lemmas.append(lemmatizer.lemmatize(token.lower()))
    return lemmas
def bow(sentence, words, show_details=True):
    """Encode ``sentence`` as a 0/1 bag-of-words array over ``words``.

    Position ``i`` of the result is 1 when ``words[i]`` equals one of the
    cleaned-up tokens of the sentence and 0 otherwise. With ``show_details``
    set, every match is printed.
    """
    tokens = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            # Mark the vocabulary slot of the matching word.
            bag[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)
    return np.array(bag)
def predict_class(sentence, model):
    """Return the intents ``model`` predicts for ``sentence``, strongest first.

    Only predictions strictly above 0.25 are kept. Each entry is a dict with
    the class name under ``intent`` and the score, as a string, under
    ``probability``.
    """
    ERROR_THRESHOLD = 0.25
    features = bow(sentence, words, show_details=False)
    scores = model.predict(np.array([features]))[0]
    # Drop weak predictions, then order the rest by descending score.
    ranked = sorted(
        ([index, score] for index, score in enumerate(scores)
         if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        {"intent": classes[index], "probability": str(score)}
        for index, score in ranked
    ]
def getQuestionAnswer(sentence):
    """Answer a question-type sentence by delegating to ``define_response``."""
    return define_response(sentence)
def getResponse(sentence, ints, intents_json):
    """Pick the reply for ``sentence`` from its predicted intents.

    Args:
        sentence: The user's message.
        ints: Predicted intents, strongest first; each item is a dict with an
            ``intent`` key. May be empty when nothing was predicted.
        intents_json: Dict whose ``intents`` list holds entries with ``tag``
            and ``responses`` keys.

    Returns:
        For the ``question`` intent, whatever ``getQuestionAnswer`` returns.
        For any other known intent, one of its ``responses`` chosen at random.
        When ``ints`` is empty or the top intent has no matching tag, a
        ``{"status": "Not OK", "Body": ...}`` dict instead of raising.
    """
    fallback = {"status": "Not OK",
                "Body": "I apologize. I do not know about your question."}

    # No prediction at all: indexing ints[0] would raise IndexError.
    if not ints:
        return fallback

    tag = ints[0]['intent']
    if tag == 'question':
        return getQuestionAnswer(sentence)

    for intent in intents_json['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])

    # Predicted tag is absent from the intents: answer with the fallback
    # rather than failing on an unbound result.
    return fallback
def parse_json(data):
    """Round-trip ``data`` through bson's ``json_util`` into plain JSON values."""
    serialized = json_util.dumps(data)
    return json.loads(serialized)
@app.route('/', methods=['GET'])
@limiter.limit("60 per hour")
def home():
    """Serve a small HTML landing page that links to the API's repository."""
    repo_url = "https://github.com/khaledadrani/StackoverflowChabot_api/tree/master/chatbot"
    repo_link = f'<a href={repo_url}><span>the API Github repository</span></a>'
    fragments = [
        "<h1> The api is working, yaay!</h1>",
        "<p> For more information about the api, visit ",
        repo_link,
        " for source code</p>",
    ]
    return "".join(fragments)
@app.route('/response', methods=['POST','GET'])
@limiter.limit("60 per hour")
def chatbot_response():
    """Chatbot endpoint.

    POST expects a JSON object with a non-empty string under ``text`` and
    returns the chatbot's reply: a plain string for canned intents, or a
    JSON-compatible dict for question answers. A missing or invalid body
    yields a 400 with a ``{"status": "Not OK", "Body": ...}`` payload.

    Any other method reaching this view gets a short HTML description.
    """
    if request.method != 'POST':
        # Covers GET and also the HEAD requests Flask accepts on GET routes,
        # so the view never falls through and returns None.
        return '<span> This route is for the chatbot responses </span>'

    # silent=True yields None instead of raising on a non-JSON body.
    data = request.get_json(silent=True)
    text = data.get("text") if isinstance(data, dict) else None
    if not isinstance(text, str) or not text.strip():
        error = {"status": "Not OK",
                 "Body": "Request body must be JSON with a non-empty 'text' string."}
        return jsonify(error), 400

    ints = predict_class(text, model)
    res = getResponse(text, ints, intents)
    # parse_json turns any BSON values in the answer into plain JSON types.
    return parse_json(res)
if __name__ == "__main__":
    # Executed as a script: start Flask's built-in server, with debug mode
    # taken from the active configuration.
    app.run(debug=config.DEBUG)