-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
58 lines (44 loc) · 1.68 KB
/
main.py
File metadata and controls
58 lines (44 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# Pull variables from a local .env file into the process environment so the
# API key lookup below can find them.
load_dotenv()

# Embedding model requested from OpenAI. Alternatives that were tried here:
# "text-embedding-3-large", "text-embedding-3-small".
EMBEDDING_MODEL = "text-embedding-ada-002"

# None when the variable is not set in the environment / .env file.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Shared embeddings client used by get_embedding().
openai_embeddings = OpenAIEmbeddings(
    model=EMBEDDING_MODEL,
    openai_api_key=OPENAI_API_KEY,
)
def get_embedding(text: str) -> "np.ndarray | None":
    """Return the embedding vector for *text*, or ``None`` if the lookup fails.

    The vector is obtained from ``openai_embeddings.embed_query`` and wrapped
    in a NumPy array.

    Args:
        text: The string to embed.

    Returns:
        A NumPy array built from the embedding, or ``None`` when
        ``embed_query`` raised. Callers are expected to check for ``None``
        and skip the entry.
    """
    try:
        # Only the embedding call is guarded; nothing else here is expected
        # to fail.
        vector = openai_embeddings.embed_query(text)
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # carry on with the remaining inputs instead of aborting the run.
        print(f"Error generating embedding: {e}")
        return None
    return np.array(vector)
def plot_embeddings_3d(
    embeddings_3d,
    labels,
    *,
    output_path="3d_plot_small.png",
    dpi=1000,
):
    """Draw labelled points in a 3D scatter plot and save it as an image.

    Args:
        embeddings_3d: Indexable collection of points; ``embeddings_3d[i]``
            must unpack into exactly three values ``(x, y, z)``.
        labels: Sequence of labels; ``labels[i]`` names ``embeddings_3d[i]``.
            One point is drawn per label.
        output_path: Destination of the saved image. Defaults to the
            original hard-coded file name.
        dpi: Resolution passed to ``savefig``. Defaults to the original
            hard-coded value.

    Raises:
        IndexError: If there are more labels than points.
    """
    fig = plt.figure(figsize=(10, 7))
    try:
        ax = fig.add_subplot(111, projection='3d')

        for i, label in enumerate(labels):
            x, y, z = embeddings_3d[i]
            # The annotation next to each point shows its label and its
            # coordinates rounded to two decimals.
            text = f"{label} ({x:.2f}, {y:.2f}, {z:.2f})"
            ax.scatter(x, y, z, label=label)
            ax.text(x, y, z, text, fontsize=10)

        ax.set_title("3D Visualization of Similar Words Embeddings")
        # Anchor the legend outside the axes, to the right, so it does not
        # cover the points.
        ax.legend(loc='center left', bbox_to_anchor=(1.05, 0.5), title="Words")

        # Operate on this figure explicitly instead of pyplot's implicit
        # "current figure", so unrelated open figures cannot interfere.
        fig.tight_layout()
        fig.savefig(output_path, dpi=dpi, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate memory.
        plt.close(fig)
# Read the input words, one per line. The context manager closes the file
# handle, and whitespace-only lines are dropped so they are never sent to the
# embedding API.
with open("words.txt", "r", encoding="utf-8") as words_file:
    texts = [line for line in words_file.read().splitlines() if line.strip()]

# Collect each label together with its embedding. get_embedding() returns
# None on failure; keeping the two lists in lockstep ensures a skipped word
# cannot shift the remaining labels onto the wrong points in the plot.
labels = []
embeddings = []
for text in texts:
    embedding = get_embedding(text)
    if embedding is not None:
        labels.append(text)
        embeddings.append(embedding)

# PCA cannot extract 3 components from fewer than 3 samples; stop with a
# clear message instead of an error from deep inside scikit-learn.
if len(embeddings) < 3:
    raise SystemExit(
        f"Need at least 3 embeddings to project to 3D, got {len(embeddings)}."
    )

embeddings = np.array(embeddings)

# Reduce the embeddings to three dimensions for plotting.
pca = PCA(n_components=3)
embeddings_3d = pca.fit_transform(embeddings)

plot_embeddings_3d(embeddings_3d, labels)