
Commit 5d3907f

Fix typing and formatting for Multinomial Naive Bayes
1 parent 6dd885c commit 5d3907f

File tree

1 file changed: +113 -0 lines changed

1 file changed

+113
-0
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
"""
Multinomial Naive Bayes Classifier implementation.

This module implements Multinomial Naive Bayes from scratch without using
external machine learning libraries. It is commonly used for text
classification tasks such as spam detection.

References:
https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_bayes
"""

import math


class MultinomialNaiveBayes:
    """
    Multinomial Naive Bayes classifier.
    """

    def __init__(self, alpha: float = 1.0) -> None:
        """
        Initialize the classifier.

        :param alpha: Laplace smoothing parameter
        """
        if alpha <= 0:
            raise ValueError("Alpha must be greater than 0")

        self.alpha = alpha
        self.class_priors: dict[int, float] = {}
        self.feature_log_prob: dict[int, list[float]] = {}
        self.num_features: int = 0

    def fit(self, features: list[list[int]], labels: list[int]) -> None:
        """
        Train the Multinomial Naive Bayes classifier.

        :param features: Feature matrix (counts of features)
        :param labels: Class labels
        :raises ValueError: If features and labels differ in length or the
            feature matrix is empty

        >>> model = MultinomialNaiveBayes()
        >>> X = [[2, 1], [1, 1], [0, 2]]
        >>> y = [0, 0, 1]
        >>> model.fit(X, y)
        """
        if len(features) != len(labels):
            raise ValueError("Features and labels must have the same length")

        if not features:
            raise ValueError("Feature matrix must not be empty")

        self.num_features = len(features[0])

        # Group the training rows by their class label.
        separated: dict[int, list[list[int]]] = {}
        for row, label in zip(features, labels):
            separated.setdefault(label, []).append(row)

        total_samples = len(labels)

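        # For each class, store the log prior (the fraction of training
        # samples with that label) and the smoothed per-feature
        # log-likelihoods computed below.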
        for label, rows in separated.items():
            self.class_priors[label] = math.log(len(rows) / total_samples)

            feature_counts = [0] * self.num_features
            total_count = 0

            for row in rows:
                for index, value in enumerate(row):
                    feature_counts[index] += value
                    total_count += value

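            # Laplace-smoothed log-likelihood of feature f given class c:
            #   log P(f | c) = log((count_f + alpha)
            #                      / (total_count + alpha * num_features))
            # alpha > 0 keeps features unseen in a class from getting
            # probability zero, which would send the log score to -inf.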
            self.feature_log_prob[label] = [
                math.log(
                    (count + self.alpha)
                    / (total_count + self.alpha * self.num_features)
                )
                for count in feature_counts
            ]

    def predict(self, features: list[list[int]]) -> list[int]:
        """
        Predict class labels for input features.

        :param features: Feature matrix
        :return: Predicted labels

        >>> model = MultinomialNaiveBayes()
        >>> X = [[2, 1], [1, 1], [0, 2]]
        >>> y = [0, 0, 1]
        >>> model.fit(X, y)
        >>> model.predict([[1, 0], [0, 2]])
        [0, 1]
        """
        predictions: list[int] = []

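        # Score each sample against every class in log space:
        #   score(c) = log P(c) + sum_f x_f * log P(f | c)
        # Working with sums of logs instead of products of probabilities
        # avoids floating-point underflow.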
        for row in features:
            class_scores: dict[int, float] = {}

            for label in self.class_priors:
                score = self.class_priors[label]

                for index, value in enumerate(row):
                    score += value * self.feature_log_prob[label][index]

                class_scores[label] = score

            # Pick the class with the highest log score.
            predicted_label = max(
                class_scores.items(),
                key=lambda item: item[1],
            )[0]
            predictions.append(predicted_label)

        return predictions
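
A minimal usage sketch of the class added by this commit. The vocabulary, counts, and labels below are illustrative assumptions, not part of the commit:

# Hypothetical example: each column is the count of a word from the
# vocabulary ["free", "meeting"]; labels use 1 = spam, 0 = ham.
# The data is made up for demonstration.
train_counts = [[3, 0], [2, 1], [0, 2], [1, 3]]
train_labels = [1, 1, 0, 0]

model = MultinomialNaiveBayes(alpha=1.0)
model.fit(train_counts, train_labels)
print(model.predict([[4, 0], [0, 1]]))  # prints [1, 0]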
