Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions 0438.Find-All-Anagrams-in-a-String/memo.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# 438. Find All Anagrams in a String

## step1

ナイーブな判定機を作り、次にanagramが出現しうる場所までスキップする方針。

25分ぐらいかかった。方針を思いつくことよりも、変数の取り違えや添字のミスで時間を消費した。添字にはもっと慣れる必要がある。

O(len(s) * len(p))。遅い方だったので改善できるのかもしれない。

## step2

https://github.com/huyfififi/coding-challenges/pull/62

Sliding Windowを使うと O(len(s))となる。この解法の方が自然にも思える。

- 当初 `ord` を「文字コード依存」のように感じ `string.ascii_lowercase.find` を採用。冗長さは認識していた。
- `dict` / `Counter`は、将来ほかの文字種に広げる要件が出てからでよいとしスキップ。カウント 0 のキーを払う手間で少し複雑になるイメージだったとのこと。
- `ord` は Unicode のコードポイントを返しエンコーディングとは無関係。
- `find` は文字列を毎回線形探索

標準ライブラリstring知らなかった。string.ascii_lowercase.findを呼ぶと毎回26ステップかかることになる。

https://docs.python.org/ja/3.13/library/string.html

> ordはUnicodeのコードポイントを返すので、エンコーディングは関係なく、いつも同じ値を返します。findの線形探索のコストが気になりました。

なるほど


> そこまで時間をかけずに方針を立てることができたが、ループの end 範囲を間違えて (`len(source) - anagram_length + 1` を `len(source) - anagram_length` としてしまった)、延々とハマっていた。何度見直しても、変な思い込みで間違っている部分をスルーしてしまうから、インデックスの境界に気をつけなければならない問題は苦手だな。

自分も同じところで間違えた。

整理すると

(last =) start + len(p) − 1 < len(s) <-> start < len(s) − len(p) + 1

は同値。似たケースで間違えないようにしたい。
35 changes: 35 additions & 0 deletions 0438.Find-All-Anagrams-in-a-String/step1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import collections


class Solution:
    """LeetCode 438: locate every anagram of ``p`` inside ``s``."""

    def findAnagrams(self, s: str, p: str) -> list[int]:
        """Return the start indices of all substrings of ``s`` that are anagrams of ``p``.

        A candidate window is verified from scratch against the character
        counts of ``p``. After a hit, the search reuses what it already knows
        about the window to append or skip start positions without
        re-validating them.

        Args:
            s: The text to search.
            p: The pattern whose anagrams are looked for.

        Returns:
            The matching start indices in ascending order.
        """
        len_s = len(s)
        len_p = len(p)
        # One past the last start index at which a len(p) window fits in s.
        start_limit = len_s - len_p + 1
        char_to_count = collections.Counter(p)

        def validate_anagram(start: int) -> bool:
            """Return True if s[start:start + len(p)] is an anagram of p."""
            remaining = char_to_count.copy()
            for char in s[start:start + len_p]:
                remaining[char] -= 1
                # The window and p have the same length, so a surplus of any
                # character is the only way the window can fail to match.
                if remaining[char] < 0:
                    return False
            return True

        result: list[int] = []
        start = 0
        while start < start_limit:
            if not validate_anagram(start):
                start += 1
                continue
            result.append(start)

            # Sliding by one drops s[start - 1] and gains s[last]. While the
            # two are the same character the counts do not change, so each
            # shifted window is an anagram as well.
            start += 1
            last = start + len_p - 1
            while start < start_limit and s[start - 1] == s[last]:
                result.append(start)
                start += 1
                last += 1

            # The window is now one s[start - 1] short. No window can match
            # until one ends at or after the next occurrence of that
            # character, so jump straight to the first such window.
            while last < len_s and s[last] != s[start - 1]:
                last += 1
            start = last - len_p + 1

        return result
58 changes: 58 additions & 0 deletions 0438.Find-All-Anagrams-in-a-String/step2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import collections


class Solution:
def findAnagrams(self, s: str, p: str) -> List[int]:
len_p = len(p)
len_s = len(s)
if len_p > len_s:
return []

count_p = collections.Counter(p)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

動詞の原形または命令形が変数名の先頭にあると、メソッド名のように感じます。 char_frequencies_in_p や char_histogram_in_p あたりはいかがでしょうか?

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

長すぎるのを避けるため、histogram_in_pとしました。sの方もそのようにします。

window = collections.Counter(s[:len_p])
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

window は文字列であれば部分文字列のことを指すと思いますので、 Counter の変数名としては違和感を感じました。


result: list[int] = []
if window == count_p:
result.append(0)

for i in range(len_p, len_s):
left, right = s[i - len_p], s[i]
window[right] += 1
window[left] -= 1
if window[left] == 0:
del window[left]
if window == count_p:
result.append(i - len_p + 1)

return result


class Solution:
    """LeetCode 438 solved with a fixed-size sliding window over letter counts."""

    def findAnagrams(self, s: str, p: str) -> list[int]:
        """Return the start indices of all substrings of ``s`` that are anagrams of ``p``.

        Letter counts are kept in 26-slot lists indexed by ``ord(c) - ord("a")``,
        so only the characters 'a'-'z' map to valid slots.

        Args:
            s: The text to search.
            p: The pattern whose anagrams are looked for.

        Returns:
            The matching start indices in ascending order; an empty list when
            ``p`` is longer than ``s``.
        """
        len_p = len(p)
        len_s = len(s)
        if len_p > len_s:
            return []

        def idx(c: str) -> int:
            """Map a lowercase letter to its slot in the 26-entry count list."""
            return ord(c) - ord("a")

        count_p = [0] * 26
        for c in p:
            count_p[idx(c)] += 1

        # Counts for the first window, s[0:len_p].
        window = [0] * 26
        for c in s[:len_p]:
            window[idx(c)] += 1

        result: list[int] = []
        if window == count_p:
            result.append(0)

        # Slide one position at a time: `outgoing` leaves on the left,
        # `incoming` enters on the right, and the new window begins at `start`.
        for start, (outgoing, incoming) in enumerate(zip(s, s[len_p:]), start=1):
            window[idx(incoming)] += 1
            window[idx(outgoing)] -= 1
            if window == count_p:
                result.append(start)

        return result
58 changes: 58 additions & 0 deletions 0438.Find-All-Anagrams-in-a-String/step2_revised.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import collections


class Solution:
    """LeetCode 438 solved with a sliding window of ``collections.Counter`` histograms."""

    def findAnagrams(self, s: str, p: str) -> list[int]:
        """Return the start indices of all substrings of ``s`` that are anagrams of ``p``.

        Args:
            s: The text to search.
            p: The pattern whose anagrams are looked for.

        Returns:
            The matching start indices in ascending order; an empty list when
            ``p`` is longer than ``s``.
        """
        len_p = len(p)
        len_s = len(s)
        if len_p > len_s:
            return []

        histogram_in_p = collections.Counter(p)
        # Histogram of the current window of s, starting with s[0:len_p].
        histogram_in_s = collections.Counter(s[:len_p])

        result: list[int] = []
        if histogram_in_s == histogram_in_p:
            result.append(0)

        for i in range(len_p, len_s):
            # s[i] enters the window on the right, s[i - len_p] leaves on the left.
            left, right = s[i - len_p], s[i]
            histogram_in_s[right] += 1
            histogram_in_s[left] -= 1
            # Remove exhausted keys so the comparison below does not depend on
            # how Counter equality treats zero-count entries.
            if histogram_in_s[left] == 0:
                del histogram_in_s[left]
            if histogram_in_s == histogram_in_p:
                result.append(i - len_p + 1)

        return result


class Solution:
    """LeetCode 438 solved with a sliding window over 26-slot letter histograms."""

    def findAnagrams(self, s: str, p: str) -> list[int]:
        """Return the start indices of all substrings of ``s`` that are anagrams of ``p``.

        Histograms are 26-entry lists indexed by ``ord(c) - ord("a")``, so only
        the characters 'a'-'z' map to valid slots.

        Args:
            s: The text to search.
            p: The pattern whose anagrams are looked for.

        Returns:
            The matching start indices in ascending order; an empty list when
            ``p`` is longer than ``s``.
        """
        len_p = len(p)
        len_s = len(s)
        if len_p > len_s:
            return []

        base = ord("a")

        def idx(c: str) -> int:
            """Map a lowercase letter to its slot in a 26-entry histogram."""
            return ord(c) - base

        histogram_in_p = [0] * 26
        for c in p:
            histogram_in_p[idx(c)] += 1

        # Histogram of the current window of s, starting with s[0:len_p].
        histogram_in_s = [0] * 26
        for c in s[:len_p]:
            histogram_in_s[idx(c)] += 1

        result: list[int] = []
        if histogram_in_s == histogram_in_p:
            result.append(0)

        for right in range(len_p, len_s):
            # After this step the window is s[left + 1:right + 1].
            left = right - len_p
            histogram_in_s[idx(s[right])] += 1
            histogram_in_s[idx(s[left])] -= 1
            if histogram_in_s == histogram_in_p:
                result.append(left + 1)

        return result
25 changes: 25 additions & 0 deletions 0438.Find-All-Anagrams-in-a-String/step3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
class Solution:
    """LeetCode 438 solved with a sliding window over 26-slot letter counts."""

    def findAnagrams(self, s: str, p: str) -> list[int]:
        """Return the start indices of all substrings of ``s`` that are anagrams of ``p``.

        Letter counts are kept in 26-slot lists indexed by ``ord(c) - ord("a")``,
        so only the characters 'a'-'z' map to valid slots.

        Args:
            s: The text to search.
            p: The pattern whose anagrams are looked for.

        Returns:
            The matching start indices in ascending order; an empty list when
            ``s`` is shorter than ``p``.
        """
        len_s = len(s)
        len_p = len(p)
        if len_s < len_p:
            return []

        def idx(c: str) -> int:
            """Map a lowercase letter to its slot in the 26-entry count list."""
            return ord(c) - ord("a")

        count_p = [0] * 26
        window = [0] * 26
        # s is at least as long as p here, so zip pairs each character of p
        # with the matching character of the first window s[0:len_p].
        for p_char, s_char in zip(p, s):
            count_p[idx(p_char)] += 1
            window[idx(s_char)] += 1

        # Largest start index at which a len(p) window still fits in s.
        last_start = len_s - len_p

        anagram_starts: list[int] = []
        for start in range(last_start + 1):
            if count_p == window:
                anagram_starts.append(start)

            # Advance the window to begin at start + 1, unless this was the
            # final position.
            if start < last_start:
                window[idx(s[start])] -= 1
                window[idx(s[start + len_p])] += 1

        return anagram_starts