Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions 0721.Accounts-Merge/memo.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# 721. Accounts Merge

## step1
40m ぐらいかかった。UnionFindは記憶に自信がなく少し調べてしまった。
同値類を管理するデータ構造としてこれが素直な方針だと個人的には思った

revised: 変数名を改善


## 他の人のコード
https://github.com/huyfififi/coding-challenges/pull/48

```python
for i, (_, *emails) in enumerate(accounts):
```
この書き方は知らなかった。

https://peps.python.org/pep-3132/

> This PEP proposes a change to iterable unpacking syntax, allowing to specify a “catch-all” name which will be assigned a list of all items not assigned to a “regular” name.

```python
[accounts[i][0]] + sorted(emails)
```
より
```python
[accounts[i][0], *sorted(emails)]
```
の方がlistの生成回数が少ない

自分はnameで分離した後にunion-findで同値類を求めたがnameで分離しない方が自然だな。

DFSでも書ける、なるほど。

## step2
DFSとunion findで書き直し

時間計算量:

N = accounts数、E = 全email出現回数、U = ユニークemail数

DFS: $O(E + U \log U)$

UnionFind: $O(E\,\alpha(N) + U \log U)$（$\alpha$: 逆アッカーマン関数）

## C++
LLMのコードを写経する形になった。自分ではまだ書けない。

- unionは共用体を表す予約語
- 共用体:すべてのメンバ変数が、メモリ上の同じ場所を共有する
- std::setは赤黒木で管理されているので重複排除とソートを同時に行える
- https://ja.wikipedia.org/wiki/%E8%B5%A4%E9%BB%92%E6%9C%A8

61 changes: 61 additions & 0 deletions 0721.Accounts-Merge/step1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. n-1``.

    Uses union by size and path compression.
    """

    def __init__(self, n):
        """Create ``n`` singleton sets, one per integer in ``range(n)``."""
        # parent[x] == x marks x as the root (representative) of its set.
        self.parent = list(range(n))
        # size[r] is the number of elements in the set rooted at r; it is
        # only kept up to date for roots.
        self.size = [1] * n

    def find(self, x):
        """Return the root of the set containing ``x``.

        Every node visited on the way up is re-pointed directly at the root.
        """
        if x != self.parent[x]:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y``.

        Returns the root of the resulting set, whether or not a merge was
        actually needed.
        """
        root_x = self.find(x)
        root_y = self.find(y)

        if root_x == root_y:
            return root_x

        # Attach the smaller tree under the larger one to keep trees shallow.
        if self.size[root_x] < self.size[root_y]:
            root_x, root_y = root_y, root_x

        self.parent[root_y] = root_x
        self.size[root_x] += self.size[root_y]
        return root_x


class Solution:
    def accountsMerge(self, accounts: list[list[str]]) -> list[list[str]]:
        """Merge accounts that share at least one email.

        Each account is ``[name, email, ...]``. Accounts are first bucketed by
        name, then accounts inside one bucket are joined whenever they have an
        email in common. Every output row is ``[name, *sorted unique emails]``.
        """

        def merge_accounts_with_same_name(name, indexes):
            """Merge the accounts at ``indexes``, all of which carry ``name``."""
            if len(indexes) == 1:
                (only,) = indexes
                return [[accounts[only][0], *sorted(set(accounts[only][1:]))]]

            # The union-find works on ranks, i.e. positions inside `indexes`,
            # not on positions inside `accounts`.
            groups = UnionFind(len(indexes))
            first_rank_of = {}
            for rank, index in enumerate(indexes):
                for email in accounts[index][1:]:
                    if email not in first_rank_of:
                        first_rank_of[email] = rank
                        continue
                    groups.union(rank, first_rank_of[email])

            emails_by_root = {}
            for rank, index in enumerate(indexes):
                bucket = emails_by_root.setdefault(groups.find(rank), [])
                bucket.extend(accounts[index][1:])

            return [[name, *sorted(set(emails))] for emails in emails_by_root.values()]

        indexes_by_name = {}
        for index, account in enumerate(accounts):
            indexes_by_name.setdefault(account[0], []).append(index)

        merged = []
        for name, indexes in indexes_by_name.items():
            merged += merge_accounts_with_same_name(name, indexes)
        return merged
67 changes: 67 additions & 0 deletions 0721.Accounts-Merge/step1_revised.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. n-1``.

    Uses union by size and path compression.
    """

    def __init__(self, n):
        """Create ``n`` singleton sets, one per integer in ``range(n)``."""
        # parent[x] == x marks x as the root (representative) of its set.
        self.parent = list(range(n))
        # size[r] is the number of elements in the set rooted at r; it is
        # only kept up to date for roots.
        self.size = [1] * n

    def find(self, x):
        """Return the root of the set containing ``x``.

        Every node visited on the way up is re-pointed directly at the root.
        """
        if x != self.parent[x]:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y``.

        Returns the root of the resulting set, whether or not a merge was
        actually needed.
        """
        root_x = self.find(x)
        root_y = self.find(y)

        if root_x == root_y:
            return root_x

        # Attach the smaller tree under the larger one to keep trees shallow.
        if self.size[root_x] < self.size[root_y]:
            root_x, root_y = root_y, root_x

        self.parent[root_y] = root_x
        self.size[root_x] += self.size[root_y]
        return root_x


class Solution:
    def accountsMerge(self, accounts: list[list[str]]) -> list[list[str]]:
        """Merge accounts that share at least one email.

        Each account is ``[name, email, ...]``. Accounts are grouped by name
        first; within one name, accounts with a common email are unified.
        Every output row is the name followed by its sorted, de-duplicated
        emails.
        """

        def merge_accounts_with_same_name(name, account_indexes):
            """Merge the accounts at ``account_indexes``, all named ``name``."""
            if len(account_indexes) == 1:
                sole_account = accounts[account_indexes[0]]
                return [[sole_account[0]] + sorted(set(sole_account[1:]))]

            # Work on the same-name accounts only; a "pos" is an index into
            # this local list and is what the union-find is keyed on.
            same_name_accounts = [accounts[i] for i in account_indexes]
            union_find = UnionFind(len(same_name_accounts))

            owner_pos = {}
            for pos, account in enumerate(same_name_accounts):
                for email in account[1:]:
                    seen_pos = owner_pos.get(email)
                    if seen_pos is None:
                        owner_pos[email] = pos
                    else:
                        union_find.union(pos, seen_pos)

            root_to_emails = {}
            for pos, account in enumerate(same_name_accounts):
                root = union_find.find(pos)
                if root not in root_to_emails:
                    root_to_emails[root] = []
                root_to_emails[root].extend(account[1:])

            rows = []
            for emails in root_to_emails.values():
                rows.append([name] + sorted(set(emails)))
            return rows

        name_to_account_indexes = {}
        for account_index, account in enumerate(accounts):
            owner = account[0]
            if owner not in name_to_account_indexes:
                name_to_account_indexes[owner] = []
            name_to_account_indexes[owner].append(account_index)

        accounts_merged = []
        for name in name_to_account_indexes:
            accounts_merged.extend(
                merge_accounts_with_same_name(name, name_to_account_indexes[name])
            )
        return accounts_merged
36 changes: 36 additions & 0 deletions 0721.Accounts-Merge/step2_dfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
class Solution:
    def accountsMerge(self, accounts: list[list[str]]) -> list[list[str]]:
        """Merge accounts that share at least one email, using a graph traversal.

        Each account is ``[name, email, ...]``. The first email of an account
        acts as a hub that is linked to every other email of that account, so
        the emails of accounts that overlap end up in one connected component.

        Returns one row per component, ``[name, *sorted emails]``, in the
        order the components are first reached while scanning ``accounts``.
        An account with no email cannot be linked to anything and is returned
        as ``[name]``.
        """
        email_to_name = {}
        email_to_neighbors = {}
        for name, *emails in accounts:
            if not emails:
                continue
            hub = emails[0]
            for email in emails:
                email_to_name[email] = name
            for email in emails[1:]:
                email_to_neighbors.setdefault(hub, []).append(email)
                email_to_neighbors.setdefault(email, []).append(hub)

        visited = set()

        def collect_component(email_start):
            """Return every not-yet-visited email reachable from ``email_start``."""
            visited.add(email_start)
            stack = [email_start]
            component = [email_start]
            while stack:
                email = stack.pop()
                for neighbor in email_to_neighbors.get(email, ()):
                    if neighbor not in visited:
                        # Mark on push so an email is never collected twice.
                        visited.add(neighbor)
                        stack.append(neighbor)
                        component.append(neighbor)
            return component

        accounts_merged = []
        for name, *emails in accounts:
            if not emails:
                accounts_merged.append([name])
                continue
            hub = emails[0]
            if hub in visited:
                continue
            accounts_merged.append([email_to_name[hub], *sorted(collect_component(hub))])

        return accounts_merged
83 changes: 83 additions & 0 deletions 0721.Accounts-Merge/step2_union_find.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include <vector>
#include <string>
#include <unordered_map>
#include <set>
#include <numeric>
#include <algorithm>

// Disjoint-set forest over the integers [0, n) with union by size and
// path compression.
class UnionFind {
public:
    // parent[x] == x marks x as the root of its set.
    std::vector<int> parent;
    // size[r] is the element count of the set rooted at r; it is only
    // maintained for roots.
    std::vector<int> size;

    // Creates n singleton sets.
    UnionFind(int n) : parent(n), size(n, 1) {
        std::iota(parent.begin(), parent.end(), 0);
    }

    // Returns the root of the set containing x and re-points every node on
    // the walked path directly at that root.
    int find(int x) {
        int root = x;
        while (parent[root] != root) {
            root = parent[root];
        }
        while (parent[x] != root) {
            const int next = parent[x];
            parent[x] = root;
            x = next;
        }
        return root;
    }

    // Merges the sets containing x and y; does nothing if they are already
    // the same set.
    void union_op(int x, int y) {
        int larger_root = find(x);
        int smaller_root = find(y);
        if (larger_root == smaller_root) {
            return;
        }

        // Hang the smaller tree under the larger one.
        if (size[larger_root] < size[smaller_root]) {
            std::swap(larger_root, smaller_root);
        }
        size[larger_root] += size[smaller_root];
        parent[smaller_root] = larger_root;
    }
};

class Solution {
public:
    /// Merges accounts that share at least one email.
    ///
    /// Each account is `{name, email, ...}`. Two accounts belong together when
    /// they have an email in common, directly or through a chain of accounts.
    ///
    /// @param accounts  input rows; not modified.
    /// @return one row per merged account: the name of the group's root
    ///         account followed by the group's unique emails in ascending
    ///         order. Rows are ordered by the index of each group's root
    ///         account. An account without emails yields a name-only row; a
    ///         completely empty row (no name) is skipped.
    std::vector<std::vector<std::string>> accountsMerge(std::vector<std::vector<std::string>>& accounts) {
        const int account_count = static_cast<int>(accounts.size());
        UnionFind union_find(account_count);

        // Maps every email to the first account index it was seen in. Any later
        // account containing the same email is unioned with that first one.
        std::unordered_map<std::string, int> mail_to_account_index;
        for (int account_index = 0; account_index < account_count; ++account_index) {
            const auto& account = accounts[account_index];
            for (size_t i = 1; i < account.size(); ++i) {
                // Single hash lookup: inserts when new, otherwise reports the owner.
                const auto [owner, inserted] = mail_to_account_index.try_emplace(account[i], account_index);
                if (!inserted) {
                    union_find.union_op(account_index, owner->second);
                }
            }
        }

        // Indexed by root account index; std::set de-duplicates and sorts.
        std::vector<std::set<std::string>> root_to_emails(accounts.size());
        for (int account_index = 0; account_index < account_count; ++account_index) {
            const auto& account = accounts[account_index];
            if (account.size() > 1) {
                root_to_emails[union_find.find(account_index)].insert(account.begin() + 1, account.end());
            }
        }

        std::vector<std::vector<std::string>> accounts_merged;
        for (int account_index = 0; account_index < account_count; ++account_index) {
            const auto& account = accounts[account_index];
            // Emit one row per root; a row without a name cannot be reported.
            if (account.empty() || union_find.find(account_index) != account_index) {
                continue;
            }
            const auto& unique_emails = root_to_emails[account_index];
            auto& merged_row = accounts_merged.emplace_back();
            merged_row.reserve(unique_emails.size() + 1);
            merged_row.push_back(account[0]);
            merged_row.insert(merged_row.end(), unique_emails.begin(), unique_emails.end());
        }

        return accounts_merged;
    }
};
46 changes: 46 additions & 0 deletions 0721.Accounts-Merge/step2_union_find.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. n-1``.

    Uses union by size and path compression.
    """

    def __init__(self, n):
        """Create ``n`` singleton sets, one per integer in ``range(n)``."""
        # parent[x] == x marks x as the root (representative) of its set.
        self.parent = list(range(n))
        # size[r] is the number of elements in the set rooted at r; it is
        # only kept up to date for roots.
        self.size = [1] * n

    def find(self, x):
        """Return the root of the set containing ``x``.

        Every node visited on the way up is re-pointed directly at the root.
        """
        if x != self.parent[x]:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y``.

        Returns the root of the resulting set, whether or not a merge was
        actually needed.
        """
        root_x = self.find(x)
        root_y = self.find(y)

        if root_x == root_y:
            return root_x

        # Attach the smaller tree under the larger one to keep trees shallow.
        if self.size[root_x] < self.size[root_y]:
            root_x, root_y = root_y, root_x

        self.parent[root_y] = root_x
        self.size[root_x] += self.size[root_y]
        return root_x


class Solution:
    def accountsMerge(self, accounts: list[list[str]]) -> list[list[str]]:
        """Merge accounts that share at least one email, using union-find.

        Each account is ``[name, email, ...]``. Accounts are unified whenever
        they contain a common email. Returns one row per resulting group:
        the name of the group's root account followed by the group's unique
        emails in sorted order. Groups appear in the order their root is
        first encountered while scanning ``accounts``.
        """
        union_find = UnionFind(len(accounts))

        # Remember the first account each email was seen in; any later account
        # with the same email is unioned with that one. For a new email the
        # union is with the account itself, which is a no-op.
        mail_to_account_index = {}
        for account_index, (_, *emails) in enumerate(accounts):
            for email in emails:
                first_owner = mail_to_account_index.setdefault(email, account_index)
                union_find.union(account_index, first_owner)

        # Collect emails straight into sets so duplicates never accumulate.
        root_to_emails = {}
        for account_index, (_, *emails) in enumerate(accounts):
            root = union_find.find(account_index)
            root_to_emails.setdefault(root, set()).update(emails)

        return [
            [accounts[root][0], *sorted(emails)]
            for root, emails in root_to_emails.items()
        ]