-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgraph_storage.py
More file actions
297 lines (246 loc) · 11 KB
/
graph_storage.py
File metadata and controls
297 lines (246 loc) · 11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
"""
图结构表示与存储模块
Graph Structure Representation and Storage Module
"""
from collections import defaultdict
import sys
class SocialNetwork:
"""
社交网络类 - 图结构表示与存储
Social Network Class - Graph Structure Representation and Storage
图存储结构分析 Graph Storage Structure Analysis:
1. 邻接表 (Adjacency List):
- 空间复杂度 Space Complexity: O(V + E),其中V是顶点数,E是边数
- 查找邻居时间复杂度 Time Complexity for finding neighbors: O(degree of vertex)
- 添加边时间复杂度 Time Complexity for adding edge: O(1) 平均
- 适用于稀疏图 Suitable for sparse graphs
2. 邻接矩阵 (Adjacency Matrix):
- 空间复杂度 Space Complexity: O(V^2)
- 查找邻居时间复杂度 Time Complexity for finding neighbors: O(V)
- 添加边时间复杂度 Time Complexity for adding edge: O(1)
- 适用于稠密图 Suitable for dense graphs
"""
def __init__(self, n, directed=True, storage_type="adj_list"):
"""
初始化图结构
Initialize graph structure
Args:
n (int): 节点数量 Number of nodes
directed (bool): 是否是有向图 Whether the graph is directed
storage_type (str): 存储类型 ("adj_list" 或 "adj_matrix") Storage type
"""
self.n = n # 用户数量 Number of users
self.directed = directed # 是否有向 Whether directed
self.storage_type = storage_type # 存储类型 Storage type
# 存储用户属性的数据结构 Data structures for user attributes
self.user_attributes = {}
for i in range(n):
self.user_attributes[i] = {
'id': i,
'followers': 0, # 粉丝数 Number of followers
'posts': 0, # 发帖数 Number of posts
'label': i # 标签 Label (for community detection)
}
# 根据存储类型初始化图结构 Initialize graph structure based on storage type
if storage_type == "adj_list":
# 邻接表 Adjacency list
self.graph = defaultdict(list)
self.in_edges = defaultdict(list) # 反向边,用于计算粉丝列表 Reverse edges for calculating followers
elif storage_type == "adj_matrix":
# 邻接矩阵 Adjacency matrix
self.graph = [[0 for _ in range(n)] for _ in range(n)]
self.in_edges = None # 邻接矩阵不需要反向边结构 No reverse edge structure needed for adjacency matrix
else:
raise ValueError("存储类型必须是 'adj_list' 或 'adj_matrix'")
def add_edge(self, u, v):
"""
添加关注关系
Add following relationship
Args:
u (int): 关注者 Follower
v (int): 被关注者 Followee
"""
if self.storage_type == "adj_list":
# 在邻接表中添加边 Add edge to adjacency list
if v not in self.graph[u]:
self.graph[u].append(v)
if self.directed:
# 对于有向图,v的粉丝数加1 For directed graph, increment v's follower count
self.user_attributes[v]['followers'] += 1
# 同时记录反向边 Also record reverse edge
if u not in self.in_edges[v]:
self.in_edges[v].append(u)
else: # adj_matrix
# 在邻接矩阵中添加边 Add edge to adjacency matrix
self.graph[u][v] = 1
if self.directed:
# 对于有向图,v的粉丝数加1 For directed graph, increment v's follower count
self.user_attributes[v]['followers'] += 1
def remove_edge(self, u, v):
"""
移除关注关系
Remove following relationship
Args:
u (int): 关注者 Follower
v (int): 被关注者 Followee
"""
if self.storage_type == "adj_list":
# 从邻接表中移除边 Remove edge from adjacency list
if v in self.graph[u]:
self.graph[u].remove(v)
if self.directed:
# 更新粉丝数 Update follower count
self.user_attributes[v]['followers'] -= 1
if u in self.in_edges[v]:
self.in_edges[v].remove(u)
else: # adj_matrix
# 从邻接矩阵中移除边 Remove edge from adjacency matrix
self.graph[u][v] = 0
if self.directed:
# 更新粉丝数 Update follower count
self.user_attributes[v]['followers'] -= 1
def has_edge(self, u, v):
"""
检查是否存在边
Check if edge exists
Args:
u (int): 起始节点 Start node
v (int): 目标节点 Target node
Returns:
bool: 是否存在边 Whether edge exists
"""
if self.storage_type == "adj_list":
return v in self.graph[u]
else: # adj_matrix
return self.graph[u][v] == 1
def get_followers(self, user):
"""
获取指定用户的粉丝列表
Get followers list for a specific user
Args:
user (int): 用户ID User ID
Returns:
list: 粉丝列表 Followers list
"""
if self.storage_type == "adj_list":
return self.in_edges[user][:]
else: # adj_matrix
# 在邻接矩阵中查找所有指向该用户的节点 Find all nodes pointing to this user in adjacency matrix
followers = []
for i in range(self.n):
if self.graph[i][user] == 1:
followers.append(i)
return followers
def get_following(self, user):
"""
获取指定用户的关注列表
Get following list for a specific user
Args:
user (int): 用户ID User ID
Returns:
list: 关注列表 Following list
"""
if self.storage_type == "adj_list":
return self.graph[user][:]
else: # adj_matrix
# 在邻接矩阵中查找该用户指向的所有节点 Find all nodes this user points to in adjacency matrix
following = []
for i in range(self.n):
if self.graph[user][i] == 1:
following.append(i)
return following
def get_degree(self, user):
"""
获取用户的总度数(关注数+粉丝数)
Get total degree of user (following + followers)
Args:
user (int): 用户ID User ID
Returns:
int: 总度数 Total degree
"""
following = len(self.get_following(user))
followers = self.user_attributes[user]['followers']
return following + followers
def get_node_density(self):
"""
计算图的密度
Calculate graph density
Returns:
float: 图的密度 Graph density
"""
total_possible_edges = self.n * (self.n - 1) if self.directed else self.n * (self.n - 1) // 2
if total_possible_edges == 0:
return 0.0
if self.storage_type == "adj_list":
actual_edges = sum(len(self.graph[node]) for node in range(self.n))
else: # adj_matrix
actual_edges = sum(sum(row) for row in self.graph)
return actual_edges / total_possible_edges
def generate_test_data(network_size, edge_count):
"""
生成测试数据
Generate test data
Args:
network_size (int): 网络大小 Network size
edge_count (int): 边的数量 Number of edges
Returns:
SocialNetwork: 生成的社交网络 Generated social network
"""
import random
# 创建社交网络 Create social network
network = SocialNetwork(network_size, directed=True)
# 随机生成边 Randomly generate edges
edges_added = 0
while edges_added < edge_count:
u = random.randint(0, network_size - 1)
v = random.randint(0, network_size - 1)
# 避免自环 Avoid self-loops
if u != v:
# 检查是否已存在边 Check if edge already exists
if network.storage_type == "adj_list":
if v not in network.graph[u]:
network.add_edge(u, v)
edges_added += 1
else: # adj_matrix
if network.graph[u][v] == 0:
network.add_edge(u, v)
edges_added += 1
return network
# 测试数据生成函数 Test data generation functions
def generate_small_network():
"""生成小型网络 (N=1000, 边数=5000)"""
"""Generate small network (N=1000, edges=5000)"""
return generate_test_data(1000, 5000)
def generate_medium_network():
"""生成中型网络 (N=10000, 边数=100000)"""
"""Generate medium network (N=10000, edges=100000)"""
return generate_test_data(10000, 100000)
def generate_large_network():
"""生成大型网络 (N=100000, 边数=1000000)"""
"""Generate large network (N=100000, edges=1000000)"""
return generate_test_data(100000, 1000000)
# 运行示例代码 Example usage
if __name__ == "__main__":
print("图结构表示与存储 - 示例运行")
print("Graph Structure Representation and Storage - Example Run")
# 创建一个小的社交网络 Create a small social network
network = SocialNetwork(5, directed=True, storage_type="adj_list")
# 添加一些关注关系 Add some following relationships
network.add_edge(0, 1)
network.add_edge(0, 2)
network.add_edge(1, 2)
network.add_edge(2, 3)
network.add_edge(3, 4)
print(f"\n用户0的关注列表: {network.get_following(0)}") # User 0's following list
print(f"用户2的粉丝列表: {network.get_followers(2)}") # User 2's followers list
print(f"用户2的粉丝数: {network.user_attributes[2]['followers']}") # User 2's follower count
# 测试邻接矩阵存储方式 Test adjacency matrix storage
network_matrix = SocialNetwork(5, directed=True, storage_type="adj_matrix")
network_matrix.add_edge(0, 1)
network_matrix.add_edge(0, 2)
network_matrix.add_edge(1, 2)
network_matrix.add_edge(2, 3)
network_matrix.add_edge(3, 4)
print(f"\n邻接矩阵 - 用户0的关注列表: {network_matrix.get_following(0)}")
print(f"邻接矩阵 - 用户2的粉丝列表: {network_matrix.get_followers(2)}")
print(f"邻接矩阵 - 用户2的粉丝数: {network_matrix.user_attributes[2]['followers']}")