Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[geometric] Move graph-related incubate APIs to geometric #44970

Merged
merged 20 commits into from Aug 29, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions python/paddle/__init__.py
Expand Up @@ -78,6 +78,7 @@
import paddle.reader # noqa: F401
import paddle.static # noqa: F401
import paddle.vision # noqa: F401
import paddle.geometric # noqa: F401

from .tensor.attribute import is_complex # noqa: F401
from .tensor.attribute import is_integer # noqa: F401
Expand Down
208 changes: 208 additions & 0 deletions python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py
Expand Up @@ -226,5 +226,213 @@ def test_sample_result_static_without_eids(self):
self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0])


class TestGeometricGraphKhopSampler(unittest.TestCase):
    """Tests for ``paddle.geometric.khop_sampler`` on a small random graph.

    ``setUp`` builds the graph as a ``row`` / ``colptr`` pair (edges sorted by
    destination) together with a ``dst -> source nodes`` lookup.  Every test
    runs the sampler and then checks, for each seed node, that the sampled
    sources are unique, correctly sized and real in-neighbors of that node.
    """

    def setUp(self):
        """Generate the random graph, the seed nodes and the reference data."""
        num_nodes = 20
        edges = np.random.randint(num_nodes, size=(100, 2))
        edges = np.unique(edges, axis=0)
        edges_id = np.arange(0, len(edges))

        # Sort edges by destination.  The permutation is computed once so the
        # edge ids are guaranteed to be reordered exactly like the edges.
        order = np.argsort(edges[:, 1])
        sorted_edges = edges[order]
        sorted_eid = edges_id[order]

        # Per-destination source lists and in-degree counts; the cumulative
        # sum of the counts (with a leading 0) is the column pointer array.
        dst_count = np.zeros(num_nodes, dtype="int64")
        dst_src_dict = {}
        for dst in range(num_nodes):
            sources = sorted_edges[sorted_edges[:, 1] == dst, 0]
            dst_src_dict[dst] = sources
            dst_count[dst] = len(sources)
        colptr = np.insert(np.cumsum(dst_count), 0, 0)

        self.row = sorted_edges[:, 0].astype("int64")
        self.colptr = colptr.astype("int64")
        self.sorted_eid = sorted_eid.astype("int64")
        self.nodes = np.unique(np.random.randint(num_nodes,
                                                 size=5)).astype("int64")
        self.sample_sizes = [5, 5]
        self.dst_src_dict = dst_src_dict

    @staticmethod
    def _flatten(value):
        """Return ``value`` (a paddle Tensor or an ndarray) as a 1-D ndarray."""
        if hasattr(value, "numpy"):
            value = value.numpy()
        return np.asarray(value).reshape(-1)

    def _check_sampled_edges(self, edge_src, edge_dst, sample_index):
        """Validate sampler output against the reference neighbor lookup.

        Args:
            edge_src: Reindexed source ids returned by the sampler.
            edge_dst: Reindexed destination ids returned by the sampler.
            sample_index: Maps a reindexed id back to the original node id.
        """
        sample_index = self._flatten(sample_index)
        # Map the reindexed edge endpoints back to original node ids.
        edge_src = sample_index[self._flatten(edge_src)]
        edge_dst = sample_index[self._flatten(edge_dst)]

        for n in self.nodes:
            sampled = edge_src[edge_dst == n]
            if sampled.shape[0] == 0:
                continue
            neighbors = self.dst_src_dict[n]
            # Ensure no repetitive sample neighbors.
            self.assertEqual(len(np.unique(sampled)), len(sampled))
            # Ensure the correct sample size: either the requested size or
            # every available neighbor.
            self.assertIn(len(sampled),
                          (self.sample_sizes[0], len(neighbors)))
            # Ensure the correct sample neighbors.
            self.assertTrue(
                np.isin(sampled, neighbors).all(),
                msg="sampled sources are not all in-neighbors of node "
                "{}".format(n))

    def func_sample_result(self):
        """Dynamic-graph sampling without edge ids."""
        paddle.disable_static()
        row = paddle.to_tensor(self.row)
        colptr = paddle.to_tensor(self.colptr)
        nodes = paddle.to_tensor(self.nodes)

        edge_src, edge_dst, sample_index, _reindex_nodes = \
            paddle.geometric.khop_sampler(row, colptr,
                                          nodes, self.sample_sizes,
                                          return_eids=False)
        self._check_sampled_edges(edge_src, edge_dst, sample_index)

    def test_sample_result(self):
        # Run once inside the eager guard and once outside of it.
        with fluid.framework._test_eager_guard():
            self.func_sample_result()
        self.func_sample_result()

    def func_uva_sample_result(self):
        """Dynamic-graph sampling with UVA inputs and edge ids returned."""
        paddle.disable_static()
        if not paddle.fluid.core.is_compiled_with_cuda():
            # The UVA path is only exercised on CUDA builds.
            return

        if fluid.framework.in_dygraph_mode():
            row = paddle.fluid.core.eager.to_uva_tensor(
                self.row.astype(self.row.dtype), 0)
            sorted_eid = paddle.fluid.core.eager.to_uva_tensor(
                self.sorted_eid.astype(self.sorted_eid.dtype), 0)
        else:
            row = paddle.fluid.core.to_uva_tensor(
                self.row.astype(self.row.dtype))
            sorted_eid = paddle.fluid.core.to_uva_tensor(
                self.sorted_eid.astype(self.sorted_eid.dtype))
        colptr = paddle.to_tensor(self.colptr)
        nodes = paddle.to_tensor(self.nodes)

        edge_src, edge_dst, sample_index, _reindex_nodes, _edge_eids = \
            paddle.geometric.khop_sampler(row, colptr,
                                          nodes, self.sample_sizes,
                                          sorted_eids=sorted_eid,
                                          return_eids=True)
        self._check_sampled_edges(edge_src, edge_dst, sample_index)

    def test_uva_sample_result(self):
        # Run once inside the eager guard and once outside of it.
        with fluid.framework._test_eager_guard():
            self.func_uva_sample_result()
        self.func_uva_sample_result()

    def test_sample_result_static_with_eids(self):
        """Static-graph sampling with edge ids requested."""
        paddle.enable_static()
        with paddle.static.program_guard(paddle.static.Program()):
            row = paddle.static.data(name="row",
                                     shape=self.row.shape,
                                     dtype=self.row.dtype)
            sorted_eids = paddle.static.data(name="eids",
                                             shape=self.sorted_eid.shape,
                                             dtype=self.sorted_eid.dtype)
            colptr = paddle.static.data(name="colptr",
                                        shape=self.colptr.shape,
                                        dtype=self.colptr.dtype)
            nodes = paddle.static.data(name="nodes",
                                       shape=self.nodes.shape,
                                       dtype=self.nodes.dtype)

            edge_src, edge_dst, sample_index, _reindex_nodes, _edge_eids = \
                paddle.geometric.khop_sampler(row, colptr,
                                              nodes, self.sample_sizes,
                                              sorted_eids=sorted_eids,
                                              return_eids=True)
            exe = paddle.static.Executor(paddle.CPUPlace())
            ret = exe.run(feed={
                'row': self.row,
                'eids': self.sorted_eid,
                'colptr': self.colptr,
                'nodes': self.nodes
            },
                          fetch_list=[edge_src, edge_dst, sample_index])

            edge_src, edge_dst, sample_index = ret
            self._check_sampled_edges(edge_src, edge_dst, sample_index)

    def test_sample_result_static_without_eids(self):
        """Static-graph sampling with the sampler's default arguments."""
        paddle.enable_static()
        with paddle.static.program_guard(paddle.static.Program()):
            row = paddle.static.data(name="row",
                                     shape=self.row.shape,
                                     dtype=self.row.dtype)
            colptr = paddle.static.data(name="colptr",
                                        shape=self.colptr.shape,
                                        dtype=self.colptr.dtype)
            nodes = paddle.static.data(name="nodes",
                                       shape=self.nodes.shape,
                                       dtype=self.nodes.dtype)
            edge_src, edge_dst, sample_index, _reindex_nodes = \
                paddle.geometric.khop_sampler(row, colptr,
                                              nodes, self.sample_sizes)
            exe = paddle.static.Executor(paddle.CPUPlace())
            ret = exe.run(feed={
                'row': self.row,
                'colptr': self.colptr,
                'nodes': self.nodes
            },
                          fetch_list=[edge_src, edge_dst, sample_index])

            edge_src, edge_dst, sample_index = ret
            self._check_sampled_edges(edge_src, edge_dst, sample_index)


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
155 changes: 155 additions & 0 deletions python/paddle/fluid/tests/unittests/test_graph_reindex.py
Expand Up @@ -173,5 +173,160 @@ def test_reindex_result_static(self):
self.assertTrue(np.allclose(self.out_nodes, out_nodes_2))


class TestGeometricGraphReindex(unittest.TestCase):
    """Tests for ``paddle.geometric.graph_reindex`` against a NumPy reference.

    The reference assigns ids in first-appearance order: the input nodes ``x``
    come first, followed by every neighbor that has not been seen yet.
    """

    @staticmethod
    def _reference_reindex(x, neighbors, counts):
        """Compute the expected reindex result with plain NumPy.

        Args:
            x: 1-D array of input node ids (assumed to contain no duplicates).
            neighbors: 1-D array with the concatenated neighbors of ``x``.
            counts: Sequence of 1-D arrays; each holds, per node of ``x``, the
                number of neighbors it contributes in one neighbor group.

        Returns:
            Tuple ``(reindex_src, reindex_dst, out_nodes)`` of int64 arrays.
        """
        # dict keeps insertion order, so the new id of a node is simply the
        # number of distinct nodes seen before it.
        reindex_dict = {node: ind for ind, node in enumerate(x)}
        for neighbor in neighbors:
            reindex_dict.setdefault(neighbor, len(reindex_dict))

        out_nodes = np.array(list(reindex_dict), dtype="int64")
        reindex_src = np.array([reindex_dict[node] for node in neighbors],
                               dtype="int64")
        x_index = np.array([reindex_dict[node] for node in x], dtype="int64")
        # Each destination id is repeated once per neighbor, group by group.
        reindex_dst = np.concatenate(
            [np.repeat(x_index, count) for count in counts]).astype("int64")
        return reindex_src, reindex_dst, out_nodes

    @staticmethod
    def _to_numpy(value):
        """Return ``value`` (a paddle Tensor or an ndarray) as an ndarray."""
        if hasattr(value, "numpy"):
            value = value.numpy()
        return np.asarray(value)

    def _assert_reindex_equal(self, expected, actual):
        """Compare ``(reindex_src, reindex_dst, out_nodes)`` triples exactly.

        ``assert_array_equal`` is used instead of ``np.allclose`` because the
        values are integer ids and a shape mismatch must not be hidden by
        broadcasting.
        """
        names = ("reindex_src", "reindex_dst", "out_nodes")
        for name, want, got in zip(names, expected, actual):
            np.testing.assert_array_equal(want,
                                          self._to_numpy(got),
                                          err_msg=name + " mismatch")

    def setUp(self):
        """Create random neighbors for five nodes and the expected result."""
        self.x = np.arange(5).astype("int64")
        self.neighbors = np.random.randint(100, size=20).astype("int64")
        self.count = np.array([2, 8, 4, 3, 3], dtype="int32")

        # Get numpy result.
        self.reindex_src, self.reindex_dst, self.out_nodes = \
            self._reference_reindex(self.x, self.neighbors, [self.count])
        self.num_nodes = np.max(np.concatenate([self.x, self.neighbors])) + 1

    def test_reindex_result(self):
        """Dynamic graph: default call and the buffer-based call must agree."""
        paddle.disable_static()
        x = paddle.to_tensor(self.x)
        neighbors = paddle.to_tensor(self.neighbors)
        count = paddle.to_tensor(self.count)
        value_buffer = paddle.full([self.num_nodes], -1, dtype="int32")
        index_buffer = paddle.full([self.num_nodes], -1, dtype="int32")
        expected = (self.reindex_src, self.reindex_dst, self.out_nodes)

        result = paddle.geometric.graph_reindex(x, neighbors, count)
        self._assert_reindex_equal(expected, result)

        result = paddle.geometric.graph_reindex(x, neighbors, count,
                                                value_buffer, index_buffer,
                                                flag_buffer_hashtable=True)
        self._assert_reindex_equal(expected, result)

    def test_heter_reindex_result(self):
        """Two identical neighbor groups must each reproduce the reference."""
        paddle.disable_static()
        x = paddle.to_tensor(self.x)
        neighbors = paddle.to_tensor(self.neighbors)
        neighbors = paddle.concat([neighbors, neighbors])
        count = paddle.to_tensor(self.count)
        count = paddle.concat([count, count])

        reindex_src, reindex_dst, out_nodes = \
            paddle.geometric.graph_reindex(x, neighbors, count)
        reindex_src = self._to_numpy(reindex_src)
        reindex_dst = self._to_numpy(reindex_dst)

        num_edges = self.neighbors.shape[0]
        # The first and the second half correspond to the two groups.
        for part in (slice(None, num_edges), slice(num_edges, None)):
            np.testing.assert_array_equal(self.reindex_src, reindex_src[part])
            np.testing.assert_array_equal(self.reindex_dst, reindex_dst[part])
        np.testing.assert_array_equal(self.out_nodes,
                                      self._to_numpy(out_nodes))

    def test_heter_reindex_result_v2(self):
        """Two different neighbor groups reindexed in a single call."""
        paddle.disable_static()
        x = np.arange(5).astype("int64")
        neighbors1 = np.random.randint(100, size=20).astype("int64")
        count1 = np.array([2, 8, 4, 3, 3], dtype="int32")
        neighbors2 = np.random.randint(100, size=20).astype("int64")
        count2 = np.array([4, 5, 1, 6, 4], dtype="int32")
        neighbors = np.concatenate([neighbors1, neighbors2])
        counts = np.concatenate([count1, count2])

        # Get numpy result.
        expected = self._reference_reindex(x, neighbors, [count1, count2])

        result = paddle.geometric.graph_reindex(paddle.to_tensor(x),
                                                paddle.to_tensor(neighbors),
                                                paddle.to_tensor(counts))
        self._assert_reindex_equal(expected, result)

    def test_reindex_result_static(self):
        """Static graph: default call and the buffer-based call must agree."""
        paddle.enable_static()
        with paddle.static.program_guard(paddle.static.Program()):
            x = paddle.static.data(name="x",
                                   shape=self.x.shape,
                                   dtype=self.x.dtype)
            neighbors = paddle.static.data(name="neighbors",
                                           shape=self.neighbors.shape,
                                           dtype=self.neighbors.dtype)
            count = paddle.static.data(name="count",
                                       shape=self.count.shape,
                                       dtype=self.count.dtype)
            value_buffer = paddle.static.data(name="value_buffer",
                                              shape=[self.num_nodes],
                                              dtype="int32")
            index_buffer = paddle.static.data(name="index_buffer",
                                              shape=[self.num_nodes],
                                              dtype="int32")

            reindex_src_1, reindex_dst_1, out_nodes_1 = \
                paddle.geometric.graph_reindex(x, neighbors, count)
            reindex_src_2, reindex_dst_2, out_nodes_2 = \
                paddle.geometric.graph_reindex(x, neighbors, count,
                                               value_buffer, index_buffer,
                                               flag_buffer_hashtable=True)

            exe = paddle.static.Executor(paddle.CPUPlace())
            ret = exe.run(feed={
                'x':
                self.x,
                'neighbors':
                self.neighbors,
                'count':
                self.count,
                'value_buffer':
                np.full([self.num_nodes], -1, dtype="int32"),
                'index_buffer':
                np.full([self.num_nodes], -1, dtype="int32")
            },
                          fetch_list=[
                              reindex_src_1, reindex_dst_1, out_nodes_1,
                              reindex_src_2, reindex_dst_2, out_nodes_2
                          ])

            expected = (self.reindex_src, self.reindex_dst, self.out_nodes)
            # The first three fetches come from the default call, the last
            # three from the buffer-based call.
            self._assert_reindex_equal(expected, ret[:3])
            self._assert_reindex_equal(expected, ret[3:])


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()