diff --git a/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py b/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py index ad8107860d6a4..57b8209d8d69b 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py +++ b/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py @@ -226,213 +226,5 @@ def test_sample_result_static_without_eids(self): self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) -class TestGeometricGraphKhopSampler(unittest.TestCase): - - def setUp(self): - num_nodes = 20 - edges = np.random.randint(num_nodes, size=(100, 2)) - edges = np.unique(edges, axis=0) - edges_id = np.arange(0, len(edges)) - sorted_edges = edges[np.argsort(edges[:, 1])] - sorted_eid = edges_id[np.argsort(edges[:, 1])] - - # Calculate dst index cumsum counts. - dst_count = np.zeros(num_nodes) - dst_src_dict = {} - for dst in range(0, num_nodes): - true_index = sorted_edges[:, 1] == dst - dst_count[dst] = np.sum(true_index) - dst_src_dict[dst] = sorted_edges[:, 0][true_index] - dst_count = dst_count.astype("int64") - colptr = np.cumsum(dst_count) - colptr = np.insert(colptr, 0, 0) - - self.row = sorted_edges[:, 0].astype("int64") - self.colptr = colptr.astype("int64") - self.sorted_eid = sorted_eid.astype("int64") - self.nodes = np.unique(np.random.randint(num_nodes, - size=5)).astype("int64") - self.sample_sizes = [5, 5] - self.dst_src_dict = dst_src_dict - - def func_sample_result(self): - paddle.disable_static() - row = paddle.to_tensor(self.row) - colptr = paddle.to_tensor(self.colptr) - nodes = paddle.to_tensor(self.nodes) - - edge_src, edge_dst, sample_index, reindex_nodes = \ - paddle.geometric.khop_sampler(row, colptr, - nodes, self.sample_sizes, - return_eids=False) - # Reindex edge_src and edge_dst to original index. - edge_src = edge_src.reshape([-1]) - edge_dst = edge_dst.reshape([-1]) - sample_index = sample_index.reshape([-1]) - - for i in range(len(edge_src)): - edge_src[i] = sample_index[edge_src[i]] - edge_dst[i] = sample_index[edge_dst[i]] - - for n in self.nodes: - edge_src_n = edge_src[edge_dst == n] - if edge_src_n.shape[0] == 0: - continue - # Ensure no repetitive sample neighbors. - self.assertTrue( - edge_src_n.shape[0] == paddle.unique(edge_src_n).shape[0]) - # Ensure the correct sample size. - self.assertTrue(edge_src_n.shape[0] == self.sample_sizes[0] - or edge_src_n.shape[0] == len(self.dst_src_dict[n])) - in_neighbors = np.isin(edge_src_n.numpy(), self.dst_src_dict[n]) - # Ensure the correct sample neighbors. - self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) - - def test_sample_result(self): - with fluid.framework._test_eager_guard(): - self.func_sample_result() - self.func_sample_result() - - def func_uva_sample_result(self): - paddle.disable_static() - if paddle.fluid.core.is_compiled_with_cuda(): - row = None - if fluid.framework.in_dygraph_mode(): - row = paddle.fluid.core.eager.to_uva_tensor( - self.row.astype(self.row.dtype), 0) - sorted_eid = paddle.fluid.core.eager.to_uva_tensor( - self.sorted_eid.astype(self.sorted_eid.dtype), 0) - else: - row = paddle.fluid.core.to_uva_tensor( - self.row.astype(self.row.dtype)) - sorted_eid = paddle.fluid.core.to_uva_tensor( - self.sorted_eid.astype(self.sorted_eid.dtype)) - colptr = paddle.to_tensor(self.colptr) - nodes = paddle.to_tensor(self.nodes) - - edge_src, edge_dst, sample_index, reindex_nodes, edge_eids = \ - paddle.geometric.khop_sampler(row, colptr, - nodes, self.sample_sizes, - sorted_eids=sorted_eid, - return_eids=True) - edge_src = edge_src.reshape([-1]) - edge_dst = edge_dst.reshape([-1]) - sample_index = sample_index.reshape([-1]) - - for i in range(len(edge_src)): - edge_src[i] = sample_index[edge_src[i]] - edge_dst[i] = sample_index[edge_dst[i]] - - for n in self.nodes: - edge_src_n = edge_src[edge_dst == n] - if edge_src_n.shape[0] == 0: - continue - self.assertTrue( - edge_src_n.shape[0] == paddle.unique(edge_src_n).shape[0]) - self.assertTrue( - edge_src_n.shape[0] == self.sample_sizes[0] - or edge_src_n.shape[0] == len(self.dst_src_dict[n])) - in_neighbors = np.isin(edge_src_n.numpy(), self.dst_src_dict[n]) - self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) - - def test_uva_sample_result(self): - with fluid.framework._test_eager_guard(): - self.func_uva_sample_result() - self.func_uva_sample_result() - - def test_sample_result_static_with_eids(self): - paddle.enable_static() - with paddle.static.program_guard(paddle.static.Program()): - row = paddle.static.data(name="row", - shape=self.row.shape, - dtype=self.row.dtype) - sorted_eids = paddle.static.data(name="eids", - shape=self.sorted_eid.shape, - dtype=self.sorted_eid.dtype) - colptr = paddle.static.data(name="colptr", - shape=self.colptr.shape, - dtype=self.colptr.dtype) - nodes = paddle.static.data(name="nodes", - shape=self.nodes.shape, - dtype=self.nodes.dtype) - - edge_src, edge_dst, sample_index, reindex_nodes, edge_eids = \ - paddle.geometric.khop_sampler(row, colptr, - nodes, self.sample_sizes, - sorted_eids, True) - exe = paddle.static.Executor(paddle.CPUPlace()) - ret = exe.run(feed={ - 'row': self.row, - 'eids': self.sorted_eid, - 'colptr': self.colptr, - 'nodes': self.nodes - }, - fetch_list=[edge_src, edge_dst, sample_index]) - - edge_src, edge_dst, sample_index = ret - edge_src = edge_src.reshape([-1]) - edge_dst = edge_dst.reshape([-1]) - sample_index = sample_index.reshape([-1]) - - for i in range(len(edge_src)): - edge_src[i] = sample_index[edge_src[i]] - edge_dst[i] = sample_index[edge_dst[i]] - - for n in self.nodes: - edge_src_n = edge_src[edge_dst == n] - if edge_src_n.shape[0] == 0: - continue - self.assertTrue( - edge_src_n.shape[0] == np.unique(edge_src_n).shape[0]) - self.assertTrue( - edge_src_n.shape[0] == self.sample_sizes[0] - or edge_src_n.shape[0] == len(self.dst_src_dict[n])) - in_neighbors = np.isin(edge_src_n, self.dst_src_dict[n]) - self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) - - def test_sample_result_static_without_eids(self): - paddle.enable_static() - with paddle.static.program_guard(paddle.static.Program()): - row = paddle.static.data(name="row", - shape=self.row.shape, - dtype=self.row.dtype) - colptr = paddle.static.data(name="colptr", - shape=self.colptr.shape, - dtype=self.colptr.dtype) - nodes = paddle.static.data(name="nodes", - shape=self.nodes.shape, - dtype=self.nodes.dtype) - edge_src, edge_dst, sample_index, reindex_nodes = \ - paddle.geometric.khop_sampler(row, colptr, - nodes, self.sample_sizes) - exe = paddle.static.Executor(paddle.CPUPlace()) - ret = exe.run(feed={ - 'row': self.row, - 'colptr': self.colptr, - 'nodes': self.nodes - }, - fetch_list=[edge_src, edge_dst, sample_index]) - edge_src, edge_dst, sample_index = ret - edge_src = edge_src.reshape([-1]) - edge_dst = edge_dst.reshape([-1]) - sample_index = sample_index.reshape([-1]) - - for i in range(len(edge_src)): - edge_src[i] = sample_index[edge_src[i]] - edge_dst[i] = sample_index[edge_dst[i]] - - for n in self.nodes: - edge_src_n = edge_src[edge_dst == n] - if edge_src_n.shape[0] == 0: - continue - self.assertTrue( - edge_src_n.shape[0] == np.unique(edge_src_n).shape[0]) - self.assertTrue( - edge_src_n.shape[0] == self.sample_sizes[0] - or edge_src_n.shape[0] == len(self.dst_src_dict[n])) - in_neighbors = np.isin(edge_src_n, self.dst_src_dict[n]) - self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/geometric/__init__.py b/python/paddle/geometric/__init__.py index 76c64a947c328..6bb14273f07a4 100644 --- a/python/paddle/geometric/__init__.py +++ b/python/paddle/geometric/__init__.py @@ -21,7 +21,6 @@ from .math import segment_max # noqa: F401 from .sampling import graph_reindex # noqa: F401 from .sampling import heter_graph_reindex # noqa: F401 -from .sampling import khop_sampler # noqa: F401 from .sampling import sample_neighbors # noqa: F401 __all__ = [ @@ -34,6 +33,5 @@ 'segment_max', 'graph_reindex', 'heter_graph_reindex', - 'khop_sampler', 'sample_neighbors', ] diff --git a/python/paddle/geometric/sampling/__init__.py b/python/paddle/geometric/sampling/__init__.py index b7de3389fc756..880ee242b9197 100644 --- a/python/paddle/geometric/sampling/__init__.py +++ b/python/paddle/geometric/sampling/__init__.py @@ -14,12 +14,10 @@ from .graph_reindex import graph_reindex # noqa: F401 from .graph_reindex import heter_graph_reindex # noqa: F401 -from .khop_sampler import khop_sampler # noqa: F401 from .neighbors import sample_neighbors # noqa: F401 __all__ = [ 'graph_reindex', 'heter_graph_reindex', - 'khop_sampler', 'sample_neighbors', ] diff --git a/python/paddle/geometric/sampling/khop_sampler.py b/python/paddle/geometric/sampling/khop_sampler.py deleted file mode 100644 index 2e7104fbdd3af..0000000000000 --- a/python/paddle/geometric/sampling/khop_sampler.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle.fluid import core -from paddle import _C_ops - -__all__ = [] - - -def khop_sampler(row, - colptr, - input_nodes, - sample_sizes, - sorted_eids=None, - return_eids=False, - name=None): - """ - Graph Khop Sampler API. - - This API is mainly used in Graph Learning domain, and the main purpose is to - provide high performance graph khop sampling method with subgraph reindex step. - For example, we get the CSC(Compressed Sparse Column) format of the input graph - edges as `row` and `colptr`, so as to covert graph data into a suitable format - for sampling. And the `input_nodes` means the nodes we need to sample neighbors, - and `sample_sizes` means the number of neighbors and number of layers we want - to sample. - - Args: - row (Tensor): One of the components of the CSC format of the input graph, and - the shape should be [num_edges, 1] or [num_edges]. The available - data type is int32, int64. - colptr (Tensor): One of the components of the CSC format of the input graph, - and the shape should be [num_nodes + 1, 1] or [num_nodes]. - The data type should be the same with `row`. - input_nodes (Tensor): The input nodes we need to sample neighbors for, and the - data type should be the same with `row`. - sample_sizes (list|tuple): The number of neighbors and number of layers we want - to sample. The data type should be int, and the shape - should only have one dimension. - sorted_eids (Tensor): The sorted edge ids, should not be None when `return_eids` - is True. The shape should be [num_edges, 1], and the data - type should be the same with `row`. - return_eids (bool): Whether to return the id of the sample edges. Default is False. - name (str, optional): Name for the operation (optional, default is None). - For more information, please refer to :ref:`api_guide_Name`. - - Returns: - edge_src (Tensor): The src index of the output edges, also means the first column of - the edges. The shape is [num_sample_edges, 1] currently. - edge_dst (Tensor): The dst index of the output edges, also means the second column - of the edges. The shape is [num_sample_edges, 1] currently. - sample_index (Tensor): The original id of the input nodes and sampled neighbor nodes. - reindex_nodes (Tensor): The reindex id of the input nodes. - edge_eids (Tensor): Return the id of the sample edges if `return_eids` is True. - - Examples: - - .. code-block:: python - - import paddle - - row = [3, 7, 0, 9, 1, 4, 2, 9, 3, 9, 1, 9, 7] - colptr = [0, 2, 4, 5, 6, 7, 9, 11, 11, 13, 13] - nodes = [0, 8, 1, 2] - sample_sizes = [2, 2] - row = paddle.to_tensor(row, dtype="int64") - colptr = paddle.to_tensor(colptr, dtype="int64") - nodes = paddle.to_tensor(nodes, dtype="int64") - - edge_src, edge_dst, sample_index, reindex_nodes = \ - paddle.geometric.khop_sampler(row, colptr, nodes, sample_sizes, False) - - """ - - if _non_static_mode(): - if return_eids: - if sorted_eids is None: - raise ValueError(f"`sorted_eid` should not be None " - f"if return_eids is True.") - edge_src, edge_dst, sample_index, reindex_nodes, edge_eids = \ - _C_ops.graph_khop_sampler(row, sorted_eids, - colptr, input_nodes, - "sample_sizes", sample_sizes, - "return_eids", True) - return edge_src, edge_dst, sample_index, reindex_nodes, edge_eids - else: - edge_src, edge_dst, sample_index, reindex_nodes, _ = \ - _C_ops.graph_khop_sampler(row, None, - colptr, input_nodes, - "sample_sizes", sample_sizes, - "return_eids", False) - return edge_src, edge_dst, sample_index, reindex_nodes - - check_variable_and_dtype(row, "Row", ("int32", "int64"), - "graph_khop_sampler") - - if return_eids: - if sorted_eids is None: - raise ValueError(f"`sorted_eid` should not be None " - f"if return_eids is True.") - check_variable_and_dtype(sorted_eids, "Eids", ("int32", "int64"), - "graph_khop_sampler") - - check_variable_and_dtype(colptr, "Col_Ptr", ("int32", "int64"), - "graph_khop_sampler") - check_variable_and_dtype(input_nodes, "X", ("int32", "int64"), - "graph_khop_sampler") - - helper = LayerHelper("khop_sampler", **locals()) - edge_src = helper.create_variable_for_type_inference(dtype=row.dtype) - edge_dst = helper.create_variable_for_type_inference(dtype=row.dtype) - sample_index = helper.create_variable_for_type_inference(dtype=row.dtype) - reindex_nodes = helper.create_variable_for_type_inference(dtype=row.dtype) - edge_eids = helper.create_variable_for_type_inference(dtype=row.dtype) - helper.append_op(type="graph_khop_sampler", - inputs={ - "Row": row, - "Eids": sorted_eids, - "Col_Ptr": colptr, - "X": input_nodes - }, - outputs={ - "Out_Src": edge_src, - "Out_Dst": edge_dst, - "Sample_Index": sample_index, - "Reindex_X": reindex_nodes, - "Out_Eids": edge_eids - }, - attrs={ - "sample_sizes": sample_sizes, - "return_eids": return_eids - }) - if return_eids: - return edge_src, edge_dst, sample_index, reindex_nodes, edge_eids - else: - return edge_src, edge_dst, sample_index, reindex_nodes diff --git a/python/paddle/incubate/operators/graph_khop_sampler.py b/python/paddle/incubate/operators/graph_khop_sampler.py index e884ca22f7b8e..ba992d1d999a0 100644 --- a/python/paddle/incubate/operators/graph_khop_sampler.py +++ b/python/paddle/incubate/operators/graph_khop_sampler.py @@ -21,11 +21,6 @@ import paddle.utils.deprecated as deprecated -@deprecated( - since="2.4.0", - update_to="paddle.geometric.khop_sampler", - level=1, - reason="paddle.incubate.graph_khop_sampler will be removed in future") def graph_khop_sampler(row, colptr, input_nodes,