diff --git a/.envrc b/.envrc new file mode 100644 index 00000000..eff7387f --- /dev/null +++ b/.envrc @@ -0,0 +1,2 @@ +[[ -d '.venv' ]] || uv venv +source .venv/bin/activate diff --git a/.flake8 b/.flake8 deleted file mode 100644 index daa7560e..00000000 --- a/.flake8 +++ /dev/null @@ -1,69 +0,0 @@ -[flake8] -min_python_version = 3.7.0 -max-line-length = 88 -ban-relative-imports = true -# flake8-use-fstring: https://github.com/MichaelKim0407/flake8-use-fstring#--percent-greedy-and---format-greedy -format-greedy = 1 -inline-quotes = double -enable-extensions = TC, TC1 -type-checking-exempt-modules = typing, typing-extensions -eradicate-whitelist-extend = ^-.*; -extend-ignore = - # E203: Whitespace before ':' (pycqa/pycodestyle#373) - E203, - # SIM106: Handle error-cases first - SIM106, - # ANN101: Missing type annotation for self in method - ANN101, - # ANN102: Missing type annotation for cls in classmethod - ANN102, - F401, - F403, - I252, - SIM115, - N806, - N803, - B007, - E501, - C408, - SIM113, - N802, - N400, - E501, - E802, - E800, - B001, - B006, - B020, - C414, - C416, - E402, - E711, - E712, - E722, - E741, - F405, - F541, - F811, - F821, - F841, - FS001, - FS002, - I250, - N801, - N804, - N812, - N816, - Q002, - SIM102, - SIM105, - SIM111, - SIM114, - SIM118, - SIM201, - SIM203, - SIM300, - SIM401, - TC002 - - diff --git a/.gitignore b/.gitignore index 0c55db21..818accaa 100644 --- a/.gitignore +++ b/.gitignore @@ -171,3 +171,4 @@ x64/ *.vcxproj *.vcxproj.filters *.vcxproj.user +**/cmake-build-debug diff --git a/.python-version b/.python-version new file mode 100644 index 00000000..e4fba218 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/_cpp_easygraph/CMakeLists.txt b/_cpp_easygraph/CMakeLists.txt new file mode 100644 index 00000000..151394a6 --- /dev/null +++ b/_cpp_easygraph/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.15) +if(NOT DEFINED SKBUILD_PROJECT_NAME) + set(SKBUILD_PROJECT_NAME 
"cpp_easygraph_standalone") +endif() + +project(${SKBUILD_PROJECT_NAME} LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +set(PYBIND11_FINDPYTHON ON) +find_package(pybind11 CONFIG REQUIRED) + +file(GLOB_RECURSE EASYGRAPH_SOURCES "src/*.cpp") + + +pybind11_add_module(cpp_easygraph MODULE ${EASYGRAPH_SOURCES}) + +target_include_directories(cpp_easygraph PUBLIC "include") + +if(SKBUILD) + install(TARGETS cpp_easygraph DESTINATION ${SKBUILD_PROJECT_NAME}) +endif () diff --git a/cpp_easygraph/functions/components/biconnected.h b/_cpp_easygraph/include/biconnected.h similarity index 83% rename from cpp_easygraph/functions/components/biconnected.h rename to _cpp_easygraph/include/biconnected.h index 889173e9..6350391e 100644 --- a/cpp_easygraph/functions/components/biconnected.h +++ b/_cpp_easygraph/include/biconnected.h @@ -1,7 +1,8 @@ #pragma once -#define BOOST_PYTHON_STATIC_LIB -#include "../../common/common.h" + +#include "common.h" +#include "graph.h" class NeighborIterator { public: @@ -32,4 +33,4 @@ typedef struct stackNode { } }stack_node; -py::object _biconnected_dfs_record_edges(py::object G, py::object need_components); +py::list _biconnected_dfs_record_edges(Graph& G, bool need_components); diff --git a/_cpp_easygraph/include/cluster.h b/_cpp_easygraph/include/cluster.h new file mode 100644 index 00000000..856b87b1 --- /dev/null +++ b/_cpp_easygraph/include/cluster.h @@ -0,0 +1,8 @@ +#pragma once + + +#include "common.h" + +py::object clustering(py::object G, + py::object nodes = py::none(), + py::object weight = py::none()); diff --git a/_cpp_easygraph/include/common.h b/_cpp_easygraph/include/common.h new file mode 100644 index 00000000..774f8c7d --- /dev/null +++ b/_cpp_easygraph/include/common.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace py = pybind11; + +using node_t = int; +using weight_t 
= float;
+using node_attr_dict_factory =
+    std::map<std::string, weight_t>; //(weight_key, value)
+using edge_attr_dict_factory =
+    std::map<std::string, weight_t>; //(weight_key, value)
+using node_dict_factory =
+    std::unordered_map<node_t, node_attr_dict_factory>; //(node, node_attr)
+using adj_attr_dict_factory =
+    std::unordered_map<node_t, edge_attr_dict_factory>; //(out_node, (weight_key, value))
+using adj_dict_factory =
+    std::unordered_map<node_t, adj_attr_dict_factory>; //(node, edge_attr)
diff --git a/_cpp_easygraph/include/directed_graph.h b/_cpp_easygraph/include/directed_graph.h
new file mode 100644
index 00000000..08287a0f
--- /dev/null
+++ b/_cpp_easygraph/include/directed_graph.h
@@ -0,0 +1,17 @@
+#pragma once
+
+
+#include "graph.h"
+#include "common.h"
+
+struct DiGraph: public Graph
+{
+    DiGraph();
+};
+
+void DiGraph__init__(DiGraph &self, py::kwargs kwargs);
+py::dict DiGraph_out_degree(const py::object& self, const py::object& weight);
+py::dict DiGraph_in_degree(const py::object& self, const py::object& weight);
+py::dict DiGraph_degree(const py::object &self,const py::object &weight);
+py::object DiGraph_size(const py::object& self, const py::object& weight);
+py::object DiGraph_number_of_edges(const py::object &self,const py::object &u,const py::object &v);
diff --git a/_cpp_easygraph/include/evaluation.h b/_cpp_easygraph/include/evaluation.h
new file mode 100644
index 00000000..b632c049
--- /dev/null
+++ b/_cpp_easygraph/include/evaluation.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "common.h"
+#include "graph.h"
+
+py::dict constraint(const py::object &G, py::object nodes,
+                    const py::object &weight,
+                    const py::object &n_workers /*unused but signature kept*/);
+py::dict effective_size(const py::object &G, py::object nodes,
+                        const py::object &weight, const py::object &n_workers);
+
+
+py::dict hierarchy(Graph& G,
+                   py::object nodes = py::none(),
+                   py::object weight = py::none(),
+                   py::object n_workers = py::none());
diff --git a/_cpp_easygraph/include/graph.h b/_cpp_easygraph/include/graph.h
new file mode 100644
index 00000000..a1d84d08
--- /dev/null
+++ b/_cpp_easygraph/include/graph.h
@@ -0,0 +1,52 @@
+#pragma once + + +#include "common.h" + +struct Graph +{ + node_dict_factory node; + adj_dict_factory adj; + py::dict node_to_id, id_to_node, graph; + node_t id; + bool dirty_nodes, dirty_adj; + py::object nodes_cache, adj_cache; + + Graph(); + py::object get_nodes(); + py::object get_name(); + py::object get_graph(); + py::object get_adj(); + py::object get_edges(); +}; + +void Graph__init__(Graph &self, py::kwargs kwargs); +py::object Graph__iter__(py::object self); +py::object Graph__len__(const py::object& self); +py::object Graph__contains__(const py::object& self, const py::object& node); +py::object Graph__getitem__(py::object self, py::object node); +py::object Graph_add_node(const py::tuple& args, const py::dict& kwargs); +py::object Graph_add_nodes(Graph& self,const py::list& nodes_for_adding,const py::list& nodes_attr); +void Graph_add_nodes_from(Graph& self, const py::iterable& nodes_for_adding, const py::kwargs& kwargs); +void Graph_remove_node(Graph& self, const py::object& node_to_remove); +void Graph_remove_nodes(Graph& self, const py::sequence& nodes_to_remove); +py::object Graph_number_of_nodes(Graph& self); +bool Graph_has_node(Graph &self, py::object node); +py::object Graph_nbunch_iter(py::object self, py::object nbunch); +void Graph_add_edge(Graph& self, const py::object& u_of_edge, const py::object& v_of_edge, const py::kwargs& kwargs); +void Graph_add_edges(Graph& self, const py::sequence& edges_for_adding, const py::sequence& edges_attr) ; +void Graph_add_edges_from(Graph& self, const py::iterable& ebunch, const py::kwargs& attr); +void Graph_add_edges_from_file(Graph& self, const py::str& file, bool weighted); +py::object Graph_add_weighted_edge(Graph& self, py::object u_of_edge, py::object v_of_edge, weight_t weight); +void Graph_remove_edge(Graph& self, const py::object& u, const py::object& v); +void Graph_remove_edges(Graph& self, const py::sequence& edges_to_remove); +int Graph_number_of_edges(const Graph& self, py::object u = 
py::none(), py::object v = py::none()); +bool Graph_has_edge(const Graph& self, const py::object& u, const py::object& v); +py::object Graph_copy(py::handle self_h); +py::dict Graph_degree(const Graph& self, py::object weight = py::none()); +py::list Graph_neighbors(const Graph& self, const py::object& node); +py::object Graph_nodes_subgraph(py::object self, py::list from_nodes); +py::object Graph_ego_subgraph(py::object self, py::object center); +py::object Graph_size(const Graph& G, py::object weight = py::none()); +bool Graph_is_directed(const Graph& self); +bool Graph_is_multigraph(const Graph& self); diff --git a/_cpp_easygraph/include/operation.h b/_cpp_easygraph/include/operation.h new file mode 100644 index 00000000..d7816136 --- /dev/null +++ b/_cpp_easygraph/include/operation.h @@ -0,0 +1,6 @@ +#pragma once + + +#include "graph.h" + +double density(const Graph& G); diff --git a/_cpp_easygraph/include/path.h b/_cpp_easygraph/include/path.h new file mode 100644 index 00000000..55be34ac --- /dev/null +++ b/_cpp_easygraph/include/path.h @@ -0,0 +1,13 @@ +#pragma once + + +#include "common.h" +#include "graph.h" + +py::dict _dijkstra_multisource(const py::object &G, + const py::object &sources, + const py::object &weight, + const py::object &target); +py::dict Floyd(Graph& G); +py::dict Prim(Graph& G); +py::dict Kruskal(Graph& G); diff --git a/_cpp_easygraph/include/utils.h b/_cpp_easygraph/include/utils.h new file mode 100644 index 00000000..70f7c9d9 --- /dev/null +++ b/_cpp_easygraph/include/utils.h @@ -0,0 +1,7 @@ +#pragma once + +#include "common.h" + +py::dict attr_to_dict(const node_attr_dict_factory& attr); +std::string weight_to_string(py::handle weight); +py::object py_sum(const py::object& o); diff --git a/_cpp_easygraph/src/biconnected.cpp b/_cpp_easygraph/src/biconnected.cpp new file mode 100644 index 00000000..1b5f4821 --- /dev/null +++ b/_cpp_easygraph/src/biconnected.cpp @@ -0,0 +1,135 @@ +#include "biconnected.h" +#include "graph.h" +#include 
"utils.h" + +node_t index_edge(std::vector>& edges, const std::pair& target) { + for (int i = edges.size() - 1;i >= 0;i--) { + if ((edges[i].first == target.first) && (edges[i].second == target.second)) { + return i; + } + } + return -1; +} + + +py::list _biconnected_dfs_record_edges(Graph& G, bool need_components) { + py::list ret; + std::unordered_set visited; + + auto& nodes = G.node; // id -> node attributes + auto& adj = G.adj; // id -> (neighbor id -> edge attrs) + + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + const node_t start_id = it->first; + if (visited.find(start_id) != visited.end()) { + continue; + } + + std::unordered_map discovery; + std::unordered_map low; + node_t root_children = 0; + + discovery.emplace(start_id, 0); + low.emplace(start_id, 0); + visited.emplace(start_id); + + std::vector> edge_stack; + std::vector stack; + + // 注意:这里与原实现一致,使用 operator[] 获取邻接(如无则创建空条目) + adj_attr_dict_factory start_adj = adj[start_id]; + NeighborIterator neighbors_iter(start_adj); + stack.emplace_back(stack_node{start_id, start_id, neighbors_iter}); + + while (!stack.empty()) { + stack_node& node_info = stack.back(); + const node_t node_grandparent_id = node_info.grandparent; + const node_t node_parent_id = node_info.parent; + + try { + const node_t node_child_id = node_info.neighbors_iter.next(); + + if (node_grandparent_id == node_child_id) { + continue; + } + + if (visited.find(node_child_id) != visited.end()) { + // 回边:更新 low值 + if (discovery[node_child_id] <= discovery[node_parent_id]) { + low[node_parent_id] = std::min(low[node_parent_id], discovery[node_child_id]); + if (need_components) { + edge_stack.emplace_back(node_parent_id, node_child_id); + } + } + } else { + // 树边:首次发现 + low[node_child_id] = discovery[node_child_id] = static_cast(discovery.size()); + visited.emplace(node_child_id); + + // 子节点邻居迭代器 + NeighborIterator child_neighbors_iter(adj[node_child_id]); + + // 压栈:当前 parent 变为 grandparent,新 child 为 parent + 
stack.emplace_back(stack_node{node_parent_id, node_child_id, child_neighbors_iter}); + + if (need_components) { + edge_stack.emplace_back(node_parent_id, node_child_id); + } + } + } catch (int) { + // 当前迭代器穷尽:弹栈并做割点/双连通分量处理 + stack.pop_back(); + + if (stack.size() > 1) { + // 非根结点情况:检查割点条件 + if (low[node_parent_id] >= discovery[node_grandparent_id]) { + if (need_components) { + py::list tmp_ret; + std::pair iter_edge(-1, -1); + while (iter_edge.first != node_grandparent_id || iter_edge.second != node_parent_id) { + iter_edge = edge_stack.back(); + edge_stack.pop_back(); + // 通过 id_to_node 还原 Python 节点对象 + py::object u = G.id_to_node.attr("__getitem__")(py::cast(iter_edge.first)); + py::object v = G.id_to_node.attr("__getitem__")(py::cast(iter_edge.second)); + tmp_ret.append(py::make_tuple(u, v)); + } + ret.append(tmp_ret); + } else { + py::object ap = G.id_to_node.attr("__getitem__")(py::cast(node_grandparent_id)); + ret.append(ap); // 记录割点 + } + } + // 回溯时更新祖父节点的 low + low[node_grandparent_id] = std::min(low[node_grandparent_id], low[node_parent_id]); + } else if (!stack.empty()) { + // 根的一个子树处理完毕 + ++root_children; + if (need_components) { + const std::pair target(node_grandparent_id, node_parent_id); + const node_t ind = index_edge(edge_stack, target); + if (ind != static_cast(-1)) { + py::list tmp_ret; + for (node_t z = ind; z < edge_stack.size(); ++z) { + const auto& e = edge_stack[z]; + py::object u = G.id_to_node.attr("__getitem__")(py::cast(e.first)); + py::object v = G.id_to_node.attr("__getitem__")(py::cast(e.second)); + tmp_ret.append(py::make_tuple(u, v)); + } + ret.append(tmp_ret); + } + } + } + } + } // while stack + + if (!need_components) { + if (root_children > 1) { + py::object root = G.id_to_node.attr("__getitem__")(py::cast(start_id)); + ret.append(root); // 根是割点 + } + } + } // for nodes + + return ret; +} diff --git a/_cpp_easygraph/src/cluster.cpp b/_cpp_easygraph/src/cluster.cpp new file mode 100644 index 00000000..30bc8fca --- /dev/null +++ 
b/_cpp_easygraph/src/cluster.cpp @@ -0,0 +1,195 @@ +#include "graph.h" +#include "utils.h" + +inline weight_t wt(adj_dict_factory& adj, node_t u, node_t v, std::string weight, weight_t max_weight = 1) { + auto& attr = adj[u][v]; + return (attr.count(weight) ? attr[weight] : 1) / max_weight; +} + +py::list _weighted_triangles_and_degree(Graph& G, + py::object nodes = py::none(), + py::object weight = py::none()) { + // ---- 0) 权重键 & 最大权重 ---- + std::string weight_key; + bool use_weight = !weight.is_none(); + if (use_weight) { + weight_key = weight_to_string(weight); + } + + // 统计是否有边 + bool has_edges = false; + for (auto it = G.adj.begin(); it != G.adj.end() && !has_edges; ++it) { + if (!it->second.empty()) has_edges = true; + } + + weight_t max_weight = 1; + if (use_weight && has_edges) { + bool assigned = false; + for (auto uit = G.adj.begin(); uit != G.adj.end(); ++uit) { + auto &nbrs = uit->second; + for (auto vit = nbrs.begin(); vit != nbrs.end(); ++vit) { + const auto &d = vit->second; // 属性 map + weight_t cur = 1; + auto dit = d.find(weight_key); + if (dit != d.end()) cur = dit->second; + if (assigned) { + max_weight = std::max(max_weight, cur); + } else { + assigned = true; + max_weight = cur; + } + } + } + } + + // ---- 1) 确定要遍历的节点列表(Python 对象)---- + std::vector nodes_vec; + if (nodes.is_none()) { + // 遍历 G.node(以 id 为键),通过 id_to_node 还原 Python 节点对象 + nodes_vec.reserve(G.node.size()); + for (auto it = G.node.begin(); it != G.node.end(); ++it) { + node_t nid = it->first; + py::object node_obj = G.id_to_node.attr("__getitem__")(py::cast(nid)); + nodes_vec.push_back(std::move(node_obj)); + } + } else { + // 遍历传入的可迭代对象,过滤不存在的节点 + for (py::handle h : nodes) { + py::object n = py::reinterpret_borrow(h); + if (G.node_to_id.contains(n)) { + nodes_vec.push_back(std::move(n)); + } + } + } + + // ---- 2) 主循环:每个节点的度与加权三角形数 ---- + py::list ret; + for (const py::object &node_obj : nodes_vec) { + // node -> id + node_t i_id = py::cast(G.node_to_id[node_obj]); + + // inbrs 
= 邻居集合(去掉自环) + std::unordered_set inbrs; + auto itAdjI = G.adj.find(i_id); + if (itAdjI != G.adj.end()) { + auto &nbrs = itAdjI->second; + for (auto kv = nbrs.begin(); kv != nbrs.end(); ++kv) { + if (kv->first != i_id) inbrs.insert(kv->first); + } + } + + // 统计加权三角形 + std::unordered_set seen; + weight_t weighted_triangles = 0; + + for (const auto &j_id : inbrs) { + seen.insert(j_id); + weight_t wij = wt(G.adj, i_id, j_id, weight_key, max_weight); + + // 只考虑 k 不在 seen 中(避免 j-k, k-j 双计) + for (const auto &k_id : inbrs) { + if (seen.count(k_id)) continue; + // 需要 j-k 相邻 + auto itAdjJ = G.adj.find(j_id); + if (itAdjJ == G.adj.end()) continue; + if (!itAdjJ->second.count(k_id)) continue; + + weight_t wjk = wt(G.adj, j_id, k_id, weight_key, max_weight); + weight_t wki = wt(G.adj, k_id, i_id, weight_key, max_weight); + + weighted_triangles += std::cbrt(static_cast(wij) * + static_cast(wjk) * + static_cast(wki)); + } + } + + // 结果条目:(python_node, degree, 2 * weighted_triangles) + // 注意:degree = inbrs.size() + ret.append(py::make_tuple(node_obj, + static_cast(inbrs.size()), + 2.0 * static_cast(weighted_triangles))); + } + + return ret; +} +py::list _triangles_and_degree(Graph& G, py::object nodes = py::none()) { + auto& adj = G.adj; + + // 1) 确定要处理的节点集合(Python 对象形式) + std::vector nodes_vec; + if (nodes.is_none()) { + nodes_vec.reserve(G.node.size()); + for (const auto& kv : G.node) { + const node_t nid = kv.first; + nodes_vec.emplace_back(G.id_to_node.attr("__getitem__")(py::cast(nid))); + } + } else { + for (py::handle h : nodes) { + py::object obj = py::reinterpret_borrow(h); + if (G.node_to_id.contains(obj)) nodes_vec.emplace_back(std::move(obj)); + } + } + + // 2) 主循环 + py::list ret; + for (const py::object& node_obj : nodes_vec) { + // node -> id + const node_t v = py::cast(G.node_to_id[node_obj]); + + // 邻居集合(去掉自环) + std::unordered_set vs; + if (auto it = adj.find(v); it != adj.end()) { + for (const auto& kv : it->second) vs.insert(kv.first); + } + vs.erase(v); + + // 
统计邻居之间的连边数(按原逻辑:有序对计数) + weight_t ntriangles = 0; // 原实现是对 (w,node) 有序对计数 + for (const node_t w : vs) { + auto itW = adj.find(w); + if (itW == adj.end()) continue; + const auto& nbrW = itW->second; + + for (const node_t u : vs) { + if (u == w) continue; + ntriangles += static_cast(nbrW.count(u)); + } + } + + // (python_node, degree, ntriangles) + // 注意:原代码未除以 2,这里保持一致 + py::object py_node = G.id_to_node.attr("__getitem__")(py::cast(v)); + ret.append(py::make_tuple(py_node, static_cast(vs.size()), ntriangles)); + } + + return ret; +} +py::object clustering(py::object G, + py::object nodes, + py::object weight) { + if (py::bool_(G.attr("is_directed")())) { + throw py::value_error("Not implemented yet"); + } + + Graph& G_ = py::cast(G); // 用 py::cast 而不是 extract + + py::list td_list = weight.is_none() + ? _triangles_and_degree(G_, nodes) + : _weighted_triangles_and_degree(G_, nodes, weight); + + py::dict clusterc; + const std::size_t n = py::len(td_list); + for (std::size_t i = 0; i < n; ++i) { + py::tuple t = td_list[i].cast(); + py::object v = t[0]; + int d = t[1].cast(); + double tri = t[2].cast(); + double c = (tri != 0.0 && d >= 2) ? 
(tri / (double(d) * double(d - 1))) : 0.0; + clusterc[v] = c; + } + + if (!nodes.is_none() && G_.node_to_id.contains(nodes)) { + return clusterc.attr("__getitem__")(nodes); + } + return clusterc; +} diff --git a/_cpp_easygraph/src/cpp_easygraph.cpp b/_cpp_easygraph/src/cpp_easygraph.cpp new file mode 100644 index 00000000..7fb2bc45 --- /dev/null +++ b/_cpp_easygraph/src/cpp_easygraph.cpp @@ -0,0 +1,86 @@ +#include "biconnected.h" +#include "cluster.h" +#include "common.h" +#include "directed_graph.h" +#include "evaluation.h" +#include "graph.h" +#include "operation.h" +#include "path.h" + +PYBIND11_MODULE(cpp_easygraph, m) { + + py::class_(m, "Graph") + .def("__init__", &Graph__init__) + .def(py::init<>()) + .def("__iter__", &Graph__iter__) + .def("__len__", &Graph__len__) + .def("__contains__", &Graph__contains__, py::arg("node")) + .def("__getitem__", &Graph__getitem__, py::arg("node")) + .def("add_node", &Graph_add_node) + .def("add_nodes", &Graph_add_nodes, py::arg("nodes_for_adding"), + py::arg("nodes_attr") = py::list()) + .def("add_nodes_from", &Graph_add_nodes_from) + .def("remove_node", &Graph_remove_node, py::arg("node_to_remove")) + .def("remove_nodes", &Graph_remove_nodes, py::arg("nodes_to_remove")) + .def("number_of_nodes", &Graph_number_of_nodes) + .def("has_node", &Graph_has_node, py::arg("node")) + .def("nbunch_iter", &Graph_nbunch_iter, py::arg("nbunch") = py::none()) + .def("add_edge", &Graph_add_edge) + .def("add_edges", &Graph_add_edges, py::arg("edges_for_adding"), + py::arg("edges_attr") = py::list()) + .def("add_edges_from", &Graph_add_edges_from) + .def("add_edges_from_file", &Graph_add_edges_from_file, py::arg("file"), + py::arg("weighted") = false) + .def("add_weighted_edge", &Graph_add_weighted_edge, py::arg("u_of_edge"), + py::arg("v_of_edge"), py::arg("weight")) + .def("remove_edge", &Graph_remove_edge, py::arg("u"), py::arg("v")) + .def("remove_edges", &Graph_remove_edges, py::arg("edges_to_remove")) + .def("number_of_edges", 
&Graph_number_of_edges, py::arg("u") = py::none(),
+           py::arg("v") = py::none())
+      .def("has_edge", &Graph_has_edge, py::arg("u"), py::arg("v"))
+      .def("copy", &Graph_copy)
+      .def("degree", &Graph_degree, py::arg("weight") = py::str("weight"))
+      .def("neighbors", &Graph_neighbors, py::arg("node"))
+      .def("all_neighbors", &Graph_neighbors, py::arg("node"))
+      .def("nodes_subgraph", &Graph_nodes_subgraph, py::arg("from_nodes"))
+      .def("ego_subgraph", &Graph_ego_subgraph, py::arg("center"))
+      .def("size", &Graph_size, py::arg("weight") = py::none())
+      .def("is_directed", &Graph_is_directed)
+      .def("is_multigraph", &Graph_is_multigraph)
+      .def_property_readonly("graph", &Graph::get_graph)
+      .def_property_readonly("nodes", &Graph::get_nodes)
+      .def_property_readonly("name", &Graph::get_name)
+      .def_property_readonly("adj", &Graph::get_adj)
+      .def_property_readonly("edges", &Graph::get_edges);
+
+  py::class_<DiGraph, Graph>(m, "DiGraph")
+      .def("__init__", &DiGraph__init__)
+      .def(py::init<>())
+      .def("out_degree", &DiGraph_out_degree, py::arg("weight") = "weight")
+      .def("in_degree", &DiGraph_in_degree, py::arg("weight") = "weight")
+      .def("degree", &DiGraph_degree, py::arg("weight") = "weight")
+      .def("size", &DiGraph_size, py::arg("weight") = py::none())
+      .def("number_of_edges", &DiGraph_number_of_edges,
+           py::arg("u") = py::none(), py::arg("v") = py::none());
+
+  m.def("cpp_density", &density, py::arg("G"));
+  m.def("cpp_constraint", &constraint, py::arg("G"),
+        py::arg("nodes") = py::none(), py::arg("weight") = py::none(),
+        py::arg("n_workers") = py::none());
+  m.def("cpp_effective_size", &effective_size, py::arg("G"),
+        py::arg("nodes") = py::none(), py::arg("weight") = py::none(),
+        py::arg("n_workers") = py::none());
+  m.def("cpp_hierarchy", &hierarchy, py::arg("G"),
+        py::arg("nodes") = py::none(), py::arg("weight") = py::none(),
+        py::arg("n_workers") = py::none());
+  m.def("cpp_dijkstra_multisource", &_dijkstra_multisource, py::arg("G"),
+        py::arg("sources"), 
py::arg("weight") = "weight",
+        py::arg("target") = py::none());
+  m.def("cpp_clustering", &clustering, py::arg("G"),
+        py::arg("nodes") = py::none(), py::arg("weight") = py::none());
+  m.def("cpp_biconnected_dfs_record_edges", &_biconnected_dfs_record_edges,
+        py::arg("G"), py::arg("need_components") = true);
+  m.def("cpp_Floyd", &Floyd, py::arg("G"));
+  m.def("cpp_Prim", &Prim, py::arg("G"));
+  m.def("cpp_Kruskal", &Kruskal, py::arg("G"));
+}
diff --git a/_cpp_easygraph/src/directed_graph.cpp b/_cpp_easygraph/src/directed_graph.cpp
new file mode 100644
index 00000000..2053998f
--- /dev/null
+++ b/_cpp_easygraph/src/directed_graph.cpp
@@ -0,0 +1,174 @@
+#include "directed_graph.h"
+#include "common.h"
+#include "utils.h"
+
+DiGraph::DiGraph() : Graph() {}
+
+void DiGraph__init__(DiGraph &self, py::kwargs kwargs) {
+  new (&self) DiGraph();
+
+  py::object MappingProxyType =
+      py::module_::import("types").attr("MappingProxyType");
+
+  if (kwargs) {
+    for (auto &item : kwargs) {
+      self.graph[py::str(item.first)] = item.second;
+    }
+  }
+
+  self.nodes_cache = MappingProxyType(py::dict());
+  self.adj_cache = MappingProxyType(py::dict());
+}
+
+py::dict DiGraph_out_degree(const py::object &self,
+                            const py::object &weight) {
+  py::dict degree;
+
+  // edges may be a plain attribute (list/iterable) or a method (e.g. edges(data=True))
+  py::object edges_obj = self.attr("edges");
+
+  py::list edges = py::isinstance<py::function>(edges_obj)
+                       ? py::list(edges_obj())
+                       : py::list(edges_obj);
+
+  // iterate edges: (u, v, d)
+  for (py::ssize_t i = 0; i < py::len(edges); ++i) {
+    py::tuple edge = edges[i].cast<py::tuple>();
+    py::object u = edge[0];
+    // v is unused in this function, kept for symmetry
+    // py::object v = edge[1];
+    py::dict d = (edge.size() >= 3) ? 
edge[2].cast() : py::dict(); + + py::object w = d.attr("get")(weight, 1); // d.get(weight, 1) + + if (degree.contains(u)) { + // degree[u] += w —— 用 Python 的 __add__ 避免数值类型分歧 + degree[u] = degree[u].attr("__add__")(w); + } else { + degree[u] = w; + } + } + + // 确保所有节点都有条目:不存在则补 0 + py::object nodes_obj = self.attr("nodes"); + py::list nodes = py::isinstance(nodes_obj) + ? py::list(nodes_obj()) + : py::list(nodes_obj); + for (py::ssize_t i = 0; i < py::len(nodes); ++i) { + py::object node = nodes[i]; + if (!degree.contains(node)) { + degree[node] = 0; + } + } + + return degree; +} + +py::dict DiGraph_in_degree(const py::object &self, + const py::object &weight) { + py::dict degree; + + // edges 可能是属性(list/iterable)或方法(如 edges(data=True)) + py::object edges_obj = self.attr("edges"); + py::list edges = py::isinstance(edges_obj) + ? py::list(edges_obj()) + : py::list(edges_obj); + + // 遍历边:(u, v, d) + for (py::ssize_t i = 0; i < py::len(edges); ++i) { + py::tuple edge = edges[i].cast(); + // u 未使用,但保留对齐 + // py::object u = edge[0]; + py::object v = edge[1]; + py::dict d = (edge.size() >= 3) ? edge[2].cast() : py::dict(); + + // d.get(weight, 1) + py::object w = d.attr("get")(weight, 1); + + if (degree.contains(v)) { + // degree[v] += w —— 用 Python 的 __add__ 以避免数值类型差异 + degree[v] = degree[v].attr("__add__")(w); + } else { + degree[v] = w; + } + } + + // 确保所有节点都有条目:不存在则补 0 + py::object nodes_obj = self.attr("nodes"); + py::list nodes = py::isinstance(nodes_obj) + ? 
py::list(nodes_obj()) + : py::list(nodes_obj); + for (py::ssize_t i = 0; i < py::len(nodes); ++i) { + py::object node = nodes[i]; + if (!degree.contains(node)) { + degree[node] = 0; + } + } + + return degree; +} + +py::dict DiGraph_degree(const py::object &self, + const py::object &weight) { + py::dict degree; + + // out_degree / in_degree 都应返回 dict + py::dict out_degree = self.attr("out_degree")(weight).cast(); + py::dict in_degree = self.attr("in_degree")(weight).cast(); + + // nodes 可能是属性或方法 + py::object nodes_obj = self.attr("nodes"); + + py::list nodes = py::isinstance(nodes_obj) + ? py::list(nodes_obj()) + : py::list(nodes_obj); + + for (py::ssize_t i = 0; i < py::len(nodes); ++i) { + py::object u = nodes[i]; + + // 取度值:若不存在则当 0 + py::object out_v = out_degree.contains(u) ? out_degree[u] : py::int_(0); + py::object in_v = in_degree.contains(u) ? in_degree[u] : py::int_(0); + + // degree[u] = out_v + in_v (用 Python 的 __add__ 以兼容 int/float/Decimal + // 等) + degree[u] = out_v.attr("__add__")(in_v); + } + return degree; +} + +py::object DiGraph_size(const py::object &self, + const py::object &weight) { + py::dict out_degree = self.attr("out_degree")(weight).cast(); + + py::object values = out_degree.attr("values")(); // dict_values 视图 + py::object s = py::module_::import("builtins").attr("sum")(values); + + return weight.is_none() ? py::int_(s) : s; +} + +py::object DiGraph_number_of_edges(const py::object &self, + const py::object &u, + const py::object &v) { + // 若未指定 u(与原先 u == py::object() 等价),直接返回 size() + if (u.is_none()) { + return self.attr("size")(); + } + + // 将 self 视为 Graph 的引用 + Graph &G = self.cast(); + + // node_to_id: Python dict(与原代码保持一致) + py::dict node_to_id = G.node_to_id; + + // u_id = node_to_id[u] + node_t u_id = node_to_id[u].cast(); + + // v_id = node_to_id.get(v, -1) + node_t v_id = node_to_id.attr("get")(v, py::int_(-1)).cast(); + + // 计算是否存在边 (u, v) + int exists = + (v_id != static_cast(-1)) && (G.adj[u_id].count(v_id) ? 
1 : 0);
+  return py::int_(exists); // keep the 0/1 int return of the original implementation
+}
diff --git a/_cpp_easygraph/src/evaluation.cpp b/_cpp_easygraph/src/evaluation.cpp
new file mode 100644
index 00000000..023d07be
--- /dev/null
+++ b/_cpp_easygraph/src/evaluation.cpp
@@ -0,0 +1,295 @@
+#include "evaluation.h"
+#include "graph.h"
+#include "utils.h"
+
+struct pair_hash {
+  template <typename T1, typename T2>
+  std::size_t operator()(const std::pair<T1, T2> &p) const {
+    auto h1 = std::hash<T1>()(p.first);
+    auto h2 = std::hash<T2>()(p.second);
+    return h1 ^ h2;
+  }
+};
+
+std::unordered_map<std::pair<node_t, node_t>, weight_t, pair_hash> sum_nmw_rec,
+    max_nmw_rec, local_constraint_rec;
+
+enum norm_t { sum, max };
+
+weight_t mutual_weight(adj_dict_factory &G, node_t u, node_t v,
+                       std::string weight) {
+  weight_t a_uv = 0, a_vu = 0;
+  if (G.count(u) && G[u].count(v)) {
+    edge_attr_dict_factory &guv = G[u][v];
+    a_uv = guv.count(weight) ? guv[weight] : 1;
+  }
+  if (G.count(v) && G[v].count(u)) {
+    edge_attr_dict_factory &gvu = G[v][u];
+    a_vu = gvu.count(weight) ? gvu[weight] : 1;
+  }
+  return a_uv + a_vu;
+}
+
+weight_t normalized_mutual_weight(adj_dict_factory &G, node_t u, node_t v,
+                                  std::string weight, norm_t norm = sum) {
+  std::pair<node_t, node_t> edge = std::make_pair(u, v);
+  auto &nmw_rec = (norm == sum) ? sum_nmw_rec : max_nmw_rec;
+  if (nmw_rec.count(edge)) {
+    return nmw_rec[edge];
+  } else {
+    weight_t scale = 0;
+    for (auto &w : G[u]) {
+      weight_t temp_weight = mutual_weight(G, u, w.first, weight);
+      scale =
+          (norm == sum) ? (scale + temp_weight) : std::max(scale, temp_weight);
+    }
+    weight_t nmw = scale ? 
(mutual_weight(G, u, v, weight) / scale) : 0; + nmw_rec[edge] = nmw; + return nmw; + } +} + +weight_t local_constraint(adj_dict_factory &G, node_t u, node_t v, + std::string weight = "None") { + std::pair edge = std::make_pair(u, v); + if (local_constraint_rec.count(edge)) { + return local_constraint_rec[edge]; + } else { + weight_t direct = normalized_mutual_weight(G, u, v, weight); + weight_t indirect = 0; + for (auto &w : G[u]) { + indirect += normalized_mutual_weight(G, u, w.first, weight) * + normalized_mutual_weight(G, w.first, v, weight); + } + weight_t result = pow((direct + indirect), 2); + local_constraint_rec[edge] = result; + return result; + } +} + +std::pair +compute_constraint_of_v(adj_dict_factory &G, node_t v, std::string weight) { + weight_t constraint_of_v = 0; + if (G[v].size() == 0) { + constraint_of_v = Py_NAN; + } else { + for (const auto &n : G[v]) { + constraint_of_v += local_constraint(G, v, n.first, weight); + } + } + return std::make_pair(v, constraint_of_v); +} + +py::dict constraint(const py::object &G, py::object nodes, + const py::object &weight, + const py::object &n_workers /*未使用但保留签名*/) { + // 1) 预处理 + std::string weight_key = weight_to_string(weight); + sum_nmw_rec.clear(); + max_nmw_rec.clear(); + local_constraint_rec.clear(); + + // 2) nodes 缺省:从 G.nodes 获取;兼容属性或方法 + if (nodes.is_none()) { + nodes = G.attr("nodes"); + } + py::list nodes_list; + if (py::isinstance(nodes) || PyCallable_Check(nodes.ptr())) + nodes_list = py::list(nodes()); + else + nodes_list = py::list(nodes); + + // 3) 主循环 + py::dict result; + Graph &G_ = G.cast(); + + for (py::ssize_t i = 0; i < py::len(nodes_list); ++i) { + py::object v = nodes_list[i]; + + // v_id = G_.node_to_id[v] + // 若 node_to_id 是 py::dict + node_t v_id = G_.node_to_id[v].cast(); + + // 计算 (best_id, value) + std::pair p = + compute_constraint_of_v(G_.adj, v_id, weight_key); + + // key = G_.id_to_node[p.first] + py::object key = G_.id_to_node.attr("get")(py::int_(p.first)); + + // 
result[key] = p.second + result[key] = py::cast(p.second); + } + + return result; +} + +weight_t redundancy(adj_dict_factory &G, node_t u, node_t v, + std::string weight = "None") { + weight_t r = 0; + for (const auto &neighbor_info : G[u]) { + node_t w = neighbor_info.first; + r += normalized_mutual_weight(G, u, w, weight) * + normalized_mutual_weight(G, v, w, weight, max); + } + return 1 - r; +} + +py::dict effective_size(const py::object &G, py::object nodes, + const py::object &weight, + const py::object &n_workers /*未用,保留签名*/) { + Graph &G_ = G.cast(); + + // 与原逻辑一致的全局缓存清理 + sum_nmw_rec.clear(); + max_nmw_rec.clear(); + + py::dict eff; + + // nodes 缺省:迭代整个图 + if (nodes.is_none()) { + nodes = G; // 假定 G 可迭代出节点 + } + py::list nodes_list(nodes); + + // 为了兼容 G[v] 的写法 + py::object getitem = G.attr("__getitem__"); + py::object py_nan = py::module_::import("math").attr("nan"); + + // 无向图 + 无权:使用 ego_subgraph 的闭式公式 + if (!G.attr("is_directed")().cast() && weight.is_none()) { + for (py::ssize_t i = 0; i < py::len(nodes_list); ++i) { + py::object v = nodes_list[i]; + + // 度为 0 → NaN + if (py::len(getitem(v)) == 0) { + eff[v] = py_nan; + continue; + } + + py::object E = G.attr("ego_subgraph")(v); + auto n = py::len(E); + if (n > 1) { + weight_t size = E.attr("size")().cast(); + // val = n - 1 - (2 * size) / (n - 1) + double nd = static_cast(n); + double val = nd - 1.0 - (2.0 * static_cast(size)) / (nd - 1.0); + eff[v] = py::float_(val); + } else { + eff[v] = py::int_(0); + } + } + } else { + // 有向或有权:按邻接表+redundancy 求和 + const std::string weight_key = weight_to_string(weight); + + for (py::ssize_t i = 0; i < py::len(nodes_list); ++i) { + py::object v = nodes_list[i]; + + if (py::len(getitem(v)) == 0) { + eff[v] = py_nan; + continue; + } + + weight_t redundancy_sum = weight_t(0); + + // v_id = node_to_id[v] + // (pybind11 的 py::dict 不建议用 operator[] 取值,改用 get) + node_t v_id = G_.node_to_id.attr("get")(v, py::none()).cast(); + + for (const auto &nbr : G_.adj[v_id]) { + 
node_t u_id = nbr.first; + redundancy_sum += redundancy(G_.adj, v_id, u_id, weight_key); + } + eff[v] = py::cast(redundancy_sum); + } + } + + return eff; +} + +py::dict hierarchy(Graph& G, + py::object nodes, + py::object weight, + py::object n_workers) { + sum_nmw_rec.clear(); + max_nmw_rec.clear(); + local_constraint_rec.clear(); + + // 权重键(允许 None;你的 local_constraint/取权重逻辑应能在缺失时回退到 1) + const std::string weight_key = weight_to_string(weight); + + // 构造要处理的节点列表(Python 对象形式) + std::vector nodes_vec; + if (nodes.is_none()) { + nodes_vec.reserve(G.node.size()); + for (const auto &kv : G.node) { + const node_t nid = kv.first; + // 从 id_to_node 取回 Python 节点对象(const 情况下用 __getitem__) + nodes_vec.emplace_back(G.id_to_node.attr("__getitem__")(py::cast(nid))); + } + } else { + for (py::handle h : nodes) { + py::object v = py::reinterpret_borrow(h); + if (G.node_to_id.contains(v)) + nodes_vec.emplace_back(std::move(v)); + } + } + + py::dict result; + + for (const py::object &v_obj : nodes_vec) { + // v -> id;若节点不在图中(并发修改等),跳过 + if (!G.node_to_id.contains(v_obj)) { + result[v_obj] = 0.0; + continue; + } + const node_t v_id = py::cast(G.node_to_id[v_obj]); + + // 邻居集合(去掉自环) + std::vector nbrs_vec; + if (auto it = G.adj.find(v_id); it != G.adj.end()) { + nbrs_vec.reserve(it->second.size()); + for (const auto &kv : it->second) { + const node_t w_id = kv.first; + if (w_id != v_id) + nbrs_vec.push_back(w_id); + } + } + const int n = static_cast(nbrs_vec.size()); + if (n <= 1) { + result[v_obj] = 0.0; + continue; + } + + // 计算每个邻居的 local_constraint 以及总和 C + std::vector c_vals; + c_vals.reserve(n); + + double C = 0.0; + for (const node_t w_id : nbrs_vec) { + const double cw = + static_cast(local_constraint(G.adj, v_id, w_id, weight_key)); + c_vals.push_back(cw); + C += cw; + } + + double h_sum = 0.0; + if (C > 0.0) { + // hierarchy_sum = sum_{w in N(v)} ( (c_w/C)*n * log((c_w/C)*n) ) / (n*log + // n) + const double n_log_n = + static_cast(n) * std::log(static_cast(n)); + for 
(double cw : c_vals) { + const double p = cw / C; // 归一化权重 + const double t = p * static_cast(n); + if (t > 0.0) { + h_sum += (t * std::log(t)) / n_log_n; + } + } + } + result[v_obj] = h_sum; // 若 C==0 则为 0 + } + + return result; +} diff --git a/_cpp_easygraph/src/graph.cpp b/_cpp_easygraph/src/graph.cpp new file mode 100644 index 00000000..43567e6b --- /dev/null +++ b/_cpp_easygraph/src/graph.cpp @@ -0,0 +1,865 @@ +#include "graph.h" +#include "common.h" +#include "utils.h" + +Graph::Graph() { + py::object MappingProxyType = py::module_::import("types").attr("MappingProxyType"); + this->id = 0; + this->dirty_nodes = true; + this->dirty_adj = true; + this->node_to_id = py::dict(); + this->id_to_node = py::dict(); + this->graph = py::dict(); + this->nodes_cache = MappingProxyType(py::dict()); + this->adj_cache = MappingProxyType(py::dict()); +} + +void Graph___init__(Graph &self, py::kwargs kwargs) { + // 可选:防止重复初始化,placement-new 复位 + new (&self) Graph(); + + // graph.update(kwargs) + if (kwargs && py::len(kwargs) > 0) { + self.graph.attr("update")(kwargs); + // 也可手动拷贝键值: + // for (auto &item : kwargs) self.graph[py::str(item.first)] = item.second; + } + + // 重新创建只读 cache + py::object MappingProxyType = py::module_::import("types").attr("MappingProxyType"); + self.nodes_cache = MappingProxyType(py::dict()); + self.adj_cache = MappingProxyType(py::dict()); +} + +py::object Graph__iter__(py::object self) { + return self.attr("nodes").attr("__iter__")(); +} + +py::object Graph__len__(const py::object& self) { + // 将 self 转成 Graph& + Graph& self_ = self.cast(); + + // 计算 Python 层 len(node_to_id) + py::ssize_t n = py::len(self_.node_to_id); + + // 返回 Python int + return py::int_(n); +} + +py::object Graph__contains__(const py::object& self, const py::object& node) { + Graph& self_ = self.cast(); + + try { + // 若 node_to_id 是 Python dict,可直接调用 contains() + bool has = self_.node_to_id.attr("__contains__")(node).cast(); + return py::bool_(has); + } + catch (const 
py::error_already_set& e) { + // 捕获 Python 异常 + if (e.matches(PyExc_TypeError)) { + // TypeError: 节点类型不匹配 → 返回 False + PyErr_Clear(); + return py::bool_(false); + } else { + // 重新抛出其他异常,让 Python 层能看到原始错误 + throw; + } + } +} + +py::object Graph__getitem__(py::object self, py::object node) { + return self.attr("adj")[node]; +} + +node_t _add_one_node(Graph& self, + const py::object& one_node_for_adding, + py::object node_attr /* = None */) { + if (node_attr.is_none()) { + node_attr = py::dict(); + } + + node_t id; + // if self.node_to_id.contains(one_node_for_adding) + const bool has = + self.node_to_id.attr("__contains__")(one_node_for_adding).cast(); + + if (has) { + // id = self.node_to_id[one_node_for_adding] + id = self.node_to_id.attr("__getitem__")(one_node_for_adding).cast(); + } else { + id = ++(self.id); + + // self.id_to_node[id] = one_node_for_adding + self.id_to_node.attr("__setitem__")(py::int_(id), one_node_for_adding); + + // self.node_to_id[one_node_for_adding] = id + self.node_to_id.attr("__setitem__")(one_node_for_adding, py::int_(id)); + } + + // items = list(node_attr.items()) + py::list items = py::list(node_attr.attr("items")()); + + // 初始化 C++ 侧的属性/邻接容器 + self.node[id] = node_attr_dict_factory(); + self.adj[id] = adj_attr_dict_factory(); + + for (py::ssize_t i = 0; i < py::len(items); ++i) { + py::tuple kv = items[i].cast(); + py::object pkey = kv[0]; + std::string weight_key = weight_to_string(pkey); + weight_t value = kv[1].cast(); + self.node[id].insert({weight_key, value}); + } + + return id; +} + +py::object Graph_add_node(const py::tuple& args, const py::dict& kwargs) { + // args: [self, node] + Graph& self = args[0].cast(); + py::object one_node_for_adding = args[1]; + + self.dirty_nodes = true; + self.dirty_adj = true; + + // kwargs 直接当作属性字典传入 + py::object node_attr = kwargs; // kwargs 本身就是 dict-like + _add_one_node(self, one_node_for_adding, node_attr); + + return py::none(); // 等价于原来的 py::object() +} + + +py::object 
Graph_add_nodes(Graph& self, + const py::list& nodes_for_adding, + const py::list& nodes_attr) { + self.dirty_nodes = true; + self.dirty_adj = true; + + if (py::len(nodes_attr) != 0 && + py::len(nodes_for_adding) != py::len(nodes_attr)) { + throw py::value_error("Nodes and attributes lists must have the same length."); + } + + for (py::ssize_t i = 0; i < py::len(nodes_for_adding); ++i) { + py::object one_node_for_adding = nodes_for_adding[i]; + + py::dict node_attr; + if (py::len(nodes_attr) != 0) { + node_attr = nodes_attr[i].cast(); // 若不是 dict,会抛 TypeError + } else { + node_attr = py::dict(); + } + + _add_one_node(self, one_node_for_adding, node_attr); + } + + return py::none(); +} + +void Graph_add_nodes_from(Graph& self, const py::iterable& nodes_for_adding, const py::kwargs& kwargs) { + self.dirty_nodes = true; + self.dirty_adj = true; + + // 预先把 kwargs 复制到一个 dict(避免原地修改 kwargs) + py::dict base_attrs; + for (auto item : kwargs) { + base_attrs[item.first] = item.second; + } + + for (py::handle h : nodes_for_adding) { + py::object n_obj = py::reinterpret_borrow(h); + py::dict merged_attrs = py::dict(base_attrs); // 每个节点的属性从 kwargs 起步 + + // 支持形如 (node, {attr...}) 的二元组 + if (py::isinstance(n_obj)) { + py::tuple t = n_obj.cast(); + if (py::len(t) != 2) + throw py::type_error("Each tuple must be (node, dict)."); + + py::object node = t[0]; + py::object maybe_dict = t[1]; + + if (!py::isinstance(maybe_dict)) + throw py::type_error("Second element of tuple must be a dict."); + + py::dict ndict = maybe_dict.cast(); + // 合并 per-node 属性:kwargs < ndict(ndict 覆盖同名键) + for (auto kv : ndict) { + merged_attrs[kv.first] = kv.second; + } + n_obj = std::move(node); + } + + // 禁止 None 作为节点 + if (n_obj.is_none()) + throw py::value_error("None cannot be a node."); + + // 如为新节点则先添加 + bool newnode = !self.node_to_id.contains(n_obj); + if (newnode) { + _add_one_node(self, n_obj, py::none()); + } + + // 获取 node id;若你的 map 直接存的是 node_t,可去掉 cast + node_t id = 
py::cast(self.node_to_id[n_obj]); + + // 写入属性 + for (auto kv : merged_attrs) { + py::handle k = kv.first; + py::handle v = kv.second; + + // 若你的 weight_to_string 接受 py::object: + std::string weight_key = weight_to_string(py::reinterpret_borrow(k)); + weight_t weight_val = py::cast(v); + + self.node[id].insert(std::make_pair(std::move(weight_key), std::move(weight_val))); + } + } +} +void Graph_remove_node(Graph& self, const py::object& node_to_remove) { + self.dirty_nodes = true; + self.dirty_adj = true; + + if (!self.node_to_id.contains(node_to_remove)) { + // KeyError: 带上对象 repr + throw py::key_error(py::str("No node {} in graph.").format(node_to_remove)); + } + + // 从 Python dict 中取出 id 并转为 node_t + node_t node_id = py::cast(self.node_to_id[node_to_remove]); + + // 先从所有邻居的邻接表中删除该节点 + // 注:这里修改的是 neighbor 的容器,不会使对 self.adj[node_id] 的遍历失效 + for (const auto& kv : self.adj[node_id]) { + const node_t neighbor_id = kv.first; + self.adj[neighbor_id].erase(node_id); + } + + // 再删掉该节点的邻接表与节点属性 + self.adj.erase(node_id); + self.node.erase(node_id); + + // 同步 Python 侧的映射(与原代码一致使用 pop) + self.node_to_id.attr("pop")(node_to_remove); + self.id_to_node.attr("pop")(node_id); +} + +void Graph_remove_nodes(Graph& self, const py::sequence& nodes_to_remove) { + self.dirty_nodes = true; + self.dirty_adj = true; + + const std::size_t n = py::len(nodes_to_remove); + + // 先整体校验(与原逻辑一致:任一不存在则立即报错并不做部分删除) + for (std::size_t i = 0; i < n; ++i) { + py::object node = nodes_to_remove[i]; + if (!self.node_to_id.contains(node)) { + throw py::key_error(py::str("No node {} in graph.").format(node)); + } + } + + // 再逐个删除(若输入包含重复节点,第二次删除会抛 KeyError,与原行为一致) + for (std::size_t i = 0; i < n; ++i) { + py::object node = nodes_to_remove[i]; + Graph_remove_node(self, node); // 直接调 C++ 实现,避免 Python 回调开销 + } +} + +py::object Graph_number_of_nodes(Graph& self) { + return py::int_(self.node.size()); +} + +bool Graph_has_node(Graph &self, py::object node) { + return self.node_to_id.contains(node); +} + 
+py::object Graph_nbunch_iter(py::object self, py::object nbunch) { + py::object bunch = py::object(); + if (nbunch.is_none()) { + bunch = self.attr("adj").attr("__iter__")(); + } + else if (self.contains(nbunch)) { + py::list nbunch_wrapper = py::list(); + nbunch_wrapper.append(nbunch); + bunch = nbunch_wrapper.attr("__iter__")(); + } + else { + py::list nbunch_list = py::list(nbunch), nodes_list = py::list(); + for (int i = 0;i < py::len(nbunch_list);i++) { + py::object n = nbunch_list[i]; + if (self.contains(n)) { + nodes_list.append(n); + } + } + bunch = nbunch_list.attr("__iter__")(); + } + return bunch; +} + +void _add_one_edge(Graph& self, const py::object& u_of_edge, const py::object& v_of_edge, const py::object& edge_attr) { + // 1) 端点确保存在并取得 id + node_t u, v; + + if (self.node_to_id.contains(u_of_edge)) { + u = py::cast(self.node_to_id[u_of_edge]); + } else { + // 这里给新节点空属性(与原始代码传 py::none() 等价) + u = _add_one_node(self, u_of_edge, py::dict{}); + } + + if (self.node_to_id.contains(v_of_edge)) { + v = py::cast(self.node_to_id[v_of_edge]); + } else { + v = _add_one_node(self, v_of_edge, py::dict{}); + } + + // 2) 规范化边属性:None -> 空 dict;否则必须是 dict + py::dict attrs; + if (edge_attr.is_none()) { + attrs = py::dict{}; + } else if (py::isinstance(edge_attr)) { + attrs = edge_attr.cast(); + } else { + throw py::type_error("edge_attr must be a dict or None."); + } + + // 3) 初始化双向邻接条目 + self.adj[u][v] = node_attr_dict_factory(); + self.adj[v][u] = node_attr_dict_factory(); + + // 4) 填充属性(dict 直接可迭代:kv.first, kv.second) + for (auto kv : attrs) { + std::string key = weight_to_string(py::reinterpret_borrow(kv.first)); + weight_t val = py::cast(kv.second); + self.adj[u][v].insert(std::make_pair(key, val)); + self.adj[v][u].insert(std::make_pair(std::move(key), val)); + // 注意:上面对 key 复用/移动若底层容器需要独立 key,可改为两次构造字符串 + } +} +void Graph_add_edge(Graph& self, const py::object& u_of_edge, const py::object& v_of_edge, const py::kwargs& kwargs) { + self.dirty_nodes = true; + 
self.dirty_adj = true; + + // 将 **kwargs 复制为独立 dict,避免在 _add_one_edge 内修改到调用方对象 + py::dict edge_attr; + for (auto kv : kwargs) { + edge_attr[kv.first] = kv.second; + } + + _add_one_edge(self, u_of_edge, v_of_edge, edge_attr); // 其内部负责建点与属性合并 +} + +void Graph_add_edges(Graph& self, const py::sequence& edges_for_adding, const py::sequence& edges_attr) { + self.dirty_nodes = true; + self.dirty_adj = true; + + const std::size_t n = py::len(edges_for_adding); + const std::size_t m = py::len(edges_attr); + + if (m != 0 && n != m) { + // 原代码抛 AssertionError;pybind11 无内建 assertion_error,这里用 value_error 更合适 + throw py::value_error("Edges and Attributes lists must have same length."); + } + + for (std::size_t i = 0; i < n; ++i) { + // 1) 取出一条边,要求是 (u, v) + py::object edge_obj = edges_for_adding[i]; + if (!py::isinstance(edge_obj)) { + throw py::type_error("Each edge must be a tuple (u, v)."); + } + py::tuple e = edge_obj.cast(); + if (py::len(e) != 2) { + throw py::type_error("Each edge tuple must have length 2: (u, v)."); + } + py::object u = e[0]; + py::object v = e[1]; + + // 2) 选择该边对应的属性 + py::dict attr; + if (m != 0) { + attr = edges_attr[i].cast(); // 若不是 dict 会抛 type_error + } else { + attr = py::dict{}; + } + + // 3) 添加该边(_add_one_edge 内部负责必要的建点与属性赋值) + _add_one_edge(self, u, v, attr); + } +} + +void Graph_add_edges_from(Graph& self, const py::iterable& ebunch, const py::kwargs& attr) { + self.dirty_nodes = true; + self.dirty_adj = true; + + // 1) 复制全局属性 kwargs + py::dict base_attrs; + for (auto kv : attr) base_attrs[kv.first] = kv.second; + + // 2) 遍历边集合 + for (py::handle h : ebunch) { + py::object eobj = py::reinterpret_borrow(h); + + // 允许 list/tuple/任意可序列化为 sequence 的对象 + py::sequence e = eobj.cast(); + const std::size_t L = py::len(e); + if (L != 2 && L != 3) { + throw py::value_error(py::str("Edge tuple {} must be a 2-tuple or 3-tuple.").format(eobj)); + } + + py::object u = e[0]; + py::object v = e[1]; + if (u.is_none() || v.is_none()) { + throw 
py::value_error("None cannot be a node."); + } + + // 每边的局部属性(kwargs < per-edge dict) + py::dict per_edge = py::dict(base_attrs); + if (L == 3) { + py::object maybe_dict = e[2]; + if (!py::isinstance(maybe_dict)) { + throw py::type_error("Edge data (3rd element) must be a dict."); + } + py::dict dd = maybe_dict.cast(); + for (auto kv : dd) per_edge[kv.first] = kv.second; + } + + // 3) 确保端点存在并获得 id + node_t uid, vid; + if (self.node_to_id.contains(u)) { + uid = py::cast(self.node_to_id[u]); + } else { + uid = _add_one_node(self, u, py::dict{}); + } + if (self.node_to_id.contains(v)) { + vid = py::cast(self.node_to_id[v]); + } else { + vid = _add_one_node(self, v, py::dict{}); + } + + // 4) 初始化(若不存在则创建)边属性容器 + if (!self.adj[uid].count(vid)) self.adj[uid][vid] = node_attr_dict_factory(); + if (!self.adj[vid].count(uid)) self.adj[vid][uid] = node_attr_dict_factory(); + + auto& uv = self.adj[uid][vid]; + auto& vu = self.adj[vid][uid]; + + // 5) 写入属性:赋值语义(覆盖旧值),两向一致 + for (auto kv : per_edge) { + std::string k = weight_to_string(py::reinterpret_borrow(kv.first)); + weight_t vval = py::cast(kv.second); + uv[k] = vval; + vu[k] = vval; + } + } +} + +struct commactype : std::ctype { + commactype() : std::ctype(get_table()) {} + + static const std::ctype_base::mask* get_table() { + using mask = std::ctype_base::mask; + static std::vector table(std::ctype::table_size, mask()); // 静态存储期 + static bool inited = false; + if (!inited) { + table[','] = std::ctype_base::space; + table[' '] = std::ctype_base::space; + table['\t'] = std::ctype_base::space; + table['\n'] = std::ctype_base::space; + table['\r'] = std::ctype_base::space; + inited = true; + } + return table.data(); + } +}; + +void Graph_add_edges_from_file(Graph& self, const py::str& file, bool weighted) { + self.dirty_nodes = true; + self.dirty_adj = true; + + std::ios::sync_with_stdio(false); + + const std::string file_path = py::cast(file); + std::ifstream in(file_path); + if (!in.is_open()) { + throw 
py::value_error("File not found: " + file_path); + } + in.imbue(std::locale(std::locale(), new commactype)); + + std::string su, sv; + const std::string key = "weight"; + weight_t w{}; + + while (in >> su >> sv) { + py::str pu(su), pv(sv); + + // 取得/创建端点 id + node_t uid, vid; + if (self.node_to_id.contains(pu)) { + uid = py::cast(self.node_to_id[pu]); + } else { + uid = _add_one_node(self, pu, py::dict{}); + } + if (self.node_to_id.contains(pv)) { + vid = py::cast(self.node_to_id[pv]); + } else { + vid = _add_one_node(self, pv, py::dict{}); + } + + // 初始化两向邻接条目(若尚不存在) + if (!self.adj[uid].count(vid)) self.adj[uid][vid] = node_attr_dict_factory(); + if (!self.adj[vid].count(uid)) self.adj[vid][uid] = node_attr_dict_factory(); + + if (weighted) { + if (!(in >> w)) { + throw py::value_error("Weighted file format error: missing/invalid weight after edge (" + su + ", " + sv + ")."); + } + self.adj[uid][vid][key] = w; + self.adj[vid][uid][key] = w; + } + // 无权重:仅保证邻接存在,属性留空 + } +} +py::object Graph_add_weighted_edge(Graph& self, py::object u_of_edge, py::object v_of_edge, weight_t weight) { + self.dirty_nodes = true; + self.dirty_adj = true; + py::dict edge_attr; + edge_attr["weight"] = weight; + _add_one_edge(self, u_of_edge, v_of_edge, edge_attr); + return py::object(); +} + +void Graph_remove_edge(Graph& self, const py::object& u, const py::object& v) { + self.dirty_nodes = true; + self.dirty_adj = true; + + if (!self.node_to_id.contains(u) || !self.node_to_id.contains(v)) { + throw py::key_error(py::str("No edge {}-{} in graph.").format(u, v)); + } + + const node_t uid = py::cast(self.node_to_id[u]); + const node_t vid = py::cast(self.node_to_id[v]); + + // 不创建默认邻接表:用 find 检查 + auto it_u = self.adj.find(uid); + if (it_u == self.adj.end()) { + throw py::key_error(py::str("No edge {}-{} in graph.").format(u, v)); + } + auto& nbrs_u = it_u->second; + + auto it_uv = nbrs_u.find(vid); + if (it_uv == nbrs_u.end()) { + throw py::key_error(py::str("No edge {}-{} in 
graph.").format(u, v)); + } + + // 删除 u->v + nbrs_u.erase(it_uv); + + // 无向图对称删除 v->u(若自环则不重复) + if (uid != vid) { + auto it_v = self.adj.find(vid); + if (it_v != self.adj.end()) { + it_v->second.erase(uid); + } + } +} +void Graph_remove_edges(Graph& self, const py::sequence& edges_to_remove) { + self.dirty_nodes = true; + self.dirty_adj = true; + + const std::size_t n = py::len(edges_to_remove); + for (std::size_t i = 0; i < n; ++i) { + py::object edge_obj = edges_to_remove[i]; + if (!py::isinstance(edge_obj)) { + throw py::type_error("Each edge must be a tuple (u, v)."); + } + py::tuple edge = edge_obj.cast(); + if (py::len(edge) != 2) { + throw py::type_error("Each edge tuple must have length 2: (u, v)."); + } + py::object u = edge[0]; + py::object v = edge[1]; + + Graph_remove_edge(self, u, v); + } +} + +int Graph_number_of_edges(const Graph& self, py::object u, py::object v) { + // 1) 全图边数 + if (u.is_none()) { + std::size_t total = 0; + for (const auto& kv : self.adj) total += kv.second.size(); + return static_cast(total / 2); + } + + // 2) 指定 (u, v) 是否存在:不存在节点 → 0 + if (!self.node_to_id.contains(u) || !self.node_to_id.contains(v)) { + return 0; + } + + const node_t uid = py::cast(self.node_to_id[u]); + const node_t vid = py::cast(self.node_to_id[v]); + + // 不要用 self.adj[uid] 以免创建空条目 + auto it_u = self.adj.find(uid); + if (it_u == self.adj.end()) return 0; + + const auto& nbrs_u = it_u->second; + return static_cast(nbrs_u.count(vid) ? 
1 : 0); +} + +bool Graph_has_edge(const Graph& self, const py::object& u, const py::object& v) { + if (!self.node_to_id.contains(u) || !self.node_to_id.contains(v)) + return false; + + const node_t uid = py::cast(self.node_to_id[u]); + const node_t vid = py::cast(self.node_to_id[v]); + + auto it_u = self.adj.find(uid); + if (it_u == self.adj.end()) + return false; + + const auto& nbrs_u = it_u->second; + return nbrs_u.find(vid) != nbrs_u.end(); +} + +py::object Graph_copy(py::handle self_h) { + // 取 C++ 引用 + const Graph& self = py::cast(self_h); + + // 使用 self.__class__() 构造同类型实例 + py::object cls = py::type::of(self_h); + py::object Gobj = cls(); // 调用该类型的无参构造;需在绑定里暴露默认构造 + + // 取得其 C++ 引用,拷贝字段 + Graph& G = py::cast(Gobj); + + G.graph = py::dict(self.graph); + G.id_to_node = py::dict(self.id_to_node); + G.node_to_id = py::dict(self.node_to_id); + G.node = self.node; + G.adj = self.adj; + G.dirty_nodes = self.dirty_nodes; + G.dirty_adj = self.dirty_adj; + + return Gobj; // 返回 Python 对象(保持动态类型) +} + +py::dict Graph_degree(const Graph& self, py::object weight) { + // 1) 预处理权重键 + const bool use_weight = !weight.is_none(); + std::string wkey; + if (use_weight) { + wkey = weight_to_string(py::reinterpret_borrow(weight)); + } + + // 2) 累加度数:用临时 map,避免反复操作 py::dict + std::unordered_map deg; + + // 先为所有节点置 0(保证孤立点也在结果里) + for (const auto& kv : self.node) { + deg.emplace(kv.first, 0.0); + } + + // 3) 遍历边(避免双计:仅在 uid <= vid 时计一次) + for (const auto& [uid, nbrs] : self.adj) { + for (const auto& [vid, attrs] : nbrs) { + if (uid > vid) continue; // 无向图去重 + double w = 1.0; + if (use_weight) { + auto it = attrs.find(wkey); + if (it != attrs.end()) { + w = static_cast(it->second); + } + } + deg[uid] += w; + deg[vid] += w; + } + } + + // 4) 生成 Python dict:键为原始 Python 节点对象 + py::dict degree; + for (const auto& [nid, val] : deg) { + // self.id_to_node[nid] -> py::object + py::object node_obj = self.id_to_node.attr("__getitem__")(py::cast(nid)); + degree[node_obj] = val; + } + + return 
degree; +} + +py::list Graph_neighbors(const Graph& self, const py::object& node) { + if (!self.node_to_id.contains(node)) { + throw py::key_error(py::str("No node {}").format(node)); + } + + const node_t uid = py::cast(self.node_to_id[node]); + + py::list out; + auto it = self.adj.find(uid); + if (it == self.adj.end()) { + return out; // 没有邻居 + } + + const auto& nbrs = it->second; // map + for (const auto& kv : nbrs) { + const node_t vid = kv.first; + // 从 id_to_node 取回 Python 节点对象(const 上用 __getitem__ 避免 operator[]) + py::object v = self.id_to_node.attr("__getitem__")(py::cast(vid)); + out.append(v); + } + return out; +} + +py::object Graph_nodes_subgraph(py::handle self_h, const py::sequence& from_nodes) { + const Graph& self = py::cast(self_h); + + // 1) 构造“同类”实例:等价于 Python 的 self.__class__() + py::object cls = py::type::of(self_h); + py::object Gobj = cls(); // 需要 Graph 在绑定里有无参构造 + Graph& G = py::cast(Gobj); + + // 2) 复制元数据 graph(浅拷贝) + G.graph = py::dict(self.graph); + + // 3) 收集子集节点(只加入存在于原图的节点) + std::unordered_set subset; + const std::size_t n = py::len(from_nodes); + for (std::size_t i = 0; i < n; ++i) { + py::object node = from_nodes[i]; + if (!self.node_to_id.contains(node)) continue; + + node_t nid = py::cast(self.node_to_id[node]); + subset.insert(nid); + + // 取该节点属性 -> py::dict + py::dict node_attr; + auto it = self.node.find(nid); + if (it != self.node.end()) { + for (const auto& kv : it->second) { + node_attr[py::str(kv.first)] = kv.second; // weight_t 可隐式/显式 cast + } + } + + // 在子图中建点(保持原 Python 节点对象) + _add_one_node(G, node, node_attr); + } + + // 4) 子集诱导边:仅当两端都在 subset 中;无向图用 uid <= vid 去重 + for (node_t uid : subset) { + auto it_u = self.adj.find(uid); + if (it_u == self.adj.end()) continue; + + for (const auto& kv : it_u->second) { + node_t vid = kv.first; + if (!subset.count(vid)) continue; + if (uid > vid) continue; // 无向图避免重复添加 + + // 边属性 -> py::dict + py::dict edge_attr; + for (const auto& e : kv.second) { + edge_attr[py::str(e.first)] = 
e.second; + } + + // 从 id 取回 Python 节点对象 + py::object u_obj = self.id_to_node.attr("__getitem__")(py::cast(uid)); + py::object v_obj = self.id_to_node.attr("__getitem__")(py::cast(vid)); + + _add_one_edge(G, u_obj, v_obj, edge_attr); + } + } + + return Gobj; +} +py::object Graph_ego_subgraph(py::object self, py::object center) { + py::list neighbors_of_center = py::list(self.attr("all_neighbors")(center)); + neighbors_of_center.append(center); + return self.attr("nodes_subgraph")(neighbors_of_center); +} + +py::object Graph_size(const Graph& G, py::object weight ) { + py::dict deg = Graph_degree(G, weight); // {node_obj: degree_value} + + double sum_deg = 0.0; + for (auto kv : deg) { + sum_deg += py::cast(kv.second); + } + + if (weight.is_none()) { + // 与原逻辑一致:先转整型再除 2(整数除法) + // 这里 sum_deg 应该是偶数(无向图),做一次显式截断 + long long m2 = static_cast(sum_deg); + return py::int_(m2 / 2); + } else { + return py::float_(sum_deg / 2.0); + } +} + +bool Graph_is_directed(const Graph& self) { + return false; +} + +bool Graph_is_multigraph(const Graph& self) { + return false; +} + +py::object Graph::get_nodes() { + py::object MappingProxyType = py::module_::import("types").attr("MappingProxyType"); + if (this->dirty_nodes) { + py::dict nodes = py::dict(); + for (const auto& node_info : node) { + node_t id = node_info.first; + const auto& node_attr = node_info.second; + nodes[this->id_to_node[py::int_(id)]] = MappingProxyType(attr_to_dict(node_attr)); + } + this->nodes_cache = MappingProxyType(nodes); + this->dirty_nodes = false; + } + return this->nodes_cache; +} + +py::object Graph::get_name() { + return this->graph.attr("get")("name", ""); +} + +py::object Graph::get_graph() { + return this->graph; +} + +py::object Graph::get_adj() { + py::object MappingProxyType = py::module_::import("types").attr("MappingProxyType"); + if (this->dirty_adj) { + py::dict adj = py::dict(); + for (const auto& ego_edges : this->adj) { + node_t start_point = ego_edges.first; + py::dict ego_edges_dict = 
py::dict(); + for (const auto& edge_info : ego_edges.second) { + node_t end_point = edge_info.first; + const auto& edge_attr = edge_info.second; + ego_edges_dict[this->id_to_node[py::int_(end_point)]] = MappingProxyType(attr_to_dict(edge_attr)); + } + adj[this->id_to_node[py::int_(start_point)]] = MappingProxyType(ego_edges_dict); + } + this->adj_cache = MappingProxyType(adj); + this->dirty_adj = false; + } + return this->adj_cache; +} + +py::object Graph::get_edges() { + py::list edges = py::list(); + std::set > seen; + for (const auto& ego_edges : this->adj) { + node_t u = ego_edges.first; + for (const auto& edge_info : ego_edges.second) { + node_t v = edge_info.first; + const auto& edge_attr = edge_info.second; + if (seen.find(std::make_pair(u,v)) == seen.end()) { + seen.insert(std::make_pair(u,v)); + seen.insert(std::make_pair(v,u)); + edges.append(py::make_tuple(this->id_to_node[py::int_(u)], this->id_to_node[py::int_(v)], attr_to_dict(edge_attr))); + } + } + } + return edges; +} diff --git a/_cpp_easygraph/src/operation.cpp b/_cpp_easygraph/src/operation.cpp new file mode 100644 index 00000000..5904ea49 --- /dev/null +++ b/_cpp_easygraph/src/operation.cpp @@ -0,0 +1,18 @@ +#include "operation.h" +#include "graph.h" +double density(const Graph& G) { + const std::size_t n = G.node.size(); + if (n <= 1) return 0.0; + + // m2 为度数总和;无向图中 m2 = 2 * |E| + std::size_t m2 = 0; + for (const auto& kv : G.adj) { + m2 += kv.second.size(); + } + if (m2 == 0) return 0.0; + + // 与原实现一致:d = m2 / (n*(n-1)) + // 对无向简单图,这等价于 2|E| / (n*(n-1)) + return static_cast(m2) / + (static_cast(n) * static_cast(n - 1)); +} diff --git a/_cpp_easygraph/src/path.cpp b/_cpp_easygraph/src/path.cpp new file mode 100644 index 00000000..4fd80dff --- /dev/null +++ b/_cpp_easygraph/src/path.cpp @@ -0,0 +1,371 @@ +#include "path.h" +#include "graph.h" +#include "utils.h" + +#include + +py::dict _dijkstra_multisource(const py::object &G, + const py::object &sources, + const py::object &weight, + const 
py::object &target) { + Graph &G_ = G.cast(); + const std::string weight_key = weight_to_string(weight); + + // target_id = G_.node_to_id.get(target, -1) + const node_t target_id = + G_.node_to_id.attr("get")(target, py::int_(-1)).cast(); + + std::map dist; + std::map seen; + + using QItem = std::pair; + std::priority_queue, std::greater> Q; + + // sources 列表 + py::list sources_list = py::list(sources); + for (py::ssize_t i = 0; i < py::len(sources_list); ++i) { + const node_t s_id = G_.node_to_id[sources_list[i]].cast(); + seen[s_id] = weight_t(0); + Q.emplace(weight_t(0), s_id); + } + + // Dijkstra + while (!Q.empty()) { + const auto node = Q.top(); + Q.pop(); + const weight_t d = node.first; + const node_t v = node.second; + + if (dist.count(v)) { + continue; + } + dist[v] = d; + + if (v == target_id) { + break; // 已到目标 + } + + auto &adj = G_.adj; // 邻接结构 + for (auto &neighbor_info : adj[v]) { // neighbor_info: (u, attr) + const node_t u = neighbor_info.first; + + // attr: 例如 std::unordered_map + auto &attr = neighbor_info.second; + const weight_t cost = + (attr.count(weight_key) ? 
attr[weight_key] : weight_t(1)); + + const weight_t vu_dist = dist[v] + cost; + + if (dist.count(u)) { + if (vu_dist < dist[u]) { + throw py::value_error("Contradictory paths found: negative weights?"); + } + // 否则已是最优,跳过 + } else if (!seen.count(u) || vu_dist < seen[u]) { + seen[u] = vu_dist; + Q.emplace(vu_dist, u); + } else { + // 已有更优 seen[u] + } + } + } + + // 转回 Python 映射:node_obj -> distance + py::dict pydist; + for (const auto &kv : dist) { + const node_t vid = kv.first; + const weight_t d = kv.second; + py::object key = G_.id_to_node.attr("__getitem__")(py::cast(vid)); + pydist[key] = py::cast(d); + } + return pydist; +} +py::dict Prim(Graph& G) { + py::dict result_dict; + + // 结果图(临时存:id -> (id -> weight)) + std::unordered_map> res_dict; + + auto& nodes = G.node; + auto& adj = G.adj; + + // 收集节点,初始化 selected/candidate + std::vector selected; + std::vector candidate; + selected.reserve(nodes.size()); + candidate.reserve(nodes.size()); + + for (node_dict_factory::const_iterator it = nodes.begin(); it != nodes.end(); ++it) { + node_t node_id = it->first; + // 先在结果字典中为每个 python 节点放一个空 dict + py::object node_obj = G.id_to_node.attr("__getitem__")(py::cast(node_id)); + result_dict[node_obj] = py::dict(); + + if (selected.empty()) { + selected.emplace_back(node_id); + } else { + candidate.emplace_back(node_id); + } + } + + if (selected.empty()) { + // 空图 + return result_dict; + } + + const weight_t INF = std::numeric_limits::infinity(); + + // 朴素 Prim:每次在 selected 与 candidate 间找最小权边 + while (!candidate.empty()) { + node_t best_u = static_cast(-1); + node_t best_v = static_cast(-1); + weight_t best_w = INF; + + for (std::size_t i = 0; i < selected.size(); ++i) { + const node_t u = selected[i]; + + // 找 u 的邻接 + adj_attr_dict_factory node_adj; + if (adj.find(u) != adj.end()) { + node_adj = adj.at(u); + } else { + continue; + } + + for (std::size_t j = 0; j < candidate.size(); ++j) { + const node_t v = candidate[j]; + + weight_t edge_weight = INF; + // 是否存在 u->v 边 + 
adj_attr_dict_factory::const_iterator it_uv = node_adj.find(v); + if (it_uv != node_adj.end()) { + const edge_attr_dict_factory& eattr = it_uv->second; + edge_attr_dict_factory::const_iterator wit = eattr.find("weight"); + edge_weight = (wit != eattr.end()) ? wit->second : static_cast(1); + } + + if (nodes.find(u) != nodes.end() && edge_weight < best_w) { + best_u = u; best_v = v; best_w = edge_weight; + } + } + } + + if (best_u != static_cast(-1) && best_v != static_cast(-1)) { + // 选入最小边 (best_u, best_v) + res_dict[best_u][best_v] = best_w; + + // v 从 candidate -> selected + selected.emplace_back(best_v); + std::vector::iterator it = std::find(candidate.begin(), candidate.end(), best_v); + if (it != candidate.end()) candidate.erase(it); + } else { + // 剩余节点与 selected 之间不连通(图不连通,Prim 停止) + break; + } + } + + // 将 res_dict 中的 id 映射成 python 节点对象,并写回 result_dict + for (std::unordered_map>::const_iterator it = res_dict.begin(); + it != res_dict.end(); ++it) { + const node_t u = it->first; + const std::unordered_map& nbrs = it->second; + + py::object u_obj = G.id_to_node.attr("__getitem__")(py::cast(u)); + py::dict u_dict = result_dict.attr("__getitem__")(u_obj).cast(); + + for (std::unordered_map::const_iterator jt = nbrs.begin(); + jt != nbrs.end(); ++jt) { + const node_t v = jt->first; + const weight_t w = jt->second; + + py::object v_obj = G.id_to_node.attr("__getitem__")(py::cast(v)); + u_dict[v_obj] = w; + } + } + + return result_dict; +}bool comp(const std::pair, weight_t> &a, + const std::pair, weight_t> &b) { + return a.second < b.second; +} +py::dict Kruskal(Graph& G) { + py::dict result_dict; + + auto& nodes = G.node; + auto& adj = G.adj; + + // 1) 为每个节点在结果字典中放一个空 dict + std::vector ids; + ids.reserve(nodes.size()); + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + node_t u = it->first; + ids.push_back(u); + py::object u_obj = G.id_to_node.attr("__getitem__")(py::cast(u)); + result_dict[u_obj] = py::dict(); + } + if (ids.empty()) return 
result_dict; + + // 2) 收集边列表 (u,v,w) + // 若是无向图:仅收集 u < v 侧以去重;若是有向图,请移除 "if (u < v)"。 + struct Edge { node_t u, v; weight_t w; }; + std::vector edges; + for (auto uit = adj.begin(); uit != adj.end(); ++uit) { + node_t u = uit->first; + const auto& nbrs = uit->second; + for (auto vit = nbrs.begin(); vit != nbrs.end(); ++vit) { + node_t v = vit->first; + const edge_attr_dict_factory& eattr = vit->second; + auto w_it = eattr.find("weight"); + weight_t w = (w_it != eattr.end()) ? w_it->second : static_cast(1); + + if (u < v) { // 无向图去重;有向图请删除此行条件 + edges.push_back({u, v, w}); + } + } + } + + // 3) Kruskal:边按权升序 + std::sort(edges.begin(), edges.end(), + [](const Edge& a, const Edge& b) { return a.w < b.w; }); + + // 4) 并查集(DSU) + // 将 node_t 映射到 0..N-1 索引 + std::unordered_map idx; + idx.reserve(ids.size()); + for (int i = 0; i < static_cast(ids.size()); ++i) idx[ids[i]] = i; + + std::vector parent(ids.size()), rank_(ids.size(), 0); + std::iota(parent.begin(), parent.end(), 0); + + auto find = [&](int x) { + while (parent[x] != x) { + parent[x] = parent[parent[x]]; + x = parent[x]; + } + return x; + }; + auto unite = [&](int a, int b) { + a = find(a); b = find(b); + if (a == b) return false; + if (rank_[a] < rank_[b]) std::swap(a, b); + parent[b] = a; + if (rank_[a] == rank_[b]) ++rank_[a]; + return true; + }; + + // 5) 选择 MST 边(以 id->id->weight 暂存) + std::unordered_map> res_dict; + + for (const auto& e : edges) { + auto it_u = idx.find(e.u); + auto it_v = idx.find(e.v); + if (it_u == idx.end() || it_v == idx.end()) continue; // 安全性 + if (unite(it_u->second, it_v->second)) { + // 只写一个方向;若你想对称写入,可在最终输出时补回另一侧 + res_dict[e.u][e.v] = e.w; + } + } + + // 6) 将 id -> Python 节点对象,并写回 result_dict + for (auto it = res_dict.begin(); it != res_dict.end(); ++it) { + node_t u = it->first; + py::object u_obj = G.id_to_node.attr("__getitem__")(py::cast(u)); + py::dict u_dict = result_dict.attr("__getitem__")(u_obj).cast(); + + const auto& nbrs = it->second; + for (auto jt = 
nbrs.begin(); jt != nbrs.end(); ++jt) { + node_t v = jt->first; + weight_t w = jt->second; + + py::object v_obj = G.id_to_node.attr("__getitem__")(py::cast(v)); + u_dict[v_obj] = w; + + // 如需无向对称输出,取消下面两行注释: + // py::dict v_dict = result_dict.attr("__getitem__")(v_obj).cast(); + // v_dict[u_obj] = w; + } + } + + return result_dict; +} +py::dict Floyd(Graph& G) { + py::dict result_dict; + + auto& nodes = G.node; + auto& adj = G.adj; + + // 节点 id 列表 + std::vector ids; + ids.reserve(nodes.size()); + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + ids.push_back(it->first); + // 先为每个节点放一个空 dict + py::object u_obj = G.id_to_node.attr("__getitem__")(py::cast(it->first)); + result_dict[u_obj] = py::dict(); + } + if (ids.empty()) return result_dict; + + const weight_t INF = std::numeric_limits::infinity(); + + // 距离矩阵(用哈希映射表示) + std::unordered_map> dist; + dist.reserve(ids.size()); + for (node_t u : ids) { + auto& row = dist[u]; // 创建一行 + row.reserve(ids.size()); + for (node_t v : ids) { + // 自环距离 0 + if (u == v) { + row[v] = static_cast(0); + continue; + } + // 有边则取权重,否则 INF + weight_t w = INF; + auto it_u = adj.find(u); + if (it_u != adj.end()) { + const auto& nbrs = it_u->second; + auto it_uv = nbrs.find(v); + if (it_uv != nbrs.end()) { + const auto& eattr = it_uv->second; + auto wit = eattr.find("weight"); + w = (wit != eattr.end()) ? 
wit->second : static_cast(1); + } + } + row[v] = w; + } + } + + // Floyd–Warshall:O(n^3) + for (node_t k : ids) { + for (node_t i : ids) { + // 小优化:如果 i->k 已不可达,跳过内层 + weight_t dik = dist[i][k]; + if (dik == INF) continue; + + for (node_t j : ids) { + weight_t kj = dist[k][j]; + if (kj == INF) continue; + + weight_t through = static_cast(dik + kj); + weight_t& ij = dist[i][j]; + if (through < ij) ij = through; + } + } + } + + // 写出为 {py_node: {py_node: distance}} + for (const auto& row : dist) { + node_t u = row.first; + py::object u_obj = G.id_to_node.attr("__getitem__")(py::cast(u)); + py::dict u_dict = result_dict.attr("__getitem__")(u_obj).cast(); + + for (const auto& kv : row.second) { + node_t v = kv.first; + weight_t d = kv.second; + py::object v_obj = G.id_to_node.attr("__getitem__")(py::cast(v)); + u_dict[v_obj] = d; + } + } + + return result_dict; +} diff --git a/_cpp_easygraph/src/utils.cpp b/_cpp_easygraph/src/utils.cpp new file mode 100644 index 00000000..b58fe398 --- /dev/null +++ b/_cpp_easygraph/src/utils.cpp @@ -0,0 +1,30 @@ +#include "utils.h" + +py::dict attr_to_dict(const node_attr_dict_factory& attr) { + py::dict attr_dict; + for (const auto& kv : attr) { + // kv.first 是 std::string,kv.second 是 weight_t + attr_dict[py::str(kv.first)] = py::cast(kv.second); + } + return attr_dict; +} + +std::string weight_to_string(py::handle weight) { + // 如果 weight 不是 str,则尝试转换为 str,并在 None 情况下发警告 + if (!py::isinstance(weight)) { + if (weight.is_none()) { + py::module_::import("warnings") + .attr("warn")("None will be transformed into an instance of str."); + } + // 强制转为 Python 字符串 + weight = py::str(weight); + } + + // 转成 std::string + return py::cast(weight); +} + +py::object py_sum(const py::object& o) { + py::object sum_func = py::module_::import("builtins").attr("sum"); + return sum_func(o); +} diff --git a/cpp_easygraph/classes/__init__.h b/cpp_easygraph/classes/__init__.h deleted file mode 100644 index 939a4309..00000000 --- 
a/cpp_easygraph/classes/__init__.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "graph.h" -#include "directed_graph.h" -#include "operation.h" \ No newline at end of file diff --git a/cpp_easygraph/classes/directed_graph.cpp b/cpp_easygraph/classes/directed_graph.cpp deleted file mode 100644 index a08312d5..00000000 --- a/cpp_easygraph/classes/directed_graph.cpp +++ /dev/null @@ -1,100 +0,0 @@ -#include "directed_graph.h" -#include "../common/utils.h" - -DiGraph::DiGraph(): Graph() { - -} - -py::object DiGraph__init__(py::tuple args, py::dict kwargs) { - py::object MappingProxyType = py::import("types").attr("MappingProxyType"); - py::object self = args[0]; - self.attr("__init__")(); - DiGraph& self_ = py::extract(self); - py::dict graph_attr = kwargs; - self_.graph.update(graph_attr); - self_.nodes_cache = MappingProxyType(py::dict()); - self_.adj_cache = MappingProxyType(py::dict()); - return py::object(); -} - -py::object DiGraph_out_degree(py::object self, py::object weight) { - py::dict degree = py::dict(); - py::list edges = py::extract(self.attr("edges")); - py::object u, v; - py::dict d; - for (int i = 0;i < py::len(edges);i++) { - py::tuple edge = py::extract(edges[i]); - u = edge[0]; - v = edge[1]; - d = py::extract(edge[2]); - if (degree.contains(u)) { - degree[u] += d.get(weight, 1); - } - else { - degree[u] = d.get(weight, 1); - } - } - py::list nodes = py::list(self.attr("nodes")); - for (int i = 0;i < py::len(nodes);i++) { - py::object node = nodes[i]; - if (!degree.contains(node)) { - degree[node] = 0; - } - } - return degree; -} - -py::object DiGraph_in_degree(py::object self, py::object weight) { - py::dict degree = py::dict(); - py::list edges = py::extract(self.attr("edges")); - py::object u, v; - py::dict d; - for (int i = 0;i < py::len(edges);i++) { - py::tuple edge = py::extract(edges[i]); - u = edge[0]; - v = edge[1]; - d = py::extract(edge[2]); - if (degree.contains(v)) { - degree[v] += 
d.get(weight, 1); - } - else { - degree[v] = d.get(weight, 1); - } - } - py::list nodes = py::list(self.attr("nodes")); - for (int i = 0;i < py::len(nodes);i++) { - py::object node = nodes[i]; - if (!degree.contains(node)) { - degree[node] = 0; - } - } - return degree; -} - -py::object DiGraph_degree(py::object self, py::object weight) { - py::dict degree = py::dict(); - py::dict out_degree = py::extract(self.attr("out_degree")(weight)); - py::dict in_degree = py::extract(self.attr("in_degree")(weight)); - py::list nodes = py::list(self.attr("nodes")); - for (int i = 0;i < py::len(nodes);i++) { - py::object u = nodes[i]; - degree[u] = out_degree[u] + in_degree[u]; - } - return degree; -} - -py::object DiGraph_size(py::object self, py::object weight) { - py::dict out_degree = py::extract(self.attr("out_degree")(weight)); - py::object s = py_sum(out_degree.values()); - return (weight == py::object()) ? py::object(py::extract(s)) : s; -} - -py::object DiGraph_number_of_edges(py::object self, py::object u, py::object v) { - if (u == py::object()) { - return self.attr("size")(); - } - Graph& G = py::extract(self); - node_t u_id = py::extract(G.node_to_id[u]); - node_t v_id = py::extract(G.node_to_id.get(v, -1)); - return py::object(int(v != -1 && G.adj[u_id].count(v_id))); -} \ No newline at end of file diff --git a/cpp_easygraph/classes/directed_graph.h b/cpp_easygraph/classes/directed_graph.h deleted file mode 100644 index ea6bb6b4..00000000 --- a/cpp_easygraph/classes/directed_graph.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "graph.h" -#include "../common/common.h" - -struct DiGraph: public Graph -{ - DiGraph(); -}; - -py::object DiGraph__init__(py::tuple args, py::dict kwargs); - -py::object DiGraph_out_degree(py::object self, py::object weight); -py::object DiGraph_in_degree(py::object self, py::object weight); -py::object DiGraph_degree(py::object self, py::object weight); -py::object DiGraph_size(py::object self, 
py::object weight); -py::object DiGraph_number_of_edges(py::object self, py::object u, py::object v); \ No newline at end of file diff --git a/cpp_easygraph/classes/graph.cpp b/cpp_easygraph/classes/graph.cpp deleted file mode 100644 index 343387b6..00000000 --- a/cpp_easygraph/classes/graph.cpp +++ /dev/null @@ -1,643 +0,0 @@ -#include "graph.h" -#include "../common/utils.h" - -Graph::Graph() { - py::object MappingProxyType = py::import("types").attr("MappingProxyType"); - this->id = 0; - this->dirty_nodes = true; - this->dirty_adj = true; - this->node_to_id = py::dict(); - this->id_to_node = py::dict(); - this->graph = py::dict(); - this->nodes_cache = MappingProxyType(py::dict()); - this->adj_cache = MappingProxyType(py::dict()); -} - -py::object Graph__init__(py::tuple args, py::dict kwargs) { - py::object MappingProxyType = py::import("types").attr("MappingProxyType"); - py::object self = args[0]; - self.attr("__init__")(); - Graph& self_ = py::extract(self); - py::dict graph_attr = kwargs; - self_.graph.update(graph_attr); - self_.nodes_cache = MappingProxyType(py::dict()); - self_.adj_cache = MappingProxyType(py::dict()); - return py::object(); -} - -py::object Graph__iter__(py::object self) { - return self.attr("nodes").attr("__iter__")(); -} - -py::object Graph__len__(py::object self) { - Graph& self_ = py::extract(self); - return py::object(py::len(self_.node_to_id)); -} - -py::object Graph__contains__(py::object self, py::object node) { - Graph& self_ = py::extract(self); - try { - return self_.node_to_id.contains(node); - } - catch (const py::error_already_set&) { - PyObject* type, * value, * traceback; - PyErr_Fetch(&type, &value, &traceback); - if (PyErr_GivenExceptionMatches(PyExc_TypeError, type)) { - return py::object(false); - } - else { - PyErr_Restore(type, value, traceback); - return py::object(); - } - } -} - -py::object Graph__getitem__(py::object self, py::object node) { - return self.attr("adj")[node]; -} - -node_t _add_one_node(Graph& 
self, py::object one_node_for_adding, py::object node_attr = py::dict()) { - node_t id; - if (self.node_to_id.contains(one_node_for_adding)) { - id = py::extract(self.node_to_id[one_node_for_adding]); - } - else { - id = ++(self.id); - self.id_to_node[id] = one_node_for_adding; - self.node_to_id[one_node_for_adding] = id; - } - py::list items = py::list(node_attr.attr("items")()); - self.node[id] = node_attr_dict_factory(); - self.adj[id] = adj_attr_dict_factory(); - for (int i = 0; i < len(items);i++) { - py::tuple kv = py::extract(items[i]); - py::object pkey = kv[0]; - std::string weight_key = weight_to_string(pkey); - weight_t value = py::extract(kv[1]); - self.node[id].insert(std::make_pair(weight_key, value)); - } - return id; -} - -py::object Graph_add_node(py::tuple args, py::dict kwargs) { - Graph& self = py::extract(args[0]); - self.dirty_nodes = true; - self.dirty_adj = true; - py::object one_node_for_adding = args[1]; - py::dict node_attr = kwargs; - _add_one_node(self, one_node_for_adding, node_attr); - return py::object(); -} - -py::object Graph_add_nodes(Graph& self, py::list nodes_for_adding, py::list nodes_attr) { - self.dirty_nodes = true; - self.dirty_adj = true; - if (py::len(nodes_attr) != 0) { - if (py::len(nodes_for_adding) != py::len(nodes_attr)) { - PyErr_Format(PyExc_AssertionError, "Nodes and Attributes lists must have same length."); - return py::object(); - } - } - for (int i = 0;i < py::len(nodes_for_adding);i++) { - py::object one_node_for_adding = nodes_for_adding[i]; - py::dict node_attr; - if (py::len(nodes_attr)) { - node_attr = py::extract(nodes_attr[i]); - } - else { - node_attr = py::dict(); - } - _add_one_node(self, one_node_for_adding, node_attr); - } - return py::object(); -} - -py::object Graph_add_nodes_from(py::tuple args, py::dict kwargs) { - Graph& self = py::extract(args[0]); - self.dirty_nodes = true; - self.dirty_adj = true; - py::list nodes_for_adding = py::list(args[1]); - for (int i = 0;i < 
py::len(nodes_for_adding);i++) { - bool newnode; - py::dict attr = kwargs; - py::dict newdict, ndict; - py::object n = nodes_for_adding[i]; - try { - newnode = !self.node_to_id.contains(n); - newdict = attr; - } - catch (const py::error_already_set&) { - PyObject* type, * value, * traceback; - PyErr_Fetch(&type, &value, &traceback); - if (PyErr_GivenExceptionMatches(PyExc_TypeError, type)) { - py::tuple n_pair = py::extract(n); - n = n_pair[0]; - ndict = py::extract(n_pair[1]); - newnode = !self.node_to_id.contains(n); - newdict = attr.copy(); - newdict.update(ndict); - } - else { - PyErr_Restore(type, value, traceback); - return py::object(); - } - } - if (newnode) { - if (n == py::object()) { - PyErr_Format(PyExc_ValueError, "None cannot be a node"); - return py::object(); - } - _add_one_node(self, n); - } - node_t id = py::extract(self.node_to_id[n]); - py::list items = py::list(newdict.items()); - for (int i = 0; i < len(items);i++) { - py::tuple kv = py::extract(items[i]); - py::object pkey = kv[0]; - std::string weight_key = weight_to_string(pkey); - weight_t value = py::extract(kv[1]); - self.node[id].insert(std::make_pair(weight_key, value)); - } - } - return py::object(); -} - -py::object Graph_remove_node(Graph& self, py::object node_to_remove) { - self.dirty_nodes = true; - self.dirty_adj = true; - if (!self.node_to_id.contains(node_to_remove)) { - PyErr_Format(PyExc_KeyError, "No node %R in graph.", node_to_remove.ptr()); - return py::object(); - } - node_t node_id = py::extract(self.node_to_id[node_to_remove]); - for (const auto& neighbor_info : self.adj[node_id]) { - node_t neighbor_id = neighbor_info.first; - self.adj[neighbor_id].erase(node_id); - } - self.adj.erase(node_id); - self.node.erase(node_id); - self.node_to_id.attr("pop")(node_to_remove); - self.id_to_node.attr("pop")(node_id); - return py::object(); -} - -py::object Graph_remove_nodes(py::object self, py::list nodes_to_remove) { - Graph& self_ = py::extract(self); - self_.dirty_nodes = 
true; - self_.dirty_adj = true; - for (int i = 0;i < py::len(nodes_to_remove);i++) { - py::object node_to_remove = nodes_to_remove[i]; - if (!self_.node_to_id.contains(node_to_remove)) { - PyErr_Format(PyExc_KeyError, "No node %R in graph.", node_to_remove.ptr()); - return py::object(); - } - } - for (int i = 0;i < py::len(nodes_to_remove);i++) { - py::object node_to_remove = nodes_to_remove[i]; - self.attr("remove_node")(node_to_remove); - } - return py::object(); -} - -py::object Graph_number_of_nodes(Graph& self) { - return py::object(int(self.node.size())); -} - -py::object Graph_has_node(Graph& self, py::object node) { - return self.node_to_id.contains(node); -} - -py::object Graph_nbunch_iter(py::object self, py::object nbunch) { - py::object bunch = py::object(); - if (nbunch == py::object()) { - bunch = self.attr("adj").attr("__iter__")(); - } - else if (self.contains(nbunch)) { - py::list nbunch_wrapper = py::list(); - nbunch_wrapper.append(nbunch); - bunch = nbunch_wrapper.attr("__iter__")(); - } - else { - py::list nbunch_list = py::list(nbunch), nodes_list = py::list(); - for (int i = 0;i < py::len(nbunch_list);i++) { - py::object n = nbunch_list[i]; - if (self.contains(n)) { - nodes_list.append(n); - } - } - bunch = nbunch_list.attr("__iter__")(); - } - return bunch; -} - -void _add_one_edge(Graph& self, py::object u_of_edge, py::object v_of_edge, py::object edge_attr) { - node_t u, v; - if (!self.node_to_id.contains(u_of_edge)) { - u = _add_one_node(self, u_of_edge); - } - else { - u = py::extract(self.node_to_id[u_of_edge]); - } - if (!self.node_to_id.contains(v_of_edge)) { - v = _add_one_node(self, v_of_edge); - } - else { - v = py::extract(self.node_to_id[v_of_edge]); - } - py::list items = py::list(edge_attr.attr("items")()); - self.adj[u][v] = node_attr_dict_factory(); - self.adj[v][u] = node_attr_dict_factory(); - for (int i = 0; i < len(items);i++) { - py::tuple kv = py::extract(items[i]); - py::object pkey = kv[0]; - std::string weight_key = 
weight_to_string(pkey); - weight_t value = py::extract(kv[1]); - self.adj[u][v].insert(std::make_pair(weight_key, value)); - self.adj[v][u].insert(std::make_pair(weight_key, value)); - } -} - -py::object Graph_add_edge(py::tuple args, py::dict kwargs) { - Graph& self = py::extract(args[0]); - self.dirty_nodes = true; - self.dirty_adj = true; - py::object u_of_edge = args[1], v_of_edge = args[2]; - py::dict edge_attr = kwargs; - _add_one_edge(self, u_of_edge, v_of_edge, edge_attr); - return py::object(); -} - -py::object Graph_add_edges(Graph& self, py::list edges_for_adding, py::list edges_attr) { - self.dirty_nodes = true; - self.dirty_adj = true; - if (py::len(edges_attr) != 0) { - if (py::len(edges_for_adding) != py::len(edges_attr)) { - PyErr_Format(PyExc_AssertionError, "Edges and Attributes lists must have same length."); - return py::object(); - } - } - for (int i = 0;i < py::len(edges_for_adding);i++) { - py::tuple one_edge_for_adding = py::extract(edges_for_adding[i]); - py::dict edge_attr; - if (py::len(edges_attr)) { - edge_attr = py::extract(edges_attr[i]); - } - else { - edge_attr = py::dict(); - } - _add_one_edge(self, one_edge_for_adding[0], one_edge_for_adding[1], edge_attr); - } - return py::object(); -} - -py::object Graph_add_edges_from(py::tuple args, py::dict attr) { - Graph& self = py::extract(args[0]); - self.dirty_nodes = true; - self.dirty_adj = true; - py::list ebunch_to_add = py::list(args[1]); - for (int i = 0;i < len(ebunch_to_add);i++) { - py::list e = py::list(ebunch_to_add[i]); - py::object u, v; - py::dict dd; - switch (len(e)) { - case 2: { - u = e[0]; - v = e[1]; - break; - } - case 3: { - u = e[0]; - v = e[1]; - dd = py::extract(e[2]); - break; - } - default: { - PyErr_Format(PyExc_ValueError, "Edge tuple %R must be a 2 - tuple or 3 - tuple.", e.ptr()); - return py::object(); - } - } - node_t u_id, v_id; - if (!self.node_to_id.contains(u)) { - if (u == py::object()) { - PyErr_Format(PyExc_ValueError, "None cannot be a node"); - 
return py::object(); - } - u_id = _add_one_node(self, u); - } - if (!self.node_to_id.contains(v)) { - if (v == py::object()) { - PyErr_Format(PyExc_ValueError, "None cannot be a node"); - return py::object(); - } - v_id = _add_one_node(self, v); - } - auto datadict = self.adj[u_id].count(v_id) ? self.adj[u_id][v_id] : node_attr_dict_factory(); - py::list items = py::list(attr); - items.extend(py::list(dd)); - for (int i = 0;i < py::len(items);i++) { - py::tuple kv = py::extract(items[i]); - py::object pkey = kv[0]; - std::string weight_key = weight_to_string(pkey); - weight_t value = py::extract(kv[1]); - datadict.insert(std::make_pair(weight_key, value)); - } - //Warning: in Graph.py the edge attr is directed assigned by the dict extended from the original attr - self.adj[u_id][v_id].insert(datadict.begin(), datadict.end()); - self.adj[v_id][u_id].insert(datadict.begin(), datadict.end()); - } - return py::object(); -} - -py::object Graph_add_edges_from_file(Graph& self, py::str file, py::object weighted) { - self.dirty_nodes = true; - self.dirty_adj = true; - struct commactype : std::ctype { - commactype() : std::ctype(get_table()) {} - std::ctype_base::mask const* get_table() { - std::ctype_base::mask* rc = 0; - if (rc == 0) { - rc = new std::ctype_base::mask[std::ctype::table_size]; - std::fill_n(rc, std::ctype::table_size, std::ctype_base::mask()); - rc[','] = std::ctype_base::space; - rc[' '] = std::ctype_base::space; - rc['\t'] = std::ctype_base::space; - rc['\n'] = std::ctype_base::space; - rc['\r'] = std::ctype_base::space; - } - return rc; - } - }; - - std::ios::sync_with_stdio(0); - std::string file_path = py::extract(file); - std::ifstream in; - in.open(file_path); - if (!in.is_open()) { - PyErr_Format(PyExc_FileNotFoundError, "Please check the file and make sure the path only contains English"); - return py::object(); - } - in.imbue(std::locale(std::locale(), new commactype)); - std::string data, key("weight"); - std::string su, sv; - weight_t weight; - 
while (in >> su >> sv) { - py::object pu(su), pv(sv); - node_t u, v; - if (!self.node_to_id.contains(pu)) { - u = _add_one_node(self, pu); - } - else { - u = py::extract(self.node_to_id[pu]); - } - if (!self.node_to_id.contains(pv)) { - v = _add_one_node(self, pv); - } - else { - v = py::extract(self.node_to_id[pv]); - } - if (weighted) { - in >> weight; - self.adj[u][v][key] = self.adj[v][u][key] = weight; - } - else { - if (!self.adj[u].count(v)) { - self.adj[u][v] = node_attr_dict_factory(); - } - if (!self.adj[v].count(u)) { - self.adj[v][u] = node_attr_dict_factory(); - } - } - } - in.close(); - return py::object(); -} - -py::object Graph_add_weighted_edge(Graph& self, py::object u_of_edge, py::object v_of_edge, weight_t weight) { - self.dirty_nodes = true; - self.dirty_adj = true; - py::dict edge_attr; - edge_attr["weight"] = weight; - _add_one_edge(self, u_of_edge, v_of_edge, edge_attr); - return py::object(); -} - -py::object Graph_remove_edge(Graph& self, py::object u, py::object v) { - self.dirty_nodes = true; - self.dirty_adj = true; - if (self.node_to_id.contains(u) && self.node_to_id.contains(v)) { - node_t u_id = py::extract(self.node_to_id[u]); - node_t v_id = py::extract(self.node_to_id[v]); - auto& v_neighbors_info = self.adj[u_id]; - if (v_neighbors_info.find(v_id) != v_neighbors_info.end()) { - v_neighbors_info.erase(v_id); - if (u_id != v_id) { - self.adj[v_id].erase(u_id); - } - return py::object(); - } - } - PyErr_Format(PyExc_KeyError, "No edge %R-%R in graph.", u.ptr(), v.ptr()); - return py::object(); -} - -py::object Graph_remove_edges(py::object self, py::list edges_to_remove) { - Graph& self_ = py::extract(self); - for (int i = 0;i < py::len(edges_to_remove);i++) { - py::tuple edge = py::extract(edges_to_remove[i]); - py::object u = edge[0], v = edge[1]; - self.attr("remove_edge")(u, v); - } - self_.dirty_nodes = true; - self_.dirty_adj = true; - return py::object(); -} - -py::object Graph_number_of_edges(py::object self, py::object u, 
py::object v) { - if (u == py::object()) { - return self.attr("size")(); - } - Graph& self_ = py::extract(self); - node_t u_id = py::extract(self_.node_to_id.get(u, -1)); - node_t v_id = py::extract(self_.node_to_id.get(v, -1)); - return py::object(int(self_.adj.count(u_id) && self_.adj[u_id].count(v_id))); -} - -py::object Graph_has_edge(Graph& self, py::object u, py::object v) { - if (self.node_to_id.contains(u) && self.node_to_id.contains(v)) { - node_t u_id = py::extract(self.node_to_id[u]); - node_t v_id = py::extract(self.node_to_id[v]); - auto& v_neighbors_info = self.adj[u_id]; - if (v_neighbors_info.find(v_id) != v_neighbors_info.end()) { - return py::object(true); - } - } - return py::object(false); -} - -py::object Graph_copy(py::object self) { - Graph& self_ = py::extract(self); - py::object G = self.attr("__class__")(); - Graph& G_ = py::extract(G); - G_.graph.update(self_.graph); - G_.id_to_node.update(self_.id_to_node); - G_.node_to_id.update(self_.node_to_id); - G_.node = self_.node; - G_.adj = self_.adj; - return py::object(G); -} - -py::object Graph_degree(py::object self, py::object weight) { - py::dict degree; - py::list edges = py::extract(self.attr("edges")); - py::object u, v; - py::dict d; - for (int i = 0;i < py::len(edges);i++) { - py::tuple edge = py::extract(edges[i]); - u = edge[0]; - v = edge[1]; - d = py::extract(edge[2]); - if (degree.contains(u)) { - degree[u] += d.get(weight, 1); - } - else { - degree[u] = d.get(weight, 1); - } - if (degree.contains(v)) { - degree[v] += d.get(weight, 1); - } - else { - degree[v] = d.get(weight, 1); - } - } - py::list nodes = py::list(self.attr("nodes")); - for (int i = 0;i < py::len(nodes);i++) { - py::object node = nodes[i]; - if (!degree.contains(node)) { - degree[node] = 0; - } - } - return degree; -} - -py::object Graph_neighbors(py::object self, py::object node) { - Graph& self_ = py::extract(self); - if (self_.node_to_id.contains(node)) { - return self.attr("adj")[node].attr("__iter__")(); - 
} - else { - PyErr_Format(PyExc_KeyError, "No node %R", node.ptr()); - return py::object(); - } -} - -py::object Graph_nodes_subgraph(py::object self, py::list from_nodes) { - py::object G = self.attr("__class__")(); - Graph& self_ = py::extract(self); - Graph& G_ = py::extract(G); - G_.graph.update(self_.graph); - py::object nodes = self.attr("nodes"); - py::object adj = self.attr("adj"); - for (int i = 0;i < py::len(from_nodes);i++) { - py::object node = from_nodes[i]; - if (self_.node_to_id.contains(node)) { - py::object node_attr = nodes[node]; - _add_one_node(G_, node, node_attr); - } - py::object out_edges = adj[node]; - py::list edge_items = py::list(out_edges.attr("items")()); - for (int j = 0;j < py::len(edge_items);j++) { - py::tuple item = py::extract(edge_items[j]); - py::object v = item[0]; - py::object edge_attr = item[1]; - if (from_nodes.contains(v)) { - _add_one_edge(G_, node, v, edge_attr); - } - } - } - return G; -} - -py::object Graph_ego_subgraph(py::object self, py::object center) { - py::list neighbors_of_center = py::list(self.attr("all_neighbors")(center)); - neighbors_of_center.append(center); - return self.attr("nodes_subgraph")(neighbors_of_center); -} - -py::object Graph_size(py::object self, py::object weight) { - py::dict degree = py::extract(self.attr("degree")(weight)); - py::list items = degree.items(); - weight_t s = 0; - for (int i = 0;i < py::len(items);i++) { - s += py::extract(items[i][1]); - } - return (weight == py::object()) ? 
py::object(int(s) / 2) : py::object(s / 2); -} - -py::object Graph_is_directed(py::object self) { - return py::object(false); -} - -py::object Graph_is_multigraph(py::object self) { - return py::object(false); -} - -py::object Graph::get_nodes() { - py::object MappingProxyType = py::import("types").attr("MappingProxyType"); - if (this->dirty_nodes) { - py::dict nodes = py::dict(); - for (const auto& node_info : node) { - node_t id = node_info.first; - const auto& node_attr = node_info.second; - nodes[this->id_to_node[id]] = MappingProxyType(attr_to_dict(node_attr)); - } - this->nodes_cache = MappingProxyType(nodes); - this->dirty_nodes = false; - } - return this->nodes_cache; -} - -py::object Graph::get_name() { - return this->graph.attr("get")("name", ""); -} - -py::object Graph::get_graph() { - return this->graph; -} - -py::object Graph::get_adj() { - py::object MappingProxyType = py::import("types").attr("MappingProxyType"); - if (this->dirty_adj) { - py::dict adj = py::dict(); - for (const auto& ego_edges : this->adj) { - node_t start_point = ego_edges.first; - py::dict ego_edges_dict = py::dict(); - for (const auto& edge_info : ego_edges.second) { - node_t end_point = edge_info.first; - const auto& edge_attr = edge_info.second; - ego_edges_dict[this->id_to_node[end_point]] = MappingProxyType(attr_to_dict(edge_attr)); - } - adj[this->id_to_node[start_point]] = MappingProxyType(ego_edges_dict); - } - this->adj_cache = MappingProxyType(adj); - this->dirty_adj = false; - } - return this->adj_cache; -} - -py::object Graph::get_edges() { - py::list edges = py::list(); - std::set > seen; - for (const auto& ego_edges : this->adj) { - node_t u = ego_edges.first; - for (const auto& edge_info : ego_edges.second) { - node_t v = edge_info.first; - const auto& edge_attr = edge_info.second; - if (seen.find(std::make_pair(u,v)) == seen.end()) { - seen.insert(std::make_pair(u,v)); - seen.insert(std::make_pair(v,u)); - edges.append(py::make_tuple(this->id_to_node[u], 
this->id_to_node[v], attr_to_dict(edge_attr))); - } - } - } - return edges; -} diff --git a/cpp_easygraph/classes/graph.h b/cpp_easygraph/classes/graph.h deleted file mode 100644 index b7b1e931..00000000 --- a/cpp_easygraph/classes/graph.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "../common/common.h" - -struct Graph -{ - node_dict_factory node; - adj_dict_factory adj; - py::dict node_to_id, id_to_node, graph; - node_t id; - bool dirty_nodes, dirty_adj; - py::object nodes_cache, adj_cache; - - Graph(); - py::object get_nodes(); - py::object get_name(); - py::object get_graph(); - py::object get_adj(); - py::object get_edges(); -}; - - -py::object Graph__init__(py::tuple args, py::dict kwargs); -py::object Graph__iter__(py::object self); -py::object Graph__len__(py::object self); -py::object Graph__contains__(py::object self, py::object node); -py::object Graph__getitem__(py::object self, py::object node); -py::object Graph_add_node(py::tuple args, py::dict kwargs); -py::object Graph_add_nodes(Graph& self, py::list nodes_for_adding, py::list nodes_attr); -py::object Graph_add_nodes_from(py::tuple args, py::dict kwargs); -py::object Graph_remove_node(Graph& self, py::object node_to_remove); -py::object Graph_remove_nodes(py::object self, py::list nodes_to_remove); -py::object Graph_number_of_nodes(Graph& self); -py::object Graph_has_node(Graph& self, py::object node); -py::object Graph_nbunch_iter(py::object self, py::object nbunch); -py::object Graph_add_edge(py::tuple args, py::dict kwargs); -py::object Graph_add_edges(Graph& self, py::list edges_for_adding, py::list edges_attr); -py::object Graph_add_edges_from(py::tuple args, py::dict attr); -py::object Graph_add_edges_from_file(Graph& self, py::str file, py::object weighted); -py::object Graph_add_weighted_edge(Graph& self, py::object u_of_edge, py::object v_of_edge, weight_t weight); -py::object Graph_remove_edge(Graph& self, py::object u, py::object v); 
-py::object Graph_remove_edges(py::object self, py::list edges_to_remove); -py::object Graph_number_of_edges(py::object self, py::object u, py::object v); -py::object Graph_has_edge(Graph& self, py::object u, py::object v); -py::object Graph_copy(py::object self); -py::object Graph_degree(py::object self, py::object weight); -py::object Graph_neighbors(py::object self, py::object node); -py::object Graph_nodes_subgraph(py::object self, py::list from_nodes); -py::object Graph_ego_subgraph(py::object self, py::object center); -py::object Graph_size(py::object self, py::object weight); -py::object Graph_is_directed(py::object self); -py::object Graph_is_multigraph(py::object self); - diff --git a/cpp_easygraph/classes/operation.cpp b/cpp_easygraph/classes/operation.cpp deleted file mode 100644 index 7e15c08f..00000000 --- a/cpp_easygraph/classes/operation.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "operation.h" -#include "graph.h" -py::object density(py::object G) { - Graph &G_ = py::extract(G); - node_t n = G_.node.size(); - adj_dict_factory adj=G_.adj; - node_t m=0; - for(adj_dict_factory::iterator i=adj.begin();i!=adj.end();i++){ - adj_attr_dict_factory node_edge=i->second; - m+=node_edge.size(); - } - if (m == 0 || n <= 1) { - return py::object(0); - } - weight_t d = m * 1.0 / (n * (n - 1)); - return py::object(d); -} diff --git a/cpp_easygraph/classes/operation.h b/cpp_easygraph/classes/operation.h deleted file mode 100644 index 619ac629..00000000 --- a/cpp_easygraph/classes/operation.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "../common/common.h" - -py::object density(py::object G); \ No newline at end of file diff --git a/cpp_easygraph/common/common.h b/cpp_easygraph/common/common.h deleted file mode 100644 index 096b7552..00000000 --- a/cpp_easygraph/common/common.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include - -namespace py = boost::python; - -typedef int node_t; -typedef float weight_t; -typedef std::map node_attr_dict_factory; //(weight_key, value) -typedef std::map edge_attr_dict_factory; //(weight_key, value) -typedef std::unordered_map node_dict_factory; //(node, node_attr) -typedef std::unordered_map adj_attr_dict_factory; //(out_node, (weight_key, value)) -typedef std::unordered_map adj_dict_factory; //(node, edge_attr) \ No newline at end of file diff --git a/cpp_easygraph/common/utils.cpp b/cpp_easygraph/common/utils.cpp deleted file mode 100644 index 5c488280..00000000 --- a/cpp_easygraph/common/utils.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "utils.h" - -py::object attr_to_dict(const node_attr_dict_factory& attr) { - py::dict attr_dict = py::dict(); - for (const auto& kv : attr) { - attr_dict[kv.first] = kv.second; - } - return attr_dict; -} - -std::string weight_to_string(py::object weight) { - py::object warn = py::import("warnings").attr("warn"); - if (weight.attr("__class__") != py::str().attr("__class__")) { - if (weight != py::object()) { - warn(py::str(weight) + py::str(" would be transformed into an instance of str.")); - } - weight = py::str(weight); - } - std::string weight_key = py::extract(weight); - return weight_key; -} - -py::object py_sum(py::object o) { - py::object sum = py::import("builtins").attr("sum"); - return sum(o); -} \ No newline at end of file diff --git a/cpp_easygraph/common/utils.h b/cpp_easygraph/common/utils.h deleted file mode 100644 index f41d7931..00000000 --- a/cpp_easygraph/common/utils.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "common.h" - -py::object attr_to_dict(const node_attr_dict_factory& attr); -std::string weight_to_string(py::object weight); -py::object py_sum(py::object o); \ No newline at end of file diff --git a/cpp_easygraph/cpp_easygraph.cpp b/cpp_easygraph/cpp_easygraph.cpp deleted file mode 
100644 index 5c7c1be1..00000000 --- a/cpp_easygraph/cpp_easygraph.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "classes/__init__.h" -#include "functions/__init__.h" - -BOOST_PYTHON_MODULE(cpp_easygraph) -{ - - py::class_("Graph", py::no_init) - .def("__init__", py::raw_function(&Graph__init__)) - .def(py::init<>()) - .def("__iter__", &Graph__iter__) - .def("__len__", &Graph__len__) - .def("__contains__", &Graph__contains__, (py::arg("node"))) - .def("__getitem__", &Graph__getitem__, (py::arg("node"))) - .def("add_node", py::raw_function(&Graph_add_node)) - .def("add_nodes", &Graph_add_nodes, (py::arg("nodes_for_adding"), py::arg("nodes_attr") = py::list())) - .def("add_nodes_from", py::raw_function(&Graph_add_nodes_from)) - .def("remove_node", &Graph_remove_node, (py::arg("node_to_remove"))) - .def("remove_nodes", &Graph_remove_nodes, (py::arg("nodes_to_remove"))) - .def("number_of_nodes", &Graph_number_of_nodes) - .def("has_node", &Graph_has_node, (py::arg("node"))) - .def("nbunch_iter", &Graph_nbunch_iter, (py::arg("nbunch") = py::object())) - .def("add_edge", py::raw_function(&Graph_add_edge)) - .def("add_edges", &Graph_add_edges, (py::arg("edges_for_adding"), py::arg("edges_attr") = py::list())) - .def("add_edges_from", py::raw_function(&Graph_add_edges_from)) - .def("add_edges_from_file", &Graph_add_edges_from_file, (py::arg("file"), py::arg("weighted") = false)) - .def("add_weighted_edge", &Graph_add_weighted_edge, (py::arg("u_of_edge"), py::arg("v_of_edge"), py::arg("weight"))) - .def("remove_edge", &Graph_remove_edge, (py::arg("u"), py::arg("v"))) - .def("remove_edges", &Graph_remove_edges, (py::arg("edges_to_remove"))) - .def("number_of_edges", &Graph_number_of_edges, (py::arg("u") = py::object(), py::arg("v") = py::object())) - .def("has_edge", &Graph_has_edge, (py::arg("u"), py::arg("y"))) - .def("copy", &Graph_copy) - .def("degree", &Graph_degree, (py::arg("weight") = py::object("weight"))) - .def("neighbors", &Graph_neighbors, (py::arg("node"))) - 
.def("all_neighbors", &Graph_neighbors, (py::arg("node"))) - .def("nodes_subgraph", &Graph_nodes_subgraph, (py::arg("from_nodes"))) - .def("ego_subgraph", &Graph_ego_subgraph, (py::arg("center"))) - .def("size", &Graph_size, (py::arg("weight") = py::object())) - .def("is_directed", &Graph_is_directed) - .def("is_multigraph", &Graph_is_multigraph) - .add_property("graph", &Graph::get_graph) - .add_property("nodes", &Graph::get_nodes) - .add_property("name", &Graph::get_name) - .add_property("adj", &Graph::get_adj) - .add_property("edges", &Graph::get_edges); - - py::class_>("DiGraph", py::no_init) - .def("__init__", py::raw_function(&DiGraph__init__)) - .def(py::init<>()) - .def("out_degree", &DiGraph_out_degree, (py::arg("weight") = "weight")) - .def("in_degree", &DiGraph_in_degree, (py::arg("weight") = "weight")) - .def("degree", &DiGraph_degree, (py::arg("weight") = "weight")) - .def("size", &DiGraph_size, (py::arg("weight") = py::object())) - .def("number_of_edges", &DiGraph_number_of_edges, (py::arg("u") = py::object(), py::arg("v") = py::object())); - - py::def("cpp_density",&density,(py::arg("G"))); - py::def("cpp_constraint", &constraint, (py::arg("G"), py::arg("nodes") = py::object(), py::arg("weight") = py::object(), py::arg("n_workers") = py::object())); - py::def("cpp_effective_size", &effective_size, (py::arg("G"), py::arg("nodes") = py::object(), py::arg("weight") = py::object(), py::arg("n_workers") = py::object())); - py::def("cpp_hierarchy", &hierarchy, (py::arg("G"), py::arg("nodes") = py::object(), py::arg("weight") = py::object(), py::arg("n_workers") = py::object())); - py::def("cpp_dijkstra_multisource", &_dijkstra_multisource, (py::arg("G"), py::arg("sources"), py::arg("weight") = "weight", py::arg("target") = py::object())); - py::def("cpp_clustering", &clustering, (py::arg("G"), py::arg("nodes") = py::object(), py::arg("weight") = py::object())); - py::def("cpp_biconnected_dfs_record_edges", &_biconnected_dfs_record_edges, (py::arg("G"), 
py::arg("need_components") = true)); - py::def("cpp_Floyd", &Floyd, (py::arg("G"))); - py::def("cpp_Prim", &Prim, (py::arg("G"))); - py::def("cpp_Kruskal", &Kruskal, (py::arg("G"))); -} \ No newline at end of file diff --git a/cpp_easygraph/functions/__init__.h b/cpp_easygraph/functions/__init__.h deleted file mode 100644 index 485b9381..00000000 --- a/cpp_easygraph/functions/__init__.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "components/__init__.h" -#include "not_sorted/__init__.h" -#include "path/__init__.h" -#include "structural_holes/__init__.h" \ No newline at end of file diff --git a/cpp_easygraph/functions/components/__init__.h b/cpp_easygraph/functions/components/__init__.h deleted file mode 100644 index 1b91d451..00000000 --- a/cpp_easygraph/functions/components/__init__.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "biconnected.h" \ No newline at end of file diff --git a/cpp_easygraph/functions/components/biconnected.cpp b/cpp_easygraph/functions/components/biconnected.cpp deleted file mode 100644 index ffd2001c..00000000 --- a/cpp_easygraph/functions/components/biconnected.cpp +++ /dev/null @@ -1,108 +0,0 @@ -#include "biconnected.h" -#include "../../classes/graph.h" -#include "../../common/utils.h" - -node_t index_edge(std::vector>& edges, const std::pair& target) { - for (int i = edges.size() - 1;i >= 0;i--) { - if ((edges[i].first == target.first) && (edges[i].second == target.second)) { - return i; - } - } - return -1; -} - - -py::object _biconnected_dfs_record_edges(py::object G, py::object need_components) { - py::list ret = py::list(); - std::unordered_set visited; - Graph& G_ = py::extract(G); - node_dict_factory nodes_list = G_.node; - for (node_dict_factory::iterator iter = nodes_list.begin();iter != nodes_list.end();iter++) { - node_t start_id = iter->first; - if (visited.find(start_id) != visited.end()) { - continue; - } - std::unordered_map 
discovery; - std::unordered_map low; - node_t root_children = 0; - discovery.emplace(start_id, 0); - low.emplace(start_id, 0); - visited.emplace(start_id); - std::vector> edge_stack; - std::vector stack; - adj_attr_dict_factory start_adj = G_.adj[start_id]; - NeighborIterator neighbors_iter = NeighborIterator(start_adj); - stack_node initial_stack_node(start_id, start_id, neighbors_iter); - stack.emplace_back(initial_stack_node); - while (!stack.empty()) { - stack_node& node_info = stack.back(); - node_t node_grandparent_id = node_info.grandparent; - node_t node_parent_id = node_info.parent; - try { - node_t node_child_id = node_info.neighbors_iter.next(); - if (node_grandparent_id == node_child_id) { - continue; - } - if (visited.find(node_child_id) != visited.end()) { - if (discovery[node_child_id] <= discovery[node_parent_id]) { - low[node_parent_id] = std::min(low[node_parent_id], discovery[node_child_id]); - if (need_components) { - edge_stack.emplace_back(std::make_pair(node_parent_id, node_child_id)); - } - } - } - else { - low[node_child_id] = discovery[node_child_id] = discovery.size(); - visited.emplace(node_child_id); - adj_attr_dict_factory node_child_adj = G_.adj[node_child_id]; - NeighborIterator child_neighbors_iter = NeighborIterator(G_.adj[node_child_id]); - stack.emplace_back(node_parent_id, node_child_id, child_neighbors_iter); - if (need_components) { - edge_stack.emplace_back(std::make_pair(node_parent_id, node_child_id)); - } - } - } - catch (int) { - stack.pop_back(); - if (stack.size() > 1) { - if (low[node_parent_id] >= discovery[node_grandparent_id]) { - if (need_components) { - py::list tmp_ret = py::list(); - std::pair iter_edge = std::make_pair(-1, -1); - while ((iter_edge.first != node_grandparent_id || iter_edge.second != node_parent_id)) { - iter_edge = edge_stack.back(); - edge_stack.pop_back(); - tmp_ret.append(py::make_tuple(G_.id_to_node[iter_edge.first], G_.id_to_node[iter_edge.second])); - } - ret.append(tmp_ret); - } - else { 
- ret.append(G_.id_to_node[node_grandparent_id]); - } - } - low[node_grandparent_id] = std::min(low[node_grandparent_id], low[node_parent_id]); - } - else if (stack.size() > 0) { - root_children += 1; - if (need_components == true) { - std::pair target = std::make_pair(node_grandparent_id, node_parent_id); - node_t ind = index_edge(edge_stack, target); - if (ind != -1) { - py::list tmp_ret = py::list(); - for (node_t z = ind;z < edge_stack.size();z++) { - tmp_ret.append(py::make_tuple(G_.id_to_node[edge_stack[z].first], G_.id_to_node[edge_stack[z].second])); - } - ret.append(tmp_ret); - } - } - } - } - } - if (!need_components) { - if (root_children > 1) { - ret.append(G_.id_to_node(start_id)); - } - } - } - return ret; -} diff --git a/cpp_easygraph/functions/not_sorted/__init__.h b/cpp_easygraph/functions/not_sorted/__init__.h deleted file mode 100644 index b3f8406a..00000000 --- a/cpp_easygraph/functions/not_sorted/__init__.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "cluster.h" \ No newline at end of file diff --git a/cpp_easygraph/functions/not_sorted/cluster.cpp b/cpp_easygraph/functions/not_sorted/cluster.cpp deleted file mode 100644 index f823f0ad..00000000 --- a/cpp_easygraph/functions/not_sorted/cluster.cpp +++ /dev/null @@ -1,113 +0,0 @@ -#include "cluster.h" -#include "../../classes/graph.h" -#include "../../common/utils.h" - -inline weight_t wt(adj_dict_factory& adj, node_t u, node_t v, std::string weight, weight_t max_weight = 1) { - auto& attr = adj[u][v]; - return (attr.count(weight) ? 
attr[weight] : 1) / max_weight; -} - -py::list _weighted_triangles_and_degree(py::object G, py::object nodes, py::object weight) { - std::string weight_key = weight_to_string(weight); - Graph& G_ = py::extract(G); - weight_t max_weight = 1; - if (weight == py::object() || G.attr("number_of_edges")() == 0) { - max_weight = 1; - } - else { - int assigned = 0; - for (auto& u_info : G_.adj) { - for (auto& v_info : u_info.second) { - auto& d = v_info.second; - if (assigned) { - max_weight = std::max(max_weight, d.count(weight_key) ? d[weight_key] : 1); - } - else { - assigned = 1; - max_weight = d.count(weight_key) ? d[weight_key] : 1; - } - } - } - } - py::list nodes_list = py::list(nodes == py::object() ? G.attr("nodes") : G.attr("nbunch_iter")(nodes)); - py::list ret = py::list(); - for (int i = 0;i < py::len(nodes_list);i++) { - node_t i_id = py::extract(G_.node_to_id[nodes_list[i]]); - std::unordered_set inbrs, seen; - auto& adj = G_.adj; - for (const auto& pair : adj[i_id]) { - inbrs.insert(pair.first); - } - inbrs.erase(i_id); - weight_t weighted_triangles = 0; - for (const auto& j_id : inbrs) { - seen.insert(j_id); - weight_t wij = wt(adj, i_id, j_id, weight_key, max_weight); - for (const auto& k_id : inbrs) { - if (adj[j_id].count(k_id) && !seen.count(k_id)) { - weight_t wjk = wt(adj, j_id, k_id, weight_key, max_weight); - weight_t wki = wt(adj, k_id, i_id, weight_key, max_weight); - weighted_triangles += std::cbrt(wij * wjk * wki); - } - } - } - ret.append(py::make_tuple(G_.id_to_node[i_id], inbrs.size(), 2 * weighted_triangles)); - } - return ret; -} - -py::list _triangles_and_degree(py::object G, py::object nodes = py::object()) { - Graph& G_ = py::extract(G); - auto& adj = G_.adj; - py::list nodes_list = py::list(nodes == py::object() ? 
G.attr("nodes") : G.attr("nbunch_iter")(nodes)); - py::list ret = py::list(); - for (int i = 0;i < py::len(nodes_list);i++) { - node_t v = py::extract(G_.node_to_id[nodes_list[i]]); - std::unordered_set vs; - for (const auto& pair : adj[v]) { - vs.insert(pair.first); - } - vs.erase(v); - weight_t ntriangles = 0; - for (const auto& w : vs) { - for (const auto& node : vs) { - ntriangles += node != w && adj[w].count(node); - } - } - ret.append(py::make_tuple(G_.id_to_node[v], vs.size(), ntriangles)); - } - return ret; -} - -py::object clustering(py::object G, py::object nodes, py::object weight) { - py::dict clusterc = py::dict(); - if (G.attr("is_directed")()) { - PyErr_Format(PyExc_RuntimeError, "Not implemented yet"); - return py::object(); - } - else { - py::list td_list; - if (weight != py::object()) { - td_list = _weighted_triangles_and_degree(G, nodes, weight); - } - else { - td_list = _triangles_and_degree(G, nodes); - } - for (int i = 0;i < py::len(td_list);i++) { - py::tuple tuple = py::extract(td_list[i]); - py::object v = tuple[0]; - int d = py::extract(tuple[1]); - weight_t t = py::extract(tuple[2]); - if (t == 0) { - clusterc[v] = 0; - } - else { - clusterc[v] = t / (d * (d - 1)); - } - } - } - if (G.contains(nodes)) { - return clusterc[nodes]; - } - return clusterc; -} \ No newline at end of file diff --git a/cpp_easygraph/functions/not_sorted/cluster.h b/cpp_easygraph/functions/not_sorted/cluster.h deleted file mode 100644 index 2a547c6d..00000000 --- a/cpp_easygraph/functions/not_sorted/cluster.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "../../common/common.h" - -py::object clustering(py::object G, py::object nodes, py::object weight); \ No newline at end of file diff --git a/cpp_easygraph/functions/path/__init__.h b/cpp_easygraph/functions/path/__init__.h deleted file mode 100644 index f09fd758..00000000 --- a/cpp_easygraph/functions/path/__init__.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once -#define 
BOOST_PYTHON_STATIC_LIB - -#include "path.h" \ No newline at end of file diff --git a/cpp_easygraph/functions/path/path.cpp b/cpp_easygraph/functions/path/path.cpp deleted file mode 100644 index 75761c8c..00000000 --- a/cpp_easygraph/functions/path/path.cpp +++ /dev/null @@ -1,226 +0,0 @@ -#include "path.h" -#include "../../classes/graph.h" -#include "../../common/utils.h" - -py::object _dijkstra_multisource(py::object G, py::object sources, py::object weight, py::object target) { - Graph& G_ = py::extract(G); - std::string weight_key = weight_to_string(weight); - node_t target_id = py::extract(G_.node_to_id.get(target, -1)); - std::map dist, seen; - std::priority_queue, std::vector>, std::greater>> Q; - py::list sources_list = py::list(sources); - for (int i = 0;i < py::len(sources_list);i++) { - node_t source = py::extract(G_.node_to_id[sources_list[i]]); - seen[source] = 0; - Q.push(std::make_pair(0, source)); - } - while (!Q.empty()) { - std::pair node = Q.top(); - Q.pop(); - weight_t d = node.first; - node_t v = node.second; - if (dist.count(v)) { - continue; - } - dist[v] = d; - if (v == target_id) { - break; - } - adj_dict_factory& adj = G_.adj; - for (auto& neighbor_info : adj[v]) { - node_t u = neighbor_info.first; - weight_t cost = neighbor_info.second.count(weight_key) ? 
neighbor_info.second[weight_key] : 1; - weight_t vu_dist = dist[v] + cost; - if (dist.count(u)) { - if (vu_dist < dist[u]) { - PyErr_Format(PyExc_ValueError, "Contradictory paths found: negative weights?"); - return py::object(); - } - } - else if (!seen.count(u) || vu_dist < seen[u]) { - seen[u] = vu_dist; - Q.push(std::make_pair(vu_dist, u)); - } - else { - continue; - } - } - } - py::dict pydist = py::dict(); - for (const auto& kv : dist) { - pydist[G_.id_to_node[kv.first]] = kv.second; - } - return pydist; -} - -py::object Prim(py::object G) { - std::unordered_map> res_dict; - py::dict result_dict = py::dict(); - Graph& G_ = py::extract(G); - adj_dict_factory adj = G_.adj; - std::vector selected; - std::vector candidate; - node_dict_factory node_list = G_.node; - for (node_dict_factory::iterator i = node_list.begin();i != node_list.end();i++) { - node_t node_id = i->first; - result_dict[G_.id_to_node[node_id]] = py::dict(); - if (selected.size() == 0) { - selected.emplace_back(node_id); - } - else { - candidate.emplace_back(node_id); - } - } - while (candidate.size() > 0) { - node_t start_id = -1; - node_t end_id = -1; - weight_t min_weight = INFINITY; - for (node_t i = 0;i < selected.size();i++) { - for (node_t j = 0;j < candidate.size();j++) { - adj_attr_dict_factory node_adj = G_.adj[selected[i]]; - edge_attr_dict_factory edge_attr; - weight_t edge_weight = INFINITY; - bool j_exist = false; - if (node_adj.find(candidate[j]) != node_adj.end()) { - edge_attr = node_adj[candidate[j]]; - edge_weight = edge_attr.find("weight") != edge_attr.end() ? 
edge_attr["weight"] : 1; - j_exist = true; - } - if ((node_list.find(selected[i]) != node_list.end()) && - j_exist && - (edge_weight < min_weight)) { - start_id = selected[i]; - end_id = candidate[j]; - min_weight = edge_weight; - } - } - } - if (start_id != -1 && end_id != -1) { - res_dict[start_id][end_id] = min_weight; - selected.emplace_back(end_id); - std::vector::iterator temp_iter; - temp_iter = std::find(candidate.begin(), candidate.end(), end_id); - candidate.erase(temp_iter); - } - else { - break; - } - } - for (std::unordered_map>::iterator k = res_dict.begin(); - k != res_dict.end();k++) { - py::object res_node = G_.id_to_node[k->first]; - for (std::unordered_map::iterator z = k->second.begin();z != k->second.end();z++) { - py::object res_adj_node = G_.id_to_node[z->first]; - result_dict[res_node][res_adj_node] = z->second; - } - } - return result_dict; - -} -bool comp(const std::pair, weight_t>& a, const std::pair, weight_t>& b) { - return a.second < b.second; -} -py::object Kruskal(py::object G) { - std::unordered_map> res_dict; - py::dict result_dict = py::dict(); - std::vector> group; - Graph& G_ = py::extract(G); - adj_dict_factory adj = G_.adj; - node_dict_factory node_list = G_.node; - std::vector, weight_t>>edge_list; - for (node_dict_factory::iterator i = node_list.begin();i != node_list.end();i++) { - node_t i_id = i->first; - result_dict[G_.id_to_node[i_id]] = py::dict(); - std::vector temp_vector; - temp_vector.emplace_back(i_id); - group.emplace_back(temp_vector); - adj_attr_dict_factory i_adj = adj[i_id]; - for (adj_attr_dict_factory::iterator j = i_adj.begin();j != i_adj.end();j++) { - node_t j_id = j->first; - weight_t weight = adj[i_id][j_id].find("weight") != adj[i_id][j_id].end() ? 
adj[i_id][j_id]["weight"] : 1; - edge_list.emplace_back(std::make_pair(std::make_pair(i_id, j_id), weight)); - } - } - std::sort(edge_list.begin(), edge_list.end(), comp); - node_t m, n; - for (auto edge : edge_list) { - for (node_t i = 0;i < group.size();i++) { - for (node_t j = 0;j < group[i].size();j++) { - if (group[i][j] == edge.first.first) { - m = i; - break; - } - } - for (node_t j = 0;j < group[i].size();j++) { - if (group[i][j] == edge.first.second) { - n = i; - break; - } - } - } - if (m != n) { - - res_dict[edge.first.first][edge.first.second] = edge.second; - std::vector temp_vector; - group[m].insert(group[m].end(), group[n].begin(), group[n].end()); - group[n].clear(); - } - } - for (std::unordered_map>::iterator k = res_dict.begin(); - k != res_dict.end();k++) { - py::object res_node = G_.id_to_node[k->first]; - for (std::unordered_map::iterator z = k->second.begin();z != k->second.end();z++) { - py::object res_adj_node = G_.id_to_node[z->first]; - result_dict[res_node][res_adj_node] = z->second; - } - } - return result_dict; -} - -py::object Floyd(py::object G) { - std::unordered_map> res_dict; - Graph& G_ = py::extract(G); - adj_dict_factory adj = G_.adj; - py::dict result_dict = py::dict(); - node_dict_factory node_list = G_.node; - for (node_dict_factory::iterator i = node_list.begin();i != node_list.end();i++) { - result_dict[G_.id_to_node[i->first]] = py::dict(); - adj_attr_dict_factory temp_key = adj[i->first]; - for (node_dict_factory::iterator j = node_list.begin();j != node_list.end();j++) { - if (temp_key.find(j->first) != temp_key.end()) { - if (adj[i->first][j->first].count("weight") == 0) { - adj[i->first][j->first]["weight"] = 1; - } - res_dict[i->first][j->first] = adj[i->first][j->first]["weight"]; - } - else { - res_dict[i->first][j->first] = INFINITY; - } - if (i->first == j->first) { - res_dict[i->first][i->first] = 0; - } - } - } - - for (node_dict_factory::iterator k = node_list.begin();k != node_list.end();k++) { - for 
(node_dict_factory::iterator i = node_list.begin();i != node_list.end();i++) { - for (node_dict_factory::iterator j = node_list.begin();j != node_list.end();j++) { - weight_t temp = res_dict[i->first][k->first] + res_dict[k->first][j->first]; - weight_t i_j_weight = res_dict[i->first][j->first]; - if (i_j_weight > temp) { - res_dict[i->first][j->first] = temp; - } - } - } - } - - for (std::unordered_map>::iterator k = res_dict.begin(); - k != res_dict.end();k++) { - py::object res_node = G_.id_to_node[k->first]; - for (std::unordered_map::iterator z = k->second.begin();z != k->second.end();z++) { - py::object res_adj_node = G_.id_to_node[z->first]; - result_dict[res_node][res_adj_node] = z->second; - } - } - return result_dict; -} diff --git a/cpp_easygraph/functions/path/path.h b/cpp_easygraph/functions/path/path.h deleted file mode 100644 index a125184b..00000000 --- a/cpp_easygraph/functions/path/path.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "../../common/common.h" - -py::object _dijkstra_multisource(py::object G, py::object sources, py::object weight, py::object target); -py::object Floyd(py::object G); -py::object Prim(py::object G); -py::object Kruskal(py::object G); \ No newline at end of file diff --git a/cpp_easygraph/functions/structural_holes/__init__.h b/cpp_easygraph/functions/structural_holes/__init__.h deleted file mode 100644 index 62eb0f9c..00000000 --- a/cpp_easygraph/functions/structural_holes/__init__.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "evaluation.h" \ No newline at end of file diff --git a/cpp_easygraph/functions/structural_holes/evaluation.cpp b/cpp_easygraph/functions/structural_holes/evaluation.cpp deleted file mode 100644 index 1799999b..00000000 --- a/cpp_easygraph/functions/structural_holes/evaluation.cpp +++ /dev/null @@ -1,205 +0,0 @@ -#include "evaluation.h" -#include "../../classes/graph.h" -#include "../../common/utils.h" - 
-struct pair_hash -{ - template - std::size_t operator() (const std::pair& p) const - { - auto h1 = std::hash()(p.first); - auto h2 = std::hash()(p.second); - return h1 ^ h2; - } -}; - -std::unordered_map, weight_t, pair_hash> sum_nmw_rec, max_nmw_rec, local_constraint_rec; - -enum norm_t { - sum, max -}; - - -weight_t mutual_weight(adj_dict_factory& G, node_t u, node_t v, std::string weight) { - weight_t a_uv = 0, a_vu = 0; - if (G.count(u) && G[u].count(v)) { - edge_attr_dict_factory& guv = G[u][v]; - a_uv = guv.count(weight) ? guv[weight] : 1; - } - if (G.count(v) && G[v].count(u)) { - edge_attr_dict_factory& gvu = G[v][u]; - a_uv = gvu.count(weight) ? gvu[weight] : 1; - } - return a_uv + a_vu; -} - -weight_t normalized_mutual_weight(adj_dict_factory& G, node_t u, node_t v, std::string weight, norm_t norm = sum) { - std::pair edge = std::make_pair(u, v); - auto& nmw_rec = (norm == sum) ? sum_nmw_rec : max_nmw_rec; - if (nmw_rec.count(edge)) { - return nmw_rec[edge]; - } - else { - weight_t scale = 0; - for (auto& w : G[u]) { - weight_t temp_weight = mutual_weight(G, u, w.first, weight); - scale = (norm == sum) ? (scale + temp_weight) : std::max(scale, temp_weight); - } - weight_t nmw = scale ? 
(mutual_weight(G, u, v, weight) / scale) : 0; - nmw_rec[edge] = nmw; - return nmw; - } -} - -weight_t local_constraint(adj_dict_factory& G, node_t u, node_t v, std::string weight = "None") { - std::pair edge = std::make_pair(u, v); - if (local_constraint_rec.count(edge)) { - return local_constraint_rec[edge]; - } - else { - weight_t direct = normalized_mutual_weight(G, u, v, weight); - weight_t indirect = 0; - for (auto& w : G[u]) { - indirect += normalized_mutual_weight(G, u, w.first, weight) * normalized_mutual_weight(G, w.first, v, weight); - } - weight_t result = pow((direct + indirect), 2); - local_constraint_rec[edge] = result; - return result; - } -} - -std::pair compute_constraint_of_v(adj_dict_factory& G, node_t v, std::string weight) { - weight_t constraint_of_v = 0; - if (G[v].size() == 0) { - constraint_of_v = Py_NAN; - } - else { - for (const auto& n : G[v]) { - constraint_of_v += local_constraint(G, v, n.first, weight); - } - } - return std::make_pair(v, constraint_of_v); -} - -py::object constraint(py::object G, py::object nodes, py::object weight, py::object n_workers) { - std::string weight_key = weight_to_string(weight); - sum_nmw_rec.clear(); - max_nmw_rec.clear(); - local_constraint_rec.clear(); - if (nodes == py::object()) { - nodes = G.attr("nodes"); - } - py::list nodes_list = py::list(nodes); - py::list constraint_results = py::list(); - Graph& G_ = py::extract(G); - for (int i = 0;i < py::len(nodes_list);i++) { - py::object v = nodes_list[i]; - node_t v_id = py::extract(G_.node_to_id[v]); - std::pair constraint_pair = compute_constraint_of_v(G_.adj, v_id, weight_key); - py::tuple constraint_of_v = py::make_tuple(G_.id_to_node[constraint_pair.first], constraint_pair.second); - constraint_results.append(constraint_of_v); - } - py::dict constraint = py::dict(constraint_results); - return constraint; -} - -weight_t redundancy(adj_dict_factory& G, node_t u, node_t v, std::string weight = "None") { - weight_t r = 0; - for (const auto& 
neighbor_info : G[u]) { - node_t w = neighbor_info.first; - r += normalized_mutual_weight(G, u, w, weight) * normalized_mutual_weight(G, v, w, weight, max); - } - return 1 - r; -} - -py::object effective_size(py::object G, py::object nodes, py::object weight, py::object n_workers) { - Graph& G_ = py::extract(G); - sum_nmw_rec.clear(); - max_nmw_rec.clear(); - py::dict effective_size = py::dict(); - if (nodes == py::object()) { - nodes = G; - } - nodes = py::list(nodes); - if (!G.attr("is_directed")() && weight == py::object()) { - for (int i = 0;i < py::len(nodes);i++) { - py::object v = nodes[i]; - if (py::len(G[v]) == 0) { - effective_size[v] = py::object(Py_NAN); - continue; - } - py::object E = G.attr("ego_subgraph")(v); - if (py::len(E) > 1) { - weight_t size = py::extract(E.attr("size")()); - effective_size[v] = py::len(E) - 1 - (2.0 * size) / (py::len(E) - 1); - } - else { - effective_size[v] = 0; - } - } - } - else { - std::string weight_key = weight_to_string(weight); - for (int i = 0;i < py::len(nodes);i++) { - py::object v = nodes[i]; - if (py::len(G[v]) == 0) { - effective_size[v] = py::object(Py_NAN); - continue; - } - weight_t redundancy_sum = 0; - node_t v_id = py::extract(G_.node_to_id[v]); - for (const auto& neighbor_info : G_.adj[v_id]) { - node_t u_id = neighbor_info.first; - redundancy_sum += redundancy(G_.adj, v_id, u_id, weight_key); - } - effective_size[v] = redundancy_sum; - } - } - return effective_size; -} - -py::object hierarchy(py::object G, py::object nodes, py::object weight, py::object n_workers) { - sum_nmw_rec.clear(); - max_nmw_rec.clear(); - local_constraint_rec.clear(); - std::string weight_key = weight_to_string(weight); - if (nodes == py::object()) { - nodes = G.attr("nodes"); - } - py::list nodes_list = py::list(nodes); - - Graph& G_ = py::extract(G); - py::dict hierarchy = py::dict(); - - for (int i = 0;i < py::len(nodes_list);i++) { - py::object v = nodes_list[i]; - py::object E = G.attr("ego_subgraph")(v); - - int n = 
py::len(E) - 1; - - weight_t C = 0; - std::map c; - py::list neighbors_of_v = py::list(G.attr("neighbors")(v)); - - for (int j = 0;j < py::len(neighbors_of_v);j++) { - py::object w = neighbors_of_v[j]; - node_t v_id = py::extract(G_.node_to_id[v]); - node_t w_id = py::extract(G_.node_to_id[w]); - C += local_constraint(G_.adj, v_id, w_id, weight_key); - c[w_id] = local_constraint(G_.adj, v_id, w_id, weight_key); - } - if (n > 1) { - weight_t hierarchy_sum = 0; - for (int k = 0;k < py::len(neighbors_of_v);k++) { - py::object w = neighbors_of_v[k]; - node_t w_id = py::extract(G_.node_to_id[w]); - hierarchy_sum += c[w_id] / C * n * log(c[w_id] / C * n) / (n * log(n)); - } - hierarchy[v] = hierarchy_sum; - } - if (!hierarchy.has_key(v)) { - hierarchy[v] = 0; - } - } - return hierarchy; -} \ No newline at end of file diff --git a/cpp_easygraph/functions/structural_holes/evaluation.h b/cpp_easygraph/functions/structural_holes/evaluation.h deleted file mode 100644 index 2695079c..00000000 --- a/cpp_easygraph/functions/structural_holes/evaluation.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once -#define BOOST_PYTHON_STATIC_LIB - -#include "../../common/common.h" - -py::object constraint(py::object G, py::object nodes, py::object weight, py::object n_workers); -py::object effective_size(py::object G, py::object nodes, py::object weight, py::object n_workers); -py::object hierarchy(py::object G, py::object nodes, py::object weight, py::object n_workers); \ No newline at end of file diff --git a/easygraph/__init__.py b/easygraph/__init__.py index c7fd4998..fd312f29 100644 --- a/easygraph/__init__.py +++ b/easygraph/__init__.py @@ -3,7 +3,6 @@ import easygraph.functions import easygraph.readwrite import easygraph.utils - from easygraph.classes import * from easygraph.datasets import * from easygraph.functions import * diff --git a/easygraph/classes/__init__.py b/easygraph/classes/__init__.py index 7d0d57fa..ec2b4b6a 100644 --- a/easygraph/classes/__init__.py +++ 
b/easygraph/classes/__init__.py @@ -1,7 +1,6 @@ -from .directed_graph import DiGraph -from .directed_multigraph import MultiDiGraph -from .graph import Graph -from .graph import GraphC -from .graphviews import * -from .multigraph import MultiGraph -from .operation import * +from easygraph.classes.directed_graph import DiGraph +from easygraph.classes.directed_multigraph import MultiDiGraph +from easygraph.classes.graph import Graph, GraphC +from easygraph.classes.graphviews import * +from easygraph.classes.multigraph import MultiGraph +from easygraph.classes.operation import * diff --git a/easygraph/classes/directed_graph.py b/easygraph/classes/directed_graph.py index 9515972b..24f80805 100644 --- a/easygraph/classes/directed_graph.py +++ b/easygraph/classes/directed_graph.py @@ -1,9 +1,7 @@ -from copy import deepcopy -from typing import Dict -from typing import List +import builtins +import contextlib import easygraph.convert as convert - from easygraph.classes.graph import Graph from easygraph.utils.exception import EasyGraphError @@ -36,7 +34,7 @@ class DiGraph(Graph): Create an graph with attributes. 
- >>> G = eg.Graph(name='Karate Club', date='2020.08.21') + >>> G = eg.Graph(name="Karate Club", date="2020.08.21") **Attributes:** @@ -143,11 +141,11 @@ def out_degree(self, weight="weight"): Examples -------- - >>> G.out_degree(weight='weight') + >>> G.out_degree(weight="weight") """ degree = dict() - for u, v, d in self.edges: + for u, _v, d in self.edges: if u in degree: degree[u] += d.get(weight, 1) else: @@ -185,11 +183,11 @@ def in_degree(self, weight="weight"): Examples -------- - >>> G.in_degree(weight='weight') + >>> G.in_degree(weight="weight") """ degree = dict() - for u, v, d in self.edges: + for _u, v, d in self.edges: if v in degree: degree[v] += d.get(weight, 1) else: @@ -229,11 +227,11 @@ def degree(self, weight="weight"): -------- >>> G.degree() - >>> G.degree(weight='weight') + >>> G.degree(weight="weight") or you can customize the weight key - >>> G.degree(weight='weight_1') + >>> G.degree(weight="weight_1") """ degree = dict() @@ -266,7 +264,7 @@ def size(self, weight=None): Returns the total of all edge weights in G: - >>> G.size(weight='weight') + >>> G.size(weight="weight") """ s = sum(d for v, d in self.out_degree(weight=weight).items()) @@ -408,7 +406,7 @@ def neighbors(self, node): Examples -------- >>> G = eg.Graph() - >>> G.add_edges([(1,2), (2,3), (2,4)]) + >>> G.add_edges([(1, 2), (2, 3), (2, 4)]) >>> for neighbor in G.neighbors(node=2): ... print(neighbor) @@ -417,7 +415,7 @@ def neighbors(self, node): try: return iter(self._adj[node]) except KeyError: - print("No node {}".format(node)) + print(f"No node {node}") successors = neighbors @@ -437,7 +435,7 @@ def predecessors(self, node): Examples -------- >>> G = eg.Graph() - >>> G.add_edges([(1,2), (2,3), (2,4)]) + >>> G.add_edges([(1, 2), (2, 3), (2, 4)]) >>> for predecessor in G.predecessors(node=2): ... 
print(predecessor) @@ -446,7 +444,7 @@ def predecessors(self, node): try: return iter(self._pred[node]) except KeyError: - print("No node {}".format(node)) + print(f"No node {node}") def all_neighbors(self, node): """Returns an iterator of a node's neighbors, including both successors and predecessors. @@ -464,7 +462,7 @@ def all_neighbors(self, node): Examples -------- >>> G = eg.Graph() - >>> G.add_edges([(1,2), (2,3), (2,4)]) + >>> G.add_edges([(1, 2), (2, 3), (2, 4)]) >>> for neighbor in G.all_neighbors(node=2): ... print(neighbor) @@ -475,7 +473,7 @@ def all_neighbors(self, node): neighbors.extend(self._pred[node]) return iter(neighbors) except KeyError: - print("No node {}".format(node)) + print(f"No node {node}") def add_node(self, node_for_adding, **node_attr): """Add one node @@ -498,19 +496,16 @@ def add_node(self, node_for_adding, **node_attr): Examples -------- - >>> G.add_node('a') - >>> G.add_node('hello world') - >>> G.add_node('Jack', age=10) + >>> G.add_node("a") + >>> G.add_node("hello world") + >>> G.add_node("Jack", age=10) - >>> G.add_node('Jack', **{ - ... 'age': 10, - ... 'gender': 'M' - ... }) + >>> G.add_node("Jack", **{"age": 10, "gender": "M"}) """ self._add_one_node(node_for_adding, node_attr) - def add_nodes(self, nodes_for_adding: list, nodes_attr: List[Dict] = []): + def add_nodes(self, nodes_for_adding: list, nodes_attr: list[dict] = None): """Add nodes with a list of nodes. Parameters @@ -530,31 +525,27 @@ def add_nodes(self, nodes_for_adding: list, nodes_attr: List[Dict] = []): You can add with node attributes using a list of Python dict type, each of which is the attribute of each node, respectively. - >>> G.add_nodes([1, 2, 'a', 'b']) + >>> G.add_nodes([1, 2, "a", "b"]) >>> G.add_nodes(range(1, 200)) - >>> G.add_nodes(['Jack', 'Tom', 'Lily'], nodes_attr=[ - ... { - ... 'age': 10, - ... 'gender': 'M' - ... }, - ... { - ... 'age': 11, - ... 'gender': 'M' - ... }, - ... { - ... 'age': 10, - ... 'gender': 'F' - ... } - ... 
]) + >>> G.add_nodes( + ... ["Jack", "Tom", "Lily"], + ... nodes_attr=[ + ... {"age": 10, "gender": "M"}, + ... {"age": 11, "gender": "M"}, + ... {"age": 10, "gender": "F"}, + ... ], + ... ) """ if nodes_attr is None: nodes_attr = [] - if not len(nodes_attr) == 0: # Nodes attributes included in input - assert len(nodes_for_adding) == len( - nodes_attr - ), "Nodes and Attributes lists must have same length." + if nodes_attr is None: + nodes_attr = [] + if len(nodes_attr) != 0: # Nodes attributes included in input + assert len(nodes_for_adding) == len(nodes_attr), ( + "Nodes and Attributes lists must have same length." + ) else: # Set empty attribute for each node nodes_attr = [dict() for i in range(len(nodes_for_adding))] @@ -626,7 +617,9 @@ def add_nodes_from(self, nodes_for_adding, **attr): self._node[n] = self.node_attr_dict_factory() self._node[n].update(newdict) - def _add_one_node(self, one_node_for_adding, node_attr: dict = {}): + def _add_one_node(self, one_node_for_adding, node_attr: dict = None): + if node_attr is None: + node_attr = {} node = one_node_for_adding if node not in self._node: self._adj[node] = self.adjlist_inner_dict_factory() @@ -662,14 +655,12 @@ def add_edge(self, u_of_edge, v_of_edge, **edge_attr): Examples -------- - >>> G.add_edge(1,2) - >>> G.add_edge('Jack', 'Tom', weight=10) + >>> G.add_edge(1, 2) + >>> G.add_edge("Jack", "Tom", weight=10) Add edge with attributes, edge weight, for example, - >>> G.add_edge(1, 2, **{ - ... 'weight': 20 - ... }) + >>> G.add_edge(1, 2, **{"weight": 20}) """ self._add_one_edge(u_of_edge, v_of_edge, edge_attr) @@ -677,7 +668,7 @@ def add_edge(self, u_of_edge, v_of_edge, **edge_attr): def add_weighted_edge(self, u_of_edge, v_of_edge, weight): self._add_one_edge(u_of_edge, v_of_edge, edge_attr={"weight": weight}) - def add_edges(self, edges_for_adding, edges_attr: List[Dict] = []): + def add_edges(self, edges_for_adding, edges_attr: list[dict] = None): """Add a list of edges. 
Parameters @@ -693,30 +684,21 @@ def add_edges(self, edges_for_adding, edges_attr: List[Dict] = []): -------- Add a list of edges into *G* - >>> G.add_edges([ - ... (1, 2), - ... (3, 4), - ... ('Jack', 'Tom') - ... ]) + >>> G.add_edges([(1, 2), (3, 4), ("Jack", "Tom")]) Add edge with attributes, for example, edge weight, - >>> G.add_edges([(1,2), (2, 3)], edges_attr=[ - ... { - ... 'weight': 20 - ... }, - ... { - ... 'weight': 15 - ... } - ... ]) + >>> G.add_edges([(1, 2), (2, 3)], edges_attr=[{"weight": 20}, {"weight": 15}]) """ if edges_attr is None: edges_attr = [] - if not len(edges_attr) == 0: # Edges attributes included in input - assert len(edges_for_adding) == len( - edges_attr - ), "Edges and Attributes lists must have same length." + if edges_attr is None: + edges_attr = [] + if len(edges_attr) != 0: # Edges attributes included in input + assert len(edges_for_adding) == len(edges_attr), ( + "Edges and Attributes lists must have same length." + ) else: # Set empty attribute for each edge edges_attr = [dict() for i in range(len(edges_for_adding))] @@ -724,7 +706,7 @@ def add_edges(self, edges_for_adding, edges_attr: List[Dict] = []): try: edge = edges_for_adding[i] attr = edges_attr[i] - assert len(edge) == 2, "Edge tuple {} must be 2-tuple.".format(edge) + assert len(edge) == 2, f"Edge tuple {edge} must be 2-tuple." 
self._add_one_edge(edge[0], edge[1], attr) except Exception as err: print(err) @@ -824,32 +806,30 @@ def add_edges_from_file(self, file, weighted=False): Then add them to *G* - >>> G.add_edges_from_file(file='./club_network.txt', weighted=True) + >>> G.add_edges_from_file(file="./club_network.txt", weighted=True) """ import re - with open(file, "r") as fp: + with open(file) as fp: edges = fp.readlines() if weighted: for edge in edges: edge = re.sub(",", " ", edge) edge = edge.split() - try: + with contextlib.suppress(builtins.BaseException): self.add_edge(edge[0], edge[1], weight=float(edge[2])) - except: - pass else: for edge in edges: edge = re.sub(",", " ", edge) edge = edge.split() - try: + with contextlib.suppress(builtins.BaseException): self.add_edge(edge[0], edge[1]) - except: - pass - def _add_one_edge(self, u_of_edge, v_of_edge, edge_attr: dict = {}): + def _add_one_edge(self, u_of_edge, v_of_edge, edge_attr: dict = None): + if edge_attr is None: + edge_attr = {} u, v = u_of_edge, v_of_edge # add nodes if u not in self._node: @@ -878,7 +858,7 @@ def remove_node(self, node_to_remove): -------- Remove node *Jack* from *G* - >>> G.remove_node('Jack') + >>> G.remove_node("Jack") """ try: @@ -886,7 +866,7 @@ def remove_node(self, node_to_remove): preds = list(self._pred[node_to_remove]) del self._node[node_to_remove] except KeyError: # Node not exists in self - raise KeyError("No node {} in graph.".format(node_to_remove)) + raise KeyError(f"No node {node_to_remove} in graph.") for succ in succs: # Remove edges start with node_to_remove del self._pred[succ][node_to_remove] for pred in preds: # Remove edges end with node_to_remove @@ -912,15 +892,13 @@ def remove_nodes(self, nodes_to_remove: list): -------- Remove node *[1, 2, 'a', 'b']* from *G* - >>> G.remove_nodes([1, 2, 'a', 'b']) + >>> G.remove_nodes([1, 2, "a", "b"]) """ - for ( - node - ) in ( + for node in ( nodes_to_remove ): # If not all nodes included in graph, give up removing other nodes - assert 
node in self._node, "Remove Error: No node {} in graph".format(node) + assert node in self._node, f"Remove Error: No node {node} in graph" for node in nodes_to_remove: self.remove_node(node) @@ -943,14 +921,14 @@ def remove_edge(self, u, v): -------- Remove edge (1,2) from *G* - >>> G.remove_edge(1,2) + >>> G.remove_edge(1, 2) """ try: del self._adj[u][v] del self._pred[v][u] except KeyError: - raise KeyError("No edge {}-{} in graph.".format(u, v)) + raise KeyError(f"No edge {u}-{v} in graph.") def remove_edges(self, edges_to_remove: [tuple]): """Remove a list of edges from your graph. @@ -970,10 +948,7 @@ def remove_edges(self, edges_to_remove: [tuple]): -------- Remove the edges *('Jack', 'Mary')* amd *('Mary', 'Tom')* from *G* - >>> G.remove_edge([ - ... ('Jack', 'Mary'), - ... ('Mary', 'Tom') - ... ]) + >>> G.remove_edge([("Jack", "Mary"), ("Mary", "Tom")]) """ for edge in edges_to_remove: @@ -1080,17 +1055,15 @@ def nodes_subgraph(self, from_nodes: list): -------- >>> G = eg.Graph() - >>> G.add_edges([(1,2), (2,3), (2,4), (4,5)]) - >>> G_sub = G.nodes_subgraph(from_nodes= [1,2,3]) + >>> G.add_edges([(1, 2), (2, 3), (2, 4), (4, 5)]) + >>> G_sub = G.nodes_subgraph(from_nodes=[1, 2, 3]) """ G = self.__class__() G.graph.update(self.graph) for node in from_nodes: - try: + with contextlib.suppress(KeyError): G.add_node(node, **self._node[node]) - except KeyError: - pass # Edge from_nodes = set(from_nodes) @@ -1116,12 +1089,8 @@ def ego_subgraph(self, center): Examples -------- >>> G = eg.Graph() - >>> G.add_edges([ - ... ('Jack', 'Maria'), - ... ('Maria', 'Andy'), - ... ('Jack', 'Tom') - ... ]) - >>> G.ego_subgraph(center='Jack') + >>> G.add_edges([("Jack", "Maria"), ("Maria", "Andy"), ("Jack", "Tom")]) + >>> G.ego_subgraph(center="Jack") """ neighbors_of_center = list(self.all_neighbors(center)) neighbors_of_center.append(center) @@ -1155,11 +1124,7 @@ def to_index_node_graph(self, begin_index=0): as well as node-to-index dictionary. 
>>> G = eg.Graph() - >>> G.add_edges([ - ... ('Jack', 'Maria'), - ... ('Maria', 'Andy'), - ... ('Jack', 'Tom') - ... ]) + >>> G.add_edges([("Jack", "Maria"), ("Maria", "Andy"), ("Jack", "Tom")]) >>> G_index_graph, index_of_node, node_of_index = G.to_index_node_graph() """ diff --git a/easygraph/classes/directed_multigraph.py b/easygraph/classes/directed_multigraph.py index c0cfeadd..76ff6f3e 100644 --- a/easygraph/classes/directed_multigraph.py +++ b/easygraph/classes/directed_multigraph.py @@ -1,15 +1,11 @@ from copy import deepcopy -from typing import Dict -from typing import List import easygraph as eg import easygraph.convert as convert - from easygraph.classes.directed_graph import DiGraph from easygraph.classes.multigraph import MultiGraph from easygraph.utils.exception import EasyGraphError - __all__ = ["MultiDiGraph"] @@ -248,7 +244,7 @@ def in_edges(self): edges = list() for n, nbrs in self._adj.items(): for nbr, kd in nbrs.items(): - for k, dd in kd.items(): + for k, _dd in kd.items(): edges.append((nbr, n, k)) return edges diff --git a/easygraph/classes/graph.py b/easygraph/classes/graph.py index 73687985..1f373c49 100644 --- a/easygraph/classes/graph.py +++ b/easygraph/classes/graph.py @@ -1,10 +1,7 @@ -from copy import deepcopy -from typing import Dict -from typing import List +import builtins +import contextlib -import easygraph as eg import easygraph.convert as convert - from easygraph.utils.exception import EasyGraphError @@ -36,7 +33,7 @@ class Graph: Create an graph with attributes. - >>> G = eg.Graph(name='Karate Club', date='2020.08.21') + >>> G = eg.Graph(name="Karate Club", date="2020.08.21") **Attributes:** @@ -147,7 +144,7 @@ def degree(self, weight="weight"): if you have customized weight key 'weight_1'. 
- >>> G.degree(weight='weight_1') + >>> G.degree(weight="weight_1") """ degree = dict() @@ -212,7 +209,7 @@ def size(self, weight=None): Returns the total of all edge weights in G: - >>> G.size(weight='weight') + >>> G.size(weight="weight") """ s = sum(d for v, d in self.degree(weight=weight).items()) @@ -354,7 +351,7 @@ def neighbors(self, node): Examples -------- >>> G = eg.Graph() - >>> G.add_edges([(1,2), (2,3), (2,4)]) + >>> G.add_edges([(1, 2), (2, 3), (2, 4)]) >>> for neighbor in G.neighbors(node=2): ... print(neighbor) @@ -362,7 +359,7 @@ def neighbors(self, node): try: return iter(self._adj[node]) except KeyError: - print("No node {}".format(node)) + print(f"No node {node}") all_neighbors = neighbors @@ -387,19 +384,16 @@ def add_node(self, node_for_adding, **node_attr): Examples -------- - >>> G.add_node('a') - >>> G.add_node('hello world') - >>> G.add_node('Jack', age=10) + >>> G.add_node("a") + >>> G.add_node("hello world") + >>> G.add_node("Jack", age=10) - >>> G.add_node('Jack', **{ - ... 'age': 10, - ... 'gender': 'M' - ... }) + >>> G.add_node("Jack", **{"age": 10, "gender": "M"}) """ self._add_one_node(node_for_adding, node_attr) - def add_nodes(self, nodes_for_adding: list, nodes_attr: List[Dict] = []): + def add_nodes(self, nodes_for_adding: list, nodes_attr: list[dict] = None): """Add nodes with a list of nodes. Parameters @@ -419,29 +413,25 @@ def add_nodes(self, nodes_for_adding: list, nodes_attr: List[Dict] = []): You can add with node attributes using a list of Python dict type, each of which is the attribute of each node, respectively. - >>> G.add_nodes([1, 2, 'a', 'b']) + >>> G.add_nodes([1, 2, "a", "b"]) >>> G.add_nodes(range(1, 200)) - >>> G.add_nodes(['Jack', 'Tom', 'Lily'], nodes_attr=[ - ... { - ... 'age': 10, - ... 'gender': 'M' - ... }, - ... { - ... 'age': 11, - ... 'gender': 'M' - ... }, - ... { - ... 'age': 10, - ... 'gender': 'F' - ... } - ... ]) + >>> G.add_nodes( + ... ["Jack", "Tom", "Lily"], + ... nodes_attr=[ + ... 
{"age": 10, "gender": "M"}, + ... {"age": 11, "gender": "M"}, + ... {"age": 10, "gender": "F"}, + ... ], + ... ) """ - if not len(nodes_attr) == 0: # Nodes attributes included in input - assert len(nodes_for_adding) == len( - nodes_attr - ), "Nodes and Attributes lists must have same length." + if nodes_attr is None: + nodes_attr = [] + if len(nodes_attr) != 0: # Nodes attributes included in input + assert len(nodes_for_adding) == len(nodes_attr), ( + "Nodes and Attributes lists must have same length." + ) else: # Set empty attribute for each node nodes_attr = [dict() for i in range(len(nodes_for_adding))] @@ -512,7 +502,9 @@ def add_nodes_from(self, nodes_for_adding, **attr): self._node[n] = self.node_attr_dict_factory() self._node[n].update(newdict) - def _add_one_node(self, one_node_for_adding, node_attr: dict = {}): + def _add_one_node(self, one_node_for_adding, node_attr: dict = None): + if node_attr is None: + node_attr = {} node = one_node_for_adding if node not in self._node: self._adj[node] = self.adjlist_inner_dict_factory() @@ -546,14 +538,12 @@ def add_edge(self, u_of_edge, v_of_edge, **edge_attr): Examples -------- - >>> G.add_edge(1,2) - >>> G.add_edge('Jack', 'Tom', weight=10) + >>> G.add_edge(1, 2) + >>> G.add_edge("Jack", "Tom", weight=10) Add edge with attributes, edge weight, for example, - >>> G.add_edge(1, 2, **{ - ... 'weight': 20 - ... }) + >>> G.add_edge(1, 2, **{"weight": 20}) """ self._add_one_edge(u_of_edge, v_of_edge, edge_attr) @@ -561,7 +551,7 @@ def add_edge(self, u_of_edge, v_of_edge, **edge_attr): def add_weighted_edge(self, u_of_edge, v_of_edge, weight): self._add_one_edge(u_of_edge, v_of_edge, edge_attr={"weight": weight}) - def add_edges(self, edges_for_adding, edges_attr: List[Dict] = []): + def add_edges(self, edges_for_adding, edges_attr: list[dict] = None): """Add a list of edges. 
Parameters @@ -577,30 +567,21 @@ def add_edges(self, edges_for_adding, edges_attr: List[Dict] = []): -------- Add a list of edges into *G* - >>> G.add_edges([ - ... (1, 2), - ... (3, 4), - ... ('Jack', 'Tom') - ... ]) + >>> G.add_edges([(1, 2), (3, 4), ("Jack", "Tom")]) Add edge with attributes, for example, edge weight, - >>> G.add_edges([(1,2), (2, 3)], edges_attr=[ - ... { - ... 'weight': 20 - ... }, - ... { - ... 'weight': 15 - ... } - ... ]) + >>> G.add_edges([(1, 2), (2, 3)], edges_attr=[{"weight": 20}, {"weight": 15}]) """ if edges_attr is None: edges_attr = [] - if not len(edges_attr) == 0: # Edges attributes included in input - assert len(edges_for_adding) == len( - edges_attr - ), "Edges and Attributes lists must have same length." + if edges_attr is None: + edges_attr = [] + if len(edges_attr) != 0: # Edges attributes included in input + assert len(edges_for_adding) == len(edges_attr), ( + "Edges and Attributes lists must have same length." + ) else: # Set empty attribute for each edge edges_attr = [dict() for i in range(len(edges_for_adding))] @@ -608,7 +589,7 @@ def add_edges(self, edges_for_adding, edges_attr: List[Dict] = []): try: edge = edges_for_adding[i] attr = edges_attr[i] - assert len(edge) == 2, "Edge tuple {} must be 2-tuple.".format(edge) + assert len(edge) == 2, f"Edge tuple {edge} must be 2-tuple." 
self._add_one_edge(edge[0], edge[1], attr) except Exception as err: print(err) @@ -706,32 +687,30 @@ def add_edges_from_file(self, file, weighted=False): Then add them to *G* - >>> G.add_edges_from_file(file='./club_network.txt', weighted=True) + >>> G.add_edges_from_file(file="./club_network.txt", weighted=True) """ import re - with open(file, "r") as fp: + with open(file) as fp: edges = fp.readlines() if weighted: for edge in edges: edge = re.sub(",", " ", edge) edge = edge.split() - try: + with contextlib.suppress(builtins.BaseException): self.add_edge(edge[0], edge[1], weight=float(edge[2])) - except: - pass else: for edge in edges: edge = re.sub(",", " ", edge) edge = edge.split() - try: + with contextlib.suppress(builtins.BaseException): self.add_edge(edge[0], edge[1]) - except: - pass - def _add_one_edge(self, u_of_edge, v_of_edge, edge_attr: dict = {}): + def _add_one_edge(self, u_of_edge, v_of_edge, edge_attr: dict = None): + if edge_attr is None: + edge_attr = {} u, v = u_of_edge, v_of_edge # add nodes if u not in self._node: @@ -760,14 +739,14 @@ def remove_node(self, node_to_remove): -------- Remove node *Jack* from *G* - >>> G.remove_node('Jack') + >>> G.remove_node("Jack") """ try: neighbors = list(self._adj[node_to_remove]) del self._node[node_to_remove] except KeyError: # Node not exists in self - raise EasyGraphError("No node {} in graph.".format(node_to_remove)) + raise EasyGraphError(f"No node {node_to_remove} in graph.") for neighbor in neighbors: # Remove edges with other nodes del self._adj[neighbor][node_to_remove] del self._adj[node_to_remove] # Remove this node @@ -788,15 +767,13 @@ def remove_nodes(self, nodes_to_remove: list): -------- Remove node *[1, 2, 'a', 'b']* from *G* - >>> G.remove_nodes([1, 2, 'a', 'b']) + >>> G.remove_nodes([1, 2, "a", "b"]) """ - for ( - node - ) in ( + for node in ( nodes_to_remove ): # If not all nodes included in graph, give up removing other nodes - assert node in self._node, "Remove Error: No node {} in 
graph".format(node) + assert node in self._node, f"Remove Error: No node {node} in graph" for node in nodes_to_remove: self.remove_node(node) @@ -819,7 +796,7 @@ def remove_edge(self, u, v): -------- Remove edge (1,2) from *G* - >>> G.remove_edge(1,2) + >>> G.remove_edge(1, 2) """ try: @@ -827,7 +804,7 @@ def remove_edge(self, u, v): if u != v: # self-loop needs only one entry removed del self._adj[v][u] except KeyError: - raise KeyError("No edge {}-{} in graph.".format(u, v)) + raise KeyError(f"No edge {u}-{v} in graph.") def remove_edges(self, edges_to_remove: [tuple]): """Remove a list of edges from your graph. @@ -846,10 +823,7 @@ def remove_edges(self, edges_to_remove: [tuple]): -------- Remove the edges *('Jack', 'Mary')* amd *('Mary', 'Tom')* from *G* - >>> G.remove_edge([ - ... ('Jack', 'Mary'), - ... ('Mary', 'Tom') - ... ]) + >>> G.remove_edge([("Jack", "Mary"), ("Mary", "Tom")]) """ for edge in edges_to_remove: @@ -924,17 +898,15 @@ def nodes_subgraph(self, from_nodes: list): -------- >>> G = eg.Graph() - >>> G.add_edges([(1,2), (2,3), (2,4), (4,5)]) - >>> G_sub = G.nodes_subgraph(from_nodes= [1,2,3]) + >>> G.add_edges([(1, 2), (2, 3), (2, 4), (4, 5)]) + >>> G_sub = G.nodes_subgraph(from_nodes=[1, 2, 3]) """ G = self.__class__() G.graph.update(self.graph) for node in from_nodes: - try: + with contextlib.suppress(KeyError): G.add_node(node, **self._node[node]) - except KeyError: - pass # Edge from_nodes = set(from_nodes) @@ -960,12 +932,8 @@ def ego_subgraph(self, center): Examples -------- >>> G = eg.Graph() - >>> G.add_edges([ - ... ('Jack', 'Maria'), - ... ('Maria', 'Andy'), - ... ('Jack', 'Tom') - ... ]) - >>> G.ego_subgraph(center='Jack') + >>> G.add_edges([("Jack", "Maria"), ("Maria", "Andy"), ("Jack", "Tom")]) + >>> G.ego_subgraph(center="Jack") """ neighbors_of_center = list(self.all_neighbors(center)) neighbors_of_center.append(center) @@ -999,11 +967,7 @@ def to_index_node_graph(self, begin_index=0): as well as node-to-index dictionary. 
>>> G = eg.Graph() - >>> G.add_edges([ - ... ('Jack', 'Maria'), - ... ('Maria', 'Andy'), - ... ('Jack', 'Tom') - ... ]) + >>> G.add_edges([("Jack", "Maria"), ("Maria", "Andy"), ("Jack", "Tom")]) >>> G_index_graph, index_of_node, node_of_index = G.to_index_node_graph() """ diff --git a/easygraph/classes/graphviews.py b/easygraph/classes/graphviews.py index bbb2a3d8..d68c9ef4 100644 --- a/easygraph/classes/graphviews.py +++ b/easygraph/classes/graphviews.py @@ -1,6 +1,5 @@ from easygraph.utils import only_implemented_for_Directed_graph - __all__ = ["reverse_view"] diff --git a/easygraph/classes/multigraph.py b/easygraph/classes/multigraph.py index fbe6b55b..09549f96 100644 --- a/easygraph/classes/multigraph.py +++ b/easygraph/classes/multigraph.py @@ -1,15 +1,13 @@ """Base class for MultiGraph.""" + +import contextlib from copy import deepcopy -from typing import Dict -from typing import List import easygraph as eg import easygraph.convert as convert - from easygraph.classes.graph import Graph from easygraph.utils.exception import EasyGraphError - __all__ = ["MultiGraph"] @@ -375,10 +373,8 @@ def remove_edges_from(self, ebunch): [] """ for e in ebunch: - try: + with contextlib.suppress(EasyGraphError): self.remove_edge(*e[:3]) - except EasyGraphError: - pass def has_edge(self, u, v, key=None): """Returns True if the graph has an edge between nodes u and v. 
diff --git a/easygraph/classes/operation.py b/easygraph/classes/operation.py index 03e05bae..3508914b 100644 --- a/easygraph/classes/operation.py +++ b/easygraph/classes/operation.py @@ -1,10 +1,9 @@ +import contextlib from itertools import chain import easygraph as eg - from easygraph.utils import * - __all__ = [ "set_edge_attributes", "add_path", @@ -14,10 +13,8 @@ "number_of_selfloops", "density", ] -try: +with contextlib.suppress(ImportError): from cpp_easygraph import cpp_density -except ImportError: - pass def set_edge_attributes(G, values, name=None): @@ -99,16 +96,12 @@ def set_edge_attributes(G, values, name=None): # if `values` is a dict using `.items()` => {edge: value} if G.is_multigraph(): for (u, v, key), value in values.items(): - try: + with contextlib.suppress(KeyError): G[u][v][key][name] = value - except KeyError: - pass else: for (u, v), value in values.items(): - try: + with contextlib.suppress(KeyError): G[u][v][name] = value - except KeyError: - pass except AttributeError: # treat `values` as a constant for u, v, data in G.edges: @@ -117,16 +110,12 @@ def set_edge_attributes(G, values, name=None): # `values` consists of doct-of-dict {edge: {attr: value}} shape if G.is_multigraph(): for (u, v, key), d in values.items(): - try: + with contextlib.suppress(KeyError): G[u][v][key].update(d) - except KeyError: - pass else: for (u, v), d in values.items(): - try: + with contextlib.suppress(KeyError): G[u][v].update(d) - except KeyError: - pass def add_path(G_to_add_to, nodes_for_path, **attr): @@ -245,20 +234,16 @@ def set_node_attributes(G, values, name=None): # Set node attributes based on type of `values` if name is not None: # `values` must not be a dict of dict try: # `values` is a dict - for n, v in values.items(): - try: + for n, _v in values.items(): + with contextlib.suppress(KeyError): G.nodes[n][name] = values[n] - except KeyError: - pass except AttributeError: # `values` is a constant for n in G: G.nodes[n][name] = values else: # 
`values` must be dict of dict for n, d in values.items(): - try: + with contextlib.suppress(KeyError): G.nodes[n].update(d) - except KeyError: - pass def topological_generations(G): diff --git a/easygraph/classes/tests/test_multigraph.py b/easygraph/classes/tests/test_multigraph.py index 28c36210..3e30d4d2 100644 --- a/easygraph/classes/tests/test_multigraph.py +++ b/easygraph/classes/tests/test_multigraph.py @@ -1,6 +1,7 @@ -import easygraph as eg import pytest +import easygraph as eg + class TestMultiGraph: def setup_method(self): diff --git a/easygraph/classes/tests/test_operation.py b/easygraph/classes/tests/test_operation.py index 6245656a..b07c050c 100644 --- a/easygraph/classes/tests/test_operation.py +++ b/easygraph/classes/tests/test_operation.py @@ -1,6 +1,6 @@ -import easygraph as eg import pytest +import easygraph as eg from easygraph.utils import edges_equal diff --git a/easygraph/convert.py b/easygraph/convert.py index d60a9f16..39a34faf 100644 --- a/easygraph/convert.py +++ b/easygraph/convert.py @@ -1,12 +1,8 @@ import warnings - -from collections.abc import Collection -from collections.abc import Generator -from collections.abc import Iterator +from collections.abc import Collection, Generator, Iterator import easygraph as eg - __all__ = [ "from_dict_of_dicts", "to_easygraph_graph", @@ -119,7 +115,7 @@ def to_easygraph_graph(data, create_using=None, multigraph_input=False): msg = "Input is not a correct Pandas DataFrame adjacency edge-list." raise eg.EasyGraphError(msg) from err except ImportError: - warnings.warn("pandas not found, skipping conversion test.", ImportWarning) + warnings.warn("pandas not found, skipping conversion test.", ImportWarning, stacklevel=2) # numpy matrix or ndarray try: @@ -133,7 +129,7 @@ def to_easygraph_graph(data, create_using=None, multigraph_input=False): "Input is not a correct numpy matrix or array." 
) from err except ImportError: - warnings.warn("numpy not found, skipping conversion test.", ImportWarning) + warnings.warn("numpy not found, skipping conversion test.", ImportWarning, stacklevel=2) # scipy sparse matrix - any format try: @@ -145,7 +141,7 @@ def to_easygraph_graph(data, create_using=None, multigraph_input=False): "Input is not a correct scipy sparse matrix type." ) from err except ImportError: - warnings.warn("scipy not found, skipping conversion test.", ImportWarning) + warnings.warn("scipy not found, skipping conversion test.", ImportWarning, stacklevel=2) # Note: most general check - should remain last in order of execution # Includes containers (e.g. list, set, dict, etc.), generators, and diff --git a/easygraph/datasets/__init__.py b/easygraph/datasets/__init__.py index db2ab2d1..aebbc746 100644 --- a/easygraph/datasets/__init__.py +++ b/easygraph/datasets/__init__.py @@ -1 +1 @@ -from easygraph.datasets.get_sample_graph import * +from easygraph.datasets.get_sample_graph import * diff --git a/easygraph/datasets/get_sample_graph.py b/easygraph/datasets/get_sample_graph.py index bfd6ab3f..18f856f8 100644 --- a/easygraph/datasets/get_sample_graph.py +++ b/easygraph/datasets/get_sample_graph.py @@ -1,6 +1,6 @@ -import easygraph as eg import progressbar +import easygraph as eg __all__ = [ "get_graph_karateclub", @@ -101,7 +101,7 @@ def sparse2graph(x): G = defaultdict(lambda: set()) cx = x.tocoo() - for i, j, v in zip(cx.row, cx.col, cx.data): + for i, j, _v in zip(cx.row, cx.col, cx.data, strict=False): G[i].add(j) return {str(k): [str(x) for x in v] for k, v in G.items()} @@ -132,7 +132,6 @@ def get_graph_youtube(): """ import gzip - from urllib import request url = "http://socialnetworks.mpi-sws.mpg.de/data/youtube-links.txt.gz" @@ -169,7 +168,6 @@ def get_graph_flickr(): """ import gzip - from urllib import request url = "http://socialnetworks.mpi-sws.mpg.de/data/flickr-links.txt.gz" diff --git a/easygraph/functions/centrality/__init__.py 
b/easygraph/functions/centrality/__init__.py index 4b077965..9458a9ae 100644 --- a/easygraph/functions/centrality/__init__.py +++ b/easygraph/functions/centrality/__init__.py @@ -1,4 +1,4 @@ -from .betweenness import * -from .clossness import * -from .degree import * -from .flowbetweenness import * +from easygraph.functions.centrality.betweenness import * +from easygraph.functions.centrality.clossness import * +from easygraph.functions.centrality.degree import * +from easygraph.functions.centrality.flowbetweenness import * diff --git a/easygraph/functions/centrality/betweenness.py b/easygraph/functions/centrality/betweenness.py index 8b71caa4..8442eb53 100644 --- a/easygraph/functions/centrality/betweenness.py +++ b/easygraph/functions/centrality/betweenness.py @@ -1,7 +1,6 @@ from easygraph.utils import * from easygraph.utils.decorators import * - __all__ = [ "betweenness_centrality", ] @@ -83,7 +82,6 @@ def betweenness_centrality( if n_workers is not None: # use the parallel version for large graph import random - from functools import partial from multiprocessing import Pool @@ -134,10 +132,7 @@ def _rescale(betweenness, n, normalized, directed=False, endpoints=False): else: scale = 1 / ((n - 1) * (n - 2)) else: # rescale by 2 for undirected graphs - if not directed: - scale = 0.5 - else: - scale = None + scale = 0.5 if not directed else None if scale is not None: for v in betweenness: betweenness[v] *= scale @@ -169,8 +164,7 @@ def _single_source_bfs_path(G, source): def _single_source_dijkstra_path(G, source, weight="weight"): - from heapq import heappop - from heapq import heappush + from heapq import heappop, heappush push = heappush pop = heappop diff --git a/easygraph/functions/centrality/clossness.py b/easygraph/functions/centrality/clossness.py index 45f79bcc..16f6757a 100644 --- a/easygraph/functions/centrality/clossness.py +++ b/easygraph/functions/centrality/clossness.py @@ -1,7 +1,6 @@ from easygraph.functions.path import * from easygraph.utils 
import * - __all__ = [ "closeness_centrality", ] @@ -61,7 +60,6 @@ def closeness_centrality(G, weight=None, n_workers=None): if n_workers is not None: # use parallel version for large graph import random - from functools import partial from multiprocessing import Pool diff --git a/easygraph/functions/centrality/degree.py b/easygraph/functions/centrality/degree.py index 844be16c..001d859f 100644 --- a/easygraph/functions/centrality/degree.py +++ b/easygraph/functions/centrality/degree.py @@ -1,6 +1,5 @@ from easygraph.utils.decorators import * - __all__ = ["degree_centrality", "in_degree_centrality", "out_degree_centrality"] diff --git a/easygraph/functions/centrality/flowbetweenness.py b/easygraph/functions/centrality/flowbetweenness.py index cb866da8..789ddb8d 100644 --- a/easygraph/functions/centrality/flowbetweenness.py +++ b/easygraph/functions/centrality/flowbetweenness.py @@ -3,7 +3,6 @@ from easygraph.utils.decorators import * - __all__ = [ "flowbetweenness_centrality", ] @@ -45,7 +44,7 @@ def flowbetweenness_centrality(G): ----- A flow network is a directed graph where each edge has a capacity and each edge receives a flow. 
""" - if G.is_directed() == False: + if not G.is_directed(): print("Please input a directed graph") return flow_dict = NumberOfFlow(G) @@ -116,10 +115,7 @@ def edmonds_karp(G, source, sink): if u not in adj[v]: adj[v][u] = dict() flag = 1 - if flag == 1: - x = 0 - else: - x = adj[v][u].get("weight", 1) + x = 0 if flag == 1 else adj[v][u].get("weight", 1) adj[v][u].update({"weight": x}) adj[v][u]["weight"] += path_flow v = parent[v] @@ -139,7 +135,7 @@ def bfs(G, source, sink, parent, adj): if u not in adj: continue for v, attr in adj[u].items(): - if (visited[v] == False) and (attr.get("weight", 1) > 0): + if (not visited[v]) and (attr.get("weight", 1) > 0): queue.append(v) visited[v] = True parent[v] = u diff --git a/easygraph/functions/community/LPA.py b/easygraph/functions/community/LPA.py index 3fb8dd75..2626a4d2 100644 --- a/easygraph/functions/community/LPA.py +++ b/easygraph/functions/community/LPA.py @@ -1,15 +1,13 @@ import copy import random - from collections import defaultdict from queue import Queue -import easygraph as eg import numpy as np +import easygraph as eg from easygraph.utils import * - __all__ = [ "LPA", "SLPA", @@ -72,9 +70,9 @@ def LPA(G): label_dict = Next_label_dict if estimate_stop_cond(G, label_dict) is True: break - for node in label_dict.keys(): + for node in label_dict: label = label_dict[node] - if label not in cluster_community.keys(): + if label not in cluster_community: cluster_community[label] = [node] else: cluster_community[label].append(node) @@ -104,10 +102,7 @@ def SLPA(G, T, r): Examples ---------- - >>> SLPA(G, - ... T = 20, - ... r = 0.05 - ... 
) + >>> SLPA(G, T=20, r=0.05) References ---------- @@ -120,7 +115,7 @@ def SLPA(G, T, r): nodes = G.nodes adj = G.adj memory = {i: {i: 1} for i in nodes} - for i in range(0, T): + for _i in range(0, T): listenerslist = list(G.nodes) random.shuffle(listenerslist) for listener in listenerslist: @@ -209,13 +204,7 @@ def HANP(G, m, delta, threshod=1, hier_open=0, combine_open=0): Examples ---------- - >>> HANP(G, - ... m = 0.1, - ... delta = 0.05, - ... threshod = 1, - ... hier_open = 0, - ... combine_open = 0 - ... ) + >>> HANP(G, m=0.1, delta=0.05, threshod=1, hier_open=0, combine_open=0) References ---------- @@ -266,37 +255,33 @@ def HANP(G, m, delta, threshod=1, hier_open=0, combine_open=0): ) score = min(score, score_dict[Next_label_dict[node]]) else: - if old_label == Next_label_dict[node]: - cdelta = 0 - else: - cdelta = delta + cdelta = 0 if old_label == Next_label_dict[node] else delta score_dict[Next_label_dict[node]] = UpdateScore( G, node, label_dict, score_dict, cdelta ) - if hier_open == 1 and combine_open == 1: - if old_score - score > 1 / 3: - old_score = score - ( - records, - G, - label_dict, - score_dict, - node_dict, - Next_label_dict, - nodes, - degrees, - distance_dict, - ) = CombineNodes( - records, - G, - label_dict, - score_dict, - node_dict, - Next_label_dict, - nodes, - degrees, - distance_dict, - ) + if hier_open == 1 and combine_open == 1 and old_score - score > 1 / 3: + old_score = score + ( + records, + G, + label_dict, + score_dict, + node_dict, + Next_label_dict, + nodes, + degrees, + distance_dict, + ) = CombineNodes( + records, + G, + label_dict, + score_dict, + node_dict, + Next_label_dict, + nodes, + degrees, + distance_dict, + ) label_dict = Next_label_dict if ( estimate_stop_cond_HANP(G, label_dict, score_dict, degrees, m, threshod) @@ -310,9 +295,9 @@ def HANP(G, m, delta, threshod=1, hier_open=0, combine_open=0): if loop_count > 20: break print("After %d iterations, HANP complete." 
% loop_count) - for node in label_dict.keys(): + for node in label_dict: label = label_dict[node] - if label not in cluster_community.keys(): + if label not in cluster_community: cluster_community[label] = [node] else: cluster_community[label].append(node) @@ -349,9 +334,10 @@ def BMLPA(G, p): Examples ---------- - >>> BMLPA(G, - ... p = 0.1, - ... ) + >>> BMLPA( + ... G, + ... p=0.1, + ... ) References ---------- diff --git a/easygraph/functions/community/__init__.py b/easygraph/functions/community/__init__.py index 7836259a..0aabd0d7 100644 --- a/easygraph/functions/community/__init__.py +++ b/easygraph/functions/community/__init__.py @@ -1,4 +1,4 @@ -from .LPA import * -from .modularity import * -from .modularity_max_detection import * -from .motif import * +from easygraph.functions.community.LPA import * +from easygraph.functions.community.modularity import * +from easygraph.functions.community.modularity_max_detection import * +from easygraph.functions.community.motif import * diff --git a/easygraph/functions/community/modularity.py b/easygraph/functions/community/modularity.py index aac3dfda..44dfc7fc 100644 --- a/easygraph/functions/community/modularity.py +++ b/easygraph/functions/community/modularity.py @@ -2,7 +2,6 @@ from easygraph.utils import * - __all__ = ["modularity"] diff --git a/easygraph/functions/community/modularity_max_detection.py b/easygraph/functions/community/modularity_max_detection.py index addb8456..e2d988e2 100644 --- a/easygraph/functions/community/modularity_max_detection.py +++ b/easygraph/functions/community/modularity_max_detection.py @@ -2,7 +2,6 @@ from easygraph.utils import * from easygraph.utils.mapped_queue import MappedQueue - __all__ = ["greedy_modularity_communities"] @@ -138,17 +137,11 @@ def greedy_modularity_communities(G, weight="weight"): # Update rows j and k for row, col in [(j, k), (k, j)]: # Save old value for finding heap index - if k in j_set: - d_old = (-dq_dict[row][col], row, col) - else: - d_old = None + 
d_old = (-dq_dict[row][col], row, col) if k in j_set else None # Update dict for j,k only (i is removed below) dq_dict[row][col] = dq_jk # Save old max of per-row heap - if len(dq_heap[row]) > 0: - d_oldmax = dq_heap[row].h[0] - else: - d_oldmax = None + d_oldmax = dq_heap[row].h[0] if len(dq_heap[row]) > 0 else None # Add/update heaps d = (-dq_jk, row, col) if d_old is None: diff --git a/easygraph/functions/community/motif.py b/easygraph/functions/community/motif.py index 24ad59a0..0016c75a 100644 --- a/easygraph/functions/community/motif.py +++ b/easygraph/functions/community/motif.py @@ -1,10 +1,8 @@ import random import easygraph as eg - from easygraph.utils import * - __all__ = ["enumerate_subgraph", "random_enumerate_subgraph"] diff --git a/easygraph/functions/components/__init__.py b/easygraph/functions/components/__init__.py index d1770498..cb5a89b2 100644 --- a/easygraph/functions/components/__init__.py +++ b/easygraph/functions/components/__init__.py @@ -1,3 +1,3 @@ -from .biconnected import * -from .connected import * -from .ego_betweenness import * +from easygraph.functions.components.biconnected import * +from easygraph.functions.components.connected import * +from easygraph.functions.components.ego_betweenness import * diff --git a/easygraph/functions/components/biconnected.py b/easygraph/functions/components/biconnected.py index 11af37d9..7069d5fc 100644 --- a/easygraph/functions/components/biconnected.py +++ b/easygraph/functions/components/biconnected.py @@ -1,12 +1,10 @@ +import contextlib from itertools import chain from easygraph.utils import * - -try: +with contextlib.suppress(ImportError): from cpp_easygraph import cpp_biconnected_dfs_record_edges -except ImportError: - pass __all__ = [ "is_biconnected", diff --git a/easygraph/functions/components/connected.py b/easygraph/functions/components/connected.py index 41e8d7cf..91e8ef09 100644 --- a/easygraph/functions/components/connected.py +++ b/easygraph/functions/components/connected.py @@ -1,6 
+1,5 @@ from easygraph.utils.decorators import * - __all__ = [ "is_connected", "number_connected_components", @@ -111,7 +110,7 @@ def connected_component_of_node(G, node): -------- Returns the connected component of one node `Jack`. - >>> connected_component_of_node(G, node='Jack') + >>> connected_component_of_node(G, node="Jack") """ return set(_plain_bfs(G, node)) diff --git a/easygraph/functions/components/ego_betweenness.py b/easygraph/functions/components/ego_betweenness.py index c53c1e4f..d07b7e3e 100644 --- a/easygraph/functions/components/ego_betweenness.py +++ b/easygraph/functions/components/ego_betweenness.py @@ -25,7 +25,7 @@ def ego_betweenness(G, node): -------- Returns the betwenness centrality of node 1. - >>> ego_betweenness(G,node=1) + >>> ego_betweenness(G, node=1) Reference --------- @@ -47,6 +47,6 @@ def ego_betweenness(G, node): for j in range(n): if i != j and C[i, j] == 1 and B[i, j] != 0: sum += 1.0 / B[i, j] - if flag == False: + if not flag: sum /= 2 return sum diff --git a/easygraph/functions/drawing/__init__.py b/easygraph/functions/drawing/__init__.py index 1d8f9532..af602304 100644 --- a/easygraph/functions/drawing/__init__.py +++ b/easygraph/functions/drawing/__init__.py @@ -1,3 +1,3 @@ -from .drawing import * -from .plot import * -from .positioning import * +from easygraph.functions.drawing.drawing import * +from easygraph.functions.drawing.plot import * +from easygraph.functions.drawing.positioning import * diff --git a/easygraph/functions/drawing/drawing.py b/easygraph/functions/drawing/drawing.py index bee494fe..83ffd7e9 100644 --- a/easygraph/functions/drawing/drawing.py +++ b/easygraph/functions/drawing/drawing.py @@ -1,9 +1,9 @@ import random -import easygraph as eg import matplotlib.pyplot as plt import numpy as np +import easygraph as eg __all__ = ["draw_SHS_center", "draw_SHS_center_kk", "draw_kamada_kawai"] @@ -353,7 +353,7 @@ def draw_kamada_kawai(G, rate=1, style="side"): """ pos = eg.kamada_kawai_layout(G) node = 
np.zeros((len(pos), 2), float) - m, n = 0, 0 + m, _n = 0, 0 if rate == 1: for i in pos: node[m][0] = pos[i][0] diff --git a/easygraph/functions/drawing/plot.py b/easygraph/functions/drawing/plot.py index 82ae2709..90a3ae60 100644 --- a/easygraph/functions/drawing/plot.py +++ b/easygraph/functions/drawing/plot.py @@ -1,9 +1,8 @@ -import easygraph as eg import numpy as np import statsmodels.api as sm - from matplotlib import pyplot as plt +import easygraph as eg __all__ = [ "plot_Followers", @@ -38,7 +37,7 @@ def plot_Followers(G, SHS): degree = G.degree() sample1 = [] sample2 = [] - for i in degree.keys(): + for i in degree: if i in OU: sample1.append(degree[i]) elif i in SHS: @@ -142,7 +141,7 @@ def plot_Betweenness_Centrality(G, SHS): bc = eg.betweenness_centrality(G) sample1 = [] sample2 = [] - for i in bc.keys(): + for i in bc: if i in OU: sample1.append(bc[i]) elif i in SHS: diff --git a/easygraph/functions/drawing/positioning.py b/easygraph/functions/drawing/positioning.py index 03ac1082..817dfc4c 100644 --- a/easygraph/functions/drawing/positioning.py +++ b/easygraph/functions/drawing/positioning.py @@ -1,6 +1,6 @@ -import easygraph as eg import numpy as np +import easygraph as eg __all__ = [ "random_position", @@ -40,7 +40,7 @@ def random_position(G, center=None, dim=2, random_seed=None): rng = np.random.RandomState(seed=random_seed) pos = rng.rand(len(G), dim) + center pos = pos.astype(np.float32) - pos = dict(zip(G, pos)) + pos = dict(zip(G, pos, strict=False)) return pos @@ -78,7 +78,7 @@ def circular_position(G, center=None, scale=1): theta = theta.astype(np.float32) pos = np.column_stack([np.cos(theta), np.sin(theta)]) pos = rescale_position(pos, scale=scale) + center - pos = dict(zip(G, pos)) + pos = dict(zip(G, pos, strict=False)) return pos @@ -142,7 +142,7 @@ def shell_position(G, nlist=None, scale=1, center=None): pos = rescale_position(pos, scale=scale * radius / len(nlist)) + center else: pos = np.array([(scale * radius + center[0], center[1])]) 
- npos.update(zip(nodes, pos)) + npos.update(zip(nodes, pos, strict=False)) radius += 1.0 return npos @@ -151,10 +151,7 @@ def shell_position(G, nlist=None, scale=1, center=None): def _get_center(center, dim): import numpy as np - if center is None: - center = np.zeros(dim) - else: - center = np.asarray(center) + center = np.zeros(dim) if center is None else np.asarray(center) if dim < 2: raise ValueError("cannot handle dimensions < 2") @@ -260,23 +257,20 @@ def kamada_kawai_layout( elif dim == 2: pos = eg.circular_position(G) else: - pos = {n: pt for n, pt in zip(G, np.linspace(0, 1, len(G)))} + pos = {n: pt for n, pt in zip(G, np.linspace(0, 1, len(G)), strict=False)} pos_arr = np.array([pos[n] for n in G]) pos = _kamada_kawai_solve(dist_mtx, pos_arr, dim) - if center is None: - center = np.zeros(dim) - else: - center = np.asarray(center) + center = np.zeros(dim) if center is None else np.asarray(center) if len(center) != dim: msg = "length of center coordinates must match dimension of layout" raise ValueError(msg) pos = eg.rescale_position(pos, scale=scale) + center - return dict(zip(G, pos)) + return dict(zip(G, pos, strict=False)) def _kamada_kawai_solve(dist_mtx, pos_arr, dim): @@ -285,7 +279,6 @@ def _kamada_kawai_solve(dist_mtx, pos_arr, dim): # and starting locations. 
import numpy as np - from scipy.optimize import minimize meanwt = 1e-3 diff --git a/easygraph/functions/drawing/tests/test_positioning.py b/easygraph/functions/drawing/tests/test_positioning.py index 709e3333..628c1b56 100644 --- a/easygraph/functions/drawing/tests/test_positioning.py +++ b/easygraph/functions/drawing/tests/test_positioning.py @@ -39,7 +39,7 @@ def test_shell_position(self): self.assertListEqual(list(actual_result_one.keys()), [1, 6, 5, 4, 10]) - for nodes, coordinates in actual_result_one.items(): + for _nodes, coordinates in actual_result_one.items(): self.assertAlmostEqual(coordinates[0], 4, delta=1.0) self.assertAlmostEqual(coordinates[1], 10, delta=1.0) diff --git a/easygraph/functions/graph_embedding/NOBE.py b/easygraph/functions/graph_embedding/NOBE.py index 820ebcf3..e9e1d98a 100644 --- a/easygraph/functions/graph_embedding/NOBE.py +++ b/easygraph/functions/graph_embedding/NOBE.py @@ -1,9 +1,8 @@ -import easygraph as eg import numpy as np - -from easygraph.utils import * from scipy.sparse.linalg import eigs +import easygraph as eg +from easygraph.utils import * __all__ = ["NOBE", "NOBE_GA"] @@ -27,7 +26,7 @@ def NOBE(G, K): Examples -------- - >>> NOBE(G,K=15) + >>> NOBE(G, K=15) References ---------- @@ -66,7 +65,7 @@ def NOBE_GA(G, K): Examples -------- - >>> NOBE_GA(G,K=15) + >>> NOBE_GA(G, K=15) References ---------- @@ -123,7 +122,7 @@ def graph_to_d_atleast2(G): def Transition(LG): - N = len(LG) + len(LG) M = LG.size() LLG = eg.DiGraph() for i in LG.edges: diff --git a/easygraph/functions/graph_embedding/__init__.py b/easygraph/functions/graph_embedding/__init__.py index 6367fea0..7c770b50 100644 --- a/easygraph/functions/graph_embedding/__init__.py +++ b/easygraph/functions/graph_embedding/__init__.py @@ -1,5 +1,5 @@ -from .deepwalk import * -from .line import LINE -from .NOBE import * -from .node2vec import * -from .sdne import SDNE +from easygraph.functions.graph_embedding.deepwalk import * +from 
easygraph.functions.graph_embedding.line import LINE +from easygraph.functions.graph_embedding.NOBE import * +from easygraph.functions.graph_embedding.node2vec import * +from easygraph.functions.graph_embedding.sdne import SDNE diff --git a/easygraph/functions/graph_embedding/deepwalk.py b/easygraph/functions/graph_embedding/deepwalk.py index 68421740..9cd299bd 100644 --- a/easygraph/functions/graph_embedding/deepwalk.py +++ b/easygraph/functions/graph_embedding/deepwalk.py @@ -1,12 +1,12 @@ import random +from tqdm import tqdm + from easygraph.functions.graph_embedding.node2vec import ( _get_embedding_result_from_gensim_skipgram_model, + learn_embeddings, ) -from easygraph.functions.graph_embedding.node2vec import learn_embeddings from easygraph.utils import * -from tqdm import tqdm - __all__ = ["deepwalk"] @@ -42,16 +42,15 @@ def deepwalk(G, dimensions=128, walk_length=80, num_walks=10, **skip_gram_params Examples -------- - >>> deepwalk(G, - ... dimensions=128, # The graph embedding dimensions. - ... walk_length=80, # Walk length of each random walks. - ... num_walks=10, # Number of random walks. - ... skip_gram_params = dict( # The skip_gram parameters in Python package gensim. - ... window=10, - ... min_count=1, - ... batch_words=4, - ... iter=15 - ... )) + >>> deepwalk( + ... G, + ... dimensions=128, # The graph embedding dimensions. + ... walk_length=80, # Walk length of each random walks. + ... num_walks=10, # Number of random walks. + ... skip_gram_params=dict( # The skip_gram parameters in Python package gensim. + ... window=10, min_count=1, batch_words=4, iter=15 + ... ), + ... 
) References ---------- @@ -78,7 +77,7 @@ def simulate_walks(G, walk_length, num_walks): walks = [] nodes = list(G.nodes) print("Walk iteration:") - for walk_iter in tqdm(range(num_walks)): + for _walk_iter in tqdm(range(num_walks)): random.shuffle(nodes) for node in nodes: walks.append(_deepwalk_walk(G, walk_length=walk_length, start_node=node)) diff --git a/easygraph/functions/graph_embedding/line.py b/easygraph/functions/graph_embedding/line.py index 3cd8d21d..814f0efd 100644 --- a/easygraph/functions/graph_embedding/line.py +++ b/easygraph/functions/graph_embedding/line.py @@ -4,8 +4,7 @@ import numpy as np from easygraph.utils import * -from easygraph.utils.alias import alias_sample -from easygraph.utils.alias import create_alias_table +from easygraph.utils.alias import alias_sample, create_alias_table from easygraph.utils.index_of_node import get_relation_of_index_and_node @@ -28,9 +27,7 @@ def create_model(numNodes, embedding_size, order="second"): except ImportWarning: print("tensorflow not found, please install") pass - from tensorflow.python.keras.layers import Embedding - from tensorflow.python.keras.layers import Input - from tensorflow.python.keras.layers import Lambda + from tensorflow.python.keras.layers import Embedding, Input, Lambda from tensorflow.python.keras.models import Model v_i = Input(shape=(1,)) @@ -92,11 +89,11 @@ def __init__( Examples -------- - >>> model = LINE(G, - ... embedding_size=16, - ... order='all') # The order of model LINE. 'first','second' or 'all'. + >>> model = LINE( + ... G, embedding_size=16, order="all" + ... ) # The order of model LINE. 'first','second' or 'all'. >>> model.train(batch_size=1024, epochs=1, verbose=2) - >>> embeddings = model.get_embeddings() # Returns the graph embedding results. + >>> embeddings = model.get_embeddings() # Returns the graph embedding results. 
References ---------- diff --git a/easygraph/functions/graph_embedding/node2vec.py b/easygraph/functions/graph_embedding/node2vec.py index 808bc02d..44de057b 100644 --- a/easygraph/functions/graph_embedding/node2vec.py +++ b/easygraph/functions/graph_embedding/node2vec.py @@ -1,10 +1,9 @@ import random import numpy as np - -from easygraph.utils import * from tqdm import tqdm +from easygraph.utils import * __all__ = ["node2vec"] @@ -62,18 +61,18 @@ def node2vec( Examples -------- - >>> node2vec(G, - ... dimensions=128, # The graph embedding dimensions. - ... walk_length=80, # Walk length of each random walks. - ... num_walks=10, # Number of random walks. - ... p=1.0, # The `p` possibility in random walk in [1]_ - ... q=1.0, # The `q` possibility in random walk in [1]_ - ... weight_key='weight', - ... skip_gram_params=dict( # The skip_gram parameters in Python package gensim. - ... window=10, - ... min_count=1, - ... batch_words=4 - ... )) + >>> node2vec( + ... G, + ... dimensions=128, # The graph embedding dimensions. + ... walk_length=80, # Walk length of each random walks. + ... num_walks=10, # Number of random walks. + ... p=1.0, # The `p` possibility in random walk in [1]_ + ... q=1.0, # The `q` possibility in random walk in [1]_ + ... weight_key="weight", + ... skip_gram_params=dict( # The skip_gram parameters in Python package gensim. + ... window=10, min_count=1, batch_words=4 + ... ), + ... 
) References ---------- @@ -92,8 +91,7 @@ def node2vec( weight_key=weight_key, ) else: - from joblib import Parallel - from joblib import delayed + from joblib import Parallel, delayed num_walks_lists = np.array_split(range(num_walks), workers) walks = Parallel(n_jobs=workers)( @@ -139,9 +137,9 @@ def change_string_to_node_from_gensim_return_value(value_including_str): embedding_vector[node] = model.wv[str(index_of_node[node])] most_similar_nodes = model.wv.most_similar(str(index_of_node[node])) - most_similar_nodes_of_node[ - node - ] = change_string_to_node_from_gensim_return_value(most_similar_nodes) + most_similar_nodes_of_node[node] = ( + change_string_to_node_from_gensim_return_value(most_similar_nodes) + ) return embedding_vector, most_similar_nodes_of_node @@ -150,7 +148,7 @@ def simulate_walks(G, walk_length, num_walks, p, q, weight_key=None): alias_nodes, alias_edges = _preprocess_transition_probs(G, p, q, weight_key) walks = [] nodes = list(G.nodes) - for walk_iter in tqdm(range(num_walks)): + for _walk_iter in tqdm(range(num_walks)): random.shuffle(nodes) for node in nodes: walks.append( @@ -182,7 +180,6 @@ def _preprocess_transition_probs(G, p, q, weight_key=None): alias_nodes[node] = _alias_setup(normalized_probs) alias_edges = {} - triads = {} if is_directed: for edge in G.edges: diff --git a/easygraph/functions/graph_embedding/sdne.py b/easygraph/functions/graph_embedding/sdne.py index f684fd14..ee537a79 100644 --- a/easygraph/functions/graph_embedding/sdne.py +++ b/easygraph/functions/graph_embedding/sdne.py @@ -59,13 +59,14 @@ def loss_1st(y_true, y_pred): return loss_1st -def create_model(node_size, hidden_size=[256, 128], l1=1e-5, l2=1e-4): +def create_model(node_size, hidden_size=None, l1=1e-5, l2=1e-4): + if hidden_size is None: + hidden_size = [256, 128] try: pass except ImportWarning: print("tensorflow not found, please install") - from tensorflow.python.keras.layers import Dense - from tensorflow.python.keras.layers import Input + from 
tensorflow.python.keras.layers import Dense, Input from tensorflow.python.keras.models import Model from tensorflow.python.keras.regularizers import l1_l2 @@ -101,7 +102,7 @@ class SDNE: def __init__( self, graph, - hidden_size=[32, 16], + hidden_size=None, alpha=1e-6, beta=5.0, nu1=1e-5, @@ -133,7 +134,7 @@ def __init__( >>> model = SDNE(G, ... hidden_size=[256, 128]) # The hidden size in SDNE. >>> model.train(batch_size=3000, epochs=40, verbose=2) - >>> embeddings = model.get_embeddings() # Returns the graph embedding results. + >>> embeddings = model.get_embeddings() # Returns the graph embedding results. References ---------- @@ -142,6 +143,8 @@ def __init__( Knowledge Discovery and Data mining. 2016: 1225-1234. """ + if hidden_size is None: + hidden_size = [32, 16] self.graph = graph self.idx2node, self.node2idx = get_relation_of_index_and_node(self.graph) @@ -191,9 +194,7 @@ def train(self, batch_size=1024, epochs=2, initial_epoch=0, verbose=1): if batch_size >= self.node_size: if batch_size > self.node_size: print( - "batch_size({0}) > node_size({1}),set batch_size = {1}".format( - batch_size, self.node_size - ) + f"batch_size({batch_size}) > node_size({self.node_size}),set batch_size = {self.node_size}" ) batch_size = self.node_size return self.model.fit( @@ -233,8 +234,7 @@ def train(self, batch_size=1024, epochs=2, initial_epoch=0, verbose=1): if verbose > 0: print(f"Epoch {epoch + 1}/{epochs}") print( - "{}s - loss: {: .4f} - 2nd_loss: {: .4f} - 1st_loss: {: .4f}" - .format(epoch_time, losses[0], losses[1], losses[2]) + f"{epoch_time}s - loss: {losses[0]: .4f} - 2nd_loss: {losses[1]: .4f} - 1st_loss: {losses[2]: .4f}" ) return hist diff --git a/easygraph/functions/graph_generator/RandomNetwork.py b/easygraph/functions/graph_generator/RandomNetwork.py index 2b6f71cf..f3ad844a 100644 --- a/easygraph/functions/graph_generator/RandomNetwork.py +++ b/easygraph/functions/graph_generator/RandomNetwork.py @@ -3,7 +3,6 @@ import easygraph as eg - __all__ = [ 
"erdos_renyi_M", "erdos_renyi_P", @@ -35,7 +34,12 @@ def erdos_renyi_M(n, edge, directed=False, FilePath=None): -------- Returns an Erdős-Rényi random graph G. - >>> erdos_renyi_M(100,180,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt") + >>> erdos_renyi_M( + ... 100, + ... 180, + ... directed=False, + ... FilePath="/users/fudanmsn/downloads/RandomNetwork.txt", + ... ) References ---------- @@ -138,7 +142,12 @@ def erdos_renyi_P(n, p, directed=False, FilePath=None): -------- Returns an Erdős-Rényi random graph G - >>> erdos_renyi_P(100,0.5,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt") + >>> erdos_renyi_P( + ... 100, + ... 0.5, + ... directed=False, + ... FilePath="/users/fudanmsn/downloads/RandomNetwork.txt", + ... ) References ---------- @@ -206,7 +215,12 @@ def fast_erdos_renyi_P(n, p, directed=False, FilePath=None): -------- Returns an Erdős-Rényi random graph G - >>> erdos_renyi_P(100,0.5,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt") + >>> erdos_renyi_P( + ... 100, + ... 0.5, + ... directed=False, + ... FilePath="/users/fudanmsn/downloads/RandomNetwork.txt", + ... 
) References ---------- @@ -289,7 +303,7 @@ def WS_Random(n, k, p, FilePath=None): -------- Returns a small-world graph G - >>> WS_Random(100,10,0.3,"/users/fudanmsn/downloads/RandomNetwork.txt") + >>> WS_Random(100, 10, 0.3, "/users/fudanmsn/downloads/RandomNetwork.txt") """ if k >= n: @@ -338,7 +352,7 @@ def WS_Random(n, k, p, FilePath=None): if adjacent[e_del] == []: adjacent.pop(e_del) e_add = random.randint(0, NUM2) - while e_add == i or G.has_edge(i, e_add) == True: + while e_add == i or G.has_edge(i, e_add): e_add = random.randint(0, NUM2) G.add_edge(i, e_add) if i not in adjacent: @@ -356,10 +370,7 @@ def WS_Random(n, k, p, FilePath=None): def writeRandomNetworkToFile(n, adjacent, FilePath): - if FilePath != None: - f = open(FilePath, "w+") - else: - f = open("RandomNetwork.txt", "w+") + f = open(FilePath, "w+") if FilePath is not None else open("RandomNetwork.txt", "w+") adjacent = sorted(adjacent.items(), key=lambda d: d[0]) for i in adjacent: i[1].sort() diff --git a/easygraph/functions/graph_generator/__init__.py b/easygraph/functions/graph_generator/__init__.py index 067fcece..e6d8bba3 100644 --- a/easygraph/functions/graph_generator/__init__.py +++ b/easygraph/functions/graph_generator/__init__.py @@ -1,2 +1,2 @@ -from .classic import * -from .RandomNetwork import * +from easygraph.functions.graph_generator.classic import * +from easygraph.functions.graph_generator.RandomNetwork import * diff --git a/easygraph/functions/graph_generator/classic.py b/easygraph/functions/graph_generator/classic.py index 6b407d52..fbededba 100644 --- a/easygraph/functions/graph_generator/classic.py +++ b/easygraph/functions/graph_generator/classic.py @@ -1,9 +1,7 @@ import itertools from easygraph.classes import Graph -from easygraph.utils import nodes_or_number -from easygraph.utils import pairwise - +from easygraph.utils import nodes_or_number, pairwise __all__ = ["empty_graph", "path_graph", "complete_graph"] diff --git a/easygraph/functions/not_sorted/__init__.py 
b/easygraph/functions/not_sorted/__init__.py index b652e639..767de842 100644 --- a/easygraph/functions/not_sorted/__init__.py +++ b/easygraph/functions/not_sorted/__init__.py @@ -1,5 +1,5 @@ -from .bridges import * -from .cluster import * -from .laplacian import * -from .mst import * -from .pagerank import * +from easygraph.functions.not_sorted.bridges import * +from easygraph.functions.not_sorted.cluster import * +from easygraph.functions.not_sorted.laplacian import * +from easygraph.functions.not_sorted.mst import * +from easygraph.functions.not_sorted.pagerank import * diff --git a/easygraph/functions/not_sorted/bridges.py b/easygraph/functions/not_sorted/bridges.py index b5ef42fa..d0c67f46 100644 --- a/easygraph/functions/not_sorted/bridges.py +++ b/easygraph/functions/not_sorted/bridges.py @@ -1,10 +1,8 @@ from itertools import chain import easygraph as eg - from easygraph.utils.decorators import * - __all__ = ["bridges", "has_bridges"] @@ -59,7 +57,7 @@ def bridges(G, root=None): """ chains = chain_decomposition(G, root=root) chain_edges = set(chain.from_iterable(chains)) - for u, v, t in G.edges: + for u, v, _t in G.edges: if (u, v) not in chain_edges and (v, u) not in chain_edges: yield u, v @@ -155,7 +153,7 @@ def _build_chain(G, u, v, visited): # For each nontree edge going out of node u... 
edges = [] for w, v, d in H.edges: - if w == u and d["nontree"] == True: + if w == u and d["nontree"]: edges.append((w, v)) # edges = ((u, v) for u, v, d in H.out_edges(u, data="nontree") if d) for u, v in edges: diff --git a/easygraph/functions/not_sorted/cluster.py b/easygraph/functions/not_sorted/cluster.py index 5777ffc6..571d2f27 100644 --- a/easygraph/functions/not_sorted/cluster.py +++ b/easygraph/functions/not_sorted/cluster.py @@ -1,13 +1,11 @@ +import contextlib from collections import Counter from itertools import chain from easygraph.utils.decorators import not_implemented_for - -try: +with contextlib.suppress(ImportError): from cpp_easygraph import cpp_clustering -except ImportError: - pass __all__ = ["average_clustering", "clustering"] diff --git a/easygraph/functions/not_sorted/laplacian.py b/easygraph/functions/not_sorted/laplacian.py index 914afd39..b07c3547 100644 --- a/easygraph/functions/not_sorted/laplacian.py +++ b/easygraph/functions/not_sorted/laplacian.py @@ -1,6 +1,5 @@ from easygraph.utils import * - __all__ = ["laplacian"] @@ -41,7 +40,6 @@ def laplacian(G, n_workers=None): if n_workers is not None: # use the parallel version for large graph import random - from functools import partial from multiprocessing import Pool @@ -83,7 +81,7 @@ def laplacian(G, n_workers=None): Xi = copy.deepcopy(X) for j in G: - if j in adj.keys() and i in adj[j].keys(): + if j in adj and i in adj[j]: Xi[j] -= adj[j][i].get("weight", 1) Xi[i] = 0 ELGi = sum(Xi[i] * Xi[i] for i in G) + sum(W[i] for i in G) - 2 * W[i] @@ -112,7 +110,7 @@ def laplacian_parallel(nodes, G, X, W, adj, ELG): Xi = copy.deepcopy(X) for j in G: - if j in adj.keys() and i in adj[j].keys(): + if j in adj and i in adj[j]: Xi[j] -= adj[j][i].get("weight", 1) Xi[i] = 0 ELGi = sum(Xi[i] * Xi[i] for i in G) + sum(W[i] for i in G) - 2 * W[i] diff --git a/easygraph/functions/not_sorted/mst.py b/easygraph/functions/not_sorted/mst.py index e3ac216e..61bbb975 100644 --- 
a/easygraph/functions/not_sorted/mst.py +++ b/easygraph/functions/not_sorted/mst.py @@ -1,12 +1,10 @@ -from heapq import heappop -from heapq import heappush +from heapq import heappop, heappush from itertools import count from math import isnan from operator import itemgetter from easygraph.utils.decorators import * - __all__ = [ "minimum_spanning_edges", "maximum_spanning_edges", @@ -145,7 +143,7 @@ def filter_nan_edges(edges=edges, weight=weight): yield wt, u, v, d edges = sorted(filter_nan_edges(), key=itemgetter(0)) - for wt, u, v, d in edges: + for _wt, u, v, d in edges: if subtrees[u] != subtrees[v]: if data: yield (u, v, d) @@ -655,7 +653,7 @@ def to_sets(self): """ # Ensure fully pruned paths - for x in self.parents.keys(): + for x in self.parents: _ = self[x] # Evaluated for side-effect only yield from groups(self.parents).values() diff --git a/easygraph/functions/not_sorted/pagerank.py b/easygraph/functions/not_sorted/pagerank.py index 3e349bc7..f4078058 100644 --- a/easygraph/functions/not_sorted/pagerank.py +++ b/easygraph/functions/not_sorted/pagerank.py @@ -1,8 +1,6 @@ import easygraph as eg - from easygraph.utils import * - __all__ = ["pagerank"] @@ -32,7 +30,7 @@ def pagerank(G, alpha=0.85): # eigenvector of largest eigenvalue is at ind, normalized largest = np.array(eigenvectors[:, ind]).flatten().real norm = float(largest.sum()) - return dict(zip(G, map(float, largest / norm))) + return dict(zip(G, map(float, largest / norm), strict=False)) def google_matrix(G, alpha): diff --git a/easygraph/functions/not_sorted/tests/test_cluster.py b/easygraph/functions/not_sorted/tests/test_cluster.py index 2e7384ec..95feb280 100644 --- a/easygraph/functions/not_sorted/tests/test_cluster.py +++ b/easygraph/functions/not_sorted/tests/test_cluster.py @@ -1,6 +1,7 @@ -import easygraph as eg import pytest +import easygraph as eg + class TestClustering: @classmethod diff --git a/easygraph/functions/not_sorted/tests/test_pagerank.py 
b/easygraph/functions/not_sorted/tests/test_pagerank.py index 456eba5e..797cc830 100644 --- a/easygraph/functions/not_sorted/tests/test_pagerank.py +++ b/easygraph/functions/not_sorted/tests/test_pagerank.py @@ -1,6 +1,6 @@ -import easygraph as eg import pytest +import easygraph as eg np = pytest.importorskip("numpy") diff --git a/easygraph/functions/path/__init__.py b/easygraph/functions/path/__init__.py index a09c6127..71d2e235 100644 --- a/easygraph/functions/path/__init__.py +++ b/easygraph/functions/path/__init__.py @@ -1 +1 @@ -from .path import * +from easygraph.functions.path.path import * diff --git a/easygraph/functions/path/path.py b/easygraph/functions/path/path.py index 072ccaef..c1b04e18 100644 --- a/easygraph/functions/path/path.py +++ b/easygraph/functions/path/path.py @@ -1,5 +1,6 @@ -from easygraph.utils.decorators import * +import contextlib +from easygraph.utils.decorators import * __all__ = [ "Dijkstra", @@ -11,13 +12,8 @@ "multi_source_dijkstra", ] -try: - from cpp_easygraph import cpp_dijkstra_multisource - from cpp_easygraph import cpp_Floyd - from cpp_easygraph import cpp_Kruskal - from cpp_easygraph import cpp_Prim -except ImportError: - pass +with contextlib.suppress(ImportError): + from cpp_easygraph import cpp_dijkstra_multisource, cpp_Floyd, cpp_Kruskal, cpp_Prim @not_implemented_for("multigraph") @@ -39,7 +35,7 @@ def Dijkstra(G, node): -------- Returns the length of paths from node 1 to remaining nodes - >>> Dijkstra(G,node=1) + >>> Dijkstra(G, node=1) """ return single_source_dijkstra(G, node) @@ -136,7 +132,7 @@ def Prim(G): start = i end = j min_weight = adj[i][j].get("weight", 1) - if start != None and end != None: + if start is not None and end is not None: result_dict[start][end] = min_weight selected.append(end) candidate.remove(end) @@ -231,8 +227,7 @@ def multi_source_dijkstra(G, sources, weight="weight", target=None): def _dijkstra_multisource(G, sources, weight="weight", target=None): if G.cflag == 1: return 
cpp_dijkstra_multisource(G, sources, weight, target) - from heapq import heappop - from heapq import heappush + from heapq import heappop, heappush push = heappush pop = heappop diff --git a/easygraph/functions/structural_holes/AP_Greedy.py b/easygraph/functions/structural_holes/AP_Greedy.py index 9a81725a..d9925fb2 100644 --- a/easygraph/functions/structural_holes/AP_Greedy.py +++ b/easygraph/functions/structural_holes/AP_Greedy.py @@ -2,12 +2,10 @@ import random import easygraph as eg - from easygraph.functions.components.biconnected import generator_articulation_points from easygraph.functions.components.connected import connected_components from easygraph.utils.decorators import * - __all__ = ["common_greedy", "AP_Greedy"] @@ -49,10 +47,12 @@ def common_greedy(G, k, c=1.0, weight="weight"): -------- Returns the top k nodes as structural hole spanners, using **common_greedy**. - >>> common_greedy(G, - ... k = 3, # To find top three structural holes spanners. - ... c = 1.0, # To define zeta: zeta = c * (n*n*n), and zeta is the large value assigned as the shortest distance of two unreachable vertices. - ... weight = 'weight') + >>> common_greedy( + ... G, + ... k=3, # To find top three structural holes spanners. + ... c=1.0, # To define zeta: zeta = c * (n*n*n), and zeta is the large value assigned as the shortest distance of two unreachable vertices. + ... weight="weight", + ... 
) References ---------- @@ -90,7 +90,7 @@ def common_greedy(G, k, c=1.0, weight="weight"): def sort_nodes_by_degree(G, weight="weight"): sorted_nodes = [] - for node, degree in sorted( + for node, _degree in sorted( G.degree(weight=weight).items(), key=lambda x: x[1], reverse=True ): sorted_nodes.append(node) @@ -122,7 +122,7 @@ def procedure1(G, c=1.0): num_subtree_nodes = _get_num_subtree_nodes(spanning_tree, random_root) N_tree = num_subtree_nodes[random_root] - for node, num in num_subtree_nodes.items(): + for _node, num in num_subtree_nodes.items(): upper_bound += 2 * num * (N_tree - num) del component_subgraph, spanning_tree @@ -141,7 +141,7 @@ def _get_spanning_tree_of_component(G): seen = set() def _plain_dfs(u): - for v, edge_data in G.adj[u].items(): + for v, _edge_data in G.adj[u].items(): if v not in seen: seen.add(v) spanning_tree.add_edge(u, v) @@ -162,7 +162,7 @@ def _get_num_subtree_nodes(G, root): def _plain_dfs(u): num_nodes = 1 - for v, edge_data in G.adj[u].items(): + for v, _edge_data in G.adj[u].items(): if v not in seen: seen.add(v) num_nodes += _plain_dfs(v) @@ -265,10 +265,12 @@ def AP_Greedy(G, k, c=1.0, weight="weight"): -------- Returns the top k nodes as structural hole spanners, using **AP_greedy**. - >>> AP_greedy(G, - ... k = 3, # To find top three structural holes spanners. - ... c = 1.0, # To define zeta: zeta = c * (n*n*n), and zeta is the large value assigned as the shortest distance of two unreachable vertices. - ... weight = 'weight') + >>> AP_greedy( + ... G, + ... k=3, # To find top three structural holes spanners. + ... c=1.0, # To define zeta: zeta = c * (n*n*n), and zeta is the large value assigned as the shortest distance of two unreachable vertices. + ... weight="weight", + ... 
) References ---------- @@ -389,7 +391,7 @@ def _get_upper_bound_of_non_ap_nodes(G, ap: list, c=1.0): components = connected_components(G) for component in components: non_articulation_points = component - set(ap) - for node in non_articulation_points: + for _node in non_articulation_points: upper_bound_value = 0 upper_bound_value += sum( (len(temp) * (N_G - len(temp))) for temp in components diff --git a/easygraph/functions/structural_holes/HAM.py b/easygraph/functions/structural_holes/HAM.py index e668df32..1b0e52ef 100644 --- a/easygraph/functions/structural_holes/HAM.py +++ b/easygraph/functions/structural_holes/HAM.py @@ -1,18 +1,15 @@ import scipy.stats as stat - __all__ = ["get_structural_holes_HAM"] from collections import Counter import numpy as np import scipy.linalg as spl import scipy.sparse as sps - -from easygraph.utils import * -from scipy.cluster.vq import kmeans -from scipy.cluster.vq import vq +from scipy.cluster.vq import kmeans, vq from sklearn import metrics +from easygraph.utils import * eps = 2.220446049250313e-16 @@ -56,7 +53,7 @@ def avg_entropy(predicted_labels, actual_labels): for label, items in predicted_labels_dict.items(): N_i = float(len(items)) p_i = [] - for label2, items2 in actual_labels_dict.items(): + for _label2, items2 in actual_labels_dict.items(): common = set(items.tolist()).intersection(set(items2.tolist())) p_ij = float(len(common)) / N_i p_i.append(p_ij) @@ -82,7 +79,7 @@ def load_adj_matrix(G): listE.append(edge[0] - 1) listE.append(edge[1] - 1) adj_tuples = np.array(listE).reshape(-1, 2) - n = len(np.unique(adj_tuples)) + len(np.unique(adj_tuples)) vals = np.array([1] * len(G.edges)) max_id = max(max(adj_tuples[:, 0]), max(adj_tuples[:, 1])) + 1 A = sps.csr_matrix( @@ -132,7 +129,6 @@ def label_by_neighbors(AdjMat, labels): assert AdjMat.shape[0] == len(labels), "dimensions are not equal" unlabeled_idx = labels == 0 num_unlabeled = sum(unlabeled_idx) - count = 0 while num_unlabeled > 0: idxs = 
np.array(np.nonzero(unlabeled_idx)[0]) next_labels = np.zeros(len(labels)) @@ -184,11 +180,18 @@ def get_structural_holes_HAM(G, k, c, ground_truth_labels): Examples -------- - >>> get_structural_holes_HAM(G, - ... k = 2, # To find top two structural holes spanners. - ... c = 2, - ... ground_truth_labels = [[0], [0], [1], [0], [1]] # The ground truth labels for each node - community detection result, for example. - ... ) + >>> get_structural_holes_HAM( + ... G, + ... k=2, # To find top two structural holes spanners. + ... c=2, + ... ground_truth_labels=[ + ... [0], + ... [0], + ... [1], + ... [0], + ... [1], + ... ], # The ground truth labels for each node - community detection result, for example. + ... ) References ---------- @@ -216,7 +219,7 @@ def get_structural_holes_HAM(G, k, c, ground_truth_labels): F = sym(np.random.random((n, c))) # Algorithm 1 - for step in range(max_iter): + for _step in range(max_iter): Q = sps.identity(n).tocsr() P = L.dot(F) for i in range(n): diff --git a/easygraph/functions/structural_holes/HIS.py b/easygraph/functions/structural_holes/HIS.py index 44eda18d..5edf2d5d 100644 --- a/easygraph/functions/structural_holes/HIS.py +++ b/easygraph/functions/structural_holes/HIS.py @@ -1,16 +1,13 @@ import math - from itertools import combinations -from typing import List from easygraph.utils import * - __all__ = ["get_structural_holes_HIS"] @not_implemented_for("multigraph") -def get_structural_holes_HIS(G, C: List[frozenset], epsilon=1e-4, weight="weight"): +def get_structural_holes_HIS(G, C: list[frozenset], epsilon=1e-4, weight="weight"): """Structural hole spanners detection via HIS method. Both **HIS** and **MaxD** are methods in [1]_. @@ -50,11 +47,12 @@ def get_structural_holes_HIS(G, C: List[frozenset], epsilon=1e-4, weight="weight Examples -------- - >>> get_structural_holes_HIS(G, - ... C = [frozenset([1,2,3]), frozenset([4,5,6])], # Two communities - ... epsilon = 0.01, - ... weight = 'weight' - ... 
) + >>> get_structural_holes_HIS( + ... G, + ... C=[frozenset([1, 2, 3]), frozenset([4, 5, 6])], # Two communities + ... epsilon=0.01, + ... weight="weight", + ... ) References @@ -83,7 +81,7 @@ def get_structural_holes_HIS(G, C: List[frozenset], epsilon=1e-4, weight="weight return S, I, H -def initialize(G, C: List[frozenset], S: [tuple], weight="weight"): +def initialize(G, C: list[frozenset], S: [tuple], weight="weight"): I, H = dict(), dict() for node in G.nodes: I[node] = dict() diff --git a/easygraph/functions/structural_holes/ICC.py b/easygraph/functions/structural_holes/ICC.py index 6a3c24ed..328abd1c 100644 --- a/easygraph/functions/structural_holes/ICC.py +++ b/easygraph/functions/structural_holes/ICC.py @@ -1,8 +1,6 @@ import easygraph as eg - from easygraph.utils import * - __all__ = ["ICC", "BICC", "AP_BICC"] @@ -101,7 +99,7 @@ def ICC(G, k): -------- Returns the top k nodes as structural hole spanners, using **ICC**. - >>> ICC(G,k=3) + >>> ICC(G, k=3) References ---------- @@ -118,7 +116,7 @@ def ICC(G, k): MAX = 0 t = v for i in Q: - if MAX < i[1]: + if i[1] > MAX: MAX = i[1] t = i[0] if i_c < MAX: @@ -159,7 +157,7 @@ def BICC(G, k, K, l): -------- Returns the top k nodes as structural hole spanners, using **BICC**. - >>> BICC(G,k=3,K=5,l=4) + >>> BICC(G, k=3, K=5, l=4) References ---------- @@ -176,7 +174,7 @@ def BICC(G, k, K, l): MIN = 10000000 t = v for i in H: - if MIN > i[1]: + if i[1] < MIN: MIN = i[1] t = i[0] if b_i_c > MIN: @@ -191,7 +189,7 @@ def BICC(G, k, K, l): MAX = 0 t = v for i in V: - if MAX < i[1]: + if i[1] > MAX: MAX = i[1] t = i[0] if i_c < MAX: @@ -233,7 +231,7 @@ def AP_BICC(G, k, K, l): -------- Returns the top k nodes as structural hole spanners, using **AP_BICC**. 
- >>> AP_BICC(G,k=3,K=5,l=4) + >>> AP_BICC(G, k=3, K=5, l=4) References ---------- @@ -251,7 +249,7 @@ def AP_BICC(G, k, K, l): MIN = 10000000 t = v for i in T: - if MIN > i[1]: + if i[1] < MIN: MIN = i[1] t = i[0] if A[v]["c"] > MIN: @@ -262,7 +260,7 @@ def AP_BICC(G, k, K, l): for i in G.nodes: if i not in A: U.append(i) - kk = k - len(T) + k - len(T) Q = [] for v in U: b_i_c = bounded_inverse_closeness_centrality(G, v, l) @@ -272,7 +270,7 @@ def AP_BICC(G, k, K, l): MIN = 10000000 t = v for i in Q: - if MIN > i[1]: + if i[1] < MIN: MIN = i[1] t = i[0] if b_i_c > MIN: @@ -282,7 +280,7 @@ def AP_BICC(G, k, K, l): MAX = 0 t = None for i in Q: - if MAX < i[1]: + if i[1] > MAX: MAX = i[1] t = i[0] T.append([t, A[t]["c"]]) diff --git a/easygraph/functions/structural_holes/MaxD.py b/easygraph/functions/structural_holes/MaxD.py index 1cc29cab..773d2aa1 100644 --- a/easygraph/functions/structural_holes/MaxD.py +++ b/easygraph/functions/structural_holes/MaxD.py @@ -1,13 +1,10 @@ -from typing import List - from easygraph.utils import * - __all__ = ["get_structural_holes_MaxD"] @not_implemented_for("multigraph") -def get_community_kernel(G, C: List[frozenset], weight="weight"): +def get_community_kernel(G, C: list[frozenset], weight="weight"): """ To get community kernels with most degrees. Parameters @@ -53,7 +50,7 @@ def get_community_kernel(G, C: List[frozenset], weight="weight"): return kernels -def get_structural_holes_MaxD(G, k, C: List[frozenset]): +def get_structural_holes_MaxD(G, k, C: list[frozenset]): """Structural hole spanners detection via MaxD method. Both **HIS** and **MaxD** are methods in [1]_. @@ -77,10 +74,11 @@ def get_structural_holes_MaxD(G, k, C: List[frozenset]): Examples -------- - >>> get_structural_holes_MaxD(G, - ... k = 5, # To find top five structural holes spanners. - ... C = [frozenset([1,2,3]), frozenset([4,5,6])] # Two communities - ... ) + >>> get_structural_holes_MaxD( + ... G, + ... k=5, # To find top five structural holes spanners. 
+ ... C=[frozenset([1, 2, 3]), frozenset([4, 5, 6])], # Two communities + ... ) References @@ -113,7 +111,7 @@ def get_structural_holes_MaxD(G, k, C: List[frozenset]): save.append(True) q = [] ans_list = [] - for step in range(k): + for _step in range(k): q.clear() sflow.clear() for i in range(n): @@ -126,7 +124,7 @@ def get_structural_holes_MaxD(G, k, C: List[frozenset]): sflow[i % n] += flow[k_] k_ = nex[k_] for i in range(n): - if save[i] == False: + if not save[i]: q.append((-1, i)) else: q.append((sflow[i] + G_index.degree(weight="weight")[i + 1], i)) @@ -134,7 +132,7 @@ def get_structural_holes_MaxD(G, k, C: List[frozenset]): q.reverse() candidates = [] for i in range(n): - if save[q[i][1]] == True and len(candidates) < k: + if save[q[i][1]] and len(candidates) < k: candidates.append(q[i][1]) ret = pick_candidates(n, candidates, kernels, save) ans_list.append(ret[1] + 1) @@ -248,7 +246,7 @@ def dinic_bfs(): k_ = Q[cl] i = head[k_] while i >= 0: - if flow[i] < capa[i] and dsave[point[i]] == True and dist[point[i]] < 0: + if flow[i] < capa[i] and dsave[point[i]] and dist[point[i]] < 0: dist[point[i]] = dist[k_] + 1 Q.append(point[i]) i = nex[i] @@ -326,7 +324,7 @@ def max_flow(n, kernels, save, prev_flow=None): for i in range(node): dsave.append(True) - if prev_flow != None: + if prev_flow is not None: for i in range(nedge): flow.append(prev_flow[i]) else: @@ -360,9 +358,8 @@ def init_MaxD(_node, _src, _dest): dest = _dest global point, capa, flow, nex, head head.clear() - for i in range(node): + for _i in range(node): head.append(-1) - nedge = 0 point.clear() capa.clear() flow.clear() diff --git a/easygraph/functions/structural_holes/NOBE.py b/easygraph/functions/structural_holes/NOBE.py index 0485900e..cec8eec0 100644 --- a/easygraph/functions/structural_holes/NOBE.py +++ b/easygraph/functions/structural_holes/NOBE.py @@ -1,9 +1,8 @@ -import easygraph as eg import numpy as np +import easygraph as eg from easygraph.utils import * - __all__ = ["NOBE_SH", 
"NOBE_GA_SH"] from sklearn.cluster import KMeans @@ -31,7 +30,7 @@ def NOBE_SH(G, K, topk): Examples -------- - >>> NOBE_SH(G,K=8,topk=5) + >>> NOBE_SH(G, K=8, topk=5) References ---------- @@ -94,7 +93,7 @@ def NOBE_GA_SH(G, K, topk): Examples -------- - >>> NOBE_GA_SH(G,K=8,topk=5) + >>> NOBE_GA_SH(G, K=8, topk=5) References ---------- diff --git a/easygraph/functions/structural_holes/SHII_metric.py b/easygraph/functions/structural_holes/SHII_metric.py index 5961c8c0..d69d8079 100644 --- a/easygraph/functions/structural_holes/SHII_metric.py +++ b/easygraph/functions/structural_holes/SHII_metric.py @@ -1,9 +1,10 @@ import math import random -import easygraph as eg import numpy as np +import easygraph as eg + class NodeParams: def __init__(self, active, inWeight, threshold): @@ -87,7 +88,7 @@ def structural_hole_influence_index( for community_label in range(len(C)): nodesInCommunity = [] seedSetInCommunity = [] - for node in node_label_pair.keys(): + for node in node_label_pair: if node_label_pair[node] == community_label: nodesInCommunity.append(node) if node in S: @@ -114,16 +115,15 @@ def structural_hole_influence_index( if neighbor not in seedNeighborSet: seedNeighborSet.append(neighbor) count_neighbor = count_neighbor + 1 - if count_neighbor > 0: - if ( - len(queue) == 1 - and len(oneSeedSet) + len(seedNeighborSet) < seedSetSize - ): - for node in seedNeighborSet: - if node not in oneSeedSet: - oneSeedSet.append(node) - queue.append(node) - seedNeighborSet.clear() + if count_neighbor > 0 and ( + len(queue) == 1 + and len(oneSeedSet) + len(seedNeighborSet) < seedSetSize + ): + for node in seedNeighborSet: + if node not in oneSeedSet: + oneSeedSet.append(node) + queue.append(node) + seedNeighborSet.clear() queue.pop(0) avg_censor_score_1 = 0.0 @@ -174,9 +174,11 @@ def _independent_cascade(G, S, community_label, countIterations, node_label_pair avg_result_1 = 0 avg_result_2 = 0 N = G.number_of_nodes() - for b in range(countIterations): + for _b in 
range(countIterations): # print(b, " in ", countIterations) - p_vw = np.zeros((N, N)) # 节点被激活时,激活其它节点的概率,a对b的影响等于b对a的影响 + p_vw = np.zeros( + (N, N) + ) # 节点被激活时,激活其它节点的概率,a对b的影响等于b对a的影响 for random_i in range(N): for random_j in range(random_i + 1, N): num = random.random() @@ -222,7 +224,7 @@ def _linear_threshold(G, S, community_label, countIterations, node_label_pair): tol = 0.00001 avg_result_1 = 0 avg_result_2 = 0 - for b in range(countIterations): + for _b in range(countIterations): activeNodes = [] # T is the set of nodes that are to be processed T = [] @@ -232,7 +234,7 @@ def _linear_threshold(G, S, community_label, countIterations, node_label_pair): for neighbor in G.neighbors(node=v): if neighbor not in S: weight_degree = 1.0 / float(G.in_degree()[neighbor]) - if neighbor not in Q.keys(): + if neighbor not in Q: np = NodeParams(False, weight_degree, random.random()) Q[neighbor] = np T.append(neighbor) @@ -248,7 +250,7 @@ def _linear_threshold(G, S, community_label, countIterations, node_label_pair): if neighbor in S: continue weight_degree = 1.0 / float(G.in_degree()[neighbor]) - if neighbor not in Q.keys(): + if neighbor not in Q: np = NodeParams(False, weight_degree, random.random()) Q[neighbor] = np T.append(neighbor) diff --git a/easygraph/functions/structural_holes/__init__.py b/easygraph/functions/structural_holes/__init__.py index 91b10969..275b5944 100644 --- a/easygraph/functions/structural_holes/__init__.py +++ b/easygraph/functions/structural_holes/__init__.py @@ -1,10 +1,10 @@ -from .AP_Greedy import * -from .evaluation import * -from .HAM import * -from .HIS import * -from .ICC import * -from .MaxD import * -from .metrics import * -from .NOBE import * -from .strong_connected_component import * -from .weakTie import * +from easygraph.functions.structural_holes.AP_Greedy import * +from easygraph.functions.structural_holes.evaluation import * +from easygraph.functions.structural_holes.HAM import * +from easygraph.functions.structural_holes.HIS 
import * +from easygraph.functions.structural_holes.ICC import * +from easygraph.functions.structural_holes.MaxD import * +from easygraph.functions.structural_holes.metrics import * +from easygraph.functions.structural_holes.NOBE import * +from easygraph.functions.structural_holes.strong_connected_component import * +from easygraph.functions.structural_holes.weakTie import * diff --git a/easygraph/functions/structural_holes/evaluation.py b/easygraph/functions/structural_holes/evaluation.py index 07bd3dfe..3fb19b1c 100644 --- a/easygraph/functions/structural_holes/evaluation.py +++ b/easygraph/functions/structural_holes/evaluation.py @@ -1,14 +1,10 @@ +import contextlib import math from easygraph.utils import * - -try: - from cpp_easygraph import cpp_constraint - from cpp_easygraph import cpp_effective_size - from cpp_easygraph import cpp_hierarchy -except ImportError: - pass +with contextlib.suppress(ImportError): + from cpp_easygraph import cpp_constraint, cpp_effective_size, cpp_hierarchy __all__ = ["effective_size", "efficiency", "constraint", "hierarchy"] @@ -103,10 +99,15 @@ def effective_size(G, nodes=None, weight=None, n_workers=None): The Effective Size of node in `nodes`. Examples -------- - >>> effective_size(G, - ... nodes=[1,2,3], # Compute the Effective Size of some nodes. The default is None for all nodes in G. - ... weight='weight' # The weight key of the graph. The default is None for unweighted graph. - ... ) + >>> effective_size( + ... G, + ... nodes=[ + ... 1, + ... 2, + ... 3, + ... ], # Compute the Effective Size of some nodes. The default is None for all nodes in G. + ... weight="weight", # The weight key of the graph. The default is None for unweighted graph. + ... ) References ---------- .. [1] Burt R S. Structural holes: The social structure of competition[M]. 
@@ -123,7 +124,6 @@ def effective_size(G, nodes=None, weight=None, n_workers=None): if not G.is_directed() and weight is None: if n_workers is not None: import random - from functools import partial from multiprocessing import Pool @@ -154,7 +154,6 @@ def effective_size(G, nodes=None, weight=None, n_workers=None): else: if n_workers is not None: import random - from functools import partial from multiprocessing import Pool @@ -197,10 +196,15 @@ def efficiency(G, nodes=None, weight=None): The Efficiency of node in `nodes`. Examples -------- - >>> efficiency(G, - ... nodes=[1,2,3], # Compute the Efficiency of some nodes. The default is None for all nodes in G. - ... weight='weight' # The weight key of the graph. The default is None for unweighted graph. - ... ) + >>> efficiency( + ... G, + ... nodes=[ + ... 1, + ... 2, + ... 3, + ... ], # Compute the Efficiency of some nodes. The default is None for all nodes in G. + ... weight="weight", # The weight key of the graph. The default is None for unweighted graph. + ... ) References ---------- .. [1] Burt R S. Structural holes: The social structure of competition[M]. @@ -247,11 +251,16 @@ def constraint(G, nodes=None, weight=None, n_workers=None): The Constraint of node in `nodes`. Examples -------- - >>> constraint(G, - ... nodes=[1,2,3], # Compute the Constraint of some nodes. The default is None for all nodes in G. - ... weight='weight', # The weight key of the graph. The default is None for unweighted graph. - ... n_workers=4 # Parallel computing on four workers. The default is None for serial computing. - ... ) + >>> constraint( + ... G, + ... nodes=[ + ... 1, + ... 2, + ... 3, + ... ], # Compute the Constraint of some nodes. The default is None for all nodes in G. + ... weight="weight", # The weight key of the graph. The default is None for unweighted graph. + ... n_workers=4, # Parallel computing on four workers. The default is None for serial computing. + ... ) References ---------- .. [1] Burt R S. 
Structural holes: The social structure of competition[M]. @@ -278,7 +287,6 @@ def compute_constraint_of_v(v): if n_workers is not None: import random - from functools import partial from multiprocessing import Pool @@ -376,7 +384,6 @@ def hierarchy(G, nodes=None, weight=None, n_workers=None): hierarchy = {} if n_workers is not None: import random - from functools import partial from multiprocessing import Pool diff --git a/easygraph/functions/structural_holes/maxBlock.py b/easygraph/functions/structural_holes/maxBlock.py index a7e8bb66..f308a45d 100644 --- a/easygraph/functions/structural_holes/maxBlock.py +++ b/easygraph/functions/structural_holes/maxBlock.py @@ -2,12 +2,13 @@ import random import sys -import easygraph as eg +from strong_connected_component import ( + condensation, + number_strongly_connected_components, +) +import easygraph as eg from easygraph.utils import * -from strong_connected_component import condensation -from strong_connected_component import number_strongly_connected_components - __all__ = [ "maxBlock", @@ -24,9 +25,9 @@ def __init__(self, N, M): self.h = [] self.ne = [] self.to = [] - for i in range(N + 1): + for _i in range(N + 1): self.h.append(0) - for i in range(max(N + 1, M + 1)): + for _i in range(max(N + 1, M + 1)): self.ne.append(0) self.to.append(0) @@ -190,10 +191,10 @@ def _get_idom(G, G_tr, node_s, ans_real, desc_set_real): desc_set[node_map[node]] = set() _dfs(node_map[node_s], tr, ans, desc_set) - for key in ans.keys(): + for key in ans: ans[key] -= 1 ans_real[index_map[key]] = ans[key] - for key in desc_set.keys(): + for key in desc_set: desc_set_real[index_map[key]] = set() for value in desc_set[key]: desc_set_real[index_map[key]].add(index_map[value]) @@ -299,7 +300,7 @@ def _get_estimated_opt(G, f_set, k, c, delta): print("Estimating the optimal value OPT...") n = G.number_of_nodes() opt_ub = 0 - for f_key in f_set.keys(): + for f_key in f_set: opt_ub = opt_ub + f_set[f_key] opt_ub = opt_ub * k * (n - 1) T = 
math.log((opt_ub / (delta / 2)), 2) @@ -323,7 +324,7 @@ def _find_separation_nodes(G): incoming_info = G_s.graph["incoming_info"] G_s_undirected = eg.Graph() sep_nodes = set() - for node in (G_s.nodes).keys(): + for node in (G_s.nodes): SCC_mapping[node] = G_s.nodes[node]["member"] if len(G_s.nodes[node]["member"]) == 1: sep_nodes.add(node) @@ -540,7 +541,7 @@ def maxBlockFast(G, k, f_set=None, L=None, flag_weight=False): for node_u in G_tr.nodes: D_u = 0 for desc in desc_set[node_u]: - if desc not in d_dict.keys(): + if desc not in d_dict: print( "Error: desc:", desc, @@ -565,10 +566,10 @@ def maxBlockFast(G, k, f_set=None, L=None, flag_weight=False): non_considered_nodes.remove(chosen_node) else: V_set = set() - for key in SCC_mapping.keys(): + for key in SCC_mapping: for node in SCC_mapping[key]: if (node in non_considered_nodes) and ( - node not in incoming_info.keys() + node not in incoming_info ): V_set.add(node) if len(V_set) > 0: @@ -582,7 +583,7 @@ def maxBlockFast(G, k, f_set=None, L=None, flag_weight=False): for node_u in G_tr.nodes: D_u = 0 for desc in desc_set[node_u]: - if desc not in d_dict.keys(): + if desc not in d_dict: print( "Error: desc:", desc, diff --git a/easygraph/functions/structural_holes/metrics.py b/easygraph/functions/structural_holes/metrics.py index 5ba5491a..4bdc5e27 100644 --- a/easygraph/functions/structural_holes/metrics.py +++ b/easygraph/functions/structural_holes/metrics.py @@ -1,12 +1,11 @@ import math import random -import easygraph as eg import numpy as np +import easygraph as eg from easygraph.utils import * - __all__ = [ "sum_of_shortest_paths", "nodes_of_max_cc_without_shs", @@ -34,8 +33,8 @@ def sum_of_shortest_paths(G, S): Examples -------- - >>> G_t=eg.datasets.get_graph_blogcatalog() - >>> S_t=eg.AP_Greedy(G_t, 10000) + >>> G_t = eg.datasets.get_graph_blogcatalog() + >>> S_t = eg.AP_Greedy(G_t, 10000) >>> diff = sum_of_shortest_paths(G_t, S_t) >>> print(diff) @@ -84,8 +83,8 @@ def nodes_of_max_cc_without_shs(G, S): 
Examples -------- - >>> G_t=eg.datasets.get_graph_blogcatalog() - >>> S_t=eg.AP_Greedy(G_t, 10000) + >>> G_t = eg.datasets.get_graph_blogcatalog() + >>> S_t = eg.AP_Greedy(G_t, 10000) >>> maxx = nodes_of_max_cc_without_shs(G_t, S_t) >>> print(maxx) @@ -187,7 +186,7 @@ def structural_hole_influence_index( for community_label in range(len(C)): nodesInCommunity = [] seedSetInCommunity = [] - for node in node_label_pair.keys(): + for node in node_label_pair: if node_label_pair[node] == community_label: nodesInCommunity.append(node) if node in S: @@ -214,16 +213,15 @@ def structural_hole_influence_index( if neighbor not in seedNeighborSet: seedNeighborSet.append(neighbor) count_neighbor = count_neighbor + 1 - if count_neighbor > 0: - if ( - len(queue) == 1 - and len(oneSeedSet) + len(seedNeighborSet) < seedSetSize - ): - for node in seedNeighborSet: - if node not in oneSeedSet: - oneSeedSet.append(node) - queue.append(node) - seedNeighborSet.clear() + if count_neighbor > 0 and ( + len(queue) == 1 + and len(oneSeedSet) + len(seedNeighborSet) < seedSetSize + ): + for node in seedNeighborSet: + if node not in oneSeedSet: + oneSeedSet.append(node) + queue.append(node) + seedNeighborSet.clear() queue.pop(0) avg_censor_score_1 = 0.0 @@ -274,9 +272,11 @@ def _independent_cascade(G, S, community_label, countIterations, node_label_pair avg_result_1 = 0 avg_result_2 = 0 N = G.number_of_nodes() - for b in range(countIterations): + for _b in range(countIterations): # print(b, " in ", countIterations) - p_vw = np.zeros((N, N)) # 节点被激活时,激活其它节点的概率,a对b的影响等于b对a的影响 + p_vw = np.zeros( + (N, N) + ) # 节点被激活时,激活其它节点的概率,a对b的影响等于b对a的影响 for random_i in range(N): for random_j in range(random_i + 1, N): num = random.random() @@ -322,7 +322,7 @@ def _linear_threshold(G, S, community_label, countIterations, node_label_pair): tol = 0.00001 avg_result_1 = 0 avg_result_2 = 0 - for b in range(countIterations): + for _b in range(countIterations): activeNodes = [] # T is the set of nodes that are to be 
processed T = [] @@ -332,7 +332,7 @@ def _linear_threshold(G, S, community_label, countIterations, node_label_pair): for neighbor in G.neighbors(node=v): if neighbor not in S: weight_degree = 1.0 / float(G.in_degree()[neighbor]) - if neighbor not in Q.keys(): + if neighbor not in Q: np = NodeParams(False, weight_degree, random.random()) Q[neighbor] = np T.append(neighbor) @@ -348,7 +348,7 @@ def _linear_threshold(G, S, community_label, countIterations, node_label_pair): if neighbor in S: continue weight_degree = 1.0 / float(G.in_degree()[neighbor]) - if neighbor not in Q.keys(): + if neighbor not in Q: np = NodeParams(False, weight_degree, random.random()) Q[neighbor] = np T.append(neighbor) diff --git a/easygraph/functions/structural_holes/strong_connected_component.py b/easygraph/functions/structural_holes/strong_connected_component.py index aa9c7a1c..dc499d2b 100644 --- a/easygraph/functions/structural_holes/strong_connected_component.py +++ b/easygraph/functions/structural_holes/strong_connected_component.py @@ -1,8 +1,6 @@ import easygraph as eg - from easygraph.utils.decorators import * - __all__ = [ "number_strongly_connected_components", "strongly_connected_components", @@ -173,7 +171,7 @@ def condensation(G, scc=None): for edge in G.edges: if mapping[edge[0]] != mapping[edge[1]]: C.add_edge(mapping[edge[0]], mapping[edge[1]]) - if edge[1] not in incoming_info.keys(): + if edge[1] not in incoming_info: incoming_info[edge[1]] = set() incoming_info[edge[1]].add(edge[0]) C.graph["incoming_info"] = incoming_info diff --git a/easygraph/functions/structural_holes/weakTie.py b/easygraph/functions/structural_holes/weakTie.py index f19a2298..31aa8a27 100644 --- a/easygraph/functions/structural_holes/weakTie.py +++ b/easygraph/functions/structural_holes/weakTie.py @@ -1,8 +1,6 @@ import easygraph as eg - from easygraph.utils import * - __all__ = [ "weakTie", "weakTieLocal", @@ -109,10 +107,9 @@ def _computeCloseness(G, c, u, threshold, length): n = 0 strength_sum_u = 
0 for v in c: - if u in G[v] and v != u: - if G[v][u]["strength"] != 0: - n += 1 - strength_sum_u += G[v][u]["strength"] + if u in G[v] and v != u and G[v][u]["strength"] != 0: + n += 1 + strength_sum_u += G[v][u]["strength"] closeness_c_u = (strength_sum_u - n * threshold) / length return closeness_c_u diff --git a/easygraph/readwrite/edgelist.py b/easygraph/readwrite/edgelist.py index f8b5a18b..fac2bfdb 100644 --- a/easygraph/readwrite/edgelist.py +++ b/easygraph/readwrite/edgelist.py @@ -1,6 +1,5 @@ import easygraph as eg - __all__ = [ "parse_edgelist", ] @@ -100,10 +99,7 @@ def parse_edgelist( elif data is True: # no edge types specified try: # try to evaluate as dictionary - if delimiter == ",": - edgedata_str = ",".join(d) - else: - edgedata_str = " ".join(d) + edgedata_str = ",".join(d) if delimiter == "," else " ".join(d) edgedata = dict(literal_eval(edgedata_str.strip())) except Exception as err: raise TypeError( @@ -116,7 +112,7 @@ def parse_edgelist( f"Edge data {d} and data_keys {data} are not the same length" ) edgedata = {} - for (edge_key, edge_type), edge_value in zip(data, d): + for (edge_key, edge_type), edge_value in zip(data, d, strict=False): try: edge_value = edge_type(edge_value) except Exception as err: diff --git a/easygraph/readwrite/gexf.py b/easygraph/readwrite/gexf.py index aea2edb6..7eca6251 100644 --- a/easygraph/readwrite/gexf.py +++ b/easygraph/readwrite/gexf.py @@ -1,17 +1,16 @@ import itertools import time - -from xml.etree.ElementTree import Element -from xml.etree.ElementTree import ElementTree -from xml.etree.ElementTree import SubElement -from xml.etree.ElementTree import register_namespace -from xml.etree.ElementTree import tostring +from xml.etree.ElementTree import ( + Element, + ElementTree, + SubElement, + register_namespace, + tostring, +) import easygraph as eg - from easygraph.utils import * - __all__ = ["write_gexf", "relabel_gexf_graph", "generate_gexf", "read_gexf"] @@ -132,10 +131,7 @@ def read_gexf(path, 
node_type=None, relabel=False, version="1.2draft"): .. [1] GEXF File Format, http://gexf.net/ """ reader = GEXFReader(node_type=node_type, version=version) - if relabel: - G = relabel_gexf_graph(reader(path)) - else: - G = reader(path) + G = relabel_gexf_graph(reader(path)) if relabel else reader(path) return G @@ -187,7 +183,7 @@ def construct_types(self): (np.float64, "float"), (np.float32, "float"), (np.float16, "float"), - (np.float_, "float"), + (np.float64, "float"), (np.int_, "int"), (np.int8, "int"), (np.int16, "int"), @@ -252,7 +248,7 @@ def __init__( # Make meta element a non-graph element # Also add lastmodifieddate as attribute, not tag meta_element = Element("meta") - subelement_text = f"EasyGraph" + subelement_text = "EasyGraph" SubElement(meta_element, "creator").text = subelement_text meta_element.set("lastmodifieddate", time.strftime("%Y-%m-%d")) self.xml.append(meta_element) @@ -283,20 +279,14 @@ def __str__(self): def add_graph(self, G): # first pass through G collecting edge ids - for u, v, dd in G.edges: + for _u, _v, dd in G.edges: eid = dd.get("id") if eid is not None: self.all_edge_ids.add(str(eid)) # set graph attributes - if G.graph.get("mode") == "dynamic": - mode = "dynamic" - else: - mode = "static" + mode = "dynamic" if G.graph.get("mode") == "dynamic" else "static" # Add a graph element to the XML - if G.is_directed(): - default = "directed" - else: - default = "undirected" + default = "directed" if G.is_directed() else "undirected" name = G.graph.get("name", "") graph_element = Element("graph", defaultedgetype=default, mode=mode, name=name) self.graph_element = graph_element @@ -663,10 +653,7 @@ def __call__(self, stream): def make_graph(self, graph_xml): edgedefault = graph_xml.get("defaultedgetype", None) - if edgedefault == "directed": - G = eg.MultiDiGraph() - else: - G = eg.MultiGraph() + G = eg.MultiDiGraph() if edgedefault == "directed" else eg.MultiGraph() # graph attributes graph_name = graph_xml.get("name", "") @@ -733,10 
+720,7 @@ def make_graph(self, graph_xml): # switch to Graph or DiGraph if no parallel edges were found. if self.simple_graph: - if G.is_directed(): - G = eg.DiGraph(G) - else: - G = eg.Graph(G) + G = eg.DiGraph(G) if G.is_directed() else eg.Graph(G) return G def add_node(self, G, node_xml, node_attr, node_pid=None): @@ -998,7 +982,7 @@ def relabel_gexf_graph(G): raise EasyGraphError( "Failed to relabel nodes: missing node labels found. Use relabel=False." ) from err - x, y = zip(*mapping) + x, y = zip(*mapping, strict=False) if len(set(y)) != len(G): raise EasyGraphError( "Failed to relabel nodes: duplicate node labels found. Use relabel=False." diff --git a/easygraph/readwrite/gml.py b/easygraph/readwrite/gml.py index d9815195..87f07834 100644 --- a/easygraph/readwrite/gml.py +++ b/easygraph/readwrite/gml.py @@ -28,25 +28,19 @@ `Network data page `_. """ - +import contextlib import html.entities as htmlentitydefs import re -import warnings - from ast import literal_eval from collections import defaultdict from enum import Enum from io import StringIO -from typing import Any -from typing import NamedTuple -from unicodedata import category +from typing import Any, NamedTuple import easygraph as eg - from easygraph.utils import open_file from easygraph.utils.exception import EasyGraphError - __all__ = ["read_gml", "parse_gml", "generate_gml", "write_gml"] LIST_START_VALUE = "_easygraph_list_start" @@ -74,10 +68,7 @@ def fixup(m): text = m.group(0) if text[1] == "#": # Character reference - if text[2] == "x": - code = int(text[3:-1], 16) - else: - code = int(text[2:-1]) + code = int(text[3:-1], 16) if text[2] == "x" else int(text[2:-1]) else: # Named entity try: @@ -110,8 +101,6 @@ def literal_destringizer(rep): ValueError If `rep` is not a Python literal. """ - msg = "literal_destringizer is deprecated and will be removed in 3.0." 
- warnings.warn(msg, DeprecationWarning) if isinstance(rep, str): orig_rep = rep try: @@ -291,10 +280,8 @@ def parse_kv(curr_token): elif category == Pattern.STRINGS: value = unescape(curr_token.value[1:-1]) if destringizer: - try: + with contextlib.suppress(ValueError): value = destringizer(value) - except ValueError: - pass curr_token = next(tokens) elif category == Pattern.DICT_START: curr_token, value = parse_dict(curr_token) @@ -304,10 +291,8 @@ def parse_kv(curr_token): # String convert the token value value = unescape(str(curr_token.value)) if destringizer: - try: + with contextlib.suppress(ValueError): value = destringizer(value) - except ValueError: - pass curr_token = next(tokens) except Exception: msg = ( @@ -547,7 +532,7 @@ def stringize(key, value, ignored_keys, indent, in_list=False): yield from stringize(attr, value, ignored_keys, " ") # Output node data - node_id = dict(zip(G, range(len(G)))) + node_id = dict(zip(G, range(len(G)), strict=False)) ignored_keys = {"id", "label"} for node, attrs in G.nodes.items(): yield " node [" @@ -724,9 +709,6 @@ def write_gml(G, path, stringizer=None): def literal_stringizer(value): - msg = "literal_stringizer is deprecated and will be removed in 3.0." - warnings.warn(msg, DeprecationWarning) - def stringize(value): if isinstance(value, (int, bool)) or value is None: if value is True: # GML uses 1/0 for boolean values. 
diff --git a/easygraph/readwrite/graphml.py b/easygraph/readwrite/graphml.py index 8f577105..cebbbc77 100644 --- a/easygraph/readwrite/graphml.py +++ b/easygraph/readwrite/graphml.py @@ -42,15 +42,12 @@ """ import warnings - from collections import defaultdict import easygraph as eg - from easygraph.utils import open_file from easygraph.utils.exception import EasyGraphError - __all__ = [ "write_graphml", "read_graphml", @@ -418,7 +415,7 @@ def construct_types(self): (np.float64, "float"), (np.float32, "float"), (np.float16, "float"), - (np.float_, "float"), + (np.float64, "float"), (np.int_, "int"), (np.int8, "int"), (np.int16, "int"), @@ -536,10 +533,7 @@ def get_key(self, name, attr_type, scope, default): try: return self.keys[keys_key] except KeyError: - if self.named_key_ids: - new_id = name - else: - new_id = f"d{len(list(self.keys))}" + new_id = name if self.named_key_ids else f"d{len(list(self.keys))}" self.keys[keys_key] = new_id key_kwargs = { @@ -622,10 +616,7 @@ def add_graph_element(self, G): """ Serialize graph G in GraphML to the stream. """ - if G.is_directed(): - default_edge_type = "directed" - else: - default_edge_type = "undirected" + default_edge_type = "directed" if G.is_directed() else "undirected" graphid = G.graph.pop("id", None) if graphid is None: @@ -753,10 +744,7 @@ def add_graph_element(self, G): """ Serialize graph G in GraphML to the stream. 
""" - if G.is_directed(): - default_edge_type = "directed" - else: - default_edge_type = "undirected" + default_edge_type = "directed" if G.is_directed() else "undirected" graphid = G.graph.pop("id", None) if graphid is None: @@ -783,27 +771,27 @@ def add_graph_element(self, G): element_type = self.get_xml_type(self.attr_type(k, "graph", v)) self.get_key(str(k), element_type, "graph", None) # Nodes and data - for node, d in G.nodes.items(): + for _node, d in G.nodes.items(): for k, v in d.items(): self.attribute_types[(str(k), "node")].add(type(v)) - for node, d in G.nodes.items(): + for _node, d in G.nodes.items(): for k, v in d.items(): T = self.get_xml_type(self.attr_type(k, "node", v)) self.get_key(str(k), T, "node", node_default.get(k)) # Edges and data if G.is_multigraph(): - for u, v, ekey, d in G.edges: + for _u, v, _ekey, d in G.edges: for k, v in d.items(): self.attribute_types[(str(k), "edge")].add(type(v)) - for u, v, ekey, d in G.edges: + for _u, v, _ekey, d in G.edges: for k, v in d.items(): T = self.get_xml_type(self.attr_type(k, "edge", v)) self.get_key(str(k), T, "edge", edge_default.get(k)) else: - for u, v, d in G.edges: + for _u, v, d in G.edges: for k, v in d.items(): self.attribute_types[(str(k), "edge")].add(type(v)) - for u, v, d in G.edges: + for _u, v, d in G.edges: for k, v in d.items(): T = self.get_xml_type(self.attr_type(k, "edge", v)) self.get_key(str(k), T, "edge", edge_default.get(k)) @@ -850,8 +838,7 @@ def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False): self.edge_ids = {} # dict mapping (u,v) tuples to edge id attributes def __call__(self, path=None, string=None): - from xml.etree.ElementTree import ElementTree - from xml.etree.ElementTree import fromstring + from xml.etree.ElementTree import ElementTree, fromstring if path is not None: self.xml = ElementTree(file=path) @@ -867,10 +854,7 @@ def make_graph(self, graph_xml, graphml_keys, defaults, G=None): # set default graph type edgedefault = 
graph_xml.get("edgedefault", None) if G is None: - if edgedefault == "directed": - G = eg.MultiDiGraph() - else: - G = eg.MultiGraph() + G = eg.MultiDiGraph() if edgedefault == "directed" else eg.MultiGraph() # set defaults for graph attributes G.graph["node_default"] = {} G.graph["edge_default"] = {} @@ -910,7 +894,7 @@ def add_node(self, G, node_xml, graphml_keys, defaults): # warn on finding unsupported ports tag ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port") if ports is not None: - warnings.warn("GraphML port tag not supported.") + warnings.warn("GraphML port tag not supported.", stacklevel=2) # find the node by id and cast it to the appropriate type node_id = self.node_type(node_xml.get("id")) # get data/attributes for node @@ -926,7 +910,7 @@ def add_edge(self, G, edge_element, graphml_keys): # warn on finding unsupported ports tag ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port") if ports is not None: - warnings.warn("GraphML port tag not supported.") + warnings.warn("GraphML port tag not supported.", stacklevel=2) # raise error if we find mixed directed and undirected edges directed = edge_element.get("directed") @@ -987,7 +971,7 @@ def decode_data_elements(self, graphml_keys, obj_xml): node_label = None # set GenericNode's configuration as shape type gn = data_element.find(f"{{{self.NS_Y}}}GenericNode") - if gn: + if gn is not None: data["shape_type"] = gn.get("configuration") for node_type in ["GenericNode", "ShapeNode", "SVGNode", "ImageNode"]: pref = f"{{{self.NS_Y}}}{node_type}/{{{self.NS_Y}}}" @@ -1034,7 +1018,7 @@ def find_graphml_keys(self, graph_element): attr_type = "yfiles" if attr_type is None: attr_type = "string" - warnings.warn(f"No key type for id {attr_id}. Using string") + warnings.warn(f"No key type for id {attr_id}. 
Using string", stacklevel=2) if attr_name is None: raise eg.EasyGraphError(f"Unknown key for id {attr_id}.") graphml_keys[attr_id] = { diff --git a/easygraph/readwrite/graphviz.py b/easygraph/readwrite/graphviz.py index cc03ceaf..83ed4f07 100644 --- a/easygraph/readwrite/graphviz.py +++ b/easygraph/readwrite/graphviz.py @@ -1,6 +1,5 @@ import easygraph as eg - __all__ = ["write_dot", "read_dot", "from_agraph", "to_agraph"] @@ -37,15 +36,9 @@ def from_agraph(A, create_using=None): """ if create_using is None: if A.is_directed(): - if A.is_strict(): - create_using = eg.DiGraph - else: - create_using = eg.MultiDiGraph + create_using = eg.DiGraph if A.is_strict() else eg.MultiDiGraph else: - if A.is_strict(): - create_using = eg.Graph - else: - create_using = eg.MultiGraph + create_using = eg.Graph if A.is_strict() else eg.MultiGraph # assign defaults N = eg.empty_graph(0, create_using) diff --git a/easygraph/readwrite/pajek.py b/easygraph/readwrite/pajek.py index f90ab827..1fdef768 100644 --- a/easygraph/readwrite/pajek.py +++ b/easygraph/readwrite/pajek.py @@ -60,7 +60,6 @@ # import networkx as nx from easygraph.utils import open_file - __all__ = ["read_pajek", "parse_pajek", "generate_pajek", "write_pajek"] @@ -77,10 +76,6 @@ def generate_pajek(G): See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm for format information. """ - if G.name == "": - name = "EasyGraph" - else: - name = G.name # Apparently many Pajek format readers can't process this line # So we'll leave it out for now. # yield '*network %s'%name @@ -89,7 +84,7 @@ def generate_pajek(G): yield f"*vertices {G.order()}" nodes = list(G) # make dictionary mapping nodes to integers - nodenumber = dict(zip(nodes, range(1, len(nodes) + 1))) + nodenumber = dict(zip(nodes, range(1, len(nodes) + 1), strict=False)) for n in nodes: # copy node attributes and pop mandatory attributes # to avoid duplication. 
@@ -114,7 +109,7 @@ def generate_pajek(G): else: warnings.warn( f"Node attribute {k} is not processed." - f" {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}." + f" {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}.", stacklevel=2 ) yield s @@ -141,7 +136,7 @@ def generate_pajek(G): else: warnings.warn( f"Edge attribute {k} is not processed." - f" {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}." + f" {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}.", stacklevel=2 ) yield s @@ -259,7 +254,7 @@ def parse_pajek(lines): elif l.lower().startswith("*vertices"): nodelabels = {} l, nnodes = l.split() - for i in range(int(nnodes)): + for _i in range(int(nnodes)): l = next(lines) try: splitline = [x for x in shlex.split(str(l))] @@ -277,7 +272,7 @@ def parse_pajek(lines): ) except: pass - extra_attr = zip(splitline[5::2], splitline[6::2]) + extra_attr = zip(splitline[5::2], splitline[6::2], strict=False) G.nodes[label].update(extra_attr) elif l.lower().startswith("*edges") or l.lower().startswith("*arcs"): if l.lower().startswith("*edge"): @@ -308,7 +303,7 @@ def parse_pajek(lines): pass # if there isn't, just assign a 1 # edge_data.update({'value':1}) - extra_attr = zip(splitline[3::2], splitline[4::2]) + extra_attr = zip(splitline[3::2], splitline[4::2], strict=False) edge_data.update(extra_attr) # if G.has_edge(u,v): # multigraph=True diff --git a/easygraph/readwrite/tests/test_gexf.py b/easygraph/readwrite/tests/test_gexf.py index 896a57a6..565465d7 100644 --- a/easygraph/readwrite/tests/test_gexf.py +++ b/easygraph/readwrite/tests/test_gexf.py @@ -2,9 +2,10 @@ import sys import time -import easygraph as eg import pytest +import easygraph as eg + class TestGEXF: @classmethod @@ -162,7 +163,7 @@ def test_read_attribute_graphml(self): assert sorted(G.nodes) == sorted(H.nodes) ge = sorted(G.edges) he = sorted(H.edges) - for a, b in zip(ge, he): + for a, b in zip(ge, he, 
strict=False): assert a == b self.attribute_fh.seek(0) @@ -275,7 +276,7 @@ def test_edge_id_construct(self): expected = f""" - + EasyGraph @@ -306,7 +307,7 @@ def test_numpy_type(self): xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation\ ="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd"\ version="1.2"> - + EasyGraph diff --git a/easygraph/readwrite/tests/test_gml.py b/easygraph/readwrite/tests/test_gml.py index 45946d1f..6180f015 100644 --- a/easygraph/readwrite/tests/test_gml.py +++ b/easygraph/readwrite/tests/test_gml.py @@ -2,16 +2,14 @@ import io import os import tempfile - from ast import literal_eval from contextlib import contextmanager from textwrap import dedent -import easygraph as eg import pytest -from easygraph.readwrite.gml import literal_destringizer -from easygraph.readwrite.gml import literal_stringizer +import easygraph as eg +from easygraph.readwrite.gml import literal_destringizer, literal_stringizer class TestGraph: @@ -269,9 +267,9 @@ def test_float_label(self): def test_name(self): G = eg.parse_gml('graph [ name "x" node [ id 0 label "x" ] ]') - assert "x" == G.graph["name"] + assert G.graph["name"] == "x" G = eg.parse_gml('graph [ node [ id 0 label "x" ] ]') - assert "" == G.name + assert G.name == "" assert "name" not in G.graph def test_graph_types(self): diff --git a/easygraph/readwrite/tests/test_graphml.py b/easygraph/readwrite/tests/test_graphml.py index 88ec32ef..012c40af 100644 --- a/easygraph/readwrite/tests/test_graphml.py +++ b/easygraph/readwrite/tests/test_graphml.py @@ -2,12 +2,11 @@ import os import tempfile -import easygraph as eg import pytest +import easygraph as eg from easygraph.readwrite.graphml import GraphMLWriter -from easygraph.utils import edges_equal -from easygraph.utils import nodes_equal +from easygraph.utils import edges_equal, nodes_equal class BaseGraphML: @@ -377,7 +376,7 @@ def test_read_attribute_graphml(self): assert nodes_equal(sorted(G.nodes), sorted(H.nodes)) 
ge = sorted(G.edges) he = sorted(H.edges) - for a, b in zip(ge, he): + for a, b in zip(ge, he, strict=False): assert a == b self.attribute_fh.seek(0) @@ -385,7 +384,7 @@ def test_read_attribute_graphml(self): assert sorted(G.nodes) == sorted(PG.nodes) ge = sorted(G.edges) he = sorted(PG.edges) - for a, b in zip(ge, he): + for a, b in zip(ge, he, strict=False): assert a == b def test_node_default_attribute_graphml(self): @@ -503,11 +502,10 @@ def test_multigraph_keys(self): """ fh = io.BytesIO(s.encode("UTF-8")) - G = eg.read_graphml(fh) - expected = [("n0", "n1", "e0"), ("n0", "n1", "e1")] + eg.read_graphml(fh) # assert sorted(G.edges) == expected fh.seek(0) - H = eg.parse_graphml(s) + eg.parse_graphml(s) # assert sorted(H.edges) == expected def test_preserve_multi_edge_data(self): @@ -1180,8 +1178,8 @@ def test_write_read_attribute_named_key_ids_graphml(self): named_key_ids_behavior_fh.seek(0) J = eg.read_graphml(named_key_ids_behavior_fh) - assert all(n1 == n2 for (n1, n2) in zip(H.nodes, J.nodes)) - assert all(e1 == e2 for (e1, e2) in zip(H.edges, J.edges)) + assert all(n1 == n2 for (n1, n2) in zip(H.nodes, J.nodes, strict=False)) + assert all(e1 == e2 for (e1, e2) in zip(H.edges, J.edges, strict=False)) def test_write_read_attribute_numeric_type_graphml(self): from xml.etree.ElementTree import parse @@ -1283,7 +1281,7 @@ def test_mixed_int_type_number_attributes(self): def test_numpy_float(self): np = pytest.importorskip("numpy") - wt = np.float_(3.4) + wt = np.float64(3.4) G = eg.Graph([(1, 2, {"weight": wt})]) fd, fname = tempfile.mkstemp() self.writer(G, fname) @@ -1380,7 +1378,7 @@ def test_multigraph_write_generate_edge_id_from_attribute(self): assert nodes_equal(G.nodes, H.nodes) # assert edges_equal(G.edges, H.edges)x - x = [data.get("eid") for u, v, _, data in H.edges] + [data.get("eid") for u, v, _, data in H.edges] assert sorted(data.get("eid") for u, v, _, data in H.edges) == sorted( edge_attributes.values() ) @@ -1452,7 +1450,7 @@ def 
test_numpy_float32(self): os.unlink(fname) def test_numpy_float64_inference(self): - np = pytest.importorskip("numpy") + pytest.importorskip("numpy") G = self.attribute_numeric_type_graph fd, fname = tempfile.mkstemp() self.writer(G, fname, infer_numeric_types=True) diff --git a/easygraph/readwrite/tests/test_graphviz.py b/easygraph/readwrite/tests/test_graphviz.py index bac33144..8266f62e 100644 --- a/easygraph/readwrite/tests/test_graphviz.py +++ b/easygraph/readwrite/tests/test_graphviz.py @@ -3,13 +3,10 @@ import pytest - pygraphviz = pytest.importorskip("pygraphviz") import easygraph as eg - -from easygraph.utils import edges_equal -from easygraph.utils import nodes_equal +from easygraph.utils import edges_equal, nodes_equal class TestAGraph: @@ -51,7 +48,7 @@ def agraph_checks(self, G): def test_from_agraph_name(self): G = eg.Graph(name="test") A = eg.to_agraph(G) - H = eg.from_agraph(A) + eg.from_agraph(A) assert G.name == "test" def test_undirected(self): diff --git a/easygraph/readwrite/tests/test_pajek.py b/easygraph/readwrite/tests/test_pajek.py index b21d4b33..a01f3edb 100644 --- a/easygraph/readwrite/tests/test_pajek.py +++ b/easygraph/readwrite/tests/test_pajek.py @@ -40,16 +40,14 @@ """ Pajek tests """ -import easygraph as eg +import easygraph as eg print(eg) import os import tempfile -from easygraph.utils import edges_equal -from easygraph.utils import nodes_equal - +from easygraph.utils import edges_equal, nodes_equal # from rich import print @@ -284,7 +282,7 @@ def test_noname(self): line = "*network\n" other_lines = self.data.split("\n")[1:] data = line + "\n".join(other_lines) - G = eg.parse_pajek(data) + eg.parse_pajek(data) def test_unicode(self): import io diff --git a/easygraph/readwrite/tests/test_pickle.py b/easygraph/readwrite/tests/test_pickle.py index 91165f12..106540aa 100644 --- a/easygraph/readwrite/tests/test_pickle.py +++ b/easygraph/readwrite/tests/test_pickle.py @@ -8,7 +8,6 @@ import tempfile import easygraph as eg - from 
easygraph.utils import edges_equal diff --git a/easygraph/readwrite/tests/test_ucinet.py b/easygraph/readwrite/tests/test_ucinet.py index 3c91c87b..a48f524d 100644 --- a/easygraph/readwrite/tests/test_ucinet.py +++ b/easygraph/readwrite/tests/test_ucinet.py @@ -6,9 +6,6 @@ import easygraph as eg -from nose import SkipTest -from nose.tools import * - def filterEdges(edges): return [e[:3] for e in edges] @@ -35,10 +32,6 @@ def setup_class(self): ("e", "a"), ] ) - try: - pass - except ImportError: - raise SkipTest("NumPy not available.") def test_generate_ucinet(self): Gout = eg.generate_ucinet(self.G) @@ -260,15 +253,6 @@ def test_read_ucinet(self): def test_write_ucinet(self): fh = io.BytesIO() - data = """\ -dl n=5 format=fullmatrix -data: -0 1 1 1 1 -1 0 1 0 0 -1 1 0 0 1 -1 0 0 0 0 -1 0 1 0 0 -""" graph = eg.MultiDiGraph() graph.add_nodes_from([0, 1, 2, 3, 4]) graph.add_edges_from( diff --git a/easygraph/readwrite/ucinet.py b/easygraph/readwrite/ucinet.py index 99b080ae..588caee6 100644 --- a/easygraph/readwrite/ucinet.py +++ b/easygraph/readwrite/ucinet.py @@ -21,16 +21,15 @@ for full format information. Short version on http://www.analytictech.com/networks/dataentry.htm """ - +import contextlib import re import shlex -import easygraph as eg import numpy as np +import easygraph as eg from easygraph.utils import open_file - __all__ = ["generate_ucinet", "read_ucinet", "parse_ucinet", "write_ucinet"] @@ -67,9 +66,13 @@ def generate_ucinet(G): yield "data:" - yield str(np.asmatrix(eg.to_numpy_array(G, nodelist=nodes, dtype=int))).replace( - "[", " " - ).replace("]", " ").lstrip().rstrip() + yield ( + str(np.asmatrix(eg.to_numpy_array(G, nodelist=nodes, dtype=int))) + .replace("[", " ") + .replace("]", " ") + .lstrip() + .rstrip() + ) @open_file(0, mode="rb") @@ -85,11 +88,11 @@ def read_ucinet(path, encoding="UTF-8"): G : EasyGraph MultiGraph or MultiDiGraph. 
Examples -------- - >>> G=eg.path_graph(4) + >>> G = eg.path_graph(4) >>> eg.write_ucinet(G, "test.dl") - >>> G=eg.read_ucinet("test.dl") + >>> G = eg.read_ucinet("test.dl") To create a Graph instead of a MultiGraph use - >>> G1=eg.Graph(G) + >>> G1 = eg.Graph(G) See Also -------- parse_ucinet() @@ -114,7 +117,7 @@ def write_ucinet(G, path, encoding="UTF-8"): Filenames ending in .gz or .bz2 will be compressed. Examples -------- - >>> G=eg.path_graph(4) + >>> G = eg.path_graph(4) >>> eg.write_ucinet(G, "test.net") References ---------- @@ -144,9 +147,7 @@ def parse_ucinet(lines): See UCINET User Guide or http://www.analytictech.com/ucinet/help/hs5000.htm for full format information. Short version on http://www.analytictech.com/networks/dataentry.htm """ - from numpy import genfromtxt - from numpy import isnan - from numpy import reshape + from numpy import genfromtxt, isnan, reshape G = eg.MultiDiGraph() @@ -163,14 +164,12 @@ def parse_ucinet(lines): lexer.whitespace_split = True number_of_nodes = 0 - number_of_matrices = 0 nr = 0 # number of rows (rectangular matrix) nc = 0 # number of columns (rectangular matrix) ucinet_format = "fullmatrix" # Format by default labels = {} # Contains labels of nodes row_labels_embedded = False # Whether labels are embedded in data or not cols_labels_embedded = False - diagonal = True # whether the main diagonal is present or absent KEYWORDS = ("format", "data:", "labels:") # TODO remove ':' in keywords @@ -188,14 +187,14 @@ def parse_ucinet(lines): nc = int(get_param(r"\d+", token, lexer)) number_of_nodes = max(nr, nc) elif token.startswith("nm"): - number_of_matrices = int(get_param(r"\d+", token, lexer)) + int(get_param(r"\d+", token, lexer)) else: number_of_nodes = int(get_param(r"\d+", token, lexer)) nr = number_of_nodes nc = number_of_nodes elif token.startswith("diagonal"): - diagonal = get_param("present|absent", token, lexer) + get_param("present|absent", token, lexer) elif token.startswith("format"): ucinet_format = 
get_param( @@ -206,9 +205,7 @@ def parse_ucinet(lines): ) # TODO : row and columns labels - elif token.startswith("row"): # Row labels - pass - elif token.startswith("column"): # Columns labels + elif token.startswith("row") or token.startswith("column"): # Row labels pass elif token.startswith("labels"): @@ -236,7 +233,7 @@ def parse_ucinet(lines): params = {} if cols_labels_embedded: # params['names'] = True - labels = dict(zip(range(0, nc), data_lines.splitlines()[1].split())) + labels = dict(zip(range(0, nc), data_lines.splitlines()[1].split(), strict=False)) # params['skip_header'] = 2 # First character is \n if row_labels_embedded: # Skip first column # TODO rectangular case : labels can differ from rows to columns @@ -245,10 +242,8 @@ def parse_ucinet(lines): if ucinet_format == "fullmatrix": # In Python3 genfromtxt requires bytes string - try: + with contextlib.suppress(TypeError): data_lines = bytes(data_lines, "utf-8") - except TypeError: - pass # Do not use splitlines() because it is not necessarily written as a square matrix data = genfromtxt([data_lines], case_sensitive=False, **params) if cols_labels_embedded or row_labels_embedded: @@ -269,10 +264,7 @@ def parse_ucinet(lines): pass else: for neighbor in row[1:]: - if ucinet_format == "nodelist1": - source = row[0] - else: - source = str(i) + source = row[0] if ucinet_format == "nodelist1" else str(i) s += source + " " + neighbor + "\n" G = eg.parse_edgelist( @@ -282,7 +274,7 @@ def parse_ucinet(lines): ) if not row_labels_embedded or not cols_labels_embedded: - G = eg.relabel_nodes(G, dict(zip(list(G.nodes), [i - 1 for i in G.nodes]))) + G = eg.relabel_nodes(G, dict(zip(list(G.nodes), [i - 1 for i in G.nodes], strict=False))) elif ucinet_format == "edgelist1": G = eg.parse_edgelist( @@ -292,7 +284,7 @@ def parse_ucinet(lines): ) if not row_labels_embedded or not cols_labels_embedded: - G = eg.relabel_nodes(G, dict(zip(list(G.nodes), [i - 1 for i in G.nodes]))) + G = eg.relabel_nodes(G, 
dict(zip(list(G.nodes), [i - 1 for i in G.nodes], strict=False))) # Relabel nodes if labels: @@ -322,6 +314,6 @@ def get_param(regex, token, lines): try: n = next(lines) except StopIteration: - raise Exception("Parameter %s value not recognized" % token) + raise Exception(f"Parameter {token} value not recognized") query = re.search(regex, n) return query.group() diff --git a/easygraph/tests/test_convert.py b/easygraph/tests/test_convert.py index 1c869b8f..1443ec5e 100644 --- a/easygraph/tests/test_convert.py +++ b/easygraph/tests/test_convert.py @@ -1,12 +1,10 @@ import pytest - np = pytest.importorskip("numpy") pd = pytest.importorskip("pandas") sp = pytest.importorskip("scipy") import easygraph as eg - from easygraph.utils.misc import * @@ -43,7 +41,7 @@ def setup_method(self): self.df = df mdf = pd.DataFrame([[4, 16, "A", "D"]], columns=["weight", "cost", 0, "b"]) - self.mdf = df.append(mdf) + self.mdf = pd.concat([df, mdf], ignore_index=True) def assert_equal(self, G1, G2): assert nodes_equal(G1.nodes, G2.nodes) diff --git a/easygraph/utils/alias.py b/easygraph/utils/alias.py index 4fe2651d..43f91b6e 100644 --- a/easygraph/utils/alias.py +++ b/easygraph/utils/alias.py @@ -16,10 +16,10 @@ def create_alias_table(area_ratio): """ import numpy as np - l = len(area_ratio) - accept, alias = [0] * l, [0] * l + length = len(area_ratio) + accept, alias = [0] * length, [0] * length small, large = [], [] - area_ratio_ = np.array(area_ratio) * l + area_ratio_ = np.array(area_ratio) * length for i, prob in enumerate(area_ratio_): if prob < 1.0: small.append(i) diff --git a/easygraph/utils/convert_to_matrix.py b/easygraph/utils/convert_to_matrix.py index 8e005212..4b548945 100644 --- a/easygraph/utils/convert_to_matrix.py +++ b/easygraph/utils/convert_to_matrix.py @@ -2,7 +2,6 @@ import easygraph as eg - __all__ = [ "to_numpy_matrix", "from_numpy_array", @@ -29,12 +28,12 @@ def to_numpy_matrix(G, edge_sign=1.0, not_edge_sign=0.0): """ import numpy as np - index_of_node = 
dict(zip(G.nodes, range(len(G)))) + index_of_node = dict(zip(G.nodes, range(len(G)), strict=False)) N = len(G) M = np.full((N, N), not_edge_sign) for u, udict in G.adj.items(): - for v, data in udict.items(): + for v, _data in udict.items(): M[index_of_node[u], index_of_node[v]] = edge_sign M = np.asmatrix(M) @@ -154,7 +153,7 @@ def from_numpy_array(A, parallel_edges=False, create_using=None): G.add_nodes_from(range(n)) # Get a list of all the entries in the array with nonzero entries. These # coordinates become edges in the graph. (convert to int from np.int64) - edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero())) + edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero(), strict=False)) # handle numpy constructed data type if python_type == "void": # Sort the fields by their offset, then by dtype, then by name. @@ -167,7 +166,7 @@ def from_numpy_array(A, parallel_edges=False, create_using=None): v, { name: kind_to_python_type[dtype.kind](val) - for (_, dtype, name), val in zip(fields, A[u, v]) + for (_, dtype, name), val in zip(fields, A[u, v], strict=False) }, ) for u, v in edges @@ -320,7 +319,7 @@ def to_numpy_array( raise eg.EasyGraphError("nodelist contains duplicates.") undirected = not G.is_directed() - index = dict(zip(nodelist, range(nlen))) + index = dict(zip(nodelist, range(nlen), strict=False)) # Initially, we start with an array of nans. Then we populate the array # using data from the graph. 
Afterwards, any leftover nans will be @@ -558,7 +557,7 @@ def from_pandas_edgelist( g = eg.empty_graph(0, create_using) if edge_attr is None: - g.add_edges_from(zip(df[source], df[target])) + g.add_edges_from(zip(df[source], df[target], strict=False)) return g reserved_columns = [source, target] @@ -579,7 +578,7 @@ def from_pandas_edgelist( ) try: - attribute_data = zip(*[df[col] for col in attr_col_headings]) + attribute_data = zip(*[df[col] for col in attr_col_headings], strict=False) except (KeyError, TypeError) as err: msg = f"Invalid edge_attr argument: {edge_attr}" raise eg.EasyGraphError(msg) from err @@ -589,23 +588,23 @@ def from_pandas_edgelist( if edge_key is not None: try: multigraph_edge_keys = df[edge_key] - attribute_data = zip(attribute_data, multigraph_edge_keys) + attribute_data = zip(attribute_data, multigraph_edge_keys, strict=False) except (KeyError, TypeError) as err: msg = f"Invalid edge_key argument: {edge_key}" raise eg.EasyGraphError(msg) from err - for s, t, attrs in zip(df[source], df[target], attribute_data): + for s, t, attrs in zip(df[source], df[target], attribute_data, strict=False): if edge_key is not None: attrs, multigraph_edge_key = attrs key = g.add_edge(s, t, key=multigraph_edge_key) else: key = g.add_edge(s, t) - g[s][t][key].update(zip(attr_col_headings, attrs)) + g[s][t][key].update(zip(attr_col_headings, attrs, strict=False)) else: - for s, t, attrs in zip(df[source], df[target], attribute_data): + for s, t, attrs in zip(df[source], df[target], attribute_data, strict=False): g.add_edge(s, t) - g[s][t].update(zip(attr_col_headings, attrs)) + g[s][t].update(zip(attr_col_headings, attrs, strict=False)) return g @@ -774,7 +773,7 @@ def _coo_gen_triples(A): """ row, col, data = A.row, A.col, A.data - return zip(row, col, data) + return zip(row, col, data, strict=False) def _dok_gen_triples(A): diff --git a/easygraph/utils/decorators.py b/easygraph/utils/decorators.py index 75a04c7e..91634cee 100644 --- 
a/easygraph/utils/decorators.py +++ b/easygraph/utils/decorators.py @@ -3,16 +3,13 @@ import gzip import inspect import re - from collections import defaultdict from os.path import splitext from pathlib import Path import easygraph as eg - from easygraph.utils.exception import EasyGraphError - __all__ = [ "only_implemented_for_UnDirected_graph", "only_implemented_for_Directed_graph", @@ -63,11 +60,13 @@ def not_implemented_for(*graph_types): def sp_function(G): pass + # rule out MultiDiGraph - @not_implemented_for("directed","multigraph") + @not_implemented_for("directed", "multigraph") def sp_np_function(G): pass + # rule out all except DiGraph @not_implemented_for("undirected") @not_implemented_for("multigraph") @@ -85,8 +84,8 @@ def sp_np_function(G): ) # 3-way logic: True if "directed" input, False if "undirected" input, else None - dval = ("directed" in graph_types) or not ("undirected" in graph_types) and None - mval = ("multigraph" in graph_types) or not ("graph" in graph_types) and None + dval = ("directed" in graph_types) or "undirected" not in graph_types and None + mval = ("multigraph" in graph_types) or "graph" not in graph_types and None errmsg = f"not implemented for {' '.join(graph_types)} type" def _not_implemented_for(g): @@ -130,22 +129,26 @@ def open_file(path_arg, mode="r"): -------- Decorate functions like this:: - @open_file(0,"r") + @open_file(0, "r") def read_function(pathname): pass - @open_file(1,"w") + + @open_file(1, "w") def write_function(G, pathname): pass - @open_file(1,"w") + + @open_file(1, "w") def write_function(G, pathname="graph.dot"): pass - @open_file("pathname","w") + + @open_file("pathname", "w") def write_function(G, pathname="graph.dot"): pass + @open_file("path", "w+") def another_function(arg, **kwargs): path = kwargs["path"] @@ -161,19 +164,19 @@ def another_function(arg, **kwargs): @open_file("path") def some_function(arg1, arg2, path=None): - if path is None: - fobj = tempfile.NamedTemporaryFile(delete=False) - else: 
- # `path` could have been a string or file object or something - # similar. In any event, the decorator has given us a file object - # and it will close it for us, if it should. - fobj = path - - try: - fobj.write("blah") - finally: - if path is None: - fobj.close() + if path is None: + fobj = tempfile.NamedTemporaryFile(delete=False) + else: + # `path` could have been a string or file object or something + # similar. In any event, the decorator has given us a file object + # and it will close it for us, if it should. + fobj = path + + try: + fobj.write("blah") + finally: + if path is None: + fobj.close() Normally, we'd want to use "with" to ensure that fobj gets closed. However, the decorator will make `path` a file object for us, @@ -256,6 +259,7 @@ def _convert(amount): if amount.currency != currency: amount = amount.to_currency(currency) return amount + return argmap(_convert, which_arg) Despite this common idiom for argmap, most of the following examples @@ -315,10 +319,12 @@ def foo(a, b, c): def double(a): return 2 * a + @argmap(double, 3) def overflow(a, *args): return a, args + print(overflow(1, 2, 3, 4, 5, 6)) # output is 1, (2, 3, 8, 5, 6) **Try Finally** @@ -368,6 +374,7 @@ def _opener(path): # assume `path` handles the closing fclose = lambda: None return path, fclose + return argmap(_opener, which_arg, try_finally=True) which can then be used as:: @@ -925,12 +932,9 @@ def signature(cls, f): def_sig.append(name) fname = cls._name(f) - def_sig = f'def {fname}({", ".join(def_sig)}):' + def_sig = f"def {fname}({', '.join(def_sig)}):" - if inspect.isgeneratorfunction(f): - _return = "yield from" - else: - _return = "return" + _return = "yield from" if inspect.isgeneratorfunction(f) else "return" call_sig = f"{_return} {{}}({', '.join(call_sig)})" diff --git a/easygraph/utils/exception.py b/easygraph/utils/exception.py index de7d5f95..ffa286cb 100644 --- a/easygraph/utils/exception.py +++ b/easygraph/utils/exception.py @@ -6,7 +6,6 @@ Base exceptions and 
errors for EasyGraph. """ - __all__ = ["EasyGraphException", "EasyGraphError", "EasyGraphNotImplemented"] diff --git a/easygraph/utils/mapped_queue.py b/easygraph/utils/mapped_queue.py index 301e6958..bde8651d 100644 --- a/easygraph/utils/mapped_queue.py +++ b/easygraph/utils/mapped_queue.py @@ -3,10 +3,8 @@ Codes from NetworkX - http://networkx.github.io/ """ - import heapq - __all__ = ["MappedQueue"] @@ -53,8 +51,10 @@ class MappedQueue: Pearson Education. """ - def __init__(self, data=[]): + def __init__(self, data=None): """Priority queue class with updatable priorities.""" + if data is None: + data = [] self.h = list(data) self.d = dict() self._heapify() diff --git a/easygraph/utils/misc.py b/easygraph/utils/misc.py index 0108dbbe..ba5a7d9d 100644 --- a/easygraph/utils/misc.py +++ b/easygraph/utils/misc.py @@ -1,6 +1,4 @@ -from itertools import chain -from itertools import tee - +from itertools import chain, tee __all__ = ["split_len", "split", "nodes_equal", "edges_equal", "pairwise"] @@ -79,7 +77,7 @@ def edges_equal(edges1, edges2, need_data=True): for c1, e in enumerate(edges1): u, v = e[0], e[1] data = [] - if need_data == True: + if need_data: data = [e[2:]] if v in d1[u]: data = d1[u][v] + data @@ -89,7 +87,7 @@ def edges_equal(edges1, edges2, need_data=True): for c2, e in enumerate(edges2): u, v = e[0], e[1] data = [] - if need_data == True: + if need_data: data = [e[2:]] if v in d2[u]: data = d2[u][v] + data @@ -117,5 +115,5 @@ def pairwise(iterable, cyclic=False): a, b = tee(iterable) first = next(b, None) if cyclic is True: - return zip(a, chain(b, (first,))) - return zip(a, b) + return zip(a, chain(b, (first,)), strict=False) + return zip(a, b, strict=False) diff --git a/easygraph/utils/relabel.py b/easygraph/utils/relabel.py index 0a6cbece..347f20bd 100644 --- a/easygraph/utils/relabel.py +++ b/easygraph/utils/relabel.py @@ -2,10 +2,7 @@ def relabel_nodes(G, mapping): - if not hasattr(mapping, "__getitem__"): - m = {n: mapping(n) for n in G} - 
else: - m = mapping + m = {n: mapping(n) for n in G} if not hasattr(mapping, "__getitem__") else mapping return _relabel_copy(G, m) diff --git a/easygraph/utils/tests/test_convert_to_matrix.py b/easygraph/utils/tests/test_convert_to_matrix.py index 5093dfd9..e38a63ab 100644 --- a/easygraph/utils/tests/test_convert_to_matrix.py +++ b/easygraph/utils/tests/test_convert_to_matrix.py @@ -1,9 +1,9 @@ -import sys import unittest -import easygraph as eg import numpy as np +import easygraph as eg + class ConvertToMatrix(unittest.TestCase): def test_to_numpy_matrix(self): diff --git a/easygraph/utils/type_change.py b/easygraph/utils/type_change.py index 187b2290..97bf8456 100644 --- a/easygraph/utils/type_change.py +++ b/easygraph/utils/type_change.py @@ -1,6 +1,5 @@ import easygraph as eg - __all__ = [ "from_pyGraphviz_agraph", "to_pyGraphviz_agraph", @@ -40,15 +39,9 @@ def from_pyGraphviz_agraph(A, create_using=None): """ if create_using is None: if A.is_directed(): - if A.is_strict(): - create_using = eg.DiGraph - else: - create_using = eg.MultiDiGraph + create_using = eg.DiGraph if A.is_strict() else eg.MultiDiGraph else: - if A.is_strict(): - create_using = eg.Graph - else: - create_using = eg.MultiGraph + create_using = eg.Graph if A.is_strict() else eg.MultiGraph # assign defaults N = eg.empty_graph(0, create_using) diff --git a/pyproject.toml b/pyproject.toml index e3c81922..33fe5c0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,168 +1,153 @@ -# [tool.poetry] -# name = "python-easygraph" -# version = "1.2.0rc1.dev0" -# description = "Python Graph Library" -# authors = [ -# "Teddy Xinyuan Chen <45612704+tddschn@users.noreply.github.com>", -# ] -# maintainers = [ -# "Teddy Xinyuan Chen <45612704+tddschn@users.noreply.github.com>" -# ] -# license = "MIT" - -# readme = "README.md" - -# packages = [ -# { include = "poetry", from = "src" } -# ] -# include = [ -# { path = "tests", format = "sdist" } -# ] - -# homepage = "https://python-poetry.org/" -# repository = 
"https://github.com/python-poetry/poetry" -# documentation = "https://python-poetry.org/docs" - -# keywords = ["packaging", "dependency", "poetry"] - -# classifiers = [ -# "Topic :: Software Development :: Build Tools", -# "Topic :: Software Development :: Libraries :: Python Modules" -# ] - -# [tool.poetry.build] -# generate-setup-file = false - -# # Requirements -# [tool.poetry.dependencies] -# python = "^3.7" - -# poetry-core = "^1.1.0b3" -# poetry-plugin-export = "^1.0.5" -# cachecontrol = { version = "^0.12.9", extras = ["filecache"] } -# cachy = "^0.3.0" -# cleo = "^1.0.0a5" -# crashtest = "^0.3.0" -# html5lib = "^1.0" -# importlib-metadata = { version = "^4.4", python = "<3.10" } -# # keyring uses calver, so version is unclamped -# keyring = ">=21.2.0" -# # packaging uses calver, so version is unclamped -# packaging = ">=20.4" -# pexpect = "^4.7.0" -# pkginfo = "^1.5" -# platformdirs = "^2.5.2" -# requests = "^2.18" -# requests-toolbelt = "^0.9.1" -# shellingham = "^1.1" -# tomlkit = ">=0.11.1,<1.0.0" -# # exclude 20.4.5 - 20.4.6 due to https://github.com/pypa/pip/issues/9953 -# virtualenv = "(>=20.4.3,<20.4.5 || >=20.4.7)" -# xattr = { version = "^0.9.7", markers = "sys_platform == 'darwin'" } -# urllib3 = "^1.26.0" -# dulwich = "^0.20.44" - -# [tool.poetry.dev-dependencies] -# tox = "^3.18" -# pytest = "^7.1" -# pytest-cov = "^3.0" -# pytest-mock = "^3.5" -# pytest-randomly = "^3.10" -# pytest-sugar = "^0.9" -# pytest-xdist = { version = "^2.5", extras = ["psutil"] } -# pre-commit = "^2.6" -# deepdiff = "^5.0" -# httpretty = "^1.0" -# typing-extensions = { version = "^4.0.0", python = "<3.8" } -# zipp = { version = "^3.4", python = "<3.8" } -# flatdict = "^4.0.1" -# mypy = ">=0.960" -# types-html5lib = ">=1.1.7" -# types-jsonschema = ">=4.4.4" -# types-requests = ">=2.27.11" - -# [tool.poetry.scripts] -# poetry = "poetry.console.application:main" - - -# [build-system] -# requires = ["poetry-core>=1.0.0"] -# build-backend = "poetry.core.masonry.api" - - 
-[tool.isort] -py_version = 37 -profile = "black" -force_single_line = true -combine_as_imports = true -lines_between_types = 1 -lines_after_imports = 2 -src_paths = ["src", "tests"] -extend_skip = ["setup.py"] -known_third_party = ["poetry.core"] - - -[tool.black] -target-version = ['py37'] -preview = true -force-exclude = ''' -.*/setup\.py$ -''' - - -# [tool.mypy] -# files = "src" -# mypy_path = "src" -# namespace_packages = true -# explicit_package_bases = true -# show_error_codes = true -# strict = true -# enable_error_code = [ -# "ignore-without-code", -# "redundant-expr", -# "truthy-bool", -# ] - -# use of importlib-metadata backport at python3.7 makes it impossible to -# satisfy mypy without some ignores: but we get a different set of ignores at -# different python versions. -# -# , meanwhile suppress that -# warning. -# [[tool.mypy.overrides]] -# module = [ -# 'poetry.console.commands.self.show.plugins', -# 'poetry.installation.executor', -# 'poetry.mixology.version_solver', -# 'poetry.plugins.plugin_manager', -# 'poetry.repositories.installed_repository', -# 'poetry.utils.env', -# ] -# warn_unused_ignores = false - -# [[tool.mypy.overrides]] -# module = [ -# 'cachecontrol.*', -# 'cachy.*', -# 'cleo.*', -# 'crashtest.*', -# 'pexpect.*', -# 'pkginfo.*', -# 'requests_toolbelt.*', -# 'shellingham.*', -# 'virtualenv.*', -# 'xattr.*', -# ] -# ignore_missing_imports = true - -[tool.coverage.run] -branch = true -source = [ "easygraph/"] -relative_files = true - - -[tool.coverage.report] -exclude_lines = [ - "pragma: no cover", - "if TYPE_CHECKING:" +[project] +name = "Python-EasyGraph" +version = "0.2a40" +authors = [ + { name = "Fudan MSN Group", email = "easygraph@163.com" }, ] +description = "Easy Graph" +readme = "README.rst" +requires-python = ">=3.10" +license = "BSD-3-Clause" +classifiers = [ + "Programming Language :: Python :: 3.10", + "Operating System :: OS Independent", +] +dependencies = [ + "gensim>=4.2.0", + "joblib>=1.1.1", + "matplotlib>=3.3.4", + 
"numpy>=1.19.5", + "pandas>=0.25.3", + "progressbar>=2.5", + "progressbar33>=2.4", + "scikit-learn>=0.24.2", + "scipy>=1.5.4", + "six>=1.17.0", + "statsmodels>=0.12.2", + "tqdm>=4.64.1", +] + +[project.scripts] +easy-graph = "easy_graph:main" + +[project.optional-dependencies] +graphviz = [ + "pygraphviz>=1.14", +] + +[tool.scikit-build] +minimum-version = "build-system.requires" +build-dir = "build/{wheel_tag}" + +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11"] +build-backend = "scikit_build_core.build" + +[tool.setuptools] +packages = ["easygraph"] + +[tool.scikit-build.cmake] +source-dir = "_cpp_easygraph" + +[dependency-groups] +dev = [ + "pandas-stubs>=2.3.2.250926", + "pytest>=8.4.2", + "ruff>=0.14.1", + "scikit-build-core>=0.11.6", +] + + +[tool.pytest.ini_options] +python_files = "**/tests/test_*.py" +addopts = "-ra" + + +# pyproject.toml +[tool.ruff] +target-version = "py310" # 原 min_python_version +line-length = 88 # 原 max-line-length + +[tool.ruff.lint] +# 只启用你确实想要的规则族: +# - E/F/W: pycodestyle/pyflakes +# - I: isort(导入排序) +# - B: flake8-bugbear +# - UP: pyupgrade(含“优先 f-string”) +# - Q: flake8-quotes(引号风格) +# - SIM: flake8-simplify +# - TID: flake8-tidy-imports(含 TID252 禁止相对导入) +# - TCH: flake8-type-checking(类型导入移动到 if TYPE_CHECKING) +# - ERA: eradicate(清理注释死码) +select = [ + "E", "F", "W", + "I", + "B", + "UP", + "Q", + "SIM", + "TID", + "TCH", + "ERA", +] + +# 仅保留你确实需要的忽略。E203 与 Black 冲突通常忽略。 +ignore = [ + "E203", + # 你若不想启用某个规则族,就别放进 select;无需在 ignore 里罗列一堆。 +] + +# (可选)按文件忽略 +# per-file-ignores = { "tests/**.py" = ["S101"] } + +# —— 各插件的细化配置 —— # +[tool.ruff.format] +# 字符串引号风格: "double" | "single | "preserve" +quote-style = "double" +# 是否在多行字典/列表/元组的最后一项加逗号 +skip-magic-trailing-comma = false +# 是否格式化 docstring(会重新换行) +docstring-code-format = true +# 是否在文件末尾强制留一个换行符 +line-ending = "auto" # auto | lf | crlf | cr +# 控制格式化时是否保留空行 +indent-style = "space" # 只支持 space(未来可能扩展 tab) + +[tool.ruff.lint.flake8-quotes] +# 原 flake8: inline-quotes 
= double +inline-quotes = "double" +# 若想统一 docstring/多行字符串: +# multiline-quotes = "double" +# docstring-quotes = "double" + +[tool.ruff.lint.flake8-tidy-imports] +# 原 flake8: ban-relative-imports = true ==> TID252 +ban-relative-imports = "all" + +[tool.ruff.lint.flake8-type-checking] +# 原: type-checking-exempt-modules = typing, typing-extensions +exempt-modules = ["typing", "typing_extensions"] + + +[tool.tox] +# 定义通用配置 +requires = ["tox-uv>=1.0"] +provision = ".tox-uv" +skipsdist = false +env_list = ["py36", "py37", "py38", "py39", "py310"] + +[tool.tox.env] +# 默认环境配置 +deps = [ + "lxml", + "pytest", + "." +] +commands = [ + "pytest {posargs}" +] + +[tool.tox.env.pyver] +description = "Simply check the Python version, do not install the package" +skipsdist = true +commands = ["python --version"] diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index f7054a48..00000000 --- a/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -[pytest] -# to speed up tests collection -python_files = **/tests/test_*.py -addopts=-ra --disable-warnings diff --git a/scripts/archive/check_installation.py b/scripts/archive/check_installation.py index db73247a..f43da7bf 100755 --- a/scripts/archive/check_installation.py +++ b/scripts/archive/check_installation.py @@ -3,7 +3,6 @@ import easygraph as eg - G = eg.Graph() G.add_edge(1, 2) diff --git a/setup.py b/setup.py deleted file mode 100644 index c62cf183..00000000 --- a/setup.py +++ /dev/null @@ -1,71 +0,0 @@ -import platform - -from pathlib import Path - -import setuptools - - -with open("README.rst") as fh: - long_description = fh.read() - -cpp_source_dir = Path(__file__).parent / "cpp_easygraph" -sources = list(str(x) for x in cpp_source_dir.rglob("*.cpp")) - -uname = platform.uname() -compileArgs = [] -if uname[0] == "Darwin" or uname[0] == "Linux": - compileArgs = ["-std=c++11"] -CYTHON_STR = "Cython" - -setuptools.setup( - name="Python-EasyGraph", - version="0.2a40", - author="Fudan MSN Group", - author_email="easygraph@163.com", 
- description="Easy Graph", - long_description=long_description, - long_description_content_type="text/x-rst", - url="https://github.com/easy-graph/Easy-Graph", - packages=setuptools.find_packages(), - classifiers=[ - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "License :: OSI Approved :: BSD License", - "Operating System :: OS Independent", - ], - python_requires=">=3.6, <3.11", - install_requires=[ - "numpy>=1.18.5, <=1.19.5; python_version=='3.6'", - "numpy>=1.18.5; python_version>='3.7'", - "tqdm>=4.49.0", - "joblib>=0.16.0", - "six>=1.15.0, <1.16.0", - "gensim<=4.1.2; python_version=='3.6'", - "gensim>=4.1.2; python_version>='3.7'", - "progressbar33>=2.4", - "scikit-learn>=0.23.0, <=0.24.2; python_version=='3.6'", - "scikit-learn>=0.24.0, <=1.0.2; python_version=='3.7'", - "scikit-learn>=1.1; python_version>='3.8'", - "scipy>=1.5.0, <=1.5.4; python_version=='3.6'", - "scipy>=1.5.0, <=1.7.3; python_version=='3.7'", - "scipy>=1.8.0; python_version>='3.8'", - "matplotlib>=3.3.0, <=3.3.4; python_version<'3.10'", - "matplotlib>=3.5.2; python_version>='3.10'", - "statsmodels>=0.12.0, <=0.12.2; python_version=='3.6'", - "statsmodels>=0.12.0; python_version>='3.7'", - "progressbar>=2.5", - "nose>=0.10.1", - "pandas>=1.0.1, <=1.1.5; python_version<='3.7'", - ], - setup_requires=[CYTHON_STR], - test_suite="nose.collector", - tests_require=[], - ext_modules=[ - setuptools.Extension( - "cpp_easygraph", sources, optional=True, extra_compile_args=compileArgs - ) - ], -) diff --git a/tox-conda-test.ini b/tox-conda-test.ini deleted file mode 100644 index 8fb355f4..00000000 --- a/tox-conda-test.ini +++ /dev/null @@ -1,9 +0,0 @@ -[tox] -requires = tox-conda -envlist = py36, py37, py38, py39 - -skipsdist = true - -[testenv] -commands = - python --version diff --git a/tox-conda.ini b/tox-conda.ini deleted file 
mode 100644 index 36627e72..00000000 --- a/tox-conda.ini +++ /dev/null @@ -1,14 +0,0 @@ -# content of: tox.ini , put in same dir as setup.py -[tox] -requires = tox-conda -envlist = py36, py37, py38, py39 - -[testenv] -# install pytest in the virtualenv where commands will be executed -deps = - pytest - . - -commands = - # NOTE: you can run any command line tool here - not just tests - pytest diff --git a/tox-test.ini b/tox-test.ini deleted file mode 100644 index 5f5ebe00..00000000 --- a/tox-test.ini +++ /dev/null @@ -1,8 +0,0 @@ -[tox] -envlist = py36, py37, py38, py39 - -skipsdist = true - -[testenv] -commands = - python --version diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 2ae04401..00000000 --- a/tox.ini +++ /dev/null @@ -1,19 +0,0 @@ -# content of: tox.ini , put in same dir as setup.py -[tox] -; envlist = py36, py37, py38, py39 -; py36 is really old https://endoflife.date/python -; and I can't install it on my machine without using pyenv. -envlist = py36, py37, py38, py39, py310 - -[testenv] -# install pytest in the virtualenv where commands will be executed -deps = - ; Pillow - ; kiwisolver - ; genism - lxml - pytest - ; . -commands = - # NOTE: you can run any command line tool here - not just tests - pytest