OR-Tools  8.2
strongly_connected_components.h
Go to the documentation of this file.
1// Copyright 2010-2018 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// This code computes the strongly connected components of a directed graph,
15// and presents them sorted by reverse topological order.
16//
17// It implements an efficient version of Tarjan's strongly connected components
18// algorithm published in: Tarjan, R. E. (1972), "Depth-first search and linear
19// graph algorithms", SIAM Journal on Computing.
20//
21// A description can also be found here:
22// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
23//
24// SIMPLE EXAMPLE:
25//
26// Fill a vector<vector<int>> graph; representing your graph adjacency lists.
27// That is, graph[i] contains the nodes adjacent to node #i. The nodes must be
28// integers in [0, num_nodes). Then just do:
29//
30// vector<vector<int>> components;
31// FindStronglyConnectedComponents(
32// static_cast<int>(graph.size()), graph, &components);
33//
34// The nodes of each strongly connected components will be listed in each
35// subvector of components. The components appear in reverse topological order:
36// outgoing arcs from a component will only be towards earlier components.
37//
38// IMPORTANT: num_nodes will be the number of nodes of the graph. Its type
39// is the type used internally by the algorithm. It is why it is better to
40// convert it to int or even int32 rather than using size_t which takes 64 bits.
41
42#ifndef UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_
43#define UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_
44
45#include <limits>
46#include <vector>
47
49#include "ortools/base/macros.h"
50
51// Finds the strongly connected components of a directed graph. It is templated
52// so it can be used in many contexts. See the simple example above for the
53// easiest use case.
54//
55// The requirement of the different types are:
56// - The type NodeIndex must be an integer type representing a node of the
57// graph. The nodes must be in [0, num_nodes). It can be unsigned.
58// - The type Graph must provide a [] operator such that the following code
59// iterates over the adjacency list of the given node:
60// for (const NodeIndex head : graph[node]) {}
61// - The type SccOutput must implement the function:
62// emplace_back(NodeIndex const* begin, NodeIndex const* end);
63// It will be called with the connected components of the given graph as they
64// are found (In the reverse topological order).
65//
66// More practical details on the algorithm:
67// - It deals properly with self-loop and duplicate nodes.
68// - It is really fast! and work in O(nodes + edges).
69// - Its memory usage is also bounded by O(nodes + edges) but in practice it
70// uses less than the input graph.
71template <typename NodeIndex, typename Graph, typename SccOutput>
73 const Graph& graph, SccOutput* components);
74
75// A simple custom output class that just counts the number of SCC. Not
76// allocating many vectors can save both space and speed if your graph is large.
77//
78// Note: If this matters, you probably don't want to use vector<vector<int>> as
79// an input either. See StaticGraph in ortools/graph/graph.h
80// for an efficient graph data structure compatible with this algorithm.
81template <typename NodeIndex>
84 void emplace_back(NodeIndex const* b, NodeIndex const* e) {
86 }
87 // This is just here so this class can transparently replace a code that
88 // use vector<vector<int>> as an SccOutput, and get its size with size().
89 int size() const { return number_of_components; }
90};
91
92// This implementation is slightly different than a classical iterative version
93// of Tarjan's strongly connected components algorithm. But basically it is
94// still an iterative DFS. We use a class so memory can be reused if one needs
95// to compute many SCC in a row. It also allows more complex behavior in the
96// Graph or SccOutput class that might inspect the current state of the
97// algorithm.
98//
99// TODO(user): Possible optimizations:
100// - Try to reserve the vectors which sizes are bounded by num_nodes.
101// - Use an index rather than doing push_back(), pop_back() on them.
102template <typename NodeIndex, typename Graph, typename SccOutput>
104 public:
106 const Graph& graph,
107 SccOutput* components) {
108 // Reset the class fields.
109 scc_stack_.clear();
110 scc_start_index_.clear();
111 node_index_.assign(num_nodes, 0);
112 node_to_process_.clear();
113
114 // Optimization. This will always be equal to scc_start_index_.back() except
115 // when scc_stack_ is empty, in which case its value does not matter.
116 NodeIndex current_scc_start = 0;
117
118 // Loop over all the nodes not yet settled and start a DFS from each of
119 // them.
120 for (NodeIndex base_node = 0; base_node < num_nodes; ++base_node) {
121 if (node_index_[base_node] != 0) continue;
122 DCHECK_EQ(0, node_to_process_.size());
123 node_to_process_.push_back(base_node);
124 do {
125 const NodeIndex node = node_to_process_.back();
126 const NodeIndex index = node_index_[node];
127 if (index == 0) {
128 // We continue the dfs from this node and set its 1-based index.
129 scc_stack_.push_back(node);
130 current_scc_start = scc_stack_.size();
131 node_index_[node] = current_scc_start;
132 scc_start_index_.push_back(current_scc_start);
133
134 // Enqueue all its adjacent nodes.
135 NodeIndex min_head_index = kSettledIndex;
136 for (const NodeIndex head : graph[node]) {
137 const NodeIndex head_index = node_index_[head];
138 if (head_index == 0) {
139 node_to_process_.push_back(head);
140 } else {
141 // Note that if head_index == kSettledIndex, nothing happens.
142 min_head_index = std::min(min_head_index, head_index);
143 }
144 }
145
146 // Update the start of this strongly connected component.
147 // Note that scc_start_index_ can never be empty since it first
148 // element is 1 and by definition min_head_index is 1-based and can't
149 // be 0.
150 while (current_scc_start > min_head_index) {
151 scc_start_index_.pop_back();
152 current_scc_start = scc_start_index_.back();
153 }
154 } else {
155 node_to_process_.pop_back();
156 if (current_scc_start == index) {
157 // We found a strongly connected component.
158 components->emplace_back(&scc_stack_[current_scc_start - 1],
159 &scc_stack_[0] + scc_stack_.size());
160 for (int i = current_scc_start - 1; i < scc_stack_.size(); ++i) {
161 node_index_[scc_stack_[i]] = kSettledIndex;
162 }
163 scc_stack_.resize(current_scc_start - 1);
164 scc_start_index_.pop_back();
165 current_scc_start =
166 scc_start_index_.empty() ? 0 : scc_start_index_.back();
167 }
168 }
169 } while (!node_to_process_.empty());
170 }
171 }
172
173 // Advanced usage. This can be used in either the Graph or SccOutput template
174 // class to query the current state of the algorithm. It allows to build more
175 // complex variant based on the core DFS algo.
177 return node_index_[node] > 0 && node_index_[node] < kSettledIndex;
178 }
179
180 private:
181 static constexpr NodeIndex kSettledIndex =
183
184 // Each node expanded by the DFS will be pushed on this stack. A node is only
185 // popped back when its strongly connected component has been explored and
186 // outputted.
187 std::vector<NodeIndex> scc_stack_;
188
189 // This is equivalent to the "low link" of a node in Tarjan's algorithm.
190 // Basically, scc_start_index_.back() represent the 1-based index in
191 // scc_stack_ of the beginning of the current strongly connected component.
192 // All the nodes after this index will be on the same component.
193 std::vector<NodeIndex> scc_start_index_;
194
195 // Each node is assigned an index which changes 2 times in the algorithm:
196 // - Everyone starts with an index of 0 which means unexplored.
197 // - The first time they are explored by the DFS and pushed on scc_stack_,
198 // they get their 1-based index on this stack.
199 // - Once they have been processed and outputted to components, they are said
200 // to be settled, and their index become kSettledIndex.
201 std::vector<NodeIndex> node_index_;
202
203 // This is a well known way to do an efficient iterative DFS. Each time a node
204 // is explored, all its adjacent nodes are pushed on this stack. The iterative
205 // dfs processes the nodes one by one by popping them back from here.
206 std::vector<NodeIndex> node_to_process_;
207};
208
209// Simple wrapper function for most usage.
210template <typename NodeIndex, typename Graph, typename SccOutput>
212 const Graph& graph,
213 SccOutput* components) {
215 return helper.FindStronglyConnectedComponents(num_nodes, graph, components);
216}
217
218#endif // UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_
int64 min
Definition: alldiff_cst.cc:138
int64 max
Definition: alldiff_cst.cc:139
#define DCHECK_EQ(val1, val2)
Definition: base/logging.h:885
void FindStronglyConnectedComponents(const NodeIndex num_nodes, const Graph &graph, SccOutput *components)
ListGraph Graph
Definition: graph.h:2360
int index
Definition: pack.cc:508
int64 head
void FindStronglyConnectedComponents(const NodeIndex num_nodes, const Graph &graph, SccOutput *components)
void emplace_back(NodeIndex const *b, NodeIndex const *e)