C++ Reference

C++ Reference: Graph

cliques.h
Go to the documentation of this file.
1// Copyright 2010-2018 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14//
15// Maximal clique algorithms, based on the Bron-Kerbosch algorithm.
16// See http://en.wikipedia.org/wiki/Bron-Kerbosch_algorithm
17// and
18// C. Bron and J. Kerbosch, Joep, "Algorithm 457: finding all cliques of an
19// undirected graph", CACM 16 (9): 575-577, 1973.
20// http://dl.acm.org/citation.cfm?id=362367&bnc=1.
21//
22// Keywords: undirected graph, clique, clique cover, Bron, Kerbosch.
23
24#ifndef OR_TOOLS_GRAPH_CLIQUES_H_
25#define OR_TOOLS_GRAPH_CLIQUES_H_
26
27#include <functional>
28#include <numeric>
29#include <vector>
30
31#include "absl/strings/str_cat.h"
32#include "ortools/base/int_type.h"
33#include "ortools/base/logging.h"
34#include "ortools/base/strong_vector.h"
35#include "ortools/util/time_limit.h"
36
37namespace operations_research {
38
39// Finds all maximal cliques, even of size 1, in the
40// graph described by the graph callback. graph->Run(i, j) indicates
41// if there is an arc between i and j.
42// This function takes ownership of 'callback' and deletes it after it has run.
43// If 'callback' returns true, then the search for cliques stops.
44void FindCliques(std::function<bool(int, int)> graph, int node_count,
45 std::function<bool(const std::vector<int>&)> callback);
46
47// Covers the maximum number of arcs of the graph with cliques. The graph
48// is described by the graph callback. graph->Run(i, j) indicates if
49// there is an arc between i and j.
50// This function takes ownership of 'callback' and deletes it after it has run.
51// It calls 'callback' upon each clique.
52// It ignores cliques of size 1.
53void CoverArcsByCliques(std::function<bool(int, int)> graph, int node_count,
54 std::function<bool(const std::vector<int>&)> callback);
55
56// Possible return values of the callback for reporting cliques. The returned
57// value determines whether the algorithm will continue the search.
58enum class CliqueResponse {
59 // The algorithm will continue searching for other maximal cliques.
61 // The algorithm will stop the search immediately. The search can be resumed
62 // by calling BronKerboschAlgorithm::Run (resp. RunIterations) again.
63 STOP
64};
65
66// The status value returned by BronKerboschAlgorithm::Run and
67// BronKerboschAlgorithm::RunIterations.
69 // The algorithm has enumerated all maximal cliques.
71 // The search algorithm was interrupted either because it reached the
72 // iteration limit or because the clique callback returned
73 // CliqueResponse::STOP.
75};
76
77// Implements the Bron-Kerbosch algorithm for finding maximal cliques.
78// The graph is represented as a callback that gets two nodes as its arguments
79// and it returns true if and only if there is an arc between the two nodes. The
80// cliques are reported back to the user using a second callback.
81//
82// Typical usage:
83// auto graph = [](int node1, int node2) { return true; };
84// auto on_clique = [](const std::vector<int>& clique) {
85// LOG(INFO) << "Clique!";
86// };
87//
88// BronKerboschAlgorithm<int> bron_kerbosch(graph, num_nodes, on_clique);
89// bron_kerbosch.Run();
90//
91// or:
92//
93// BronKerboschAlgorithm bron_kerbosch(graph, num_nodes, clique);
94// bron_kerbosch.RunIterations(kMaxNumIterations);
95//
96// This is a non-recursive implementation of the Bron-Kerbosch algorithm with
97// pivots as described in the paper by Bron and Kerbosch (1973) (the version 2
98// algorithm in the paper).
99// The basic idea of the algorithm is to incrementally build the cliques using
100// depth-first search. During the search, the algorithm maintains two sets of
101// candidates (nodes that are connected to all nodes in the current clique):
102// - the "not" set - these are candidates that were already visited by the
103// search and all the maximal cliques that contain them as a part of the
104// current clique were already reported.
105// - the actual candidates - these are candidates that were not visited yet, and
106// they can be added to the clique.
107// In each iteration, the algorithm does the first of the following actions that
108// applies:
109// A. If there are no actual candidates and there are candidates in the "not"
110// set, or if all actual candidates are connected to the same node in the
111// "not" set, the current clique can't be extended to a maximal clique that
112// was not already reported. Return from the recursive call and move the
113// selected candidate to the set "not".
114// B. If there are no candidates at all, it means that the current clique can't
115// be extended and that it is in fact a maximal clique. Report it to the user
116// and return from the recursive call. Move the selected candidate to the set
117// "not".
118// C. Otherwise, there are actual candidates, extend the current clique with one
119// of these candidates and process it recursively.
120//
121// To avoid unnecessary steps, the algorithm selects a pivot at each level of
122// the recursion to guide the selection of candidates added to the current
123// clique. The pivot can be either in the "not" set and among the actual
124// candidates. The algorithm tries to move the pivot and all actual candidates
125// connected to it to the set "not" as quickly as possible. This will fulfill
126// the conditions of step A, and the search algorithm will be able to leave the
127// current branch. Selecting a pivot that has the lowest number of disconnected
128// nodes among the candidates can reduce the running time significantly.
129//
130// The worst-case maximal depth of the recursion is equal to the number of nodes
131// in the graph, which makes the natural recursive implementation impractical
132// for nodes with more than a few thousands of nodes. To avoid the limitation,
133// this class simulates the recursion by maintaining a stack with the state at
134// each level of the recursion. The algorithm then runs in a loop. In each
135// iteration, the algorithm can do one or both of:
136// 1. Return to the previous recursion level (step A or B of the algorithm) by
137// removing the top state from the stack.
138// 2. Select the next candidate and enter the next recursion level (step C of
139// the algorithm) by adding a new state to the stack.
140//
141// The worst-case time complexity of the algorithm is O(3^(N/3)), and the memory
142// complexity is O(N^2), where N is the number of nodes in the graph.
143template <typename NodeIndex>
145 public:
146 // A callback called by the algorithm to test if there is an arc between a
147 // pair of nodes. The callback must return true if and only if there is an
148 // arc. Note that to function properly, the function must be symmetrical
149 // (represent an undirected graph).
150 using IsArcCallback = std::function<bool(NodeIndex, NodeIndex)>;
151 // A callback called by the algorithm to report a maximal clique to the user.
152 // The clique is returned as a list of nodes in the clique, in no particular
153 // order. The caller must make a copy of the vector if they want to keep the
154 // nodes.
155 //
156 // The return value of the callback controls how the algorithm continues after
157 // this clique. See the description of the values of 'CliqueResponse' for more
158 // details.
160 std::function<CliqueResponse(const std::vector<NodeIndex>&)>;
161
162 // Initializes the Bron-Kerbosch algorithm for the given graph and clique
163 // callback function.
165 CliqueCallback clique_callback)
166 : is_arc_(std::move(is_arc)),
167 clique_callback_(std::move(clique_callback)),
168 num_nodes_(num_nodes) {}
169
170 // Runs the Bron-Kerbosch algorithm for kint64max iterations. In practice,
171 // this is equivalent to running until completion or until the clique callback
172 // returns BronKerboschAlgorithmStatus::STOP. If the method returned because
173 // the search is finished, it will return COMPLETED; otherwise, it will return
174 // INTERRUPTED and it can be resumed by calling this method again.
176
177 // Runs at most 'max_num_iterations' iterations of the Bron-Kerbosch
178 // algorithm. When this function returns INTERRUPTED, there is still work to
179 // be done to process all the cliques in the graph. In such case the method
180 // can be called again and it will resume the work where the previous call had
181 // stopped. When it returns COMPLETED any subsequent call to the method will
182 // resume the search from the beginning.
183 BronKerboschAlgorithmStatus RunIterations(int64 max_num_iterations);
184
185 // Runs at most 'max_num_iterations' iterations of the Bron-Kerbosch
186 // algorithm, until the time limit is exceeded or until all cliques are
187 // enumerated. When this function returns INTERRUPTED, there is still work to
188 // be done to process all the cliques in the graph. In such case the method
189 // can be called again and it will resume the work where the previous call had
190 // stopped. When it returns COMPLETED any subsequent call to the method will
191 // resume the search from the beginning.
192 BronKerboschAlgorithmStatus RunWithTimeLimit(int64 max_num_iterations,
193 TimeLimit* time_limit);
194
195 // Runs the Bron-Kerbosch algorithm for at most kint64max iterations, until
196 // the time limit is excceded or until all cliques are enumerated. In
197 // practice, running the algorithm for kint64max iterations is equivalent to
198 // running until completion or until the other stopping conditions apply. When
199 // this function returns INTERRUPTED, there is still work to be done to
200 // process all the cliques in the graph. In such case the method can be called
201 // again and it will resume the work where the previous call had stopped. When
202 // it returns COMPLETED any subsequent call to the method will resume the
203 // search from the beginning.
205 return RunWithTimeLimit(kint64max, time_limit);
206 }
207
208 private:
209 DEFINE_INT_TYPE(CandidateIndex, ptrdiff_t);
210
211 // A data structure that maintains the variables of one "iteration" of the
212 // search algorithm. These are the variables that would normally be allocated
213 // on the stack in the recursive implementation.
214 //
215 // Note that most of the variables in the structure are explicitly left
216 // uninitialized by the constructor to avoid wasting resources on values that
217 // will be overwritten anyway. Most of the initialization is done in
218 // BronKerboschAlgorithm::InitializeState.
219 struct State {
220 State() {}
221 State(const State& other)
222 : pivot(other.pivot),
223 num_remaining_candidates(other.num_remaining_candidates),
224 candidates(other.candidates),
225 first_candidate_index(other.first_candidate_index),
226 candidate_for_recursion(other.candidate_for_recursion) {}
227
228 State& operator=(const State& other) {
229 pivot = other.pivot;
230 num_remaining_candidates = other.num_remaining_candidates;
231 candidates = other.candidates;
232 first_candidate_index = other.first_candidate_index;
233 candidate_for_recursion = other.candidate_for_recursion;
234 return *this;
235 }
236
237 // Moves the first candidate in the state to the "not" set. Assumes that the
238 // first candidate is also the pivot or a candidate disconnected from the
239 // pivot (as done by RunIteration).
240 inline void MoveFirstCandidateToNotSet() {
241 ++first_candidate_index;
242 --num_remaining_candidates;
243 }
244
245 // Creates a human-readable representation of the current state.
246 std::string DebugString() {
247 std::string buffer;
248 absl::StrAppend(&buffer, "pivot = ", pivot,
249 "\nnum_remaining_candidates = ", num_remaining_candidates,
250 "\ncandidates = [");
251 for (CandidateIndex i(0); i < candidates.size(); ++i) {
252 if (i > 0) buffer += ", ";
253 absl::StrAppend(&buffer, candidates[i]);
254 }
255 absl::StrAppend(
256 &buffer, "]\nfirst_candidate_index = ", first_candidate_index.value(),
257 "\ncandidate_for_recursion = ", candidate_for_recursion.value());
258 return buffer;
259 }
260
261 // The pivot node selected for the given level of the recursion.
262 NodeIndex pivot;
263 // The number of remaining candidates to be explored at the given level of
264 // the recursion; the number is computed as num_disconnected_nodes +
265 // pre_increment in the original algorithm.
266 int num_remaining_candidates;
267 // The list of nodes that are candidates for extending the current clique.
268 // This vector has the format proposed in the paper by Bron-Kerbosch; the
269 // first 'first_candidate_index' elements of the vector represent the
270 // "not" set of nodes that were already visited by the algorithm. The
271 // remaining elements are the actual candidates for extending the current
272 // clique.
273 // NOTE(user): We could store the delta between the iterations; however,
274 // we need to evaluate the impact this would have on the performance.
275 absl::StrongVector<CandidateIndex, NodeIndex> candidates;
276 // The index of the first actual candidate in 'candidates'. This number is
277 // also the number of elements of the "not" set stored at the beginning of
278 // 'candidates'.
279 CandidateIndex first_candidate_index;
280
281 // The current position in candidates when looking for the pivot and/or the
282 // next candidate disconnected from the pivot.
283 CandidateIndex candidate_for_recursion;
284 };
285
286 // The deterministic time coefficients for the push and pop operations of the
287 // Bron-Kerbosch algorithm. The coefficients are set to match approximately
288 // the running time in seconds on a recent workstation on the random graph
289 // benchmark.
290 // NOTE(user): PushState is not the only source of complexity in the
291 // algorithm, but non-negative linear least squares produced zero coefficients
292 // for all other deterministic counters tested during the benchmarking. When
293 // we optimize the algorithm, we might need to add deterministic time to the
294 // other places that may produce complexity, namely InitializeState, PopState
295 // and SelectCandidateIndexForRecursion.
296 static const double kPushStateDeterministicTimeSecondsPerCandidate;
297
298 // Initializes the root state of the algorithm.
299 void Initialize();
300
301 // Removes the top state from the state stack. This is equivalent to returning
302 // in the recursive implementation of the algorithm.
303 void PopState();
304
305 // Adds a new state to the top of the stack, adding the node 'selected' to the
306 // current clique. This is equivalent to making a recurisve call in the
307 // recursive implementation of the algorithm.
308 void PushState(NodeIndex selected);
309
310 // Initializes the given state. Runs the pivot selection algorithm in the
311 // state.
312 void InitializeState(State* state);
313
314 // Returns true if (node1, node2) is an arc in the graph or if node1 == node2.
315 inline bool IsArc(NodeIndex node1, NodeIndex node2) const {
316 return node1 == node2 || is_arc_(node1, node2);
317 }
318
319 // Selects the next node for recursion. The selected node is either the pivot
320 // (if it is not in the set "not") or a node that is disconnected from the
321 // pivot.
322 CandidateIndex SelectCandidateIndexForRecursion(State* state);
323
324 // Returns a human-readable string representation of the clique.
325 std::string CliqueDebugString(const std::vector<NodeIndex>& clique);
326
327 // The callback called when the algorithm needs to determine if (node1, node2)
328 // is an arc in the graph.
329 IsArcCallback is_arc_;
330
331 // The callback called when the algorithm discovers a maximal clique. The
332 // return value of the callback controls how the algorithm proceeds with the
333 // clique search.
334 CliqueCallback clique_callback_;
335
336 // The number of nodes in the graph.
337 const NodeIndex num_nodes_;
338
339 // Contains the state of the aglorithm. The vector serves as an external stack
340 // for the recursive part of the algorithm - instead of using the C++ stack
341 // and natural recursion, it is implemented as a loop and new states are added
342 // to the top of the stack. The algorithm ends when the stack is empty.
343 std::vector<State> states_;
344
345 // A vector that receives the current clique found by the algorithm.
346 std::vector<NodeIndex> current_clique_;
347
348 // Set to true if the algorithm is active (it was not stopped by an the clique
349 // callback).
350 int64 num_remaining_iterations_;
351
352 // The current time limit used by the solver. The time limit is assigned by
353 // the Run methods and it can be different for each call to run.
354 TimeLimit* time_limit_;
355};
356
357template <typename NodeIndex>
358void BronKerboschAlgorithm<NodeIndex>::InitializeState(State* state) {
359 DCHECK(state != nullptr);
360 const int num_candidates = state->candidates.size();
361 int num_disconnected_candidates = num_candidates;
362 state->pivot = 0;
363 CandidateIndex pivot_index(-1);
364 for (CandidateIndex pivot_candidate_index(0);
365 pivot_candidate_index < num_candidates &&
366 num_disconnected_candidates > 0;
367 ++pivot_candidate_index) {
368 const NodeIndex pivot_candidate = state->candidates[pivot_candidate_index];
369 int count = 0;
370 for (CandidateIndex i(state->first_candidate_index); i < num_candidates;
371 ++i) {
372 if (!IsArc(pivot_candidate, state->candidates[i])) {
373 ++count;
374 }
375 }
376 if (count < num_disconnected_candidates) {
377 pivot_index = pivot_candidate_index;
378 state->pivot = pivot_candidate;
379 num_disconnected_candidates = count;
380 }
381 }
382 state->num_remaining_candidates = num_disconnected_candidates;
383 if (pivot_index >= state->first_candidate_index) {
384 std::swap(state->candidates[pivot_index],
385 state->candidates[state->first_candidate_index]);
386 ++state->num_remaining_candidates;
387 }
388}
389
390template <typename NodeIndex>
391typename BronKerboschAlgorithm<NodeIndex>::CandidateIndex
392BronKerboschAlgorithm<NodeIndex>::SelectCandidateIndexForRecursion(
393 State* state) {
394 DCHECK(state != nullptr);
395 CandidateIndex disconnected_node_index =
396 std::max(state->first_candidate_index, state->candidate_for_recursion);
397 while (disconnected_node_index < state->candidates.size() &&
398 state->candidates[disconnected_node_index] != state->pivot &&
399 IsArc(state->pivot, state->candidates[disconnected_node_index])) {
400 ++disconnected_node_index;
401 }
402 state->candidate_for_recursion = disconnected_node_index;
403 return disconnected_node_index;
404}
405
406template <typename NodeIndex>
407void BronKerboschAlgorithm<NodeIndex>::Initialize() {
408 DCHECK(states_.empty());
409 states_.reserve(num_nodes_);
410 states_.emplace_back();
411
412 State* const root_state = &states_.back();
413 root_state->first_candidate_index = 0;
414 root_state->candidate_for_recursion = 0;
415 root_state->candidates.resize(num_nodes_, 0);
416 std::iota(root_state->candidates.begin(), root_state->candidates.end(), 0);
417 root_state->num_remaining_candidates = num_nodes_;
418 InitializeState(root_state);
419
420 DVLOG(2) << "Initialized";
421}
422
423template <typename NodeIndex>
424void BronKerboschAlgorithm<NodeIndex>::PopState() {
425 DCHECK(!states_.empty());
426 states_.pop_back();
427 if (!states_.empty()) {
428 State* const state = &states_.back();
429 current_clique_.pop_back();
430 state->MoveFirstCandidateToNotSet();
431 }
432}
433
434template <typename NodeIndex>
435std::string BronKerboschAlgorithm<NodeIndex>::CliqueDebugString(
436 const std::vector<NodeIndex>& clique) {
437 std::string message = "Clique: [ ";
438 for (const NodeIndex node : clique) {
439 absl::StrAppend(&message, node, " ");
440 }
441 message += "]";
442 return message;
443}
444
445template <typename NodeIndex>
446void BronKerboschAlgorithm<NodeIndex>::PushState(NodeIndex selected) {
447 DCHECK(!states_.empty());
448 DCHECK(time_limit_ != nullptr);
449 DVLOG(2) << "PushState: New depth = " << states_.size() + 1
450 << ", selected node = " << selected;
451 absl::StrongVector<CandidateIndex, NodeIndex> new_candidates;
452
453 State* const previous_state = &states_.back();
454 const double deterministic_time =
455 kPushStateDeterministicTimeSecondsPerCandidate *
456 previous_state->candidates.size();
457 time_limit_->AdvanceDeterministicTime(deterministic_time, "PushState");
458
459 // Add all candidates from previous_state->candidates that are connected to
460 // 'selected' in the graph to the vector 'new_candidates', skipping the node
461 // 'selected'; this node is always at the position
462 // 'previous_state->first_candidate_index', so we can skip it by skipping the
463 // element at this particular index.
464 new_candidates.reserve(previous_state->candidates.size());
465 for (CandidateIndex i(0); i < previous_state->first_candidate_index; ++i) {
466 const NodeIndex candidate = previous_state->candidates[i];
467 if (IsArc(selected, candidate)) {
468 new_candidates.push_back(candidate);
469 }
470 }
471 const CandidateIndex new_first_candidate_index(new_candidates.size());
472 for (CandidateIndex i = previous_state->first_candidate_index + 1;
473 i < previous_state->candidates.size(); ++i) {
474 const NodeIndex candidate = previous_state->candidates[i];
475 if (IsArc(selected, candidate)) {
476 new_candidates.push_back(candidate);
477 }
478 }
479
480 current_clique_.push_back(selected);
481 if (new_candidates.empty()) {
482 // We've found a clique. Report it to the user, but do not push the state
483 // because it would be popped immediately anyway.
484 DVLOG(2) << CliqueDebugString(current_clique_);
485 const CliqueResponse response = clique_callback_(current_clique_);
486 if (response == CliqueResponse::STOP) {
487 // The number of remaining iterations will be decremented at the end of
488 // the loop in RunIterations; setting it to 0 here would make it -1 at
489 // the end of the main loop.
490 num_remaining_iterations_ = 1;
491 }
492 current_clique_.pop_back();
493 previous_state->MoveFirstCandidateToNotSet();
494 return;
495 }
496
497 // NOTE(user): The following line may invalidate previous_state (if the
498 // vector data was re-allocated in the process). We must avoid using
499 // previous_state below here.
500 states_.emplace_back();
501 State* const new_state = &states_.back();
502 new_state->candidates.swap(new_candidates);
503 new_state->first_candidate_index = new_first_candidate_index;
504
505 InitializeState(new_state);
506}
507
508template <typename NodeIndex>
510 int64 max_num_iterations, TimeLimit* time_limit) {
511 CHECK(time_limit != nullptr);
512 time_limit_ = time_limit;
513 if (states_.empty()) {
514 Initialize();
515 }
516 for (num_remaining_iterations_ = max_num_iterations;
517 !states_.empty() && num_remaining_iterations_ > 0 &&
518 !time_limit->LimitReached();
519 --num_remaining_iterations_) {
520 State* const state = &states_.back();
521 DVLOG(2) << "Loop: " << states_.size() << " states, "
522 << state->num_remaining_candidates << " candidate to explore\n"
523 << state->DebugString();
524 if (state->num_remaining_candidates == 0) {
525 PopState();
526 continue;
527 }
528
529 const CandidateIndex selected_index =
530 SelectCandidateIndexForRecursion(state);
531 DVLOG(2) << "selected_index = " << selected_index;
532 const NodeIndex selected = state->candidates[selected_index];
533 DVLOG(2) << "Selected candidate = " << selected;
534
535 NodeIndex& f = state->candidates[state->first_candidate_index];
536 NodeIndex& s = state->candidates[selected_index];
537 std::swap(f, s);
538
539 PushState(selected);
540 }
541 time_limit_ = nullptr;
542 return states_.empty() ? BronKerboschAlgorithmStatus::COMPLETED
544}
545
546template <typename NodeIndex>
548 int64 max_num_iterations) {
549 TimeLimit time_limit(std::numeric_limits<double>::infinity());
550 return RunWithTimeLimit(max_num_iterations, &time_limit);
551}
552
553template <typename NodeIndex>
555 return RunIterations(kint64max);
556}
557
558template <typename NodeIndex>
559const double BronKerboschAlgorithm<
560 NodeIndex>::kPushStateDeterministicTimeSecondsPerCandidate = 0.54663e-7;
561} // namespace operations_research
562
563#endif // OR_TOOLS_GRAPH_CLIQUES_H_
BronKerboschAlgorithmStatus Run()
Definition: cliques.h:554
BronKerboschAlgorithm(IsArcCallback is_arc, NodeIndex num_nodes, CliqueCallback clique_callback)
Definition: cliques.h:164
BronKerboschAlgorithmStatus RunIterations(int64 max_num_iterations)
Definition: cliques.h:547
BronKerboschAlgorithmStatus RunWithTimeLimit(int64 max_num_iterations, TimeLimit *time_limit)
Definition: cliques.h:509
BronKerboschAlgorithmStatus RunWithTimeLimit(TimeLimit *time_limit)
Definition: cliques.h:204
std::function< bool(NodeIndex, NodeIndex)> IsArcCallback
Definition: cliques.h:150
std::function< CliqueResponse(const std::vector< NodeIndex > &)> CliqueCallback
Definition: cliques.h:160
void CoverArcsByCliques(std::function< bool(int, int)> graph, int node_count, std::function< bool(const std::vector< int > &)> callback)
void FindCliques(std::function< bool(int, int)> graph, int node_count, std::function< bool(const std::vector< int > &)> callback)