C++ Reference

C++ Reference: Algorithms

dynamic_partition.h
Go to the documentation of this file.
1// Copyright 2010-2018 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// TODO(user,user): refine this toplevel comment when this file settles.
15//
16// Two dynamic partition classes: one that incrementally splits a partition
17// into more and more parts; one that incrementally merges a partition into less
18// and less parts.
19//
20// GLOSSARY:
21// The partition classes maintain a partition of N integers 0..N-1
22// (aka "elements") into disjoint equivalence classes (aka "parts").
23//
24// SAFETY:
25// Like vector<int> crashes when used improperly, these classes are not "safe":
26// most of their methods may crash if called with invalid arguments. The client
27// code is responsible for using this class properly. A few DCHECKs() will help
28// catch bugs, though.
29
30#ifndef OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
31#define OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
32
33#include <string>
34#include <vector>
35
36#include "ortools/base/logging.h"
37
38namespace operations_research {
39
40// Partition class that supports incremental splitting, with backtracking.
41// See http://en.wikipedia.org/wiki/Partition_refinement .
42// More precisely, the supported edit operations are:
43// - Refine the partition so that a subset S (typically, |S| <<< N)
44// of elements are all considered non-equivalent to any element in ¬S.
45// Typically, this should be done in O(|S|).
46// - Undo the above operations (backtracking).
47//
48// TODO(user): rename this to BacktrackableSplittingPartition.
50 public:
51 // Creates a DynamicPartition on n elements, numbered 0..n-1. Start with
52 // the trivial partition (only one subset containing all elements).
53 explicit DynamicPartition(int num_elements);
54
55 // Ditto, but specify the initial part of each elements. Part indices must
56 // form a dense integer set starting at 0; eg. [2, 1, 0, 1, 1, 3, 0] is valid.
57 explicit DynamicPartition(const std::vector<int>& initial_part_of_element);
58
59 // Accessors.
60 int NumElements() const { return element_.size(); }
61 const int NumParts() const { return part_.size(); }
62
63 // To iterate over the elements in part #i:
64 // for (int element : partition.ElementsInPart(i)) { ... }
65 //
66 // ORDERING OF ELEMENTS INSIDE PARTS: the order of elements within a given
67 // part is volatile, and may change with Refine() or UndoRefine*() operations,
68 // even if the part itself doesn't change.
69 struct IterablePart;
70 IterablePart ElementsInPart(int i) const;
71
72 int PartOf(int element) const;
73 int SizeOfPart(int part) const;
74 int ParentOfPart(int part) const;
75
76 // A handy shortcut to ElementsInPart(PartOf(e)). The returned IterablePart
77 // will never be empty, since it contains at least i.
78 IterablePart ElementsInSamePartAs(int i) const;
79
80 // Returns a fingerprint of the given part. While collisions are possible,
81 // their probability is quite low. Two parts that have the same size and the
82 // same fingerprint are most likely identical.
83 // Also, two parts that have the exact same set of elements will *always*
84 // have the same fingerprint.
85 uint64 FprintOfPart(int part) const;
86
87 // Refines the partition such that elements that are in distinguished_subset
88 // never share the same part as elements that aren't in that subset.
89 // This might be a no-op: in that case, NumParts() won't change, but the
90 // order of elements inside each part may change.
91 //
92 // ORDERING OF PARTS:
93 // For each i such that Part #i has a non-trivial intersection with
94 // "distinguished_subset" (neither empty, nor the full Part); Part #i is
95 // stripped out of all elements that are in "distinguished_subset", and
96 // those elements are sent to a newly created part, whose parent_part = i.
97 // The parts newly created by a single Refine() operations are sorted
98 // by parent_part.
99 // Example: a Refine() on a partition with 6 parts causes parts #1, #3 and
100 // #4 to be split: the partition will now contain 3 new parts: part #6 (with
101 // parent_part = 1), part #7 (with parent_part = 3) and part #8 (with
102 // parent_part = 4).
103 //
104 // TODO(user): the graph symmetry finder could probably benefit a lot from
105 // keeping track of one additional bit of information for each part that
106 // remains unchanged by a Refine() operation: was that part entirely *in*
107 // the distinguished subset or entirely *out*?
108 void Refine(const std::vector<int>& distinguished_subset);
109
110 // Undo one or several Refine() operations, until the number of parts
111 // becomes equal to "original_num_parts".
112 // Prerequisite: NumParts() >= original_num_parts.
113 void UndoRefineUntilNumPartsEqual(int original_num_parts);
114
115 // Dump the partition to a string. There might be different conventions for
116 // sorting the parts and the elements inside them.
118 // Elements are sorted within parts, and parts are then sorted
119 // lexicographically.
121 // Elements are sorted within parts, and parts are kept in order.
123 };
124 std::string DebugString(DebugStringSorting sorting) const;
125
126 // ADVANCED USAGE:
127 // All elements (0..n-1) of the partition, sorted in a way that's compatible
128 // with the hierarchical partitioning:
129 // - All the elements of any given part are contiguous.
130 // - Elements of a part P are always after elements of part Parent(P).
131 // - The order remains identical (and the above property holds) after any
132 // UndoRefine*() operation.
133 // Note that the order does get changed by Refine() operations.
134 // This is a reference, so it'll only remain valid and constant until the
135 // class is destroyed or until Refine() get called.
136 const std::vector<int>& ElementsInHierarchicalOrder() const {
137 return element_;
138 }
139
140 private:
141 // A DynamicPartition instance maintains a list of all of its elements,
142 // 'sorted' by partitions: elements of the same subset are contiguous
143 // in that list.
144 std::vector<int> element_;
145
146 // The reverse of elements_[]: element_[index_of_[i]] = i.
147 std::vector<int> index_of_;
148
149 // part_of_[i] is the index of the part that contains element i.
150 std::vector<int> part_of_;
151
152 struct Part {
153 // This part holds elements[start_index .. end_index-1].
154 // INVARIANT: end_index > start_index.
155 int start_index; // Inclusive
156 int end_index; // Exclusive
157
158 // The Part that this part was split out of. See the comment at Refine().
159 // INVARIANT: part[i].parent_part <= i, and the equality holds iff part[i]
160 // has no parent.
161 int parent_part; // Index into the part[] array.
162
163 // The part's fingerprint is the XOR of all fingerprints of its elements.
164 // See FprintOfInt32() in the .cc.
165 uint64 fprint;
166
167 Part() : start_index(0), end_index(0), parent_part(0), fprint(0) {}
168 Part(int start_index, int end_index, int parent_part, uint64 fprint)
169 : start_index(start_index),
170 end_index(end_index),
171 parent_part(parent_part),
172 fprint(fprint) {}
173 };
174 std::vector<Part> part_; // The disjoint parts.
175
176 // Used temporarily and exclusively by Refine(). This prevents Refine()
177 // from being thread-safe.
178 // INVARIANT: tmp_counter_of_part_ contains only 0s before and after Refine().
179 std::vector<int> tmp_counter_of_part_;
180 std::vector<int> tmp_affected_parts_;
181};
182
184 std::vector<int>::const_iterator begin() const { return begin_; }
185 std::vector<int>::const_iterator end() const { return end_; }
186 std::vector<int>::const_iterator begin_;
187 std::vector<int>::const_iterator end_;
188
189 int size() const { return end_ - begin_; }
190
192 IterablePart(const std::vector<int>::const_iterator& b,
193 const std::vector<int>::const_iterator& e)
194 : begin_(b), end_(e) {}
195
196 // These typedefs allow this iterator to be used within testing::ElementsAre.
197 typedef int value_type;
198 typedef std::vector<int>::const_iterator const_iterator;
199};
200
201// Partition class that supports incremental merging, using the union-find
202// algorithm (see http://en.wikipedia.org/wiki/Disjoint-set_data_structure).
204 public:
205 // At first, all nodes are in their own singleton part.
207 explicit MergingPartition(int num_nodes) { Reset(num_nodes); }
208 void Reset(int num_nodes);
209
210 int NumNodes() const { return parent_.size(); }
211
212 // Complexity: amortized O(Ackermann⁻¹(N)) -- which is essentially O(1) --
213 // where N is the number of nodes.
214 //
215 // Return value: If this merge caused a representative node (of either node1
216 // or node2) to stop being a representative (because only one can remain);
217 // this method returns that removed representative. Otherwise it returns -1.
218 //
219 // Details: a smaller part will always be merged onto a larger one.
220 // Upons ties, the smaller representative becomes the overall representative.
221 int MergePartsOf(int node1, int node2); // The 'union' of the union-find.
222
223 // Get the representative of "node" (a node in the same equivalence class,
224 // which will also be returned for any other "node" in the same class).
225 // The complexity if the same as MergePartsOf().
227
228 // Specialized reader API: prunes "nodes" to only keep at most one node per
229 // part: any node which is in the same part as an earlier node will be pruned.
230 void KeepOnlyOneNodePerPart(std::vector<int>* nodes);
231
232 // Output the whole partition as node equivalence classes: if there are K
233 // parts and N nodes, node_equivalence_classes[i] will contain the part index
234 // (a number in 0..K-1) of node #i. Parts will be sorted by their first node
235 // (i.e. node 0 will always be in part 0; then the next node that isn't in
236 // part 0 will be in part 1, and so on).
237 // Returns the number K of classes.
238 int FillEquivalenceClasses(std::vector<int>* node_equivalence_classes);
239
240 // Dump all components, with nodes sorted within each part and parts
241 // sorted lexicographically. Eg. "0 1 3 4 | 2 5 | 6 7 8".
242 std::string DebugString();
243
244 // Advanced usage: sets 'node' to be in its original singleton. All nodes
245 // who may point to 'node' as a parent will remain in an inconsistent state.
246 // This can be used to reinitialize a MergingPartition that has been sparsely
247 // modified in O(|modifications|).
248 // CRASHES IF USED INCORRECTLY.
249 void ResetNode(int node);
250
251 int NumNodesInSamePartAs(int node) {
252 return part_size_[GetRootAndCompressPath(node)];
253 }
254
255 // FOR DEBUGGING OR SPECIAL "CONST" ACCESS ONLY:
256 // Find the root of the union-find tree with leaf 'node', i.e. its
257 // representative node, but don't use path compression.
258 // The amortized complexity can be as bad as log(N), as opposed to the
259 // version using path compression.
260 int GetRoot(int node) const;
261
262 private:
263 // Along the upwards path from 'node' to its root, set the parent of all
264 // nodes (including the root) to 'parent'.
265 void SetParentAlongPathToRoot(int node, int parent);
266
267 std::vector<int> parent_;
268 std::vector<int> part_size_;
269
270 // Used transiently by KeepOnlyOneNodePerPart().
271 std::vector<bool> tmp_part_bit_;
272};
273
274// *** Implementation of inline methods of the above classes. ***
275
277 int i) const {
278 DCHECK_GE(i, 0);
279 DCHECK_LT(i, NumParts());
280 return IterablePart(element_.begin() + part_[i].start_index,
281 element_.begin() + part_[i].end_index);
282}
283
284inline int DynamicPartition::PartOf(int element) const {
285 DCHECK_GE(element, 0);
286 DCHECK_LT(element, part_of_.size());
287 return part_of_[element];
288}
289
290inline int DynamicPartition::SizeOfPart(int part) const {
291 DCHECK_GE(part, 0);
292 DCHECK_LT(part, part_.size());
293 const Part& p = part_[part];
294 return p.end_index - p.start_index;
295}
296
297inline int DynamicPartition::ParentOfPart(int part) const {
298 DCHECK_GE(part, 0);
299 DCHECK_LT(part, part_.size());
300 return part_[part].parent_part;
301}
302
304 int i) const {
305 return ElementsInPart(PartOf(i));
306}
307
308inline uint64 DynamicPartition::FprintOfPart(int part) const {
309 DCHECK_GE(part, 0);
310 DCHECK_LT(part, part_.size());
311 return part_[part].fprint;
312}
313
314inline int MergingPartition::GetRoot(int node) const {
315 DCHECK_GE(node, 0);
316 DCHECK_LT(node, NumNodes());
317 int child = node;
318 while (true) {
319 const int parent = parent_[child];
320 if (parent == child) return child;
321 child = parent;
322 }
323}
324
325inline void MergingPartition::SetParentAlongPathToRoot(int node, int parent) {
326 DCHECK_GE(node, 0);
327 DCHECK_LT(node, NumNodes());
328 DCHECK_GE(parent, 0);
329 DCHECK_LT(parent, NumNodes());
330 int child = node;
331 while (true) {
332 const int old_parent = parent_[child];
333 parent_[child] = parent;
334 if (old_parent == child) return;
335 child = old_parent;
336 }
337}
338
339inline void MergingPartition::ResetNode(int node) {
340 DCHECK_GE(node, 0);
341 DCHECK_LT(node, NumNodes());
342 parent_[node] = node;
343 part_size_[node] = 1;
344}
345
346} // namespace operations_research
347
348#endif // OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
IterablePart ElementsInPart(int i) const
DynamicPartition(const std::vector< int > &initial_part_of_element)
void Refine(const std::vector< int > &distinguished_subset)
void UndoRefineUntilNumPartsEqual(int original_num_parts)
IterablePart ElementsInSamePartAs(int i) const
std::string DebugString(DebugStringSorting sorting) const
const std::vector< int > & ElementsInHierarchicalOrder() const
int MergePartsOf(int node1, int node2)
int FillEquivalenceClasses(std::vector< int > *node_equivalence_classes)
void KeepOnlyOneNodePerPart(std::vector< int > *nodes)
std::vector< int >::const_iterator end() const
std::vector< int >::const_iterator const_iterator
std::vector< int >::const_iterator begin() const
IterablePart(const std::vector< int >::const_iterator &b, const std::vector< int >::const_iterator &e)