Skip to content

Commit d6e992f

Browse files
committed
Fix bug with probe::spear over equivalent elements (#230)
1 parent 07ae456 commit d6e992f

File tree

3 files changed

+40
-32
lines changed

3 files changed

+40
-32
lines changed

docs/Measures-of-presortedness.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ Computes the number of non-decreasing runs in *X* minus one.
281281
#include <cpp-sort/probes/spear.h>
282282
```
283283

284-
Spearman's footrule distance: sum of distances between the position of individual elements in *X* and their position once *X* is sorted. Its use a a measure of presortedness was proposed by P. Diaconis and R. L. Graham in *Spearman's Footrule as a Measure of Disarray*.
284+
Spearman's footrule distance: sum of distances between the position of individual elements in *X* and their position once *X* is sorted (we use a stable sort to handle *equivalent elements*). Its use a a measure of presortedness was proposed by P. Diaconis and R. L. Graham in *Spearman's Footrule as a Measure of Disarray*.
285285

286286
| Complexity | Memory | Iterators |
287287
| ----------- | ----------- | ------------- |

include/cpp-sort/probes/spear.h

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,12 @@
1313
#include <utility>
1414
#include <cpp-sort/sorter_facade.h>
1515
#include <cpp-sort/sorter_traits.h>
16-
#include <cpp-sort/utility/as_function.h>
1716
#include <cpp-sort/utility/functional.h>
1817
#include <cpp-sort/utility/size.h>
1918
#include <cpp-sort/utility/static_const.h>
20-
#include "../detail/equal_range.h"
2119
#include "../detail/immovable_vector.h"
2220
#include "../detail/iterator_traits.h"
23-
#include "../detail/pdqsort.h"
21+
#include "../detail/spinsort.h"
2422
#include "../detail/type_traits.h"
2523

2624
namespace cppsort
@@ -36,50 +34,54 @@ namespace probe
3634
-> ::cppsort::detail::difference_type_t<ForwardIterator>
3735
{
3836
using difference_type = ::cppsort::detail::difference_type_t<ForwardIterator>;
39-
auto&& proj = utility::as_function(projection);
4037

4138
if (size < 2) {
4239
return 0;
4340
}
4441

4542
////////////////////////////////////////////////////////////
46-
// Indirectly sort the iterators
43+
// Indirectly sort the collection
4744

48-
// Copy the iterators in a vector
49-
cppsort::detail::immovable_vector<ForwardIterator> iterators(size);
45+
// Copy the iterators and their original index in a vector
46+
cppsort::detail::immovable_vector<
47+
std::pair<ForwardIterator, difference_type>
48+
> iter_idx(size);
49+
difference_type index = 0;
5050
for (auto it = first; it != last; ++it) {
51-
iterators.emplace_back(it);
51+
iter_idx.emplace_back(it, index);
52+
++index;
5253
}
5354

54-
// Sort the iterators on pointed values
55-
cppsort::detail::pdqsort(
56-
iterators.begin(), iterators.end(),
57-
compare, utility::indirect{} | projection
55+
// Sort the iterator/index pairs on pointed values
56+
// Note: we use a stable sort to ensure that we don't end up with
57+
// to big values for equivalent elements. For example given
58+
// the sequence (5, 1, 2, 8, 9, 5), we want the first 5 to
59+
// have the sorted position 2, and the second 5 to have the
60+
// sorted position 3. Otherwise we might get 10 as a result
61+
// instead of the expected 8, and it will depend on the used
62+
// unstable sort implementation.
63+
cppsort::detail::spinsort(
64+
iter_idx.begin(), iter_idx.end(),
65+
std::move(compare),
66+
&std::pair<ForwardIterator, difference_type>::first
67+
| utility::indirect{}
68+
| std::move(projection)
5869
);
5970

6071
////////////////////////////////////////////////////////////
61-
// Sum of distances between an elements and their sorted
72+
// Sum of distances between elements and their sorted
6273
// positions
6374

6475
difference_type sum_dist = 0;
65-
difference_type it_pos = 0;
66-
for (auto it = first; it != last; ++it) {
67-
// Find the range where *it belongs once sorted
68-
auto rng = cppsort::detail::equal_range(
69-
iterators.begin(), iterators.end(), proj(*it),
70-
compare, utility::indirect{} | projection
71-
);
72-
auto pos_min = rng.first - iterators.begin();
73-
auto pos_max = rng.second - iterators.begin();
74-
75-
// If *it isn't into one of its sorted positions, computed the closest one
76-
if (it_pos < pos_min) {
77-
sum_dist += pos_min - it_pos;
78-
} else if (it_pos >= pos_max) {
79-
sum_dist += it_pos - pos_max + 1;
76+
difference_type current_pos = 0;
77+
for (auto const& it_idx: iter_idx) {
78+
auto original_pos = it_idx.second;
79+
if (original_pos < current_pos) {
80+
sum_dist += current_pos - original_pos;
81+
} else {
82+
sum_dist += original_pos - current_pos;
8083
}
81-
82-
++it_pos;
84+
++current_pos;
8385
}
8486
return sum_dist;
8587
}

tests/probes/spear.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ TEST_CASE( "presortedness measure: spear", "[probe][max]" )
3838
CHECK( spear(li.begin(), li.end()) == max_n );
3939
}
4040

41-
SECTION( "exhaustive check for 4 values" )
41+
SECTION( "exhaustive check for 4 distinct values" )
4242
{
4343
// Results from Spearman's Footrule as a Measure of Disarray
4444
// by Diaconis and Graham
@@ -72,4 +72,10 @@ TEST_CASE( "presortedness measure: spear", "[probe][max]" )
7272
CHECK( spear({4, 3, 1, 2}) == 8 );
7373
CHECK( spear({4, 3, 2, 1}) == 8 );
7474
}
75+
76+
SECTION( "regression test: some equal values" )
77+
{
78+
int arr[] = {2, 2, 0, -1};
79+
CHECK( spear(arr) == 8 );
80+
}
7581
}

0 commit comments

Comments
 (0)