Let X[1 ‥ n] and Y[1 ‥ n] be two arrays, each containing n numbers already in sorted order. Give an O(lg n)-time algorithm to find the median of all 2n elements in arrays X and Y.
// Finds the two middle elements of the union of two sorted ranges,
// [b1, e1) and [b2, e2), which must have the same non-zero length n.
//
// Returns a pair of iterators, each pointing into one of the input ranges:
// `first` refers to the n-th smallest and `second` to the (n+1)-th smallest of
// the 2n elements, so *first <= *second.
//
// Iterator must support `it + k` arithmetic. The preconditions on the range
// lengths are checked with assert only.
template <typename Iterator>
std::pair<Iterator, Iterator> find_median( Iterator b1, Iterator e1, Iterator b2, Iterator e2 )
{
    using Result = std::pair<Iterator, Iterator>;

    size_t len1 = std::distance( b1, e1 );
    size_t len2 = std::distance( b2, e2 );
    assert( len1 > 0 && len2 > 0 && len1 == len2 );

    if ( len1 > 2 ) {
        // Every pass shrinks both ranges from len to len/2 + 1 elements, until
        // exactly two elements remain in each.
        while ( !( len1 == 2 || len2 == 2 ) ) {
            const Iterator mid1 = b1 + len1 / 2;
            const Iterator mid2 = b2 + len2 / 2;

            // The range with the smaller middle keeps its upper part; the
            // other range keeps its lower part (middle element included).
            const bool keep_upper_of_first = ( *mid1 <= *mid2 );
            Iterator& kept_begin = keep_upper_of_first ? b1 : b2;
            Iterator& kept_end = keep_upper_of_first ? e2 : e1;
            kept_begin = keep_upper_of_first ? mid1 : mid2;
            kept_end = ( keep_upper_of_first ? mid2 : mid1 ) + 1;

            len1 = std::distance( b1, e1 );
            len2 = std::distance( b2, e2 );

            // For an even length the upper part is one element shorter than
            // the lower part; give one element back so both ranges stay the
            // same size and equally many elements are dropped on each side.
            size_t& upper_len = keep_upper_of_first ? len1 : len2;
            const size_t lower_len = keep_upper_of_first ? len2 : len1;
            if ( upper_len != lower_len ) {
                assert( upper_len + 1 == lower_len );
                --kept_begin;
                ++upper_len;
            }
        }
        assert( len1 == len2 );
    }

    if ( len1 != 2 ) {
        // One element per range: the medians are simply the ordered pair.
        assert( len1 == 1 );
        return ( *b1 <= *b2 ) ? Result( b1, b2 ) : Result( b2, b1 );
    }

    // Two elements per range: pick the 2nd and 3rd smallest of the four.
    // `lead` is the range whose front is the overall minimum (ties favour the
    // first range); `trail` is the other one.
    const bool first_leads = ( *b1 <= *b2 );
    const Iterator lead = first_leads ? b1 : b2;
    const Iterator trail = first_leads ? b2 : b1;

    if ( *trail <= *( lead + 1 ) ) {
        // The ranges interleave: trail's front is 2nd, the smaller back is 3rd.
        const Iterator upper = ( *( lead + 1 ) <= *( trail + 1 ) ) ? lead + 1 : trail + 1;
        return Result( trail, upper );
    }

    // `lead` lies entirely below `trail`.
    return Result( lead + 1, trail );
}
The testing code:
// Randomized self-check for find_median: builds two equally sized sorted
// arrays, merges them as a reference, and verifies that the returned pair
// matches the two middle elements of the merged sequence.
for ( int retry = 0; retry < 64; ++retry ) {
    const size_t size = get_random_size();
    if ( size == 0 ) {
        // find_median requires non-empty ranges, and the reference check
        // below indexes buf[ size - 1 ]; skip a degenerate size.
        // NOTE(review): confirm whether get_random_size() can return 0.
        continue;
    }

    // NOTE(review): random_data presumably fills the buffer with `bytes`
    // random bytes - confirm against its definition.
    std::vector<int> ai1( size );
    random_data( ai1.data(), sizeof( ai1[ 0 ] ) * size );
    std::vector<int> ai2( size );
    random_data( ai2.data(), sizeof( ai2[ 0 ] ) * size );
    std::sort( ai1.begin(), ai1.end() );
    std::sort( ai2.begin(), ai2.end() );

    auto mi = find_median( ai1.begin(), ai1.end(), ai2.begin(), ai2.end() );

    // Reference answer: the medians of the merged 2*size sequence sit at
    // indices size - 1 and size.
    std::vector<int> buf( size * 2 );
    std::merge( ai1.begin(), ai1.end(), ai2.begin(), ai2.end(), buf.begin() );

    // Fail if EITHER median is wrong (the original `&&` only fired when
    // both were wrong, hiding single-sided errors).
    const bool first_wrong = ( *mi.first != buf[ size - 1 ] );
    const bool second_wrong = ( *mi.second != buf[ size ] );
    if ( first_wrong || second_wrong ) {
        //save_data( "d1.bin", &ai1[ 0 ], size );
        //save_data( "d2.bin", &ai2[ 0 ], size );
        // Report where the offending value actually sits in the merged data.
        const int wrong_value = first_wrong ? *mi.first : *mi.second;
        auto i = std::lower_bound( buf.begin(), buf.end(), wrong_value );
        // %zu / %td match size_t and the iterator difference type.
        fprintf( stderr, "Cannot find the median correctly in the two sorted %zu-size arrays, the false one was %td\n", size, i - buf.begin() );
        DebugBreak();
    }
}