Ok, I added the binary search and removed the unnecessary set. Overall speedup is around 27% (measured on 10k runs of the old and new approach). New implementation is in "delegate_shuffle", the old one is in "delegate_shuffle_linear".
/* 
 * File:   main.cpp
 * Author: Hrossik
 *
 * Created on April 7, 2015, 11:27 PM
 */
#include <cstdlib>
#include <vector>
#include <cmath>
#include <set>
#include <stdio.h>
#include <time.h>
#include <algorithm>
#define NUM_DELEGATES 101
#define INVALID_DELEGATE -1
#define delegate_id int
#define sha256 int
using namespace std;
vector<delegate_id> delegate_shuffle(vector<delegate_id>, vector<delegate_id>, sha256);
vector<delegate_id> delegate_shuffle_linear(vector<delegate_id>, vector<delegate_id>, sha256);
int binary_search(vector<delegate_id>, delegate_id);
void print_vec(vector<delegate_id>);
/*
 * 
 */
int main(int argc, char** argv) {
    vector<delegate_id> prev_delegate_order;
    for (int i = 0; i < NUM_DELEGATES; i++) {
        prev_delegate_order.push_back(i);
    }
    vector<delegate_id> top_101_delegates_sorted_by_vote;
    for (int i = 0; i < NUM_DELEGATES; i++) {
        top_101_delegates_sorted_by_vote.push_back(i);
    }
    const int num_of_tests = 100000;
    struct timespec start, stop;
    double time_bin = 0;
    for (int i = 0; i < num_of_tests; i++) {
        clock_gettime(CLOCK_REALTIME, &start);
        delegate_shuffle(prev_delegate_order, top_101_delegates_sorted_by_vote, time(NULL));
        clock_gettime(CLOCK_REALTIME, &stop);
        time_bin += (stop.tv_sec - start.tv_sec)*1000.0 + (stop.tv_nsec - start.tv_nsec) / 1000000.0;
    }
    double time_lin = 0;
    for (int i = 0; i < num_of_tests; i++) {
        clock_gettime(CLOCK_REALTIME, &start);
        delegate_shuffle_linear(prev_delegate_order, top_101_delegates_sorted_by_vote, time(NULL));
        clock_gettime(CLOCK_REALTIME, &stop);
        time_lin += (stop.tv_sec - start.tv_sec)*1000.0 + (stop.tv_nsec - start.tv_nsec) / 1000000.0;
    }
    printf("time_bin = %.6lf, time_lin = %.6lf", time_bin, time_lin);
    // test of binary_search
    /*int count = 0;
    for (int i = 0; i < prev_delegate_order.size(); i++) {
        int ind = binary_search(prev_delegate_order, i);
        if(ind == i) {
            count++;
        }        
    }
    printf("count = %d\n",count);*/
    return 0;
}
vector<delegate_id> delegate_shuffle(vector<delegate_id> prev_delegate_order, vector<delegate_id> top_101_delegates_sorted_by_vote, sha256 random_seed) {
    // init
    srand(random_seed);
    vector<delegate_id> result;
    const int lower_half = floor(NUM_DELEGATES / 2);
    const int upper_half = ceil(NUM_DELEGATES / 2);
    
    // sort top_101_delegates_sorted_by_vote by delegates
    sort(top_101_delegates_sorted_by_vote.begin(),top_101_delegates_sorted_by_vote.end());
    // prefill with empty/invalid delegates
    for (int i = 0; i < NUM_DELEGATES; i++) {
        result.push_back(INVALID_DELEGATE);
    }
    vector<delegate_id> constrained_delegates;
    // find the delegates, which are constrained by the n/2 distance and are at top_101_delegates_sorted_by_vote so should be placed in the result
    for (int i = NUM_DELEGATES; i > lower_half; i--) {
        int actual = binary_search(top_101_delegates_sorted_by_vote, prev_delegate_order[i]);
        if (actual != -1) {
            top_101_delegates_sorted_by_vote.erase(top_101_delegates_sorted_by_vote.begin() + actual);
            constrained_delegates.push_back(prev_delegate_order[i]);
        }
    }
    // init free slots for constrained delegates
    vector<int> free_slots;
    for (int i = 0; i < upper_half; i++) {
        free_slots.push_back(NUM_DELEGATES - 1 - i);
    }
    // fill the result with constrained delegates. Each of them has n/2 possible locations
    for (int i = 0; i < constrained_delegates.size(); i++) {
        int uniform_rand_free_slot = rand() % free_slots.size();
        result[free_slots[uniform_rand_free_slot]] = constrained_delegates[i];
        // erase used slot
        free_slots.erase(free_slots.begin() + uniform_rand_free_slot);
        // add new slot, because next delegate's constraint is lower
        free_slots.push_back(lower_half - i);
    }
    // fill free slots for the rest of the delegates
    for (int i = lower_half - constrained_delegates.size(); i > -1; i--) {
        free_slots.push_back(i);
    }
    // place the rest of the delegates. i-th delegate has X possible locations, where X = NUM_DELEGATES - constrained_delegates.size() - i
    for (int i = 0; i < top_101_delegates_sorted_by_vote.size(); i++) {
        int uniform_rand_free_slot = rand() % free_slots.size();
        result[free_slots[uniform_rand_free_slot]] = top_101_delegates_sorted_by_vote[i];
        free_slots.erase(free_slots.begin() + uniform_rand_free_slot);
    }
    return result;
}
void print_vec(vector<delegate_id> vec) {
    for (int i = 0; i < vec.size(); i++) {
        printf("%d ", vec[i]);
    }
    printf("\n");
}
int binary_search(vector<delegate_id> sorted_vec, delegate_id wanted) {
    int lower_bound = 0;
    int upper_bound = sorted_vec.size() - 1;
    while (lower_bound < upper_bound) {
        int mid = (lower_bound + upper_bound) / 2;
        if (wanted > sorted_vec[mid]) {
            lower_bound = mid + 1;
        } else {
            upper_bound = mid;
        }
    }
    if (sorted_vec[lower_bound] != wanted) {
        lower_bound = -1;
    }
    return lower_bound;
}
vector<delegate_id> delegate_shuffle_linear(vector<delegate_id> prev_delegate_order, vector<delegate_id> top_101_delegates_sorted_by_vote, sha256 random_seed) {
    // init
    srand(random_seed);
    vector<delegate_id> result;
    const int lower_half = floor(NUM_DELEGATES / 2);
    const int upper_half = ceil(NUM_DELEGATES / 2);
    std::set<delegate_id> remaining(top_101_delegates_sorted_by_vote.begin(),
            top_101_delegates_sorted_by_vote.end());
    // prefill with empty/invalid delegates
    for (int i = 0; i < NUM_DELEGATES; i++) {
        result.push_back(INVALID_DELEGATE);
    }
    vector<delegate_id> constrained_delegates;
    // find the delegates, which are constrained by the n/2 distance and are at top_101_delegates_sorted_by_vote so should be placed in the result
    for (int i = NUM_DELEGATES; i > lower_half; i--) {
        set<delegate_id>::iterator actual = remaining.find((delegate_id) prev_delegate_order[i]);
        if (actual != remaining.end()) {
            remaining.erase(actual);
            constrained_delegates.push_back(prev_delegate_order[i]);
        }
    }
    // init free slots for constrained delegates
    vector<int> free_slots;
    for (int i = 0; i < upper_half; i++) {
        free_slots.push_back(NUM_DELEGATES - 1 - i);
    }
    // fill the result with constrained delegates. Each of them has n/2 possible locations
    for (int i = 0; i < constrained_delegates.size(); i++) {
        int uniform_rand_free_slot = rand() % free_slots.size();
        result[free_slots[uniform_rand_free_slot]] = constrained_delegates[i];
        // erase used slot
        free_slots.erase(free_slots.begin() + uniform_rand_free_slot);
        // add new slot, because next delegate's constraint is lower
        free_slots.push_back(lower_half - i);
    }
    // fill free slots for the rest of the delegates
    for (int i = lower_half - constrained_delegates.size(); i > -1; i--) {
        free_slots.push_back(i);
    }
    // place the rest of the delegates. i-th delegate has X possible locations, where X = NUM_DELEGATES - constrained_delegates.size() - i
    for (set<delegate_id>::iterator it = remaining.begin(); it != remaining.end(); ++it) {
        int uniform_rand_free_slot = rand() % free_slots.size();
        result[free_slots[uniform_rand_free_slot]] = *it;
        free_slots.erase(free_slots.begin() + uniform_rand_free_slot);
    }
    return result;
}