import time

# Baseline: strip the elements of data2 out of a fresh copy of data_bk one at
# a time, issuing a separate difference_update() call for every element.
# (data_bk and data2 are defined outside this snippet.)
data = data_bk.copy()
start = time.time()
for item in data2:
    data.difference_update([item])
elapsed = time.time() - start
print(elapsed)    # wall-clock seconds spent in the loop
print(len(data))  # number of elements left after the removals

# elapsed seconds: 0.314723014832
# len(data): 500000



# One-shot variant: hand the whole of data2 to a single difference_update()
# call on a fresh copy of data_bk.
# (data_bk and data2 are defined outside this snippet.)
data = data_bk.copy()
start = time.time()
data.difference_update(data2)
elapsed = time.time() - start
print(elapsed)    # wall-clock seconds spent in the single call
print(len(data))  # number of elements left after the removal

# elapsed seconds: 0.0722649097443
# len(data): 500000


## List
# Batched variant (list batches): accumulate elements of data2 in a list and
# pass the list to one difference_update() call each time it fills up.
# (data_bk and data2 are defined outside this snippet.)
# NOTE(review): any timings recorded for this loop before the `>=` fix below
# were presumably taken with batches of batch_size + 1 elements.
for batch_size in [1, 10, 100, 1000, 10000, 100000]:
    data = data_bk.copy()
    t = time.time()
    batch = []
    for el in data2:
        batch.append(el)
        # Flush at exactly batch_size elements.  The earlier `>` test only
        # flushed once the batch held batch_size + 1 elements, so every run
        # used a larger batch than the size it reported.
        if len(batch) >= batch_size:
            data.difference_update(batch)
            batch = []
    # Remove whatever is left in the final, possibly partial, batch.
    data.difference_update(batch)
    elapsed = time.time() - t
    # Sanity check: batching must not change the resulting set size.
    assert len(data) == 500000
    print(batch_size, elapsed)


# Recorded output, as (batch_size, elapsed seconds):
# (1, 0.3517491817474365)
# (10, 0.28018689155578613)
# (100, 0.2459700107574463)
# (1000, 0.245513916015625)
# (10000, 0.2845900058746338)
# (100000, 0.30616307258605957)


## Set
# Batched variant (set batches): accumulate elements of data2 in a set and
# pass the set to one difference_update() call each time it fills up.
# (data_bk and data2 are defined outside this snippet.)
# NOTE(review): any timings recorded for this loop before the `>=` fix below
# were presumably taken with batches of batch_size + 1 elements.
for batch_size in [1, 10, 100, 1000, 10000, 100000]:
    data = data_bk.copy()
    t = time.time()
    batch = set()
    for el in data2:
        batch.add(el)
        # Flush at exactly batch_size elements.  The earlier `>` test only
        # flushed once the batch held batch_size + 1 elements, so every run
        # used a larger batch than the size it reported.
        if len(batch) >= batch_size:
            data.difference_update(batch)
            batch = set()
    # Remove whatever is left in the final, possibly partial, batch.
    data.difference_update(batch)
    elapsed = time.time() - t
    # Sanity check: batching must not change the resulting set size.
    assert len(data) == 500000
    print(batch_size, elapsed)

# Recorded output, as (batch_size, elapsed seconds):
# (1, 0.32877302169799805)
# (10, 0.27147603034973145)
# (100, 0.25719690322875977)
# (1000, 0.2440640926361084)
# (10000, 0.26209020614624023)
# (100000, 0.28574204444885254)



# --- Second scenario: data holds 500,000 objects, data2 also holds 500,000
#     objects, and their intersection has 250,106 elements
#     (so the difference has 249,894 elements):

# loop
# elapsed seconds: 0.286896944046
# len(data): 249894

# one-shot
# elapsed seconds: 0.059839963913
# len(data): 249894

# Batches-List
# Batched variant (list batches): accumulate elements of data2 in a list and
# pass the list to one difference_update() call each time it fills up.
# (data_bk and data2 are defined outside this snippet.)
# NOTE(review): any timings recorded for this loop before the `>=` fix below
# were presumably taken with batches of batch_size + 1 elements.
for batch_size in [1, 10, 100, 1000, 10000, 100000]:
    data = data_bk.copy()
    t = time.time()
    batch = []
    for el in data2:
        batch.append(el)
        # Flush at exactly batch_size elements.  The earlier `>` test only
        # flushed once the batch held batch_size + 1 elements, so every run
        # used a larger batch than the size it reported.
        if len(batch) >= batch_size:
            data.difference_update(batch)
            batch = []
    # Remove whatever is left in the final, possibly partial, batch.
    data.difference_update(batch)
    elapsed = time.time() - t
    # Sanity check: batching must not change the resulting set size.
    assert len(data) == 249894
    print(batch_size, elapsed)

# Recorded output, as (batch_size, elapsed seconds):
# (1, 0.32374000549316406)
# (10, 0.28627514839172363)
# (100, 0.29275012016296387)
# (1000, 0.27927589416503906)
# (10000, 0.29823899269104004)
# (100000, 0.2862880229949951)

# Batches-set
# Batched variant (set batches): accumulate elements of data2 in a set and
# pass the set to one difference_update() call each time it fills up.
# (data_bk and data2 are defined outside this snippet.)
# NOTE(review): any timings recorded for this loop before the `>=` fix below
# were presumably taken with batches of batch_size + 1 elements.
for batch_size in [1, 10, 100, 1000, 10000, 100000]:
    data = data_bk.copy()
    t = time.time()
    batch = set()
    for el in data2:
        batch.add(el)
        # Flush at exactly batch_size elements.  The earlier `>` test only
        # flushed once the batch held batch_size + 1 elements, so every run
        # used a larger batch than the size it reported.
        if len(batch) >= batch_size:
            data.difference_update(batch)
            batch = set()
    # Remove whatever is left in the final, possibly partial, batch.
    data.difference_update(batch)
    elapsed = time.time() - t
    # Sanity check: batching must not change the resulting set size.
    assert len(data) == 249894
    print(batch_size, elapsed)

# Recorded output, as (batch_size, elapsed seconds):
# (1, 0.31519508361816406)
# (10, 0.27642202377319336)
# (100, 0.26827406883239746)
# (1000, 0.25890493392944336)
# (10000, 0.26543593406677246)
# (100000, 0.28112196922302246)
