# Source: commit b53100ca, authored by gunkkk (commit message: "a"; parent e22596f3).
import numpy as np
from keras.datasets import mnist, cifar10
from keras.utils import to_categorical
from keras import backend as K
def data_partition(datax, datay, fraction=1/2, random=False):
    """Split paired arrays into two complementary parts along the first axis.

    Args:
        datax: Array of samples; its first axis indexes the samples.
        datay: Array of labels, indexed the same way as ``datax``.
        fraction: Share of the rows, between 0 and 1, that goes into part 1.
            Part 1 receives ``int(rows * fraction)`` rows, part 2 the rest.
        random: When falsy, part 1 is the leading rows and part 2 the
            trailing rows, in their original order. When truthy, part 1 is
            drawn without replacement and part 2 holds the remaining rows
            in shuffled order.

    Returns:
        The tuple ``(partx1, partx2, party1, party2)``.

    Raises:
        ValueError: If ``fraction`` is not within ``[0, 1]``.
    """
    # This used to return the int 0, which callers unpacking four values hit
    # as an unrelated TypeError. The chained comparison also rejects NaN.
    if not 0 <= fraction <= 1:
        raise ValueError(
            'fraction must be between 0 and 1, got {!r}'.format(fraction))

    allnum = int(datax.shape[0])
    part1num = int(allnum * fraction)
    part2num = allnum - part1num
    print(part1num, part2num)

    if not random:
        return (datax[:part1num], datax[part1num:],
                datay[:part1num], datay[part1num:])

    # Same two RNG calls, in the same order, as before so that seeded runs
    # keep producing the same split.
    part1index = np.random.choice(allnum, part1num, replace=False)
    leftover = np.delete(np.arange(allnum), part1index)
    part2index = np.random.choice(leftover, part2num, replace=False)
    return (datax[part1index], datax[part2index],
            datay[part1index], datay[part2index])
def load_part_mnist(part, partnum):
    """Return one consecutive chunk of the pooled MNIST train and test data.

    The 70000 pooled samples are cut into chunks of ``int(70000 / partnum)``
    rows, and chunk number ``part`` (counted from 1) is returned.

    Args:
        part: 1-based number of the chunk to return.
        partnum: Number of chunks the pooled data is divided into.

    Returns:
        The tuple ``(x_data, y_data, input_shape, num_classes)``: float32
        images divided by 255 with a single-channel axis placed according to
        ``K.image_data_format()``, the labels passed through
        ``to_categorical``, the per-sample input shape, and 10.
    """
    num_classes = 10
    rows, cols = 28, 28

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    pooled_x = np.concatenate((x_train, x_test), axis=0)
    pooled_y = np.concatenate((y_train, y_test), axis=0)
    assert pooled_x.shape[0] == 70000

    chunk = int(70000 / partnum)
    start = (part - 1) * chunk
    stop = part * chunk
    x_data = pooled_x[start:stop]
    y_data = pooled_y[start:stop]

    # The channel axis goes first or last depending on the backend setting.
    if K.image_data_format() == 'channels_first':
        input_shape = (1, rows, cols)
    else:
        input_shape = (rows, cols, 1)
    x_data = x_data.reshape((x_data.shape[0],) + input_shape)

    x_data = x_data.astype('float32')
    x_data /= 255
    print('x_data shape:', x_data.shape)

    # Convert class vectors to binary class matrices.
    y_data = to_categorical(y_data, num_classes)
    return x_data, y_data, input_shape, num_classes
def add_noise(datax, k=0.1):
    """Return a copy of ``datax`` with a share of its entries replaced by noise.

    ``int(datax.size * k)`` distinct positions are chosen at random and
    overwritten with samples from ``np.random.random`` (uniform on [0, 1)).
    The input array itself is left untouched.

    Args:
        datax: NumPy array of any shape.
        k: Share of the entries to overwrite.

    Returns:
        A new array with the same shape and dtype as ``datax``.
    """
    original_shape = datax.shape
    # flatten() always copies. The former reshape(-1) returned a view for
    # contiguous input, so the noise was also written into the caller's array.
    flat = datax.flatten()
    total = flat.shape[0]
    count = int(total * k)
    positions = np.random.choice(total, count, replace=False)
    flat[positions] = np.random.random(count)
    return flat.reshape(original_shape)
def shadowload(fraction=1/2, database='mnist', trainfraction=1/4):
    """Load a random train/test split for a shadow model.

    The leading ``fraction`` of the chosen dataset is kept, a random
    ``2 * trainfraction`` share of those rows is drawn, and that draw is
    split at random into two halves.

    Args:
        fraction: Share of the dataset, taken from the front, that the
            shadow data is drawn from.
        database: ``'mnist'`` or ``'cifar10'``.
        trainfraction: Half of the share of the kept rows that is drawn
            before the train/test split.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``.

    Raises:
        Exception: If ``database`` is neither ``'mnist'`` nor ``'cifar10'``.
    """
    if database not in ('mnist', 'cifar10'):
        raise Exception('no such db')

    if database == 'mnist':
        loader, total = load_part_mnist, 70000
    else:
        loader, total = load_part_cifar10, 60000
    x, y, _, _ = loader(1, 1)

    # Keep only the leading share of the dataset.
    keep = int(x.shape[0] * fraction)
    x = x[:keep]
    y = y[:keep]
    assert x.shape[0] == int(total * fraction)

    # Draw the working pool, then halve it into train and test.
    pool_x, _, pool_y, _ = data_partition(
        x, y, 2 * trainfraction, random=True)
    x_train, x_test, y_train, y_test = data_partition(
        pool_x, pool_y, 1/2, random=True)
    return x_train, x_test, y_train, y_test
def targetload(fraction=1/2, database='mnist', trainfraction=1/12):
    """Load the train/test split for the target model.

    The rows after the leading ``fraction`` of the chosen dataset are kept
    and cut, in order, into a first half (train) and a second half (test).

    Args:
        fraction: Share of the dataset, counted from the front, that is
            skipped. The size assertion below only holds when the remaining
            tail has ``int(total * fraction)`` rows, as with the default 1/2.
        database: ``'mnist'`` or ``'cifar10'``.
        trainfraction: Accepted but not used by the body.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``.

    Raises:
        Exception: If ``database`` is neither ``'mnist'`` nor ``'cifar10'``.
    """
    if database not in ('mnist', 'cifar10'):
        raise Exception('no such db')

    if database == 'mnist':
        loader, total = load_part_mnist, 70000
    else:
        loader, total = load_part_cifar10, 60000
    x, y, _, _ = loader(1, 1)

    # Skip the leading share and keep the tail.
    skip = int(x.shape[0] * fraction)
    x = x[skip:]
    y = y[skip:]
    assert x.shape[0] == int(total * fraction)

    split = data_partition(x, y, 1/2, random=False)
    x_train, x_test, y_train, y_test = split
    print("target train shape2:", x_train.shape)
    print("target test shape:", x_test.shape)
    return x_train, x_test, y_train, y_test
def load_part_cifar10(part, partnum):
    """Return one consecutive chunk of the pooled CIFAR-10 train and test data.

    The 60000 pooled samples are cut into chunks of ``int(60000 / partnum)``
    rows, and chunk number ``part`` (counted from 1) is returned.

    Args:
        part: 1-based number of the chunk to return.
        partnum: Number of chunks the pooled data is divided into.

    Returns:
        The tuple ``(x_data, y_data, input_shape, num_classes)``: float32
        images divided by 255 and reshaped with three channels according to
        ``K.image_data_format()``, the labels passed through
        ``to_categorical``, the per-sample input shape, and 10.
    """
    num_classes = 10
    rows, cols = 32, 32

    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    # Scale each split to float32 before pooling.
    scaled = [split.astype('float32') / 255 for split in (x_train, x_test)]
    pooled_x = np.concatenate(scaled, axis=0)
    pooled_y = np.concatenate((y_train, y_test), axis=0)
    assert pooled_x.shape[0] == 60000

    chunk = int(60000 / partnum)
    start = (part - 1) * chunk
    stop = part * chunk
    x_data = pooled_x[start:stop]
    y_data = pooled_y[start:stop]

    # Convert class vectors to binary class matrices.
    y_data = to_categorical(y_data, num_classes)
    print('x_data shape:', x_data.shape,y_data.shape)

    if K.image_data_format() == 'channels_first':
        input_shape = (3, rows, cols)
    else:
        input_shape = (rows, cols, 3)
    x_data = x_data.reshape((x_data.shape[0],) + input_shape)
    return x_data, y_data, input_shape, num_classes
def load_cifar10_class(classs=0):
    """Load CIFAR-10 with one class moved from the training set to the test set.

    Every training sample labelled ``classs`` is removed from the training
    set and appended to the end of the test set.

    Args:
        classs: Label of the class to move.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``: float32 images
        divided by 255 and labels passed through ``to_categorical`` with 10
        classes.
    """
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    # Row numbers of the training samples that carry the requested label.
    rows = np.where(y_train == classs)[0]
    moved_x = x_train[rows]
    moved_y = y_train[rows]

    x_train = np.delete(x_train, rows, axis=0)
    y_train = np.delete(y_train, rows, axis=0)
    x_test = np.concatenate((x_test, moved_x), axis=0)
    y_test = np.concatenate((y_test, moved_y), axis=0)

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices.
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)

    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255
    return x_train, x_test, y_train, y_test
def load_cifar10_batch(batch_num=1):
    """Load CIFAR-10 with a random batch moved from the training set to the test set.

    ``batch_num`` training samples are chosen at random without replacement,
    removed from the training set and appended to the end of the test set.
    The moved samples are also returned separately.

    Args:
        batch_num: Number of training samples to move.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test, tmpx, tmpy)``, where
        ``tmpx`` and ``tmpy`` are the moved samples. Images are float32
        divided by 255; labels are passed through ``to_categorical`` with 10
        classes.
    """
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    picked = np.random.choice(x_train.shape[0], batch_num, replace=False)
    tmpx = x_train[picked]
    tmpy = y_train[picked]

    x_train = np.delete(x_train, picked, axis=0)
    y_train = np.delete(y_train, picked, axis=0)
    x_test = np.concatenate((x_test, tmpx), axis=0)
    y_test = np.concatenate((y_test, tmpy), axis=0)

    print('x_train shape:', x_train.shape)

    # Convert class vectors to binary class matrices.
    y_train, y_test, tmpy = (
        to_categorical(labels, 10) for labels in (y_train, y_test, tmpy))

    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255
    tmpx = tmpx.astype('float32') / 255
    return x_train, x_test, y_train, y_test, tmpx, tmpy
# End of file (the original file has no trailing newline).