1.5 Model Configuration

This section introduces how to change the model. So far, we have only trained the default model of each benchmark (e.g. CNN for mnist_classification). For most benchmarks, we provide several popular models that can be used simply by passing them as the parameter model to flgo.init. We show the usage with the following example.

1.5.1 Example: Select model for MNIST

import flgo
import flgo.benchmark.mnist_classification.model.cnn as cnn
import flgo.benchmark.mnist_classification.model.mlp as mlp
import flgo.algorithm.fedavg as fedavg
task = './mnist_iid' # this task has been generated in Example 2.1
cnn_runner = flgo.init(task, fedavg, option={'num_rounds':5, 'num_epochs':1, 'gpu':0}, model=cnn)
mlp_runner = flgo.init(task, fedavg, option={'num_rounds':5, 'num_epochs':1, 'gpu':0}, model=mlp)
cnn_runner.run()
mlp_runner.run()

# result analysis
import flgo.experiment.analyzer
analysis_plan = {
    'Selector':{
        'task': task,
        'header':['fedavg'],
        'filter':{'M':['cnn', 'mlp'], 'R':5}, # keep results with model M in ['cnn','mlp'] and communication round R=5
        'legend_with':['M']
    },
    'Painter':{
        'Curve':[
            {'args':{'x': 'communication_round', 'y':'val_loss'}, 'fig_option':{'title':'valid loss on MNIST'}},
            {'args':{'x': 'communication_round', 'y':'val_accuracy'},  'fig_option':{'title':'valid accuracy on MNIST'}},
        ]
    }
}
flgo.experiment.analyzer.show(analysis_plan)

[figure: valid loss on MNIST]

[figure: valid accuracy on MNIST]

1.5.2 Customization on models

We now discuss the implementation of models in our framework. Different from the centralized ML setting, where there is a single model that transforms inputs into outputs, a model in our framework should describe which models are kept by the different participants and how they are initialized. This is because different parties in FL may own models with different architectures and parameters (e.g. personalized FL, vertical FL, model-agnostic FL). In addition, the model itself can be a significant part of a particular method. Therefore, we define the model as a class as follows:

class GeneralModel:
    @classmethod
    def init_local_module(cls, object):
        """init local models (e.g. personal models that cannot be shared) for the object according to its information"""
        pass

    @classmethod
    def init_global_module(cls, object):
        """init global models (e.g. sharable models) for the object according to its information"""
        pass
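
For instance, in a personalized FL setting each client may keep a private head while the server maintains a shared feature extractor. The following sketch shows how such a split could be expressed with this interface. It reuses the get_classname()/device/model pattern from the horizontal-FL example below; the attribute name local_model and the concrete architectures are illustrative assumptions rather than FLGo conventions.

from torch import nn

class PersonalizedModel:
    @classmethod
    def init_local_module(cls, object):
        # each client keeps a private classification head that is never uploaded
        # (local_model is a hypothetical attribute name used for illustration)
        if 'Client' in object.get_classname():
            object.local_model = nn.Linear(512, 10).to(object.device)

    @classmethod
    def init_global_module(cls, object):
        # only the server holds the sharable feature extractor
        if 'Server' in object.get_classname():
            object.model = nn.Sequential(
                nn.Flatten(), nn.Linear(784, 512), nn.ReLU()
            ).to(object.device)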

Now we construct a model and test it with the following example.

from torch import nn
import torch.nn.functional as F
from flgo.utils.fmodule import FModule

class CNNModel(FModule): # inherit from flgo.utils.fmodule.FModule instead of torch.nn.Module
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.fc1 = nn.Linear(3136, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = x.view((x.shape[0],28,28))
        x = x.unsqueeze(1)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, x.shape[1]*x.shape[2]*x.shape[3])
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def init_local_module(object):
    pass

def init_global_module(object):
    # In classical horizontal FL, only the server needs to keep and update the latest global model
    if 'Server' in object.get_classname():
        object.model = CNNModel().to(object.device)

class MyCNN:
    init_local_module = init_local_module
    init_global_module = init_global_module

mycnn_runner = flgo.init(task, fedavg, option={'num_rounds':5, 'num_epochs':1, 'gpu':0}, model=MyCNN)
mycnn_runner.run()

analysis_plan = {
    'Selector':{
        'task': task,
        'header':['fedavg'],
        'filter':{'M':['MyCNN', 'cnn'], 'R':5},
        'legend_with':['M']
    },
    'Painter':{
        'Curve':[
            {'args':{'x': 'communication_round', 'y':'val_loss'}, 'fig_option':{'title':'valid loss on MNIST'}},
            {'args':{'x': 'communication_round', 'y':'val_accuracy'},  'fig_option':{'title':'valid accuracy on MNIST'}},
        ]
    }
}
flgo.experiment.analyzer.show(analysis_plan)

[figure: valid loss on MNIST]

[figure: valid accuracy on MNIST]

The two CNNs of the same architecture achieve similar performance in this example.

1.5.3 What is FModule?

FModule is a class that decorates torch.nn.Module to enable direct arithmetic on models, such as addition and subtraction. FModule directly inherits from torch.nn.Module and does not affect any of its original characteristics. The only difference is that FModule supports model-level operations, where the operators +, -, and * return a new model. We show the usage of FModule with the following example.
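
Conceptually, these operators act parameter-wise and return a fresh model instance, leaving the operands untouched. The following is a minimal sketch of how such overloading could be implemented for addition; it is illustrative only and not FModule's actual source code.

import copy
import torch
from torch import nn

class ArithmeticModule(nn.Module):
    # a toy stand-in for FModule that only supports '+'
    def __add__(self, other):
        res = copy.deepcopy(self)  # the result is a new instance; self and other are unchanged
        with torch.no_grad():
            for p_res, p_other in zip(res.parameters(), other.parameters()):
                p_res.add_(p_other)  # element-wise sum of corresponding parameters
        return res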

1.5.3.1 Example: Model-level operators

from torch import nn
import torch.nn.functional as F
from flgo.utils.fmodule import FModule

class Model(FModule):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(3, 3, bias=False)

    def forward(self, x):
        return x

A = Model()
B = Model()
print("model A: ", A.head.weight)
print("model B: ", B.head.weight)

# add
C = A + B  # C is a new instance of class Model and changes on C won't have any impact on A or B
print("C=A+B: {}", C.head.weight)
print("Type of C:", C)
model A:  Parameter containing:
tensor([[ 0.2429, -0.4990,  0.1843],
        [-0.2553,  0.1664,  0.3536],
        [ 0.5772,  0.0578, -0.0694]], requires_grad=True)
model B:  Parameter containing:
tensor([[-0.4220, -0.3707, -0.2508],
        [-0.4888, -0.1267,  0.1310],
        [ 0.5714, -0.2370,  0.3410]], requires_grad=True)
C=A+B:  Parameter containing:
tensor([[-0.1790, -0.8697, -0.0665],
        [-0.7441,  0.0397,  0.4845],
        [ 1.1486, -0.1792,  0.2716]], requires_grad=True)
Type of C: Model(
  (fc): Linear(in_features=3, out_features=3, bias=False)
)
# sub
print('A-B: \n', (A - B).fc.weight)
print('+++++++++++++++++++++++++++++++++++')
# scale
print('2*A: \n', (2 * A).fc.weight)
print('+++++++++++++++++++++++++++++++++++')
# div
print('A/2: \n', (A / 2).fc.weight)
print('+++++++++++++++++++++++++++++++++++')
# norm (A ** p returns the p-norm of A's parameters)
print('||A||_2: \n', (A ** 2))
print('+++++++++++++++++++++++++++++++++++')
# neg
print('-A: \n', (-A).fc.weight)
print('+++++++++++++++++++++++++++++++++++')
# zeros-copy
print('A.zeros_like(): \n', A.zeros_like().fc.weight)
print('+++++++++++++++++++++++++++++++++++')
# dot
print("dot(A,B):\n", A.dot(B))
print('+++++++++++++++++++++++++++++++++++')
# cos-similarity
print("cos_sim(A,B):\n", A.cos_sim(B))
print('+++++++++++++++++++++++++++++++++++')
# size
print("size(A):\n", A.count_parameters())
print('+++++++++++++++++++++++++++++++++++')
A-B:
 Parameter containing:
tensor([[ 0.6649, -0.1282,  0.4352],
        [ 0.2336,  0.2932,  0.2226],
        [ 0.0057,  0.2948, -0.4103]], requires_grad=True)
+++++++++++++++++++++++++++++++++++
2*A:
 Parameter containing:
tensor([[ 0.4859, -0.9979,  0.3687],
        [-0.5105,  0.3329,  0.7072],
        [ 1.1543,  0.1156, -0.1388]], requires_grad=True)
+++++++++++++++++++++++++++++++++++
A/2:
 Parameter containing:
tensor([[ 0.1215, -0.2495,  0.0922],
        [-0.1276,  0.0832,  0.1768],
        [ 0.2886,  0.0289, -0.0347]], requires_grad=True)
+++++++++++++++++++++++++++++++++++
||A||_2:
 tensor(0.9493)
+++++++++++++++++++++++++++++++++++
-A:
 Parameter containing:
tensor([[-0.2429,  0.4990, -0.1843],
        [ 0.2553, -0.1664, -0.3536],
        [-0.5772, -0.0578,  0.0694]], requires_grad=True)
+++++++++++++++++++++++++++++++++++
A.zeros_like():
 Parameter containing:
tensor([[0., -0., 0.],
        [-0., 0., 0.],
        [0., 0., -0.]], requires_grad=True)
+++++++++++++++++++++++++++++++++++
dot(A,B):
 tensor(0.4787)
+++++++++++++++++++++++++++++++++++
cos_sim(A,B):
 tensor(0.4703)
+++++++++++++++++++++++++++++++++++
size(A):
 9
+++++++++++++++++++++++++++++++++++

Besides the model-level operators, we also implement some common model-level functions.

import flgo.utils.fmodule as ff

# exp(A)
print('exp(A):\n', ff.exp(A).fc.weight)
print('+++++++++++++++++++++++++++++++++++')
# log(A)
print('log(A):\n', ff.log(A).fc.weight)
print('+++++++++++++++++++++++++++++++++++')
# model to 1-D vector
a = ff._model_to_tensor(A)
print('a = Vec(A):\n', a)
# 1-D tensor to model
print('A from a: \n', ff._model_from_tensor(a, A.__class__).fc.weight)
print('+++++++++++++++++++++++++++++++++++')
# model averaging
print('AVERAGE([A,B]):\n', ff._model_average([A, B]).fc.weight)
# model sum
print('SUM([A,B]):\n', ff._model_sum([A, B]).fc.weight)
exp(A):
 Parameter containing:
tensor([[1.2750, 0.6072, 1.2024],
        [0.7747, 1.1811, 1.4242],
        [1.7810, 1.0595, 0.9330]], requires_grad=True)
+++++++++++++++++++++++++++++++++++
log(A):
 Parameter containing:
tensor([[-1.4149,     nan, -1.6910],
        [    nan, -1.7931, -1.0396],
        [-0.5496, -2.8510,     nan]], requires_grad=True)
+++++++++++++++++++++++++++++++++++
a = Vec(A):
 tensor([ 0.2429, -0.4990,  0.1843, -0.2553,  0.1664,  0.3536,  0.5772,  0.0578,
        -0.0694])
A from a:
 Parameter containing:
tensor([[ 0.2429, -0.4990,  0.1843],
        [-0.2553,  0.1664,  0.3536],
        [ 0.5772,  0.0578, -0.0694]], requires_grad=True)
+++++++++++++++++++++++++++++++++++
AVERAGE([A,B]):
 Parameter containing:
tensor([[-0.0895, -0.4348, -0.0333],
        [-0.3721,  0.0199,  0.2423],
        [ 0.5743, -0.0896,  0.1358]], requires_grad=True)
SUM([A,B]):
 Parameter containing:
tensor([[-0.1790, -0.8697, -0.0665],
        [-0.7441,  0.0397,  0.4845],
        [ 1.1486, -0.1792,  0.2716]], requires_grad=True)
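
These operators make aggregation rules concise to express. As a sketch (this is not FLGo's built-in aggregation code; models and weights are hypothetical variables holding FModule instances and their averaging coefficients), a weighted FedAvg-style update becomes plain model arithmetic:

def aggregate(models, weights):
    # weighted sum w_0*M_0 + w_1*M_1 + ...; returns a new model and leaves the inputs untouched
    new_model = weights[0] * models[0]
    for w, m in zip(weights[1:], models[1:]):
        new_model = new_model + w * m
    return new_model

# e.g. aggregate([A, B], [0.3, 0.7]) weights A by 0.3 and B by 0.7;
# unweighted averaging is already provided by ff._model_average([A, B])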

1.5.4 Fast Customization

We further provide a fast API that converts a model into a federated one with only one line of code.

from torch import nn
import torch.nn.functional as F

class NewModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.fc1 = nn.Linear(3136, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = x.view((x.shape[0],28,28))
        x = x.unsqueeze(1)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, x.shape[1]*x.shape[2]*x.shape[3])
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = flgo.convert_model(NewModel) # the default value of model_name is 'anonymous'
mycnn_runner2 = flgo.init(task, fedavg, option={'num_rounds':5, 'num_epochs':1, 'gpu':0}, model=model)
mycnn_runner2.run()

analysis_plan = {
    'Selector':{
        'task': task,
        'header':['fedavg'],
        'filter':{'M':['MyCNN', 'anonymous'], 'R':5},
        'legend_with':['M']
    },
    'Painter':{
        'Curve':[
            {'args':{'x': 'communication_round', 'y':'val_loss'}, 'fig_option':{'title':'valid loss on MNIST'}},
            {'args':{'x': 'communication_round', 'y':'val_accuracy'},  'fig_option':{'title':'valid accuracy on MNIST'}},
        ]
    }
}
flgo.experiment.analyzer.show(analysis_plan)

[figure: valid loss on MNIST]

[figure: valid accuracy on MNIST]