Skip to content

flgo.utils.fflow

GlobalVariable

This class is to create a shared space for sharing variables across different parties for each runner

Source code in flgo\utils\fflow.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
class GlobalVariable:
    """This class is to create a shared space for sharing variables across
    different parties for each runner.

    Attributes are injected at runner-creation time and read by the runner's
    objects (server/clients) during training.
    """

    def __init__(self, logger:flgo.experiment.logger.BasicLogger=None, simulator:flgo.simulator.base.BasicSimulator=None, clock:flgo.simulator.base.ElemClock=None, dev_list:list=None, TaskCalculator:flgo.benchmark.base.BasicTaskCalculator=None, TaskPipe:flgo.benchmark.base.BasicTaskPipe=None):
        # runner-wide logger instance
        self.logger = logger
        # simulator shared by the runner's parties -- presumably models
        # system behavior (availability/latency); see flgo.simulator.base
        self.simulator = simulator
        # virtual clock shared with the simulator
        self.clock = clock
        # list of torch.device objects handed out round-robin by apply_for_device
        self.dev_list = dev_list
        self.TaskCalculator = TaskCalculator
        self.TaskPipe = TaskPipe
        # round-robin cursor into self.dev_list
        self.crt_dev = 0

    def apply_for_device(self):
        r"""
        Apply for a new device from currently available ones (i.e. devices in self.dev_list)
        in a round-robin manner.

        Returns:
            GPU device (i.e. torch.device), or None if no device is available
        """
        # Guard against an empty list as well as None: the original
        # `is None` check let an empty dev_list raise IndexError.
        if not self.dev_list: return None
        dev = self.dev_list[self.crt_dev]
        # advance the cursor, wrapping around the end of the list
        self.crt_dev = (self.crt_dev + 1) % len(self.dev_list)
        return dev

apply_for_device()

Apply for a new device from currently available ones (i.e. devices in self.dev_list)

Returns:

Type Description

GPU device (i.e. torch.device)

Source code in flgo\utils\fflow.py
71
72
73
74
75
76
77
78
79
80
81
def apply_for_device(self):
    r"""
    Apply for a new device from currently available ones (i.e. devices in self.dev_list)
    in a round-robin manner.

    Returns:
        GPU device (i.e. torch.device), or None if no device is available
    """
    # Guard against an empty list as well as None: the original
    # `is None` check let an empty dev_list raise IndexError.
    if not self.dev_list: return None
    dev = self.dev_list[self.crt_dev]
    # advance the round-robin cursor, wrapping around the end of the list
    self.crt_dev = (self.crt_dev + 1) % len(self.dev_list)
    return dev

convert_model(get_model, model_name='anonymous', scene='horizontal')

Convert an existing model into a model that can be loaded in flgo.

Parameters:

Name Type Description Default
get_model Callable

this function will return a model of type torch.nn.Module when it is called

required
model_name str

the name of the model

'anonymous'
scene str

the FL scene

'horizontal'

Returns:

Name Type Description
res_model

the model can be used in flgo.init(..., model=res_model, ...)

Source code in flgo\utils\fflow.py
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
def convert_model(get_model:Callable, model_name='anonymous', scene:str='horizontal'):
    r"""
    Convert an existing model into a model that can be loaded in flgo.

    Args:
        get_model (Callable): this function will return a model of type torch.nn.Module when it is called
        model_name (str): the name of the model
        scene (str): the FL scene

    Returns:
        res_model: the model can be used in flgo.init(..., model=res_model, ...)
    """
    class DecoratedModel(flgo.utils.fmodule.FModule):
        # Thin FModule wrapper that delegates the forward pass to the
        # user-supplied model.
        def __init__(self):
            super().__init__()
            self.model = get_model()

        def forward(self, *args, **kwargs):
            return self.model(*args, **kwargs)

    # Map each supported scene onto (the init hook that should build the
    # model, the substring identifying the party that owns it).
    scene_dispatch = {
        'horizontal': ('init_global_module', 'Server'),
        'decentralized': ('init_local_module', 'Client'),
    }
    if scene not in scene_dispatch:
        raise NotImplementedError('The current version only support converting model for horizontalFL and DecentralizedFL.')
    active_hook, owner_keyword = scene_dispatch[scene]

    class AnonymousModel:
        __name__ = model_name

        @classmethod
        def init_global_module(cls, object):
            if active_hook == 'init_global_module' and owner_keyword in object.__class__.__name__:
                object.model = DecoratedModel().to(object.device)

        @classmethod
        def init_local_module(cls, object):
            if active_hook == 'init_local_module' and owner_keyword in object.__class__.__name__:
                object.model = DecoratedModel().to(object.device)

    return AnonymousModel()

download_resource(root, name, type, overwrite=False)

Download resource from github

Parameters:

Name Type Description Default
root str

the path to store the resource

required
name str

the name of the resource

required
type type

the type of the resource in ['algorithm', 'benchmark', 'simulator']

required
overwrite bool

whether to overwrite existing file

False
Source code in flgo\utils\fflow.py
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
def download_resource(root:str, name:str, type:str, overwrite:bool=False):
    """
    Download resource from github.

    Args:
        root (str): the path to store the resource
        name (str): the name of the resource
        type (str): the type of the resource in ['algorithm', 'benchmark', 'simulator']
        overwrite (bool): whether to overwrite existing file

    Returns:
        the imported module for 'algorithm'/'benchmark', the Simulator class
        for 'simulator', or None if the download failed

    Raises:
        ValueError: if `type` is not one of the supported resource types
    """
    resource_root = "https://github.com/WwZzz/easyFL/raw/FLGo/resources/"
    if type not in ['algorithm', 'benchmark', 'simulator']: raise ValueError("Args type must of value in ['algorithm', 'benchmark', 'simulator']")
    url = resource_root+type+'/'
    suffix_dict = {'algorithm':'.py', 'simulator':'.py', 'benchmark':'.zip'}
    suffix = suffix_dict[type]
    file_name = name
    if not file_name.endswith(suffix):
        file_name = file_name+suffix
    # The resource is saved under `root`; check and reuse it there. (The
    # original checked/opened `file_name` relative to the current working
    # directory, which broke whenever root != '.'.)
    local_path = os.path.join(root, file_name)
    if not os.path.exists(local_path) or overwrite:
        try:
            urllib.request.urlretrieve(url+file_name, local_path)
        except Exception as e:
            print(e)
            return None
    else:
        warnings.warn("There already exist {} named {}".format(type, name))
    if type == 'benchmark':
        # use a context manager so the archive handle is always closed
        with zipfile.ZipFile(local_path) as bmk_zip:
            bmk_zip.extractall(root)
    module_path = '.'.join(os.path.relpath(os.path.join(root, name), os.path.curdir).split(os.path.sep))
    module = importlib.import_module(module_path)
    if type in ['algorithm', 'benchmark']:
        return module
    else:
        return getattr(module, 'Simulator')

gen_benchmark(benchmark, config_file, target_path='.', data_type='cv', task_type='classification')

Create customized benchmarks from configurations. The configuration is a .py file that describes the datasets and the model,
where there must exist a function named `get_model` and a variable `train_data`. `val_data` and test_data are two optional
variables in the configuration.

Parameters:

Name Type Description Default
benchmark str

the name of the benchmark

required
config_file str

the path of the configuration file

required
target_path

(str): the path to store the benchmark

'.'
data_type str

the type of dataset that should be in the list ['cv', 'nlp', 'graph', 'rec', 'series', 'tabular']

'cv'
task_type str

the type of the task (e.g. classification, regression...)

'classification'

Returns:

Name Type Description
bmk_module str

the module name of the generated benchmark

Source code in flgo\utils\fflow.py
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
def gen_benchmark(benchmark:str, config_file:str, target_path='.',data_type:str='cv', task_type:str='classification'):
    r"""
    Create a customized benchmark from a configuration file. The configuration
    is a .py file that must define a function `get_model` and a variable
    `train_data`; `val_data` and `test_data` may optionally be defined as well.

    Args:
        benchmark (str): the name of the benchmark
        config_file (str): the path of the configuration file
        target_path (str): the path to store the benchmark
        data_type (str): the type of dataset that should be in the list ['cv', 'nlp', 'graph', 'rec', 'series', 'tabular']
        task_type (str): the type of the task (e.g. classification, regression...)

    Returns:
        bmk_module (str): the module name of the generated benchmark
    """
    if not os.path.exists(config_file):
        raise FileNotFoundError('File {} not found.'.format(config_file))
    benchmark_dir = os.path.join(os.path.abspath(target_path), benchmark)
    if os.path.exists(benchmark_dir):
        raise FileExistsError('Benchmark {} already exists'.format(benchmark_dir))
    # the template package for this (data_type, task_type) pair ships with flgo
    template_dir = os.path.join(flgo.benchmark.path, 'toolkits', data_type, task_type, 'temp')
    if not os.path.exists(template_dir):
        raise NotImplementedError('There is no support to automatically generation of {}.{}. More other types are comming soon...'.format(data_type, task_type))
    shutil.copytree(template_dir, benchmark_dir)
    shutil.copyfile(config_file, os.path.join(benchmark_dir, 'config.py'))
    # importable dotted module path, relative to the working directory
    return '.'.join(os.path.relpath(benchmark_dir, os.getcwd()).split(os.path.sep))

gen_benchmark_from_file(benchmark, config_file, target_path='.', data_type='cv', task_type='classification', overwrite=False)

Create customized benchmarks from configurations. The configuration is a .py file that describes the datasets and the model,
where there must exist a function named `get_model` and a variable `train_data`. `val_data` and test_data are two optional
variables in the configuration.

Parameters:

Name Type Description Default
benchmark str

the name of the benchmark

required
config_file str

the path of the configuration file

required
target_path

(str): the path to store the benchmark

'.'
data_type str

the type of dataset that should be in the list ['cv', 'nlp', 'graph', 'rec', 'series', 'tabular']

'cv'
task_type str

the type of the task (e.g. classification, regression...)

'classification'
overwrite bool

overwrite current benchmark if there already exists a benchmark of the same name

False

Returns:

Name Type Description
bmk_module str

the module name of the generated benchmark

Source code in flgo\utils\fflow.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def gen_benchmark_from_file(benchmark:str, config_file:str, target_path='.',data_type:str='cv', task_type:str='classification', overwrite:bool=False) -> str:
    r"""
    Create a customized benchmark from a configuration file. The configuration
    is a .py file that must define a function `get_model` and a variable
    `train_data`; `val_data` and `test_data` may optionally be defined as well.

    Args:
        benchmark (str): the name of the benchmark
        config_file (str): the path of the configuration file
        target_path (str): the path to store the benchmark
        data_type (str): the type of dataset that should be in the list ['cv', 'nlp', 'graph', 'rec', 'series', 'tabular']
        task_type (str): the type of the task (e.g. classification, regression...)
        overwrite (bool): overwrite current benchmark if there already exists a benchmark of the same name

    Returns:
        bmk_module (str): the module name of the generated benchmark
    """
    if not os.path.exists(config_file):
        raise FileNotFoundError('File {} not found.'.format(config_file))
    benchmark_dir = os.path.join(os.path.abspath(target_path), benchmark)
    module_name = '.'.join(os.path.relpath(benchmark_dir, os.getcwd()).split(os.path.sep))
    if os.path.exists(benchmark_dir) and not overwrite:
        # keep the existing benchmark and just return its module path
        warnings.warn('There already exists a benchmark `{}`'.format(benchmark))
        return module_name
    # the template package for this (data_type, task_type) pair ships with flgo
    template_dir = os.path.join(flgo.benchmark.path, 'toolkits', data_type, task_type, 'temp')
    if not os.path.exists(template_dir):
        raise NotImplementedError('There is no support to automatically generation of {}.{}. More other types are comming soon...'.format(data_type, task_type))
    shutil.copytree(template_dir, benchmark_dir)
    shutil.copyfile(config_file, os.path.join(benchmark_dir, 'config.py'))
    return module_name

gen_decentralized_benchmark(benchmark, config_file, target_path='.', data_type='cv', task_type='classification')

Create customized benchmarks from configurations. The configuration is a .py file that describes the datasets and the model,
where there must exist a function named `get_model` and a variable `train_data`. `val_data` and test_data are two optional
variables in the configuration.

Parameters:

Name Type Description Default
benchmark str

the name of the benchmark

required
config_file str

the path of the configuration file

required
target_path

(str): the path to store the benchmark

'.'
data_type str

the type of dataset that should be in the list ['cv', 'nlp', 'graph', 'rec', 'series', 'tabular']

'cv'
task_type str

the type of the task (e.g. classification, regression...)

'classification'

Returns:

Name Type Description
bmk_module str

the module name of the generated benchmark

Source code in flgo\utils\fflow.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def gen_decentralized_benchmark(benchmark:str, config_file:str, target_path = '.', data_type:str='cv', task_type:str='classification'):
    r"""
    Create a customized decentralized benchmark from a configuration file. The
    configuration is a .py file that must define a function `get_model` and a
    variable `train_data`; `val_data` and `test_data` may optionally be defined.

    Args:
        benchmark (str): the name of the benchmark
        config_file (str): the path of the configuration file
        target_path (str): the path to store the benchmark
        data_type (str): the type of dataset that should be in the list ['cv', 'nlp', 'graph', 'rec', 'series', 'tabular']
        task_type (str): the type of the task (e.g. classification, regression...)

    Returns:
        bmk_module (str): the module name of the generated benchmark
    """
    if not os.path.exists(config_file):
        raise FileNotFoundError('File {} not found.'.format(config_file))
    benchmark_dir = os.path.join(os.path.abspath(target_path), benchmark)
    if os.path.exists(benchmark_dir):
        raise FileExistsError('Benchmark {} already exists'.format(benchmark_dir))
    # decentralized scenes use the 'dec_temp' template shipped with flgo
    template_dir = os.path.join(flgo.benchmark.path, 'toolkits', data_type, task_type, 'dec_temp')
    if not os.path.exists(template_dir):
        raise NotImplementedError('There is no support to automatically generation of {}.{}. More other types are comming soon...'.format(data_type, task_type))
    shutil.copytree(template_dir, benchmark_dir)
    shutil.copyfile(config_file, os.path.join(benchmark_dir, 'config.py'))
    return '.'.join(os.path.relpath(benchmark_dir, os.getcwd()).split(os.path.sep))

gen_hierarchical_benchmark(benchmark, config_file, target_path='.', data_type='cv', task_type='classification')

Create customized benchmarks from configurations. The configuration is a .py file that describes the datasets and the model,
where there must exist a function named `get_model` and a variable `train_data`. `val_data` and test_data are two optional
variables in the configuration.

Parameters:

Name Type Description Default
benchmark str

the name of the benchmark

required
config_file str

the path of the configuration file

required
target_path

(str): the path to store the benchmark

'.'
data_type str

the type of dataset that should be in the list ['cv', 'nlp', 'graph', 'rec', 'series', 'tabular']

'cv'
task_type str

the type of the task (e.g. classification, regression...)

'classification'

Returns:

Name Type Description
bmk_module str

the module name of the generated benchmark

Source code in flgo\utils\fflow.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
def gen_hierarchical_benchmark(benchmark:str, config_file:str, target_path = '.', data_type:str='cv', task_type:str='classification'):
    r"""
    Create a customized hierarchical benchmark from a configuration file. The
    configuration is a .py file that must define a function `get_model` and a
    variable `train_data`; `val_data` and `test_data` may optionally be defined.

    Args:
        benchmark (str): the name of the benchmark
        config_file (str): the path of the configuration file
        target_path (str): the path to store the benchmark
        data_type (str): the type of dataset that should be in the list ['cv', 'nlp', 'graph', 'rec', 'series', 'tabular']
        task_type (str): the type of the task (e.g. classification, regression...)

    Returns:
        bmk_module (str): the module name of the generated benchmark
    """
    if not os.path.exists(config_file):
        raise FileNotFoundError('File {} not found.'.format(config_file))
    benchmark_dir = os.path.join(os.path.abspath(target_path), benchmark)
    if os.path.exists(benchmark_dir):
        raise FileExistsError('Benchmark {} already exists'.format(benchmark_dir))
    # hierarchical scenes use the 'hier_temp' template shipped with flgo
    template_dir = os.path.join(flgo.benchmark.path, 'toolkits', data_type, task_type, 'hier_temp')
    if not os.path.exists(template_dir):
        raise NotImplementedError('There is no support to automatically generation of {}.{}. More other types are comming soon...'.format(data_type, task_type))
    shutil.copytree(template_dir, benchmark_dir)
    shutil.copyfile(config_file, os.path.join(benchmark_dir, 'config.py'))
    return '.'.join(os.path.relpath(benchmark_dir, os.getcwd()).split(os.path.sep))

gen_task(config={}, task_path='', rawdata_path='', seed=0, overwrite=False)

Generate a federated task that is specified by the benchmark information and the partition information, where the generated task will be stored in the task_path and the raw data will be downloaded into the rawdata_path.

config (dict || str): configuration is either a dict contains parameters or a filename of a .yml file
task_path (str): where the generated task will be stored
rawdata_path (str): where the raw data will be downloaded/stored
seed (int): the random seed used to generate the task

Example:

    >>> import flgo
    >>> config = {'benchmark':{'name':'flgo.benchmark.mnist_classification'}, 'partitioner':{'name':'IIDParitioner', 'para':{'num_clients':100}}}
    >>> flgo.gen_task(config, './my_mnist_iid')
    >>> # The task will be stored as `my_mnist_iid` in the current working directory
Source code in flgo\utils\fflow.py
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
def gen_task(config=None, task_path:str= '', rawdata_path:str= '', seed:int=0, overwrite:bool=False):
    r"""
    Generate a federated task that is specified by the benchmark information and the partition information, where the generated task will be stored in the task_path and the raw data will be downloaded into the rawdata_path.

    Args:
        config (dict|str): configuration is either a dict contains parameters or a filename of a .yml file
        task_path (str): where the generated task will be stored
        rawdata_path (str): where the raw data will be downloaded/stored
        seed (int): the random seed used to generate the task
        overwrite (bool): whether to overwrite an existing task at task_path

    Returns:
        task_path (str): the path of the generated task; None if saving the
        task failed, and nothing if the task already exists and overwrite=False

    Example:
    ```python
        >>> import flgo
        >>> config = {'benchmark':{'name':'flgo.benchmark.mnist_classification'}, 'partitioner':{'name':'IIDParitioner', 'para':{'num_clients':100}}}
        >>> flgo.gen_task(config, './my_mnist_iid')
        >>> # The task will be stored as `my_mnist_iid` in the current working directory
    ```
    """
    # Fix for the previous mutable default argument `config={}`: the loaded
    # option is mutated in place below, so the shared default dict could be
    # polluted across calls. Build a fresh dict per call instead.
    if config is None: config = {}
    # setup random seed
    random.seed(3 + seed)
    np.random.seed(97 + seed)
    torch.manual_seed(12+seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    # load configuration
    gen_option = load_configuration(config)
    # normalize the benchmark entry into the form {'name':..., 'para':{...}}
    if type(gen_option['benchmark']) is not dict: gen_option['benchmark']={'name':gen_option['benchmark']}
    if 'para' not in gen_option['benchmark'].keys(): gen_option['benchmark']['para'] = {}
    # init generator
    if rawdata_path!='': gen_option['benchmark']['para']['rawdata_path']=rawdata_path
    if type(gen_option['benchmark']['name']) is str:
        bmk_core = importlib.import_module('.'.join([gen_option['benchmark']['name'], 'core']))
    elif hasattr(gen_option['benchmark']['name'], '__path__'):
        # the benchmark was given as an already-imported python package
        bmk_core = importlib.import_module('.core', gen_option['benchmark']['name'].__name__)
    else:
        raise RuntimeError("The value of parameter config['benchmark']['name'] should be either a string or a python package.")
    task_generator = getattr(bmk_core, 'TaskGenerator')(**gen_option['benchmark']['para'])
    bmk_module = importlib.import_module(gen_option['benchmark']['name']) if type(
        gen_option['benchmark']['name']) is str else gen_option['benchmark']['name']
    # create partitioner for generator if specified
    if 'partitioner' in gen_option.keys():
        if isinstance(gen_option['partitioner'], flgo.benchmark.partition.BasicPartitioner):
            # an already-constructed partitioner instance is used as-is
            partitioner = gen_option['partitioner']
            task_generator.register_partitioner(partitioner)
            partitioner.register_generator(task_generator)
        else:
            if not isinstance(gen_option['partitioner'], dict):
                gen_option['partitioner'] = {'name': gen_option['partitioner'], 'para':{}}
            # update parameters of partitioner
            if 'para' not in gen_option['partitioner'].keys():
                gen_option['partitioner']['para'] = {}
            else:
                # NOTE(review): when 'para' is given without 'name', the
                # partitioner parameters are merged into the benchmark
                # parameters instead -- looks intentional, but verify.
                if 'name' not in gen_option['partitioner'].keys():
                    gen_option['benchmark']['para'].update(gen_option['partitioner']['para'])
            if 'name' in gen_option['partitioner'].keys():
                Partitioner = gen_option['partitioner']['name']
                if type(Partitioner) is str:
                    # resolve a partitioner class by name, first in this
                    # module's globals, then in flgo.benchmark.partition
                    if Partitioner in globals().keys(): Partitioner = eval(Partitioner)
                    else: Partitioner = getattr(flgo.benchmark.partition, Partitioner)
                partitioner = Partitioner(**gen_option['partitioner']['para'])
                task_generator.register_partitioner(partitioner)
                partitioner.register_generator(task_generator)
            else:
                # fall back to the benchmark's default partitioner, if any
                try:
                    if hasattr(bmk_module, 'default_partitioner'):
                        Partitioner = getattr(bmk_module, 'default_partitioner')
                        default_partition_para = getattr(bmk_module, 'default_partition_para') if hasattr(bmk_module, 'default_partition_para') else {}
                        partitioner = Partitioner(**default_partition_para)
                        task_generator.register_partitioner(partitioner)
                        partitioner.register_generator(task_generator)
                    else:
                        partitioner = None
                except:
                    partitioner = None
    # initialize task pipe (random unique name when task_path is empty)
    if len(task_path) == 0: task_path = 'FLGoTask_' + uuid.uuid4().hex
    task_pipe = getattr(bmk_core, 'TaskPipe')(task_path)
    # check if task already exists
    if task_pipe.task_exists():
        if not overwrite:
            warnings.warn('Task {} already exists. To overwrite the existing task, use flgo.gen_task(...,overwrite=True,...)'.format(task_path))
            return
        else:
            shutil.rmtree(task_path)
    # generate federated task
    task_generator.generate()
    # save the generated federated benchmark
    try:
        # create task architecture
        task_pipe.create_task_architecture()
        # save meta infomation
        task_pipe.save_info(task_generator)
        # save task
        task_pipe.save_task(task_generator)
        print('Task {} has been successfully generated.'.format(task_pipe.task_path))
    except Exception as e:
        print(e)
        task_pipe.remove_task()
        print("Failed to saving splited dataset.")
        return None
    # save visualization; failures here are non-fatal -- the `finally: return`
    # deliberately swallows any exception raised by the visualize function
    try:
        visualize_func = getattr(bmk_module,'visualize')
        visualize_func(task_generator, partitioner, task_path)
    except Exception as e:
        print('Warning: Failed to visualize the partitioned result where there exists error {}'.format(e))
    finally:
        return task_path

gen_task_by_(benchmark, partitioner=None, task_path='', seed=0, overwrite=False)

Generate federated task from benchmark and partitioner without inputting other parameters

Parameters:

Name Type Description Default
benchmark module

benchmark

required
partitioner flgo.benchmark.partition.BasicPartitioner

an instance of type flgo.benchmark.partition.BasicPartitioner

None
task_path str

the name and the path of the task

''
seed int

random seed

0
overwrite bool

overwrite the old task if the task_path already exist if True

False

Returns:

Name Type Description
task_path str

the path of the task

Source code in flgo\utils\fflow.py
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
def gen_task_by_(benchmark, partitioner:flgo.benchmark.partition.BasicPartitioner=None, task_path:str='', seed:int=0, overwrite:bool=False):
    """
    Generate a federated task directly from a benchmark module and an optional
    partitioner, without any further configuration.

    Args:
        benchmark (module): benchmark
        partitioner (flgo.benchmark.partition.BasicPartitioner): an instance of type flgo.benchmark.partition.BasicPartitioner
        task_path (str): the name and the path of the task
        seed (int): random seed
        overwrite (bool): overwrite the old task if the task_path already exist if True

    Returns:
        task_path (str): the path of the task
    """
    # fall back to a random unique task name when none was given
    if not task_path:
        task_path = 'FLGoTask_' + uuid.uuid4().hex
    # fix every source of randomness so task generation is reproducible
    random.seed(3 + seed)
    np.random.seed(97 + seed)
    torch.manual_seed(12 + seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    core = importlib.import_module('.core', benchmark.__name__)
    generator = getattr(core, 'TaskGenerator')()
    # wire the partitioner and the generator to each other, if one was given
    if partitioner is not None:
        generator.register_partitioner(partitioner)
        partitioner.register_generator(generator)
    pipe = getattr(core, 'TaskPipe')(task_path)
    # refuse to clobber an existing task unless overwrite was requested
    if pipe.task_exists():
        if overwrite:
            shutil.rmtree(task_path)
        else:
            warnings.warn('Task {} already exists. To overwrite the existing task, use flgo.gen_task_by_(...,overwrite=True,...)'.format(task_path))
            return
    # generate and persist the federated task
    generator.generate()
    try:
        pipe.create_task_architecture()
        pipe.save_info(generator)
        pipe.save_task(generator)
        print('Task {} has been successfully generated.'.format(pipe.task_path))
    except Exception as e:
        print(e)
        pipe.remove_task()
        print("Failed to saving splited dataset.")
        return None
    # visualization failures are non-fatal: the `finally: return` below
    # deliberately swallows any exception raised while visualizing
    try:
        visualize_func = getattr(benchmark, 'visualize')
        visualize_func(generator, partitioner, task_path)
    except Exception as e:
        print('Warning: Failed to visualize the partitioned result where there exists error {}'.format(e))
    finally:
        return task_path

init(task, algorithm, option={}, model=None, Logger=None, Simulator=flgo.simulator.DefaultSimulator, scene='horizontal')

Initialize a runner in FLGo, which is to optimize a model on a specific task (i.e. IID-mnist-of-100-clients) by the selected federated algorithm.

Parameters:

Name Type Description Default
task str

the directory of the federated task

required
algorithm module|class

the algorithm will be used to optimize the model in federated manner, which must contain pre-defined attributions (e.g. algorithm.Server and algorithm.Client for horizontal federated learning)

required
option dict | str

the configurations of training, environment, algorithm, logger and simulator

{}
model module|class

the model module that contains two methods: model.init_local_module(object) and model.init_global_module(object)

None
Logger flgo.experiment.logger.BasicLogger

the class of the logger inherited from flgo.experiment.logger.BasicLogger

None
Simulator flgo.simulator.base.BasicSimulator

the class of the simulator inherited from flgo.simulator.BasicSimulator

flgo.simulator.DefaultSimulator
scene str

'horizontal' or 'vertical' in current version of FLGo

'horizontal'

Returns:

Name Type Description
runner

the object instance that has the method runner.run()

Example:

    >>> import flgo
    >>> from flgo.algorithm import fedavg
    >>> from flgo.experiment.logger.simple_logger import SimpleLogger
    >>> # create task 'mnist_iid' by flgo.gen_task('gen_config.yml', 'mnist_iid') if there exists no such task
    >>> if os.path.exists('mnist_iid'): flgo.gen_task({'benchmark':{'name':'flgo.benchmark.mnist_classification'}, 'partitioner':{'name':'IIDPartitioner','para':{'num_clients':100}}}, 'mnist_iid')
    >>> # create runner
    >>> fedavg_runner = flgo.init('mnist_iid', algorithm=fedavg, option = {'num_rounds':20, 'gpu':[0], 'learning_rate':0.1})
    >>> fedavg_runner.run()
    ... # the training will start after runner.run() was called, and the running-time results will be recorded by Logger into the task directory
Source code in flgo\utils\fflow.py
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
def init(task: str, algorithm, option = {}, model=None, Logger: flgo.experiment.logger.BasicLogger = None, Simulator: BasicSimulator=flgo.simulator.DefaultSimulator, scene='horizontal'):
    r"""
    Initialize a runner in FLGo, which is to optimize a model on a specific task (i.e. IID-mnist-of-100-clients) by the selected federated algorithm.

    Args:
        task (str): the dictionary of the federated task
        algorithm (module|class): the algorithm will be used to optimize the model in federated manner, which must contain pre-defined attributions (e.g. algorithm.Server and algorithm.Client for horizontal federated learning)
        option (dict|str): the configurations of training, environment, algorithm, logger and simulator
        model (module|class): the model module that contains two methods: model.init_local_module(object) and model.init_global_module(object)
        Logger (flgo.experiment.logger.BasicLogger): the class of the logger inherited from flgo.experiment.logger.BasicLogger
        Simulator (flgo.simulator.base.BasicSimulator): the class of the simulator inherited from flgo.simulator.BasicSimulator
        scene (str): 'horizontal' or 'vertical' in current version of FLGo

    Returns:
        runner: the object instance that has the method runner.run()

    Raises:
        FileExistsError: if the task directory does not exist
        NotImplementedError: if no model can be resolved for the benchmark/algorithm

    Example:
    ```python
        >>> import flgo
        >>> from flgo.algorithm import fedavg
        >>> from flgo.experiment.logger.simple_logger import SimpleLogger
        >>> # create task 'mnist_iid' by flgo.gen_task('gen_config.yml', 'mnist_iid') if there exists no such task
        >>> if not os.path.exists('mnist_iid'): flgo.gen_task({'benchmark':{'name':'flgo.benchmark.mnist_classification'}, 'partitioner':{'name':'IIDPartitioner','para':{'num_clients':100}}}, 'mnist_iid')
        >>> # create runner
        >>> fedavg_runner = flgo.init('mnist_iid', algorithm=fedavg, option = {'num_rounds':20, 'gpu':[0], 'learning_rate':0.1})
        >>> fedavg_runner.run()
        ... # the training will start after runner.run() was called, and the running-time results will be recorded by Logger into the task dictionary
    ```
    """

    # init option: merge the user-specified option into the full default option,
    # casting each value to the type of the corresponding default value
    option = load_configuration(option)
    default_option = read_option_from_command()
    for op_key in option:
        if op_key in default_option.keys():
            op_type = type(default_option[op_key])
            if op_type == type(option[op_key]):
                default_option[op_key] = option[op_key]
            else:
                if op_type is list:
                    # wrap scalars into a one-element list, convert other iterables
                    default_option[op_key]=list(option[op_key]) if hasattr(option[op_key], '__iter__') else [option[op_key]]
                elif op_type is tuple:
                    default_option[op_key] = tuple(option[op_key]) if hasattr(option[op_key], '__iter__') else (option[op_key])
                else:
                    default_option[op_key] = op_type(option[op_key])
        else:
            # keys unknown to the default option (e.g. algorithm-specific ones) are kept as-is
            default_option[op_key] = option[op_key]
    option = default_option
    setup_seed(seed=option['seed'])
    option['task'] = task
    option['algorithm'] = (algorithm.__name__).split('.')[-1]
    # init task info
    if not os.path.exists(task):
        # fix: the '{}' placeholder was previously never filled with the task name
        raise FileExistsError("Fedtask '{}' doesn't exist. Please generate the specified task by flgo.gen_task().".format(task))
    with open(os.path.join(task, 'info'), 'r') as inf:
        task_info = json.load(inf)
    benchmark = task_info['benchmark']
    # resolve the model: explicit argument > algorithm-defined model > benchmark default
    if model is None:
        bmk_module = importlib.import_module(benchmark)
        if hasattr(algorithm, 'init_global_module') or hasattr(algorithm, 'init_local_module'):
            model = algorithm
        elif hasattr(bmk_module, 'default_model'):
            model = getattr(bmk_module, 'default_model')
        else:
            raise NotImplementedError("Model cannot be None when there exists no default model for the current benchmark {} and the algorithm {} didn't define the model by `init_local_module` or `init_global_module`".format(task_info['benchmark'], option['algorithm']))
    option['model'] = (model.__name__).split('.')[-1]
    # create global variable shared by all the parties of this runner
    gv = GlobalVariable()
    # init logger: pick the scene-specific default logger when none is given
    if Logger is None:
        if scene=='horizontal':
            Logger = flgo.experiment.logger.simple_logger.SimpleLogger
        elif scene=='vertical':
            Logger = flgo.experiment.logger.vertical_logger.VerticalLogger
        elif scene=='decentralized':
            Logger = flgo.experiment.logger.dec_logger.DecLogger
        elif scene=='hierarchical':
            Logger = flgo.experiment.logger.hier_logger.HierLogger
        # NOTE(review): an unknown scene leaves Logger as None and fails on the next line — confirm intended
    gv.logger = Logger(task=task, option=option, name=str(id(gv))+str(Logger), level=option['log_level'])
    # init device: fall back to CPU when no gpu id was specified
    gv.dev_list = [torch.device('cpu')] if (option['gpu'] is None or len(option['gpu'])==0) else [torch.device('cuda:{}'.format(gpu_id)) for gpu_id in option['gpu']]
    gv.logger.info('Initializing devices: '+','.join([str(dev) for dev in gv.dev_list])+' will be used for this running.')
    # init task
    gv.logger.info('BENCHMARK:\t{}'.format(benchmark))
    gv.logger.info('TASK:\t\t\t{}'.format(task))
    gv.logger.info('MODEL:\t\t{}'.format(model.__name__))
    gv.logger.info('ALGORITHM:\t{}'.format(option['algorithm']))
    core_module = '.'.join([benchmark, 'core'])
    gv.TaskPipe = getattr(importlib.import_module(core_module), 'TaskPipe')
    task_pipe = gv.TaskPipe(task)
    TaskCalculator = getattr(importlib.import_module(core_module), 'TaskCalculator')
    gv.TaskCalculator = TaskCalculator
    setup_seed(option['dataseed'])
    task_data = task_pipe.load_data(option)
    # init objects: attach the shared global variable and the task calculator to
    # every public class defined by the algorithm module
    obj_class = [c for c in dir(algorithm) if not c.startswith('__')]
    tmp = []
    for c in obj_class:
        try:
            C = getattr(algorithm, c)
            setattr(C, 'gv', gv)
            setattr(C, 'TaskCalculator', TaskCalculator)
            tmp.append(c)
        except Exception:
            # non-class attributes (or read-only ones) are simply skipped
            continue
    # init simulator: wrap client/server methods with system-heterogeneity decorators
    if scene=='horizontal':
        for c in obj_class:
            if 'Client' in c:
                class_client = getattr(algorithm, c)
                class_client.train = flgo.simulator.base.with_completeness(class_client.train)
            elif 'Server' in c:
                class_server = getattr(algorithm, c)
                class_server.sample = flgo.simulator.base.with_availability(class_server.sample)
                class_server.communicate_with = flgo.simulator.base.with_latency(class_server.communicate_with)
                class_server.communicate = flgo.simulator.base.with_dropout(class_server.communicate)
    objects = task_pipe.generate_objects(option, algorithm, scene=scene)
    obj_classes = collections.defaultdict(int)
    for obj in objects: obj_classes[obj.__class__]+=1
    creating_str = []
    for k,v in obj_classes.items(): creating_str.append("{} {}".format(v, k))
    creating_str = ', '.join(creating_str)
    gv.logger.info('SCENE:\t\t{} FL with '.format(scene)+creating_str)
    task_pipe.distribute(task_data, objects)
    # init model on every party (local/global modules and optional dataset hook)
    if hasattr(model, 'init_local_module'):
        for obj in objects:
            model.init_local_module(obj)
    if hasattr(model, 'init_global_module'):
        for obj in objects:
            model.init_global_module(obj)
    if hasattr(model, 'init_dataset'):
        for obj in objects:
            model.init_dataset(obj)
    setup_seed(option['seed']+346)
    # init communicator that routes virtual messages among the parties
    gv.communicator = flgo.VirtualCommunicator(objects)

    for ob in objects: ob.initialize()

    # init virtual system environment
    gv.logger.info('SIMULATOR:\t{}'.format(str(Simulator)))
    gv.clock = flgo.simulator.base.ElemClock()
    # only the horizontal scene is simulated in the current version
    gv.simulator = Simulator(objects, option) if scene == 'horizontal' else None
    if gv.simulator is not None: gv.simulator.initialize()
    gv.clock.register_simulator(simulator=gv.simulator)
    gv.logger.register_variable(coordinator=objects[0], participants=objects[1:], option=option, clock=gv.clock, scene=scene, objects = objects, simulator=Simulator.__name__ if scene == 'horizontal' else 'None')
    if scene=='horizontal':
        gv.logger.register_variable(server=objects[0], clients=objects[1:])
    gv.logger.initialize()
    gv.logger.info('Ready to start.')

    # remove the temporary class-level gv and register it per-instance instead,
    # so that different runners in the same process don't share class state
    for c in tmp:
        try:
            C = getattr(algorithm, c)
            delattr(C, 'gv')
        except Exception:
            continue
    for ob in objects:
        ob.gv = gv
    if gv.simulator is not None:
        gv.simulator.gv = gv
    gv.clock.gv = gv
    gv.logger.gv = gv
    # objects[0] is the coordinator (e.g. the server), which owns runner.run()
    return objects[0]

list_resource(type='algorithm')

List currently available resources at github. The arg. type should be one of elements in {'algorithm', 'benchmark', 'simulator'}

Parameters:

Name Type Description Default
type str 'algorithm'

Returns:

Name Type Description
res list

the name of currently available resources

Source code in flgo\utils\fflow.py
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
def list_resource(type:str='algorithm'):
    """
    List currently available resources at github. The arg. `type` should be one of elements in {'algorithm', 'benchmark', 'simulator'}
    Args:
        type (str): the type of the resources to be listed
    Returns:
        res (list): the name of currently available resources, or None when the page cannot be fetched
    """
    # each resource type maps to the file suffix its entries carry on the page
    suffix_dict = {"algorithm": ".py", "benchmark": ".zip", "simulator": ".py"}
    if type not in suffix_dict:
        # fix: the original message read "must of value" (missing verb)
        raise ValueError("Args type must be of value in ['algorithm', 'benchmark', 'simulator']")
    url = "https://github.com/WwZzz/easyFL/tree/FLGo/resources/"+type
    suffix = suffix_dict[type]
    try:
        content = str(requests.get(url).content, encoding=sys.getfilesystemencoding())
    except Exception as e:
        # best-effort listing: network failures are reported, not raised
        print(e)
        return None
    # file names appear as quoted strings (e.g. "fedavg.py") in the page's embedded JSON
    res = re.findall(r'"[a-zA-Z0-9_-]*{}"'.format(suffix), content)
    res = [s.strip('"') for s in res]
    # drop the suffix and exclude the repository's own test file
    res = [s[:-len(suffix)] for s in res if s!="test.py"]
    return res

load_configuration(config={})

Load configurations from .yml file or dict.

Parameters:

Name Type Description Default
config dict | str

the configurations

{}

Returns:

Type Description

a dict of option (i.e. configuration)

Source code in flgo\utils\fflow.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def load_configuration(config={}):
    r"""
    Load configurations from a YAML file or a dict.

    Args:
        config (dict|str): the configurations, given either as a dict of options
            or as the path of a YAML file (ending with '.yml' or '.yaml')

    Returns:
        a dict of option (i.e. configuration)

    Raises:
        TypeError: if config is neither a dict nor a YAML filename
    """
    # NOTE: the mutable default {} is safe here because it is never mutated
    if isinstance(config, str) and config.endswith(('.yml', '.yaml')):
        with open(config) as f:
            # FullLoader supports the full YAML language for trusted config files
            option = yaml.load(f, Loader=yaml.FullLoader)
        return option
    elif isinstance(config, dict):
        return config
    else:
        raise TypeError('The input config should be either a dict or a filename.')

module2fmodule(Model)

Convert a class of torch.nn.Module into class flgo.utils.fmodule.FModule

Parameters:

Name Type Description Default
Model class

a class inherited from torch.nn.Module

required

Returns:

Name Type Description
TempModule class

The same class but additionally inheriting from flgo.utils.fmodule.FModule

Source code in flgo\utils\fflow.py
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
def module2fmodule(Model):
    """
    Convert a class of torch.nn.Module into class flgo.utils.fmodule.FModule
    Args:
        Model (class): a class inherited from torch.nn.Module

    Returns:
        TempModule (class): The same class but additionally inheriting from flgo.utils.fmodule.FModule

    """
    # Dynamically mix FModule into Model: the produced subclass keeps Model's
    # constructor and forward behavior while gaining FModule's capabilities.
    class TempFModule(Model, flgo.utils.fmodule.FModule):
        def __init__(self, *args, **kwargs):
            # delegate construction along the MRO (Model first, then FModule)
            super().__init__(*args, **kwargs)

    return TempFModule

multi_init_and_run(runner_args, devices=[], scheduler=None)

Create multiple runners and run in parallel

Parameters:

Name Type Description Default
runner_args list

each element in runner_args should be either a dict or a tuple of parameters

required
devices list

a list of gpu id

[]
scheduler flgo.experiment.device_scheduler.BasicScheduler(...

GPU scheduler

None

Returns:

Type Description

a list of output results of runners

Example:

    >>> from flgo.algorithm import fedavg, fedprox, scaffold
    >>> # create task 'mnist_iid' by flgo.gen_task if there exists no such task
    >>> task='./mnist_iid'
    >>> if not os.path.exists(task): flgo.gen_task({'benchmark':{'name':'flgo.benchmark.mnist_classification'}, 'partitioner':{'name':'IIDPartitioner','para':{'num_clients':100}}}, task)
    >>> algos = [fedavg, fedprox, scaffold]
    >>> flgo.multi_init_and_run([{'task':task, 'algorithm':algo} for algo in algos], devices=[0])
Source code in flgo\utils\fflow.py
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
def _merged_option(option):
    r"""
    Merge a user-specified (partial) option dict into the full default option
    produced by read_option_from_command, casting each value to the type of
    the corresponding default value.

    Args:
        option (dict): the user-specified option

    Returns:
        dict: the full option
    """
    default_option = read_option_from_command()
    for op_key in option:
        if op_key in default_option.keys():
            op_type = type(default_option[op_key])
            if op_type == type(option[op_key]):
                default_option[op_key] = option[op_key]
            elif op_type is list:
                # wrap scalars into a one-element list, convert other iterables
                default_option[op_key] = list(option[op_key]) if hasattr(option[op_key], '__iter__') else [option[op_key]]
            elif op_type is tuple:
                default_option[op_key] = tuple(option[op_key]) if hasattr(option[op_key], '__iter__') else (option[op_key])
            else:
                default_option[op_key] = op_type(option[op_key])
    return default_option

def multi_init_and_run(runner_args:list, devices = [], scheduler=None):
    r"""
    Create multiple runners and run in parallel

    Args:
        runner_args (list): each element in runner_args should be either a dict or a tuple of parameters
        devices (list): a list of gpu id
        scheduler (flgo.experiment.device_scheduler.BasicScheduler(...)): GPU scheduler

    Returns:
        a list of output results of runners

    Raises:
        RuntimeError: if a runner specification misses 'task' or 'algorithm'

    Example:
    ```python
        >>> from flgo.algorithm import fedavg, fedprox, scaffold
        >>> # create task 'mnist_iid' by flgo.gen_task if there exists no such task
        >>> task='./mnist_iid'
        >>> if not os.path.exists(task): flgo.gen_task({'benchmark':{'name':'flgo.benchmark.mnist_classification'}, 'partitioner':{'name':'IIDPartitioner','para':{'num_clients':100}}}, task)
        >>> algos = [fedavg, fedprox, scaffold]
        >>> flgo.multi_init_and_run([{'task':task, 'algorithm':algo} for algo in algos], devices=[0])
    ```
    """
    if len(runner_args)==0:return
    # normalize every runner specification into the positional argument list
    # [task, algorithm, option, model, Logger, Simulator, scene]
    args = []
    if type(runner_args[0]) is dict:
        keys = ['task', 'algorithm', 'option', 'model', 'Logger', 'Simulator', 'scene']
        for a in runner_args:
            tmp = collections.defaultdict(lambda:None, a)
            if tmp['task'] is None or tmp['algorithm'] is None:
                raise RuntimeError("keyword 'task' or 'algorithm' is of NoneType")
            algorithm = tmp['algorithm']
            # a module has __name__ but no __module__; pass modules by name so
            # they can be re-imported inside the child process
            tmp['algorithm'] = algorithm.__name__ if (not hasattr(algorithm, '__module__') and hasattr(algorithm, '__name__')) else algorithm
            tmp['option'] = default_option_dict if tmp['option'] is None else _merged_option(tmp['option'])
            if tmp['model'] is None:
                model_name = None
            elif not hasattr(tmp['model'], '__module__') and hasattr(tmp['model'], '__name__'):
                model_name = tmp['model'].__name__
            else:
                model_name = tmp['model']
            tmp['model'] = model_name
            if tmp['Logger'] is None:
                tmp['Logger'] = flgo.experiment.logger.simple_logger.SimpleLogger
            if tmp['Simulator'] is None:
                tmp['Simulator'] = flgo.simulator.DefaultSimulator
            if tmp['scene'] is None:
                tmp['scene'] = 'horizontal'
            args.append([tmp[k] for k in keys])
    elif type(runner_args[0]) is tuple or type(runner_args[0]) is list:
        for a in runner_args:
            if len(a)<2: raise RuntimeError('the args of runner should at least contain task and algorithm.')
            default_args = [None, None, default_option_dict, None, flgo.experiment.logger.simple_logger.SimpleLogger, flgo.simulator.DefaultSimulator, 'horizontal']
            for aid in range(len(a)):
                if aid==0:
                    default_args[aid] = a[aid]
                elif aid==1:
                    algorithm = a[aid]
                    default_args[aid] = algorithm.__name__ if (not hasattr(algorithm, '__module__') and hasattr(algorithm, '__name__')) else algorithm
                elif aid==2:
                    default_args[aid] = _merged_option(a[aid])
                elif aid==3:
                    model = a[aid]
                    if model is None:
                        model_name = None
                    elif not hasattr(model, '__module__') and hasattr(model, '__name__'):
                        model_name = model.__name__
                    else:
                        model_name = model
                    default_args[aid] = model_name
                else:
                    default_args[aid] = a[aid]
            # fix: the constructed args were previously discarded, so tuple/list
            # style runner_args silently created no runner at all
            args.append(default_args)

    # per-runner process bookkeeping
    runner_state = {rid: {'p': None, 'completed': False, 'output': None, 'runner_in_queue': False, 'recv': None, } for
                    rid in range(len(args))}
    if scheduler is None: scheduler = flgo.experiment.device_scheduler.BasicScheduler(devices)
    while True:
        for rid in range(len(args)):
            current_arg = args[rid]
            if runner_state[rid]['p'] is None:
                # not running: launch it once the scheduler grants a device
                if not runner_state[rid]['completed']:
                    available_device = scheduler.get_available_device(current_arg)
                    if available_device is None:
                        continue
                    list_current_arg = copy.deepcopy(current_arg)
                    list_current_arg[2]['gpu'] = available_device
                    recv_end, send_end = multiprocessing.Pipe(False)
                    list_current_arg.append(send_end)
                    runner_state[rid]['p'] = multiprocessing.Process(target=_call_by_process, args=tuple(list_current_arg))
                    runner_state[rid]['recv'] = recv_end
                    runner_state[rid]['p'].start()
                    scheduler.add_process(runner_state[rid]['p'].pid)
                    print('Process {} was created for args {}'.format(runner_state[rid]['p'].pid,current_arg))
            else:
                # running: harvest the child once it has exited
                if runner_state[rid]['p'].exitcode is not None:
                    tmp = runner_state[rid]['recv'].recv()
                    scheduler.remove_process(tmp[-1])
                    try:
                        runner_state[rid]['p'].terminate()
                    except Exception:
                        pass
                    runner_state[rid]['p'] = None
                    if len(tmp) == 2:
                        runner_state[rid]['completed'] = True
                        runner_state[rid]['output'] = tmp[0]
                    else:
                        # the child crashed: print its traceback; it will be
                        # relaunched on the next sweep
                        print(tmp[1])
        if all([v['completed'] for v in runner_state.values()]): break
        time.sleep(1)
    # collect the result records written by each child process
    res = []
    for rid in range(len(runner_state)):
        rec_path = runner_state[rid]['output']
        with open(rec_path, 'r') as inf:
            s_inf = inf.read()
            rec = json.loads(s_inf)
        res.append(rec)
    return res

read_option_from_command()

Generate running-time configurations for flgo.init with default values from command lines

Returns:

Type Description

a dict of option (i.e. configuration)

Source code in flgo\utils\fflow.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def read_option_from_command():
    r"""
    Generate running-time configurations for flgo.init with default values from command lines

    Returns:
        a dict of option (i.e. configuration); options whose parsed value is
        None (e.g. --gpu, --algo_para left unset) are normalized to []
    """

    parser = argparse.ArgumentParser()
    """Training Options"""
    # basic settings
    # methods of server side for sampling and aggregating
    parser.add_argument('--sample', help='methods for sampling clients', type=str, choices=sample_list, default='uniform')
    parser.add_argument('--aggregate', help='methods for aggregating models', type=str, choices=agg_list, default='other')
    # hyper-parameters of training in server side
    parser.add_argument('--num_rounds', help='number of communication rounds', type=int, default=20)
    parser.add_argument('--proportion', help='proportion of clients sampled per round', type=float, default=0.2)
    parser.add_argument('--learning_rate_decay', help='learning rate decay for the training process;', type=float, default=0.998)
    parser.add_argument('--lr_scheduler', help='type of the global learning rate scheduler', type=int, default=-1)
    parser.add_argument('--early_stop', help='stop training if there is no improvement for no smaller than the maximum rounds', type=int, default=-1)
    # hyper-parameters of local_movielens_recommendation training
    parser.add_argument('--num_epochs', help='number of epochs when clients locally train the model on data;', type=int, default=5)
    parser.add_argument('--num_steps', help='the number of local steps, which dominate num_epochs when setting num_steps>0', type=int, default=-1)
    parser.add_argument('--learning_rate', help='learning rate for inner solver;', type=float, default=0.1)
    # batch_size is a float so that values in (0,1) can denote a fraction of the
    # local dataset; fix: use an explicit float default instead of the string '64'
    # (argparse coerced the string through `type` anyway, so behavior is unchanged)
    parser.add_argument('--batch_size', help='batch size', type=float, default=64.0)
    parser.add_argument('--optimizer', help='select the optimizer for gd', type=str, choices=optimizer_list, default='SGD')
    parser.add_argument('--clip_grad', help='clipping gradients if the max norm of gradients ||g|| > clip_norm > 0', type=float, default=0.0)
    parser.add_argument('--momentum', help='momentum of local training', type=float, default=0.0)
    parser.add_argument('--weight_decay', help='weight decay of local training', type=float, default=0.0)
    parser.add_argument('--num_edge_rounds', help='number of edge rounds in hierFL', type=int, default=5)
    # algorithm-dependent hyper-parameters
    parser.add_argument('--algo_para', help='algorithm-dependent hyper-parameters', nargs='*', type=float)

    """Environment Options"""
    # the ratio of the amount of the data used to train
    parser.add_argument('--train_holdout', help='the rate of holding out the validation dataset from all the local training datasets', type=float, default=0.1)
    parser.add_argument('--test_holdout', help='the rate of holding out the validation dataset from the testing datasets owned by the server', type=float, default=0.0)
    parser.add_argument('--local_test', help='if this term is set True and train_holdout>0, (0.5*train_holdout) of data will be set as client.test_data.', action="store_true", default=False)
    # realistic machine config
    parser.add_argument('--seed', help='seed for random initialization;', type=int, default=0)
    parser.add_argument('--dataseed', help='seed for random initialization for data train/val/test partition', type=int, default=0)
    parser.add_argument('--gpu', nargs='*', help='GPU IDs and empty input is equal to using CPU', type=int)
    parser.add_argument('--server_with_cpu', help='the model parameters will be stored in the memory if True', action="store_true", default=False)
    parser.add_argument('--num_parallels', help="the number of parallels in the clients computing session", type=int, default=1)
    parser.add_argument('--num_workers', help='the number of workers of DataLoader', type=int, default=0)
    parser.add_argument('--pin_memory', help='pin_memory of DataLoader', action="store_true", default=False)
    parser.add_argument('--test_batch_size', help='the batch_size used in testing phase;', type=int, default=512)

    """Simulator Options"""
    # the simulating systemic configuration of clients and the server that helps constructing the heterogeity in the network condition & computing power
    parser.add_argument('--availability', help="client availability mode", type=str, default = 'IDL')
    parser.add_argument('--connectivity', help="client connectivity mode", type=str, default = 'IDL')
    parser.add_argument('--completeness', help="client completeness mode", type=str, default = 'IDL')
    parser.add_argument('--responsiveness', help="client responsiveness mode", type=str, default='IDL')

    """Logger Options"""
    # logger setting
    parser.add_argument('--log_level', help='the level of logger', type=str, default='INFO')
    parser.add_argument('--log_file', help='bool controls whether log to file and default value is False', action="store_true", default=False)
    parser.add_argument('--no_log_console', help='bool controls whether log to screen and default value is True', action="store_true", default=False)
    parser.add_argument('--no_overwrite', help='bool controls whether to overwrite the old result', action="store_true", default=False)
    parser.add_argument('--eval_interval', help='evaluate every __ rounds;', type=int, default=1)
    # parse_known_args tolerates extra flags owned by other components
    try: option = vars(parser.parse_known_args()[0])
    except IOError as msg: parser.error(str(msg))
    # normalize unset multi-value options (e.g. gpu, algo_para) from None to []
    for key in option.keys():
        if option[key] is None:
            option[key]=[]
    return option

run_in_parallel(task, algorithm, options=[], model=None, devices=[], Logger=flgo.experiment.logger.simple_logger.SimpleLogger, Simulator=flgo.simulator.DefaultSimulator, scene='horizontal', scheduler=None)

Run different groups of hyper-parameters for one task and one algorithm in parallel.

Parameters:

Name Type Description Default
task str

the dictionary of the federated task

required
algorithm module|class

the algorithm will be used to optimize the model in federated manner, which must contain pre-defined attributions (e.g. algorithm.Server and algorithm.Client for horizontal federated learning)

required
options list

the configurations of different groups of hyper-parameters

[]
model module|class

the model module that contains two methods: model.init_local_module(object) and model.init_global_module(object)

None
devices list

the list of IDs of devices

[]
Logger class

the class of the logger inherited from flgo.experiment.logger.BasicLogger

flgo.experiment.logger.simple_logger.SimpleLogger
Simulator class

the class of the simulator inherited from flgo.simulator.BasicSimulator

flgo.simulator.DefaultSimulator
scene str

'horizontal' or 'vertical' in current version of FLGo

'horizontal'
scheduler instance of flgo.experiment.device_scheduler.BasicScheduler

GPU scheduler that schedules GPU by checking their availability

None

Returns:

Type Description

the returns of _call_by_process

Source code in flgo\utils\fflow.py
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
def run_in_parallel(task: str, algorithm, options:list = [], model=None, devices = [], Logger:flgo.experiment.logger.BasicLogger = flgo.experiment.logger.simple_logger.SimpleLogger, Simulator=flgo.simulator.DefaultSimulator, scene='horizontal', scheduler = None):
    """
    Run different groups of hyper-parameters for one task and one algorithm in parallel.

    Each group of hyper-parameters in `options` is executed in its own child
    process. The scheduler assigns a device to each pending run, and the parent
    polls the children once per second until every run has completed
    successfully.

    Args:
        task (str): the path of the directory of the federated task
        algorithm (module|class): the algorithm will be used to optimize the model in federated manner, which must contain pre-defined attributions (e.g. algorithm.Server and algorithm.Client for horizontal federated learning)
        options (list): the configurations of different groups of hyper-parameters
        model (module|class): the model module that contains two methods: model.init_local_module(object) and model.init_global_module(object)
        devices (list): the list of IDs of devices
        Logger (class): the class of the logger inherited from flgo.experiment.logger.BasicLogger
        Simulator (class): the class of the simulator inherited from flgo.simulator.BasicSimulator
        scene (str): 'horizontal' or 'vertical' in current version of FLGo
        scheduler (instance of flgo.experiment.device_scheduler.BasicScheduler): GPU scheduler that schedules GPU by checking their availability

    Returns:
        list: the parsed JSON records produced by each run (one per option,
        in the same order as `options`)
    """
    try:
        # init multiprocess: 'spawn' is required to use CUDA in child processes;
        # this may raise if the start method was already fixed, which is harmless
        torch.multiprocessing.set_start_method('spawn', force=True)
        torch.multiprocessing.set_sharing_strategy('file_system')
    except:
        pass
    # modules/classes are converted to their *names* (str) where possible, since
    # module objects cannot be pickled across a spawned process boundary
    if model is None:
        model_name = None
    else:
        if not hasattr(model, '__module__') and hasattr(model, '__name__'):
            model_name = model.__name__
        else:
            model_name = model
    algorithm_name = algorithm.__name__ if (not hasattr(algorithm, '__module__') and hasattr(algorithm, '__name__')) else algorithm
    # per-option bookkeeping: process handle, completion flag, output record path,
    # and the receiving end of the pipe used by the child to report back
    option_state = {oid:{'p':None, 'completed':False, 'output':None, 'option_in_queue':False, 'recv':None, } for oid in range(len(options))}
    if scheduler is None: scheduler = flgo.experiment.device_scheduler.BasicScheduler(devices)
    # polling loop: launch pending runs whenever the scheduler has a free device,
    # and harvest finished ones; exits only when all options have completed
    while True:
        for oid in range(len(options)):
            opt = options[oid]
            if option_state[oid]['p'] is None:
                if not option_state[oid]['completed']:
                    available_device = scheduler.get_available_device(opt)
                    if available_device is None: continue
                    else:
                        opt['gpu'] = available_device
                        # one-way pipe: child writes its result, parent reads it
                        recv_end, send_end = multiprocessing.Pipe(False)
                        option_state[oid]['p'] = multiprocessing.Process(target=_call_by_process, args=(task, algorithm_name, opt, model_name, Logger, Simulator, scene, send_end))
                        option_state[oid]['recv'] = recv_end
                        option_state[oid]['p'].start()
                        scheduler.add_process(option_state[oid]['p'].pid)
                        print('Process {} was created for args {}'.format(option_state[oid]['p'].pid,(task, algorithm_name, opt, model_name, Logger, Simulator, scene)))
            else:
                # a non-None exitcode means the child process has terminated
                if option_state[oid]['p'].exitcode is not None:
                    # NOTE(review): assumes _call_by_process sends a tuple whose last
                    # element is the pid; a 2-tuple signals success with the record
                    # path at index 0, and any other length carries an error message
                    # at index 1 -- confirm against _call_by_process
                    tmp = option_state[oid]['recv'].recv()
                    scheduler.remove_process(tmp[-1])
                    try:
                        option_state[oid]['p'].terminate()
                    except:
                        pass
                    option_state[oid]['p'] = None
                    if len(tmp)==2:
                        option_state[oid]['completed'] = True
                        option_state[oid]['output'] = tmp[0]
                    else:
                        # failed runs are only reported; 'completed' stays False, so
                        # this option is re-submitted on a later pass of the loop
                        print(tmp[1])
        if all([v['completed'] for v in option_state.values()]):break
        time.sleep(1)
    # load the JSON record written to disk by each successful run
    res = []
    for oid in range(len(options)):
        rec_path = option_state[oid]['output']
        with open(rec_path, 'r') as inf:
            s_inf = inf.read()
            rec = json.loads(s_inf)
        res.append(rec)
    return res

set_data_root(data_root=None)

Set the root of data that stores all the raw data automatically

Parameters:

Name Type Description Default
data_root str

the path of a directory; its default value is None, which resets the data root to the package default, while the special value 'cwd' sets it to os.getcwd().

None
Source code in flgo\utils\fflow.py
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
def set_data_root(data_root:str=None):
    """
    Set the root directory where all the raw data will be stored.

    The chosen path is persisted by rewriting the ``data_root = ...`` line in
    ``flgo/benchmark/__init__.py``, so the setting survives across sessions.

    Args:
        data_root (str): the path of the data directory. ``None`` resets the
            root to the package default (i.e. ``flgo.benchmark.path/RAW_DATA``),
            and the special value ``'cwd'`` sets it to ``os.getcwd()``.

    Raises:
        TypeError: if an explicit `data_root` exists but is not a directory.
    """
    file_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'benchmark', '__init__.py')
    default_root = os.path.abspath(os.path.join(flgo.benchmark.path, 'RAW_DATA'))
    if data_root is None:
        # fix: always fall back to the package default when no path is given;
        # previously a None data_root crashed in os.path.exists(None) whenever
        # the current root already equaled the default
        crt_root = default_root.strip()
        root_name = '"' + crt_root + '"'
    elif data_root == 'cwd':
        crt_root = os.path.abspath(os.getcwd())
        root_name = 'os.getcwd()'
    else:
        if not os.path.exists(data_root):
            os.makedirs(data_root)
        if not os.path.isdir(data_root):
            raise TypeError('data_root must be a dir')
        crt_root = os.path.abspath(data_root).strip()
        root_name = '"' + crt_root + '"'
    with open(file_path, 'r', encoding=sys.getfilesystemencoding()) as inf:
        lines = inf.readlines()
    # locate the assignment to data_root (index 0 is a valid hit, hence >= 0)
    idx = -1
    for i, line in enumerate(lines):
        if line.find('data_root') > -1:
            idx = i
            break
    if idx >= 0:
        # keep a trailing newline so the following line is not merged into this one
        lines[idx] = "data_root = " + root_name + "\n"
    with open(file_path, 'w', encoding=sys.getfilesystemencoding()) as outf:
        outf.writelines(lines)
    flgo.benchmark.data_root = crt_root
    print('Data root directory has successfully been changed to {}'.format(crt_root))
    return

setup_seed(seed)

Fix all the random seed used in numpy, torch and random module

Parameters:

Name Type Description Default
seed int

the random seed

required
Source code in flgo\utils\fflow.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def setup_seed(seed):
    r"""
    Fix all the random seeds used by the random, numpy and torch modules.

    A negative seed additionally switches torch into fully deterministic mode
    (cudnn disabled, deterministic algorithms enforced), and its absolute
    value is then used as the base seed.

    Args:
        seed (int): the random seed
    """
    if seed < 0:
        # deterministic mode: trade speed/availability of cudnn for reproducibility
        torch.backends.cudnn.enabled = False
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        torch.use_deterministic_algorithms(True)
        seed = abs(seed)
    # each generator receives its own fixed offset from the base seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed + 1)
    np.random.seed(seed + 21)
    torch.manual_seed(seed + 12)
    torch.cuda.manual_seed_all(seed + 123)

tune(task, algorithm, option={}, model=None, Logger=flgo.experiment.logger.tune_logger.TuneLogger, Simulator=flgo.simulator.DefaultSimulator, scene='horizontal', scheduler=None)

Tune hyper-parameters for the specific (task, algorithm, model) in parallel.

Parameters:

Name Type Description Default
task str

the path of the directory of the federated task

required
algorithm module|class

the algorithm will be used to optimize the model in federated manner, which must contain pre-defined attributions (e.g. algorithm.Server and algorithm.Client for horizontal federated learning)

required
option dict

the dict whose values should be of type list to construct the combinations

{}
model module|class

the model module that contains two methods: model.init_local_module(object) and model.init_global_module(object)

None
Logger class

the class of the logger inherited from flgo.experiment.logger.BasicLogger

flgo.experiment.logger.tune_logger.TuneLogger
Simulator class

the class of the simulator inherited from flgo.simulator.BasicSimulator

flgo.simulator.DefaultSimulator
scene str

'horizontal' or 'vertical' in current version of FLGo

'horizontal'
scheduler instance of flgo.experiment.device_scheduler.BasicScheduler

GPU scheduler that schedules GPU by checking their availability

None
Source code in flgo\utils\fflow.py
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
def tune(task: str, algorithm, option: dict = {}, model=None, Logger: flgo.experiment.logger.BasicLogger = flgo.experiment.logger.tune_logger.TuneLogger, Simulator: BasicSimulator=flgo.simulator.DefaultSimulator, scene='horizontal', scheduler=None):
    """
    Tune hyper-parameters for the specific (task, algorithm, model) in parallel.

    Args:
        task (str): the path of the directory of the federated task
        algorithm (module|class): the algorithm will be used to optimize the model in federated manner, which must contain pre-defined attributions (e.g. algorithm.Server and algorithm.Client for horizontal federated learning)
        option (dict): the dict whose values should be of type list to construct the combinations
        model (module|class): the model module that contains two methods: model.init_local_module(object) and model.init_global_module(object)
        Logger (class): the class of the logger inherited from flgo.experiment.logger.BasicLogger
        Simulator (class): the class of the simulator inherited from flgo.simulator.BasicSimulator
        scene (str): 'horizontal' or 'vertical' in current version of FLGo
        scheduler (instance of flgo.experiment.device_scheduler.BasicScheduler): GPU scheduler that schedules GPU by checking their availability
    """
    # fix: work on a shallow copy so neither the caller's dict nor the shared
    # mutable default argument is modified (the original popped 'gpu' and
    # listified values in place)
    option = dict(option)
    # extract the device ids; -1 denotes CPU-only when no gpu entry is given
    if 'gpu' in option:
        device_ids = option.pop('gpu')
        if not isinstance(device_ids, Iterable): device_ids = [device_ids]
    else:
        device_ids = [-1]
    # wrap scalar values (and strings) into singleton lists so that
    # itertools.product can enumerate every hyper-parameter combination
    keys = list(option.keys())
    for k in keys:
        if not isinstance(option[k], Iterable) or isinstance(option[k], str):
            option[k] = [option[k]]
    para_combs = [para_comb for para_comb in itertools.product(*(option[k] for k in keys))]
    options = [{k: v for k, v in zip(keys, paras)} for paras in para_combs]
    for op in options: op['log_file'] = True
    if scheduler is None:
        scheduler = flgo.experiment.device_scheduler.BasicScheduler(device_ids)
    outputs = run_in_parallel(task, algorithm, options, model, devices=device_ids, Logger=Logger, Simulator=Simulator, scene=scene, scheduler=scheduler)
    # select the combination whose best validation loss is the smallest
    optimal_idx = int(np.argmin([min(output['val_loss']) for output in outputs]))
    optimal_para = options[optimal_idx]
    print("The optimal combination of hyper-parameters is:")
    print('-----------------------------------------------')
    for k, v in optimal_para.items():
        if k == 'gpu': continue
        print("{}\t|{}".format(k, v))
    print('-----------------------------------------------')
    op_round = np.argmin(outputs[optimal_idx]['val_loss'])
    # fix: scale by the *scalar* eval_interval of the optimal combination; the
    # original multiplied the listified option['eval_interval'] by op_round,
    # which repeats the list instead of computing the round number
    if 'eval_interval' in optimal_para:
        op_round = optimal_para['eval_interval'] * op_round
    print('The minimal validation loss occurs at the round {}'.format(op_round))