如何在使用Keras Tuner调优模型时跳过有问题的超参数组合?

zd287kbt  于 5个月前  发布在  其他
关注(0)|答案(2)|浏览(74)

使用Keras Tuner时,似乎没有办法跳过有问题的超参数组合。例如,Conv1D层中的过滤器数量未必与其后MaxPooling1D层的每一个池大小取值都兼容,从而导致模型构建错误。但是,在运行调谐器之前可能无法预先知道这一点。一旦运行调谐器,这类组合就会引发错误,并终止整个调优过程。有没有办法跳过所有会导致错误的超参数组合?
示例代码:

def model_builder(hp):
    """Build and compile the tunable 1-D CNN classifier for a single trial.

    Hyperparameters are requested from ``hp`` layer by layer, in the same
    order in which the layers are stacked:
    Embedding -> Conv1D -> MaxPooling1D -> Conv1D -> GlobalMaxPooling1D ->
    Dropout -> Dense -> Dropout -> Dense(1, sigmoid).

    Args:
        hp: Hyperparameter container handed in by the tuner; only its
            ``Int``, ``Choice`` and ``Float`` methods are used.

    Returns:
        The compiled model (binary cross-entropy loss, accuracy metric,
        optimizer chosen among 'rmsprop', 'adam' and 'sgd').
    """
    model = Sequential()

    # ---- Embedding --------------------------------------------------
    # NOTE(review): the input length itself is tuned ('max_length');
    # confirm the training data is padded/truncated to match each trial.
    vocab_size = hp.Int(
        'vocab_size', min_value=4000, max_value=10000, step=1000, default=4000
    )
    embedding_dim = hp.Choice(
        'embedding_dim', values=[32, 64, 128, 256], default=32
    )
    max_length = hp.Int(
        'max_length', min_value=50, max_value=200, step=10, default=50
    )
    model.add(
        Embedding(
            input_dim=vocab_size,
            output_dim=embedding_dim,
            input_length=max_length,
        )
    )

    # ---- First convolution + pooling --------------------------------
    num_filters_1 = hp.Choice('num_filters_1', values=[32, 64], default=32)
    kernel_size_1 = hp.Choice('kernel_size_1', values=[3, 5, 7, 9], default=7)
    model.add(
        Conv1D(
            filters=num_filters_1,
            kernel_size=kernel_size_1,
            activation='relu',
        )
    )

    pool_size = hp.Choice('pool_size', values=[3, 5], default=5)
    model.add(MaxPooling1D(pool_size=pool_size))

    # ---- Second convolution -----------------------------------------
    # NOTE(review): no padding argument is passed to either Conv1D, so some
    # max_length / pool_size / kernel_size_2 combinations presumably leave
    # fewer timesteps than the kernel needs -- confirm which are valid.
    num_filters_2 = hp.Choice('num_filters_2', values=[32, 64], default=32)
    kernel_size_2 = hp.Choice('kernel_size_2', values=[3, 5, 7, 9], default=7)
    model.add(
        Conv1D(
            filters=num_filters_2,
            kernel_size=kernel_size_2,
            activation='relu',
        )
    )
    model.add(GlobalMaxPooling1D())

    # ---- Regularised dense head -------------------------------------
    dropout_1 = hp.Float(
        'dropout_1', min_value=0.0, max_value=0.5, default=0.5, step=0.05
    )
    model.add(Dropout(rate=dropout_1))

    units = hp.Int('units', min_value=10, max_value=100, step=10, default=10)
    regularizer_1 = hp.Float(
        'regularizer_1',
        min_value=1e-4,
        max_value=1e-1,
        sampling='LOG',
        default=1e-2,
    )
    model.add(
        Dense(
            units=units,
            kernel_regularizer=tf.keras.regularizers.l2(regularizer_1),
            activation='relu',
        )
    )

    dropout_2 = hp.Float(
        'dropout_2', min_value=0.0, max_value=0.5, default=0.5, step=0.05
    )
    model.add(Dropout(dropout_2))

    # ---- Output unit -------------------------------------------------
    regularizer_2 = hp.Float(
        'regularizer_2',
        min_value=1e-4,
        max_value=1e-1,
        sampling='LOG',
        default=1e-2,
    )
    model.add(
        Dense(
            1,
            kernel_regularizer=tf.keras.regularizers.l2(regularizer_2),
            activation='sigmoid',
        )
    )

    # ---- Training configuration -------------------------------------
    optimizer = hp.Choice('optimizer', values=['rmsprop', 'adam', 'sgd'])
    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )

    return model

# Hyperband tuner that builds candidates with model_builder and ranks trials
# by validation accuracy. `factor` is not passed (it was commented out), so
# the tuner's own default applies.
tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy', max_epochs=20,
    directory='my_dir', project_name='cec',
    seed=seed,
)

class ClearTrainingOutput(tf.keras.callbacks.Callback):
  """Callback that clears the notebook cell output when a training run ends."""

  def on_train_end(self, logs=None):
    """Clear the displayed output; ``logs`` is accepted but not used.

    The signature follows the standard Keras hook ``(self, logs=None)``
    instead of relying on ``*args`` to swallow the instance.
    """
    IPython.display.clear_output(wait=True)
    
# Run the hyperparameter search; the test split doubles as validation data
# and the notebook output is cleared after every training run.
# NOTE(review): the tuner is Hyperband with max_epochs=20, which presumably
# manages the per-trial epoch count itself -- confirm `epochs=20` is honoured.
tuner.search(
    X_train, y_train,
    epochs=20,
    validation_data=(X_test, y_test),
    callbacks=[ClearTrainingOutput()],
)

字符串
错误消息:

Epoch 1/3
WARNING:tensorflow:Model was constructed with shape (None, 150) for input Tensor("embedding_input:0", shape=(None, 150), dtype=float32), but it was called on an input with incompatible shape (32, 50).
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-62-16a1eae457d8> in <module>
      3     IPython.display.clear_output(wait=True)
      4 
----> 5 tuner.search(
      6     X_train,
      7     y_train,

~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/engine/base_tuner.py in search(self, *fit_args, **fit_kwargs)
    128 
    129             self.on_trial_begin(trial)
--> 130             self.run_trial(trial, *fit_args, **fit_kwargs)
    131             self.on_trial_end(trial)
    132         self.on_search_end()

~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/tuners/hyperband.py in run_trial(self, trial, *fit_args, **fit_kwargs)
    385             fit_kwargs['epochs'] = hp.values['tuner/epochs']
    386             fit_kwargs['initial_epoch'] = hp.values['tuner/initial_epoch']
--> 387         super(Hyperband, self).run_trial(trial, *fit_args, **fit_kwargs)
    388 
    389     def _build_model(self, hp):

~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/engine/multi_execution_tuner.py in run_trial(self, trial, *fit_args, **fit_kwargs)
     94 
     95             model = self.hypermodel.build(trial.hyperparameters)
---> 96             history = model.fit(*fit_args, **copied_fit_kwargs)
     97             for metric, epoch_values in history.history.items():
     98                 if self.oracle.objective.direction == 'min':

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
     64   def _method_wrapper(self, *args, **kwargs):
     65     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
---> 66       return method(self, *args, **kwargs)
     67 
     68     # Running inside `run_distribute_coordinator` already.

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
    846                 batch_size=batch_size):
    847               callbacks.on_train_batch_begin(step)
--> 848               tmp_logs = train_function(iterator)
    849               # Catch OutOfRangeError for Datasets of unknown size.
    850               # This blocks until the batch has finished executing.

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
    578         xla_context.Exit()
    579     else:
--> 580       result = self._call(*args, **kwds)
    581 
    582     if tracing_count == self._get_tracing_count():

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
    625       # This is the first call of __call__, so we have to initialize.
    626       initializers = []
--> 627       self._initialize(args, kwds, add_initializers_to=initializers)
    628     finally:
    629       # At this point we know that the initialization is complete (or less

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
    503     self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
    504     self._concrete_stateful_fn = (
--> 505         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
    506             *args, **kwds))
    507 

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2444       args, kwargs = None, None
   2445     with self._lock:
-> 2446       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2447     return graph_function
   2448 

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
   2775 
   2776       self._function_cache.missed.add(call_context_key)
-> 2777       graph_function = self._create_graph_function(args, kwargs)
   2778       self._function_cache.primary[cache_key] = graph_function
   2779       return graph_function, args, kwargs

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2655     arg_names = base_arg_names + missing_arg_names
   2656     graph_function = ConcreteFunction(
-> 2657         func_graph_module.func_graph_from_py_func(
   2658             self._name,
   2659             self._python_function,

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    979         _, original_func = tf_decorator.unwrap(python_func)
    980 
--> 981       func_outputs = python_func(*func_args, **func_kwargs)
    982 
    983       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
    439         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    440         # the function a weak reference to itself to avoid a reference cycle.
--> 441         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    442     weak_wrapped_fn = weakref.ref(wrapped_fn)
    443 

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    966           except Exception as e:  # pylint:disable=broad-except
    967             if hasattr(e, "ag_error_metadata"):
--> 968               raise e.ag_error_metadata.to_exception(e)
    969             else:
    970               raise

ValueError: in user code:

    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:531 train_step  **
        y_pred = self(x, training=True)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py:277 call
        return super(Sequential, self).call(inputs, training=training, mask=mask)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:717 call
        return self._run_internal_graph(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:888 _run_internal_graph
        output_tensors = layer(computed_tensors, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py:207 call
        outputs = self._convolution_op(inputs, self.kernel)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:1106 __call__
        return self.conv_op(inp, filter)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:638 __call__
        return self.call(inp, filter)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:231 __call__
        return self.conv_op(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:220 _conv1d
        return conv1d(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:574 new_func
        return func(*args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:574 new_func
        return func(*args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:1655 conv1d
        result = gen_nn_ops.conv2d(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/gen_nn_ops.py:965 conv2d
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py:742 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:593 _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:3319 _create_op_internal
        ret = Operation(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:1816 __init__
        self._c_op = _create_c_op(self._graph, node_def, inputs,
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:1657 _create_c_op
        raise ValueError(str(e))

    ValueError: Negative dimension size caused by subtracting 7 from 6 for '{{node sequential/conv1d_1/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential/conv1d_1/conv1d/ExpandDims, sequential/conv1d_1/conv1d/ExpandDims_1)' with input shapes: [32,1,6,32], [1,7,32,32].

uurv41yg

uurv41yg1#

我自己为这个问题寻找解决方案已经很久了,最后终于找到了一个。它确实不太优雅,但是管用。我把它留在这里,也许能帮到别人。要点是把模型构造包装在一个try-except块中,一旦发生ValueError,就改为构建一个高度简化的模型。这里的关键是自定义一个损失函数,让它返回非常大的损失值,这样我们就可以用自己的回调函数捕捉到它,并停止训练该模型(下面附上以卷积神经网络为例的代码)。P.S. 也可以让自定义损失函数返回NaN,但出于某种原因,keras-tuner认为NaN小于任何数字,因此会把NaN当作最佳的val_loss。

def invalid_loss(y_true, y_pred):
    """Return binary cross-entropy shifted upward by 2,000,000.

    Used as the loss of the placeholder model so that its loss value is far
    larger than anything a real model would report.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model predictions.

    Returns:
        The binary cross-entropy of the inputs plus the constant offset.
    """
    cross_entropy = keras.losses.BinaryCrossentropy()
    penalised = cross_entropy(y_true, y_pred) + 2000000
    return penalised

def invalid_model():
    """Build the tiny placeholder model used when the real build fails.

    The network is deliberately minimal (2x2 resize, one 1-filter
    convolution, global max pooling, sigmoid output) and is compiled with
    ``invalid_loss`` so its reported loss is huge.

    Returns:
        The compiled placeholder model.
    """
    stack = (
        layers.Input(input_shape),
        layers.Resizing(height=2, width=2),
        layers.Conv2D(filters=1, kernel_size=2, activation='relu'),
        layers.GlobalMaxPooling2D(),
        layers.Dense(units=output_shape, activation="sigmoid"),
    )

    model = keras.Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(
        optimizer="Adam",
        loss=invalid_loss,
        metrics=[metrics.BinaryAccuracy()],
    )
    return model

def build_model(hp):
    """Build the tunable model, falling back to a placeholder on ValueError.

    Args:
        hp: Hyperparameter container supplied by the tuner. (The layers that
            would consume it are elided in this snippet.)

    Returns:
        The compiled real model, or the result of ``invalid_model()`` when
        constructing the real model raised ``ValueError``.
    """
    try:
       model = keras.Sequential()

       model.add(layers.Input((input_shape)))

       # Placeholder: the hyperparameter-dependent layers go here.
       ...

       # NOTE(review): `dense_activation` is not defined in this snippet;
       # presumably it is set in the elided section above -- confirm.
       model.add(layers.Dense(units=output_shape, 
                              activation=dense_activation))

       model.compile(optimizer="Adam",
                     loss=losses.BinaryCrossentropy(),
                     metrics=[metrics.BinaryAccuracy()])
    
    except ValueError:
        # Only ValueError is intercepted; any other exception still propagates.
        model = invalid_model()

    return model

字符串
下面是一个自定义回调的例子,它会提前停止训练,以免在“无效”模型上浪费时间。

class EarlyStoppingByLoss(keras.callbacks.Callback):
    """Stop training once the reported batch loss reaches a threshold.

    Args:
        max_loss: Loss value at or above which training is stopped.
    """

    def __init__(self, max_loss):
        # Let the base Callback initialise its own state first.
        super().__init__()
        self.max_loss = max_loss

    def on_train_batch_end(self, batch, logs=None):
        """Request a stop when the batch loss is at or above ``max_loss``.

        Args:
            batch: Index of the batch that just finished (unused).
            logs: Optional dict of metric values; a missing dict or a missing
                "loss" entry is ignored instead of raising.
        """
        loss = (logs or {}).get("loss")
        if loss is not None and loss >= self.max_loss:
            self.model.stop_training = True


您还可以用同样的方式拦截过大的模型(例如,在从卷积层过渡到全连接层时使用了Flatten()的情况)

from keras.utils.layer_utils import count_params

class EarlyStoppingByModelSize(keras.callbacks.Callback):
    """Stop training when the model has more trainable parameters than allowed.

    Args:
        max_size: Maximum permitted number of trainable parameters.
    """

    def __init__(self, max_size):
        # Let the base Callback initialise its own state first.
        super().__init__()
        self.max_size = max_size

    def on_train_begin(self, logs=None):
        """Count the trainable parameters and request a stop if over the limit.

        Args:
            logs: Optional dict of metric values (unused).
        """
        trainable_count = count_params(self.model.trainable_weights)
        if trainable_count > self.max_size:
            self.model.stop_training = True


因此,最后我们将这些回调函数添加到列表中,并在训练模型时使用它们

# Guards applied to every trial: abort on an absurdly high loss (the
# placeholder model) and on models with too many trainable parameters.
callbacks = [
    EarlyStoppingByLoss(900000),
    EarlyStoppingByModelSize(12000000),
]

# Run the search with both guards attached.
tuner.search(
    x=x_train,
    y=y_train,
    epochs=epochs,
    validation_data=(x_test, y_test),
    callbacks=callbacks,
    verbose=1,
    batch_size=batch_size,
)

e5nqia27

e5nqia272#

我找到了一个对我有效的简单解决方案。keras_tuner.Tuner的max_model_size参数已经覆盖了上一个答案中自定义回调EarlyStoppingByModelSize的功能。受max_model_size内部实现方式的启发,针对模型构建失败的简单解决方案是像下面这样继承HyperModel类,并重写HyperModel.build方法。
注意事项:这样仍然会引发错误并使该次试验失败,但调谐器会安全地继续进行下一次试验。与记录一个很高的损失值相比,这种行为更符合本意:后者会让你在用TensorBoard分析时难以过滤掉失败的模型;此外它还节省空间,因为不需要为这些虚假的失败模型记录信息。

import keras_tuner as kt

class HyperModel(kt.HyperModel):
    """HyperModel that reports any build failure as a failed trial."""

    # NOTE(review): `ke` is not defined in this snippet; presumably it is an
    # alias for keras imported elsewhere -- confirm.
    def build(self, hp: kt.HyperParameters) -> ke.Model:
        """Build the model for one trial.

        Args:
            hp: Hyperparameters of the current trial.

        Returns:
            The built model (construction is elided in this skeleton).

        Raises:
            FailedTrialError: If building raises any ``Exception``. The
                original exception is chained as ``__cause__`` so its
                traceback is preserved.
        """
        try:
            # build and return model
            ...
        except Exception as _e:
            # Re-raise as Keras Tuner's failed-trial error so the failure is
            # attributed to this trial (see the Keras Tuner failed-trials
            # guide for how the tuner handles it).
            from keras_tuner.src import errors
            raise errors.FailedTrialError(
                f"Failed to build model with error: {_e}"
            ) from _e

字符串

相关问题