From bd86e2f1bf94dfb9b67b301451a5abb079ba8e26 Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 10 Mar 2023 10:49:09 +0800 Subject: [PATCH 01/54] [feat]: add sequence encoding module --- .../python/feature_column/feature_column.py | 12 ++ easy_rec/python/layers/cmbf.py | 3 +- easy_rec/python/layers/input_layer.py | 21 ++- easy_rec/python/layers/sequence_encoder.py | 159 ++++++++++++++++++ easy_rec/python/layers/uniter.py | 3 +- easy_rec/python/model/dbmtl.py | 19 +++ easy_rec/python/model/easy_rec_model.py | 21 +++ easy_rec/python/protos/dbmtl.proto | 6 + easy_rec/python/protos/feature_config.proto | 2 + easy_rec/python/protos/layer.proto | 36 ++++ easy_rec/python/test/train_eval_test.py | 2 +- easy_rec/version.py | 2 +- setup.cfg | 2 +- 13 files changed, 279 insertions(+), 9 deletions(-) create mode 100644 easy_rec/python/layers/sequence_encoder.py diff --git a/easy_rec/python/feature_column/feature_column.py b/easy_rec/python/feature_column/feature_column.py index 94a9cd132..04fc07baf 100644 --- a/easy_rec/python/feature_column/feature_column.py +++ b/easy_rec/python/feature_column/feature_column.py @@ -86,6 +86,8 @@ def _cmp_embed_config(a, b): 'shared embed info of [%s] is not matched [%s] vs [%s]' % ( embed_name, config, self._share_embed_infos[embed_name]) self._share_embed_names[embed_name] += 1 + if config.feature_type == FeatureConfig.FeatureType.SequenceFeature: + self._share_embed_infos[embed_name] = copy_obj(config) else: self._share_embed_names[embed_name] = 1 self._share_embed_infos[embed_name] = copy_obj(config) @@ -156,6 +158,11 @@ def _cmp_embed_config(a, b): combiner=self._share_embed_infos[embed_name].combiner, partitioner=partitioner, ev_params=ev_params) + config = self._share_embed_infos[embed_name] + max_seq_len = config.max_seq_len if config.HasField( + 'max_seq_len') else -1 + for fc in share_embed_fcs: + fc.max_seq_length = max_seq_len self._deep_share_embed_columns[embed_name] = share_embed_fcs # for handling wide share embedding columns @@ 
-168,6 +175,11 @@ def _cmp_embed_config(a, b): combiner='sum', partitioner=partitioner, ev_params=ev_params) + config = self._share_embed_infos[embed_name] + max_seq_len = config.max_seq_len if config.HasField( + 'max_seq_len') else -1 + for fc in share_embed_fcs: + fc.max_seq_length = max_seq_len self._wide_share_embed_columns[embed_name] = share_embed_fcs for fc_name in self._deep_columns: diff --git a/easy_rec/python/layers/cmbf.py b/easy_rec/python/layers/cmbf.py index 2c6ed8444..b42ddfd30 100644 --- a/easy_rec/python/layers/cmbf.py +++ b/easy_rec/python/layers/cmbf.py @@ -33,7 +33,8 @@ def __init__(self, model_config, feature_configs, features, cmbf_config, has_feature = True self._txt_seq_features = None if input_layer.has_group('text'): - self._txt_seq_features = input_layer(features, 'text', is_combine=False) + self._txt_seq_features, _, _ = input_layer( + features, 'text', is_combine=False) has_feature = True self._other_features = None if input_layer.has_group('other'): # e.g. 
statistical feature diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index 731f47c82..fa17a1c15 100644 --- a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -123,12 +123,25 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): group_columns, group_seq_columns = feature_group.select_columns( self._fc_parser) - assert len(group_columns) == 0, \ - 'there are none sequence columns: %s' % str(group_columns) + embedding_reg_lst = [] + output_features = None + group_features = [] + if group_columns: + cols_to_output_tensors = OrderedDict() + output_features = feature_column.input_layer( + features, + group_columns, + cols_to_output_tensors=cols_to_output_tensors, + feature_name_to_output_tensors=feature_name_to_output_tensors) + group_features = [cols_to_output_tensors[x] for x in group_columns] + + for col, val in cols_to_output_tensors.items(): + if isinstance(col, EmbeddingColumn) or isinstance( + col, SharedEmbeddingColumn): + embedding_reg_lst.append(val) builder = feature_column._LazyBuilder(features) seq_features = [] - embedding_reg_lst = [] for fc in group_seq_columns: with variable_scope.variable_scope('input_layer/' + fc.categorical_column.name): @@ -140,7 +153,7 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): embedding_reg_lst.append(tmp_embedding) regularizers.apply_regularization( self._embedding_regularizer, weights_list=embedding_reg_lst) - return seq_features + return seq_features, output_features, group_features def single_call_input_layer(self, features, diff --git a/easy_rec/python/layers/sequence_encoder.py b/easy_rec/python/layers/sequence_encoder.py new file mode 100644 index 000000000..07c339890 --- /dev/null +++ b/easy_rec/python/layers/sequence_encoder.py @@ -0,0 +1,159 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import logging + +import tensorflow as tf + +from easy_rec.python.compat import regularizers +from easy_rec.python.layers import dnn +from easy_rec.python.layers import multihead_cross_attention +from easy_rec.python.utils.shape_utils import get_shape_list + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class SequenceEncoder(object): + + def __init__(self, input_layer, feature_groups_config, emb_reg, l2_reg): + self._input_layer = input_layer + self._feature_groups_config = { + x.group_name: x for x in feature_groups_config + } + self._emb_reg = emb_reg + self._l2_reg = l2_reg + + def __call__(self, features, group_name, is_training=True, *args, **kwargs): + group_config = self._feature_groups_config[group_name] + if len(group_config.sequence_encoders) == 0: + return None + + seq_features, target_feature, target_features = self._input_layer( + features, group_name, is_combine=False) + assert len( + seq_features) > 0, 'sequence feature is empty in group: ' + group_name + + outputs = [] + for encoder in group_config.sequence_encoders: + encoder_type = encoder.WhichOneof('encoder').lower() + if encoder_type == 'bst': + encoding = self.bst_encoder(seq_features, target_feature, group_name, + encoder.bst) + outputs.append(encoding) + elif encoder_type == 'din': + encoding = self.din_encoder(seq_features, target_feature, group_name, + encoder.din, is_training) + outputs.append(encoding) + else: + assert False, 'unsupported sequence encode type: ' + encoder_type + + if len(outputs) == 0: + logging.warning( + "there's no sequence encoder configured in feature group: " + + group_name) + return None + if len(outputs) == 1: + return outputs[0] + + return tf.concat(outputs, axis=-1) + + def din_encoder(self, seq_features, target_feature, group_name, config, + is_training): + seq_input = [seq_fea for seq_fea, _ in seq_features] + regularizers.apply_regularization(self._emb_reg, weights_list=seq_input) + keys = tf.concat(seq_input, axis=-1) + + target_emb_size = 
target_feature.shape.as_list()[-1] + seq_emb_size = keys.shape.as_list()[-1] + assert target_emb_size == seq_emb_size, 'the embedding size of sequence and target item is not equal' \ + ' in feature group:' + group_name + + batch_size, max_seq_len, _ = get_shape_list(keys, 3) + queries = tf.tile(tf.expand_dims(target_feature, 1), [1, max_seq_len, 1]) + din_all = tf.concat([queries, keys, queries - keys, queries * keys], + axis=-1) + din_layer = dnn.DNN( + config.dnn, + self._l2_reg, + group_name + '/din_attention', + is_training, + last_layer_no_activation=True, + last_layer_no_batch_norm=True) + output = din_layer(din_all) # [B, L, 1] + scores = tf.transpose(output, [0, 2, 1]) # [B, 1, L] + + seq_len = seq_features[0][1] + seq_mask = tf.sequence_mask(seq_len, max_seq_len, dtype=tf.bool) + seq_mask = tf.expand_dims(seq_mask, 1) + paddings = tf.ones_like(scores) * (-2**32 + 1) + scores = tf.where(seq_mask, scores, paddings) # [B, 1, L] + scores = scores / (seq_emb_size**0.5) + # normalization with softmax is abandoned according to the original paper + scores = tf.nn.sigmoid(scores) + output = tf.squeeze(tf.matmul(scores, keys)) + return output + + def bst_encoder(self, seq_features, target_feature, group_name, config): + seq_embeds = [seq_fea for seq_fea, _ in seq_features] + regularizers.apply_regularization(self._emb_reg, weights_list=seq_embeds) + + max_position = config.max_position_embeddings + batch_size, max_seq_len, _ = get_shape_list(seq_features[0][0], 3) + valid_len = tf.assert_less_equal( + max_seq_len, + max_position, + message='sequence length is greater than `max_position_embeddings`:' + + str(max_position) + ' in feature group:' + group_name) + with tf.control_dependencies([valid_len]): + # seq_input: [batch_size, seq_len, embed_size] + seq_input = tf.concat(seq_embeds, axis=-1) + + # seq_len: [batch_size, ], 假设每个sequence feature的length都是相同的 + seq_len = seq_features[0][1] + seq_embed_size = seq_input.shape.as_list()[-1] + if target_feature is not 
None: + target_size = target_feature.shape.as_list()[-1] + assert seq_embed_size == target_size, 'the embedding size of sequence and target item is not equal' \ + ' in feature group:' + group_name + # target_feature: [batch_size, 1, embed_size] + target_feature = tf.expand_dims(target_feature, 1) + # seq_input: [batch_size, seq_len+1, embed_size] + seq_input = tf.concat([target_feature, seq_input], axis=1) + max_seq_len += 1 + seq_len += 1 + + if seq_embed_size != config.hidden_size: + seq_input = tf.layers.dense(seq_input, config.hidden_size) + + seq_fea = multihead_cross_attention.embedding_postprocessor( + seq_input, + position_embedding_name=group_name + '_position_embeddings', + max_position_embeddings=max_position) + seq_mask = tf.map_fn( + fn=lambda t: dynamic_mask(t, max_seq_len), elems=tf.to_int32(seq_len)) + attention_mask = multihead_cross_attention.create_attention_mask_from_input_mask( + from_tensor=seq_fea, to_mask=seq_mask) + + hidden_act = multihead_cross_attention.get_activation(config.hidden_act) + attention_fea = multihead_cross_attention.transformer_encoder( + seq_fea, + hidden_size=config.hidden_size, + num_hidden_layers=config.num_hidden_layers, + num_attention_heads=config.num_attention_heads, + attention_mask=attention_mask, + intermediate_size=config.intermediate_size, + intermediate_act_fn=hidden_act, + hidden_dropout_prob=config.hidden_dropout_prob, + attention_probs_dropout_prob=config.attention_probs_dropout_prob, + initializer_range=config.initializer_range, + name=group_name + + '/bst') # shape: [batch_size, seq_length, hidden_size] + + out_fea = attention_fea[:, 0, :] # target feature + return out_fea + + +def dynamic_mask(x, max_len): + ones = tf.ones(shape=tf.stack([x]), dtype=tf.int32) + zeros = tf.zeros(shape=tf.stack([max_len - x]), dtype=tf.int32) + return tf.concat([ones, zeros], axis=0) diff --git a/easy_rec/python/layers/uniter.py b/easy_rec/python/layers/uniter.py index 96b9cdc46..248afc1a9 100644 --- 
a/easy_rec/python/layers/uniter.py +++ b/easy_rec/python/layers/uniter.py @@ -31,7 +31,8 @@ def __init__(self, model_config, feature_configs, features, uniter_config, tower_num += 1 self._txt_seq_features = None if input_layer.has_group('text'): - self._txt_seq_features = input_layer(features, 'text', is_combine=False) + self._txt_seq_features, _, _ = input_layer( + features, 'text', is_combine=False) tower_num += 1 self._use_token_type = True if tower_num > 1 else False self._other_features = None diff --git a/easy_rec/python/model/dbmtl.py b/easy_rec/python/model/dbmtl.py index 913793474..3639bf029 100644 --- a/easy_rec/python/model/dbmtl.py +++ b/easy_rec/python/model/dbmtl.py @@ -42,6 +42,20 @@ def __init__(self, self._init_towers(self._model_config.task_towers) def build_predict_graph(self): + if self._model_config.use_input_batch_norm: + self._features = tf.layers.batch_normalization( + self._features, + training=self._is_training, + trainable=True, + name='input_bn') + if self._model_config.HasField('input_dropout_rate'): + drop_rate = self._model_config.input_dropout_rate + self._features = tf.layers.dropout( + self._features, + rate=drop_rate, + training=self._is_training, + name='input_dropout') + if self._model_config.HasField('bottom_cmbf'): bottom_fea = self._cmbf_layer(self._is_training, l2_reg=self._l2_reg) elif self._model_config.HasField('bottom_uniter'): @@ -56,6 +70,11 @@ def build_predict_graph(self): else: bottom_fea = self._features + if self._model_config.use_sequence_encoder: + seq_encoding = self.get_sequence_encoding(is_training=self._is_training) + if seq_encoding is not None: + bottom_fea = tf.concat([bottom_fea, seq_encoding], axis=-1) + # MMOE block if self._model_config.HasField('expert_dnn'): mmoe_layer = mmoe.MMOE( diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index 912291987..eff1af32a 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py 
@@ -12,6 +12,7 @@ from easy_rec.python.compat import regularizers from easy_rec.python.layers import input_layer +from easy_rec.python.layers.sequence_encoder import SequenceEncoder from easy_rec.python.utils import constant from easy_rec.python.utils import estimator_utils from easy_rec.python.utils import restore_filter @@ -60,6 +61,10 @@ def __init__(self, if constant.SAMPLE_WEIGHT in features: self._sample_weight = features[constant.SAMPLE_WEIGHT] + self._sequence_encoder = SequenceEncoder(self._input_layer, + model_config.feature_groups, + self._emb_reg, self._l2_reg) + @property def embedding_regularization(self): return self._base_model_config.embedding_regularization @@ -99,6 +104,22 @@ def build_input_layer(self, model_config, feature_configs): if model_config.HasField('variational_dropout') else None, is_training=self._is_training) + def get_sequence_encoding(self, group_name=None, is_training=True): + if group_name is None: + seq_encoding = [] + for group in self.feature_groups: + if len(group.sequence_encoders) == 0: + continue + encoding = self.get_sequence_encoding(group.group_name, + self._is_training) + if encoding is not None: + seq_encoding.append(encoding) + if seq_encoding: + return tf.concat(seq_encoding, axis=-1) + else: + return None + return self._sequence_encoder(self._feature_dict, group_name, is_training) + @abstractmethod def build_predict_graph(self): pass diff --git a/easy_rec/python/protos/dbmtl.proto b/easy_rec/python/protos/dbmtl.proto index 841b8adec..2b1f981aa 100644 --- a/easy_rec/python/protos/dbmtl.proto +++ b/easy_rec/python/protos/dbmtl.proto @@ -20,4 +20,10 @@ message DBMTL { repeated BayesTaskTower task_towers = 4; // l2 regularization optional float l2_regularization = 5 [default = 1e-4]; + // Whether to user sequence encoder + required bool use_sequence_encoder = 6 [default = false]; + // Whether to user sequence encoder + required bool use_input_batch_norm = 7 [default = false]; + // input layer dropout rate + optional 
float input_dropout_rate = 8 [default = 0]; } diff --git a/easy_rec/python/protos/feature_config.proto b/easy_rec/python/protos/feature_config.proto index 596e87e4d..5ed305c10 100644 --- a/easy_rec/python/protos/feature_config.proto +++ b/easy_rec/python/protos/feature_config.proto @@ -3,6 +3,7 @@ package protos; import "easy_rec/python/protos/hyperparams.proto"; import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/layer.proto"; enum WideOrDeep { DEEP = 0; WIDE = 1; @@ -140,6 +141,7 @@ message FeatureGroupConfig { optional WideOrDeep wide_deep = 3 [default = DEEP]; repeated SeqAttGroupConfig sequence_features = 4; optional bool negative_sampler = 5 [default = false]; + repeated SequenceEncoder sequence_encoders = 6; } message SeqAttMap { diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index 6cea6d3bd..482c5241f 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -74,3 +74,39 @@ message UniterTower { // dnn layers for other features optional DNN other_feature_dnn = 11; } + +message SequenceEncoder { + // encoder parameters + oneof encoder { + BSTEncoder bst = 1; + DINEncoder din = 2; + } +} + +message BSTEncoder { + // Size of the encoder layers and the pooler layer + required uint32 hidden_size = 1; + // Number of hidden layers in the Transformer encoder + required uint32 num_hidden_layers = 2; + // Number of attention heads for each attention layer in the Transformer encoder + required uint32 num_attention_heads = 3; + // The size of the "intermediate" (i.e. feed-forward) layer in the Transformer encoder + required uint32 intermediate_size = 4; + // The non-linear activation function (function or string) in the encoder and pooler. + required string hidden_act = 5 [default = 'gelu']; // "gelu", "relu", "tanh" and "swish" are supported. 
+ // The dropout probability for all fully connected layers in the embeddings, encoder, and pooler + required float hidden_dropout_prob = 6 [default = 0.1]; + // The dropout ratio for the attention probabilities + required float attention_probs_dropout_prob = 7 [default = 0.1]; + // The maximum sequence length that this model might ever be used with + required uint32 max_position_embeddings = 8 [default = 512]; + // Whether to add position embeddings for the position of each token in the text sequence + required bool use_position_embeddings = 9 [default = true]; + // The stddev of the truncated_normal_initializer for initializing all weight matrices + required float initializer_range = 10 [default = 0.02]; +} + +message DINEncoder { + // din attention layer + required DNN dnn = 1; +} diff --git a/easy_rec/python/test/train_eval_test.py b/easy_rec/python/test/train_eval_test.py index 5fca892b2..57c1d79bd 100644 --- a/easy_rec/python/test/train_eval_test.py +++ b/easy_rec/python/test/train_eval_test.py @@ -7,11 +7,11 @@ import threading import time import unittest +from distutils.version import LooseVersion import numpy as np import six import tensorflow as tf -from distutils.version import LooseVersion from tensorflow.python.platform import gfile from easy_rec.python.main import predict diff --git a/easy_rec/version.py b/easy_rec/version.py index 6e00ca21f..e4d390b71 100644 --- a/easy_rec/version.py +++ b/easy_rec/version.py @@ -1,3 +1,3 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
-__version__ = '0.6.1' +__version__ = '0.6.2' diff --git a/setup.cfg b/setup.cfg index b5b966faa..b180b9fb1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ multi_line_output = 7 force_single_line = true known_standard_library = setuptools known_first_party = easy_rec -known_third_party = absl,common_io,distutils,docutils,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml +known_third_party = absl,common_io,docutils,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml no_lines_before = LOCALFOLDER default_section = THIRDPARTY skip = easy_rec/python/protos From 2b8f2e70cbcf3d2322691be06a751c3f7a8eb93e Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 10 Mar 2023 13:53:20 +0800 Subject: [PATCH 02/54] [feat]: add sequence encoding module --- easy_rec/python/layers/cmbf.py | 4 + easy_rec/python/layers/common_layers.py | 17 --- easy_rec/python/layers/dnn.py | 4 +- .../layers/multihead_cross_attention.py | 43 +----- easy_rec/python/layers/sequence_encoder.py | 20 ++- easy_rec/python/layers/uniter.py | 8 +- .../model/collaborative_metric_learning.py | 2 +- easy_rec/python/utils/activation.py | 127 ++++++++++++++++++ 8 files changed, 155 insertions(+), 70 deletions(-) create mode 100644 easy_rec/python/utils/activation.py diff --git a/easy_rec/python/layers/cmbf.py b/easy_rec/python/layers/cmbf.py index b42ddfd30..e5f1caeb2 100644 --- a/easy_rec/python/layers/cmbf.py +++ b/easy_rec/python/layers/cmbf.py @@ -326,6 +326,10 @@ def merge_text_embedding(self, txt_embeddings, input_masks): return txt_embeddings def __call__(self, is_training, *args, **kwargs): + if not is_training: + self._model_config.hidden_dropout_prob = 0.0 + self._model_config.attention_probs_dropout_prob = 0.0 + # shape: [batch_size, image_num/image_dim, hidden_size] img_attention_fea = self.image_self_attention_tower() diff 
--git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index 80ad1496f..165fce5e1 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -1,29 +1,12 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. -import numpy as np import tensorflow as tf if tf.__version__ >= '2.0': tf = tf.compat.v1 -def gelu(x): - """Gaussian Error Linear Unit. - - This is a smoother version of the RELU. - Original paper: https://arxiv.org/abs/1606.08415 - Args: - x: float Tensor to perform activation. - - Returns: - `x` with the GELU activation applied. - """ - cdf = 0.5 * (1.0 + tf.tanh( - (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) - return x * cdf - - def highway(x, size=None, activation=None, diff --git a/easy_rec/python/layers/dnn.py b/easy_rec/python/layers/dnn.py index 4fdce37ba..d2af5a4cf 100644 --- a/easy_rec/python/layers/dnn.py +++ b/easy_rec/python/layers/dnn.py @@ -4,7 +4,7 @@ import tensorflow as tf -from easy_rec.python.utils.load_class import load_by_path +from easy_rec.python.utils.activation import get_activation if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -34,7 +34,7 @@ def __init__(self, self._name = name self._is_training = is_training logging.info('dnn activation function = %s' % self._config.activation) - self.activation = load_by_path(self._config.activation) + self.activation = get_activation(self._config.activation, is_training=is_training) self._last_layer_no_activation = last_layer_no_activation self._last_layer_no_batch_norm = last_layer_no_batch_norm diff --git a/easy_rec/python/layers/multihead_cross_attention.py b/easy_rec/python/layers/multihead_cross_attention.py index 911ff7bae..bafb7e019 100644 --- a/easy_rec/python/layers/multihead_cross_attention.py +++ b/easy_rec/python/layers/multihead_cross_attention.py @@ -5,13 +5,10 @@ from __future__ import print_function import math - -import six import tensorflow as tf - from 
easy_rec.python.compat.layers import layer_norm as tf_layer_norm -from easy_rec.python.layers.common_layers import gelu from easy_rec.python.utils.shape_utils import get_shape_list +from easy_rec.python.utils.activation import gelu if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -736,41 +733,3 @@ def layer_norm_and_dropout(input_tensor, dropout_prob, name=None): output_tensor = layer_norm(input_tensor, name) output_tensor = dropout(output_tensor, dropout_prob) return output_tensor - - -def get_activation(activation_string): - """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`. - - Args: - activation_string: String name of the activation function. - - Returns: - A Python function corresponding to the activation function. If - `activation_string` is None, empty, or "linear", this will return None. - If `activation_string` is not a string, it will return `activation_string`. - - Raises: - ValueError: The `activation_string` does not correspond to a known - activation. - """ - # We assume that anything that's not a string is already an activation - # function, so we just return it. 
- if not isinstance(activation_string, six.string_types): - return activation_string - - if not activation_string: - return None - - act = activation_string.lower() - if act == 'linear': - return None - elif act == 'relu': - return tf.nn.relu - elif act == 'gelu': - return gelu - elif act == 'tanh': - return tf.tanh - elif act == 'swish': - return tf.nn.swish - else: - raise ValueError('Unsupported activation: %s' % act) diff --git a/easy_rec/python/layers/sequence_encoder.py b/easy_rec/python/layers/sequence_encoder.py index 07c339890..0d141c094 100644 --- a/easy_rec/python/layers/sequence_encoder.py +++ b/easy_rec/python/layers/sequence_encoder.py @@ -8,6 +8,7 @@ from easy_rec.python.layers import dnn from easy_rec.python.layers import multihead_cross_attention from easy_rec.python.utils.shape_utils import get_shape_list +from easy_rec.python.utils.activation import get_activation if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -93,7 +94,11 @@ def din_encoder(self, seq_features, target_feature, group_name, config, output = tf.squeeze(tf.matmul(scores, keys)) return output - def bst_encoder(self, seq_features, target_feature, group_name, config): + def bst_encoder(self, seq_features, target_feature, group_name, config, is_training): + if not is_training: + config.hidden_dropout_prob = 0.0 + config.attention_probs_dropout_prob = 0.0 + seq_embeds = [seq_fea for seq_fea, _ in seq_features] regularizers.apply_regularization(self._emb_reg, weights_list=seq_embeds) @@ -123,7 +128,11 @@ def bst_encoder(self, seq_features, target_feature, group_name, config): seq_len += 1 if seq_embed_size != config.hidden_size: - seq_input = tf.layers.dense(seq_input, config.hidden_size) + seq_input = tf.layers.dense( + seq_input, + config.hidden_size, + activation=tf.nn.relu, + kernel_regularizer=self._l2_reg) seq_fea = multihead_cross_attention.embedding_postprocessor( seq_input, @@ -134,7 +143,7 @@ def bst_encoder(self, seq_features, target_feature, group_name, config): 
attention_mask = multihead_cross_attention.create_attention_mask_from_input_mask( from_tensor=seq_fea, to_mask=seq_mask) - hidden_act = multihead_cross_attention.get_activation(config.hidden_act) + hidden_act = get_activation(config.hidden_act) attention_fea = multihead_cross_attention.transformer_encoder( seq_fea, hidden_size=config.hidden_size, @@ -146,9 +155,8 @@ def bst_encoder(self, seq_features, target_feature, group_name, config): hidden_dropout_prob=config.hidden_dropout_prob, attention_probs_dropout_prob=config.attention_probs_dropout_prob, initializer_range=config.initializer_range, - name=group_name + - '/bst') # shape: [batch_size, seq_length, hidden_size] - + name=group_name + '/bst') + # attention_fea shape: [batch_size, seq_length, hidden_size] out_fea = attention_fea[:, 0, :] # target feature return out_fea diff --git a/easy_rec/python/layers/uniter.py b/easy_rec/python/layers/uniter.py index 248afc1a9..47ccc678c 100644 --- a/easy_rec/python/layers/uniter.py +++ b/easy_rec/python/layers/uniter.py @@ -5,6 +5,7 @@ from easy_rec.python.layers import dnn from easy_rec.python.layers import multihead_cross_attention from easy_rec.python.utils.shape_utils import get_shape_list +from easy_rec.python.utils.activation import get_activation if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -224,6 +225,10 @@ def image_embeddings(self): return img_fea def __call__(self, is_training, *args, **kwargs): + if not is_training: + self._model_config.hidden_dropout_prob = 0.0 + self._model_config.attention_probs_dropout_prob = 0.0 + sub_modules = [] img_fea = self.image_embeddings() @@ -262,8 +267,7 @@ def __call__(self, is_training, *args, **kwargs): input_mask = tf.concat(masks, axis=1) attention_mask = multihead_cross_attention.create_attention_mask_from_input_mask( from_tensor=all_fea, to_mask=input_mask) - hidden_act = multihead_cross_attention.get_activation( - self._model_config.hidden_act) + hidden_act = get_activation(self._model_config.hidden_act) attention_fea 
= multihead_cross_attention.transformer_encoder( all_fea, hidden_size=hidden_size, diff --git a/easy_rec/python/model/collaborative_metric_learning.py b/easy_rec/python/model/collaborative_metric_learning.py index 84c87ccaa..7e5d7c008 100644 --- a/easy_rec/python/model/collaborative_metric_learning.py +++ b/easy_rec/python/model/collaborative_metric_learning.py @@ -3,7 +3,7 @@ from easy_rec.python.core.metrics import metric_learning_average_precision_at_k from easy_rec.python.core.metrics import metric_learning_recall_at_k from easy_rec.python.layers import dnn -from easy_rec.python.layers.common_layers import gelu +from easy_rec.python.utils.activation import gelu from easy_rec.python.layers.common_layers import highway from easy_rec.python.loss.circle_loss import circle_loss from easy_rec.python.loss.multi_similarity import ms_loss diff --git a/easy_rec/python/utils/activation.py b/easy_rec/python/utils/activation.py new file mode 100644 index 000000000..7b5d5248b --- /dev/null +++ b/easy_rec/python/utils/activation.py @@ -0,0 +1,127 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. + +import tensorflow as tf +import numpy as np +import six +from easy_rec.python.utils.load_class import load_by_path +from tensorflow.python.keras.layers import Layer +try: + from tensorflow.python.keras.layers import BatchNormalization +except ImportError: + BatchNormalization = tf.keras.layers.BatchNormalization +try: + from tensorflow.python.ops.init_ops import Zeros +except ImportError: + from tensorflow.python.ops.init_ops_v2 import Zeros + + +class Dice(Layer): + """The Data Adaptive Activation Function in DIN,which can be viewed as a generalization of PReLu and can adaptively adjust the rectified point according to distribution of input data. + Input shape + - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. 
+ Output shape + - Same shape as the input. + Arguments + - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). + - **epsilon** : Small float added to variance to avoid dividing by zero. + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """ + + def __init__(self, axis=-1, epsilon=1e-9, is_training=None, **kwargs): + self.axis = axis + self.epsilon = epsilon + self.is_training = is_training + super(Dice, self).__init__(**kwargs) + + def build(self, input_shape): + self.bn = BatchNormalization( + axis=self.axis, epsilon=self.epsilon, center=False, scale=False) + self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros( + ), dtype=tf.float32, name='dice_alpha') # name='alpha_'+self.name + super(Dice, self).build(input_shape) # Be sure to call this somewhere! + self.uses_learning_phase = True + + def call(self, inputs, **kwargs): + inputs_normed = self.bn(inputs, training=self.is_training) + # tf.layers.batch_normalization( + # inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False) + x_p = tf.sigmoid(inputs_normed) + return self.alphas * (1.0 - x_p) * inputs + x_p * inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self, ): + config = {'axis': self.axis, 'epsilon': self.epsilon} + base_config = super(Dice, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def gelu(x): + """Gaussian Error Linear Unit. + + This is a smoother version of the RELU. + Original paper: https://arxiv.org/abs/1606.08415 + Args: + x: float Tensor to perform activation. + + Returns: + `x` with the GELU activation applied. 
+ """ + cdf = 0.5 * (1.0 + tf.tanh( + (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) + return x * cdf + + +def get_activation(activation_string, **kwargs): + """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`. + + Args: + activation_string: String name of the activation function. + + Returns: + A Python function corresponding to the activation function. If + `activation_string` is None, empty, or "linear", this will return None. + If `activation_string` is not a string, it will return `activation_string`. + + Raises: + ValueError: The `activation_string` does not correspond to a known + activation. + """ + # We assume that anything that's not a string is already an activation + # function, so we just return it. + if not isinstance(activation_string, six.string_types): + return activation_string + + if not activation_string: + return None + + act = activation_string.lower() + if act == 'linear': + return None + elif act == 'relu': + return tf.nn.relu + elif act == 'gelu': + return gelu + elif act == 'leaky_relu': + return tf.nn.leaky_relu + elif act in ('prelu', 'PRelu'): + return tf.keras.layers.PReLU(**kwargs) + elif act in ("dice", "Dice"): + return Dice(**kwargs) + elif act == 'elu': + return tf.nn.elu + elif act == 'selu': + return tf.nn.selu + elif act == 'tanh': + return tf.tanh + elif act == 'swish': + if tf.__version__ < '1.13.0': + return lambda x: x * tf.sigmoid(x) + return tf.nn.swish + elif act == 'sigmoid': + return tf.nn.sigmoid + else: + return load_by_path(activation_string) From e666f41026539b723806d0918c989cc77d7c1acb Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 10 Mar 2023 16:55:35 +0800 Subject: [PATCH 03/54] [feat]: add sequence encoding module --- easy_rec/python/layers/dnn.py | 13 +++- .../layers/multihead_cross_attention.py | 4 +- easy_rec/python/layers/sequence_encoder.py | 13 ++-- easy_rec/python/layers/uniter.py | 2 +- .../model/collaborative_metric_learning.py | 2 +- easy_rec/python/model/easy_rec_model.py | 
31 ++++---- easy_rec/python/protos/layer.proto | 2 +- easy_rec/python/utils/activation.py | 71 +++++++++++-------- 8 files changed, 81 insertions(+), 57 deletions(-) diff --git a/easy_rec/python/layers/dnn.py b/easy_rec/python/layers/dnn.py index d2af5a4cf..3365f47f0 100644 --- a/easy_rec/python/layers/dnn.py +++ b/easy_rec/python/layers/dnn.py @@ -34,7 +34,13 @@ def __init__(self, self._name = name self._is_training = is_training logging.info('dnn activation function = %s' % self._config.activation) - self.activation = get_activation(self._config.activation, is_training=is_training) + if self._config.activation.lower() == 'dice': + self.activations = [ + get_activation('dice', is_training=is_training, feat_dim=units) + for units in self.hidden_units + ] + else: + self.activation = get_activation(self._config.activation) self._last_layer_no_activation = last_layer_no_activation self._last_layer_no_batch_norm = last_layer_no_batch_norm @@ -51,6 +57,7 @@ def __call__(self, deep_fea, hidden_layer_feature_output=False): if hidden_units_len == 1 and self.hidden_units[0] == 0: return deep_fea + act = self._config.activation.lower() hidden_feature_dict = {} for i, unit in enumerate(self.hidden_units): deep_fea = tf.layers.dense( @@ -67,8 +74,8 @@ def __call__(self, deep_fea, hidden_layer_feature_output=False): trainable=True, name='%s/dnn_%d/bn' % (self._name, i)) if (i + 1 < hidden_units_len) or not self._last_layer_no_activation: - deep_fea = self.activation( - deep_fea, name='%s/dnn_%d/act' % (self._name, i)) + act_fn = self.activations[i] if act == 'dice' else self.activation + deep_fea = act_fn(deep_fea, name='%s/dnn_%d/act' % (self._name, i)) if len(self.dropout_ratio) > 0 and self._is_training: assert self.dropout_ratio[ i] < 1, 'invalid dropout_ratio: %.3f' % self.dropout_ratio[i] diff --git a/easy_rec/python/layers/multihead_cross_attention.py b/easy_rec/python/layers/multihead_cross_attention.py index bafb7e019..92b2b64df 100644 --- 
a/easy_rec/python/layers/multihead_cross_attention.py +++ b/easy_rec/python/layers/multihead_cross_attention.py @@ -5,10 +5,12 @@ from __future__ import print_function import math + import tensorflow as tf + from easy_rec.python.compat.layers import layer_norm as tf_layer_norm -from easy_rec.python.utils.shape_utils import get_shape_list from easy_rec.python.utils.activation import gelu +from easy_rec.python.utils.shape_utils import get_shape_list if tf.__version__ >= '2.0': tf = tf.compat.v1 diff --git a/easy_rec/python/layers/sequence_encoder.py b/easy_rec/python/layers/sequence_encoder.py index 0d141c094..e97ed8d09 100644 --- a/easy_rec/python/layers/sequence_encoder.py +++ b/easy_rec/python/layers/sequence_encoder.py @@ -7,8 +7,8 @@ from easy_rec.python.compat import regularizers from easy_rec.python.layers import dnn from easy_rec.python.layers import multihead_cross_attention -from easy_rec.python.utils.shape_utils import get_shape_list from easy_rec.python.utils.activation import get_activation +from easy_rec.python.utils.shape_utils import get_shape_list if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -39,7 +39,7 @@ def __call__(self, features, group_name, is_training=True, *args, **kwargs): encoder_type = encoder.WhichOneof('encoder').lower() if encoder_type == 'bst': encoding = self.bst_encoder(seq_features, target_feature, group_name, - encoder.bst) + encoder.bst, is_training) outputs.append(encoding) elif encoder_type == 'din': encoding = self.din_encoder(seq_features, target_feature, group_name, @@ -74,7 +74,7 @@ def din_encoder(self, seq_features, target_feature, group_name, config, din_all = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1) din_layer = dnn.DNN( - config.dnn, + config.attention_dnn, self._l2_reg, group_name + '/din_attention', is_training, @@ -91,10 +91,12 @@ def din_encoder(self, seq_features, target_feature, group_name, config, scores = scores / (seq_emb_size**0.5) # normalization with softmax is abandoned 
according to the original paper scores = tf.nn.sigmoid(scores) - output = tf.squeeze(tf.matmul(scores, keys)) + output = tf.squeeze(tf.matmul(scores, keys), axis=[1]) + print('din output shape:', output.shape) return output - def bst_encoder(self, seq_features, target_feature, group_name, config, is_training): + def bst_encoder(self, seq_features, target_feature, group_name, config, + is_training): if not is_training: config.hidden_dropout_prob = 0.0 config.attention_probs_dropout_prob = 0.0 @@ -158,6 +160,7 @@ def bst_encoder(self, seq_features, target_feature, group_name, config, is_train name=group_name + '/bst') # attention_fea shape: [batch_size, seq_length, hidden_size] out_fea = attention_fea[:, 0, :] # target feature + print('bst output shape:', out_fea.shape) return out_fea diff --git a/easy_rec/python/layers/uniter.py b/easy_rec/python/layers/uniter.py index 47ccc678c..3018bad61 100644 --- a/easy_rec/python/layers/uniter.py +++ b/easy_rec/python/layers/uniter.py @@ -4,8 +4,8 @@ from easy_rec.python.layers import dnn from easy_rec.python.layers import multihead_cross_attention -from easy_rec.python.utils.shape_utils import get_shape_list from easy_rec.python.utils.activation import get_activation +from easy_rec.python.utils.shape_utils import get_shape_list if tf.__version__ >= '2.0': tf = tf.compat.v1 diff --git a/easy_rec/python/model/collaborative_metric_learning.py b/easy_rec/python/model/collaborative_metric_learning.py index 7e5d7c008..d785e7141 100644 --- a/easy_rec/python/model/collaborative_metric_learning.py +++ b/easy_rec/python/model/collaborative_metric_learning.py @@ -3,12 +3,12 @@ from easy_rec.python.core.metrics import metric_learning_average_precision_at_k from easy_rec.python.core.metrics import metric_learning_recall_at_k from easy_rec.python.layers import dnn -from easy_rec.python.utils.activation import gelu from easy_rec.python.layers.common_layers import highway from easy_rec.python.loss.circle_loss import circle_loss from 
easy_rec.python.loss.multi_similarity import ms_loss from easy_rec.python.model.easy_rec_model import EasyRecModel from easy_rec.python.protos.loss_pb2 import LossType +from easy_rec.python.utils.activation import gelu from easy_rec.python.utils.proto_util import copy_obj from easy_rec.python.protos.collaborative_metric_learning_pb2 import CoMetricLearningI2I as MetricLearningI2IConfig # NOQA diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index eff1af32a..7815ed0de 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -105,20 +105,23 @@ def build_input_layer(self, model_config, feature_configs): is_training=self._is_training) def get_sequence_encoding(self, group_name=None, is_training=True): - if group_name is None: - seq_encoding = [] - for group in self.feature_groups: - if len(group.sequence_encoders) == 0: - continue - encoding = self.get_sequence_encoding(group.group_name, - self._is_training) - if encoding is not None: - seq_encoding.append(encoding) - if seq_encoding: - return tf.concat(seq_encoding, axis=-1) - else: - return None - return self._sequence_encoder(self._feature_dict, group_name, is_training) + if group_name is not None: + return self._sequence_encoder(self._feature_dict, group_name, is_training) + + seq_encoding = [] + for group in self.feature_groups: + if len(group.sequence_encoders) == 0: + continue + encoding = self.get_sequence_encoding(group.group_name, self._is_training) + if encoding is not None: + seq_encoding.append(encoding) + + if len(seq_encoding) > 1: + return tf.concat(seq_encoding, axis=-1) + elif len(seq_encoding) == 1: + return seq_encoding[0] + else: + return None @abstractmethod def build_predict_graph(self): diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index 482c5241f..814de794e 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -108,5 +108,5 @@ 
message BSTEncoder { message DINEncoder { // din attention layer - required DNN dnn = 1; + required DNN attention_dnn = 1; } diff --git a/easy_rec/python/utils/activation.py b/easy_rec/python/utils/activation.py index 7b5d5248b..39d9011c4 100644 --- a/easy_rec/python/utils/activation.py +++ b/easy_rec/python/utils/activation.py @@ -1,59 +1,66 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. -import tensorflow as tf import numpy as np import six -from easy_rec.python.utils.load_class import load_by_path +import tensorflow as tf from tensorflow.python.keras.layers import Layer + +from easy_rec.python.utils.load_class import load_by_path + try: - from tensorflow.python.keras.layers import BatchNormalization -except ImportError: - BatchNormalization = tf.keras.layers.BatchNormalization -try: - from tensorflow.python.ops.init_ops import Zeros + from tensorflow.python.keras.layers import BatchNormalization except ImportError: - from tensorflow.python.ops.init_ops_v2 import Zeros + BatchNormalization = tf.keras.layers.BatchNormalization class Dice(Layer): - """The Data Adaptive Activation Function in DIN,which can be viewed as a generalization of PReLu and can adaptively adjust the rectified point according to distribution of input data. - Input shape - - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. - Output shape - - Same shape as the input. - Arguments - - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). - - **epsilon** : Small float added to variance to avoid dividing by zero. - References - - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) + """The Data Adaptive Activation Function in DIN. + + Which can be viewed as a generalization of PReLu, and can adaptively adjust the rectified point + according to distribution of input data. + + Input shape + - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + + Output shape + - Same shape as the input. + + Arguments + - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). + - **epsilon** : Small float added to variance to avoid dividing by zero. + + References + - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C] + Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. + ACM, 2018: 1059-1068.] (https://arxiv.org/pdf/1706.06978.pdf) """ - def __init__(self, axis=-1, epsilon=1e-9, is_training=None, **kwargs): + def __init__(self, + feat_dim, + axis=-1, + epsilon=1e-9, + is_training=None, + **kwargs): + super(Dice, self).__init__(**kwargs) + self.feat_dim = feat_dim self.axis = axis self.epsilon = epsilon self.is_training = is_training - super(Dice, self).__init__(**kwargs) - - def build(self, input_shape): self.bn = BatchNormalization( - axis=self.axis, epsilon=self.epsilon, center=False, scale=False) - self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros( - ), dtype=tf.float32, name='dice_alpha') # name='alpha_'+self.name - super(Dice, self).build(input_shape) # Be sure to call this somewhere! 
- self.uses_learning_phase = True + axis=self.axis, epsilon=self.epsilon, center=False, scale=False) + self.alphas = tf.Variable(tf.zeros([feat_dim]), dtype=tf.float32) def call(self, inputs, **kwargs): inputs_normed = self.bn(inputs, training=self.is_training) - # tf.layers.batch_normalization( - # inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False) x_p = tf.sigmoid(inputs_normed) return self.alphas * (1.0 - x_p) * inputs + x_p * inputs def compute_output_shape(self, input_shape): return input_shape - def get_config(self, ): + def get_config(self,): config = {'axis': self.axis, 'epsilon': self.epsilon} base_config = super(Dice, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -108,8 +115,10 @@ def get_activation(activation_string, **kwargs): elif act == 'leaky_relu': return tf.nn.leaky_relu elif act in ('prelu', 'PRelu'): + if len(kwargs) == 0: + return tf.nn.leaky_relu return tf.keras.layers.PReLU(**kwargs) - elif act in ("dice", "Dice"): + elif act in ('dice', 'Dice'): return Dice(**kwargs) elif act == 'elu': return tf.nn.elu From 778e70eac41f93d61a5f3324664204ba57343d8b Mon Sep 17 00:00:00 2001 From: weisu Date: Sun, 12 Mar 2023 20:46:59 +0800 Subject: [PATCH 04/54] [feat]: add sequence encoding module --- easy_rec/python/layers/bst.py | 90 +++++++++++ easy_rec/python/layers/din.py | 53 +++++++ easy_rec/python/layers/dnn.py | 16 +- easy_rec/python/layers/sequence_encoder.py | 169 ++++++--------------- easy_rec/python/model/easy_rec_model.py | 20 ++- easy_rec/python/protos/layer.proto | 7 +- easy_rec/python/utils/activation.py | 27 ++-- 7 files changed, 234 insertions(+), 148 deletions(-) create mode 100644 easy_rec/python/layers/bst.py create mode 100644 easy_rec/python/layers/din.py diff --git a/easy_rec/python/layers/bst.py b/easy_rec/python/layers/bst.py new file mode 100644 index 000000000..2bdb20c9d --- /dev/null +++ b/easy_rec/python/layers/bst.py @@ -0,0 +1,90 @@ +# -*- encoding: utf-8 -*- +# 
Copyright (c) Alibaba, Inc. and its affiliates. +import tensorflow as tf +from tensorflow.python.keras.layers import Layer + +from easy_rec.python.layers import multihead_cross_attention +from easy_rec.python.utils.activation import get_activation +from easy_rec.python.utils.shape_utils import get_shape_list + + +class BST(Layer): + + def __init__(self, config, l2_reg, name='din', **kwargs): + super(BST, self).__init__(name=name, **kwargs) + self.l2_reg = l2_reg + self.config = config + + def call(self, inputs, training=None, **kwargs): + seq_features, target_feature = inputs + if not training: + self.config.hidden_dropout_prob = 0.0 + self.config.attention_probs_dropout_prob = 0.0 + + seq_embeds = [seq_fea for seq_fea, _ in seq_features] + + max_position = self.config.max_position_embeddings + # max_seq_len: the max sequence length in current mini-batch, all sequences are padded to this length + batch_size, max_seq_len, _ = get_shape_list(seq_features[0][0], 3) + valid_len = tf.assert_less_equal( + max_seq_len, + max_position, + message='sequence length is greater than `max_position_embeddings`:' + + str(max_position) + ' in feature group:' + self.name) + with tf.control_dependencies([valid_len]): + # seq_input: [batch_size, seq_len, embed_size] + seq_input = tf.concat(seq_embeds, axis=-1) + + # seq_len: [batch_size, 1], the true length of each sequence + seq_len = seq_features[0][1] + seq_embed_size = seq_input.shape.as_list()[-1] + if target_feature is not None: + target_size = target_feature.shape.as_list()[-1] + assert seq_embed_size == target_size, 'the embedding size of sequence and target item is not equal' \ + ' in feature group:' + self.name + # target_feature: [batch_size, 1, embed_size] + target_feature = tf.expand_dims(target_feature, 1) + # seq_input: [batch_size, seq_len+1, embed_size] + seq_input = tf.concat([target_feature, seq_input], axis=1) + max_seq_len += 1 + seq_len += 1 + max_position += 1 + + seq_input = tf.layers.dense( + seq_input, + 
self.config.hidden_size, + activation=tf.nn.leaky_relu, + kernel_regularizer=self.l2_reg) + + seq_fea = multihead_cross_attention.embedding_postprocessor( + seq_input, + position_embedding_name=self.name + '/position_embeddings', + max_position_embeddings=max_position) + seq_mask = tf.map_fn( + fn=lambda t: dynamic_mask(t, max_seq_len), elems=tf.to_int32(seq_len)) + attention_mask = multihead_cross_attention.create_attention_mask_from_input_mask( + from_tensor=seq_fea, to_mask=seq_mask) + + hidden_act = get_activation(self.config.hidden_act) + attention_fea = multihead_cross_attention.transformer_encoder( + seq_fea, + hidden_size=self.config.hidden_size, + num_hidden_layers=self.config.num_hidden_layers, + num_attention_heads=self.config.num_attention_heads, + attention_mask=attention_mask, + intermediate_size=self.config.intermediate_size, + intermediate_act_fn=hidden_act, + hidden_dropout_prob=self.config.hidden_dropout_prob, + attention_probs_dropout_prob=self.config.attention_probs_dropout_prob, + initializer_range=self.config.initializer_range, + name=self.name + '/bst') + # attention_fea shape: [batch_size, seq_length, hidden_size] + out_fea = attention_fea[:, 0, :] # target feature + print('bst output shape:', out_fea.shape) + return out_fea + + +def dynamic_mask(x, max_len): + ones = tf.ones(shape=tf.stack([x]), dtype=tf.int32) + zeros = tf.zeros(shape=tf.stack([max_len - x]), dtype=tf.int32) + return tf.concat([ones, zeros], axis=0) diff --git a/easy_rec/python/layers/din.py b/easy_rec/python/layers/din.py new file mode 100644 index 000000000..3b6f42df5 --- /dev/null +++ b/easy_rec/python/layers/din.py @@ -0,0 +1,53 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import tensorflow as tf +from tensorflow.python.keras.layers import Layer + +from easy_rec.python.layers import dnn +from easy_rec.python.utils.shape_utils import get_shape_list + + +class DIN(Layer): + + def __init__(self, config, l2_reg, name='din', **kwargs): + super(DIN, self).__init__(name=name, **kwargs) + self.l2_reg = l2_reg + self.config = config + + def call(self, inputs, training=None, **kwargs): + seq_features, target_feature = inputs + seq_input = [seq_fea for seq_fea, _ in seq_features] + keys = tf.concat(seq_input, axis=-1) + + target_emb_size = target_feature.shape.as_list()[-1] + seq_emb_size = keys.shape.as_list()[-1] + assert target_emb_size == seq_emb_size, 'the embedding size of sequence and target item is not equal' \ + ' in feature group:' + self.name + + batch_size, max_seq_len, _ = get_shape_list(keys, 3) + queries = tf.tile(tf.expand_dims(target_feature, 1), [1, max_seq_len, 1]) + din_all = tf.concat([queries, keys, queries - keys, queries * keys], + axis=-1) + din_layer = dnn.DNN( + self.config.attention_dnn, + self.l2_reg, + self.name + '/din_attention', + training, + last_layer_no_activation=True, + last_layer_no_batch_norm=True) + output = din_layer(din_all) # [B, L, 1] + scores = tf.transpose(output, [0, 2, 1]) # [B, 1, L] + + seq_len = seq_features[0][1] + seq_mask = tf.sequence_mask(seq_len, max_seq_len, dtype=tf.bool) + seq_mask = tf.expand_dims(seq_mask, 1) + paddings = tf.ones_like(scores) * (-2**32 + 1) + scores = tf.where(seq_mask, scores, paddings) # [B, 1, L] + scores = scores / (seq_emb_size**0.5) + # normalization with softmax is abandoned according to the original paper + scores = tf.nn.sigmoid(scores) + output = tf.squeeze(tf.matmul(scores, keys), axis=[1]) + if self.config.need_target_feature: + output = tf.concat([output, target_feature], axis=-1) + print('din output shape:', output.shape) + return output diff --git a/easy_rec/python/layers/dnn.py b/easy_rec/python/layers/dnn.py index 3365f47f0..6016d6233 100644 --- 
a/easy_rec/python/layers/dnn.py +++ b/easy_rec/python/layers/dnn.py @@ -34,13 +34,10 @@ def __init__(self, self._name = name self._is_training = is_training logging.info('dnn activation function = %s' % self._config.activation) - if self._config.activation.lower() == 'dice': - self.activations = [ - get_activation('dice', is_training=is_training, feat_dim=units) - for units in self.hidden_units - ] - else: - self.activation = get_activation(self._config.activation) + self.activations = [ + get_activation(self._config.activation, is_training=is_training) + for _ in self.hidden_units + ] self._last_layer_no_activation = last_layer_no_activation self._last_layer_no_batch_norm = last_layer_no_batch_norm @@ -57,7 +54,6 @@ def __call__(self, deep_fea, hidden_layer_feature_output=False): if hidden_units_len == 1 and self.hidden_units[0] == 0: return deep_fea - act = self._config.activation.lower() hidden_feature_dict = {} for i, unit in enumerate(self.hidden_units): deep_fea = tf.layers.dense( @@ -74,8 +70,8 @@ def __call__(self, deep_fea, hidden_layer_feature_output=False): trainable=True, name='%s/dnn_%d/bn' % (self._name, i)) if (i + 1 < hidden_units_len) or not self._last_layer_no_activation: - act_fn = self.activations[i] if act == 'dice' else self.activation - deep_fea = act_fn(deep_fea, name='%s/dnn_%d/act' % (self._name, i)) + deep_fea = self.activations[i]( + deep_fea, name='%s/dnn_%d/act' % (self._name, i)) if len(self.dropout_ratio) > 0 and self._is_training: assert self.dropout_ratio[ i] < 1, 'invalid dropout_ratio: %.3f' % self.dropout_ratio[i] diff --git a/easy_rec/python/layers/sequence_encoder.py b/easy_rec/python/layers/sequence_encoder.py index e97ed8d09..80c90eafa 100644 --- a/easy_rec/python/layers/sequence_encoder.py +++ b/easy_rec/python/layers/sequence_encoder.py @@ -4,11 +4,9 @@ import tensorflow as tf -from easy_rec.python.compat import regularizers -from easy_rec.python.layers import dnn -from easy_rec.python.layers import 
multihead_cross_attention -from easy_rec.python.utils.activation import get_activation -from easy_rec.python.utils.shape_utils import get_shape_list +from easy_rec.python.layers.bst import BST +from easy_rec.python.layers.din import DIN +from easy_rec.python.protos.feature_config_pb2 import FeatureConfig if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -16,13 +14,51 @@ class SequenceEncoder(object): - def __init__(self, input_layer, feature_groups_config, emb_reg, l2_reg): + def __init__(self, input_layer, feature_configs, feature_groups_config, + l2_reg): self._input_layer = input_layer self._feature_groups_config = { x.group_name: x for x in feature_groups_config } - self._emb_reg = emb_reg self._l2_reg = l2_reg + self._feature_config_by_name = { + x.feature_name if x.HasField('feature_name') else x.input_names[0]: x + for x in feature_configs + } + + for name, group in self._feature_groups_config.items(): + if len(group.sequence_encoders) == 0: + continue + check_share_emb = False + for encoder in group.sequence_encoders: + if encoder.force_share_embeddings: + check_share_emb = True + break + if not check_share_emb: + continue + if not self.check_share_embedding(group): + raise ValueError( + 'sequence feature group `%s` check share embedding failed, ' + 'you should add `embedding_name` to feature config' % name) + + def check_share_embedding(self, feature_group): + seq_emb_names = set() + target_emb_names = set() + for feature in feature_group.feature_names: + conf = self._feature_config_by_name[feature] + if not conf.HasField('embedding_name'): + return False + if conf.feature_type == FeatureConfig.FeatureType.SequenceFeature: + seq_emb_names.add(conf.embedding_name) + else: + target_emb_names.add(conf.embedding_name) + + if seq_emb_names != target_emb_names: + tf.logging.error( + 'sequence share embedding names: %s, target share embedding names: %s' + % (','.join(seq_emb_names), ','.join(target_emb_names))) + return False + return True def __call__(self, 
features, group_name, is_training=True, *args, **kwargs): group_config = self._feature_groups_config[group_name] @@ -38,12 +74,12 @@ def __call__(self, features, group_name, is_training=True, *args, **kwargs): for encoder in group_config.sequence_encoders: encoder_type = encoder.WhichOneof('encoder').lower() if encoder_type == 'bst': - encoding = self.bst_encoder(seq_features, target_feature, group_name, - encoder.bst, is_training) + bst = BST(encoder.bst, self._l2_reg, name=group_name) + encoding = bst([seq_features, target_feature], is_training) outputs.append(encoding) elif encoder_type == 'din': - encoding = self.din_encoder(seq_features, target_feature, group_name, - encoder.din, is_training) + din = DIN(encoder.din, self._l2_reg, name=group_name) + encoding = din([seq_features, target_feature], is_training) outputs.append(encoding) else: assert False, 'unsupported sequence encode type: ' + encoder_type @@ -57,114 +93,3 @@ def __call__(self, features, group_name, is_training=True, *args, **kwargs): return outputs[0] return tf.concat(outputs, axis=-1) - - def din_encoder(self, seq_features, target_feature, group_name, config, - is_training): - seq_input = [seq_fea for seq_fea, _ in seq_features] - regularizers.apply_regularization(self._emb_reg, weights_list=seq_input) - keys = tf.concat(seq_input, axis=-1) - - target_emb_size = target_feature.shape.as_list()[-1] - seq_emb_size = keys.shape.as_list()[-1] - assert target_emb_size == seq_emb_size, 'the embedding size of sequence and target item is not equal' \ - ' in feature group:' + group_name - - batch_size, max_seq_len, _ = get_shape_list(keys, 3) - queries = tf.tile(tf.expand_dims(target_feature, 1), [1, max_seq_len, 1]) - din_all = tf.concat([queries, keys, queries - keys, queries * keys], - axis=-1) - din_layer = dnn.DNN( - config.attention_dnn, - self._l2_reg, - group_name + '/din_attention', - is_training, - last_layer_no_activation=True, - last_layer_no_batch_norm=True) - output = din_layer(din_all) # 
[B, L, 1] - scores = tf.transpose(output, [0, 2, 1]) # [B, 1, L] - - seq_len = seq_features[0][1] - seq_mask = tf.sequence_mask(seq_len, max_seq_len, dtype=tf.bool) - seq_mask = tf.expand_dims(seq_mask, 1) - paddings = tf.ones_like(scores) * (-2**32 + 1) - scores = tf.where(seq_mask, scores, paddings) # [B, 1, L] - scores = scores / (seq_emb_size**0.5) - # normalization with softmax is abandoned according to the original paper - scores = tf.nn.sigmoid(scores) - output = tf.squeeze(tf.matmul(scores, keys), axis=[1]) - print('din output shape:', output.shape) - return output - - def bst_encoder(self, seq_features, target_feature, group_name, config, - is_training): - if not is_training: - config.hidden_dropout_prob = 0.0 - config.attention_probs_dropout_prob = 0.0 - - seq_embeds = [seq_fea for seq_fea, _ in seq_features] - regularizers.apply_regularization(self._emb_reg, weights_list=seq_embeds) - - max_position = config.max_position_embeddings - batch_size, max_seq_len, _ = get_shape_list(seq_features[0][0], 3) - valid_len = tf.assert_less_equal( - max_seq_len, - max_position, - message='sequence length is greater than `max_position_embeddings`:' + - str(max_position) + ' in feature group:' + group_name) - with tf.control_dependencies([valid_len]): - # seq_input: [batch_size, seq_len, embed_size] - seq_input = tf.concat(seq_embeds, axis=-1) - - # seq_len: [batch_size, ], 假设每个sequence feature的length都是相同的 - seq_len = seq_features[0][1] - seq_embed_size = seq_input.shape.as_list()[-1] - if target_feature is not None: - target_size = target_feature.shape.as_list()[-1] - assert seq_embed_size == target_size, 'the embedding size of sequence and target item is not equal' \ - ' in feature group:' + group_name - # target_feature: [batch_size, 1, embed_size] - target_feature = tf.expand_dims(target_feature, 1) - # seq_input: [batch_size, seq_len+1, embed_size] - seq_input = tf.concat([target_feature, seq_input], axis=1) - max_seq_len += 1 - seq_len += 1 - - if seq_embed_size 
!= config.hidden_size: - seq_input = tf.layers.dense( - seq_input, - config.hidden_size, - activation=tf.nn.relu, - kernel_regularizer=self._l2_reg) - - seq_fea = multihead_cross_attention.embedding_postprocessor( - seq_input, - position_embedding_name=group_name + '_position_embeddings', - max_position_embeddings=max_position) - seq_mask = tf.map_fn( - fn=lambda t: dynamic_mask(t, max_seq_len), elems=tf.to_int32(seq_len)) - attention_mask = multihead_cross_attention.create_attention_mask_from_input_mask( - from_tensor=seq_fea, to_mask=seq_mask) - - hidden_act = get_activation(config.hidden_act) - attention_fea = multihead_cross_attention.transformer_encoder( - seq_fea, - hidden_size=config.hidden_size, - num_hidden_layers=config.num_hidden_layers, - num_attention_heads=config.num_attention_heads, - attention_mask=attention_mask, - intermediate_size=config.intermediate_size, - intermediate_act_fn=hidden_act, - hidden_dropout_prob=config.hidden_dropout_prob, - attention_probs_dropout_prob=config.attention_probs_dropout_prob, - initializer_range=config.initializer_range, - name=group_name + '/bst') - # attention_fea shape: [batch_size, seq_length, hidden_size] - out_fea = attention_fea[:, 0, :] # target feature - print('bst output shape:', out_fea.shape) - return out_fea - - -def dynamic_mask(x, max_len): - ones = tf.ones(shape=tf.stack([x]), dtype=tf.int32) - zeros = tf.zeros(shape=tf.stack([max_len - x]), dtype=tf.int32) - return tf.concat([ones, zeros], axis=0) diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index 7815ed0de..e28660c45 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -61,9 +61,10 @@ def __init__(self, if constant.SAMPLE_WEIGHT in features: self._sample_weight = features[constant.SAMPLE_WEIGHT] - self._sequence_encoder = SequenceEncoder(self._input_layer, + self._sequence_encoder = SequenceEncoder(self._input_layer, feature_configs, 
model_config.feature_groups, - self._emb_reg, self._l2_reg) + self._l2_reg) + self._sequence_encoding_by_group_name = {} @property def embedding_regularization(self): @@ -106,13 +107,24 @@ def build_input_layer(self, model_config, feature_configs): def get_sequence_encoding(self, group_name=None, is_training=True): if group_name is not None: - return self._sequence_encoder(self._feature_dict, group_name, is_training) + if group_name in self._sequence_encoding_by_group_name: + return self._sequence_encoding_by_group_name[group_name] + encoding = self._sequence_encoder(self._feature_dict, group_name, + is_training) + self._sequence_encoding_by_group_name[group_name] = encoding + return encoding seq_encoding = [] for group in self.feature_groups: if len(group.sequence_encoders) == 0: continue - encoding = self.get_sequence_encoding(group.group_name, self._is_training) + group_name = group.group_name + if group_name in self._sequence_encoding_by_group_name: + encoding = self._sequence_encoding_by_group_name[group_name] + else: + encoding = self._sequence_encoder(self._feature_dict, group_name, + is_training) + self._sequence_encoding_by_group_name[group_name] = encoding if encoding is not None: seq_encoding.append(encoding) diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index 814de794e..a5917a38d 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -78,9 +78,10 @@ message UniterTower { message SequenceEncoder { // encoder parameters oneof encoder { - BSTEncoder bst = 1; - DINEncoder din = 2; + BSTEncoder bst = 101; + DINEncoder din = 102; } + required bool force_share_embeddings = 1 [default = true]; } message BSTEncoder { @@ -109,4 +110,6 @@ message BSTEncoder { message DINEncoder { // din attention layer required DNN attention_dnn = 1; + // whether to keep target item feature + required bool need_target_feature = 2 [default = true]; } diff --git a/easy_rec/python/utils/activation.py 
b/easy_rec/python/utils/activation.py index 39d9011c4..25df2a486 100644 --- a/easy_rec/python/utils/activation.py +++ b/easy_rec/python/utils/activation.py @@ -13,6 +13,11 @@ except ImportError: BatchNormalization = tf.keras.layers.BatchNormalization +try: + from tensorflow.python.ops.init_ops import Zeros +except ImportError: + from tensorflow.python.ops.init_ops_v2 import Zeros + class Dice(Layer): """The Data Adaptive Activation Function in DIN. @@ -37,20 +42,22 @@ class Dice(Layer): ACM, 2018: 1059-1068.] (https://arxiv.org/pdf/1706.06978.pdf) """ - def __init__(self, - feat_dim, - axis=-1, - epsilon=1e-9, - is_training=None, - **kwargs): + def __init__(self, axis=-1, epsilon=1e-9, is_training=None, **kwargs): super(Dice, self).__init__(**kwargs) - self.feat_dim = feat_dim self.axis = axis self.epsilon = epsilon self.is_training = is_training + + def build(self, input_shape): + super(Dice, self).build(input_shape) # Be sure to call this somewhere! self.bn = BatchNormalization( axis=self.axis, epsilon=self.epsilon, center=False, scale=False) - self.alphas = tf.Variable(tf.zeros([feat_dim]), dtype=tf.float32) + self.alphas = self.add_weight( + shape=(input_shape[-1],), + initializer=Zeros(), + dtype=tf.float32, + name='dice_alpha') # name='alpha_'+self.name + self.uses_learning_phase = True def call(self, inputs, **kwargs): inputs_normed = self.bn(inputs, training=self.is_training) @@ -114,11 +121,11 @@ def get_activation(activation_string, **kwargs): return gelu elif act == 'leaky_relu': return tf.nn.leaky_relu - elif act in ('prelu', 'PRelu'): + elif act == 'prelu': if len(kwargs) == 0: return tf.nn.leaky_relu return tf.keras.layers.PReLU(**kwargs) - elif act in ('dice', 'Dice'): + elif act == 'dice': return Dice(**kwargs) elif act == 'elu': return tf.nn.elu From 0254902ff454e34ccc9b88db3e40550f25fa0335 Mon Sep 17 00:00:00 2001 From: weisu Date: Sat, 18 Mar 2023 19:49:41 +0800 Subject: [PATCH 05/54] [feat]: add pairwise logistic loss --- 
easy_rec/python/builders/loss_builder.py | 36 ++++- .../python/compat/weight_decay_optimizers.py | 7 +- easy_rec/python/layers/bst.py | 21 +-- easy_rec/python/layers/din.py | 10 +- easy_rec/python/layers/dnn.py | 5 +- easy_rec/python/loss/focal_loss.py | 62 ++++++++ easy_rec/python/loss/pairwise_loss.py | 136 ++++++++++++++++-- easy_rec/python/model/multi_task_model.py | 4 +- easy_rec/python/model/rank_model.py | 38 +++-- easy_rec/python/protos/loss.proto | 29 ++++ easy_rec/python/protos/tower.proto | 6 +- easy_rec/python/utils/activation.py | 36 +++-- setup.cfg | 2 +- 13 files changed, 338 insertions(+), 54 deletions(-) create mode 100644 easy_rec/python/loss/focal_loss.py diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index a26372605..cf2751965 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -4,6 +4,9 @@ import tensorflow as tf +from easy_rec.python.loss.focal_loss import sigmoid_focal_loss_with_logits +from easy_rec.python.loss.pairwise_loss import pairwise_focal_loss +from easy_rec.python.loss.pairwise_loss import pairwise_logistic_loss from easy_rec.python.loss.pairwise_loss import pairwise_loss from easy_rec.python.protos.loss_pb2 import LossType @@ -36,7 +39,28 @@ def build(loss_type, return tf.losses.mean_squared_error( labels=label, predictions=pred, weights=loss_weight, **kwargs) elif loss_type == LossType.PAIR_WISE_LOSS: - return pairwise_loss(label, pred) + session = kwargs.get('session_ids', None) + margin = 0 if loss_param is None else loss_param.margin + return pairwise_loss( + label, pred, session_ids=session, margin=margin, weights=loss_weight) + elif loss_type == LossType.PAIRWISE_LOGISTIC_LOSS: + session = kwargs.get('session_ids', None) + temp = 1.0 if loss_param is None else loss_param.temperature + return pairwise_logistic_loss( + label, pred, session_ids=session, temperature=temp, weights=loss_weight) + elif loss_type == 
LossType.PAIRWISE_FOCAL_LOSS: + session = kwargs.get('session_ids', None) + if loss_param is None: + return pairwise_focal_loss( + label, pred, session_ids=session, weights=loss_weight) + return pairwise_focal_loss( + label, + pred, + session_ids=session, + gamma=loss_param.gamma, + alpha=loss_param.alpha if loss_param.HasField('alpha') else None, + margin=loss_param.margin, + weights=loss_weight) elif loss_type == LossType.F1_REWEIGHTED_LOSS: f1_beta_square = 1.0 if loss_param is None else loss_param.f1_beta_square label_smoothing = 0 if loss_param is None else loss_param.label_smoothing @@ -46,6 +70,16 @@ def build(loss_type, f1_beta_square, weights=loss_weight, label_smoothing=label_smoothing) + elif loss_type == LossType.BINARY_FOCAL_LOSS: + if loss_param is None: + return sigmoid_focal_loss_with_logits( + label, pred, sample_weights=loss_weight) + gamma = loss_param.gamma + alpha = None + if loss_param.HasField('alpha'): + alpha = loss_param.alpha + return sigmoid_focal_loss_with_logits( + label, pred, gamma=gamma, alpha=alpha, sample_weights=loss_weight) else: raise ValueError('unsupported loss type: %s' % LossType.Name(loss_type)) diff --git a/easy_rec/python/compat/weight_decay_optimizers.py b/easy_rec/python/compat/weight_decay_optimizers.py index d29dce5bb..26eb9754f 100755 --- a/easy_rec/python/compat/weight_decay_optimizers.py +++ b/easy_rec/python/compat/weight_decay_optimizers.py @@ -411,10 +411,12 @@ def __init__(self, try: - from tensorflow.python.training import AdamAsyncOptimizer + # from tensorflow.python.training import AdamAsyncOptimizer + import tensorflow as tf @tf_export('contrib.opt.AdamAsyncWOptimizer') - class AdamAsyncWOptimizer(DecoupledWeightDecayExtension, AdamAsyncOptimizer): + class AdamAsyncWOptimizer(DecoupledWeightDecayExtension, + tf.train.AdamAsyncOptimizer): """Optimizer that implements the Adam algorithm with weight decay. 
This is an implementation of the AdamW optimizer described in ["Fixing @@ -472,4 +474,5 @@ def __init__(self, use_locking=use_locking, name=name) except ImportError: + print('import AdamAsyncOptimizer failed') pass diff --git a/easy_rec/python/layers/bst.py b/easy_rec/python/layers/bst.py index 2bdb20c9d..87e12770c 100644 --- a/easy_rec/python/layers/bst.py +++ b/easy_rec/python/layers/bst.py @@ -1,21 +1,23 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. import tensorflow as tf -from tensorflow.python.keras.layers import Layer from easy_rec.python.layers import multihead_cross_attention from easy_rec.python.utils.activation import get_activation from easy_rec.python.utils.shape_utils import get_shape_list +# from tensorflow.python.keras.layers import Layer -class BST(Layer): + +class BST(object): def __init__(self, config, l2_reg, name='din', **kwargs): - super(BST, self).__init__(name=name, **kwargs) + # super(BST, self).__init__(name=name, **kwargs) + self.name = name self.l2_reg = l2_reg self.config = config - def call(self, inputs, training=None, **kwargs): + def __call__(self, inputs, training=None, **kwargs): seq_features, target_feature = inputs if not training: self.config.hidden_dropout_prob = 0.0 @@ -50,11 +52,12 @@ def call(self, inputs, training=None, **kwargs): seq_len += 1 max_position += 1 - seq_input = tf.layers.dense( - seq_input, - self.config.hidden_size, - activation=tf.nn.leaky_relu, - kernel_regularizer=self.l2_reg) + if seq_embed_size != self.config.hidden_size: + seq_input = tf.layers.dense( + seq_input, + self.config.hidden_size, + activation=tf.nn.relu, + kernel_regularizer=self.l2_reg) seq_fea = multihead_cross_attention.embedding_postprocessor( seq_input, diff --git a/easy_rec/python/layers/din.py b/easy_rec/python/layers/din.py index 3b6f42df5..60d106fe3 100644 --- a/easy_rec/python/layers/din.py +++ b/easy_rec/python/layers/din.py @@ -1,20 +1,22 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. 
and its affiliates. import tensorflow as tf -from tensorflow.python.keras.layers import Layer from easy_rec.python.layers import dnn from easy_rec.python.utils.shape_utils import get_shape_list +# from tensorflow.python.keras.layers import Layer -class DIN(Layer): + +class DIN(object): def __init__(self, config, l2_reg, name='din', **kwargs): - super(DIN, self).__init__(name=name, **kwargs) + # super(DIN, self).__init__(name=name, **kwargs) + self.name = name self.l2_reg = l2_reg self.config = config - def call(self, inputs, training=None, **kwargs): + def __call__(self, inputs, training=None, **kwargs): seq_features, target_feature = inputs seq_input = [seq_fea for seq_fea, _ in seq_features] keys = tf.concat(seq_input, axis=-1) diff --git a/easy_rec/python/layers/dnn.py b/easy_rec/python/layers/dnn.py index 6016d6233..74e355e82 100644 --- a/easy_rec/python/layers/dnn.py +++ b/easy_rec/python/layers/dnn.py @@ -35,8 +35,9 @@ def __init__(self, self._is_training = is_training logging.info('dnn activation function = %s' % self._config.activation) self.activations = [ - get_activation(self._config.activation, is_training=is_training) - for _ in self.hidden_units + get_activation( + self._config.activation, is_training=is_training, feat_dim=units) + for units in self.hidden_units ] self._last_layer_no_activation = last_layer_no_activation self._last_layer_no_batch_norm = last_layer_no_batch_norm diff --git a/easy_rec/python/loss/focal_loss.py b/easy_rec/python/loss/focal_loss.py new file mode 100644 index 000000000..d596b7938 --- /dev/null +++ b/easy_rec/python/loss/focal_loss.py @@ -0,0 +1,62 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +import tensorflow as tf + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +def sigmoid_focal_loss_with_logits(labels, + logits, + gamma=2.0, + alpha=None, + sample_weights=None): + """Implements the focal loss function. 
+ + Focal loss was first introduced in the RetinaNet paper + (https://arxiv.org/pdf/1708.02002.pdf). Focal loss is extremely useful for + classification when you have highly imbalanced classes. It down-weights + well-classified examples and focuses on hard examples. The loss value is + much high for a sample which is misclassified by the classifier as compared + to the loss value corresponding to a well-classified example. One of the + best use-cases of focal loss is its usage in object detection where the + imbalance between the background class and other classes is extremely high. + + Args + labels: true targets tensor. + logits: predictions tensor. + alpha: balancing factor. + gamma: modulating factor. + + Returns: + Weighted loss float `Tensor`. If `reduction` is `NONE`,this has the + same shape as `y_true`; otherwise, it is scalar. + + Raises: + ValueError: If the shape of `sample_weight` is invalid or value of + `gamma` is less than zero + """ + if gamma and gamma < 0: + raise ValueError('Value of gamma should be greater than or equal to zero') + logging.info('[focal_loss] gamma: {}, alpha: {}'.format(gamma, alpha)) + + y_true = tf.cast(labels, logits.dtype) + + # convert the predictions into probabilities + y_pred = tf.nn.sigmoid(logits) + p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred)) + weights = tf.pow((1 - p_t), gamma) + + if alpha is not None: + alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true)) + weights *= alpha_factor + + if sample_weights is not None: + if tf.is_numeric_tensor(sample_weights): + weights *= tf.cast(sample_weights, tf.float32) + else: + weights *= sample_weights + + return tf.losses.sigmoid_cross_entropy(y_true, logits, weights=weights) diff --git a/easy_rec/python/loss/pairwise_loss.py b/easy_rec/python/loss/pairwise_loss.py index 9e16e3bdb..d2af10cb8 100644 --- a/easy_rec/python/loss/pairwise_loss.py +++ b/easy_rec/python/loss/pairwise_loss.py @@ -1,27 +1,147 @@ -# coding=utf-8 +# -*- encoding:utf-8 -*- # Copyright 
(c) Alibaba, Inc. and its affiliates. import logging import tensorflow as tf +from focal_loss import sigmoid_focal_loss_with_logits +from tensorflow.python.ops.losses.losses_impl import compute_weighted_loss + +from easy_rec.python.utils.shape_utils import get_shape_list if tf.__version__ >= '2.0': tf = tf.compat.v1 -def pairwise_loss(labels, logits): - pairwise_logits = tf.expand_dims(logits, -1) - tf.expand_dims(logits, 0) - logging.info('[pairwise_loss] pairwise logits: {}'.format(pairwise_logits)) +def pairwise_loss(labels, logits, session_ids=None, margin=0, weights=1.0): + """Pairwise loss. Also see `pairwise_logistic_loss` below. + Args: + labels: a `Tensor` with shape [batch_size]. e.g. click or not click in the session. + logits: a `Tensor` with shape [batch_size]. e.g. the value of last neuron before activation. + session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. user_id + margin: the margin between positive and negative sample pair + weights: sample weights + """ + logging.info('[pairwise_loss] margin: {}'.format(margin)) + pairwise_logits = tf.math.subtract( + tf.expand_dims(logits, -1), tf.expand_dims(logits, 0)) - margin pairwise_mask = tf.greater( tf.expand_dims(labels, -1) - tf.expand_dims(labels, 0), 0) - logging.info('[pairwise_loss] mask: {}'.format(pairwise_mask)) + if session_ids is not None: + logging.info('[pairwise_loss] use session ids') + group_equal = tf.equal( + tf.expand_dims(session_ids, -1), tf.expand_dims(session_ids, 0)) + pairwise_mask = tf.logical_and(pairwise_mask, group_equal) + + pairwise_logits = tf.boolean_mask(pairwise_logits, pairwise_mask) + pairwise_pseudo_labels = tf.ones_like(pairwise_logits) + + if tf.is_numeric_tensor(weights): + logging.info('[pairwise_loss] use sample weight') + weights = tf.expand_dims(tf.cast(weights, tf.float32), -1) + batch_size, _ = get_shape_list(weights, 2) + pairwise_weights = tf.tile(weights, tf.stack([1, batch_size])) + 
pairwise_weights = tf.boolean_mask(pairwise_weights, pairwise_mask) + else: + pairwise_weights = weights + + loss = tf.losses.sigmoid_cross_entropy( + pairwise_pseudo_labels, pairwise_logits, weights=pairwise_weights) + # set rank loss to zero if a batch has no positive sample. + loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss) + return loss + +def pairwise_focal_loss(labels, + logits, + session_ids=None, + margin=0, + gamma=2, + alpha=None, + weights=1.0): + logging.info('[pairwise_focal_loss] margin: {}, gamma: {}, alpha: {}'.format( + margin, gamma, alpha)) + pairwise_logits = tf.math.subtract( + tf.expand_dims(logits, -1), tf.expand_dims(logits, 0)) - margin + pairwise_mask = tf.greater( + tf.expand_dims(labels, -1) - tf.expand_dims(labels, 0), 0) + if session_ids is not None: + logging.info('[pairwise_focal_loss] use session ids') + group_equal = tf.equal( + tf.expand_dims(session_ids, -1), tf.expand_dims(session_ids, 0)) + pairwise_mask = tf.logical_and(pairwise_mask, group_equal) pairwise_logits = tf.boolean_mask(pairwise_logits, pairwise_mask) - logging.info('[pairwise_loss] after masking: {}'.format(pairwise_logits)) + + if tf.is_numeric_tensor(weights): + logging.info('[pairwise_focal_loss] use sample weight') + weights = tf.expand_dims(tf.cast(weights, tf.float32), -1) + batch_size, _ = get_shape_list(weights, 2) + pairwise_weights = tf.tile(weights, tf.stack([1, batch_size])) + pairwise_weights = tf.boolean_mask(pairwise_weights, pairwise_mask) + else: + pairwise_weights = weights pairwise_pseudo_labels = tf.ones_like(pairwise_logits) - loss = tf.losses.sigmoid_cross_entropy(pairwise_pseudo_labels, - pairwise_logits) + loss = sigmoid_focal_loss_with_logits( + pairwise_pseudo_labels, + pairwise_logits, + gamma=gamma, + alpha=alpha, + sample_weights=pairwise_weights) + + # set rank loss to zero if a batch has no positive sample. 
+ loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss) + return loss + + +def pairwise_logistic_loss(labels, + logits, + session_ids=None, + temperature=1.0, + weights=1.0): + r"""Pairwise logistic loss. + + Definition: + $$ + \mathcal{L}(\{y\}, \{s\}) = + \sum_i \sum_j I[y_i > y_j] \log(1 + \exp(-(s_i - s_j))) + $$ + + Args: + labels: A `Tensor` of the same shape as `logits` representing graded + relevance. + logits: A `Tensor` with shape [batch_size]. + session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. user_id + temperature: A float number to modify the scores=scores/temperature. + weights: A scalar, a `Tensor` with shape [batch_size] for each sample + """ + logits /= temperature + pairwise_logits = tf.math.subtract( + tf.expand_dims(logits, -1), tf.expand_dims(logits, 0)) + + pairwise_mask = tf.greater( + tf.expand_dims(labels, -1) - tf.expand_dims(labels, 0), 0) + if session_ids is not None: + logging.info('[pairwise_logistic_loss] use session ids') + group_equal = tf.equal( + tf.expand_dims(session_ids, -1), tf.expand_dims(session_ids, 0)) + pairwise_mask = tf.logical_and(pairwise_mask, group_equal) + pairwise_logits = tf.boolean_mask(pairwise_logits, pairwise_mask) + + # The following is the same as log(1 + exp(-pairwise_logits)). + losses = tf.nn.relu(-pairwise_logits) + tf.math.log1p( + tf.exp(-tf.abs(pairwise_logits))) + + if tf.is_numeric_tensor(weights): + logging.info('[pairwise_logistic_loss] use sample weight') + weights = tf.expand_dims(tf.cast(weights, tf.float32), -1) + batch_size, _ = get_shape_list(weights, 2) + pairwise_weights = tf.tile(weights, tf.stack([1, batch_size])) + pairwise_weights = tf.boolean_mask(pairwise_weights, pairwise_mask) + else: + pairwise_weights = weights + + loss = compute_weighted_loss(losses, pairwise_weights) # set rank loss to zero if a batch has no positive sample. 
loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss) return loss diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index 677d5bc58..de321ab7d 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -89,7 +89,9 @@ def build_loss_graph(self): """Build loss graph for multi task model.""" for task_tower_cfg in self._task_towers: tower_name = task_tower_cfg.tower_name - loss_weight = task_tower_cfg.weight * self._sample_weight + loss_weight = task_tower_cfg.weight + if task_tower_cfg.use_sample_weight: + loss_weight *= self._sample_weight if hasattr(task_tower_cfg, 'task_space_indicator_label') and \ task_tower_cfg.HasField('task_space_indicator_label'): diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index c5cb118e6..65e1364a6 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -35,10 +35,16 @@ def _output_to_prediction_impl(self, num_class=1, suffix=''): prediction_dict = {} - if loss_type == LossType.F1_REWEIGHTED_LOSS or loss_type == LossType.PAIR_WISE_LOSS: + binary_loss_type = { + LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS + } + if loss_type in binary_loss_type: assert num_class == 1, 'num_class must be 1 when loss type is F1_REWEIGHTED_LOSS/PAIR_WISE_LOSS' output = tf.squeeze(output, axis=1) probs = tf.sigmoid(output) + tf.summary.scalar('prediction/probs', tf.reduce_mean(probs)) prediction_dict['logits' + suffix] = output prediction_dict['probs' + suffix] = probs elif loss_type == LossType.CLASSIFICATION: @@ -96,7 +102,8 @@ def build_rtp_output_dict(self): loss_types = {loss.loss_type for loss in self._losses} binary_loss_set = { LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, - LossType.PAIR_WISE_LOSS + LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, + 
LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS } if loss_types & binary_loss_set: if 'probs' in self._prediction_dict: @@ -117,7 +124,7 @@ def build_rtp_output_dict(self): + 't_graph() is called.') else: logging.warning( - 'failed to build RTP rank_predict: unsupported loss type {}'.foramt( + 'failed to build RTP rank_predict: unsupported loss type {}'.format( loss_types)) if forwarded is not None: rank_predict = tf.identity(forwarded, name='rank_predict') @@ -133,14 +140,16 @@ def _build_loss_impl(self, suffix='', loss_param=None): loss_dict = {} + binary_loss_type = { + LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS + } if loss_type == LossType.CLASSIFICATION: loss_name = 'cross_entropy_loss' + suffix pred = self._prediction_dict['logits' + suffix] - elif loss_type == LossType.F1_REWEIGHTED_LOSS: - loss_name = 'f1_reweighted_loss' + suffix - pred = self._prediction_dict['logits' + suffix] - elif loss_type == LossType.PAIR_WISE_LOSS: - loss_name = 'pairwise_loss' + suffix + elif loss_type in binary_loss_type: + loss_name = LossType.Name(loss_type).lower() + suffix pred = self._prediction_dict['logits' + suffix] elif loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: loss_name = 'l2_loss' + suffix @@ -150,13 +159,18 @@ def _build_loss_impl(self, tf.summary.scalar('labels/%s' % label_name, tf.reduce_mean(tf.to_float(self._labels[label_name]))) + kwargs = {} + if loss_param is not None: + if hasattr(loss_param, 'session_name'): + kwargs['session_ids'] = self._labels[loss_param.session_name] loss_dict[loss_name] = loss_builder.build( loss_type, self._labels[label_name], pred, loss_weight, num_class, - loss_param=loss_param) + loss_param=loss_param, + **kwargs) return loss_dict def build_loss_graph(self): @@ -202,7 +216,8 @@ def _build_metric_impl(self, from easy_rec.python.core import metrics as metrics_lib binary_loss_set = { 
LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, - LossType.PAIR_WISE_LOSS + LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, + LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS } metric_dict = {} if metric.WhichOneof('metric') == 'auc': @@ -342,7 +357,8 @@ def build_metric_graph(self, eval_config): def _get_outputs_impl(self, loss_type, num_class=1, suffix=''): binary_loss_set = { LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, - LossType.PAIR_WISE_LOSS + LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, + LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS } if loss_type in binary_loss_set: if num_class == 1: diff --git a/easy_rec/python/protos/loss.proto b/easy_rec/python/protos/loss.proto index c0284711a..4b0f2fd5b 100644 --- a/easy_rec/python/protos/loss.proto +++ b/easy_rec/python/protos/loss.proto @@ -13,6 +13,9 @@ enum LossType { SOFTMAX_CROSS_ENTROPY_WITH_NEGATIVE_MINING = 7; PAIR_WISE_LOSS = 8; F1_REWEIGHTED_LOSS = 9; + BINARY_FOCAL_LOSS = 10; + PAIRWISE_FOCAL_LOSS = 11; + PAIRWISE_LOGISTIC_LOSS = 12; } message Loss { @@ -23,6 +26,10 @@ message Loss { SoftmaxCrossEntropyWithNegativeMining softmax_loss = 102; CircleLoss circle_loss = 103; MultiSimilarityLoss multi_simi_loss = 104; + BinaryFocalLoss binary_focal_loss = 105; + PairwiseLoss pairwise_loss = 106; + PairwiseFocalLoss pairwise_focal_loss = 107; + PairwiseLogisticLoss pairwise_logistic_loss = 108; } }; @@ -49,3 +56,25 @@ message F1ReweighedLoss { required float f1_beta_square = 1 [default = 1.0]; required float label_smoothing = 2 [default = 0]; } + +message BinaryFocalLoss { + required float gamma = 1 [default = 2.0]; + optional float alpha = 2; +} + +message PairwiseLoss { + required float margin = 1 [default = 0]; + optional string session_name = 2; +} + +message PairwiseFocalLoss { + required float gamma = 1 [default = 2.0]; + optional float alpha = 2; + required float margin = 3 [default = 0]; + optional string session_name = 4; +} + +message 
PairwiseLogisticLoss { + required float temperature = 1 [default = 1.0]; + optional string session_name = 4; +} diff --git a/easy_rec/python/protos/tower.proto b/easy_rec/python/protos/tower.proto index 02c6ce67c..580708825 100644 --- a/easy_rec/python/protos/tower.proto +++ b/easy_rec/python/protos/tower.proto @@ -26,7 +26,7 @@ message TaskTower { optional DNN dnn = 6; // training loss weights optional float weight = 7 [default = 1.0]; - // label name for indcating the sample space for the task tower + // label name for indicating the sample space for the task tower optional string task_space_indicator_label = 10; // the loss weight for sample in the task space optional float in_task_space_weight = 11 [default = 1.0]; @@ -34,6 +34,8 @@ message TaskTower { optional float out_task_space_weight = 12 [default = 1.0]; // multiple losses repeated Loss losses = 13; + // whether to use sample weight in this tower + required bool use_sample_weight = 14 [default = true]; }; @@ -68,4 +70,6 @@ message BayesTaskTower { // optional float prediction_weight = 14 [default = 1.0]; // multiple losses repeated Loss losses = 15; + // whether to use sample weight in this tower + required bool use_sample_weight = 16 [default = true]; }; diff --git a/easy_rec/python/utils/activation.py b/easy_rec/python/utils/activation.py index 25df2a486..d05d705b3 100644 --- a/easy_rec/python/utils/activation.py +++ b/easy_rec/python/utils/activation.py @@ -13,10 +13,10 @@ except ImportError: BatchNormalization = tf.keras.layers.BatchNormalization -try: - from tensorflow.python.ops.init_ops import Zeros -except ImportError: - from tensorflow.python.ops.init_ops_v2 import Zeros +# try: +# from tensorflow.python.ops.init_ops import Zeros +# except ImportError: +# from tensorflow.python.ops.init_ops_v2 import Zeros class Dice(Layer): @@ -42,22 +42,30 @@ class Dice(Layer): ACM, 2018: 1059-1068.] 
(https://arxiv.org/pdf/1706.06978.pdf) """ - def __init__(self, axis=-1, epsilon=1e-9, is_training=None, **kwargs): + def __init__(self, + feat_dim, + axis=-1, + epsilon=1e-9, + is_training=None, + **kwargs): super(Dice, self).__init__(**kwargs) self.axis = axis self.epsilon = epsilon self.is_training = is_training - - def build(self, input_shape): - super(Dice, self).build(input_shape) # Be sure to call this somewhere! self.bn = BatchNormalization( axis=self.axis, epsilon=self.epsilon, center=False, scale=False) - self.alphas = self.add_weight( - shape=(input_shape[-1],), - initializer=Zeros(), - dtype=tf.float32, - name='dice_alpha') # name='alpha_'+self.name - self.uses_learning_phase = True + self.alphas = tf.Variable(tf.zeros([feat_dim]), dtype=tf.float32) + + # def build(self, input_shape): + # super(Dice, self).build(input_shape) # Be sure to call this somewhere! + # self.bn = BatchNormalization( + # axis=self.axis, epsilon=self.epsilon, center=False, scale=False) + # self.alphas = self.add_weight( + # shape=(input_shape[-1],), + # initializer=Zeros(), + # dtype=tf.float32, + # name='dice_alpha') # name='alpha_'+self.name + # self.uses_learning_phase = True def call(self, inputs, **kwargs): inputs_normed = self.bn(inputs, training=self.is_training) diff --git a/setup.cfg b/setup.cfg index b180b9fb1..469407312 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ multi_line_output = 7 force_single_line = true known_standard_library = setuptools known_first_party = easy_rec -known_third_party = absl,common_io,docutils,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml +known_third_party = absl,common_io,docutils,focal_loss,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml no_lines_before = LOCALFOLDER default_section = THIRDPARTY skip = easy_rec/python/protos From 
6b54fe70f904292d57bb3aa6daf917b984a1f990 Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 21 Mar 2023 00:47:32 +0800 Subject: [PATCH 06/54] [feat]: add pairwise logistic loss --- easy_rec/python/builders/loss_builder.py | 40 ++++++-- .../python/compat/weight_decay_optimizers.py | 8 +- easy_rec/python/loss/focal_loss.py | 35 ++++++- easy_rec/python/loss/pairwise_loss.py | 95 +++++++++++++------ easy_rec/python/model/rank_model.py | 4 +- easy_rec/python/protos/loss.proto | 9 +- 6 files changed, 143 insertions(+), 48 deletions(-) diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index cf2751965..390b7996c 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -23,6 +23,7 @@ def build(loss_type, num_class=1, loss_param=None, **kwargs): + loss_name = kwargs.pop('loss_name') if loss_type == LossType.CLASSIFICATION: if num_class == 1: return tf.losses.sigmoid_cross_entropy( @@ -42,25 +43,46 @@ def build(loss_type, session = kwargs.get('session_ids', None) margin = 0 if loss_param is None else loss_param.margin return pairwise_loss( - label, pred, session_ids=session, margin=margin, weights=loss_weight) + label, + pred, + session_ids=session, + margin=margin, + weights=loss_weight, + name=loss_name) elif loss_type == LossType.PAIRWISE_LOGISTIC_LOSS: session = kwargs.get('session_ids', None) temp = 1.0 if loss_param is None else loss_param.temperature + ohem_ratio = 1.0 if loss_param is None else loss_param.ohem_ratio + hinge_margin = None + if loss_param is not None and loss_param.HasField('hinge_margin'): + hinge_margin = loss_param.hinge_margin return pairwise_logistic_loss( - label, pred, session_ids=session, temperature=temp, weights=loss_weight) + label, + pred, + session_ids=session, + temperature=temp, + hinge_margin=hinge_margin, + ohem_ratio=ohem_ratio, + weights=loss_weight, + name=loss_name) elif loss_type == LossType.PAIRWISE_FOCAL_LOSS: session = 
kwargs.get('session_ids', None) if loss_param is None: return pairwise_focal_loss( - label, pred, session_ids=session, weights=loss_weight) + label, pred, session_ids=session, weights=loss_weight, name=loss_name) + hinge_margin = None + if loss_param.HasField('hinge_margin'): + hinge_margin = loss_param.hinge_margin return pairwise_focal_loss( label, pred, session_ids=session, gamma=loss_param.gamma, alpha=loss_param.alpha if loss_param.HasField('alpha') else None, - margin=loss_param.margin, - weights=loss_weight) + hinge_margin=hinge_margin, + ohem_ratio=loss_param.ohem_ratio, + weights=loss_weight, + name=loss_name) elif loss_type == LossType.F1_REWEIGHTED_LOSS: f1_beta_square = 1.0 if loss_param is None else loss_param.f1_beta_square label_smoothing = 0 if loss_param is None else loss_param.label_smoothing @@ -79,7 +101,13 @@ def build(loss_type, if loss_param.HasField('alpha'): alpha = loss_param.alpha return sigmoid_focal_loss_with_logits( - label, pred, gamma=gamma, alpha=alpha, sample_weights=loss_weight) + label, + pred, + gamma=gamma, + alpha=alpha, + ohem_ratio=loss_param.ohem_ratio, + sample_weights=loss_weight, + label_smoothing=loss_param.label_smoothing) else: raise ValueError('unsupported loss type: %s' % LossType.Name(loss_type)) diff --git a/easy_rec/python/compat/weight_decay_optimizers.py b/easy_rec/python/compat/weight_decay_optimizers.py index 26eb9754f..7c9baf905 100755 --- a/easy_rec/python/compat/weight_decay_optimizers.py +++ b/easy_rec/python/compat/weight_decay_optimizers.py @@ -411,12 +411,10 @@ def __init__(self, try: - # from tensorflow.python.training import AdamAsyncOptimizer - import tensorflow as tf + from tensorflow.train import AdamAsyncOptimizer @tf_export('contrib.opt.AdamAsyncWOptimizer') - class AdamAsyncWOptimizer(DecoupledWeightDecayExtension, - tf.train.AdamAsyncOptimizer): + class AdamAsyncWOptimizer(DecoupledWeightDecayExtension, AdamAsyncOptimizer): """Optimizer that implements the Adam algorithm with weight decay. 
This is an implementation of the AdamW optimizer described in ["Fixing @@ -474,5 +472,5 @@ def __init__(self, use_locking=use_locking, name=name) except ImportError: - print('import AdamAsyncOptimizer failed') + print('import AdamAsyncOptimizer failed when loading AdamAsyncWOptimizer') pass diff --git a/easy_rec/python/loss/focal_loss.py b/easy_rec/python/loss/focal_loss.py index d596b7938..2e322782e 100644 --- a/easy_rec/python/loss/focal_loss.py +++ b/easy_rec/python/loss/focal_loss.py @@ -12,7 +12,9 @@ def sigmoid_focal_loss_with_logits(labels, logits, gamma=2.0, alpha=None, - sample_weights=None): + ohem_ratio=1.0, + sample_weights=None, + label_smoothing=0): """Implements the focal loss function. Focal loss was first introduced in the RetinaNet paper @@ -25,10 +27,15 @@ def sigmoid_focal_loss_with_logits(labels, imbalance between the background class and other classes is extremely high. Args - labels: true targets tensor. - logits: predictions tensor. + labels: `[batch_size]` target integer labels in `{0, 1}`. + logits: Float `[batch_size]` logits outputs of the network. alpha: balancing factor. gamma: modulating factor. + ohem_ratio: the percent of hard examples to be mined + sample_weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `losses` dimension). + label_smoothing: If greater than `0` then smooth the labels. Returns: Weighted loss float `Tensor`. 
If `reduction` is `NONE`,this has the @@ -38,14 +45,19 @@ def sigmoid_focal_loss_with_logits(labels, ValueError: If the shape of `sample_weight` is invalid or value of `gamma` is less than zero """ + assert 0 < ohem_ratio <= 1.0, 'ohem_ratio must be in (0, 1]' if gamma and gamma < 0: raise ValueError('Value of gamma should be greater than or equal to zero') - logging.info('[focal_loss] gamma: {}, alpha: {}'.format(gamma, alpha)) + logging.info( + '[focal_loss] gamma: {}, alpha: {}, ohem_ratho: {}, label smoothing: {}' + .format(gamma, alpha, ohem_ratio, label_smoothing)) y_true = tf.cast(labels, logits.dtype) # convert the predictions into probabilities y_pred = tf.nn.sigmoid(logits) + epsilon = 1e-7 + y_pred = tf.clip_by_value(y_pred, epsilon, 1 - epsilon) p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred)) weights = tf.pow((1 - p_t), gamma) @@ -59,4 +71,17 @@ def sigmoid_focal_loss_with_logits(labels, else: weights *= sample_weights - return tf.losses.sigmoid_cross_entropy(y_true, logits, weights=weights) + if ohem_ratio == 1.0: + return tf.losses.sigmoid_cross_entropy( + y_true, logits, weights=weights, label_smoothing=label_smoothing) + + losses = tf.losses.sigmoid_cross_entropy( + y_true, + logits, + weights=weights, + label_smoothing=label_smoothing, + reduction=tf.losses.Reduction.NONE) + k = tf.size(losses) * ohem_ratio + topk = tf.nn.top_k(losses, k) + losses = tf.boolean_mask(topk.values, topk.values > 0) + return tf.reduce_mean(losses) diff --git a/easy_rec/python/loss/pairwise_loss.py b/easy_rec/python/loss/pairwise_loss.py index d2af10cb8..1a9382ab7 100644 --- a/easy_rec/python/loss/pairwise_loss.py +++ b/easy_rec/python/loss/pairwise_loss.py @@ -12,8 +12,13 @@ tf = tf.compat.v1 -def pairwise_loss(labels, logits, session_ids=None, margin=0, weights=1.0): - """Pairwise loss. Also see `pairwise_logistic_loss` below. +def pairwise_loss(labels, + logits, + session_ids=None, + margin=0, + weights=1.0, + name=''): + """Deprecated Pairwise loss. 
Also see `pairwise_logistic_loss` below. Args: labels: a `Tensor` with shape [batch_size]. e.g. click or not click in the session. @@ -21,23 +26,26 @@ def pairwise_loss(labels, logits, session_ids=None, margin=0, weights=1.0): session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. user_id margin: the margin between positive and negative sample pair weights: sample weights + name: the name of loss """ - logging.info('[pairwise_loss] margin: {}'.format(margin)) + loss_name = name if name else 'pairwise_logistic_loss' + logging.info('[{}] margin: {}'.format(loss_name, margin)) pairwise_logits = tf.math.subtract( tf.expand_dims(logits, -1), tf.expand_dims(logits, 0)) - margin pairwise_mask = tf.greater( tf.expand_dims(labels, -1) - tf.expand_dims(labels, 0), 0) if session_ids is not None: - logging.info('[pairwise_loss] use session ids') + logging.info('[%s] use session ids' % loss_name) group_equal = tf.equal( tf.expand_dims(session_ids, -1), tf.expand_dims(session_ids, 0)) pairwise_mask = tf.logical_and(pairwise_mask, group_equal) pairwise_logits = tf.boolean_mask(pairwise_logits, pairwise_mask) - pairwise_pseudo_labels = tf.ones_like(pairwise_logits) + num_pair = tf.size(pairwise_logits) + tf.summary.scalar('loss/%s_num_of_pairs' % loss_name, num_pair) if tf.is_numeric_tensor(weights): - logging.info('[pairwise_loss] use sample weight') + logging.info('[%s] use sample weight' % loss_name) weights = tf.expand_dims(tf.cast(weights, tf.float32), -1) batch_size, _ = get_shape_list(weights, 2) pairwise_weights = tf.tile(weights, tf.stack([1, batch_size])) @@ -45,35 +53,48 @@ def pairwise_loss(labels, logits, session_ids=None, margin=0, weights=1.0): else: pairwise_weights = weights + pairwise_pseudo_labels = tf.ones_like(pairwise_logits) loss = tf.losses.sigmoid_cross_entropy( pairwise_pseudo_labels, pairwise_logits, weights=pairwise_weights) # set rank loss to zero if a batch has no positive sample. 
- loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss) + # loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss) return loss def pairwise_focal_loss(labels, logits, session_ids=None, - margin=0, + hinge_margin=None, gamma=2, alpha=None, - weights=1.0): - logging.info('[pairwise_focal_loss] margin: {}, gamma: {}, alpha: {}'.format( - margin, gamma, alpha)) - pairwise_logits = tf.math.subtract( - tf.expand_dims(logits, -1), tf.expand_dims(logits, 0)) - margin + weights=1.0, + ohem_ratio=1.0, + name=''): + loss_name = name if name else 'pairwise_focal_loss' + logging.info( + '[{}] hinge margin: {}, gamma: {}, alpha: {}, ohem_ratio: {}'.format( + loss_name, hinge_margin, gamma, alpha, ohem_ratio)) + assert 0 < ohem_ratio <= 1.0, 'ohem_ratio must be in (0, 1]' + + pairwise_logits = tf.expand_dims(logits, -1) - tf.expand_dims(logits, 0) + pairwise_mask = tf.greater( tf.expand_dims(labels, -1) - tf.expand_dims(labels, 0), 0) + if hinge_margin is not None: + hinge_mask = tf.less(pairwise_logits, hinge_margin) + pairwise_mask = tf.logical_and(pairwise_mask, hinge_mask) if session_ids is not None: - logging.info('[pairwise_focal_loss] use session ids') + logging.info('[%s] use session ids' % loss_name) group_equal = tf.equal( tf.expand_dims(session_ids, -1), tf.expand_dims(session_ids, 0)) pairwise_mask = tf.logical_and(pairwise_mask, group_equal) + pairwise_logits = tf.boolean_mask(pairwise_logits, pairwise_mask) + num_pair = tf.size(pairwise_logits) + tf.summary.scalar('loss/%s_num_of_pairs' % loss_name, num_pair) if tf.is_numeric_tensor(weights): - logging.info('[pairwise_focal_loss] use sample weight') + logging.info('[%s] use sample weight' % loss_name) weights = tf.expand_dims(tf.cast(weights, tf.float32), -1) batch_size, _ = get_shape_list(weights, 2) pairwise_weights = tf.tile(weights, tf.stack([1, batch_size])) @@ -87,10 +108,8 @@ def pairwise_focal_loss(labels, pairwise_logits, gamma=gamma, alpha=alpha, + ohem_ratio=ohem_ratio, 
sample_weights=pairwise_weights) - - # set rank loss to zero if a batch has no positive sample. - loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss) return loss @@ -98,8 +117,11 @@ def pairwise_logistic_loss(labels, logits, session_ids=None, temperature=1.0, - weights=1.0): - r"""Pairwise logistic loss. + hinge_margin=None, + weights=1.0, + ohem_ratio=1.0, + name=''): + r"""Computes pairwise logistic loss between `labels` and `logits`. Definition: $$ @@ -112,28 +134,40 @@ def pairwise_logistic_loss(labels, relevance. logits: A `Tensor` with shape [batch_size]. session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. user_id - temperature: A float number to modify the scores=scores/temperature. + temperature: (Optional) The temperature to use for scaling the logits. + hinge_margin: the margin between positive and negative logits weights: A scalar, a `Tensor` with shape [batch_size] for each sample + ohem_ratio: the percent of hard examples to be mined + name: the name of loss """ - logits /= temperature + assert 0 < ohem_ratio <= 1.0, 'ohem_ratio must be in (0, 1]' + loss_name = name if name else 'pairwise_logistic_loss' + if temperature != 1.0: + logits /= temperature pairwise_logits = tf.math.subtract( tf.expand_dims(logits, -1), tf.expand_dims(logits, 0)) pairwise_mask = tf.greater( tf.expand_dims(labels, -1) - tf.expand_dims(labels, 0), 0) + if hinge_margin is not None: + hinge_mask = tf.less(pairwise_logits, hinge_margin) + pairwise_mask = tf.logical_and(pairwise_mask, hinge_mask) if session_ids is not None: - logging.info('[pairwise_logistic_loss] use session ids') + logging.info('[%s] use session ids' % loss_name) group_equal = tf.equal( tf.expand_dims(session_ids, -1), tf.expand_dims(session_ids, 0)) pairwise_mask = tf.logical_and(pairwise_mask, group_equal) + pairwise_logits = tf.boolean_mask(pairwise_logits, pairwise_mask) + num_pair = tf.size(pairwise_logits) + 
tf.summary.scalar('loss/%s_num_of_pairs' % loss_name, num_pair) # The following is the same as log(1 + exp(-pairwise_logits)). losses = tf.nn.relu(-pairwise_logits) + tf.math.log1p( tf.exp(-tf.abs(pairwise_logits))) if tf.is_numeric_tensor(weights): - logging.info('[pairwise_logistic_loss] use sample weight') + logging.info('[%s] use sample weight' % loss_name) weights = tf.expand_dims(tf.cast(weights, tf.float32), -1) batch_size, _ = get_shape_list(weights, 2) pairwise_weights = tf.tile(weights, tf.stack([1, batch_size])) @@ -141,7 +175,12 @@ def pairwise_logistic_loss(labels, else: pairwise_weights = weights - loss = compute_weighted_loss(losses, pairwise_weights) - # set rank loss to zero if a batch has no positive sample. - loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss) - return loss + if ohem_ratio == 1.0: + return compute_weighted_loss(losses, pairwise_weights) + + losses = compute_weighted_loss( + losses, pairwise_weights, reduction=tf.losses.Reduction.NONE) + k = tf.size(losses) * ohem_ratio + topk = tf.nn.top_k(losses, k) + losses = tf.boolean_mask(topk.values, topk.values > 0) + return tf.reduce_mean(losses) diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index 65e1364a6..a4bce730e 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -159,10 +159,10 @@ def _build_loss_impl(self, tf.summary.scalar('labels/%s' % label_name, tf.reduce_mean(tf.to_float(self._labels[label_name]))) - kwargs = {} + kwargs = {'loss_name': loss_name} if loss_param is not None: if hasattr(loss_param, 'session_name'): - kwargs['session_ids'] = self._labels[loss_param.session_name] + kwargs['session_ids'] = self._feature_dict[loss_param.session_name] loss_dict[loss_name] = loss_builder.build( loss_type, self._labels[label_name], diff --git a/easy_rec/python/protos/loss.proto b/easy_rec/python/protos/loss.proto index 4b0f2fd5b..7a6be0238 100644 --- a/easy_rec/python/protos/loss.proto +++ 
b/easy_rec/python/protos/loss.proto @@ -60,6 +60,8 @@ message F1ReweighedLoss { message BinaryFocalLoss { required float gamma = 1 [default = 2.0]; optional float alpha = 2; + optional float ohem_ratio = 3 [default = 1.0]; + optional float label_smoothing = 4 [default = 0]; } message PairwiseLoss { @@ -70,11 +72,14 @@ message PairwiseLoss { message PairwiseFocalLoss { required float gamma = 1 [default = 2.0]; optional float alpha = 2; - required float margin = 3 [default = 0]; + optional float hinge_margin = 3 [default = 1.0]; optional string session_name = 4; + optional float ohem_ratio = 5 [default = 1.0]; } message PairwiseLogisticLoss { required float temperature = 1 [default = 1.0]; - optional string session_name = 4; + optional string session_name = 2; + optional float hinge_margin = 3 [default = 1.0]; + optional float ohem_ratio = 4 [default = 1.0]; } From d65ece39ac191bfa4307d240e5106277653c643d Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 21 Mar 2023 10:36:12 +0800 Subject: [PATCH 07/54] [feat]: add pairwise logistic loss --- easy_rec/python/builders/loss_builder.py | 8 ++++++-- easy_rec/python/loss/focal_loss.py | 12 +++++++---- easy_rec/python/loss/pairwise_loss.py | 26 +++++++++++++++++------- easy_rec/python/protos/loss.proto | 2 ++ 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index 390b7996c..203e3279d 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -42,11 +42,13 @@ def build(loss_type, elif loss_type == LossType.PAIR_WISE_LOSS: session = kwargs.get('session_ids', None) margin = 0 if loss_param is None else loss_param.margin + temp = 1.0 if loss_param is None else loss_param.temperature return pairwise_loss( label, pred, session_ids=session, margin=margin, + temperature=temp, weights=loss_weight, name=loss_name) elif loss_type == LossType.PAIRWISE_LOGISTIC_LOSS: @@ -81,6 +83,7 @@ def 
build(loss_type, alpha=loss_param.alpha if loss_param.HasField('alpha') else None, hinge_margin=hinge_margin, ohem_ratio=loss_param.ohem_ratio, + temperature=loss_param.temperature, weights=loss_weight, name=loss_name) elif loss_type == LossType.F1_REWEIGHTED_LOSS: @@ -95,7 +98,7 @@ def build(loss_type, elif loss_type == LossType.BINARY_FOCAL_LOSS: if loss_param is None: return sigmoid_focal_loss_with_logits( - label, pred, sample_weights=loss_weight) + label, pred, sample_weights=loss_weight, name=loss_name) gamma = loss_param.gamma alpha = None if loss_param.HasField('alpha'): @@ -107,7 +110,8 @@ def build(loss_type, alpha=alpha, ohem_ratio=loss_param.ohem_ratio, sample_weights=loss_weight, - label_smoothing=loss_param.label_smoothing) + label_smoothing=loss_param.label_smoothing, + name=loss_name) else: raise ValueError('unsupported loss type: %s' % LossType.Name(loss_type)) diff --git a/easy_rec/python/loss/focal_loss.py b/easy_rec/python/loss/focal_loss.py index 2e322782e..515cb506b 100644 --- a/easy_rec/python/loss/focal_loss.py +++ b/easy_rec/python/loss/focal_loss.py @@ -14,7 +14,8 @@ def sigmoid_focal_loss_with_logits(labels, alpha=None, ohem_ratio=1.0, sample_weights=None, - label_smoothing=0): + label_smoothing=0, + name=''): """Implements the focal loss function. Focal loss was first introduced in the RetinaNet paper @@ -36,6 +37,7 @@ def sigmoid_focal_loss_with_logits(labels, `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `losses` dimension). label_smoothing: If greater than `0` then smooth the labels. + name: the name of loss Returns: Weighted loss float `Tensor`. 
If `reduction` is `NONE`,this has the @@ -45,12 +47,13 @@ def sigmoid_focal_loss_with_logits(labels, ValueError: If the shape of `sample_weight` is invalid or value of `gamma` is less than zero """ - assert 0 < ohem_ratio <= 1.0, 'ohem_ratio must be in (0, 1]' + loss_name = name if name else 'focal_loss' + assert 0 < ohem_ratio <= 1.0, loss_name + ' ohem_ratio must be in (0, 1]' if gamma and gamma < 0: raise ValueError('Value of gamma should be greater than or equal to zero') logging.info( - '[focal_loss] gamma: {}, alpha: {}, ohem_ratho: {}, label smoothing: {}' - .format(gamma, alpha, ohem_ratio, label_smoothing)) + '[{}] gamma: {}, alpha: {}, ohem_ratho: {}, label smoothing: {}'.format( + loss_name, gamma, alpha, ohem_ratio, label_smoothing)) y_true = tf.cast(labels, logits.dtype) @@ -66,6 +69,7 @@ def sigmoid_focal_loss_with_logits(labels, weights *= alpha_factor if sample_weights is not None: + logging.info('[%s] use sample weight' % loss_name) if tf.is_numeric_tensor(sample_weights): weights *= tf.cast(sample_weights, tf.float32) else: diff --git a/easy_rec/python/loss/pairwise_loss.py b/easy_rec/python/loss/pairwise_loss.py index 1a9382ab7..a54c6d0a7 100644 --- a/easy_rec/python/loss/pairwise_loss.py +++ b/easy_rec/python/loss/pairwise_loss.py @@ -16,6 +16,7 @@ def pairwise_loss(labels, logits, session_ids=None, margin=0, + temperature=1.0, weights=1.0, name=''): """Deprecated Pairwise loss. Also see `pairwise_logistic_loss` below. @@ -25,11 +26,16 @@ def pairwise_loss(labels, logits: a `Tensor` with shape [batch_size]. e.g. the value of last neuron before activation. session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. user_id margin: the margin between positive and negative sample pair + temperature: (Optional) The temperature to use for scaling the logits. 
weights: sample weights name: the name of loss """ - loss_name = name if name else 'pairwise_logistic_loss' - logging.info('[{}] margin: {}'.format(loss_name, margin)) + loss_name = name if name else 'pairwise_loss' + logging.info('[{}] margin: {}, temperature: {}'.format( + loss_name, margin, temperature)) + + if temperature != 1.0: + logits /= temperature pairwise_logits = tf.math.subtract( tf.expand_dims(logits, -1), tf.expand_dims(logits, 0)) - margin pairwise_mask = tf.greater( @@ -67,15 +73,18 @@ def pairwise_focal_loss(labels, hinge_margin=None, gamma=2, alpha=None, - weights=1.0, ohem_ratio=1.0, + temperature=1.0, + weights=1.0, name=''): loss_name = name if name else 'pairwise_focal_loss' + assert 0 < ohem_ratio <= 1.0, loss_name + ' ohem_ratio must be in (0, 1]' logging.info( - '[{}] hinge margin: {}, gamma: {}, alpha: {}, ohem_ratio: {}'.format( - loss_name, hinge_margin, gamma, alpha, ohem_ratio)) - assert 0 < ohem_ratio <= 1.0, 'ohem_ratio must be in (0, 1]' + '[{}] hinge margin: {}, gamma: {}, alpha: {}, ohem_ratio: {}, temperature: {}' + .format(loss_name, hinge_margin, gamma, alpha, ohem_ratio, temperature)) + if temperature != 1.0: + logits /= temperature pairwise_logits = tf.expand_dims(logits, -1) - tf.expand_dims(logits, 0) pairwise_mask = tf.greater( @@ -140,8 +149,11 @@ def pairwise_logistic_loss(labels, ohem_ratio: the percent of hard examples to be mined name: the name of loss """ - assert 0 < ohem_ratio <= 1.0, 'ohem_ratio must be in (0, 1]' loss_name = name if name else 'pairwise_logistic_loss' + assert 0 < ohem_ratio <= 1.0, loss_name + ' ohem_ratio must be in (0, 1]' + logging.info('[{}] hinge margin: {}, ohem_ratio: {}, temperature: {}'.format( + loss_name, hinge_margin, ohem_ratio, temperature)) + if temperature != 1.0: logits /= temperature pairwise_logits = tf.math.subtract( diff --git a/easy_rec/python/protos/loss.proto b/easy_rec/python/protos/loss.proto index 7a6be0238..156eec5ae 100644 --- a/easy_rec/python/protos/loss.proto +++ 
b/easy_rec/python/protos/loss.proto @@ -67,6 +67,7 @@ message BinaryFocalLoss { message PairwiseLoss { required float margin = 1 [default = 0]; optional string session_name = 2; + optional float temperature = 3 [default = 1.0]; } message PairwiseFocalLoss { @@ -75,6 +76,7 @@ message PairwiseFocalLoss { optional float hinge_margin = 3 [default = 1.0]; optional string session_name = 4; optional float ohem_ratio = 5 [default = 1.0]; + optional float temperature = 6 [default = 1.0]; } message PairwiseLogisticLoss { From d2793df09caa125c875f9dc5ace4569e7f34bb2d Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 21 Mar 2023 10:45:47 +0800 Subject: [PATCH 08/54] [feat]: add pairwise logistic loss --- easy_rec/python/loss/pairwise_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easy_rec/python/loss/pairwise_loss.py b/easy_rec/python/loss/pairwise_loss.py index a54c6d0a7..a1eda5873 100644 --- a/easy_rec/python/loss/pairwise_loss.py +++ b/easy_rec/python/loss/pairwise_loss.py @@ -3,7 +3,7 @@ import logging import tensorflow as tf -from focal_loss import sigmoid_focal_loss_with_logits +from easy_rec.python.loss.focal_loss import sigmoid_focal_loss_with_logits from tensorflow.python.ops.losses.losses_impl import compute_weighted_loss from easy_rec.python.utils.shape_utils import get_shape_list From 31e25027e77df9904005d8da50cbcea16e270fee Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 21 Mar 2023 11:44:38 +0800 Subject: [PATCH 09/54] [feat]: add pairwise logistic loss --- easy_rec/python/loss/focal_loss.py | 3 ++- easy_rec/python/loss/pairwise_loss.py | 8 ++++++-- setup.cfg | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/easy_rec/python/loss/focal_loss.py b/easy_rec/python/loss/focal_loss.py index 515cb506b..4d3c13140 100644 --- a/easy_rec/python/loss/focal_loss.py +++ b/easy_rec/python/loss/focal_loss.py @@ -85,7 +85,8 @@ def sigmoid_focal_loss_with_logits(labels, weights=weights, label_smoothing=label_smoothing, 
reduction=tf.losses.Reduction.NONE) - k = tf.size(losses) * ohem_ratio + k = tf.size(losses, out_type=tf.float32) * tf.convert_to_tensor(ohem_ratio) + k = tf.to_int32(tf.math.rint(k)) topk = tf.nn.top_k(losses, k) losses = tf.boolean_mask(topk.values, topk.values > 0) return tf.reduce_mean(losses) diff --git a/easy_rec/python/loss/pairwise_loss.py b/easy_rec/python/loss/pairwise_loss.py index a1eda5873..07d45896e 100644 --- a/easy_rec/python/loss/pairwise_loss.py +++ b/easy_rec/python/loss/pairwise_loss.py @@ -3,9 +3,9 @@ import logging import tensorflow as tf -from easy_rec.python.loss.focal_loss import sigmoid_focal_loss_with_logits from tensorflow.python.ops.losses.losses_impl import compute_weighted_loss +from easy_rec.python.loss.focal_loss import sigmoid_focal_loss_with_logits from easy_rec.python.utils.shape_utils import get_shape_list if tf.__version__ >= '2.0': @@ -30,6 +30,9 @@ def pairwise_loss(labels, weights: sample weights name: the name of loss """ + logging.warning( + 'The old `pairwise_loss` is being deprecated. 
' + 'Please use the new `pairwise_logistic_loss` or `pairwise_focal_loss`') loss_name = name if name else 'pairwise_loss' logging.info('[{}] margin: {}, temperature: {}'.format( loss_name, margin, temperature)) @@ -192,7 +195,8 @@ def pairwise_logistic_loss(labels, losses = compute_weighted_loss( losses, pairwise_weights, reduction=tf.losses.Reduction.NONE) - k = tf.size(losses) * ohem_ratio + k = tf.size(losses, out_type=tf.float32) * tf.convert_to_tensor(ohem_ratio) + k = tf.to_int32(tf.math.rint(k)) topk = tf.nn.top_k(losses, k) losses = tf.boolean_mask(topk.values, topk.values > 0) return tf.reduce_mean(losses) diff --git a/setup.cfg b/setup.cfg index 469407312..b180b9fb1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ multi_line_output = 7 force_single_line = true known_standard_library = setuptools known_first_party = easy_rec -known_third_party = absl,common_io,docutils,focal_loss,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml +known_third_party = absl,common_io,docutils,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml no_lines_before = LOCALFOLDER default_section = THIRDPARTY skip = easy_rec/python/protos From 7eb9c5c66da785b5e3e095001fa002d5a382683b Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 21 Mar 2023 18:42:11 +0800 Subject: [PATCH 10/54] [feat]: add pairwise logistic loss --- easy_rec/python/loss/focal_loss.py | 7 ++++--- easy_rec/python/loss/pairwise_loss.py | 2 +- easy_rec/python/model/multi_task_model.py | 1 + easy_rec/python/model/rank_model.py | 11 ++++++++--- easy_rec/python/protos/loss.proto | 1 + 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/easy_rec/python/loss/focal_loss.py b/easy_rec/python/loss/focal_loss.py index 4d3c13140..9ef6a94a7 100644 --- a/easy_rec/python/loss/focal_loss.py +++ b/easy_rec/python/loss/focal_loss.py @@ -69,10 
+69,11 @@ def sigmoid_focal_loss_with_logits(labels, weights *= alpha_factor if sample_weights is not None: - logging.info('[%s] use sample weight' % loss_name) if tf.is_numeric_tensor(sample_weights): + logging.info('[%s] use sample weight' % loss_name) weights *= tf.cast(sample_weights, tf.float32) - else: + elif sample_weights != 1.0: + logging.info('[%s] use sample weight: %f' % (loss_name, sample_weights)) weights *= sample_weights if ohem_ratio == 1.0: @@ -85,7 +86,7 @@ def sigmoid_focal_loss_with_logits(labels, weights=weights, label_smoothing=label_smoothing, reduction=tf.losses.Reduction.NONE) - k = tf.size(losses, out_type=tf.float32) * tf.convert_to_tensor(ohem_ratio) + k = tf.to_float(tf.size(losses)) * tf.convert_to_tensor(ohem_ratio) k = tf.to_int32(tf.math.rint(k)) topk = tf.nn.top_k(losses, k) losses = tf.boolean_mask(topk.values, topk.values > 0) diff --git a/easy_rec/python/loss/pairwise_loss.py b/easy_rec/python/loss/pairwise_loss.py index 07d45896e..a421cdbba 100644 --- a/easy_rec/python/loss/pairwise_loss.py +++ b/easy_rec/python/loss/pairwise_loss.py @@ -195,7 +195,7 @@ def pairwise_logistic_loss(labels, losses = compute_weighted_loss( losses, pairwise_weights, reduction=tf.losses.Reduction.NONE) - k = tf.size(losses, out_type=tf.float32) * tf.convert_to_tensor(ohem_ratio) + k = tf.to_float(tf.size(losses)) * tf.convert_to_tensor(ohem_ratio) k = tf.to_int32(tf.math.rint(k)) topk = tf.nn.top_k(losses, k) losses = tf.boolean_mask(topk.values, topk.values > 0) diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index de321ab7d..4ffd404d9 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -121,6 +121,7 @@ def build_loss_graph(self): loss_weight=loss_weight, num_class=task_tower_cfg.num_class, suffix='_%s' % tower_name, + loss_name=loss.loss_name, loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): loss_dict[loss_name] = 
loss_value * loss.weight diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index a4bce730e..b0463f10d 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -138,6 +138,7 @@ def _build_loss_impl(self, loss_weight=1.0, num_class=1, suffix='', + loss_name='', loss_param=None): loss_dict = {} binary_loss_type = { @@ -146,13 +147,16 @@ def _build_loss_impl(self, LossType.PAIRWISE_LOGISTIC_LOSS } if loss_type == LossType.CLASSIFICATION: - loss_name = 'cross_entropy_loss' + suffix + loss_name = loss_name if loss_name else 'cross_entropy_loss' + suffix pred = self._prediction_dict['logits' + suffix] elif loss_type in binary_loss_type: - loss_name = LossType.Name(loss_type).lower() + suffix + if not loss_name: + loss_name = LossType.Name(loss_type).lower() + suffix + else: + loss_name = loss_name + suffix pred = self._prediction_dict['logits' + suffix] elif loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: - loss_name = 'l2_loss' + suffix + loss_name = loss_name if loss_name else 'l2_loss' + suffix pred = self._prediction_dict['y' + suffix] else: raise ValueError('invalid loss type: %s' % LossType.Name(loss_type)) @@ -191,6 +195,7 @@ def build_loss_graph(self): label_name=self._label_name, loss_weight=self._sample_weight, num_class=self._num_class, + loss_name=loss.loss_name, loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): loss_dict[loss_name] = loss_value * loss.weight diff --git a/easy_rec/python/protos/loss.proto b/easy_rec/python/protos/loss.proto index 156eec5ae..7004972a0 100644 --- a/easy_rec/python/protos/loss.proto +++ b/easy_rec/python/protos/loss.proto @@ -21,6 +21,7 @@ enum LossType { message Loss { required LossType loss_type = 1; required float weight = 2 [default = 1.0]; + optional string loss_name = 3; oneof loss_param { F1ReweighedLoss f1_reweighted_loss = 101; SoftmaxCrossEntropyWithNegativeMining softmax_loss = 102; From 
547c807db6e12579d9fb4f270edb6b1dd8a774c2 Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 4 Apr 2023 19:31:41 +0800 Subject: [PATCH 11/54] [feat]: add jrc loss --- easy_rec/python/builders/loss_builder.py | 6 + easy_rec/python/input/input.py | 121 ++++++++++----------- easy_rec/python/layers/din.py | 17 ++- easy_rec/python/loss/jrc_loss.py | 62 +++++++++++ easy_rec/python/main.py | 1 - easy_rec/python/model/multi_task_model.py | 11 +- easy_rec/python/model/rank_model.py | 23 +++- easy_rec/python/protos/loss.proto | 10 +- easy_rec/python/tools/feature_selection.py | 2 +- easy_rec/python/utils/load_class.py | 2 + pai_jobs/run.py | 2 +- 11 files changed, 183 insertions(+), 74 deletions(-) create mode 100644 easy_rec/python/loss/jrc_loss.py diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index 203e3279d..5427c0d54 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -8,6 +8,7 @@ from easy_rec.python.loss.pairwise_loss import pairwise_focal_loss from easy_rec.python.loss.pairwise_loss import pairwise_logistic_loss from easy_rec.python.loss.pairwise_loss import pairwise_loss +from easy_rec.python.loss.jrc_loss import jrc_loss from easy_rec.python.protos.loss_pb2 import LossType from easy_rec.python.loss.f1_reweight_loss import f1_reweight_sigmoid_cross_entropy # NOQA @@ -39,6 +40,11 @@ def build(loss_type, logging.info('%s is used' % LossType.Name(loss_type)) return tf.losses.mean_squared_error( labels=label, predictions=pred, weights=loss_weight, **kwargs) + elif loss_type == LossType.JRC_LOSS: + alpha = 0.5 if loss_param is None else loss_param.alpha + auto_weight = False if loss_param is None else not loss_param.HasField('alpha') + session = kwargs.get('session_ids', None) + return jrc_loss(label, pred, session, alpha, auto_weight=auto_weight, name=loss_name) elif loss_type == LossType.PAIR_WISE_LOSS: session = kwargs.get('session_ids', None) margin = 0 if loss_param is 
None else loss_param.margin diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index 739024486..09d4c299b 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -127,11 +127,11 @@ def __init__(self, metrics = self._pipeline_config.eval_config.metrics_set for metric in metrics: metric_name = metric.WhichOneof('metric') - if metric_name == 'GAUC': + if metric_name == 'gauc': uid = metric.gauc.uid_field if uid not in self._effective_fields: self._effective_fields.append(uid) - elif metric_name == 'SessionAUC': + elif metric_name == 'session_auc': sid = metric.session_auc.session_id_field if sid not in self._effective_fields: self._effective_fields.append(sid) @@ -139,27 +139,17 @@ def __init__(self, # check multi task model's metrics model_config = self._pipeline_config.model_config model_name = model_config.WhichOneof('model') - model = None - if model_name == 'MMoE': - model = model_config.mmoe - elif model_name == 'ESMM': - model = model_config.esmm - elif model_name == 'DBMTL': - model = model_config.dbmtl - elif model_name == 'SimpleMultiTask': - model = model_config.simple_multi_task - elif model_name == 'PLE': - model = model_config.ple - if model is not None: + if model_name in {'mmoe', 'esmm', 'dbmtl', 'simple_multi_task', 'ple'}: + model = getattr(model_config, model_name) for tower in model.task_towers: metrics = tower.metrics_set for metric in metrics: metric_name = metric.WhichOneof('metric') - if metric_name == 'GAUC': + if metric_name == 'gauc': uid = metric.gauc.uid_field if uid not in self._effective_fields: self._effective_fields.append(uid) - elif metric_name == 'SessionAUC': + elif metric_name == 'session_auc': sid = metric.session_auc.session_id_field if sid not in self._effective_fields: self._effective_fields.append(sid) @@ -482,52 +472,22 @@ def _parse_raw_feature(self, fc, parsed_dict, field_dict): input_0 = fc.input_names[0] feature_name = fc.feature_name if fc.HasField('feature_name') 
else input_0 if field_dict[input_0].dtype == tf.string: - - def combine(x): - seq = tf.string_split([x], fc.seq_multi_sep) - seq_len = tf.size(seq) - if fc.raw_input_dim > 1: - check_list = [ - tf.py_func( - check_split, - [seq.values, fc.separator, fc.raw_input_dim, input_0], - Tout=tf.bool) - ] if self._check_mode else [] - with tf.control_dependencies(check_list): - emb = tf.string_split(seq.values, fc.separator).values - else: - emb = seq.values - check_list = [ - tf.py_func(check_string_to_number, [emb, input_0], Tout=tf.bool) - ] if self._check_mode else [] - with tf.control_dependencies(check_list): - emb_val = tf.string_to_number(emb) - emb_vec = tf.reshape(emb_val, [seq_len, -1]) - - if fc.combiner == 'max': - emb_vec = tf.reduce_max(emb_vec, axis=0) - elif fc.combiner == 'min': - emb_vec = tf.reduce_min(emb_vec, axis=0) - elif fc.combiner == 'sum': - emb_vec = tf.reduce_sum(emb_vec, axis=0) - elif fc.combiner == 'mean': - emb_vec = tf.reduce_mean(emb_vec, axis=0) - else: - assert False, 'unsupported combine operator: ' + fc.combiner - return emb_vec - if fc.HasField('seq_multi_sep') and fc.HasField('combiner'): - parsed_dict[feature_name] = tf.map_fn( - combine, field_dict[input_0], dtype=tf.float32) - elif fc.raw_input_dim > 1: + fea = tf.string_split(field_dict[input_0], fc.seq_multi_sep) + segment_ids = fea.indices[:, 0] + vals = fea.values + else: + vals = field_dict[input_0] + segment_ids = tf.range(0, tf.shape(vals)[0]) + if fc.raw_input_dim > 1: check_list = [ tf.py_func( check_split, - [field_dict[input_0], fc.separator, fc.raw_input_dim, input_0], + [vals, fc.separator, fc.raw_input_dim, input_0], Tout=tf.bool) ] if self._check_mode else [] with tf.control_dependencies(check_list): - tmp_fea = tf.string_split(field_dict[input_0], fc.separator) + tmp_fea = tf.string_split(vals, fc.separator) check_list = [ tf.py_func( check_string_to_number, [tmp_fea.values, input_0], Tout=tf.bool) @@ -537,19 +497,53 @@ def combine(x): tmp_fea.values, tf.float32, 
name='multi_raw_fea_to_flt_%s' % input_0) - parsed_dict[feature_name] = tf.sparse_to_dense( + if fc.HasField('seq_multi_sep') and fc.HasField('combiner'): + emb = tf.reshape(tmp_vals, [-1, fc.raw_input_dim]) + if fc.combiner == 'max': + emb = tf.segment_max(emb, segment_ids) + elif fc.combiner == 'sum': + emb = tf.segment_sum(emb, segment_ids) + elif fc.combiner == 'min': + emb = tf.segment_min(emb, segment_ids) + elif fc.combiner == 'mean': + emb = tf.segment_mean(emb, segment_ids) + else: + assert False, 'unsupported combine operator: ' + fc.combiner + parsed_dict[feature_name] = emb + else: + parsed_dict[feature_name] = tf.sparse_to_dense( tmp_fea.indices, [tf.shape(field_dict[input_0])[0], fc.raw_input_dim], tmp_vals, default_value=0) - else: + elif fc.HasField('seq_multi_sep') and fc.HasField('combiner'): check_list = [ - tf.py_func( - check_string_to_number, [field_dict[input_0], input_0], - Tout=tf.bool) + tf.py_func( + check_string_to_number, [vals, input_0], + Tout=tf.bool) ] if self._check_mode else [] with tf.control_dependencies(check_list): - parsed_dict[feature_name] = tf.string_to_number( + emb = tf.string_to_number(vals, tf.float32, + name='raw_fea_to_flt_%s' % input_0) + if fc.combiner == 'max': + emb = tf.segment_max(emb, segment_ids) + elif fc.combiner == 'sum': + emb = tf.segment_sum(emb, segment_ids) + elif fc.combiner == 'min': + emb = tf.segment_min(emb, segment_ids) + elif fc.combiner == 'mean': + emb = tf.segment_mean(emb, segment_ids) + else: + assert False, 'unsupported combine operator: ' + fc.combiner + parsed_dict[feature_name] = emb + else: + check_list = [ + tf.py_func( + check_string_to_number, [field_dict[input_0], input_0], + Tout=tf.bool) + ] if self._check_mode else [] + with tf.control_dependencies(check_list): + parsed_dict[feature_name] = tf.string_to_number( field_dict[input_0], tf.float32) elif field_dict[input_0].dtype in [ tf.int32, tf.int64, tf.double, tf.float32 @@ -563,7 +557,7 @@ def combine(x): fc.max_val - 
fc.min_val) if fc.HasField('normalizer_fn'): - logging.info('apply normalizer_fn %s' % fc.normalizer_fn) + logging.info('apply normalizer_fn %s to `%s`' % (fc.normalizer_fn, feature_name)) parsed_dict[feature_name] = self._normalizer_fn[feature_name]( parsed_dict[feature_name]) @@ -845,6 +839,9 @@ def _preprocess(self, field_dict): if self._mode != tf.estimator.ModeKeys.PREDICT: parsed_dict[constant.SAMPLE_WEIGHT] = field_dict[ self._data_config.sample_weight] + + if Input.DATA_OFFSET in field_dict: + parsed_dict[Input.DATA_OFFSET] = field_dict[Input.DATA_OFFSET] return {'feature': parsed_dict, 'label': label_dict} def _lookup_preprocess(self, fc, field_dict): diff --git a/easy_rec/python/layers/din.py b/easy_rec/python/layers/din.py index 60d106fe3..717dd9789 100644 --- a/easy_rec/python/layers/din.py +++ b/easy_rec/python/layers/din.py @@ -1,5 +1,7 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. +import logging + import tensorflow as tf from easy_rec.python.layers import dnn @@ -21,13 +23,19 @@ def __call__(self, inputs, training=None, **kwargs): seq_input = [seq_fea for seq_fea, _ in seq_features] keys = tf.concat(seq_input, axis=-1) + query = target_feature target_emb_size = target_feature.shape.as_list()[-1] seq_emb_size = keys.shape.as_list()[-1] - assert target_emb_size == seq_emb_size, 'the embedding size of sequence and target item is not equal' \ - ' in feature group:' + self.name + if target_emb_size != seq_emb_size: + logging.info(' the embedding size of sequence [%d] and target item [%d] is not equal' + ' in feature group: %s', seq_emb_size, target_emb_size, self.name) + if target_emb_size < seq_emb_size: + query = tf.pad(target_feature, [[0, 0], [0, seq_emb_size-target_emb_size]]) + else: + assert False, 'the embedding size of target item is larger than the one of sequence' batch_size, max_seq_len, _ = get_shape_list(keys, 3) - queries = tf.tile(tf.expand_dims(target_feature, 1), [1, max_seq_len, 1]) + queries = 
tf.tile(tf.expand_dims(query, 1), [1, max_seq_len, 1]) din_all = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1) din_layer = dnn.DNN( @@ -48,6 +56,9 @@ def __call__(self, inputs, training=None, **kwargs): scores = scores / (seq_emb_size**0.5) # normalization with softmax is abandoned according to the original paper scores = tf.nn.sigmoid(scores) + + if target_emb_size < seq_emb_size: + keys = keys[:, :, :target_emb_size] # [B, L, E] output = tf.squeeze(tf.matmul(scores, keys), axis=[1]) if self.config.need_target_feature: output = tf.concat([output, target_feature], axis=-1) diff --git a/easy_rec/python/loss/jrc_loss.py b/easy_rec/python/loss/jrc_loss.py new file mode 100644 index 000000000..930431da7 --- /dev/null +++ b/easy_rec/python/loss/jrc_loss.py @@ -0,0 +1,62 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +import tensorflow as tf + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name=''): + """Joint Optimization of Ranking and Calibration with Contextualized Hybrid Model. + + Args: + labels: a `Tensor` with shape [batch_size]. e.g. click or not click in the session. + logits: a `Tensor` with shape [batch_size, 2]. e.g. the value of last neuron before activation. + session_ids: a `Tensor` with shape [batch_size, 1]. Session ids of each sample, used to max GAUC metric. e.g. 
user_id + alpha: the weight to balance ranking loss and calibration loss + auto_weight: bool, whether to learn loss weight between ranking loss and calibration loss + name: the name of loss + """ + loss_name = name if name else 'jrc_loss' + logging.info('[{}] alpha: {}'.format(loss_name, alpha)) + + ce_loss = tf.losses.sparse_softmax_cross_entropy(labels, logits) + + labels = tf.expand_dims(labels, 1) # [B, 1] + labels = tf.concat([1 - labels, labels], axis=1) # [B, 2] + + batch_size = tf.shape(logits)[0] + + # Mask: shape [B, B], mask[i,j]=1 indicates the i-th sample + # and j-th sample are in the same context + mask = tf.equal(session_ids, tf.transpose(session_ids)) + + # Tile logits and label: [B, 2]->[B, B, 2] + logits = tf.tile(tf.expand_dims(logits, 1), [1, batch_size, 1]) + y = tf.tile(tf.expand_dims(labels, 1), [1, batch_size, 1]) + + # Set logits that are not in the same context to -inf + mask3d = tf.expand_dims(mask, 2) + y = y * mask3d + logits = logits + (1 - mask) * -1e9 + y_neg, y_pos = y[:, :, 0], y[:, :, 1] + l_neg, l_pos = logits[:, :, 0], logits[:, :, 1] + + # Compute list-wise generative loss -log p(x|y, z) + loss_pos = -tf.sum(y_pos * tf.nn.log_softmax(l_pos, axis=0), axis=0) + loss_neg = -tf.sum(y_neg * tf.nn.log_softmax(l_neg, axis=0), axis=0) + ge_loss = tf.mean((loss_pos+loss_neg)/tf.sum(mask, axis=0)) + + # The final JRC model + if auto_weight: + uncertainty1 = tf.Variable(0, name="%s_ranking_loss_weight" % loss_name, dtype=tf.float32) + tf.summary.scalar('loss/%s_ranking_uncertainty' % loss_name, uncertainty1) + uncertainty2 = tf.Variable(0, name="%s_calibration_loss_weight" % loss_name, dtype=tf.float32) + tf.summary.scalar('loss/%s_calibration_uncertainty' % loss_name, uncertainty2) + loss = tf.exp(-uncertainty1) * ce_loss + 0.5 * uncertainty1 + loss += tf.exp(-uncertainty2) * ge_loss + 0.5 * uncertainty2 + else: + loss = alpha*ce_loss + (1-alpha)*ge_loss + return loss diff --git a/easy_rec/python/main.py b/easy_rec/python/main.py index 
d74e8fe6e..1c7b82637 100644 --- a/easy_rec/python/main.py +++ b/easy_rec/python/main.py @@ -610,7 +610,6 @@ def distribute_evaluate(pipeline_config, eval_result_file = os.path.join(model_dir, eval_result_filename) logging.info('save eval result to file %s' % eval_result_file) if cur_job_name == 'master': - print('eval_result = ', eval_result) logging.info('eval_result = {0}'.format(eval_result)) with gfile.GFile(eval_result_file, 'w') as ofile: result_to_write = {'eval_method': 'distribute'} diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index 4ffd404d9..0e49249ea 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -7,6 +7,7 @@ from easy_rec.python.builders import loss_builder from easy_rec.python.model.rank_model import RankModel from easy_rec.python.protos import tower_pb2 +from easy_rec.python.protos.loss_pb2 import LossType if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -124,7 +125,15 @@ def build_loss_graph(self): loss_name=loss.loss_name, loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): - loss_dict[loss_name] = loss_value * loss.weight + if loss.learn_loss_weight: + uncertainty = tf.Variable(0, name="%s_loss_weight" % loss_name, dtype=tf.float32) + tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) + if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: + loss_dict[loss_name] = 0.5 * tf.exp(-uncertainty) * loss_value + 0.5 * uncertainty + else: + loss_dict[loss_name] = tf.exp(-uncertainty) * loss_value + 0.5 * uncertainty + else: + loss_dict[loss_name] = loss_value * loss.weight self._loss_dict.update(loss_dict) diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index b0463f10d..7a2b0dc76 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -41,12 +41,18 @@ def _output_to_prediction_impl(self, LossType.PAIRWISE_LOGISTIC_LOSS } if 
loss_type in binary_loss_type: - assert num_class == 1, 'num_class must be 1 when loss type is F1_REWEIGHTED_LOSS/PAIR_WISE_LOSS' + assert num_class == 1, 'num_class must be 1 when loss type is %s' % loss_type.name output = tf.squeeze(output, axis=1) probs = tf.sigmoid(output) tf.summary.scalar('prediction/probs', tf.reduce_mean(probs)) prediction_dict['logits' + suffix] = output prediction_dict['probs' + suffix] = probs + elif loss_type == LossType.JRC_LOSS: + assert num_class == 2, 'num_class must be 2 when loss type is JRC_LOSS' + probs = tf.nn.softmax(output, axis=1) + tf.summary.scalar('prediction/probs', tf.reduce_mean(probs[:, 1])) + prediction_dict['logits' + suffix] = output + prediction_dict['probs' + suffix] = probs[:, 1] elif loss_type == LossType.CLASSIFICATION: if num_class == 1: output = tf.squeeze(output, axis=1) @@ -103,7 +109,8 @@ def build_rtp_output_dict(self): binary_loss_set = { LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, - LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS + LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, + LossType.JRC_LOSS } if loss_types & binary_loss_set: if 'probs' in self._prediction_dict: @@ -144,7 +151,7 @@ def _build_loss_impl(self, binary_loss_type = { LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, - LossType.PAIRWISE_LOGISTIC_LOSS + LossType.PAIRWISE_LOGISTIC_LOSS, LossType.JRC_LOSS } if loss_type == LossType.CLASSIFICATION: loss_name = loss_name if loss_name else 'cross_entropy_loss' + suffix @@ -198,7 +205,15 @@ def build_loss_graph(self): loss_name=loss.loss_name, loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): - loss_dict[loss_name] = loss_value * loss.weight + if loss.learn_loss_weight: + uncertainty = tf.Variable(0, name="%s_loss_weight" % loss_name, dtype=tf.float32) + tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) + 
if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: + loss_dict[loss_name] = 0.5 * tf.exp(-uncertainty) * loss_value + 0.5 * uncertainty + else: + loss_dict[loss_name] = tf.exp(-uncertainty) * loss_value + 0.5 * uncertainty + else: + loss_dict[loss_name] = loss_value * loss.weight self._loss_dict.update(loss_dict) diff --git a/easy_rec/python/protos/loss.proto b/easy_rec/python/protos/loss.proto index 7004972a0..9ec7c78c9 100644 --- a/easy_rec/python/protos/loss.proto +++ b/easy_rec/python/protos/loss.proto @@ -16,12 +16,14 @@ enum LossType { BINARY_FOCAL_LOSS = 10; PAIRWISE_FOCAL_LOSS = 11; PAIRWISE_LOGISTIC_LOSS = 12; + JRC_LOSS = 13; } message Loss { required LossType loss_type = 1; - required float weight = 2 [default = 1.0]; + optional float weight = 2 [default = 1.0]; optional string loss_name = 3; + optional bool learn_loss_weight = 4 [default = false]; oneof loss_param { F1ReweighedLoss f1_reweighted_loss = 101; SoftmaxCrossEntropyWithNegativeMining softmax_loss = 102; @@ -31,6 +33,7 @@ message Loss { PairwiseLoss pairwise_loss = 106; PairwiseFocalLoss pairwise_focal_loss = 107; PairwiseLogisticLoss pairwise_logistic_loss = 108; + JRCLoss jrc_loss = 109; } }; @@ -86,3 +89,8 @@ message PairwiseLogisticLoss { optional float hinge_margin = 3 [default = 1.0]; optional float ohem_ratio = 4 [default = 1.0]; } + +message JRCLoss { + required string session_name = 1; + optional float alpha = 2 [default = 0.5]; +} \ No newline at end of file diff --git a/easy_rec/python/tools/feature_selection.py b/easy_rec/python/tools/feature_selection.py index 295698013..05b193897 100644 --- a/easy_rec/python/tools/feature_selection.py +++ b/easy_rec/python/tools/feature_selection.py @@ -20,7 +20,7 @@ import matplotlib.pyplot as plt # NOQA tf.app.flags.DEFINE_string('model_type', 'variational_dropout', - 'feature selection model tyoe') + 'feature selection model type') tf.app.flags.DEFINE_string('config_path', '', 'feature selection model config path') 
tf.app.flags.DEFINE_string('checkpoint_path', None, diff --git a/easy_rec/python/utils/load_class.py b/easy_rec/python/utils/load_class.py index 5db92a05f..2da1e4e41 100644 --- a/easy_rec/python/utils/load_class.py +++ b/easy_rec/python/utils/load_class.py @@ -37,6 +37,8 @@ def load_by_path(path): path = path.strip() if path == '' or path is None: return None + if 'lambda' in path: + return eval(path) components = path.split('.') if components[0] == 'tf': components[0] = 'tensorflow' diff --git a/pai_jobs/run.py b/pai_jobs/run.py index e0e861a97..ed02c73c5 100644 --- a/pai_jobs/run.py +++ b/pai_jobs/run.py @@ -381,7 +381,7 @@ def main(argv): # TODO: support multi-worker evaluation if not FLAGS.distribute_eval: assert len( - FLAGS.worker_hosts.split(',')) == 1, 'evaluate only need 1 woker' + FLAGS.worker_hosts.split(',')) == 1, 'evaluate only need 1 worker' config_util.auto_expand_share_feature_configs(pipeline_config) if FLAGS.eval_tables: From c7476bba10c9e7ad0d2b86481dd558efbb8d6b4b Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 4 Apr 2023 19:39:13 +0800 Subject: [PATCH 12/54] [feat]: add jrc loss --- easy_rec/python/loss/jrc_loss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/easy_rec/python/loss/jrc_loss.py b/easy_rec/python/loss/jrc_loss.py index 930431da7..51cad279b 100644 --- a/easy_rec/python/loss/jrc_loss.py +++ b/easy_rec/python/loss/jrc_loss.py @@ -14,7 +14,7 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name='') Args: labels: a `Tensor` with shape [batch_size]. e.g. click or not click in the session. logits: a `Tensor` with shape [batch_size, 2]. e.g. the value of last neuron before activation. - session_ids: a `Tensor` with shape [batch_size, 1]. Session ids of each sample, used to max GAUC metric. e.g. user_id + session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. 
user_id alpha: the weight to balance ranking loss and calibration loss auto_weight: bool, whether to learn loss weight between ranking loss and calibration loss name: the name of loss @@ -31,7 +31,7 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name='') # Mask: shape [B, B], mask[i,j]=1 indicates the i-th sample # and j-th sample are in the same context - mask = tf.equal(session_ids, tf.transpose(session_ids)) + mask = tf.equal(tf.expand_dims(session_ids, 1), tf.expand_dims(session_ids, 0)) # Tile logits and label: [B, 2]->[B, B, 2] logits = tf.tile(tf.expand_dims(logits, 1), [1, batch_size, 1]) From 7f6ee53b1fd8bd21d1b4125966c86e6f483a83bc Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 4 Apr 2023 20:01:01 +0800 Subject: [PATCH 13/54] [feat]: add jrc loss --- easy_rec/python/loss/jrc_loss.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/easy_rec/python/loss/jrc_loss.py b/easy_rec/python/loss/jrc_loss.py index 51cad279b..18b372507 100644 --- a/easy_rec/python/loss/jrc_loss.py +++ b/easy_rec/python/loss/jrc_loss.py @@ -11,6 +11,8 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name=''): """Joint Optimization of Ranking and Calibration with Contextualized Hybrid Model. + https://arxiv.org/abs/2208.06164 + Args: labels: a `Tensor` with shape [batch_size]. e.g. click or not click in the session. logits: a `Tensor` with shape [batch_size, 2]. e.g. the value of last neuron before activation. 
@@ -32,6 +34,7 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name='') # Mask: shape [B, B], mask[i,j]=1 indicates the i-th sample # and j-th sample are in the same context mask = tf.equal(tf.expand_dims(session_ids, 1), tf.expand_dims(session_ids, 0)) + mask = tf.to_float(mask) # Tile logits and label: [B, 2]->[B, B, 2] logits = tf.tile(tf.expand_dims(logits, 1), [1, batch_size, 1]) @@ -40,7 +43,7 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name='') # Set logits that are not in the same context to -inf mask3d = tf.expand_dims(mask, 2) y = y * mask3d - logits = logits + (1 - mask) * -1e9 + logits = logits + (1 - mask3d) * -1e9 y_neg, y_pos = y[:, :, 0], y[:, :, 1] l_neg, l_pos = logits[:, :, 0], logits[:, :, 1] From 98f9ec43fae686889f105ca09aa847805439bb43 Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 7 Apr 2023 11:54:08 +0800 Subject: [PATCH 14/54] [feat]: add jrc loss --- easy_rec/python/builders/loss_builder.py | 8 +++-- easy_rec/python/input/input.py | 38 +++++++++++------------ easy_rec/python/layers/din.py | 8 +++-- easy_rec/python/loss/jrc_loss.py | 32 ++++++++++++------- easy_rec/python/model/multi_task_model.py | 9 ++++-- easy_rec/python/model/rank_model.py | 28 ++++++++++------- easy_rec/python/protos/loss.proto | 2 +- 7 files changed, 74 insertions(+), 51 deletions(-) diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index 5427c0d54..cb10d870d 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -5,10 +5,10 @@ import tensorflow as tf from easy_rec.python.loss.focal_loss import sigmoid_focal_loss_with_logits +from easy_rec.python.loss.jrc_loss import jrc_loss from easy_rec.python.loss.pairwise_loss import pairwise_focal_loss from easy_rec.python.loss.pairwise_loss import pairwise_logistic_loss from easy_rec.python.loss.pairwise_loss import pairwise_loss -from easy_rec.python.loss.jrc_loss import 
jrc_loss from easy_rec.python.protos.loss_pb2 import LossType from easy_rec.python.loss.f1_reweight_loss import f1_reweight_sigmoid_cross_entropy # NOQA @@ -42,9 +42,11 @@ def build(loss_type, labels=label, predictions=pred, weights=loss_weight, **kwargs) elif loss_type == LossType.JRC_LOSS: alpha = 0.5 if loss_param is None else loss_param.alpha - auto_weight = False if loss_param is None else not loss_param.HasField('alpha') + auto_weight = False if loss_param is None else not loss_param.HasField( + 'alpha') session = kwargs.get('session_ids', None) - return jrc_loss(label, pred, session, alpha, auto_weight=auto_weight, name=loss_name) + return jrc_loss( + label, pred, session, alpha, auto_weight=auto_weight, name=loss_name) elif loss_type == LossType.PAIR_WISE_LOSS: session = kwargs.get('session_ids', None) margin = 0 if loss_param is None else loss_param.margin diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index 09d4c299b..bef412460 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -253,6 +253,8 @@ def create_multi_placeholders(self, export_config): inputs = {} for fid in effective_fids: input_name = self._input_fields[fid] + if input_name == sample_weight_field: + continue if placeholder_named_by_input: placeholder_name = input_name else: @@ -482,8 +484,7 @@ def _parse_raw_feature(self, fc, parsed_dict, field_dict): if fc.raw_input_dim > 1: check_list = [ tf.py_func( - check_split, - [vals, fc.separator, fc.raw_input_dim, input_0], + check_split, [vals, fc.separator, fc.raw_input_dim, input_0], Tout=tf.bool) ] if self._check_mode else [] with tf.control_dependencies(check_list): @@ -512,19 +513,17 @@ def _parse_raw_feature(self, fc, parsed_dict, field_dict): parsed_dict[feature_name] = emb else: parsed_dict[feature_name] = tf.sparse_to_dense( - tmp_fea.indices, - [tf.shape(field_dict[input_0])[0], fc.raw_input_dim], - tmp_vals, - default_value=0) + tmp_fea.indices, + 
[tf.shape(field_dict[input_0])[0], fc.raw_input_dim], + tmp_vals, + default_value=0) elif fc.HasField('seq_multi_sep') and fc.HasField('combiner'): check_list = [ - tf.py_func( - check_string_to_number, [vals, input_0], - Tout=tf.bool) + tf.py_func(check_string_to_number, [vals, input_0], Tout=tf.bool) ] if self._check_mode else [] with tf.control_dependencies(check_list): - emb = tf.string_to_number(vals, tf.float32, - name='raw_fea_to_flt_%s' % input_0) + emb = tf.string_to_number( + vals, tf.float32, name='raw_fea_to_flt_%s' % input_0) if fc.combiner == 'max': emb = tf.segment_max(emb, segment_ids) elif fc.combiner == 'sum': @@ -537,13 +536,13 @@ def _parse_raw_feature(self, fc, parsed_dict, field_dict): assert False, 'unsupported combine operator: ' + fc.combiner parsed_dict[feature_name] = emb else: - check_list = [ - tf.py_func( - check_string_to_number, [field_dict[input_0], input_0], - Tout=tf.bool) - ] if self._check_mode else [] - with tf.control_dependencies(check_list): - parsed_dict[feature_name] = tf.string_to_number( + check_list = [ + tf.py_func( + check_string_to_number, [field_dict[input_0], input_0], + Tout=tf.bool) + ] if self._check_mode else [] + with tf.control_dependencies(check_list): + parsed_dict[feature_name] = tf.string_to_number( field_dict[input_0], tf.float32) elif field_dict[input_0].dtype in [ tf.int32, tf.int64, tf.double, tf.float32 @@ -557,7 +556,8 @@ def _parse_raw_feature(self, fc, parsed_dict, field_dict): fc.max_val - fc.min_val) if fc.HasField('normalizer_fn'): - logging.info('apply normalizer_fn %s to `%s`' % (fc.normalizer_fn, feature_name)) + logging.info('apply normalizer_fn %s to `%s`' % + (fc.normalizer_fn, feature_name)) parsed_dict[feature_name] = self._normalizer_fn[feature_name]( parsed_dict[feature_name]) diff --git a/easy_rec/python/layers/din.py b/easy_rec/python/layers/din.py index 717dd9789..81f661165 100644 --- a/easy_rec/python/layers/din.py +++ b/easy_rec/python/layers/din.py @@ -27,10 +27,12 @@ def 
__call__(self, inputs, training=None, **kwargs): target_emb_size = target_feature.shape.as_list()[-1] seq_emb_size = keys.shape.as_list()[-1] if target_emb_size != seq_emb_size: - logging.info(' the embedding size of sequence [%d] and target item [%d] is not equal' - ' in feature group: %s', seq_emb_size, target_emb_size, self.name) + logging.info( + ' the embedding size of sequence [%d] and target item [%d] is not equal' + ' in feature group: %s', seq_emb_size, target_emb_size, self.name) if target_emb_size < seq_emb_size: - query = tf.pad(target_feature, [[0, 0], [0, seq_emb_size-target_emb_size]]) + query = tf.pad(target_feature, + [[0, 0], [0, seq_emb_size - target_emb_size]]) else: assert False, 'the embedding size of target item is larger than the one of sequence' diff --git a/easy_rec/python/loss/jrc_loss.py b/easy_rec/python/loss/jrc_loss.py index 18b372507..fc8266b2c 100644 --- a/easy_rec/python/loss/jrc_loss.py +++ b/easy_rec/python/loss/jrc_loss.py @@ -8,7 +8,12 @@ tf = tf.compat.v1 -def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name=''): +def jrc_loss(labels, + logits, + session_ids, + alpha=0.5, + auto_weight=False, + name=''): """Joint Optimization of Ranking and Calibration with Contextualized Hybrid Model. 
https://arxiv.org/abs/2208.06164 @@ -22,7 +27,8 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name='') name: the name of loss """ loss_name = name if name else 'jrc_loss' - logging.info('[{}] alpha: {}'.format(loss_name, alpha)) + logging.info('[{}] alpha: {}, auto_weight: {}'.format(loss_name, alpha, + auto_weight)) ce_loss = tf.losses.sparse_softmax_cross_entropy(labels, logits) @@ -33,7 +39,8 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name='') # Mask: shape [B, B], mask[i,j]=1 indicates the i-th sample # and j-th sample are in the same context - mask = tf.equal(tf.expand_dims(session_ids, 1), tf.expand_dims(session_ids, 0)) + mask = tf.equal( + tf.expand_dims(session_ids, 1), tf.expand_dims(session_ids, 0)) mask = tf.to_float(mask) # Tile logits and label: [B, 2]->[B, B, 2] @@ -42,24 +49,27 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, auto_weight=False, name='') # Set logits that are not in the same context to -inf mask3d = tf.expand_dims(mask, 2) - y = y * mask3d + y = tf.to_float(y) * mask3d logits = logits + (1 - mask3d) * -1e9 y_neg, y_pos = y[:, :, 0], y[:, :, 1] l_neg, l_pos = logits[:, :, 0], logits[:, :, 1] # Compute list-wise generative loss -log p(x|y, z) - loss_pos = -tf.sum(y_pos * tf.nn.log_softmax(l_pos, axis=0), axis=0) - loss_neg = -tf.sum(y_neg * tf.nn.log_softmax(l_neg, axis=0), axis=0) - ge_loss = tf.mean((loss_pos+loss_neg)/tf.sum(mask, axis=0)) + loss_pos = -tf.reduce_sum(y_pos * tf.nn.log_softmax(l_pos, axis=0), axis=0) + loss_neg = -tf.reduce_sum(y_neg * tf.nn.log_softmax(l_neg, axis=0), axis=0) + ge_loss = tf.reduce_mean((loss_pos + loss_neg) / tf.reduce_sum(mask, axis=0)) # The final JRC model if auto_weight: - uncertainty1 = tf.Variable(0, name="%s_ranking_loss_weight" % loss_name, dtype=tf.float32) + uncertainty1 = tf.Variable( + 0, name='%s_ranking_loss_weight' % loss_name, dtype=tf.float32) tf.summary.scalar('loss/%s_ranking_uncertainty' % loss_name, 
uncertainty1) - uncertainty2 = tf.Variable(0, name="%s_calibration_loss_weight" % loss_name, dtype=tf.float32) - tf.summary.scalar('loss/%s_calibration_uncertainty' % loss_name, uncertainty2) + uncertainty2 = tf.Variable( + 0, name='%s_calibration_loss_weight' % loss_name, dtype=tf.float32) + tf.summary.scalar('loss/%s_calibration_uncertainty' % loss_name, + uncertainty2) loss = tf.exp(-uncertainty1) * ce_loss + 0.5 * uncertainty1 loss += tf.exp(-uncertainty2) * ge_loss + 0.5 * uncertainty2 else: - loss = alpha*ce_loss + (1-alpha)*ge_loss + loss = alpha * ce_loss + (1 - alpha) * ge_loss return loss diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index 0e49249ea..43e5663ce 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -126,12 +126,15 @@ def build_loss_graph(self): loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): if loss.learn_loss_weight: - uncertainty = tf.Variable(0, name="%s_loss_weight" % loss_name, dtype=tf.float32) + uncertainty = tf.Variable( + 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: - loss_dict[loss_name] = 0.5 * tf.exp(-uncertainty) * loss_value + 0.5 * uncertainty + loss_dict[loss_name] = 0.5 * tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty else: - loss_dict[loss_name] = tf.exp(-uncertainty) * loss_value + 0.5 * uncertainty + loss_dict[loss_name] = tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty else: loss_dict[loss_name] = loss_value * loss.weight diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index 7a2b0dc76..25eff23ea 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -206,12 +206,15 @@ def build_loss_graph(self): loss_param=loss_param) for loss_name, loss_value in 
loss_ops.items(): if loss.learn_loss_weight: - uncertainty = tf.Variable(0, name="%s_loss_weight" % loss_name, dtype=tf.float32) + uncertainty = tf.Variable( + 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: - loss_dict[loss_name] = 0.5 * tf.exp(-uncertainty) * loss_value + 0.5 * uncertainty + loss_dict[loss_name] = 0.5 * tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty else: - loss_dict[loss_name] = tf.exp(-uncertainty) * loss_value + 0.5 * uncertainty + loss_dict[loss_name] = tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty else: loss_dict[loss_name] = loss_value * loss.weight @@ -237,12 +240,13 @@ def _build_metric_impl(self, binary_loss_set = { LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, - LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS + LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, + LossType.JRC_LOSS } metric_dict = {} if metric.WhichOneof('metric') == 'auc': assert loss_type & binary_loss_set - if num_class == 1: + if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) metric_dict['auc' + suffix] = metrics_tf.auc( label, @@ -258,7 +262,7 @@ def _build_metric_impl(self, raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'gauc': assert loss_type & binary_loss_set - if num_class == 1: + if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) uids = self._feature_dict[metric.gauc.uid_field] if isinstance(uids, tf.sparse.SparseTensor): @@ -281,7 +285,7 @@ def _build_metric_impl(self, raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'session_auc': assert loss_type & binary_loss_set - if num_class == 1: + if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = 
tf.to_int64(self._labels[label_name]) metric_dict['session_auc' + suffix] = metrics_lib.session_auc( label, @@ -299,7 +303,7 @@ def _build_metric_impl(self, raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'max_f1': assert loss_type & binary_loss_set - if num_class == 1: + if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) metric_dict['max_f1' + suffix] = metrics_lib.max_f1( label, self._prediction_dict['logits' + suffix]) @@ -376,11 +380,13 @@ def build_metric_graph(self, eval_config): def _get_outputs_impl(self, loss_type, num_class=1, suffix=''): binary_loss_set = { - LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, - LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, - LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS + LossType.F1_REWEIGHTED_LOSS, LossType.JRC_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS } if loss_type in binary_loss_set: + return ['probs' + suffix, 'logits' + suffix] + if loss_type == LossType.CLASSIFICATION: if num_class == 1: return ['probs' + suffix, 'logits' + suffix] else: diff --git a/easy_rec/python/protos/loss.proto b/easy_rec/python/protos/loss.proto index 9ec7c78c9..c5b74f47d 100644 --- a/easy_rec/python/protos/loss.proto +++ b/easy_rec/python/protos/loss.proto @@ -93,4 +93,4 @@ message PairwiseLogisticLoss { message JRCLoss { required string session_name = 1; optional float alpha = 2 [default = 0.5]; -} \ No newline at end of file +} From 958621226f90faede38976f2f73aeaca731cf84d Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 7 Apr 2023 20:36:49 +0800 Subject: [PATCH 15/54] [feat]: add jrc loss --- easy_rec/python/builders/loss_builder.py | 2 +- easy_rec/python/input/augment.py | 55 ++++ easy_rec/python/input/input.py | 3 +- easy_rec/python/layers/bst.py | 110 ++++--- .../layers/multihead_cross_attention.py | 112 ++++--- 
easy_rec/python/layers/sequence_encoder.py | 2 +- easy_rec/python/loss/nce_loss.py | 34 ++ easy_rec/python/main.py | 1 + easy_rec/python/model/easy_rec_model.py | 4 +- easy_rec/python/model/match_model.py | 3 +- easy_rec/python/protos/layer.proto | 4 + easy_rec/python/test/train_eval_test.py | 5 + samples/model_config/bst_cl_on_taobao.config | 304 ++++++++++++++++++ 13 files changed, 539 insertions(+), 100 deletions(-) create mode 100644 easy_rec/python/input/augment.py create mode 100644 easy_rec/python/loss/nce_loss.py create mode 100644 samples/model_config/bst_cl_on_taobao.config diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index cb10d870d..7459372a5 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -24,7 +24,7 @@ def build(loss_type, num_class=1, loss_param=None, **kwargs): - loss_name = kwargs.pop('loss_name') + loss_name = kwargs.pop('loss_name') if 'loss_name' in kwargs else 'unknown' if loss_type == LossType.CLASSIFICATION: if num_class == 1: return tf.losses.sigmoid_cross_entropy( diff --git a/easy_rec/python/input/augment.py b/easy_rec/python/input/augment.py new file mode 100644 index 000000000..75298c430 --- /dev/null +++ b/easy_rec/python/input/augment.py @@ -0,0 +1,55 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import tensorflow as tf +from easy_rec.python.utils.shape_utils import get_shape_list + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +def assign(input_tensor, position=None, value=None): + input_tensor[tuple(position)] = value + return input_tensor + + +def item_mask(aug_data, length, gamma=0.3): + length1 = tf.cast(length, dtype=tf.float32) + num_mask = tf.cast(tf.math.floor(length1 * gamma), dtype=tf.int32) + seq = tf.range(length, dtype=tf.int32) + mask_index = tf.random.shuffle(seq)[:num_mask] + masked_item_seq = aug_data + masked_item_seq = tf.py_func(assign, inp=[masked_item_seq, [mask_index], 0], Tout=masked_item_seq.dtype) + return masked_item_seq, length + + +def item_crop(aug_data, length, eta=0.6): + length1 = tf.cast(length, dtype=tf.float32) + max_length = tf.cast(get_shape_list(aug_data)[0], dtype=tf.int32) + embedding_size = get_shape_list(aug_data)[1] + + num_left = tf.cast(tf.math.floor(length1 * eta), dtype=tf.int32) + crop_begin = tf.random.uniform([1], minval=0, maxval=length - num_left, dtype=tf.int32)[0] + cropped_item_seq = tf.zeros([get_shape_list(aug_data)[0], embedding_size]) + cropped_item_seq = tf.where(crop_begin + num_left < max_length, + tf.concat([aug_data[crop_begin:crop_begin + num_left], + cropped_item_seq[:max_length - num_left]], axis=0), + tf.concat([aug_data[crop_begin:], cropped_item_seq[:crop_begin]], axis=0)) + return cropped_item_seq, num_left + + +def augment(x): + seq, length = x + flag = tf.range(2, dtype=tf.int32) + flag1 = tf.random.shuffle(flag)[:1][0] + aug_seq, aug_len = tf.cond(tf.equal(flag1, 0), lambda: item_crop(seq, length), lambda: item_mask(seq, length)) + return [aug_seq, aug_len] + + +def input_aug_data(original_data, seq_len): + print("seq_len:", seq_len) + lengths = tf.cast(seq_len, dtype=tf.int32) + aug_seq1, aug_len1 = tf.map_fn(augment, elems=(original_data, lengths), dtype=[tf.float32, tf.int32]) + aug_seq2, aug_len2 = tf.map_fn(augment, elems=(original_data, lengths), dtype=[tf.float32, 
tf.int32]) + aug_seq1 = tf.reshape(aug_seq1, tf.shape(original_data)) + aug_seq2 = tf.reshape(aug_seq2, tf.shape(original_data)) + return aug_seq1, aug_seq2, aug_len1, aug_len2 diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index bef412460..4aec1ed17 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -141,7 +141,8 @@ def __init__(self, model_name = model_config.WhichOneof('model') if model_name in {'mmoe', 'esmm', 'dbmtl', 'simple_multi_task', 'ple'}: model = getattr(model_config, model_name) - for tower in model.task_towers: + towers = [model.ctr_tower, model.cvr_tower] if model_name == 'esmm' else model.task_towers + for tower in towers: metrics = tower.metrics_set for metric in metrics: metric_name = metric.WhichOneof('metric') diff --git a/easy_rec/python/layers/bst.py b/easy_rec/python/layers/bst.py index 87e12770c..466676fd9 100644 --- a/easy_rec/python/layers/bst.py +++ b/easy_rec/python/layers/bst.py @@ -5,18 +5,51 @@ from easy_rec.python.layers import multihead_cross_attention from easy_rec.python.utils.activation import get_activation from easy_rec.python.utils.shape_utils import get_shape_list - +from easy_rec.python.loss.nce_loss import nce_loss +from easy_rec.python.input.augment import input_aug_data # from tensorflow.python.keras.layers import Layer class BST(object): - def __init__(self, config, l2_reg, name='din', **kwargs): + def __init__(self, config, l2_reg, name='bst', **kwargs): # super(BST, self).__init__(name=name, **kwargs) self.name = name self.l2_reg = l2_reg self.config = config + def encode(self, seq_input, max_position): + seq_fea = multihead_cross_attention.embedding_postprocessor( + seq_input, + position_embedding_name=self.name + '/position_embeddings', + max_position_embeddings=max_position, + reuse_position_embedding=tf.AUTO_REUSE) + + n = tf.count_nonzero(seq_input, axis=-1) + seq_mask = tf.cast(n > 0, tf.int32) + + attention_mask = 
multihead_cross_attention.create_attention_mask_from_input_mask( + from_tensor=seq_fea, to_mask=seq_mask) + + hidden_act = get_activation(self.config.hidden_act) + attention_fea = multihead_cross_attention.transformer_encoder( + seq_fea, + hidden_size=self.config.hidden_size, + num_hidden_layers=self.config.num_hidden_layers, + num_attention_heads=self.config.num_attention_heads, + attention_mask=attention_mask, + intermediate_size=self.config.intermediate_size, + intermediate_act_fn=hidden_act, + hidden_dropout_prob=self.config.hidden_dropout_prob, + attention_probs_dropout_prob=self.config.attention_probs_dropout_prob, + initializer_range=self.config.initializer_range, + name=self.name + '/bst', + reuse=tf.AUTO_REUSE) + # attention_fea shape: [batch_size, seq_length, hidden_size] + out_fea = attention_fea[:, 0, :] # target feature + print('bst output shape:', out_fea.shape) + return out_fea + def __call__(self, inputs, training=None, **kwargs): seq_features, target_feature = inputs if not training: @@ -36,58 +69,49 @@ def __call__(self, inputs, training=None, **kwargs): with tf.control_dependencies([valid_len]): # seq_input: [batch_size, seq_len, embed_size] seq_input = tf.concat(seq_embeds, axis=-1) + if target_feature is not None: + max_position += 1 + + seq_embed_size = seq_input.shape.as_list()[-1] + if seq_embed_size != self.config.hidden_size: + seq_input = tf.layers.dense( + seq_input, + self.config.hidden_size, + activation=tf.nn.relu, + kernel_regularizer=self.l2_reg) # seq_len: [batch_size, 1], the true length of each sequence seq_len = seq_features[0][1] - seq_embed_size = seq_input.shape.as_list()[-1] + + if self.config.need_contrastive_learning: + assert 'loss_dict' in kwargs, "no `loss_dict` in kwargs of bst layer: %s" % self.name + loss = self.contrastive_loss(seq_input, seq_len, max_position) + loss *= self.config.contrastive_loss_weight + loss_dict = kwargs['loss_dict'] + loss_dict['contrastive_loss'] = loss + 
tf.summary.scalar('loss/%s_contrastive_loss' % self.name, loss) + if target_feature is not None: target_size = target_feature.shape.as_list()[-1] assert seq_embed_size == target_size, 'the embedding size of sequence and target item is not equal' \ ' in feature group:' + self.name + if target_size != self.config.hidden_size: + target_feature = tf.layers.dense( + target_feature, + self.config.hidden_size, + activation=tf.nn.relu, + kernel_regularizer=self.l2_reg) # target_feature: [batch_size, 1, embed_size] target_feature = tf.expand_dims(target_feature, 1) # seq_input: [batch_size, seq_len+1, embed_size] seq_input = tf.concat([target_feature, seq_input], axis=1) - max_seq_len += 1 - seq_len += 1 - max_position += 1 - if seq_embed_size != self.config.hidden_size: - seq_input = tf.layers.dense( - seq_input, - self.config.hidden_size, - activation=tf.nn.relu, - kernel_regularizer=self.l2_reg) - - seq_fea = multihead_cross_attention.embedding_postprocessor( - seq_input, - position_embedding_name=self.name + '/position_embeddings', - max_position_embeddings=max_position) - seq_mask = tf.map_fn( - fn=lambda t: dynamic_mask(t, max_seq_len), elems=tf.to_int32(seq_len)) - attention_mask = multihead_cross_attention.create_attention_mask_from_input_mask( - from_tensor=seq_fea, to_mask=seq_mask) - - hidden_act = get_activation(self.config.hidden_act) - attention_fea = multihead_cross_attention.transformer_encoder( - seq_fea, - hidden_size=self.config.hidden_size, - num_hidden_layers=self.config.num_hidden_layers, - num_attention_heads=self.config.num_attention_heads, - attention_mask=attention_mask, - intermediate_size=self.config.intermediate_size, - intermediate_act_fn=hidden_act, - hidden_dropout_prob=self.config.hidden_dropout_prob, - attention_probs_dropout_prob=self.config.attention_probs_dropout_prob, - initializer_range=self.config.initializer_range, - name=self.name + '/bst') - # attention_fea shape: [batch_size, seq_length, hidden_size] - out_fea = attention_fea[:, 
0, :] # target feature - print('bst output shape:', out_fea.shape) - return out_fea + return self.encode(seq_input, max_position) + def contrastive_loss(self, seq_input, seq_len, max_position): + aug_seq1, aug_seq2, aug_len1, aug_len2 = input_aug_data(seq_input, seq_len) + seq_output1 = self.encode(aug_seq1, max_position) + seq_output2 = self.encode(aug_seq2, max_position) + loss = nce_loss(seq_output1, seq_output2) + return loss -def dynamic_mask(x, max_len): - ones = tf.ones(shape=tf.stack([x]), dtype=tf.int32) - zeros = tf.zeros(shape=tf.stack([max_len - x]), dtype=tf.int32) - return tf.concat([ones, zeros], axis=0) diff --git a/easy_rec/python/layers/multihead_cross_attention.py b/easy_rec/python/layers/multihead_cross_attention.py index 92b2b64df..41bde3c5e 100644 --- a/easy_rec/python/layers/multihead_cross_attention.py +++ b/easy_rec/python/layers/multihead_cross_attention.py @@ -52,7 +52,8 @@ def attention_layer(from_tensor, do_return_2d_tensor=False, batch_size=None, from_seq_length=None, - to_seq_length=None): + to_seq_length=None, + reuse=None): """Performs multi-headed attention from `from_tensor` to `to_tensor`. This is an implementation of multi-headed attention based on "Attention is all you Need". @@ -95,6 +96,7 @@ def attention_layer(from_tensor, of the 3D version of the `from_tensor`. to_seq_length: (Optional) If the input is 2D, this might be the seq length of the 3D version of the `to_tensor`. 
+ reuse: whether to reuse this layer Returns: float Tensor of shape [batch_size, from_seq_length, @@ -144,27 +146,30 @@ def transpose_for_scores(input_tensor, batch_size, num_attention_heads, # `query_layer` = [B*F, N*H] query_layer = tf.layers.dense( - from_tensor_2d, - num_attention_heads * size_per_head, - activation=query_act, - name='query', - kernel_initializer=create_initializer(initializer_range)) + from_tensor_2d, + num_attention_heads * size_per_head, + activation=query_act, + name='query', + kernel_initializer=create_initializer(initializer_range), + reuse=reuse) # `key_layer` = [B*T, N*H] key_layer = tf.layers.dense( - to_tensor_2d, - num_attention_heads * size_per_head, - activation=key_act, - name='key', - kernel_initializer=create_initializer(initializer_range)) + to_tensor_2d, + num_attention_heads * size_per_head, + activation=key_act, + name='key', + kernel_initializer=create_initializer(initializer_range), + reuse=reuse) # `value_layer` = [B*T, N*H] value_layer = tf.layers.dense( - to_tensor_2d, - num_attention_heads * size_per_head, - activation=value_act, - name='value', - kernel_initializer=create_initializer(initializer_range)) + to_tensor_2d, + num_attention_heads * size_per_head, + activation=value_act, + name='value', + kernel_initializer=create_initializer(initializer_range), + reuse=reuse) # `query_layer` = [B, N, F, H] query_layer = transpose_for_scores(query_layer, batch_size, @@ -232,16 +237,17 @@ def transpose_for_scores(input_tensor, batch_size, num_attention_heads, def transformer_encoder(input_tensor, - attention_mask=None, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - intermediate_act_fn=gelu, - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - initializer_range=0.02, - name='transformer'): + attention_mask=None, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + intermediate_act_fn=gelu, + hidden_dropout_prob=0.1, + 
attention_probs_dropout_prob=0.1, + initializer_range=0.02, + reuse=None, + name='transformer'): """Multi-headed, multi-layer Transformer from "Attention is All You Need". This is almost an exact implementation of the original Transformer encoder. @@ -304,21 +310,23 @@ def transformer_encoder(input_tensor, with tf.variable_scope('self'): # [batch_size * from_seq_length, num_attention_heads * size_per_head] attention_output = attention_layer( - from_tensor=layer_input, - to_tensor=layer_input, - size_per_head=attention_head_size, - num_attention_heads=num_attention_heads, - attention_mask=attention_mask, - attention_probs_dropout_prob=attention_probs_dropout_prob, - initializer_range=initializer_range, - do_return_2d_tensor=True, - batch_size=batch_size, - from_seq_length=seq_length, - to_seq_length=seq_length) + from_tensor=layer_input, + to_tensor=layer_input, + size_per_head=attention_head_size, + num_attention_heads=num_attention_heads, + attention_mask=attention_mask, + attention_probs_dropout_prob=attention_probs_dropout_prob, + initializer_range=initializer_range, + do_return_2d_tensor=True, + batch_size=batch_size, + from_seq_length=seq_length, + to_seq_length=seq_length, + reuse=reuse + ) # Run a linear projection of `hidden_size` then add a residual # with `layer_input`. - with tf.variable_scope('output'): + with tf.variable_scope('output', reuse=reuse): attention_output = tf.layers.dense( attention_output, hidden_size, @@ -327,7 +335,7 @@ def transformer_encoder(input_tensor, attention_output = layer_norm(attention_output + layer_input) # The activation is only applied to the "intermediate" hidden layer. - with tf.variable_scope('intermediate'): + with tf.variable_scope('intermediate', reuse=reuse): intermediate_output = tf.layers.dense( attention_output, intermediate_size, @@ -335,7 +343,7 @@ def transformer_encoder(input_tensor, kernel_initializer=create_initializer(initializer_range)) # Down-project back to `hidden_size` then add the residual. 
- with tf.variable_scope('output'): + with tf.variable_scope('output', reuse=reuse): layer_output = tf.layers.dense( intermediate_output, hidden_size, @@ -632,16 +640,17 @@ def create_attention_mask_from_input_mask(from_tensor, to_mask): def embedding_postprocessor(input_tensor, - use_token_type=False, - token_type_ids=None, - token_type_vocab_size=16, - token_type_embedding_name='token_type_embeddings', - reuse_token_type=None, - use_position_embeddings=True, - position_embedding_name='position_embeddings', - initializer_range=0.02, - max_position_embeddings=512, - dropout_prob=0.1): + use_token_type=False, + token_type_ids=None, + token_type_vocab_size=16, + token_type_embedding_name='token_type_embeddings', + reuse_token_type=None, + use_position_embeddings=True, + reuse_position_embedding=None, + position_embedding_name='position_embeddings', + initializer_range=0.02, + max_position_embeddings=512, + dropout_prob=0.1): """Performs various post-processing on a word embedding tensor. Args: @@ -698,7 +707,8 @@ def embedding_postprocessor(input_tensor, if use_position_embeddings: assert_op = tf.assert_less_equal(seq_length, max_position_embeddings) with tf.control_dependencies([assert_op]): - full_position_embeddings = tf.get_variable( + with tf.variable_scope("position_embedding", reuse=reuse_position_embedding): + full_position_embeddings = tf.get_variable( name=position_embedding_name, shape=[max_position_embeddings, width], initializer=create_initializer(initializer_range)) diff --git a/easy_rec/python/layers/sequence_encoder.py b/easy_rec/python/layers/sequence_encoder.py index 80c90eafa..5286215d4 100644 --- a/easy_rec/python/layers/sequence_encoder.py +++ b/easy_rec/python/layers/sequence_encoder.py @@ -75,7 +75,7 @@ def __call__(self, features, group_name, is_training=True, *args, **kwargs): encoder_type = encoder.WhichOneof('encoder').lower() if encoder_type == 'bst': bst = BST(encoder.bst, self._l2_reg, name=group_name) - encoding = bst([seq_features, 
target_feature], is_training) + encoding = bst([seq_features, target_feature], is_training, **kwargs) outputs.append(encoding) elif encoder_type == 'din': din = DIN(encoder.din, self._l2_reg, name=group_name) diff --git a/easy_rec/python/loss/nce_loss.py b/easy_rec/python/loss/nce_loss.py new file mode 100644 index 000000000..7613384ab --- /dev/null +++ b/easy_rec/python/loss/nce_loss.py @@ -0,0 +1,34 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. + +import tensorflow as tf +from easy_rec.python.utils.shape_utils import get_shape_list + + +def mask_samples(batch_size): + part = tf.ones((batch_size, batch_size), bool) + diag_part = tf.linalg.diag_part(part) + diag_part = tf.fill(tf.shape(diag_part), False) + part = tf.linalg.set_diag(part, diag_part) + part_half = tf.concat([part, part], axis=1) + part_total = tf.concat([part_half, part_half], axis=0) + return part_total + + +def nce_loss(z_i, z_j, temp=1): + batch_size = get_shape_list(z_i)[0] + N = 2 * batch_size + z = tf.concat((z_i, z_j), axis=0) + sim = tf.matmul(z, tf.transpose(z)) / temp + sim_i_j = tf.matrix_diag_part(tf.slice(sim, [batch_size, 0], [batch_size, batch_size])) + sim_j_i = tf.matrix_diag_part(tf.slice(sim, [0, batch_size], [batch_size, batch_size])) + positive_samples = tf.reshape(tf.concat((sim_i_j, sim_j_i), axis=0), (N, 1)) + mask = mask_samples(batch_size) + negative_samples = tf.reshape(tf.boolean_mask(sim, mask), (N, -1)) + + labels = tf.zeros(N, dtype=tf.int32) + logits = tf.concat((positive_samples, negative_samples), axis=1) + + loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)) + + return loss diff --git a/easy_rec/python/main.py b/easy_rec/python/main.py index 1c7b82637..d74e8fe6e 100644 --- a/easy_rec/python/main.py +++ b/easy_rec/python/main.py @@ -610,6 +610,7 @@ def distribute_evaluate(pipeline_config, eval_result_file = os.path.join(model_dir, eval_result_filename) logging.info('save eval result to 
file %s' % eval_result_file) if cur_job_name == 'master': + print('eval_result = ', eval_result) logging.info('eval_result = {0}'.format(eval_result)) with gfile.GFile(eval_result_file, 'w') as ofile: result_to_write = {'eval_method': 'distribute'} diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index e28660c45..e3cdd31ba 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -110,7 +110,7 @@ def get_sequence_encoding(self, group_name=None, is_training=True): if group_name in self._sequence_encoding_by_group_name: return self._sequence_encoding_by_group_name[group_name] encoding = self._sequence_encoder(self._feature_dict, group_name, - is_training) + is_training, loss_dict=self._loss_dict) self._sequence_encoding_by_group_name[group_name] = encoding return encoding @@ -123,7 +123,7 @@ def get_sequence_encoding(self, group_name=None, is_training=True): encoding = self._sequence_encoding_by_group_name[group_name] else: encoding = self._sequence_encoder(self._feature_dict, group_name, - is_training) + is_training, loss_dict=self._loss_dict) self._sequence_encoding_by_group_name[group_name] = encoding if encoding is not None: seq_encoding.append(encoding) diff --git a/easy_rec/python/model/match_model.py b/easy_rec/python/model/match_model.py index 475ae6def..851c7eb38 100644 --- a/easy_rec/python/model/match_model.py +++ b/easy_rec/python/model/match_model.py @@ -174,11 +174,12 @@ def _build_point_wise_loss_graph(self): else: raise ValueError('invalid loss type: %s' % str(self._loss_type)) + kwargs = {'loss_name': loss_name} self._loss_dict[loss_name] = loss_builder.build( self._loss_type, label=label, pred=pred, - loss_weight=self._sample_weight) + loss_weight=self._sample_weight, **kwargs) # build kd loss kd_loss_dict = loss_builder.build_kd_loss(self.kd, self._prediction_dict, diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index 
a5917a38d..9d565a745 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -105,6 +105,10 @@ message BSTEncoder { required bool use_position_embeddings = 9 [default = true]; // The stddev of the truncated_normal_initializer for initializing all weight matrices required float initializer_range = 10 [default = 0.02]; + // need contrastive learning + required bool need_contrastive_learning = 11 [default = false]; + // the weight of contrastive learning loss + optional float contrastive_loss_weight = 12 [default = 1.0]; } message DINEncoder { diff --git a/easy_rec/python/test/train_eval_test.py b/easy_rec/python/test/train_eval_test.py index 57c1d79bd..cbdf95dd2 100644 --- a/easy_rec/python/test/train_eval_test.py +++ b/easy_rec/python/test/train_eval_test.py @@ -306,6 +306,11 @@ def test_bst(self): 'samples/model_config/bst_on_taobao.config', self._test_dir) self.assertTrue(self._success) + def test_bst_contrastive_learning(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/bst_cl_on_taobao.config', self._test_dir) + self.assertTrue(self._success) + def test_dcn(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/dcn_on_taobao.config', self._test_dir) diff --git a/samples/model_config/bst_cl_on_taobao.config b/samples/model_config/bst_cl_on_taobao.config new file mode 100644 index 000000000..77529db5e --- /dev/null +++ b/samples/model_config/bst_cl_on_taobao.config @@ -0,0 +1,304 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dbmtl_taobao_ckpt" + +train_config { + optimizer_config { + adam_optimizer { + learning_rate { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 1e-07 + } + } + } + use_moving_average: false + } + num_steps: 100 + sync_replicas: true + save_checkpoints_steps: 100 + 
log_step_count_steps: 100 +} + +eval_config { + metrics_set { + auc { + } + } +} + +data_config { + batch_size: 4096 + label_fields: "clk" + label_fields: "buy" + prefetch_size: 32 + input_type: CSVInput + input_fields { + input_name: "clk" + input_type: INT32 + } + input_fields { + input_name: "buy" + input_type: INT32 + } + input_fields { + input_name: "pid" + input_type: STRING + } + input_fields { + input_name: "adgroup_id" + input_type: STRING + } + input_fields { + input_name: "cate_id" + input_type: STRING + } + input_fields { + input_name: "campaign_id" + input_type: STRING + } + input_fields { + input_name: "customer" + input_type: STRING + } + input_fields { + input_name: "brand" + input_type: STRING + } + input_fields { + input_name: "user_id" + input_type: STRING + } + input_fields { + input_name: "cms_segid" + input_type: STRING + } + input_fields { + input_name: "cms_group_id" + input_type: STRING + } + input_fields { + input_name: "final_gender_code" + input_type: STRING + } + input_fields { + input_name: "age_level" + input_type: STRING + } + input_fields { + input_name: "pvalue_level" + input_type: STRING + } + input_fields { + input_name: "shopping_level" + input_type: STRING + } + input_fields { + input_name: "occupation" + input_type: STRING + } + input_fields { + input_name: "new_user_class_level" + input_type: STRING + } + input_fields { + input_name: "tag_category_list" + input_type: STRING + } + input_fields { + input_name: "tag_brand_list" + input_type: STRING + } + input_fields { + input_name: "price" + input_type: INT32 + } +} + +feature_config: { + features { + input_names: "pid" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "adgroup_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features { + input_names: "cate_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 + embedding_name: 'category' + } + features { + input_names: 
"campaign_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features { + input_names: "customer" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features { + input_names: "brand" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + embedding_name: 'brand' + } + features { + input_names: "user_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features { + input_names: "cms_segid" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features { + input_names: "cms_group_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features { + input_names: "final_gender_code" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "age_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "pvalue_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "shopping_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "occupation" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "new_user_class_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features : { + input_names: 'tag_category_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 10000 + embedding_dim: 16 + embedding_name: 'category' + } + features : { + input_names: 'tag_brand_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + embedding_name: 'brand' + } + features { + input_names: "price" + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 + } +} + +model_config { + model_class: "DBMTL" + feature_groups { + group_name: "all" + feature_names: "user_id" + feature_names: "cms_segid" + 
feature_names: "cms_group_id" + feature_names: "age_level" + feature_names: "pvalue_level" + feature_names: "shopping_level" + feature_names: "occupation" + feature_names: "new_user_class_level" + feature_names: "adgroup_id" + feature_names: "cate_id" + feature_names: "campaign_id" + feature_names: "customer" + feature_names: "brand" + feature_names: "price" + feature_names: "pid" + wide_deep: DEEP + } + + feature_groups { + group_name: "seq" + feature_names: "brand" + feature_names: "cate_id" + feature_names: "tag_category_list" + feature_names: "tag_brand_list" + sequence_encoders { + bst { + hidden_size: 256 + num_attention_heads: 4 + num_hidden_layers: 1 + intermediate_size: 512 + hidden_act: 'gelu' + max_position_embeddings: 50 + hidden_dropout_prob: 0.1 + attention_probs_dropout_prob: 0 + need_contrastive_learning: true + } + } + wide_deep: DEEP + } + + dbmtl { + bottom_dnn { + hidden_units: [1024, 512, 256] + } + task_towers { + tower_name: "ctr" + label_name: "clk" + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + dnn { + hidden_units: [256, 128, 64, 32] + } + relation_dnn { + hidden_units: [32] + } + weight: 1.0 + } + l2_regularization: 1e-6 + use_sequence_encoder: true + } + embedding_regularization: 5e-6 +} From 381c62b1ee5705fd0f8523a10ac7b5c5486cf8fa Mon Sep 17 00:00:00 2001 From: weisu Date: Sun, 23 Apr 2023 16:30:49 +0800 Subject: [PATCH 16/54] [feat]: add more logit --- easy_rec/python/input/augment.py | 25 +- easy_rec/python/layers/bst.py | 11 +- easy_rec/python/layers/dnn.py | 8 +- easy_rec/python/model/easy_rec_model.py | 14 +- easy_rec/python/protos/easy_rec_model.proto | 3 + easy_rec/python/protos/layer.proto | 2 + easy_rec/python/tools/__init__.py | 1 + easy_rec/python/tools/explainer/__init__.py | 1 + easy_rec/python/tools/explainer/deep_shap.py | 710 ++++++++++++++++++ easy_rec/python/tools/explainer/explainer.py | 506 +++++++++++++ .../tools/explainer/feature_importance.py | 50 ++ easy_rec/python/tools/explainer/methods.py | 641 
++++++++++++++++ easy_rec/python/tools/explainer/utils.py | 69 ++ easy_rec/python/utils/activation.py | 75 +- easy_rec/python/utils/io_util.py | 2 +- 15 files changed, 2044 insertions(+), 74 deletions(-) create mode 100644 easy_rec/python/tools/explainer/__init__.py create mode 100644 easy_rec/python/tools/explainer/deep_shap.py create mode 100644 easy_rec/python/tools/explainer/explainer.py create mode 100644 easy_rec/python/tools/explainer/feature_importance.py create mode 100644 easy_rec/python/tools/explainer/methods.py create mode 100644 easy_rec/python/tools/explainer/utils.py diff --git a/easy_rec/python/input/augment.py b/easy_rec/python/input/augment.py index 75298c430..47822c366 100644 --- a/easy_rec/python/input/augment.py +++ b/easy_rec/python/input/augment.py @@ -37,11 +37,32 @@ def item_crop(aug_data, length, eta=0.6): return cropped_item_seq, num_left +def item_reorder(aug_data, length, beta=0.6): + length1 = tf.cast(length,dtype=tf.float32) + num_reorder = tf.cast(tf.math.floor(length1 * beta) ,dtype=tf.int32) + reorder_begin = tf.random.uniform([1], minval=0, maxval=length - num_reorder, dtype=tf.int32)[0] + shuffle_index = tf.range(reorder_begin, reorder_begin + num_reorder) + shuffle_index = tf.random.shuffle(shuffle_index) + x = tf.range(get_shape_list(aug_data)[0]) + left = tf.slice(x, [0], [reorder_begin]) + right = tf.slice(x, [reorder_begin + num_reorder], [-1]) + reordered_item_index = tf.concat([left, shuffle_index, right], axis=0) + reordered_item_seq = tf.scatter_nd(tf.expand_dims(reordered_item_index, axis=1), + aug_data, + tf.shape(aug_data)) + return reordered_item_seq, length + + def augment(x): seq, length = x - flag = tf.range(2, dtype=tf.int32) + flag = tf.range(3, dtype=tf.int32) flag1 = tf.random.shuffle(flag)[:1][0] - aug_seq, aug_len = tf.cond(tf.equal(flag1, 0), lambda: item_crop(seq, length), lambda: item_mask(seq, length)) + aug_seq, aug_len = tf.cond(tf.equal(flag1, 0), + lambda: item_crop(seq, length), + lambda: 
tf.cond(tf.equal(flag1, 1), + lambda: item_mask(seq, length), + lambda: item_reorder(seq, length))) + return [aug_seq, aug_len] diff --git a/easy_rec/python/layers/bst.py b/easy_rec/python/layers/bst.py index 466676fd9..c9cf7d8c9 100644 --- a/easy_rec/python/layers/bst.py +++ b/easy_rec/python/layers/bst.py @@ -86,10 +86,15 @@ def __call__(self, inputs, training=None, **kwargs): if self.config.need_contrastive_learning: assert 'loss_dict' in kwargs, "no `loss_dict` in kwargs of bst layer: %s" % self.name loss = self.contrastive_loss(seq_input, seq_len, max_position) - loss *= self.config.contrastive_loss_weight + if self.config.auto_contrastive_loss_weight: + uncertainty = tf.Variable( + 0, name='%s_contrastive_loss_weight' % self.name, dtype=tf.float32) + loss = tf.exp(-uncertainty) * loss + 0.5 * uncertainty + else: + loss *= self.config.contrastive_loss_weight loss_dict = kwargs['loss_dict'] - loss_dict['contrastive_loss'] = loss - tf.summary.scalar('loss/%s_contrastive_loss' % self.name, loss) + loss_dict['%s_contrastive_loss' % self.name] = loss + # tf.summary.scalar('loss/%s_contrastive_loss' % self.name, loss) if target_feature is not None: target_size = target_feature.shape.as_list()[-1] diff --git a/easy_rec/python/layers/dnn.py b/easy_rec/python/layers/dnn.py index 74e355e82..d2af5a4cf 100644 --- a/easy_rec/python/layers/dnn.py +++ b/easy_rec/python/layers/dnn.py @@ -34,11 +34,7 @@ def __init__(self, self._name = name self._is_training = is_training logging.info('dnn activation function = %s' % self._config.activation) - self.activations = [ - get_activation( - self._config.activation, is_training=is_training, feat_dim=units) - for units in self.hidden_units - ] + self.activation = get_activation(self._config.activation, is_training=is_training) self._last_layer_no_activation = last_layer_no_activation self._last_layer_no_batch_norm = last_layer_no_batch_norm @@ -71,7 +67,7 @@ def __call__(self, deep_fea, hidden_layer_feature_output=False): 
trainable=True, name='%s/dnn_%d/bn' % (self._name, i)) if (i + 1 < hidden_units_len) or not self._last_layer_no_activation: - deep_fea = self.activations[i]( + deep_fea = self.activation( deep_fea, name='%s/dnn_%d/act' % (self._name, i)) if len(self.dropout_ratio) > 0 and self._is_training: assert self.dropout_ratio[ diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index e3cdd31ba..871306326 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -17,6 +17,7 @@ from easy_rec.python.utils import estimator_utils from easy_rec.python.utils import restore_filter from easy_rec.python.utils.load_class import get_register_class_meta +from easy_rec.python.layers import dnn if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -129,12 +130,21 @@ def get_sequence_encoding(self, group_name=None, is_training=True): seq_encoding.append(encoding) if len(seq_encoding) > 1: - return tf.concat(seq_encoding, axis=-1) + encoding = tf.concat(seq_encoding, axis=-1) elif len(seq_encoding) == 1: - return seq_encoding[0] + encoding = seq_encoding[0] else: return None + if self._base_model_config.HasField('sequence_dnn'): + sequence_dnn = dnn.DNN( + self._base_model_config.sequence_dnn, + self._l2_reg, + name='sequence_dnn', + is_training=self._is_training) + encoding = sequence_dnn(encoding) + return encoding + @abstractmethod def build_predict_graph(self): pass diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 27dcefadc..42f454d95 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -3,6 +3,7 @@ package protos; import "easy_rec/python/protos/fm.proto"; import "easy_rec/python/protos/deepfm.proto"; +import "easy_rec/python/protos/dnn.proto"; import "easy_rec/python/protos/wide_and_deep.proto"; import "easy_rec/python/protos/multi_tower.proto"; import "easy_rec/python/protos/dlrm.proto"; @@ 
class DeepShap(object):
    """Approximate SHAP values for a TensorFlow graph model (Deep SHAP).

    This is an enhanced version of the DeepLIFT algorithm where, similar to
    Kernel SHAP, the conditional expectations of SHAP values are approximated
    using a selection of background samples. Lundberg and Lee (NIPS 2017)
    showed that the per node attribution rules in DeepLIFT (Shrikumar,
    Greenside, and Kundaje, arXiv 2017) can be chosen to approximate Shapley
    values. By integrating over many background samples the estimates sum up
    to the difference between the current model output and the expected model
    output on the background samples (f(x) - E[f(x)]).
    """

    def __init__(self, inputs, output, data, session=None, learning_phase_flags=None):
        """Build an explainer for a deep model using a background dataset.

        The cost of the method scales linearly with the number of background
        samples. The variance of the expectation estimates scales by roughly
        1/sqrt(N) for N background samples, so 100 samples give a good
        estimate and 1000 samples a very good one.

        Parameters
        ----------
        inputs : tensor or [tensor]
            Input tensor(s) of the model to be explained.
        output : tensor
            Output tensor of the model; must be a vector or a matrix
            (rank < 3). SHAP values are specific to a single output value.
        data : numpy.array, [numpy.array] or callable
            Background dataset matching `inputs`. If a callable is given it
            receives one input example (a list with one entry per model
            input) and must return the background dataset for that example.
        session : None or tensorflow.Session
            Session holding the model. If None, the keras session is looked
            up first and the default TensorFlow session is the last resort.
        learning_phase_flags : None or list of tensors
            Custom learning phase flags. They are all fed with False while
            explaining so that ops such as batch norm or dropout run in
            inference mode. If None, boolean scalar `Const` ops whose name
            contains 'learning_phase' are used.
        """
        self.model_inputs = inputs
        self.model_output = output
        assert not isinstance(self.model_output, list), \
            "The model output to be explained must be a single tensor!"
        assert len(self.model_output.shape) < 3, "The model output must be a vector or a single value!"
        self.multi_output = len(self.model_output.shape) != 1

        # check if we have multiple inputs
        self.multi_input = isinstance(self.model_inputs, list) and len(self.model_inputs) != 1
        if not isinstance(self.model_inputs, list):
            self.model_inputs = [self.model_inputs]
        if not isinstance(data, list) and not callable(data):
            data = [data]
        self.data = data

        self._vinputs = {}  # used to track what op inputs depends on the model inputs
        self.orig_grads = {}

        if session is None:
            try:
                session = tf.compat.v1.keras.backend.get_session()
            except Exception:  # older TensorFlow without the compat.v1 namespace
                session = tf.keras.backend.get_session()
        self.session = tf.get_default_session() if session is None else session
        self.graph = self.session.graph

        # if no learning phase flags were given we go looking for them
        # ...this will catch the one that keras uses
        # we need to find them since we want to make sure learning phase flags are set to False
        if learning_phase_flags is None:
            self.learning_phase_ops = []
            for op in self.graph.get_operations():
                if 'learning_phase' in op.name and op.type == "Const" and len(op.outputs[0].shape) == 0:
                    if op.outputs[0].dtype == tf.bool:
                        self.learning_phase_ops.append(op)
            self.learning_phase_flags = [op.outputs[0] for op in self.learning_phase_ops]
        else:
            # keep the user supplied flags too: `run` feeds every one of them
            # with False (they were previously dropped, breaking `run`)
            self.learning_phase_flags = list(learning_phase_flags)
            self.learning_phase_ops = [t.op for t in self.learning_phase_flags]

        # save the expected output of the model
        # if self.data is a function, set self.expected_value to None
        if callable(self.data):
            self.expected_value = None
        else:
            if self.data[0].shape[0] > 5000:
                warnings.warn(
                    "You have provided over 5k background samples! For better performance consider using smaller random sample.")
            self.expected_value = self.run(self.model_output, self.model_inputs, self.data).mean(0)

        self._init_between_tensors(self.model_output.op, self.model_inputs)

        # make a blank array that will get lazily filled in with the SHAP value computation
        # graphs for each output. Lazy is important since if there are 1000 outputs and we
        # only explain the top 5 it would be a waste to build graphs for the other 995
        if not self.multi_output:
            self.phi_symbolics = [None]
        else:
            noutputs = self.model_output.shape.as_list()[1]
            if noutputs is not None:
                self.phi_symbolics = [None for _ in range(noutputs)]
            else:
                raise Exception("The model output tensor to be explained cannot have a static shape in dim 1 of None!")

    def run(self, out, model_inputs, X):
        """Run `out` in the session, feeding `X` and False for every learning phase flag."""
        feed_dict = dict(zip(model_inputs, X))
        for t in self.learning_phase_flags:
            feed_dict[t] = False
        return self.session.run(out, feed_dict)

    def phi_symbolic(self, i):
        """Return (building it on first use) the attribution graph of model output `i`."""
        if self.phi_symbolics[i] is None:
            def anon():
                out = self.model_output[:, i] if self.multi_output else self.model_output
                return tf.gradients(out, self.model_inputs)

            self.phi_symbolics[i] = self.execute_with_overridden_gradients(anon)

        return self.phi_symbolics[i]

    def custom_grad(self, op, *grads):
        """Pass a gradient op creation request to the handler registered for the op type."""
        # we cut off the shap_ prefix before the lookup
        type_name = op.type[5:] if op.type.startswith("shap_") else op.type
        out = op_handlers[type_name](self, op, *grads)
        return out

    def execute_with_overridden_gradients(self, f):
        """Call `f()` while the gradient registry entries of all handled op types are overridden.

        The registry entries and the backprop check are restored afterwards,
        also when `f` raises.
        """
        # replace the gradients for all the non-linear activations
        # we do this by hacking our way into the registry (TODO: find a public API for this if it exists)
        reg = tf_ops._gradient_registry._registry
        ops_not_in_registry = ['TensorListReserve']
        # NOTE: location_tag taken from tensorflow source for None type ops
        location_tag = ("UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN")
        # TODO: unclear why some ops are not in the registry with TF 2.0 like TensorListReserve
        for non_reg_ops in ops_not_in_registry:
            reg[non_reg_ops] = {'type': None, 'location': location_tag}
        for n in op_handlers:
            if n in reg:
                self.orig_grads[n] = reg[n]["type"]
                reg["shap_" + n] = {
                    "type": self.custom_grad,
                    "location": reg[n]["location"]
                }
                reg[n]["type"] = self.custom_grad

        # In TensorFlow 1.10 they started pruning out nodes that they think can't be backpropped
        # unfortunately that includes the index of embedding layers so we disable that check here
        if hasattr(tf_gradients_impl, "_IsBackpropagatable"):
            orig_IsBackpropagatable = tf_gradients_impl._IsBackpropagatable
            tf_gradients_impl._IsBackpropagatable = lambda tensor: True

        # define the computation graph for the attribution values using a custom gradient-like computation
        try:
            out = f()
        finally:
            # reinstate the backpropagatable check
            if hasattr(tf_gradients_impl, "_IsBackpropagatable"):
                tf_gradients_impl._IsBackpropagatable = orig_IsBackpropagatable

            # restore the original gradient definitions
            for n in op_handlers:
                if n in reg:
                    del reg["shap_" + n]
                    reg[n]["type"] = self.orig_grads[n]
            for non_reg_ops in ops_not_in_registry:
                del reg[non_reg_ops]
        return out

    def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_additivity=True):
        """Return approximate SHAP values for the model applied to the data given by X.

        Parameters
        ----------
        X : list or numpy.array
            A tensor (or list of tensors) of samples (where X.shape[0] == # samples) on which to
            explain the model's output.

        ranked_outputs : None or int
            If None, all the outputs of a multi-output model are explained. If a positive
            integer, only that many of the top model outputs are explained (where "top" is
            determined by output_rank_order) and a pair (shap_values, indexes) is returned.

        output_rank_order : "max", "min", or "max_abs"
            How to order the model outputs when using ranked_outputs: largest value first,
            smallest value first, or largest absolute value first.

        check_additivity : bool
            If True, assert that for every explained output the attributions of a sample sum
            up to f(x) - E[f(x)] (within 1e-2). The check is skipped when the background data
            is a callable, because no expected value is available in that case.

        Returns
        -------
        array or list
            For a model with a single output, a tensor of SHAP values with the same shape as X.
            For a model with multiple outputs, a list of such tensors, one per explained output.
            If ranked_outputs is given, the pair (shap_values, indexes) where indexes is a
            matrix that tells for each sample which output indexes were chosen as "top".
        """
        # check if we have multiple inputs
        if not self.multi_input:
            if isinstance(X, list):
                assert len(X) == 1, "Expected a single tensor as model input!"
            else:
                X = [X]
        else:
            assert isinstance(X, list), "Expected a list of model inputs!"
        assert len(self.model_inputs) == len(X), "Number of model inputs (%d) does not match the number given (%d)!" % (
            len(self.model_inputs), len(X))

        num_samples = X[0].shape[0]

        # rank and determine the model outputs that we will explain
        if ranked_outputs is not None and self.multi_output:
            model_output_values = self.run(self.model_output, self.model_inputs, X)

            if output_rank_order == "max":
                model_output_ranks = np.argsort(-model_output_values)
            elif output_rank_order == "min":
                model_output_ranks = np.argsort(model_output_values)
            elif output_rank_order == "max_abs":
                # descending by magnitude (an ascending sort would pick the smallest ones)
                model_output_ranks = np.argsort(-np.abs(model_output_values))
            else:
                raise AssertionError("output_rank_order must be max, min, or max_abs!")
            model_output_ranks = model_output_ranks[:, :ranked_outputs]
        else:
            model_output_ranks = np.tile(np.arange(len(self.phi_symbolics)), (num_samples, 1))

        # compute the attributions
        output_phis = []
        for i in range(model_output_ranks.shape[1]):
            phis = [np.zeros(x.shape) for x in X]
            for j in range(num_samples):
                if callable(self.data):
                    bg_data = self.data([x[j] for x in X])
                    if not isinstance(bg_data, list):
                        bg_data = [bg_data]
                else:
                    bg_data = self.data

                # tile the inputs to line up with the background data samples
                tiled_X = [
                    np.tile(x[j:j + 1], (bg.shape[0],) + (1,) * (len(x.shape) - 1))
                    for x, bg in zip(X, bg_data)
                ]

                # the first half of the batch is the current sample, the second half the references
                joint_input = [np.concatenate([tiled, bg], 0) for tiled, bg in zip(tiled_X, bg_data)]

                # run attribution computation graph
                feature_ind = model_output_ranks[j, i]
                sample_phis = self.run(self.phi_symbolic(feature_ind), self.model_inputs, joint_input)

                # assign the attributions to the right part of the output arrays
                for l in range(len(X)):
                    phis[l][j] = (sample_phis[l][bg_data[l].shape[0]:] * (X[l][j] - bg_data[l])).mean(0)

            output_phis.append(phis[0] if not self.multi_input else phis)

        # check that the SHAP values sum up to the model output
        if check_additivity and self.expected_value is not None:
            model_output = np.asarray(self.run(self.model_output, self.model_inputs, X))
            if model_output.ndim == 1:
                # single-output model: treat it as a one column matrix
                model_output = model_output[:, None]
            expected = np.atleast_1d(self.expected_value)
            rows = np.arange(num_samples)
            for i, phis in enumerate(output_phis):
                # output index explained for each sample in slot i
                explained = model_output_ranks[:, i]
                diffs = model_output[rows, explained] - expected[explained]
                for phi in (phis if self.multi_input else [phis]):
                    diffs = diffs - phi.sum(axis=tuple(range(1, phi.ndim)))
                if diffs.size == 0:
                    continue
                max_diff = np.abs(diffs).max()
                assert max_diff < 1e-2, "The SHAP explanations do not sum up to the model's output! This is either because of a " \
                                        "rounding error or because an operator in your computation graph was not fully supported. If " \
                                        "the sum difference of %f is significant compared the scale of your model outputs please post " \
                                        "as a github issue, with a reproducible example if possible so we can debug it." % max_diff

        if not self.multi_output:
            return output_phis[0]
        elif ranked_outputs is not None:
            return output_phis, model_output_ranks
        else:
            return output_phis

    def _init_between_tensors(self, out_op, model_inputs):
        """Record the ops and tensor names lying on a path between the inputs and the output."""
        # find all the operations in the graph between our inputs and outputs
        tensor_blacklist = tensors_blocked_by_false(self.learning_phase_ops)  # don't follow learning phase branches
        dependence_breakers = [k for k in op_handlers if op_handlers[k] == break_dependence]
        back_ops = backward_walk_ops(
            [out_op], tensor_blacklist,
            dependence_breakers
        )
        start_ops = []
        for minput in model_inputs:
            for op in minput.consumers():
                start_ops.append(op)
        self.between_ops = forward_walk_ops(
            start_ops,
            tensor_blacklist, dependence_breakers,
            within_ops=back_ops
        )

        # note all the tensors that are on the path between the inputs and the output
        self.between_tensors = {}
        for op in self.between_ops:
            for t in op.outputs:
                self.between_tensors[t.name] = True
        for t in model_inputs:
            self.between_tensors[t.name] = True

        # save what types are being used
        self.used_types = {}
        for op in self.between_ops:
            self.used_types[op.type] = True

    def _variable_inputs(self, op):
        """Return a boolean array telling which inputs of `op` depend on the model inputs."""
        if op not in self._vinputs:
            # builtin bool: the np.bool alias was removed from NumPy
            out = np.zeros(len(op.inputs), dtype=bool)
            for i, t in enumerate(op.inputs):
                out[i] = t.name in self.between_tensors
            self._vinputs[op] = out
        return self._vinputs[op]
def tensors_blocked_by_false(ops):
    """Follow a set of ops assuming their value is False and find blocked Switch paths.

    This is used to prune away parts of the model graph that are only used
    during the training phase (like dropout, batch norm, etc.).

    The graph is walked iteratively and every op is visited once, so deep or
    heavily shared graphs neither hit the recursion limit nor get re-walked.

    Args:
        ops: operations whose outputs are assumed to be False.

    Returns:
        A list with output 1 (the "true" branch) of every Switch op reachable
        from `ops` without passing through another Switch.
    """
    blocked = []
    seen = set()  # ids of the ops already expanded
    stack = list(ops)[::-1]  # reversed so that ops are popped in the given order
    while stack:
        op = stack.pop()
        if id(op) in seen:
            continue
        seen.add(id(op))
        if op.type == "Switch":
            # the true path is blocked since we assume the ops we trace are False
            blocked.append(op.outputs[1])
        else:
            consumers = [c for out in op.outputs for c in out.consumers()]
            stack.extend(reversed(consumers))
    return blocked


def backward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist):
    """Collect the ops reachable from `start_ops` by following op inputs.

    Args:
        start_ops: ops to start the walk from.
        tensor_blacklist: tensors that must not be followed.
        op_type_blacklist: op type names that are neither collected nor expanded.

    Returns:
        The list of found ops, in discovery order.
    """
    # identity based lookups: same meaning as the `in` tests on lists of graph
    # objects, but O(1) and independent of how tensors implement `==`
    blocked_tensors = {id(t) for t in tensor_blacklist}
    blocked_types = set(op_type_blacklist)
    found_ops = []
    found_ids = set()
    op_stack = list(start_ops)
    while op_stack:
        op = op_stack.pop()
        if op.type in blocked_types or id(op) in found_ids:
            continue
        found_ids.add(id(op))
        found_ops.append(op)
        for tensor in op.inputs:
            if id(tensor) not in blocked_tensors:
                op_stack.append(tensor.op)
    return found_ops


def forward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist, within_ops):
    """Collect the ops reachable from `start_ops` by following op outputs.

    Args:
        start_ops: ops to start the walk from.
        tensor_blacklist: tensors that must not be followed.
        op_type_blacklist: op type names that are neither collected nor expanded.
        within_ops: only ops contained in this collection are collected.

    Returns:
        The list of found ops, in discovery order.
    """
    blocked_tensors = {id(t) for t in tensor_blacklist}
    blocked_types = set(op_type_blacklist)
    allowed_ids = {id(op) for op in within_ops}
    found_ops = []
    found_ids = set()
    op_stack = list(start_ops)
    while op_stack:
        op = op_stack.pop()
        if op.type in blocked_types or id(op) not in allowed_ids or id(op) in found_ids:
            continue
        found_ids.add(id(op))
        found_ops.append(op)
        for out in op.outputs:
            if id(out) not in blocked_tensors:
                op_stack.extend(out.consumers())
    return found_ops
def linearity_1d_nonlinearity_2d(input_ind0, input_ind1, op_func):
    """Build a handler for ops that are linear when only one of two inputs varies.

    The returned handler delegates to `linearity_1d_handler` when exactly one
    of the two inputs varies and to `nonlinearity_2d_handler` when both do.
    """
    def handler(explainer, op, *grads):
        varying = explainer._variable_inputs(op)
        first_varies, second_varies = varying[input_ind0], varying[input_ind1]
        if first_varies and second_varies:
            return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads)
        if first_varies:
            return linearity_1d_handler(input_ind0, explainer, op, *grads)
        if second_varies:
            return linearity_1d_handler(input_ind1, explainer, op, *grads)
        # no inputs vary, we must be hidden by a switch function
        return [None for _ in op.inputs]

    return handler


def nonlinearity_1d_nonlinearity_2d(input_ind0, input_ind1, op_func):
    """Build a handler for ops that are nonlinear in each of two inputs.

    The returned handler delegates to `nonlinearity_1d_handler` when exactly
    one of the two inputs varies and to `nonlinearity_2d_handler` when both do.
    """
    def handler(explainer, op, *grads):
        varying = explainer._variable_inputs(op)
        first_varies, second_varies = varying[input_ind0], varying[input_ind1]
        if first_varies and second_varies:
            return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads)
        if first_varies:
            return nonlinearity_1d_handler(input_ind0, explainer, op, *grads)
        if second_varies:
            return nonlinearity_1d_handler(input_ind1, explainer, op, *grads)
        # no inputs vary, we must be hidden by a switch function
        return [None for _ in op.inputs]

    return handler


def nonlinearity_1d(input_ind):
    """Build a handler applying `nonlinearity_1d_handler` to input `input_ind`."""
    def handler(explainer, op, *grads):
        return nonlinearity_1d_handler(input_ind, explainer, op, *grads)

    return handler
def nonlinearity_1d_handler(input_ind, explainer, op, *grads):
    """Attribution rule for an op that is nonlinear in one varying input.

    The batch is split into two halves along axis 0; `shap_values` builds it
    as the explained sample (tiled) followed by the background samples.
    Where the two halves differ by less than 1e-6 the original gradient is
    used, elsewhere the incoming gradient is scaled by the output difference
    divided by the input difference.

    Returns:
        A list with one entry per op input; only `input_ind` is not None.
    """
    # make sure only the given input varies
    op_inputs = op.inputs
    if op_inputs is None:
        op_inputs = op.outputs[0].op.inputs

    for i in range(len(op_inputs)):
        if i != input_ind:
            assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"

    xin0, rin0 = tf.split(op_inputs[input_ind], 2)
    xout, rout = tf.split(op.outputs[input_ind], 2)
    delta_in0 = xin0 - rin0
    # multiples that tile a half batch back to the size of the full batch
    if delta_in0.shape is None:
        dup0 = [2, 1]
    else:
        dup0 = [2] + [1 for i in delta_in0.shape[1:]]
    out = [None for _ in op_inputs]
    # the original gradient function is registered under the un-prefixed type
    if op.type.startswith("shap_"):
        op.type = op.type[5:]
    orig_grad = explainer.orig_grads[op.type](op, grads[0])
    out[input_ind] = tf.where(
        tf.tile(tf.abs(delta_in0), dup0) < 1e-6,
        orig_grad[input_ind] if len(op_inputs) > 1 else orig_grad,
        grads[0] * tf.tile((xout - rout) / delta_in0, dup0)
    )
    return out


def nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads):
    """Attribution rule for an op whose two first inputs both vary.

    `op_func` recomputes the op on mixed (sample, reference) input pairs; the
    output difference is shared between the two inputs by averaging over the
    two orders in which the inputs can be switched from reference to sample.

    Returns:
        [out0, out1], the attributions for the two inputs.
    """
    assert input_ind0 == 0 and input_ind1 == 1, "TODO: Can't yet handle double inputs that are not first!"
    xout, rout = tf.split(op.outputs[0], 2)
    in0 = op.inputs[input_ind0]
    in1 = op.inputs[input_ind1]
    xin0, rin0 = tf.split(in0, 2)
    xin1, rin1 = tf.split(in1, 2)
    delta_in0 = xin0 - rin0
    delta_in1 = xin1 - rin1
    dup0 = [2] + [1 for i in delta_in0.shape[1:]]
    out10 = op_func(xin0, rin1)  # sample for input 0, reference for input 1
    out01 = op_func(rin0, xin1)  # reference for input 0, sample for input 1
    out11, out00 = xout, rout
    out0 = 0.5 * (out11 - out01 + out10 - out00)
    out0 = grads[0] * tf.tile(out0 / delta_in0, dup0)
    out1 = 0.5 * (out11 - out10 + out01 - out00)
    out1 = grads[0] * tf.tile(out1 / delta_in1, dup0)

    # Avoid divide by zero nans
    out0 = tf.where(tf.abs(tf.tile(delta_in0, dup0)) < 1e-7, tf.zeros_like(out0), out0)
    out1 = tf.where(tf.abs(tf.tile(delta_in1, dup0)) < 1e-7, tf.zeros_like(out1), out1)

    # see if due to broadcasting our gradient shapes don't match our input shapes
    # NOTE(review): only the first mismatching axis of one of the two outputs is reduced
    if (np.any(np.array(out1.shape) != np.array(in1.shape))):
        broadcast_index = np.where(np.array(out1.shape) != np.array(in1.shape))[0][0]
        out1 = tf.reduce_sum(out1, axis=broadcast_index, keepdims=True)
    elif (np.any(np.array(out0.shape) != np.array(in0.shape))):
        broadcast_index = np.where(np.array(out0.shape) != np.array(in0.shape))[0][0]
        out0 = tf.reduce_sum(out0, axis=broadcast_index, keepdims=True)

    return [out0, out1]


def softmax(explainer, op, *grads):
    """ Just decompose softmax into its components and recurse, we can handle all of them :)

    We assume the 'axis' is the last dimension because the TF codebase swaps the 'axis' to
    the last dimension before the softmax op if 'axis' is not already the last dimension.
    The row maximum is subtracted before tf.exp for numerical stability; the attributions
    are rescaled at the end to account for that shift.
    """
    in0 = op.inputs[0]
    in0_max = tf.reduce_max(in0, axis=-1, keepdims=True, name="in0_max")
    in0_centered = in0 - in0_max
    evals = tf.exp(in0_centered, name="custom_exp")
    rsum = tf.reduce_sum(evals, axis=-1, keepdims=True)
    div = evals / rsum

    # mark these as in-between the inputs and outputs
    # (False marks the entries added here so that they can be removed again below)
    for op in [evals.op, rsum.op, div.op, in0_centered.op]:
        for t in op.outputs:
            if t.name not in explainer.between_tensors:
                explainer.between_tensors[t.name] = False

    out = tf.gradients(div, in0_centered, grad_ys=grads[0])[0]

    # remove the names we just added
    for op in [evals.op, rsum.op, div.op, in0_centered.op]:
        for t in op.outputs:
            if explainer.between_tensors[t.name] is False:
                del explainer.between_tensors[t.name]

    # rescale to account for our shift by in0_max (which we did for numerical stability)
    xin0, rin0 = tf.split(in0, 2)
    xin0_centered, rin0_centered = tf.split(in0_centered, 2)
    delta_in0 = xin0 - rin0
    dup0 = [2] + [1 for i in delta_in0.shape[1:]]
    return tf.where(
        tf.tile(tf.abs(delta_in0), dup0) < 1e-6,
        out,
        out * tf.tile((xin0_centered - rin0_centered) / delta_in0, dup0)
    )
def maxpool(explainer, op, *grads):
    """Attribution rule for MaxPool.

    The batch is split along axis 0 into two halves (explained samples, then
    references, as concatenated by `shap_values`). The original MaxPool
    gradient routes the output differences back to the inputs; the result is
    divided by the input difference, with zeros where that difference is
    below 1e-7.
    """
    xin0, rin0 = tf.split(op.inputs[0], 2)
    xout, rout = tf.split(op.outputs[0], 2)
    delta_in0 = xin0 - rin0
    dup0 = [2] + [1 for i in delta_in0.shape[1:]]
    cross_max = tf.maximum(xout, rout)
    diffs = tf.concat([cross_max - rout, xout - cross_max], 0)
    # the original gradient function is registered under the un-prefixed type
    if op.type.startswith("shap_"):
        op.type = op.type[5:]
    xmax_pos, rmax_pos = tf.split(explainer.orig_grads[op.type](op, grads[0] * diffs), 2)
    return tf.tile(tf.where(
        tf.abs(delta_in0) < 1e-7,
        tf.zeros_like(delta_in0),
        (xmax_pos + rmax_pos) / delta_in0
    ), dup0)


def gather(explainer, op, *grads):
    """Attribution rule for GatherV2 / ResourceGather.

    When only the indices vary, the output difference is summed over the
    trailing axes and divided by the index difference (zero where the indices
    differ by less than 1e-6). When only the params vary, the op is linear
    and the original gradient is used. Both varying is not supported.

    Returns:
        A list with one entry per op input: two for ResourceGather and for the
        params-only case, three (params, indices, axis) otherwise.
    """
    # params = op.inputs[0]
    indices = op.inputs[1]
    # axis = op.inputs[2]
    var = explainer._variable_inputs(op)
    if var[1] and not var[0]:
        assert len(indices.shape) == 2, "Only scalar indices supported right now in GatherV2!"

        xin1, rin1 = tf.split(tf.cast(op.inputs[1], tf.float32), 2)
        xout, rout = tf.split(op.outputs[0], 2)
        dup_in1 = [2] + [1 for i in xin1.shape[1:]]
        dup_out = [2] + [1 for i in xout.shape[1:]]
        delta_in1_t = tf.tile(xin1 - rin1, dup_in1)
        out_sum = tf.reduce_sum(grads[0] * tf.tile(xout - rout, dup_out),
                                list(range(len(indices.shape), len(grads[0].shape))))
        if op.type == "ResourceGather":
            return [None, tf.where(
                tf.abs(delta_in1_t) < 1e-6,
                tf.zeros_like(delta_in1_t),
                out_sum / delta_in1_t
            )]
        return [None, tf.where(
            tf.abs(delta_in1_t) < 1e-6,
            tf.zeros_like(delta_in1_t),
            out_sum / delta_in1_t
        ), None]
    elif var[0] and not var[1]:
        if op.type.startswith("shap_"):
            op.type = op.type[5:]
        return [explainer.orig_grads[op.type](op, grads[0]), None]  # linear in this case
    else:
        assert False, "Axis not yet supported to be varying for gather op!"
def _original_gradient(explainer, op, *grads):
    """Call the gradient function originally registered for the type of `op`.

    A leading "shap_" is cut off the op type first (the op is modified in place).
    """
    if op.type.startswith("shap_"):
        op.type = op.type[5:]
    return explainer.orig_grads[op.type](op, *grads)


def linearity_1d(input_ind):
    """Build a handler applying `linearity_1d_handler` to input `input_ind`."""
    def handler(explainer, op, *grads):
        return linearity_1d_handler(input_ind, explainer, op, *grads)

    return handler


def linearity_1d_handler(input_ind, explainer, op, *grads):
    """Use the original gradient of an op that is linear in input `input_ind`.

    Asserts that no input other than `input_ind` varies.
    """
    for position, _ in enumerate(op.inputs):
        if position == input_ind:
            continue
        assert not explainer._variable_inputs(op)[position], str(position) + "th input to " + op.name + " cannot vary!"
    return _original_gradient(explainer, op, *grads)


def linearity_with_excluded(input_inds):
    """Build a handler applying `linearity_with_excluded_handler` with `input_inds`."""
    def handler(explainer, op, *grads):
        return linearity_with_excluded_handler(input_inds, explainer, op, *grads)

    return handler


def linearity_with_excluded_handler(input_inds, explainer, op, *grads):
    """Use the original gradient of a linear op whose inputs `input_inds` must not vary.

    Negative indices in `input_inds` are measured from the end of the input list.
    """
    num_inputs = len(op.inputs)
    for position in range(num_inputs):
        excluded = position in input_inds or position - num_inputs in input_inds
        if excluded:
            assert not explainer._variable_inputs(op)[position], str(position) + "th input to " + op.name + " cannot vary!"
    return _original_gradient(explainer, op, *grads)


def passthrough(explainer, op, *grads):
    """Use the original gradient of the op unchanged."""
    return _original_gradient(explainer, op, *grads)


def break_dependence(explainer, op, *grads):
    """ This function name is used to break attribution dependence in the graph traversal.

    These operation types may be connected above input data values in the graph but their outputs
    don't depend on the input values (for example they just depend on the shape).
    """
    return [None] * len(op.inputs)
# Maps a TensorFlow op type name to the attribution handler used for it.
op_handlers = {}


def _register_ops(handler, *op_types):
    """Map every op type in `op_types` to `handler`, in the given order."""
    for op_type in op_types:
        op_handlers[op_type] = handler


# ops that are always linear
_register_ops(
    passthrough,
    "Identity", "StridedSlice", "Squeeze", "ExpandDims", "Pack", "BiasAdd",
    "Unpack", "Add", "Sub", "Merge", "Sum", "Mean", "Cast", "Transpose",
    "Enter", "Exit", "NextIteration", "Tile",
    "TensorArrayScatterV3", "TensorArrayReadV3", "TensorArrayWriteV3",
)

# ops that don't pass any attributions to their inputs
_register_ops(break_dependence, "Shape", "RandomUniform", "ZerosLike")
# op_handlers["StopGradient"] = break_dependence # this allows us to stop attributions when we want to (like softmax re-centering)

# ops that are linear and only allow a single input to vary
_register_ops(linearity_1d(0), "Reshape", "Pad", "ReverseV2")
_register_ops(linearity_with_excluded([-1]), "ConcatV2")
_register_ops(linearity_1d(0), "Conv2D", "Switch", "AvgPool", "FusedBatchNorm")

# ops that are nonlinear and only allow a single input to vary
_register_ops(
    nonlinearity_1d(0),
    "Relu", "Elu", "Sigmoid", "Tanh", "Softplus", "Exp",
    "ClipByValue", "Rsqrt", "Square", "Max",
)

# ops that are nonlinear and allow two inputs to vary
_register_ops(nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: (x - y) * (x - y)), "SquaredDifference")
_register_ops(nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.minimum(x, y)), "Minimum")
_register_ops(nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.maximum(x, y)), "Maximum")

# ops that allow up to two inputs to vary and are linear when only one input varies
_register_ops(linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x * y), "Mul")
_register_ops(linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x / y), "RealDiv")
_register_ops(linearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.matmul(x, y)), "MatMul")

# ops that need their own custom attribution functions
_register_ops(gather, "GatherV2", "ResourceGather")
_register_ops(maxpool, "MaxPool")
_register_ops(softmax, "Softmax")
+op_handlers["ClipByValue"] = nonlinearity_1d(0) +op_handlers["Rsqrt"] = nonlinearity_1d(0) +op_handlers["Square"] = nonlinearity_1d(0) +op_handlers["Max"] = nonlinearity_1d(0) + +# ops that are nonlinear and allow two inputs to vary +op_handlers["SquaredDifference"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: (x - y) * (x - y)) +op_handlers["Minimum"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.minimum(x, y)) +op_handlers["Maximum"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.maximum(x, y)) + +# ops that allow up to two inputs to vary are are linear when only one input varies +op_handlers["Mul"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x * y) +op_handlers["RealDiv"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x / y) +op_handlers["MatMul"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.matmul(x, y)) + +# ops that need their own custom attribution functions +op_handlers["GatherV2"] = gather +op_handlers["ResourceGather"] = gather +op_handlers["MaxPool"] = maxpool +op_handlers["Softmax"] = softmax diff --git a/easy_rec/python/tools/explainer/explainer.py b/easy_rec/python/tools/explainer/explainer.py new file mode 100644 index 000000000..a40784458 --- /dev/null +++ b/easy_rec/python/tools/explainer/explainer.py @@ -0,0 +1,506 @@ +import tensorflow as tf +from tensorflow.python.platform import gfile +from tensorflow.python.saved_model import signature_constants +from easy_rec.python.utils.load_class import get_register_class_meta +from easy_rec.python.utils.config_util import get_configs_from_pipeline_file +from easy_rec.python.utils.input_utils import get_type_defaults +from easy_rec.python.tools.explainer.methods import DeepExplain +# from easy_rec.python.tools.explainer.deep_shap import DeepShap +from easy_rec.python.protos.dataset_pb2 import DatasetConfig +import abc +import collections +import numpy as np +import logging +import six +import time +from six import moves +import os + +_EXPLAINER_CLASS_MAP = {} 
def __init__(self, deep_explain, model_path, method_name):
  """Base class for explainer.

  Stores the arguments, creates empty input/output maps and loads the model
  through `_build_model`.

  Args:
    deep_explain: a deep explain context manager
    model_path: saved_model directory or frozen pb file path
      (NOTE(review): `_build_model` currently rejects anything but a directory)
    method_name: explain method name
  """
  self.deep_explain = deep_explain
  self.method = method_name
  self._inputs_map = collections.OrderedDict()
  self._outputs_map = collections.OrderedDict()
  self._model_path = model_path
  self._explainer = None
  self._effective_fields = None
  self._build_model()


def _build_model(self):
  """Load the saved model into the deep explain session and bind its inputs/outputs.

  Fills `_input_fields_info`, `_input_fields`, `_inputs_map`, `_input_names`,
  `_outputs_map` and `_is_multi_placeholder` from the default serving
  signature of the saved model.

  Raises:
    ValueError: if the model path is not a directory.
  """
  model_path = self._model_path
  logging.info('loading model from %s' % model_path)
  if gfile.IsDirectory(model_path):
    assert tf.saved_model.loader.maybe_saved_model_directory(model_path), \
        'saved model does not exists in %s' % model_path
  else:
    raise ValueError('currently only savedmodel is supported, path:' + model_path)

  input_fields = _get_input_fields_from_pipeline_config(model_path)
  self._input_fields_info, self._input_fields = input_fields

  de = self.deep_explain
  meta_graph_def = tf.saved_model.loader.load(
      de.session, [tf.saved_model.tag_constants.SERVING], model_path)
  # parse signature
  signature_def = meta_graph_def.signature_def[
      signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
  inputs = signature_def.inputs
  input_info = []
  self._is_multi_placeholder = len(inputs.items()) > 1
  if self._is_multi_placeholder:
    for gid, item in enumerate(inputs.items()):
      name, tensor = item
      logging.info('Load input binding: %s -> %s' % (name, tensor.name))
      input_name = tensor.name
      input_name, _ = input_name.split(':')
      try:
        # the integer after the last '_' of the tensor name is used as input position
        input_id = input_name.split('_')[-1]
        input_id = int(input_id)
      except Exception:
        # support for models that are not exported by easy_rec
        # in which case, the order of inputs may not be the
        # same as they are defined, therefore, list input
        # could not be supported, only dict input could be supported
        logging.warning(
            'could not determine input_id from input_name: %s' % input_name)
        input_id = gid
      input_info.append((input_id, name, tensor.dtype))
      self._inputs_map[name] = de.graph.get_tensor_by_name(tensor.name)
  else:
    # only one input, all features concatenate together
    for name, tensor in inputs.items():
      logging.info('Load input binding: %s -> %s' % (name, tensor.name))
      input_info.append((0, name, tensor.dtype))
      self._inputs_map[name] = de.graph.get_tensor_by_name(tensor.name)

  # sort inputs by input_ids so as to match the order of csv data
  input_info.sort(key=lambda t: t[0])
  self._input_names = [t[1] for t in input_info]

  outputs = signature_def.outputs
  for name, tensor in outputs.items():
    logging.info('Load output binding: %s -> %s' % (name, tensor.name))
    self._outputs_map[name] = de.graph.get_tensor_by_name(tensor.name)

  # get assets
  # self._assets = {}
  # asset_files = tf.get_collection(constants.ASSETS_KEY)
  # for any_proto in asset_files:
  #   asset_file = meta_graph_pb2.AssetFileDef()
  #   any_proto.Unpack(asset_file)
  #   type_name = asset_file.tensor_info.name.split(':')[0]
  #   asset_path = os.path.join(model_path, constants.ASSETS_DIRECTORY,
  #                             asset_file.filename)
  #   assert gfile.Exists(
  #       asset_path), '%s is missing in saved model' % asset_path
  #   self._assets[type_name] = asset_path
  # logging.info(self._assets)
== name, "input field `%d` has different names: <%s, %s>" % (i, field, name) + value = self._get_defaults(field) + # default_value.append(np.array([value])) # for deep_shap + default_value.append(np.array(value)) # for deep_shap + return default_value + + def _get_defaults(self, col_name, col_type='string'): + if col_name in self._input_fields_info: + col_type, default_val = self._input_fields_info[col_name] + default_val = get_type_defaults(col_type, default_val) + logging.info('col_name: %s, default_val: %s' % (col_name, default_val)) + else: + defaults = {'string': '', 'double': 0.0, 'bigint': 0} + assert col_type in defaults, 'invalid col_type: %s, col_type: %s' % ( + col_name, col_type) + default_val = defaults[col_type] + logging.info( + 'col_name: %s, default_val: %s.[not defined in saved_model_dir/assets/pipeline.config]' + % (col_name, default_val)) + return default_val + + def str_to_number(self, values): + assert len(values) == len(self._input_fields), "value count %d is not equal to the number of input fields %d" % ( + len(values), len(self._input_fields) + ) + result = [] + for i, name in enumerate(self._input_names): + assert name in self._input_fields_info, "input `%s` not in pipeline config" % name + idx = self._input_fields.index(name) + input_type, default_val = self._input_fields_info[name] + if input_type in {DatasetConfig.INT32, DatasetConfig.INT64}: + tmp_field = int(values[idx]) + elif input_type in [DatasetConfig.FLOAT, DatasetConfig.DOUBLE]: + tmp_field = float(values[idx]) + elif input_type in [DatasetConfig.BOOL]: + tmp_field = values[idx].lower() in ['true', '1', 't', 'y', 'yes'] + elif input_type in [DatasetConfig.STRING]: + tmp_field = values[idx] + else: + assert False, 'invalid types: %s' % str(input_type) + result.append(tmp_field) + return result + + def get_explainer(self, output_cols=None): + if output_cols is None or output_cols == 'ALL_COLUMNS': + self._output_cols = sorted(self.output_names) + logging.info('predict output 
cols: %s' % self._output_cols) + else: + # specified as score float,embedding string + tmp_cols = [] + for x in output_cols.split(','): + if x.strip() == '': + continue + tmp_keys = x.split(' ') + tmp_cols.append(tmp_keys[0].strip()) + self._output_cols = tmp_cols + if len(self._output_cols) > 1: + logging.warning('Only one output can be supported currently, use the first one: %s', self._output_cols[0]) + + output_name = self._output_cols[0] + assert output_name in self.output_names, 'invalid output name `%s` not in model outputs `%s`' % ( + output_name, ','.join(self.output_names)) + if output_name is None: + output = self._outputs_map.values()[0] + elif type(output_name) in {str, unicode}: + output = self._outputs_map[output_name] + else: + raise Exception('unsupported type of output_name: ' + str(type(output_name))) + + def_vals = self.default_values() + # print('default values (%d):' % len(def_vals), def_vals) + inputs = [self._inputs_map[name] for name in self._input_names] + # e = DeepShap(inputs, output, def_vals, session=self._session) + # self._explainer = e + e = self.deep_explain.get_explainer(self.method, output, inputs, baseline=def_vals) + return e + + @property + def input_names(self): + """Input names of the model. + + Returns: + a list, which conaining the name of input nodes available in model + """ + return self._input_names + + @property + def output_names(self): + """Output names of the model. 
+ + Returns: + a list, which containing the name of outputs nodes available in model + """ + return list(self._outputs_map.keys()) + + @abc.abstractmethod + def feature_importance(self, + input_path, + output_path, + reserved_cols='', + output_cols=None, + batch_size=1024, + slice_id=0, + slice_num=1): + pass + + # def create_output_table(self, reserved_cols=''): + # reserved_cols = [x.strip() for x in reserved_cols.split(',') if x != ''] + # outputs = self.input_names + # reserved_cols = filter(lambda r: r not in outputs, reserved_cols) + # output_cols = reserved_cols + outputs + # sql = 'create table output_table ' + # return sql + + +class OdpsExplainer(Explainer): + def feature_importance(self, + input_path, + output_path, + reserved_cols='', + output_cols=None, + batch_size=1024, + slice_id=0, + slice_num=1): + input_cols = self.input_names + input_dim = len(input_cols) + if reserved_cols: + reserved_cols = [x.strip() for x in reserved_cols.split(',') if x.strip() not in input_cols] + input_cols.extend(reserved_cols) + selected_cols = ','.join(input_cols) + print("selected_cols: " + selected_cols) + + explainer = self.get_explainer(output_cols) + print("reference value:", explainer.expected_value) + + import common_io + reader = common_io.table.TableReader(input_path, selected_cols=selected_cols, + slice_id=slice_id, slice_count=slice_num) + + reserved_cols_idx = [] + if reserved_cols: + reserved_cols = [x.strip() for x in reserved_cols.split(',') if x != ''] + schema = reader.get_schema() + columns = [str(x[0]) for x in schema] + reserved_cols_idx = [columns.index(x) for x in reserved_cols] + print(reserved_cols_idx) + + sum_t0, sum_t1, sum_t2 = 0, 0, 0 + writer = common_io.table.TableWriter(output_path, slice_id=slice_id) + total_records_num = reader.get_row_count() + for i in moves.range(0, total_records_num, batch_size): + t0 = time.time() + records = reader.read(batch_size, allow_smaller_final_batch=True) + t1 = time.time() + records = np.array(records) + 
inputs = list(records[:, :input_dim].T) + sv = explainer.shap_values(inputs, check_additivity=False) + outputs = [records[:, i] for i in reserved_cols_idx] + if outputs: + outputs.extend(sv[0]) + else: + outputs = sv[0] + indices = range(len(outputs)) + t2 = time.time() + writer.write(np.array(outputs).T, indices, allow_type_cast=True) + t3 = time.time() + sum_t0 += (t1 - t0) + sum_t1 += (t2 - t1) + sum_t2 += (t3 - t2) + if i % 100 == 0: + logging.info('progress: batch_num=%d sample_num=%d' % + (i + 1, (i + 1) * batch_size)) + logging.info('time_stats: read: %.2f predict: %.2f write: %.2f' % + (sum_t0, sum_t1, sum_t2)) + logging.info('Final_time_stats: read: %.2f predict: %.2f write: %.2f' % + (sum_t0, sum_t1, sum_t2)) + writer.close() + reader.close() + logging.info('Explain %s done.' % input_path) + + +class OdpsRtpExplainer(Explainer): + def __init__(self, deep_explain, model_path, method_name): + super(OdpsRtpExplainer, self).__init__(deep_explain, model_path, method_name) + pipeline_path = os.path.join(model_path, 'assets/pipeline.config') + if not gfile.Exists(pipeline_path): + logging.warning( + '%s not exists, default values maybe inconsistent with the values used in training.' + % pipeline_path) + return + pipeline_config = get_configs_from_pipeline_file(pipeline_path) + self._fg_separator = pipeline_config.data_config.separator + + if pipeline_config.export_config.filter_inputs: + if len(pipeline_config.feature_configs) > 0: + feature_configs = pipeline_config.feature_configs + elif pipeline_config.feature_config and len( + pipeline_config.feature_config.features) > 0: + feature_configs = pipeline_config.feature_config.features + else: + assert False, 'One of feature_configs and feature_config.features must be configured.' 
+ + self._effective_fields = [] + for fc in feature_configs: + for input_name in fc.input_names: + assert input_name in self._input_fields, 'invalid input_name in %s' % str(fc) + if input_name not in self._effective_fields: + self._effective_fields.append(input_name) + self._effective_fids = [ + self._input_fields.index(x) for x in self._effective_fields + ] + # sort fids from small to large + self._effective_fids = list(set(self._effective_fids)) + self._effective_fields = [ + self._input_fields[x] for x in self._effective_fids + ] + logging.info( + "raw input fields: %d, effective fields: %d" % (len(self._input_fields), len(self._effective_fields))) + + def feature_importance(self, + input_path, + output_path, + reserved_cols='', + output_cols=None, + batch_size=1024, + slice_id=0, + slice_num=1): + input_cols = [x.strip() for x in reserved_cols.split(',') if x != ''] + reserved_dim = len(input_cols) + if 'features' not in input_cols: + input_cols.append('features') + selected_cols = ','.join(input_cols) + print("selected_cols: " + selected_cols) + + explainer = self.get_explainer(output_cols) + print("reference value:", explainer.expected_value) + + import common_io + reader = common_io.table.TableReader(input_path, selected_cols=selected_cols, + slice_id=slice_id, slice_count=slice_num) + + sum_t0, sum_t1, sum_t2 = 0, 0, 0 + writer = common_io.table.TableWriter(output_path, slice_id=slice_id) + total_records_num = reader.get_row_count() + for i in moves.range(0, total_records_num, batch_size): + t0 = time.time() + records = reader.read(batch_size, allow_smaller_final_batch=True) + t1 = time.time() + inputs = [] + reserved = [] + for j in range(len(records)): + if reserved_dim > 0: + reserved.append(records[j][:reserved_dim]) + inputs.append(self.str_to_number(records[j][-1].decode('utf-8').split(self._fg_separator))) + inputs = list(np.array(inputs).T) + print("inputs:", inputs) + # sv = explainer.shap_values(inputs, check_additivity=False) + ret = 
explainer.run(inputs, batch_size=len(records)) + ret = np.array(ret) + if reserved_dim > 0: + outputs = np.concatenate([np.array(reserved), ret], axis=1) + else: + outputs = ret + indices = range(outputs.shape[1]) + t2 = time.time() + writer.write(outputs.T, indices, allow_type_cast=True) + t3 = time.time() + sum_t0 += (t1 - t0) + sum_t1 += (t2 - t1) + sum_t2 += (t3 - t2) + if i % 2 == 0: + logging.info('progress: batch_num=%d sample_num=%d' % + (i + 1, (i + 1) * batch_size)) + logging.info('time_stats: read: %.2f predict: %.2f write: %.2f' % + (sum_t0, sum_t1, sum_t2)) + logging.info('Final_time_stats: read: %.2f predict: %.2f write: %.2f' % + (sum_t0, sum_t1, sum_t2)) + writer.close() + reader.close() + logging.info('Explain %s done.' % input_path) + + +def _get_input_fields_from_pipeline_config(model_path): + pipeline_path = os.path.join(model_path, 'assets/pipeline.config') + if not gfile.Exists(pipeline_path): + logging.warning( + '%s not exists, default values maybe inconsistent with the values used in training.' + % pipeline_path) + return {}, [] + pipeline_config = get_configs_from_pipeline_file(pipeline_path) + data_config = pipeline_config.data_config + label_fields = data_config.label_fields + labels = {x for x in label_fields} + if data_config.HasField('sample_weight'): + labels.add(data_config.sample_weight) + + input_fields = data_config.input_fields + input_fields_info = { + input_field.input_name: + (input_field.input_type, input_field.default_val) + for input_field in input_fields if input_field.input_name not in labels + } + input_fields_list = [input_field.input_name for input_field in input_fields if input_field.input_name not in labels] + return input_fields_info, input_fields_list + + +def search_pb(directory, use_latest=False): + """Search pb file recursively in model directory. if multiple pb files exist, exception will be raised. + + If multiple pb files exist, exception will be raised. + + Args: + directory: model directory. 
+ + Returns: + directory contain pb file + """ + dir_list = [] + for root, dirs, files in gfile.Walk(directory): + for f in files: + if f.endswith('saved_model.pb'): + dir_list.append(root) + if len(dir_list) == 0: + raise ValueError('savedmodel is not found in directory %s' % directory) + elif len(dir_list) > 1: + if use_latest: + logging.info('find %d models: %s' % (len(dir_list), ','.join(dir_list))) + dir_list = sorted( + dir_list, + key=lambda x: int(x.split('/')[(-2 if (x[-1] == '/') else -1)])) + return dir_list[-1] + else: + raise ValueError('multiple saved model found in directory %s' % + directory) + + return dir_list[0] + + +# def create_explainer(model_path, use_latest=False): +# if gfile.IsDirectory(model_path): +# model_path = search_pb(model_path, use_latest) +# else: +# raise ValueError('model_path should be a directory, path:' + model_path) +# pipeline_path = os.path.join(model_path, 'assets/pipeline.config') +# if not gfile.Exists(pipeline_path): +# logging.warning('%s not exists' % pipeline_path) +# raise ValueError('%s not exists' % pipeline_path) +# +# pipeline_config = get_configs_from_pipeline_file(pipeline_path) +# input_type = pipeline_config.data_config.input_type +# if input_type in {DatasetConfig.OdpsInput, DatasetConfig.OdpsInputV2, DatasetConfig.OdpsInputV3}: +# return OdpsExplainer(model_path) +# if input_type in {DatasetConfig.OdpsRTPInput, DatasetConfig.OdpsRTPInputV2}: +# return OdpsRtpExplainer(model_path) +# raise ValueError("currently unsupported input type: " + input_type) + + +def run(FLAGS): + model_path = FLAGS.saved_model_dir + if gfile.IsDirectory(model_path): + model_path = search_pb(model_path, False) + else: + raise ValueError('model_path should be a directory, path:' + model_path) + pipeline_path = os.path.join(model_path, 'assets/pipeline.config') + if not gfile.Exists(pipeline_path): + logging.warning('%s not exists' % pipeline_path) + raise ValueError('%s not exists' % pipeline_path) + + gpu_options = 
tf.GPUOptions(allow_growth=True) + session_config = tf.ConfigProto( + gpu_options=gpu_options, + allow_soft_placement=True) + session = tf.Session(config=session_config) + + worker_count = len(FLAGS.worker_hosts.split(',')) + with DeepExplain(session=session) as de: + e = OdpsRtpExplainer(de, model_path, 'deeplift') + e.feature_importance(FLAGS.explain_tables if FLAGS.explain_tables else FLAGS.tables, + FLAGS.outputs, + reserved_cols=FLAGS.reserved_cols, + output_cols=FLAGS.output_cols, + batch_size=FLAGS.batch_size, + slice_id=FLAGS.task_index, + slice_num=worker_count) diff --git a/easy_rec/python/tools/explainer/feature_importance.py b/easy_rec/python/tools/explainer/feature_importance.py new file mode 100644 index 000000000..034f3c0da --- /dev/null +++ b/easy_rec/python/tools/explainer/feature_importance.py @@ -0,0 +1,50 @@ +from __future__ import print_function +from easy_rec.python.tools.explainer.explainer import run +import tensorflow as tf +flags = tf.app.flags + +flags.DEFINE_string('saved_model_dir', '', 'directory where saved_model.pb exists') +flags.DEFINE_string('explain_tables', '', 'tables used for explaination') +flags.DEFINE_string('background_table', '', 'tables used for expected value') +flags.DEFINE_string('tables', '', 'tables passed by pai command') +flags.DEFINE_string('outputs', '', 'output tables') +flags.DEFINE_string( + 'selected_cols', '', + 'columns to keep from input table, they are separated with ,') +flags.DEFINE_string( + 'reserved_cols', '', + 'columns to keep from input table, they are separated with ,') +flags.DEFINE_string( + 'output_cols', None, + 'output columns, such as: score float. 
multiple columns are separated by ,') +flags.DEFINE_integer('batch_size', 1024, 'predict batch size') +flags.DEFINE_string('worker_hosts', '', 'Comma-separated list of hostname:port pairs') +flags.DEFINE_integer('task_index', 0, 'Index of task within the job') + +FLAGS = flags.FLAGS + + +def main(_): + for k in FLAGS: + if k in ('h', 'help', 'helpshort', 'helpfull'): + continue + print("%s=%s" % (k, FLAGS[k].value)) + + # worker_count = len(FLAGS.worker_hosts.split(',')) + # e = create_explainer(FLAGS.saved_model_dir) + # + # output_names = e.input_names + # print("feature_names:", output_names) + # print("feature_num:", len(output_names)) + # e.feature_importance(FLAGS.explain_tables if FLAGS.explain_tables else FLAGS.tables, + # FLAGS.outputs, + # reserved_cols=FLAGS.reserved_cols, + # output_cols=FLAGS.output_cols, + # batch_size=FLAGS.batch_size, + # slice_id=FLAGS.task_index, + # slice_num=worker_count) + run(FLAGS) + + +if __name__ == '__main__': + tf.app.run(main=main) diff --git a/easy_rec/python/tools/explainer/methods.py b/easy_rec/python/tools/explainer/methods.py new file mode 100644 index 000000000..aa7192acc --- /dev/null +++ b/easy_rec/python/tools/explainer/methods.py @@ -0,0 +1,641 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import numpy as np +from skimage.util import view_as_windows +import warnings, logging +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.ops import nn_grad, math_grad +from collections import OrderedDict +from easy_rec.python.tools.explainer.utils import make_batches, slice_arrays, to_list, unpack_singleton + +SUPPORTED_ACTIVATIONS = [ + 'Relu', 'Elu', 'Sigmoid', 'Tanh', 'Softplus' +] + +UNSUPPORTED_ACTIVATIONS = [ + 'CRelu', 'Relu6', 'Softsign' +] + +_ENABLED_METHOD_CLASS = None +_GRAD_OVERRIDE_CHECKFLAG = 0 + + +# ----------------------------------------------------------------------------- +# 
UTILITY FUNCTIONS +# ----------------------------------------------------------------------------- + + +def activation(type): + """ + Returns Tensorflow's activation op, given its type + :param type: string + :return: op + """ + if type not in SUPPORTED_ACTIVATIONS: + warnings.warn('Activation function (%s) not supported' % type) + f = getattr(tf.nn, type.lower()) + return f + + +def original_grad(op, grad): + """ + Return original Tensorflow gradient for an op + :param op: op + :param grad: Tensor + :return: Tensor + """ + if op.type not in SUPPORTED_ACTIVATIONS: + warnings.warn('Activation function (%s) not supported' % op.type) + opname = '_%sGrad' % op.type + if hasattr(nn_grad, opname): + f = getattr(nn_grad, opname) + else: + f = getattr(math_grad, opname) + return f(op, grad) + + +# ----------------------------------------------------------------------------- +# ATTRIBUTION METHODS BASE CLASSES +# ----------------------------------------------------------------------------- + + +class AttributionMethod(object): + """ + Attribution method base class + """ + def __init__(self, T, X, session, keras_learning_phase=None): + self.T = T # target Tensor + self.X = X # input Tensor + self.Y_shape = [None,] + T.get_shape().as_list()[1:] + # Most often T contains multiple output units. In this case, it is often necessary to select + # a single unit to compute contributions for. This can be achieved passing 'ys' as weight for the output Tensor. 
+ self.Y = tf.placeholder(tf.float32, self.Y_shape) + # placeholder_from_data(ys) if ys is not None else 1.0 # Tensor that represents weights for T + self.T = self.T * self.Y + self.symbolic_attribution = None + self.session = session + self.keras_learning_phase = keras_learning_phase + self.has_multiple_inputs = type(self.X) is list or type(self.X) is tuple + logging.info('Model with multiple inputs: %s' % self.has_multiple_inputs) + + # Set baseline + # TODO: now this sets a baseline also for those methods that does not require it + self._set_check_baseline() + + # References + self._init_references() + + # Create symbolic explanation once during construction (affects only gradient-based methods) + self.explain_symbolic() + + def explain_symbolic(self): + return None + + def run(self, xs, ys=None, batch_size=None): + pass + + def _init_references(self): + pass + + def _check_input_compatibility(self, xs, ys=None, batch_size=None): + if ys is not None: + if not self.has_multiple_inputs and len(xs) != len(ys): + raise RuntimeError('When provided, ys must have the same batch size as xs (xs has batch size {} and ys {})'.format(len(xs), len(ys))) + elif self.has_multiple_inputs and np.all([len(i) != len(ys) for i in xs]): + raise RuntimeError('When provided, ys must have the same batch size as all elements of xs') + if batch_size is not None and batch_size > 0: + if self.T.shape[0].value is not None and self.T.shape[0].value is not batch_size: + raise RuntimeError('When using batch evaluation, the first dimension of the target tensor ' + 'must be compatible with the batch size. Found %s instead' % self.T.shape[0].value) + if isinstance(self.X, list): + for x in self.X: + if x.shape[0].value is not None and x.shape[0].value is not batch_size: + raise RuntimeError('When using batch evaluation, the first dimension of the input tensor ' + 'must be compatible with the batch size. 
Found %s instead' % x.shape[ + 0].value) + else: + if self.X.shape[0].value is not None and self.X.shape[0].value is not batch_size: + raise RuntimeError('When using batch evaluation, the first dimension of the input tensor ' + 'must be compatible with the batch size. Found %s instead' % self.X.shape[0].value) + + def _session_run_batch(self, T, xs, ys=None): + feed_dict = {} + if self.has_multiple_inputs: + for k, v in zip(self.X, xs): + feed_dict[k] = v + else: + feed_dict[self.X] = xs + + # If ys is not passed, produce a vector of ones that will be broadcasted to all batch samples + feed_dict[self.Y] = ys if ys is not None else np.ones([1,] + self.Y_shape[1:]) + + if self.keras_learning_phase is not None: + feed_dict[self.keras_learning_phase] = 0 + return self.session.run(T, feed_dict) + + def _session_run(self, T, xs, ys=None, batch_size=None): + num_samples = len(xs) + if self.has_multiple_inputs is True: + num_samples = len(xs[0]) + if len(xs) != len(self.X): + raise RuntimeError('List of input tensors and input data have different lengths (%s and %s)' + % (str(len(xs)), str(len(self.X)))) + if batch_size is not None: + for xi in xs: + if len(xi) != num_samples: + raise RuntimeError('Evaluation in batches requires all inputs to have ' + 'the same number of samples') + + if batch_size is None or batch_size <= 0 or num_samples <= batch_size: + return self._session_run_batch(T, xs, ys) + else: + outs = [] + batches = make_batches(num_samples, batch_size) + for batch_index, (batch_start, batch_end) in enumerate(batches): + # Get a batch from data + xs_batch = slice_arrays(xs, batch_start, batch_end) + # If the target tensor has one entry for each sample, we need to batch it as well + ys_batch = None + if ys is not None: + ys_batch = slice_arrays(ys, batch_start, batch_end) + batch_outs = self._session_run_batch(T, xs_batch, ys_batch) + batch_outs = to_list(batch_outs) + if batch_index == 0: + # Pre-allocate the results arrays. 
+ for batch_out in batch_outs: + shape = (num_samples,) + batch_out.shape[1:] + outs.append(np.zeros(shape, dtype=batch_out.dtype)) + for i, batch_out in enumerate(batch_outs): + outs[i][batch_start:batch_end] = batch_out + return unpack_singleton(outs) + + def _set_check_baseline(self): + # Do nothing for those methods that have no baseline required + if not hasattr(self, "baseline"): + return + + if self.baseline is None: + if self.has_multiple_inputs: + self.baseline = [np.zeros([1,] + xi.get_shape().as_list()[1:]) for xi in self.X] + else: + self.baseline = np.zeros([1,] + self.X.get_shape().as_list()[1:]) + + else: + if self.has_multiple_inputs: + for i, xi in enumerate(self.X): + if list(self.baseline[i].shape) == xi.get_shape().as_list()[1:]: + self.baseline[i] = np.expand_dims(self.baseline[i], 0) + else: + raise RuntimeError('Baseline shape %s does not match expected shape %s' + % (self.baseline[i].shape, xi.get_shape().as_list()[1:])) + else: + if list(self.baseline.shape) == self.X.get_shape().as_list()[1:]: + self.baseline = np.expand_dims(self.baseline, 0) + else: + raise RuntimeError('Baseline shape %s does not match expected shape %s' + % (self.baseline.shape, self.X.get_shape().as_list()[1:])) + + +class GradientBasedMethod(AttributionMethod): + """ + Base class for gradient-based attribution methods + """ + def get_symbolic_attribution(self): + return tf.gradients(self.T, self.X) + + def explain_symbolic(self): + if self.symbolic_attribution is None: + self.symbolic_attribution = self.get_symbolic_attribution() + return self.symbolic_attribution + + def run(self, xs, ys=None, batch_size=None): + self._check_input_compatibility(xs, ys, batch_size) + results = self._session_run(self.explain_symbolic(), xs, ys, batch_size) + return results[0] if not self.has_multiple_inputs else results + + @classmethod + def nonlinearity_grad_override(cls, op, grad): + return original_grad(op, grad) + + +class PerturbationBasedMethod(AttributionMethod): + """ + Base 
class for perturbation-based attribution methods + """ + def __init__(self, T, X, session, keras_learning_phase): + super(PerturbationBasedMethod, self).__init__(T, X, session, keras_learning_phase) + self.base_activation = None + + + +# ----------------------------------------------------------------------------- +# ATTRIBUTION METHODS +# ----------------------------------------------------------------------------- +""" +Returns zero attributions. For testing only. +""" + + +class DummyZero(GradientBasedMethod): + + def get_symbolic_attribution(self,): + return tf.gradients(self.T, self.X) + + @classmethod + def nonlinearity_grad_override(cls, op, grad): + input = op.inputs[0] + return tf.zeros_like(input) + +""" +Saliency maps +https://arxiv.org/abs/1312.6034 +""" + + +class Saliency(GradientBasedMethod): + + def get_symbolic_attribution(self): + return [tf.abs(g) for g in tf.gradients(self.T, self.X)] + + +""" +Gradient * Input +https://arxiv.org/pdf/1704.02685.pdf - https://arxiv.org/abs/1611.07270 +""" + + +class GradientXInput(GradientBasedMethod): + + def get_symbolic_attribution(self): + return [g * x for g, x in zip( + tf.gradients(self.T, self.X), + self.X if self.has_multiple_inputs else [self.X])] + + +""" +Integrated Gradients +https://arxiv.org/pdf/1703.01365.pdf +""" + + +class IntegratedGradients(GradientBasedMethod): + + def __init__(self, T, X, session, keras_learning_phase, steps=100, baseline=None): + self.steps = steps + self.baseline = baseline + super(IntegratedGradients, self).__init__(T, X, session, keras_learning_phase) + + def run(self, xs, ys=None, batch_size=None): + self._check_input_compatibility(xs, ys, batch_size) + + gradient = None + for alpha in list(np.linspace(1. 
/ self.steps, 1.0, self.steps)): + xs_mod = [b + (x - b) * alpha for x, b in zip(xs, self.baseline)] if self.has_multiple_inputs \ + else self.baseline + (xs - self.baseline) * alpha + _attr = self._session_run(self.explain_symbolic(), xs_mod, ys, batch_size) + if gradient is None: gradient = _attr + else: gradient = [g + a for g, a in zip(gradient, _attr)] + + results = [g * (x - b) / self.steps for g, x, b in zip( + gradient, + xs if self.has_multiple_inputs else [xs], + self.baseline if self.has_multiple_inputs else [self.baseline])] + + return results[0] if not self.has_multiple_inputs else results + + +""" +Layer-wise Relevance Propagation with epsilon rule +http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0130140 +""" + + +class EpsilonLRP(GradientBasedMethod): + eps = None + + def __init__(self, T, X, session, keras_learning_phase, epsilon=1e-4): + assert epsilon > 0.0, 'LRP epsilon must be greater than zero' + global eps + eps = epsilon + super(EpsilonLRP, self).__init__(T, X, session, keras_learning_phase) + + def get_symbolic_attribution(self): + return [g * x for g, x in zip( + tf.gradients(self.T, self.X), + self.X if self.has_multiple_inputs else [self.X])] + + @classmethod + def nonlinearity_grad_override(cls, op, grad): + output = op.outputs[0] + input = op.inputs[0] + return grad * output / (input + eps * + tf.where(input >= 0, tf.ones_like(input), -1 * tf.ones_like(input))) + +""" +DeepLIFT +This reformulation only considers the "Rescale" rule +https://arxiv.org/abs/1704.02685 +""" + + +class DeepLIFTRescale(GradientBasedMethod): + + _deeplift_ref = {} + + def __init__(self, T, X, session, keras_learning_phase, baseline=None): + self.baseline = baseline + super(DeepLIFTRescale, self).__init__(T, X, session, keras_learning_phase) + + def get_symbolic_attribution(self): + return [g * (x - b) for g, x, b in zip( + tf.gradients(self.T, self.X), + self.X if self.has_multiple_inputs else [self.X], + self.baseline if 
self.has_multiple_inputs else [self.baseline])] + + @classmethod + def nonlinearity_grad_override(cls, op, grad): + output = op.outputs[0] + input = op.inputs[0] + ref_input = cls._deeplift_ref[op.name] + ref_output = activation(op.type)(ref_input) + delta_out = output - ref_output + delta_in = input - ref_input + instant_grad = activation(op.type)(0.5 * (ref_input + input)) + return tf.where(tf.abs(delta_in) > 1e-5, grad * delta_out / delta_in, + original_grad(instant_grad.op, grad)) + + def _init_references(self): + # print ('DeepLIFT: computing references...') + sys.stdout.flush() + self._deeplift_ref.clear() + ops = [] + g = tf.get_default_graph() + for op in g.get_operations(): + if len(op.inputs) > 0 and not op.name.startswith('gradients'): + if op.type in SUPPORTED_ACTIVATIONS: + ops.append(op) + YR = self._session_run([o.inputs[0] for o in ops], self.baseline) + for (r, op) in zip(YR, ops): + self._deeplift_ref[op.name] = r + # print('DeepLIFT: references ready') + sys.stdout.flush() + + +""" +Occlusion method +Generalization of the grey-box method presented in https://arxiv.org/pdf/1311.2901.pdf +This method performs a systematic perturbation of contiguous hyperpatches in the input, +replacing each patch with a user-defined value (by default 0). +window_shape : integer or tuple of length xs_ndim +Defines the shape of the elementary n-dimensional orthotope the rolling window view. +If an integer is given, the shape will be a hypercube of sidelength given by its value. +step : integer or tuple of length xs_ndim +Indicates step size at which extraction shall be performed. +If integer is given, then the step is uniform in all dimensions. 
+""" + + +class Occlusion(PerturbationBasedMethod): + + def __init__(self, T, X, session, keras_learning_phase, window_shape=None, step=None): + super(Occlusion, self).__init__(T, X, session, keras_learning_phase) + if self.has_multiple_inputs: + raise RuntimeError('Multiple inputs not yet supported for perturbation methods') + + input_shape = X[0].get_shape().as_list() + if window_shape is not None: + assert len(window_shape) == len(input_shape), \ + 'window_shape must have length of input (%d)' % len(input_shape) + self.window_shape = tuple(window_shape) + else: + self.window_shape = (1,) * len(input_shape) + + if step is not None: + assert isinstance(step, int) or len(step) == len(input_shape), \ + 'step must be integer or tuple with the length of input (%d)' % len(input_shape) + self.step = step + else: + self.step = 1 + self.replace_value = 0.0 + logging.info('Input shape: %s; window_shape %s; step %s' % (input_shape, self.window_shape, self.step)) + + def run(self, xs, ys=None, batch_size=None): + self._check_input_compatibility(xs, ys, batch_size) + input_shape = xs.shape[1:] + batch_size = xs.shape[0] + total_dim = np.asscalar(np.prod(input_shape)) + + # Create mask + index_matrix = np.arange(total_dim).reshape(input_shape) + idx_patches = view_as_windows(index_matrix, self.window_shape, self.step).reshape((-1,) + self.window_shape) + heatmap = np.zeros_like(xs, dtype=np.float32).reshape((-1), total_dim) + w = np.zeros_like(heatmap) + + # Compute original output + eval0 = self._session_run(self.T, xs, ys, batch_size) + + # Start perturbation loop + for i, p in enumerate(idx_patches): + mask = np.ones(input_shape).flatten() + mask[p.flatten()] = self.replace_value + masked_xs = mask.reshape((1,) + input_shape) * xs + delta = eval0 - self._session_run(self.T, masked_xs, ys, batch_size) + delta_aggregated = np.sum(delta.reshape((batch_size, -1)), -1, keepdims=True) + heatmap[:, p.flatten()] += delta_aggregated + w[:, p.flatten()] += p.size + + attribution = 
np.reshape(heatmap / w, xs.shape) + if np.isnan(attribution).any(): + warnings.warn('Attributions generated by Occlusion method contain nans, ' + 'probably because window_shape and step do not allow to cover the all input.') + return attribution + + +""" +Shapley Value sampling +Computes approximate Shapley Values using "Polynomial calculation of the Shapley value based on sampling", +Castro et al, 2009 (https://www.sciencedirect.com/science/article/pii/S0305054808000804) +samples : integer (default 5) +Defined the number of samples for each input feature. +Notice that evaluating a model samples * n_input_feature times might take a while. +sampling_dims : list of dimension indexes to run sampling on (feature dimensions). +By default, all dimensions except the batch dimension will be sampled. +For example, with a 4-D tensor that contains color images, single color channels are sampled. +To sample pixels, instead, use sampling_dims=[1,2] +""" + + +class ShapleySampling(PerturbationBasedMethod): + + def __init__(self, T, X, session, keras_learning_phase, samples=5, sampling_dims=None): + super(ShapleySampling, self).__init__(T, X, session, keras_learning_phase) + if self.has_multiple_inputs: + raise RuntimeError('Multiple inputs not yet supported for perturbation methods') + dims = len(X.shape) + if sampling_dims is not None: + if not 0 < len(sampling_dims) <= (dims - 1): + raise RuntimeError('sampling_dims must be a list containing 1 to %d elements' % (dims-1)) + if 0 in sampling_dims: + raise RuntimeError('Cannot sample batch dimension: remove 0 from sampling_dims') + if any([x < 1 or x > dims-1 for x in sampling_dims]): + raise RuntimeError('Invalid value in sampling_dims') + else: + sampling_dims = list(range(1, dims)) + + self.samples = samples + self.sampling_dims = sampling_dims + + def run(self, xs, ys=None, batch_size=None): + xs_shape = list(xs.shape) + batch_size = xs.shape[0] + n_features = int(np.asscalar(np.prod([xs.shape[i] for i in 
self.sampling_dims]))) + result = np.zeros((xs_shape[0], n_features)) + + run_shape = list(xs_shape) # a copy + run_shape = np.delete(run_shape, self.sampling_dims).tolist() + run_shape.insert(1, -1) + + reconstruction_shape = [xs_shape[0]] + for j in self.sampling_dims: + reconstruction_shape.append(xs_shape[j]) + + for r in range(self.samples): + p = np.random.permutation(n_features) + x = xs.copy().reshape(run_shape) + y = None + for i in p: + if y is None: + y = self._session_run(self.T, x.reshape(xs_shape), ys, batch_size) + x[:, i] = 0 + y0 = self._session_run(self.T, x.reshape(xs_shape), ys, batch_size) + delta = y - y0 + delta_aggregated = np.sum(delta.reshape((batch_size, -1)), -1, keepdims=False) + result[:, i] += delta_aggregated + y = y0 + + shapley = result / self.samples + return shapley.reshape(reconstruction_shape) + + +# ----------------------------------------------------------------------------- +# END ATTRIBUTION METHODS +# ----------------------------------------------------------------------------- + + +attribution_methods = OrderedDict({ + 'zero': (DummyZero, 0), + 'saliency': (Saliency, 1), + 'grad*input': (GradientXInput, 2), + 'intgrad': (IntegratedGradients, 3), + 'elrp': (EpsilonLRP, 4), + 'deeplift': (DeepLIFTRescale, 5), + 'occlusion': (Occlusion, 6), + 'shapley_sampling': (ShapleySampling, 7) +}) + + + +@ops.RegisterGradient("DeepExplainGrad") +def deepexplain_grad(op, grad): + global _ENABLED_METHOD_CLASS, _GRAD_OVERRIDE_CHECKFLAG + _GRAD_OVERRIDE_CHECKFLAG = 1 + if _ENABLED_METHOD_CLASS is not None \ + and issubclass(_ENABLED_METHOD_CLASS, GradientBasedMethod): + return _ENABLED_METHOD_CLASS.nonlinearity_grad_override(op, grad) + else: + return original_grad(op, grad) + + +class DeepExplain(object): + + def __init__(self, graph=None, session=tf.get_default_session()): + self.method = None + self.batch_size = None + self.session = session + self.graph = session.graph if graph is None else graph + self.graph_context = 
self.graph.as_default() + self.override_context = self.graph.gradient_override_map(self.get_override_map()) + self.keras_phase_placeholder = None + self.context_on = False + if self.session is None: + raise RuntimeError('DeepExplain: could not retrieve a session. Use DeepExplain(session=your_session).') + + def __enter__(self): + # Override gradient of all ops created in context + self.graph_context.__enter__() + self.override_context.__enter__() + self.context_on = True + return self + + def __exit__(self, type, value, traceback): + self.graph_context.__exit__(type, value, traceback) + self.override_context.__exit__(type, value, traceback) + self.context_on = False + + def get_explainer(self, method, T, X, **kwargs): + if not self.context_on: + raise RuntimeError('Explain can be called only within a DeepExplain context.') + global _ENABLED_METHOD_CLASS, _GRAD_OVERRIDE_CHECKFLAG + self.method = method + if self.method in attribution_methods: + method_class, method_flag = attribution_methods[self.method] + else: + raise RuntimeError('Method must be in %s' % list(attribution_methods.keys())) + if isinstance(X, list): + for x in X: + if 'tensor' not in str(type(x)).lower(): + raise RuntimeError('If a list, X must contain only Tensorflow Tensor objects') + else: + if 'tensor' not in str(type(X)).lower(): + raise RuntimeError('X must be a Tensorflow Tensor object or a list of them') + + if 'tensor' not in str(type(T)).lower(): + raise RuntimeError('T must be a Tensorflow Tensor object') + + logging.info('DeepExplain: running "%s" explanation method (%d)' % (self.method, method_flag)) + self._check_ops() + _GRAD_OVERRIDE_CHECKFLAG = 0 + + _ENABLED_METHOD_CLASS = method_class + method = _ENABLED_METHOD_CLASS(T, X, + self.session, + keras_learning_phase=self.keras_phase_placeholder, + **kwargs) + + if issubclass(_ENABLED_METHOD_CLASS, GradientBasedMethod) and _GRAD_OVERRIDE_CHECKFLAG == 0: + warnings.warn('DeepExplain detected you are trying to use an attribution method 
that requires ' + 'gradient override but the original gradient was used instead. You might have forgot to ' + '(re)create your graph within the DeepExlain context. Results are not reliable!') + _ENABLED_METHOD_CLASS = None + _GRAD_OVERRIDE_CHECKFLAG = 0 + self.keras_phase_placeholder = None + return method + + def explain(self, method, T, X, xs, ys=None, batch_size=None, **kwargs): + explainer = self.get_explainer(method, T, X, **kwargs) + return explainer.run(xs, ys, batch_size) + + @staticmethod + def get_override_map(): + return dict((a, 'DeepExplainGrad') for a in SUPPORTED_ACTIVATIONS) + + def _check_ops(self): + """ + Heuristically check if any op is in the list of unsupported activation functions. + This does not cover all cases where explanation methods would fail, and must be improved in the future. + Also, check if the placeholder named 'keras_learning_phase' exists in the graph. This is used by Keras + and needs to be passed in feed_dict. + :return: + """ + g = tf.get_default_graph() + for op in g.get_operations(): + if len(op.inputs) > 0 and not op.name.startswith('gradients'): + if op.type in UNSUPPORTED_ACTIVATIONS: + warnings.warn('Detected unsupported activation (%s). ' + 'This might lead to unexpected or wrong results.' 
% op.type) + elif 'keras_learning_phase' in op.name: + self.keras_phase_placeholder = op.outputs[0] \ No newline at end of file diff --git a/easy_rec/python/tools/explainer/utils.py b/easy_rec/python/tools/explainer/utils.py new file mode 100644 index 000000000..b697bf230 --- /dev/null +++ b/easy_rec/python/tools/explainer/utils.py @@ -0,0 +1,69 @@ +import numpy as np +import tensorflow as tf + +# Some of the following functions for batch processing have been borrowed and adapter from Keras +# https://github.com/keras-team/keras/blob/master/keras/utils/generic_utils.py +# https://github.com/keras-team/keras/blob/master/keras/engine/training_utils.py + + +def make_batches(size, batch_size): + """Returns a list of batch indices (tuples of indices). + # Arguments + size: Integer, total size of the data to slice into batches. + batch_size: Integer, batch size. + # Returns + A list of tuples of array indices. + """ + num_batches = (size + batch_size - 1) // batch_size # round up + return [(i * batch_size, min(size, (i + 1) * batch_size)) + for i in range(num_batches)] + + +def to_list(x, allow_tuple=False): + """Normalizes a list/tensor into a list. + If a tensor is passed, we return + a list of size 1 containing the tensor. + # Arguments + x: target object to be normalized. + allow_tuple: If False and x is a tuple, + it will be converted into a list + with a single element (the tuple). + Else converts the tuple to a list. + # Returns + A list. + """ + if isinstance(x, list): + return x + if allow_tuple and isinstance(x, tuple): + return list(x) + return [x] + + +def unpack_singleton(x): + """Gets the equivalent np-array if the iterable has only one value. + Otherwise return the iterable. + # Argument + x: A list or tuple. + # Returns + The same iterable or the iterable converted to a np-array. + """ + if len(x) == 1: + return np.array(x) + return x + + +def slice_arrays(arrays, start=None, stop=None): + """Slices an array or list of arrays. 
+ """ + if arrays is None: + return [None] + elif isinstance(arrays, list): + return [None if x is None else x[start:stop] for x in arrays] + else: + return arrays[start:stop] + + +def placeholder_from_data(numpy_array): + if numpy_array is None: + return None + return tf.placeholder('float', [None,] + list(numpy_array.shape[1:])) diff --git a/easy_rec/python/utils/activation.py b/easy_rec/python/utils/activation.py index d05d705b3..a6ec1374f 100644 --- a/easy_rec/python/utils/activation.py +++ b/easy_rec/python/utils/activation.py @@ -4,34 +4,15 @@ import numpy as np import six import tensorflow as tf -from tensorflow.python.keras.layers import Layer - from easy_rec.python.utils.load_class import load_by_path -try: - from tensorflow.python.keras.layers import BatchNormalization -except ImportError: - BatchNormalization = tf.keras.layers.BatchNormalization - -# try: -# from tensorflow.python.ops.init_ops import Zeros -# except ImportError: -# from tensorflow.python.ops.init_ops_v2 import Zeros - -class Dice(Layer): +def dice(_x, axis=-1, epsilon=1e-9, name='dice', training=True): """The Data Adaptive Activation Function in DIN. Which can be viewed as a generalization of PReLu, and can adaptively adjust the rectified point according to distribution of input data. - Input shape - - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - Output shape - - Same shape as the input. - Arguments - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). - **epsilon** : Small float added to variance to avoid dividing by zero. @@ -41,44 +22,18 @@ class Dice(Layer): Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.] 
(https://arxiv.org/pdf/1706.06978.pdf) """ - - def __init__(self, - feat_dim, - axis=-1, - epsilon=1e-9, - is_training=None, - **kwargs): - super(Dice, self).__init__(**kwargs) - self.axis = axis - self.epsilon = epsilon - self.is_training = is_training - self.bn = BatchNormalization( - axis=self.axis, epsilon=self.epsilon, center=False, scale=False) - self.alphas = tf.Variable(tf.zeros([feat_dim]), dtype=tf.float32) - - # def build(self, input_shape): - # super(Dice, self).build(input_shape) # Be sure to call this somewhere! - # self.bn = BatchNormalization( - # axis=self.axis, epsilon=self.epsilon, center=False, scale=False) - # self.alphas = self.add_weight( - # shape=(input_shape[-1],), - # initializer=Zeros(), - # dtype=tf.float32, - # name='dice_alpha') # name='alpha_'+self.name - # self.uses_learning_phase = True - - def call(self, inputs, **kwargs): - inputs_normed = self.bn(inputs, training=self.is_training) - x_p = tf.sigmoid(inputs_normed) - return self.alphas * (1.0 - x_p) * inputs + x_p * inputs - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self,): - config = {'axis': self.axis, 'epsilon': self.epsilon} - base_config = super(Dice, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + alphas = tf.get_variable('alpha_' + name, _x.get_shape()[-1], + initializer=tf.constant_initializer(0.0), + dtype=tf.float32) + inputs_normed = tf.layers.batch_normalization( + inputs=_x, + axis=axis, + epsilon=epsilon, + center=False, + scale=False, + training=training) + x_p = tf.sigmoid(inputs_normed) + return alphas * (1.0 - x_p) * _x + x_p * _x def gelu(x): @@ -134,7 +89,7 @@ def get_activation(activation_string, **kwargs): return tf.nn.leaky_relu return tf.keras.layers.PReLU(**kwargs) elif act == 'dice': - return Dice(**kwargs) + return lambda x, name: dice(x, name=name, **kwargs) elif act == 'elu': return tf.nn.elu elif act == 'selu': @@ -143,7 +98,7 @@ def get_activation(activation_string, 
**kwargs): return tf.tanh elif act == 'swish': if tf.__version__ < '1.13.0': - return lambda x: x * tf.sigmoid(x) + return lambda x, name: x * tf.sigmoid(x, name=name) return tf.nn.swish elif act == 'sigmoid': return tf.nn.sigmoid diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index 091e10e07..4c1c28550 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -97,7 +97,7 @@ def download(oss_or_url, dst_dir=''): def create_module_dir(dst_dir): if not os.path.exists(dst_dir): os.makedirs(dst_dir) - with open(os.path.join(dst_dir, '__init__.py'), 'w') as ofile: + with open(os.path.join(dst_dir, 'explainer.py'), 'w') as ofile: ofile.write('\n') From e27b12137afbb1e77dad7ea4a8e863ac59c18ddc Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 1 May 2023 20:16:06 +0800 Subject: [PATCH 17/54] [feat]: add attention normalizer for din --- easy_rec/python/layers/din.py | 10 +++++++--- easy_rec/python/layers/dnn.py | 2 +- easy_rec/python/protos/layer.proto | 2 ++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/easy_rec/python/layers/din.py b/easy_rec/python/layers/din.py index 81f661165..71c6e1ab4 100644 --- a/easy_rec/python/layers/din.py +++ b/easy_rec/python/layers/din.py @@ -55,9 +55,13 @@ def __call__(self, inputs, training=None, **kwargs): seq_mask = tf.expand_dims(seq_mask, 1) paddings = tf.ones_like(scores) * (-2**32 + 1) scores = tf.where(seq_mask, scores, paddings) # [B, 1, L] - scores = scores / (seq_emb_size**0.5) - # normalization with softmax is abandoned according to the original paper - scores = tf.nn.sigmoid(scores) + if self.config.attention_normalizer == 'softmax': + scores = tf.nn.softmax(scores) # (B, 1, L) + elif self.config.attention_normalizer == 'sigmoid': + scores = scores / (seq_emb_size**0.5) + scores = tf.nn.sigmoid(scores) + else: + raise ValueError("unsupported attention normalizer: " + self.config.attention_normalizer) if target_emb_size < seq_emb_size: keys = keys[:, :, 
:target_emb_size] # [B, L, E] diff --git a/easy_rec/python/layers/dnn.py b/easy_rec/python/layers/dnn.py index d2af5a4cf..ce36dd677 100644 --- a/easy_rec/python/layers/dnn.py +++ b/easy_rec/python/layers/dnn.py @@ -34,7 +34,7 @@ def __init__(self, self._name = name self._is_training = is_training logging.info('dnn activation function = %s' % self._config.activation) - self.activation = get_activation(self._config.activation, is_training=is_training) + self.activation = get_activation(self._config.activation, training=is_training) self._last_layer_no_activation = last_layer_no_activation self._last_layer_no_batch_norm = last_layer_no_batch_norm diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index e2ca2e217..4ddacac5e 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -118,4 +118,6 @@ message DINEncoder { required DNN attention_dnn = 1; // whether to keep target item feature required bool need_target_feature = 2 [default = true]; + // option: softmax, sigmoid + required string attention_normalizer = 3 [default = 'softmax']; } From e834050f7c8e5fb41b0a3890ee516197d43c79a3 Mon Sep 17 00:00:00 2001 From: weisu Date: Thu, 4 May 2023 12:47:22 +0800 Subject: [PATCH 18/54] [feat]: add dice activation --- easy_rec/python/utils/activation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/easy_rec/python/utils/activation.py b/easy_rec/python/utils/activation.py index a6ec1374f..185dee622 100644 --- a/easy_rec/python/utils/activation.py +++ b/easy_rec/python/utils/activation.py @@ -6,6 +6,9 @@ import tensorflow as tf from easy_rec.python.utils.load_class import load_by_path +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + def dice(_x, axis=-1, epsilon=1e-9, name='dice', training=True): """The Data Adaptive Activation Function in DIN. 
From 05d0e6447bc22396f23800d77eaeb75e80b1d575 Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 5 May 2023 08:32:50 +0800 Subject: [PATCH 19/54] [feat]: add dice activation for dnn layer --- .../feature_column/feature_column_v2.py | 64 ++++++ easy_rec/python/layers/fscd_layer.py | 192 ++++++++++++++++++ easy_rec/python/protos/feature_config.proto | 3 + .../python/protos/variational_dropout.proto | 6 + 4 files changed, 265 insertions(+) create mode 100644 easy_rec/python/layers/fscd_layer.py diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index e1e4d9304..4610f8e52 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -3377,6 +3377,38 @@ def raw_name(self): """See `FeatureColumn` base class.""" return self.categorical_column.raw_name + @property + def cardinality(self): + fc = self.categorical_column + if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): + return fc.hash_bucket_size + + if isinstance(fc, IdentityCategoricalColumn): + return fc.num_buckets + + if isinstance(fc, BucketizedColumn): + return len(fc.boundaries) + 1 + + if isinstance(fc, VocabularyListCategoricalColumn): + return len(fc.vocabulary_list) + fc.num_oov_buckets + + if isinstance(fc, VocabularyFileCategoricalColumn): + return len(fc.vocabulary_size) + fc.num_oov_buckets + + if isinstance(fc, WeightedCategoricalColumn): + sub_fc = fc.categorical_column + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance(sub_fc, CrossedColumn): + return sub_fc.hash_bucket_size + if isinstance(sub_fc, IdentityCategoricalColumn): + return sub_fc.num_buckets + if isinstance(sub_fc, VocabularyListCategoricalColumn): + return len(sub_fc.vocabulary_list) + fc.num_oov_buckets + if isinstance(sub_fc, VocabularyFileCategoricalColumn): + return len(sub_fc.vocabulary_size) + fc.num_oov_buckets + if isinstance(sub_fc, 
BucketizedColumn): + return len(sub_fc.boundaries) + 1 + return 1 + @property def parse_example_spec(self): """See `FeatureColumn` base class.""" @@ -3727,6 +3759,38 @@ def raw_name(self): """See `FeatureColumn` base class.""" return self.categorical_column.raw_name + @property + def cardinality(self): + fc = self.categorical_column + if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): + return fc.hash_bucket_size + + if isinstance(fc, IdentityCategoricalColumn): + return fc.num_buckets + + if isinstance(fc, BucketizedColumn): + return len(fc.boundaries) + 1 + + if isinstance(fc, VocabularyListCategoricalColumn): + return len(fc.vocabulary_list) + fc.num_oov_buckets + + if isinstance(fc, VocabularyFileCategoricalColumn): + return len(fc.vocabulary_size) + fc.num_oov_buckets + + if isinstance(fc, WeightedCategoricalColumn): + sub_fc = fc.categorical_column + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance(sub_fc, CrossedColumn): + return sub_fc.hash_bucket_size + if isinstance(sub_fc, IdentityCategoricalColumn): + return sub_fc.num_buckets + if isinstance(sub_fc, VocabularyListCategoricalColumn): + return len(sub_fc.vocabulary_list) + fc.num_oov_buckets + if isinstance(sub_fc, VocabularyFileCategoricalColumn): + return len(sub_fc.vocabulary_size) + fc.num_oov_buckets + if isinstance(sub_fc, BucketizedColumn): + return len(sub_fc.boundaries) + 1 + return 1 + @property def parse_example_spec(self): """See `FeatureColumn` base class.""" diff --git a/easy_rec/python/layers/fscd_layer.py b/easy_rec/python/layers/fscd_layer.py new file mode 100644 index 000000000..96ea5fd5c --- /dev/null +++ b/easy_rec/python/layers/fscd_layer.py @@ -0,0 +1,192 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from collections import OrderedDict +import json +import math + +import numpy as np +import tensorflow as tf + +from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA +from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA +from easy_rec.python.compat.feature_column.feature_column_v2 import SharedEmbeddingColumn # NOQA + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class FSCDLayer(object): + """Rank features by variational dropout. + + paper: Towards a Better Tradeoff between Effectiveness and Efficiency in Pre-Ranking, + A Learnable Feature Selection based Approach + arXiv: 2105.07706 + """ + + def __init__(self, + feature_configs, + variational_dropout_config, + features_dimension, + is_training=False, + name=''): + self._config = variational_dropout_config + self.features_dimension = features_dimension + self.features_total_dimension = sum(self.features_dimension.values()) + + self._dropout_param_size = len(self.features_dimension) + self.drop_param_shape = [self._dropout_param_size] + self.evaluate = not is_training + + delta_name = 'delta' if name == 'all' else 'delta_%s' % name + self.delta = tf.get_variable( + name=delta_name, + shape=self.drop_param_shape, + dtype=tf.float32, + initializer=None) + tf.add_to_collection( + 'variational_dropout', + json.dumps([name, list(self.features_dimension.items())])) + + if variational_dropout_config.regularize_by_feature_complexity: + self.regular_params = self.get_feature_regular_params(feature_configs) + self.feature_complexity = {} + + def get_feature_regular_params(self, feature_configs): + feature_regularize = {} + for config in feature_configs: + name = config.input_names[0] + if config.HasField('feature_name'): + name = config.feature_name + + complexity = self._config.feature_complexity_weight * config.complexity + + # dim = 1.0 + # if config.HasField('embedding_dim'): + # dim = float(config.embedding_dim) + dim = 
self.features_dimension[name] + complexity += self._config.feature_dimension_weight * dim + + cardinal = 1.0 + if config.HasField('hash_bucket_size'): + cardinal = float(config.hash_bucket_size) + elif config.HasField('num_buckets'): + cardinal = float(config.num_buckets) + elif len(config.boundaries) > 0: + cardinal = float(len(config.boundaries) + 1) + complexity += self._config.feature_cardinality_weight * cardinal + + theta = 1.0 - sigmoid(complexity) + alpha = math.log(1.0 - theta) - math.log(theta) + feature_regularize[name] = alpha + + return feature_regularize + + def get_lambda(self): + return self._config.regularization_lambda + + def build_expand_index(self, batch_size): + # Build index_list--->[[0,0],[0,0],[0,0],[0,0],[0,1]......] + expanded_index = [] + for i, index_loop_count in enumerate(self.features_dimension.values()): + for m in range(index_loop_count): + expanded_index.append([i]) + expanded_index = tf.tile(expanded_index, [batch_size, 1]) + batch_size_range = tf.range(batch_size) + expand_range_axis = tf.expand_dims(batch_size_range, 1) + batch_size_range_expand_dim_len = tf.tile( + expand_range_axis, [1, self.features_total_dimension]) + index_i = tf.reshape(batch_size_range_expand_dim_len, [-1, 1]) + expanded_index = tf.concat([index_i, expanded_index], 1) + return expanded_index + + def sample_noisy_input(self, input): + batch_size = tf.shape(input)[0] + if self.evaluate: + expanded_dims_logit_p = tf.expand_dims(self.logit_p, 0) + expanded_logit_p = tf.tile(expanded_dims_logit_p, [batch_size, 1]) + p = tf.sigmoid(expanded_logit_p) + if self.variational_dropout_wise(): + scaled_input = input * (1 - p) + else: + # expand dropout layer + expanded_index = self.build_expand_index(batch_size) + expanded_p = tf.gather_nd(p, expanded_index) + expanded_p = tf.reshape(expanded_p, [-1, self.features_total_dimension]) + scaled_input = input * (1 - expanded_p) + + return scaled_input + else: + bern_val = self.sampled_from_logit_p(batch_size) + bern_val = 
tf.reshape(bern_val, [-1, self.features_total_dimension]) + noisy_input = input * bern_val + return noisy_input + + def sampled_from_logit_p(self, num_samples): + expand_dims_logit_p = tf.expand_dims(self.logit_p, 0) + expand_logit_p = tf.tile(expand_dims_logit_p, [num_samples, 1]) + dropout_p = tf.sigmoid(expand_logit_p) + bern_val = self.concrete_dropout_neuron(dropout_p) + + if self.variational_dropout_wise(): + return bern_val, bern_val + else: + # from feature_num to embedding_dim_num + expanded_index = self.build_expand_index(num_samples) + bern_val_gather_nd = tf.gather_nd(bern_val, expanded_index) + return bern_val_gather_nd, bern_val + + def concrete_dropout_neuron(self, dropout_p, temp=1.0 / 10.0): + EPSILON = np.finfo(float).eps + unif_noise = tf.random_uniform( + tf.shape(dropout_p), dtype=tf.float32, seed=None, name='unif_noise') + + approx = ( + tf.log(dropout_p + EPSILON) - tf.log(1. - dropout_p + EPSILON) + + tf.log(unif_noise + EPSILON) - tf.log(1. - unif_noise + EPSILON)) + + approx_output = tf.sigmoid(approx / temp) + return 1 - approx_output + + def compute_regular_params(self, cols_to_feature): + alphas = OrderedDict() + for fc, fea in cols_to_feature.items(): + dim = int(fea.shape[-1]) + complexity = self.feature_complexity[fc.raw_name] + cardinal = 1 + if isinstance(fc, EmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): + cardinal = fc.cardinality + c = self._config.feature_complexity_weight * complexity + c += self._config.feature_cardinality_weight * cardinal + c += self._config.feature_dimension_weight * dim + theta = 1.0 - sigmoid(complexity) + alpha = math.log(1.0 - theta) - math.log(theta) + alphas[fc] = alpha + return alphas + + def __call__(self, cols_to_feature): + """ + cols_to_feature: an ordered dict mapping feature_column to feature_values + """ + alphas = self.compute_regular_params(cols_to_feature) + feature_columns = cols_to_feature.keys() + for column in sorted(feature_columns, key=lambda x: x.name): + value = 
cols_to_feature[column] + + batch_size = tf.shape(output_features)[0] + noisy_input, z = self.sample_noisy_input(output_features) + dropout_p = tf.sigmoid(self.logit_p) + variational_dropout_penalty = 1. - dropout_p + if self._config.regularize_by_feature_complexity: + pass + else: + variational_dropout_penalty_lambda = self.get_lambda() / tf.cast( + batch_size, dtype=tf.float32) + variational_dropout_loss_sum = variational_dropout_penalty_lambda * tf.reduce_sum( + variational_dropout_penalty, axis=0) + tf.add_to_collection('variational_dropout_loss', + variational_dropout_loss_sum) + return noisy_input + + +def sigmoid(x): + return x / (1 + math.exp(-x)) diff --git a/easy_rec/python/protos/feature_config.proto b/easy_rec/python/protos/feature_config.proto index 5ed305c10..b642fff23 100644 --- a/easy_rec/python/protos/feature_config.proto +++ b/easy_rec/python/protos/feature_config.proto @@ -128,6 +128,9 @@ message FeatureConfig { // embedding variable params optional EVParams ev_params = 31; + + // fg complexity + optional float complexity = 32 [default = 1.0]; } message FeatureConfigV2 { diff --git a/easy_rec/python/protos/variational_dropout.proto b/easy_rec/python/protos/variational_dropout.proto index e72ca54c6..afe4d061c 100644 --- a/easy_rec/python/protos/variational_dropout.proto +++ b/easy_rec/python/protos/variational_dropout.proto @@ -7,4 +7,10 @@ message VariationalDropoutLayer{ optional float regularization_lambda = 1 [default = 0.01]; // variational_dropout dimension optional bool embedding_wise_variational_dropout = 2 [default = false]; + // whether to use FSCD model + optional bool regularize_by_feature_complexity = 3 [default = false]; + + optional float feature_complexity_weight = 4 [default = 1.0]; + optional float feature_dimension_weight = 5 [default = 1e-2]; + optional float feature_cardinality_weight = 6 [default = 1e-7]; } From 23962b23af7859b2691179e3ee962d405178dd4b Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 5 May 2023 14:10:18 +0800 
Subject: [PATCH 20/54] [feat]: add FSCD layer --- easy_rec/python/layers/fscd_layer.py | 187 +++++++++----------------- easy_rec/python/layers/input_layer.py | 54 +++++--- 2 files changed, 94 insertions(+), 147 deletions(-) diff --git a/easy_rec/python/layers/fscd_layer.py b/easy_rec/python/layers/fscd_layer.py index 96ea5fd5c..c8f94bc81 100644 --- a/easy_rec/python/layers/fscd_layer.py +++ b/easy_rec/python/layers/fscd_layer.py @@ -1,9 +1,7 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. from collections import OrderedDict -import json import math - import numpy as np import tensorflow as tf @@ -15,6 +13,38 @@ tf = tf.compat.v1 +def get_feature_complexity(feature_configs): + feature_complexity = {} + for config in feature_configs: + name = config.input_names[0] + if config.HasField('feature_name'): + name = config.feature_name + feature_complexity[name] = config.complexity + + # complexity = self._config.feature_complexity_weight * config.complexity + # + # # dim = 1.0 + # # if config.HasField('embedding_dim'): + # # dim = float(config.embedding_dim) + # dim = self.features_dimension[name] + # complexity += self._config.feature_dimension_weight * dim + # + # cardinal = 1.0 + # if config.HasField('hash_bucket_size'): + # cardinal = float(config.hash_bucket_size) + # elif config.HasField('num_buckets'): + # cardinal = float(config.num_buckets) + # elif len(config.boundaries) > 0: + # cardinal = float(len(config.boundaries) + 1) + # complexity += self._config.feature_cardinality_weight * cardinal + # + # theta = 1.0 - sigmoid(complexity) + # alpha = math.log(1.0 - theta) - math.log(theta) + # feature_regularize[name] = alpha + + return feature_complexity + + class FSCDLayer(object): """Rank features by variational dropout. 
@@ -26,126 +56,28 @@ class FSCDLayer(object): def __init__(self, feature_configs, variational_dropout_config, - features_dimension, is_training=False, name=''): self._config = variational_dropout_config - self.features_dimension = features_dimension - self.features_total_dimension = sum(self.features_dimension.values()) + self.is_training = is_training + self.name = name + self.feature_complexity = get_feature_complexity(feature_configs) - self._dropout_param_size = len(self.features_dimension) - self.drop_param_shape = [self._dropout_param_size] - self.evaluate = not is_training - - delta_name = 'delta' if name == 'all' else 'delta_%s' % name - self.delta = tf.get_variable( + def compute_dropout_mask(self, n, temperature=0.1): + delta_name = 'delta' if self.name == 'all' else 'delta_%s' % self.name + delta = tf.get_variable( name=delta_name, - shape=self.drop_param_shape, + shape=[n], dtype=tf.float32, - initializer=None) - tf.add_to_collection( - 'variational_dropout', - json.dumps([name, list(self.features_dimension.items())])) - - if variational_dropout_config.regularize_by_feature_complexity: - self.regular_params = self.get_feature_regular_params(feature_configs) - self.feature_complexity = {} - - def get_feature_regular_params(self, feature_configs): - feature_regularize = {} - for config in feature_configs: - name = config.input_names[0] - if config.HasField('feature_name'): - name = config.feature_name - - complexity = self._config.feature_complexity_weight * config.complexity - - # dim = 1.0 - # if config.HasField('embedding_dim'): - # dim = float(config.embedding_dim) - dim = self.features_dimension[name] - complexity += self._config.feature_dimension_weight * dim - - cardinal = 1.0 - if config.HasField('hash_bucket_size'): - cardinal = float(config.hash_bucket_size) - elif config.HasField('num_buckets'): - cardinal = float(config.num_buckets) - elif len(config.boundaries) > 0: - cardinal = float(len(config.boundaries) + 1) - complexity += 
self._config.feature_cardinality_weight * cardinal + initializer=tf.constant_initializer(0.5)) - theta = 1.0 - sigmoid(complexity) - alpha = math.log(1.0 - theta) - math.log(theta) - feature_regularize[name] = alpha - - return feature_regularize - - def get_lambda(self): - return self._config.regularization_lambda - - def build_expand_index(self, batch_size): - # Build index_list--->[[0,0],[0,0],[0,0],[0,0],[0,1]......] - expanded_index = [] - for i, index_loop_count in enumerate(self.features_dimension.values()): - for m in range(index_loop_count): - expanded_index.append([i]) - expanded_index = tf.tile(expanded_index, [batch_size, 1]) - batch_size_range = tf.range(batch_size) - expand_range_axis = tf.expand_dims(batch_size_range, 1) - batch_size_range_expand_dim_len = tf.tile( - expand_range_axis, [1, self.features_total_dimension]) - index_i = tf.reshape(batch_size_range_expand_dim_len, [-1, 1]) - expanded_index = tf.concat([index_i, expanded_index], 1) - return expanded_index - - def sample_noisy_input(self, input): - batch_size = tf.shape(input)[0] - if self.evaluate: - expanded_dims_logit_p = tf.expand_dims(self.logit_p, 0) - expanded_logit_p = tf.tile(expanded_dims_logit_p, [batch_size, 1]) - p = tf.sigmoid(expanded_logit_p) - if self.variational_dropout_wise(): - scaled_input = input * (1 - p) - else: - # expand dropout layer - expanded_index = self.build_expand_index(batch_size) - expanded_p = tf.gather_nd(p, expanded_index) - expanded_p = tf.reshape(expanded_p, [-1, self.features_total_dimension]) - scaled_input = input * (1 - expanded_p) - - return scaled_input - else: - bern_val = self.sampled_from_logit_p(batch_size) - bern_val = tf.reshape(bern_val, [-1, self.features_total_dimension]) - noisy_input = input * bern_val - return noisy_input - - def sampled_from_logit_p(self, num_samples): - expand_dims_logit_p = tf.expand_dims(self.logit_p, 0) - expand_logit_p = tf.tile(expand_dims_logit_p, [num_samples, 1]) - dropout_p = tf.sigmoid(expand_logit_p) - 
bern_val = self.concrete_dropout_neuron(dropout_p) - - if self.variational_dropout_wise(): - return bern_val, bern_val - else: - # from feature_num to embedding_dim_num - expanded_index = self.build_expand_index(num_samples) - bern_val_gather_nd = tf.gather_nd(bern_val, expanded_index) - return bern_val_gather_nd, bern_val - - def concrete_dropout_neuron(self, dropout_p, temp=1.0 / 10.0): EPSILON = np.finfo(float).eps - unif_noise = tf.random_uniform( - tf.shape(dropout_p), dtype=tf.float32, seed=None, name='unif_noise') + unif_noise = tf.random_uniform([n], dtype=tf.float32, seed=None, name='uniform_noise') approx = ( - tf.log(dropout_p + EPSILON) - tf.log(1. - dropout_p + EPSILON) + + tf.log(delta + EPSILON) - tf.log(1. - delta + EPSILON) + tf.log(unif_noise + EPSILON) - tf.log(1. - unif_noise + EPSILON)) - - approx_output = tf.sigmoid(approx / temp) - return 1 - approx_output + return tf.sigmoid(approx / temperature) def compute_regular_params(self, cols_to_feature): alphas = OrderedDict() @@ -167,26 +99,29 @@ def __call__(self, cols_to_feature): """ cols_to_feature: an ordered dict mapping feature_column to feature_values """ - alphas = self.compute_regular_params(cols_to_feature) + output_tensors = [] + alphas = [] + z = self.compute_dropout_mask(len(cols_to_feature)) # keep ratio + regular = self.compute_regular_params(cols_to_feature) feature_columns = cols_to_feature.keys() for column in sorted(feature_columns, key=lambda x: x.name): value = cols_to_feature[column] + alpha = regular[column] + i = len(output_tensors) + out = value * z[i] if self.is_training else value + cols_to_feature[column] = out + output_tensors.append(out) + alphas.append(alpha) + + output_features = tf.concat(output_tensors, 1) batch_size = tf.shape(output_features)[0] - noisy_input, z = self.sample_noisy_input(output_features) - dropout_p = tf.sigmoid(self.logit_p) - variational_dropout_penalty = 1. 
- dropout_p - if self._config.regularize_by_feature_complexity: - pass - else: - variational_dropout_penalty_lambda = self.get_lambda() / tf.cast( - batch_size, dtype=tf.float32) - variational_dropout_loss_sum = variational_dropout_penalty_lambda * tf.reduce_sum( - variational_dropout_penalty, axis=0) - tf.add_to_collection('variational_dropout_loss', - variational_dropout_loss_sum) - return noisy_input + t_alpha = tf.convert_to_tensor(alphas) # [M] + loss = tf.reduce_sum(t_alpha * z) / batch_size + + tf.add_to_collection('variational_dropout_loss', loss) + return output_features def sigmoid(x): - return x / (1 + math.exp(-x)) + return 1. / (1. + math.exp(-x)) diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index fa17a1c15..6900a9bda 100644 --- a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -12,6 +12,7 @@ from easy_rec.python.feature_column.feature_group import FeatureGroup from easy_rec.python.layers import sequence_feature_layer from easy_rec.python.layers import variational_dropout_layer +from easy_rec.python.layers.fscd_layer import FSCDLayer from easy_rec.python.layers.common_layers import text_cnn from easy_rec.python.protos.feature_config_pb2 import WideOrDeep from easy_rec.python.utils import shape_utils @@ -37,6 +38,7 @@ def __init__(self, embedding_regularizer=None, kernel_regularizer=None, is_training=False): + self._feature_configs = feature_configs self._feature_groups = { x.group_name: FeatureGroup(x) for x in feature_groups_config } @@ -182,12 +184,8 @@ def single_call_input_layer(self, group_columns, cols_to_output_tensors=cols_to_output_tensors, feature_name_to_output_tensors=feature_name_to_output_tensors) - # embedding_reg_lst = [output_features] + embedding_reg_lst = [] - for col, val in cols_to_output_tensors.items(): - if isinstance(col, EmbeddingColumn) or isinstance(col, - SharedEmbeddingColumn): - embedding_reg_lst.append(val) builder = 
feature_column._LazyBuilder(features) seq_features = [] for column in sorted(group_seq_columns, key=lambda x: x.name): @@ -226,30 +224,44 @@ def single_call_input_layer(self, cols_to_output_tensors[column] = cnn_feature else: raise NotImplementedError + if self._variational_dropout_config is not None: - features_dimension = OrderedDict([ - (k.raw_name, int(v.shape[-1])) - for k, v in cols_to_output_tensors.items() - ]) - concat_features = array_ops.concat( + if self._variational_dropout_config.regularize_by_feature_complexity: + fscd = FSCDLayer(self._feature_configs, self._variational_dropout_config, + is_training=self._is_training, name=group_name) + output_features = fscd(cols_to_output_tensors) + concat_features = array_ops.concat( [output_features] + seq_features, axis=-1) - variational_dropout = variational_dropout_layer.VariationalDropoutLayer( - self._variational_dropout_config, - features_dimension, - self._is_training, - name=group_name) - concat_features = variational_dropout(concat_features) - group_features = tf.split( - concat_features, list(features_dimension.values()), axis=-1) + group_features = [cols_to_output_tensors[x] for x in group_columns] + \ + [cols_to_output_tensors[x] for x in group_seq_columns] + else: + features_dimension = OrderedDict([ + (k.raw_name, int(v.shape[-1])) + for k, v in cols_to_output_tensors.items() + ]) + concat_features = array_ops.concat( + [output_features] + seq_features, axis=-1) + variational_dropout = variational_dropout_layer.VariationalDropoutLayer( + self._variational_dropout_config, + features_dimension, + self._is_training, + name=group_name) + concat_features = variational_dropout(concat_features) + group_features = tf.split( + concat_features, list(features_dimension.values()), axis=-1) else: concat_features = array_ops.concat( [output_features] + seq_features, axis=-1) group_features = [cols_to_output_tensors[x] for x in group_columns] + \ [cols_to_output_tensors[x] for x in group_seq_columns] - if 
embedding_reg_lst: - regularizers.apply_regularization( - self._embedding_regularizer, weights_list=embedding_reg_lst) + for fc, val in cols_to_output_tensors.items(): + if isinstance(fc, EmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): + embedding_reg_lst.append(val) + + if embedding_reg_lst: + regularizers.apply_regularization( + self._embedding_regularizer, weights_list=embedding_reg_lst) return concat_features, group_features def get_wide_deep_dict(self): From 5dfb29f6a198f460fe7109c0b51e3d047a359262 Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 5 May 2023 16:23:30 +0800 Subject: [PATCH 21/54] [feat]: add dice activation for dnn layer --- .../compat/feature_column/feature_column.py | 37 ++++++++++++++++++- .../feature_column/feature_column_v2.py | 4 +- .../python/feature_column/feature_column.py | 8 +--- easy_rec/python/layers/input_layer.py | 5 +-- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index 1eb27717d..19b1a570b 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -167,6 +167,9 @@ from easy_rec.python.compat import embedding_ops as ev_embedding_ops from easy_rec.python.compat.feature_column import utils as fc_utils +from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn, BucketizedColumn,\ + WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, CrossedColumn, IdentityCategoricalColumn,\ + VocabularyListCategoricalColumn, VocabularyFileCategoricalColumn def _internal_input_layer(features, @@ -2530,7 +2533,39 @@ def name(self): @property def raw_name(self): - return self.categorical_column.name + return self.categorical_column.raw_name + + @property + def cardinality(self): + fc = self.categorical_column + if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): + 
return fc.hash_bucket_size + + if isinstance(fc, IdentityCategoricalColumn): + return fc.num_buckets + + if isinstance(fc, BucketizedColumn): + return len(fc.boundaries) + 1 + + if isinstance(fc, VocabularyListCategoricalColumn): + return len(fc.vocabulary_list) + fc.num_oov_buckets + + if isinstance(fc, VocabularyFileCategoricalColumn): + return len(fc.vocabulary_size) + fc.num_oov_buckets + + if isinstance(fc, WeightedCategoricalColumn) or isinstance(fc, SequenceWeightedCategoricalColumn): + sub_fc = fc.categorical_column + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance(sub_fc, CrossedColumn): + return sub_fc.hash_bucket_size + if isinstance(sub_fc, IdentityCategoricalColumn): + return sub_fc.num_buckets + if isinstance(sub_fc, VocabularyListCategoricalColumn): + return len(sub_fc.vocabulary_list) + fc.num_oov_buckets + if isinstance(sub_fc, VocabularyFileCategoricalColumn): + return len(sub_fc.vocabulary_size) + fc.num_oov_buckets + if isinstance(sub_fc, BucketizedColumn): + return len(sub_fc.boundaries) + 1 + return 1 @property def _var_scope_name(self): diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index 4610f8e52..13a175041 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -3395,7 +3395,7 @@ def cardinality(self): if isinstance(fc, VocabularyFileCategoricalColumn): return len(fc.vocabulary_size) + fc.num_oov_buckets - if isinstance(fc, WeightedCategoricalColumn): + if isinstance(fc, WeightedCategoricalColumn) or isinstance(fc, SequenceWeightedCategoricalColumn): sub_fc = fc.categorical_column if isinstance(sub_fc, HashedCategoricalColumn) or isinstance(sub_fc, CrossedColumn): return sub_fc.hash_bucket_size @@ -3777,7 +3777,7 @@ def cardinality(self): if isinstance(fc, VocabularyFileCategoricalColumn): return len(fc.vocabulary_size) + fc.num_oov_buckets - if isinstance(fc, 
WeightedCategoricalColumn): + if isinstance(fc, WeightedCategoricalColumn) or isinstance(fc, SequenceWeightedCategoricalColumn): sub_fc = fc.categorical_column if isinstance(sub_fc, HashedCategoricalColumn) or isinstance(sub_fc, CrossedColumn): return sub_fc.hash_bucket_size diff --git a/easy_rec/python/feature_column/feature_column.py b/easy_rec/python/feature_column/feature_column.py index 04fc07baf..cc7cfbe77 100644 --- a/easy_rec/python/feature_column/feature_column.py +++ b/easy_rec/python/feature_column/feature_column.py @@ -331,12 +331,8 @@ def parse_tag_feature(self, config): default_value=0, feature_name=feature_name) - if len(config.input_names) > 1: - tag_fc = feature_column.weighted_categorical_column( - tag_fc, weight_feature_key=feature_name + '_w', dtype=tf.float32) - elif config.HasField('kv_separator'): - tag_fc = feature_column.weighted_categorical_column( - tag_fc, weight_feature_key=feature_name + '_w', dtype=tf.float32) + tag_fc = feature_column.weighted_categorical_column( + tag_fc, weight_feature_key=feature_name + '_w', dtype=tf.float32) if self.is_wide(config): self._add_wide_embedding_column(tag_fc, config) diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index 6900a9bda..8098057ad 100644 --- a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -138,8 +138,7 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): group_features = [cols_to_output_tensors[x] for x in group_columns] for col, val in cols_to_output_tensors.items(): - if isinstance(col, EmbeddingColumn) or isinstance( - col, SharedEmbeddingColumn): + if isinstance(col, EmbeddingColumn) or isinstance(col, _SharedEmbeddingColumn) or isinstance(col, SharedEmbeddingColumn): embedding_reg_lst.append(val) builder = feature_column._LazyBuilder(features) @@ -256,7 +255,7 @@ def single_call_input_layer(self, [cols_to_output_tensors[x] for x in group_seq_columns] for fc, val in 
cols_to_output_tensors.items(): - if isinstance(fc, EmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): + if isinstance(fc, EmbeddingColumn) or isinstance(fc, _SharedEmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): embedding_reg_lst.append(val) if embedding_reg_lst: From 51428ce799dfa89284aae3e9859c4f832b47b23c Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 8 May 2023 20:05:22 +0800 Subject: [PATCH 22/54] [feat]: add dice activation for dnn layer --- docs/source/train.md | 8 +- .../compat/feature_column/feature_column.py | 13 +- .../feature_column/feature_column_v2.py | 213 +++- .../python/feature_column/feature_column.py | 30 +- easy_rec/python/input/augment.py | 113 +- easy_rec/python/input/input.py | 37 +- easy_rec/python/layers/bst.py | 52 +- easy_rec/python/layers/din.py | 3 +- easy_rec/python/layers/fscd_layer.py | 132 ++- easy_rec/python/layers/input_layer.py | 21 +- .../layers/multihead_cross_attention.py | 9 +- easy_rec/python/loss/nce_loss.py | 51 +- easy_rec/python/model/easy_rec_model.py | 24 +- easy_rec/python/protos/feature_config.proto | 1 + .../python/protos/variational_dropout.proto | 5 +- easy_rec/python/tools/explainer/deep_shap.py | 420 ++++--- easy_rec/python/tools/explainer/explainer.py | 164 +-- .../tools/explainer/feature_importance.py | 13 +- easy_rec/python/tools/explainer/methods.py | 1016 +++++++++-------- easy_rec/python/tools/explainer/utils.py | 97 +- easy_rec/python/utils/activation.py | 2 +- setup.cfg | 2 +- 22 files changed, 1472 insertions(+), 954 deletions(-) diff --git a/docs/source/train.md b/docs/source/train.md index e58bb6862..67a79ad91 100644 --- a/docs/source/train.md +++ b/docs/source/train.md @@ -155,7 +155,7 @@ EasyRec支持两种损失函数配置方式:1)使用单个损失函数;2 - PAIRWISE_FOCAL_LOSS 的参数配置 - gamma: focal loss的指数,默认值2.0 - - alpha: 调节样本权重的类别平衡参数,建议根据正负样本比例来配置alpha, $\\frac{\\alpha}{1-\\alpha}=\\frac{#Neg}{#Pos}$ + - alpha: 调节样本权重的类别平衡参数,建议根据正负样本比例来配置alpha,即 alpha / (1-alpha) = #Neg / #Pos - session_name: 
pair分组的字段名,比如user_id - hinge_margin: 当pair的logit之差大于该参数值时,当前样本的loss为0,默认值为1.0 - ohem_ratio: 困难样本的百分比,只有部分困难样本参与loss计算,默认值为1.0 @@ -179,7 +179,7 @@ EasyRec支持两种损失函数配置方式:1)使用单个损失函数;2 - BINARY_FOCAL_LOSS 的参数配置 - gamma: focal loss的指数,默认值2.0 - - alpha: 调节样本权重的类别平衡参数,建议根据正负样本比例来配置alpha, $\\frac{\\alpha}{1-\\alpha}=\\frac{#Neg}{#Pos}$ + - alpha: 调节样本权重的类别平衡参数,建议根据正负样本比例来配置alpha,即 alpha / (1-alpha) = #Neg / #Pos - ohem_ratio: 困难样本的百分比,只有部分困难样本参与loss计算,默认值为1.0 - label_smoothing: 标签平滑系数 @@ -188,12 +188,12 @@ EasyRec支持两种损失函数配置方式:1)使用单个损失函数;2 - alpha: ranking loss 与 calibration loss 的相对权重系数;不设置该值时,触发权重自适应学习 - session_name: list分组的字段名,比如user_id - 参考论文:《 [Joint Optimization of Ranking and Calibration with Contextualized Hybrid Model](https://arxiv.org/pdf/2208.06164.pdf) 》 - - 使用示例: [dbmtl_with_jrc_loss.config](https://github.com/alibaba/EasyRec/blob/master/samples/model_config/dbmtl_on_taobao_with_multi_loss.config) + - 使用示例: [dbmtl_with_jrc_loss.config](https://github.com/alibaba/EasyRec/blob/master/samples/model_config/dbmtl_on_taobao_with_multi_loss.config) 排序模型同时使用多个损失函数的完整示例: [cmbf_with_multi_loss.config](https://github.com/alibaba/EasyRec/blob/master/samples/model_config/cmbf_with_multi_loss.config) -多目标排序模型同时使用多个损失函数的完整示例: +多目标排序模型同时使用多个损失函数的完整示例: [dbmtl_with_multi_loss.config](https://github.com/alibaba/EasyRec/blob/master/samples/model_config/dbmtl_on_taobao_with_multi_loss.config) ##### 损失函数权重自适应学习 diff --git a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index 19b1a570b..56d3357c7 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -167,9 +167,6 @@ from easy_rec.python.compat import embedding_ops as ev_embedding_ops from easy_rec.python.compat.feature_column import utils as fc_utils -from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn, BucketizedColumn,\ - 
WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, CrossedColumn, IdentityCategoricalColumn,\ - VocabularyListCategoricalColumn, VocabularyFileCategoricalColumn def _internal_input_layer(features, @@ -2537,6 +2534,10 @@ def raw_name(self): @property def cardinality(self): + from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn, BucketizedColumn, \ + WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, CrossedColumn, IdentityCategoricalColumn, \ + VocabularyListCategoricalColumn, VocabularyFileCategoricalColumn + fc = self.categorical_column if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): return fc.hash_bucket_size @@ -2553,9 +2554,11 @@ def cardinality(self): if isinstance(fc, VocabularyFileCategoricalColumn): return len(fc.vocabulary_size) + fc.num_oov_buckets - if isinstance(fc, WeightedCategoricalColumn) or isinstance(fc, SequenceWeightedCategoricalColumn): + if isinstance(fc, WeightedCategoricalColumn) or isinstance( + fc, SequenceWeightedCategoricalColumn): sub_fc = fc.categorical_column - if isinstance(sub_fc, HashedCategoricalColumn) or isinstance(sub_fc, CrossedColumn): + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance( + sub_fc, CrossedColumn): return sub_fc.hash_bucket_size if isinstance(sub_fc, IdentityCategoricalColumn): return sub_fc.num_buckets diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index 13a175041..a17ce8fdc 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -1328,6 +1328,83 @@ def numeric_column(key, normalizer_fn=normalizer_fn) +def constant_numeric_column(key, + shape=(1,), + default_value=None, + dtype=dtypes.float32, + normalizer_fn=None, + feature_name=None): + """Represents real valued or numerical features. 
+ + Example: + + ```python + price = numeric_column('price') + columns = [price, ...] + features = tf.io.parse_example(..., features=make_parse_example_spec(columns)) + dense_tensor = input_layer(features, columns) + + # or + bucketized_price = bucketized_column(price, boundaries=[...]) + columns = [bucketized_price, ...] + features = tf.io.parse_example(..., features=make_parse_example_spec(columns)) + linear_prediction = linear_model(features, columns) + ``` + + Args: + key: A unique string identifying the input feature. It is used as the + column name and the dictionary key for feature parsing configs, feature + `Tensor` objects, and feature columns. + shape: An iterable of integers specifies the shape of the `Tensor`. An + integer can be given which means a single dimension `Tensor` with given + width. The `Tensor` representing the column will have the shape of + [batch_size] + `shape`. + default_value: A single value compatible with `dtype` or an iterable of + values compatible with `dtype` which the column takes on during + `tf.Example` parsing if data is missing. A default value of `None` will + cause `tf.io.parse_example` to fail if an example does not contain this + column. If a single value is provided, the same value will be applied as + the default value for every item. If an iterable of values is provided, + the shape of the `default_value` should be equal to the given `shape`. + dtype: defines the type of values. Default value is `tf.float32`. Must be a + non-quantized, real integer or floating point type. + normalizer_fn: If not `None`, a function that can be used to normalize the + value of the tensor after `default_value` is applied for parsing. + Normalizer function takes the input `Tensor` as its argument, and returns + the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that + even though the most common use case of this function is normalization, it + can be used for any kind of Tensorflow transformations. 
+ + Returns: + A `NumericColumn`. + + Raises: + TypeError: if any dimension in shape is not an int + ValueError: if any dimension in shape is not a positive integer + TypeError: if `default_value` is an iterable but not compatible with `shape` + TypeError: if `default_value` is not compatible with `dtype`. + ValueError: if `dtype` is not convertible to `tf.float32`. + """ + shape = _check_shape(shape, key) + if not (dtype.is_integer or dtype.is_floating): + raise ValueError('dtype must be convertible to float. ' + 'dtype: {}, key: {}'.format(dtype, key)) + default_value = fc_utils.check_default_value(shape, default_value, dtype, key) + + if normalizer_fn is not None and not callable(normalizer_fn): + raise TypeError( + 'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn)) + + fc_utils.assert_key_is_string(key) + return ConstantNumericColumn( + feature_name=feature_name, + key=key, + shape=shape, + default_value=default_value, + dtype=dtype, + normalizer_fn=normalizer_fn) + + def bucketized_column(source_column, boundaries): """Represents discretized dense input. 
@@ -2619,6 +2696,130 @@ def _normalize_feature_columns(feature_columns): return sorted(feature_columns, key=lambda x: x.name) +class ConstantNumericColumn( + DenseColumn, + fc_old._DenseColumn, # pylint: disable=protected-access + collections.namedtuple('ConstantNumericColumn', + ('feature_name', 'key', 'shape', 'default_value', + 'dtype', 'normalizer_fn'))): + """see `numeric_column`.""" + + @property + def _is_v2_column(self): + return True + + @property + def name(self): + """See `FeatureColumn` base class.""" + return self.feature_name if self.feature_name else self.key + + @property + def raw_name(self): + """See `FeatureColumn` base class.""" + return self.key + + @property + def parse_example_spec(self): + """See `FeatureColumn` base class.""" + return { + self.key: + parsing_ops.FixedLenFeature(self.shape, self.dtype, + self.default_value) + } + + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.parse_example_spec + + def _transform_input_tensor(self, input_tensor): + def_val = 0 if self.default_value is None else self.default_value + return tf.constant(def_val, dtypes.float32, self.shape) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + input_tensor = inputs.get(self.key) + return self._transform_input_tensor(input_tensor) + + def transform_feature(self, transformation_cache, state_manager): + """See `FeatureColumn` base class. + + In this case, we apply the `normalizer_fn` to the input tensor. + + Args: + transformation_cache: A `FeatureTransformationCache` object to access + features. + state_manager: A `StateManager` to create / access resources such as + lookup tables. + + Returns: + Normalized input tensor. + + Raises: + ValueError: If a SparseTensor is passed in. 
+ """ + input_tensor = transformation_cache.get(self.key, state_manager) + return self._transform_input_tensor(input_tensor) + + @property + def variable_shape(self): + """See `DenseColumn` base class.""" + return tensor_shape.TensorShape(self.shape) + + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _variable_shape(self): + return self.variable_shape + + def get_dense_tensor(self, transformation_cache, state_manager): + """Returns dense `Tensor` representing numeric feature. + + Args: + transformation_cache: A `FeatureTransformationCache` object to access + features. + state_manager: A `StateManager` to create / access resources such as + lookup tables. + + Returns: + Dense `Tensor` created within `transform_feature`. + """ + # Feature has been already transformed. Return the intermediate + # representation created by _transform_feature. + return transformation_cache.get(self, state_manager) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): + del weight_collections + del trainable + return inputs.get(self) + + @property + def parents(self): + """See 'FeatureColumn` base class.""" + return [self.key] + + def _get_config(self): + """See 'FeatureColumn` base class.""" + config = dict(zip(self._fields, self)) + config['normalizer_fn'] = utils.serialize_keras_object(self.normalizer_fn) + config['dtype'] = self.dtype.name + return config + + @classmethod + def _from_config(cls, config, custom_objects=None, columns_by_name=None): + """See 'FeatureColumn` base class.""" + _check_config_keys(config, cls._fields) + kwargs = config.copy() + kwargs['normalizer_fn'] = utils.deserialize_keras_object( + config['normalizer_fn'], custom_objects=custom_objects) + kwargs['dtype'] = dtypes.as_dtype(config['dtype']) + return cls(**kwargs) + + class NumericColumn( DenseColumn, fc_old._DenseColumn, # 
pylint: disable=protected-access @@ -3395,9 +3596,11 @@ def cardinality(self): if isinstance(fc, VocabularyFileCategoricalColumn): return len(fc.vocabulary_size) + fc.num_oov_buckets - if isinstance(fc, WeightedCategoricalColumn) or isinstance(fc, SequenceWeightedCategoricalColumn): + if isinstance(fc, WeightedCategoricalColumn) or isinstance( + fc, SequenceWeightedCategoricalColumn): sub_fc = fc.categorical_column - if isinstance(sub_fc, HashedCategoricalColumn) or isinstance(sub_fc, CrossedColumn): + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance( + sub_fc, CrossedColumn): return sub_fc.hash_bucket_size if isinstance(sub_fc, IdentityCategoricalColumn): return sub_fc.num_buckets @@ -3777,9 +3980,11 @@ def cardinality(self): if isinstance(fc, VocabularyFileCategoricalColumn): return len(fc.vocabulary_size) + fc.num_oov_buckets - if isinstance(fc, WeightedCategoricalColumn) or isinstance(fc, SequenceWeightedCategoricalColumn): + if isinstance(fc, WeightedCategoricalColumn) or isinstance( + fc, SequenceWeightedCategoricalColumn): sub_fc = fc.categorical_column - if isinstance(sub_fc, HashedCategoricalColumn) or isinstance(sub_fc, CrossedColumn): + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance( + sub_fc, CrossedColumn): return sub_fc.hash_bucket_size if isinstance(sub_fc, IdentityCategoricalColumn): return sub_fc.num_buckets diff --git a/easy_rec/python/feature_column/feature_column.py b/easy_rec/python/feature_column/feature_column.py index cc7cfbe77..8f4a88913 100644 --- a/easy_rec/python/feature_column/feature_column.py +++ b/easy_rec/python/feature_column/feature_column.py @@ -129,6 +129,8 @@ def _cmp_embed_config(a, b): self.parse_sequence_feature(config) elif config.feature_type == config.ExprFeature: self.parse_expr_feature(config) + elif config.feature_type == config.ConstFeature: + self.parse_const_feature(config) else: assert False, 'invalid feature type: %s' % config.feature_type except FeatureKeyError: @@ -331,8 +333,9 @@ def 
parse_tag_feature(self, config): default_value=0, feature_name=feature_name) - tag_fc = feature_column.weighted_categorical_column( - tag_fc, weight_feature_key=feature_name + '_w', dtype=tf.float32) + if len(config.input_names) > 1 or config.HasField('kv_separator'): + tag_fc = feature_column.weighted_categorical_column( + tag_fc, weight_feature_key=feature_name + '_w', dtype=tf.float32) if self.is_wide(config): self._add_wide_embedding_column(tag_fc, config) @@ -396,9 +399,7 @@ def parse_raw_feature(self, config): self._deep_columns[feature_name] = fc def parse_expr_feature(self, config): - """Generate raw features columns. - - if boundaries is set, will be converted to category_column first. + """Generate expression features columns. Args: config: instance of easy_rec.python.protos.feature_config_pb2.FeatureConfig @@ -408,7 +409,24 @@ def parse_expr_feature(self, config): fc = feature_column.numeric_column( feature_name, shape=(1,), feature_name=feature_name) if self.is_wide(config): - self._add_wide_embedding_column(fc, config) + self._wide_columns[feature_name] = fc + if self.is_deep(config): + self._deep_columns[feature_name] = fc + + def parse_const_feature(self, config): + """Generate constant features columns. + + used for mask input features. + + Args: + config: instance of easy_rec.python.protos.feature_config_pb2.FeatureConfig + """ + feature_name = config.feature_name if config.HasField('feature_name') \ + else config.input_names[0] + fc = feature_column.constant_numeric_column( + feature_name, shape=(config.embedding_dim,), feature_name=feature_name) + if self.is_wide(config): + self._wide_columns[feature_name] = fc if self.is_deep(config): self._deep_columns[feature_name] = fc diff --git a/easy_rec/python/input/augment.py b/easy_rec/python/input/augment.py index 47822c366..c9802c88c 100644 --- a/easy_rec/python/input/augment.py +++ b/easy_rec/python/input/augment.py @@ -1,6 +1,7 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. 
and its affiliates. import tensorflow as tf + from easy_rec.python.utils.shape_utils import get_shape_list if tf.__version__ >= '2.0': @@ -8,69 +9,83 @@ def assign(input_tensor, position=None, value=None): - input_tensor[tuple(position)] = value - return input_tensor + input_tensor[tuple(position)] = value + return input_tensor def item_mask(aug_data, length, gamma=0.3): - length1 = tf.cast(length, dtype=tf.float32) - num_mask = tf.cast(tf.math.floor(length1 * gamma), dtype=tf.int32) - seq = tf.range(length, dtype=tf.int32) - mask_index = tf.random.shuffle(seq)[:num_mask] - masked_item_seq = aug_data - masked_item_seq = tf.py_func(assign, inp=[masked_item_seq, [mask_index], 0], Tout=masked_item_seq.dtype) - return masked_item_seq, length + length1 = tf.cast(length, dtype=tf.float32) + num_mask = tf.cast(tf.math.floor(length1 * gamma), dtype=tf.int32) + seq = tf.range(length, dtype=tf.int32) + mask_index = tf.random.shuffle(seq)[:num_mask] + masked_item_seq = aug_data + masked_item_seq = tf.py_func( + assign, + inp=[masked_item_seq, [mask_index], 0], + Tout=masked_item_seq.dtype) + return masked_item_seq, length def item_crop(aug_data, length, eta=0.6): - length1 = tf.cast(length, dtype=tf.float32) - max_length = tf.cast(get_shape_list(aug_data)[0], dtype=tf.int32) - embedding_size = get_shape_list(aug_data)[1] + length1 = tf.cast(length, dtype=tf.float32) + max_length = tf.cast(get_shape_list(aug_data)[0], dtype=tf.int32) + embedding_size = get_shape_list(aug_data)[1] - num_left = tf.cast(tf.math.floor(length1 * eta), dtype=tf.int32) - crop_begin = tf.random.uniform([1], minval=0, maxval=length - num_left, dtype=tf.int32)[0] - cropped_item_seq = tf.zeros([get_shape_list(aug_data)[0], embedding_size]) - cropped_item_seq = tf.where(crop_begin + num_left < max_length, - tf.concat([aug_data[crop_begin:crop_begin + num_left], - cropped_item_seq[:max_length - num_left]], axis=0), - tf.concat([aug_data[crop_begin:], cropped_item_seq[:crop_begin]], axis=0)) - return 
cropped_item_seq, num_left + num_left = tf.cast(tf.math.floor(length1 * eta), dtype=tf.int32) + crop_begin = tf.random.uniform([1], + minval=0, + maxval=length - num_left, + dtype=tf.int32)[0] + cropped_item_seq = tf.zeros([get_shape_list(aug_data)[0], embedding_size]) + cropped_item_seq = tf.where( + crop_begin + num_left < max_length, + tf.concat([ + aug_data[crop_begin:crop_begin + num_left], + cropped_item_seq[:max_length - num_left] + ], + axis=0), + tf.concat([aug_data[crop_begin:], cropped_item_seq[:crop_begin]], axis=0)) + return cropped_item_seq, num_left def item_reorder(aug_data, length, beta=0.6): - length1 = tf.cast(length,dtype=tf.float32) - num_reorder = tf.cast(tf.math.floor(length1 * beta) ,dtype=tf.int32) - reorder_begin = tf.random.uniform([1], minval=0, maxval=length - num_reorder, dtype=tf.int32)[0] - shuffle_index = tf.range(reorder_begin, reorder_begin + num_reorder) - shuffle_index = tf.random.shuffle(shuffle_index) - x = tf.range(get_shape_list(aug_data)[0]) - left = tf.slice(x, [0], [reorder_begin]) - right = tf.slice(x, [reorder_begin + num_reorder], [-1]) - reordered_item_index = tf.concat([left, shuffle_index, right], axis=0) - reordered_item_seq = tf.scatter_nd(tf.expand_dims(reordered_item_index, axis=1), - aug_data, - tf.shape(aug_data)) - return reordered_item_seq, length + length1 = tf.cast(length, dtype=tf.float32) + num_reorder = tf.cast(tf.math.floor(length1 * beta), dtype=tf.int32) + reorder_begin = tf.random.uniform([1], + minval=0, + maxval=length - num_reorder, + dtype=tf.int32)[0] + shuffle_index = tf.range(reorder_begin, reorder_begin + num_reorder) + shuffle_index = tf.random.shuffle(shuffle_index) + x = tf.range(get_shape_list(aug_data)[0]) + left = tf.slice(x, [0], [reorder_begin]) + right = tf.slice(x, [reorder_begin + num_reorder], [-1]) + reordered_item_index = tf.concat([left, shuffle_index, right], axis=0) + reordered_item_seq = tf.scatter_nd( + tf.expand_dims(reordered_item_index, axis=1), aug_data, + 
tf.shape(aug_data)) + return reordered_item_seq, length def augment(x): - seq, length = x - flag = tf.range(3, dtype=tf.int32) - flag1 = tf.random.shuffle(flag)[:1][0] - aug_seq, aug_len = tf.cond(tf.equal(flag1, 0), - lambda: item_crop(seq, length), - lambda: tf.cond(tf.equal(flag1, 1), - lambda: item_mask(seq, length), - lambda: item_reorder(seq, length))) + seq, length = x + flag = tf.range(3, dtype=tf.int32) + flag1 = tf.random.shuffle(flag)[:1][0] + aug_seq, aug_len = tf.cond( + tf.equal(flag1, 0), lambda: item_crop(seq, length), lambda: tf.cond( + tf.equal(flag1, 1), lambda: item_mask(seq, length), lambda: + item_reorder(seq, length))) - return [aug_seq, aug_len] + return [aug_seq, aug_len] def input_aug_data(original_data, seq_len): - print("seq_len:", seq_len) - lengths = tf.cast(seq_len, dtype=tf.int32) - aug_seq1, aug_len1 = tf.map_fn(augment, elems=(original_data, lengths), dtype=[tf.float32, tf.int32]) - aug_seq2, aug_len2 = tf.map_fn(augment, elems=(original_data, lengths), dtype=[tf.float32, tf.int32]) - aug_seq1 = tf.reshape(aug_seq1, tf.shape(original_data)) - aug_seq2 = tf.reshape(aug_seq2, tf.shape(original_data)) - return aug_seq1, aug_seq2, aug_len1, aug_len2 + print('seq_len:', seq_len) + lengths = tf.cast(seq_len, dtype=tf.int32) + aug_seq1, aug_len1 = tf.map_fn( + augment, elems=(original_data, lengths), dtype=[tf.float32, tf.int32]) + aug_seq2, aug_len2 = tf.map_fn( + augment, elems=(original_data, lengths), dtype=[tf.float32, tf.int32]) + aug_seq1 = tf.reshape(aug_seq1, tf.shape(original_data)) + aug_seq2 = tf.reshape(aug_seq2, tf.shape(original_data)) + return aug_seq1, aug_seq2, aug_len1, aug_len2 diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index 52581b4e2..d4a990c35 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -225,6 +225,19 @@ def should_stop(self, curr_epoch): total_epoch = 1 return total_epoch is not None and curr_epoch >= total_epoch + def 
get_erase_features(self): + if self._pipeline_config is None: + return set() + + config = self._pipeline_config.model_config.variational_dropout + if config is None: + return set() + + top_k = config.fine_tune_use_top_k_features + from easy_rec.python.layers.fscd_layer import get_top_and_bottom_features + _, erase_features = get_top_and_bottom_features(self._pipeline_config, top_k) + return erase_features + def create_multi_placeholders(self, export_config): """Create multiply placeholders on export, one for each feature. @@ -252,6 +265,7 @@ def create_multi_placeholders(self, export_config): self._input_fields[fid] != sample_weight_field ] + erase_features = self.get_erase_features() inputs = {} for fid in effective_fids: input_name = self._input_fields[fid] @@ -265,12 +279,20 @@ def create_multi_placeholders(self, export_config): tf_type = self._multi_value_types[input_name] logging.info('multi value input_name: %s, dtype: %s' % (input_name, tf_type)) - finput = tf.placeholder(tf_type, [None, None], name=placeholder_name) + if input_name in erase_features: + def_val = self.get_type_defaults(tf_type, self._input_field_defaults[fid]) + finput = tf.placeholder_with_default(def_val, [None, None], name=placeholder_name) + else: + finput = tf.placeholder(tf_type, [None, None], name=placeholder_name) else: ftype = self._input_field_types[fid] tf_type = get_tf_type(ftype) logging.info('input_name: %s, dtype: %s' % (input_name, tf_type)) - finput = tf.placeholder(tf_type, [None], name=placeholder_name) + if input_name in erase_features: + def_val = self.get_type_defaults(tf_type, self._input_field_defaults[fid]) + finput = tf.placeholder_with_default(def_val, [None], name=placeholder_name) + else: + finput = tf.placeholder(tf_type, [None], name=placeholder_name) inputs[input_name] = finput features = {x: inputs[x] for x in inputs} features = self._preprocess(features) @@ -302,11 +324,15 @@ def create_placeholders(self, export_config): len(effective_fids)) input_vals = 
tf.reshape( input_vals, [-1, len(effective_fids)], name='input_reshape') + + erase_features = self.get_erase_features() features = {} for tmp_id, fid in enumerate(effective_fids): ftype = self._input_field_types[fid] tf_type = get_tf_type(ftype) input_name = self._input_fields[fid] + if input_name in erase_features: + continue if tf_type in [tf.float32, tf.double, tf.int32, tf.int64]: features[input_name] = tf.string_to_number( input_vals[:, tmp_id], @@ -472,6 +498,11 @@ def _parse_id_feature(self, fc, parsed_dict, field_dict): tf.int32, name='%s_str_2_int' % input_0) + def _parse_const_feature(self, fc, parsed_dict, field_dict): + input_0 = fc.input_names[0] + feature_name = fc.feature_name if fc.HasField('feature_name') else input_0 + parsed_dict[feature_name] = field_dict[input_0] + def _parse_raw_feature(self, fc, parsed_dict, field_dict): input_0 = fc.input_names[0] feature_name = fc.feature_name if fc.HasField('feature_name') else input_0 @@ -779,6 +810,8 @@ def _preprocess(self, field_dict): self._parse_id_feature(fc, parsed_dict, field_dict) elif feature_type == fc.ExprFeature: self._parse_expr_feature(fc, parsed_dict, field_dict) + elif feature_type == fc.ConstFeature: + self._parse_const_feature(fc, parsed_dict, field_dict) else: feature_name = fc.feature_name if fc.HasField( 'feature_name') else fc.input_names[0] diff --git a/easy_rec/python/layers/bst.py b/easy_rec/python/layers/bst.py index c9cf7d8c9..9f2f78030 100644 --- a/easy_rec/python/layers/bst.py +++ b/easy_rec/python/layers/bst.py @@ -2,11 +2,12 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import tensorflow as tf +from easy_rec.python.input.augment import input_aug_data from easy_rec.python.layers import multihead_cross_attention +from easy_rec.python.loss.nce_loss import nce_loss from easy_rec.python.utils.activation import get_activation from easy_rec.python.utils.shape_utils import get_shape_list -from easy_rec.python.loss.nce_loss import nce_loss -from easy_rec.python.input.augment import input_aug_data + # from tensorflow.python.keras.layers import Layer @@ -20,31 +21,31 @@ def __init__(self, config, l2_reg, name='bst', **kwargs): def encode(self, seq_input, max_position): seq_fea = multihead_cross_attention.embedding_postprocessor( - seq_input, - position_embedding_name=self.name + '/position_embeddings', - max_position_embeddings=max_position, - reuse_position_embedding=tf.AUTO_REUSE) + seq_input, + position_embedding_name=self.name + '/position_embeddings', + max_position_embeddings=max_position, + reuse_position_embedding=tf.AUTO_REUSE) n = tf.count_nonzero(seq_input, axis=-1) seq_mask = tf.cast(n > 0, tf.int32) attention_mask = multihead_cross_attention.create_attention_mask_from_input_mask( - from_tensor=seq_fea, to_mask=seq_mask) + from_tensor=seq_fea, to_mask=seq_mask) hidden_act = get_activation(self.config.hidden_act) attention_fea = multihead_cross_attention.transformer_encoder( - seq_fea, - hidden_size=self.config.hidden_size, - num_hidden_layers=self.config.num_hidden_layers, - num_attention_heads=self.config.num_attention_heads, - attention_mask=attention_mask, - intermediate_size=self.config.intermediate_size, - intermediate_act_fn=hidden_act, - hidden_dropout_prob=self.config.hidden_dropout_prob, - attention_probs_dropout_prob=self.config.attention_probs_dropout_prob, - initializer_range=self.config.initializer_range, - name=self.name + '/bst', - reuse=tf.AUTO_REUSE) + seq_fea, + hidden_size=self.config.hidden_size, + num_hidden_layers=self.config.num_hidden_layers, + num_attention_heads=self.config.num_attention_heads, + 
attention_mask=attention_mask, + intermediate_size=self.config.intermediate_size, + intermediate_act_fn=hidden_act, + hidden_dropout_prob=self.config.hidden_dropout_prob, + attention_probs_dropout_prob=self.config.attention_probs_dropout_prob, + initializer_range=self.config.initializer_range, + name=self.name + '/bst', + reuse=tf.AUTO_REUSE) # attention_fea shape: [batch_size, seq_length, hidden_size] out_fea = attention_fea[:, 0, :] # target feature print('bst output shape:', out_fea.shape) @@ -84,11 +85,11 @@ def __call__(self, inputs, training=None, **kwargs): seq_len = seq_features[0][1] if self.config.need_contrastive_learning: - assert 'loss_dict' in kwargs, "no `loss_dict` in kwargs of bst layer: %s" % self.name + assert 'loss_dict' in kwargs, 'no `loss_dict` in kwargs of bst layer: %s' % self.name loss = self.contrastive_loss(seq_input, seq_len, max_position) if self.config.auto_contrastive_loss_weight: uncertainty = tf.Variable( - 0, name='%s_contrastive_loss_weight' % self.name, dtype=tf.float32) + 0, name='%s_contrastive_loss_weight' % self.name, dtype=tf.float32) loss = tf.exp(-uncertainty) * loss + 0.5 * uncertainty else: loss *= self.config.contrastive_loss_weight @@ -102,10 +103,10 @@ def __call__(self, inputs, training=None, **kwargs): ' in feature group:' + self.name if target_size != self.config.hidden_size: target_feature = tf.layers.dense( - target_feature, - self.config.hidden_size, - activation=tf.nn.relu, - kernel_regularizer=self.l2_reg) + target_feature, + self.config.hidden_size, + activation=tf.nn.relu, + kernel_regularizer=self.l2_reg) # target_feature: [batch_size, 1, embed_size] target_feature = tf.expand_dims(target_feature, 1) # seq_input: [batch_size, seq_len+1, embed_size] @@ -119,4 +120,3 @@ def contrastive_loss(self, seq_input, seq_len, max_position): seq_output2 = self.encode(aug_seq2, max_position) loss = nce_loss(seq_output1, seq_output2) return loss - diff --git a/easy_rec/python/layers/din.py b/easy_rec/python/layers/din.py 
index 71c6e1ab4..18505bd44 100644 --- a/easy_rec/python/layers/din.py +++ b/easy_rec/python/layers/din.py @@ -61,7 +61,8 @@ def __call__(self, inputs, training=None, **kwargs): scores = scores / (seq_emb_size**0.5) scores = tf.nn.sigmoid(scores) else: - raise ValueError("unsupported attention normalizer: " + self.config.attention_normalizer) + raise ValueError('unsupported attention normalizer: ' + + self.config.attention_normalizer) if target_emb_size < seq_emb_size: keys = keys[:, :, :target_emb_size] # [B, L, E] diff --git a/easy_rec/python/layers/fscd_layer.py b/easy_rec/python/layers/fscd_layer.py index c8f94bc81..78849f162 100644 --- a/easy_rec/python/layers/fscd_layer.py +++ b/easy_rec/python/layers/fscd_layer.py @@ -1,10 +1,11 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. -from collections import OrderedDict import math +import json import numpy as np +import six import tensorflow as tf - +from tensorflow.python.framework.meta_graph import read_meta_graph_file from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA from easy_rec.python.compat.feature_column.feature_column_v2 import SharedEmbeddingColumn # NOQA @@ -20,29 +21,53 @@ def get_feature_complexity(feature_configs): if config.HasField('feature_name'): name = config.feature_name feature_complexity[name] = config.complexity + return feature_complexity - # complexity = self._config.feature_complexity_weight * config.complexity - # - # # dim = 1.0 - # # if config.HasField('embedding_dim'): - # # dim = float(config.embedding_dim) - # dim = self.features_dimension[name] - # complexity += self._config.feature_dimension_weight * dim - # - # cardinal = 1.0 - # if config.HasField('hash_bucket_size'): - # cardinal = float(config.hash_bucket_size) - # elif config.HasField('num_buckets'): - # cardinal = float(config.num_buckets) - # elif 
len(config.boundaries) > 0: - # cardinal = float(len(config.boundaries) + 1) - # complexity += self._config.feature_cardinality_weight * cardinal - # - # theta = 1.0 - sigmoid(complexity) - # alpha = math.log(1.0 - theta) - math.log(theta) - # feature_regularize[name] = alpha - return feature_complexity +def sigmoid(x): + return 1. / (1. + math.exp(-x)) + + +def get_top_and_bottom_features(pipeline_config, top_k): + assert pipeline_config.model_config.HasField( + 'variational_dropout'), 'variational_dropout must be in model_config' + + checkpoint_path = tf.train.latest_checkpoint(pipeline_config.model_dir) + meta_graph_def = read_meta_graph_file(checkpoint_path + '.meta') + + features_map = dict() + for col_def in meta_graph_def.collection_def[ + 'variational_dropout'].bytes_list.value: + features = json.loads(col_def) + features_map.update(features) + + top_features = set() + tf.logging.info('Reading checkpoint from %s ...' % checkpoint_path) + reader = tf.train.NewCheckpointReader(checkpoint_path) + for feature_group in pipeline_config.model_config.feature_groups: + group_name = feature_group.group_name + delta_name = 'fscd_delta_%s' % group_name + if not reader.has_tensor(delta_name): + continue + assert group_name in features_map, "%s not in feature map" % group_name + feature_dims = features_map[group_name] + delta = reader.get_tensor(delta_name) + values, indices = tf.nn.top_k(delta, top_k) + with tf.Session() as sess: + idx = indices.eval(session=sess) + for i in idx: + feature = feature_dims[i][0] + top_features.add(feature) + + bottom_features = set() + for group_name, features in six.iteritems(features_map): + for name, dim in features: + if name not in top_features: + bottom_features.add(name) + + print("selected top %d features:" % top_k, ','.join(top_features)) + print("removed bottom features:", ','.join(bottom_features)) + return top_features, bottom_features class FSCDLayer(object): @@ -64,41 +89,70 @@ def __init__(self, self.feature_complexity = 
get_feature_complexity(feature_configs) def compute_dropout_mask(self, n, temperature=0.1): - delta_name = 'delta' if self.name == 'all' else 'delta_%s' % self.name + delta_name = 'fscd_delta_%s' % self.name delta = tf.get_variable( - name=delta_name, - shape=[n], - dtype=tf.float32, - initializer=tf.constant_initializer(0.5)) + name=delta_name, + shape=[n], + dtype=tf.float32, + initializer=tf.constant_initializer(0.)) + delta = tf.nn.sigmoid(delta) EPSILON = np.finfo(float).eps - unif_noise = tf.random_uniform([n], dtype=tf.float32, seed=None, name='uniform_noise') - + unif_noise = tf.random_uniform([n], + dtype=tf.float32, + seed=None, + name='uniform_noise') approx = ( tf.log(delta + EPSILON) - tf.log(1. - delta + EPSILON) + tf.log(unif_noise + EPSILON) - tf.log(1. - unif_noise + EPSILON)) return tf.sigmoid(approx / temperature) def compute_regular_params(self, cols_to_feature): - alphas = OrderedDict() + alphas = {} for fc, fea in cols_to_feature.items(): dim = int(fea.shape[-1]) complexity = self.feature_complexity[fc.raw_name] cardinal = 1 - if isinstance(fc, EmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): + if isinstance(fc, EmbeddingColumn) or isinstance( + fc, _SharedEmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): cardinal = fc.cardinality c = self._config.feature_complexity_weight * complexity c += self._config.feature_cardinality_weight * cardinal c += self._config.feature_dimension_weight * dim - theta = 1.0 - sigmoid(complexity) - alpha = math.log(1.0 - theta) - math.log(theta) + sig_c = sigmoid(c) + theta = 1.0 - sig_c + alpha = math.log(sig_c) - math.log(theta) alphas[fc] = alpha + print(str(fc.raw_name), "complexity:", complexity, "cardinality:", cardinal, + "dimension:", dim, "c:", c, "theta:", theta, "alpha:", alpha) return alphas + # def mask_bottom_features(self, cols_to_feature, top_k): + # feature_map = tf.get_collection('variational_dropout') + # features = feature_map[self.name] + # + # delta_name = 'fscd_delta_%s' % 
self.name + # graph = tf.get_default_graph() + # delta = graph.get_tensor_by_name(delta_name) + # values, indices = tf.nn.top_k(delta, top_k) + # + # output_tensors = [] + # feature_columns = cols_to_feature.keys() + # for column in sorted(feature_columns, key=lambda x: x.name): + # value = cols_to_feature[column] + # output_tensors.append(value) + # return tf.concat(output_tensors, 1) + def __call__(self, cols_to_feature): """ cols_to_feature: an ordered dict mapping feature_column to feature_values """ + # if self._config.HasField('fine_tune_use_top_k_features'): + # k = self._config.fine_tune_use_top_k_features + # assert k > 0, 'config `fine_tune_use_top_k_features` must be large than 0' + # return self.mask_bottom_features(cols_to_feature, k) + + feature_dimension = [] output_tensors = [] alphas = [] z = self.compute_dropout_mask(len(cols_to_feature)) # keep ratio @@ -112,16 +166,14 @@ def __call__(self, cols_to_feature): cols_to_feature[column] = out output_tensors.append(out) alphas.append(alpha) + feature_dimension.append((column.raw_name, int(value.shape[-1]))) output_features = tf.concat(output_tensors, 1) + tf.add_to_collection('variational_dropout', json.dumps({self.name: feature_dimension})) batch_size = tf.shape(output_features)[0] - t_alpha = tf.convert_to_tensor(alphas) # [M] - loss = tf.reduce_sum(t_alpha * z) / batch_size + t_alpha = tf.convert_to_tensor(alphas, dtype=tf.float32) + loss = tf.reduce_sum(t_alpha * z) / tf.to_float(batch_size) tf.add_to_collection('variational_dropout_loss', loss) return output_features - - -def sigmoid(x): - return 1. / (1. + math.exp(-x)) diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index 8098057ad..7e28458d5 100644 --- a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -1,5 +1,6 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
+import logging from collections import OrderedDict import tensorflow as tf @@ -12,8 +13,8 @@ from easy_rec.python.feature_column.feature_group import FeatureGroup from easy_rec.python.layers import sequence_feature_layer from easy_rec.python.layers import variational_dropout_layer -from easy_rec.python.layers.fscd_layer import FSCDLayer from easy_rec.python.layers.common_layers import text_cnn +from easy_rec.python.layers.fscd_layer import FSCDLayer from easy_rec.python.protos.feature_config_pb2 import WideOrDeep from easy_rec.python.utils import shape_utils @@ -118,7 +119,7 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): return concat_features, group_features else: # return sequence feature in raw format instead of combine them if self._variational_dropout_config is not None: - raise ValueError( + logging.warn( 'variational dropout is not supported in not combined mode now.') feature_group = self._feature_groups[group_name] @@ -138,7 +139,9 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): group_features = [cols_to_output_tensors[x] for x in group_columns] for col, val in cols_to_output_tensors.items(): - if isinstance(col, EmbeddingColumn) or isinstance(col, _SharedEmbeddingColumn) or isinstance(col, SharedEmbeddingColumn): + if isinstance(col, EmbeddingColumn) or isinstance( + col, _SharedEmbeddingColumn) or isinstance( + col, SharedEmbeddingColumn): embedding_reg_lst.append(val) builder = feature_column._LazyBuilder(features) @@ -226,11 +229,14 @@ def single_call_input_layer(self, if self._variational_dropout_config is not None: if self._variational_dropout_config.regularize_by_feature_complexity: - fscd = FSCDLayer(self._feature_configs, self._variational_dropout_config, - is_training=self._is_training, name=group_name) + fscd = FSCDLayer( + self._feature_configs, + self._variational_dropout_config, + is_training=self._is_training, + name=group_name) output_features = fscd(cols_to_output_tensors) 
concat_features = array_ops.concat( - [output_features] + seq_features, axis=-1) + [output_features] + seq_features, axis=-1) group_features = [cols_to_output_tensors[x] for x in group_columns] + \ [cols_to_output_tensors[x] for x in group_seq_columns] else: @@ -255,7 +261,8 @@ def single_call_input_layer(self, [cols_to_output_tensors[x] for x in group_seq_columns] for fc, val in cols_to_output_tensors.items(): - if isinstance(fc, EmbeddingColumn) or isinstance(fc, _SharedEmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): + if isinstance(fc, EmbeddingColumn) or isinstance( + fc, _SharedEmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): embedding_reg_lst.append(val) if embedding_reg_lst: diff --git a/easy_rec/python/layers/multihead_cross_attention.py b/easy_rec/python/layers/multihead_cross_attention.py index 511b2711d..f230ac974 100644 --- a/easy_rec/python/layers/multihead_cross_attention.py +++ b/easy_rec/python/layers/multihead_cross_attention.py @@ -708,11 +708,12 @@ def embedding_postprocessor(input_tensor, if use_position_embeddings: assert_op = tf.assert_less_equal(seq_length, max_position_embeddings) with tf.control_dependencies([assert_op]): - with tf.variable_scope("position_embedding", reuse=reuse_position_embedding): + with tf.variable_scope( + 'position_embedding', reuse=reuse_position_embedding): full_position_embeddings = tf.get_variable( - name=position_embedding_name, - shape=[max_position_embeddings, width], - initializer=create_initializer(initializer_range)) + name=position_embedding_name, + shape=[max_position_embeddings, width], + initializer=create_initializer(initializer_range)) # Since the position embedding table is a learned variable, we create it # using a (long) sequence length `max_position_embeddings`. 
The actual # sequence length might be shorter than this, for faster training of diff --git a/easy_rec/python/loss/nce_loss.py b/easy_rec/python/loss/nce_loss.py index 7613384ab..f2e406d20 100644 --- a/easy_rec/python/loss/nce_loss.py +++ b/easy_rec/python/loss/nce_loss.py @@ -2,33 +2,38 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import tensorflow as tf + from easy_rec.python.utils.shape_utils import get_shape_list def mask_samples(batch_size): - part = tf.ones((batch_size, batch_size), bool) - diag_part = tf.linalg.diag_part(part) - diag_part = tf.fill(tf.shape(diag_part), False) - part = tf.linalg.set_diag(part, diag_part) - part_half = tf.concat([part, part], axis=1) - part_total = tf.concat([part_half, part_half], axis=0) - return part_total + part = tf.ones((batch_size, batch_size), bool) + diag_part = tf.linalg.diag_part(part) + diag_part = tf.fill(tf.shape(diag_part), False) + part = tf.linalg.set_diag(part, diag_part) + part_half = tf.concat([part, part], axis=1) + part_total = tf.concat([part_half, part_half], axis=0) + return part_total def nce_loss(z_i, z_j, temp=1): - batch_size = get_shape_list(z_i)[0] - N = 2 * batch_size - z = tf.concat((z_i, z_j), axis=0) - sim = tf.matmul(z, tf.transpose(z)) / temp - sim_i_j = tf.matrix_diag_part(tf.slice(sim, [batch_size, 0], [batch_size, batch_size])) - sim_j_i = tf.matrix_diag_part(tf.slice(sim, [0, batch_size], [batch_size, batch_size])) - positive_samples = tf.reshape(tf.concat((sim_i_j, sim_j_i), axis=0), (N, 1)) - mask = mask_samples(batch_size) - negative_samples = tf.reshape(tf.boolean_mask(sim, mask), (N, -1)) - - labels = tf.zeros(N, dtype=tf.int32) - logits = tf.concat((positive_samples, negative_samples), axis=1) - - loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)) - - return loss + batch_size = get_shape_list(z_i)[0] + N = 2 * batch_size + z = tf.concat((z_i, z_j), axis=0) + sim = tf.matmul(z, tf.transpose(z)) / temp + sim_i_j = 
tf.matrix_diag_part( + tf.slice(sim, [batch_size, 0], [batch_size, batch_size])) + sim_j_i = tf.matrix_diag_part( + tf.slice(sim, [0, batch_size], [batch_size, batch_size])) + positive_samples = tf.reshape(tf.concat((sim_i_j, sim_j_i), axis=0), (N, 1)) + mask = mask_samples(batch_size) + negative_samples = tf.reshape(tf.boolean_mask(sim, mask), (N, -1)) + + labels = tf.zeros(N, dtype=tf.int32) + logits = tf.concat((positive_samples, negative_samples), axis=1) + + loss = tf.reduce_mean( + tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits)) + + return loss diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index 871306326..6483877b7 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -11,13 +11,13 @@ from tensorflow.python.ops.variables import PartitionedVariable from easy_rec.python.compat import regularizers +from easy_rec.python.layers import dnn from easy_rec.python.layers import input_layer from easy_rec.python.layers.sequence_encoder import SequenceEncoder from easy_rec.python.utils import constant from easy_rec.python.utils import estimator_utils from easy_rec.python.utils import restore_filter from easy_rec.python.utils.load_class import get_register_class_meta -from easy_rec.python.layers import dnn if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -110,8 +110,11 @@ def get_sequence_encoding(self, group_name=None, is_training=True): if group_name is not None: if group_name in self._sequence_encoding_by_group_name: return self._sequence_encoding_by_group_name[group_name] - encoding = self._sequence_encoder(self._feature_dict, group_name, - is_training, loss_dict=self._loss_dict) + encoding = self._sequence_encoder( + self._feature_dict, + group_name, + is_training, + loss_dict=self._loss_dict) self._sequence_encoding_by_group_name[group_name] = encoding return encoding @@ -123,8 +126,11 @@ def get_sequence_encoding(self, group_name=None, 
is_training=True): if group_name in self._sequence_encoding_by_group_name: encoding = self._sequence_encoding_by_group_name[group_name] else: - encoding = self._sequence_encoder(self._feature_dict, group_name, - is_training, loss_dict=self._loss_dict) + encoding = self._sequence_encoder( + self._feature_dict, + group_name, + is_training, + loss_dict=self._loss_dict) self._sequence_encoding_by_group_name[group_name] = encoding if encoding is not None: seq_encoding.append(encoding) @@ -138,10 +144,10 @@ def get_sequence_encoding(self, group_name=None, is_training=True): if self._base_model_config.HasField('sequence_dnn'): sequence_dnn = dnn.DNN( - self._base_model_config.sequence_dnn, - self._l2_reg, - name='sequence_dnn', - is_training=self._is_training) + self._base_model_config.sequence_dnn, + self._l2_reg, + name='sequence_dnn', + is_training=self._is_training) encoding = sequence_dnn(encoding) return encoding diff --git a/easy_rec/python/protos/feature_config.proto b/easy_rec/python/protos/feature_config.proto index b642fff23..17e501361 100644 --- a/easy_rec/python/protos/feature_config.proto +++ b/easy_rec/python/protos/feature_config.proto @@ -43,6 +43,7 @@ message FeatureConfig { LookupFeature = 4; SequenceFeature = 5; ExprFeature = 6; + ConstFeature = 7; } enum FieldType { diff --git a/easy_rec/python/protos/variational_dropout.proto b/easy_rec/python/protos/variational_dropout.proto index afe4d061c..c643b3d2e 100644 --- a/easy_rec/python/protos/variational_dropout.proto +++ b/easy_rec/python/protos/variational_dropout.proto @@ -2,15 +2,16 @@ syntax = "proto2"; package protos; -message VariationalDropoutLayer{ +message VariationalDropoutLayer { // regularization coefficient lambda optional float regularization_lambda = 1 [default = 0.01]; // variational_dropout dimension optional bool embedding_wise_variational_dropout = 2 [default = false]; + // whether to use FSCD model optional bool regularize_by_feature_complexity = 3 [default = false]; - optional float 
feature_complexity_weight = 4 [default = 1.0]; optional float feature_dimension_weight = 5 [default = 1e-2]; optional float feature_cardinality_weight = 6 [default = 1e-7]; + optional uint32 fine_tune_use_top_k_features = 7; } diff --git a/easy_rec/python/tools/explainer/deep_shap.py b/easy_rec/python/tools/explainer/deep_shap.py index 4d0b72890..64508232f 100644 --- a/easy_rec/python/tools/explainer/deep_shap.py +++ b/easy_rec/python/tools/explainer/deep_shap.py @@ -1,17 +1,18 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. -import numpy as np import warnings + +import numpy as np +import tensorflow as tf from tensorflow.python.framework import ops as tf_ops from tensorflow.python.ops import gradients_impl as tf_gradients_impl -if not hasattr(tf_gradients_impl, "_IsBackpropagatable"): +if not hasattr(tf_gradients_impl, '_IsBackpropagatable'): from tensorflow.python.ops import gradients_util as tf_gradients_impl -import tensorflow as tf class DeepShap(object): - """ Meant to approximate SHAP values for deep learning models. + """Meant to approximate SHAP values for deep learning models. This is an enhanced version of the DeepLIFT algorithm (Deep SHAP) where, similar to Kernel SHAP, we approximate the conditional expectations of SHAP values using a selection of background samples. @@ -22,8 +23,13 @@ class DeepShap(object): current model output (f(x) - E[f(x)]). """ - def __init__(self, inputs, output, data, session=None, learning_phase_flags=None): - """ An explainer object for a deep model using a given background dataset. + def __init__(self, + inputs, + output, + data, + session=None, + learning_phase_flags=None): + """An explainer object for a deep model using a given background dataset. Note that the complexity of the method scales linearly with the number of background data samples. 
Passing the entire training dataset as `data` will give very accurate expected @@ -56,12 +62,14 @@ def __init__(self, inputs, output, data, session=None, learning_phase_flags=None batch norm or dropout. If None is passed then we look for tensors in the graph that look like learning phase flags. Note that we assume all the flags should have a value of False during predictions (and hence explanations). - """ self.model_inputs = inputs self.model_output = output - assert type(self.model_output) != list, "The model output to be explained must be a single tensor!" - assert len(self.model_output.shape) < 3, "The model output must be a vector or a single value!" + assert type( + self.model_output + ) != list, 'The model output to be explained must be a single tensor!' + assert len(self.model_output.shape + ) < 3, 'The model output must be a vector or a single value!' self.multi_output = True if len(self.model_output.shape) == 1: self.multi_output = False @@ -76,7 +84,8 @@ def __init__(self, inputs, output, data, session=None, learning_phase_flags=None data = [data] self.data = data - self._vinputs = {} # used to track what op inputs depends on the model inputs + self._vinputs = { + } # used to track what op inputs depends on the model inputs self.orig_grads = {} if session is None: @@ -93,10 +102,13 @@ def __init__(self, inputs, output, data, session=None, learning_phase_flags=None if learning_phase_flags is None: self.learning_phase_ops = [] for op in self.graph.get_operations(): - if 'learning_phase' in op.name and op.type == "Const" and len(op.outputs[0].shape) == 0: + if 'learning_phase' in op.name and op.type == 'Const' and len( + op.outputs[0].shape) == 0: if op.outputs[0].dtype == tf.bool: self.learning_phase_ops.append(op) - self.learning_phase_flags = [op.outputs[0] for op in self.learning_phase_ops] + self.learning_phase_flags = [ + op.outputs[0] for op in self.learning_phase_ops + ] else: self.learning_phase_ops = [t.op for t in learning_phase_flags] @@ -107,8 
+119,10 @@ def __init__(self, inputs, output, data, session=None, learning_phase_flags=None else: if self.data[0].shape[0] > 5000: warnings.warn( - "You have provided over 5k background samples! For better performance consider using smaller random sample.") - self.expected_value = self.run(self.model_output, self.model_inputs, self.data).mean(0) + 'You have provided over 5k background samples! For better performance consider using smaller random sample.' + ) + self.expected_value = self.run(self.model_output, self.model_inputs, + self.data).mean(0) self._init_between_tensors(self.model_output.op, self.model_inputs) @@ -122,22 +136,24 @@ def __init__(self, inputs, output, data, session=None, learning_phase_flags=None if noutputs is not None: self.phi_symbolics = [None for i in range(noutputs)] else: - raise Exception("The model output tensor to be explained cannot have a static shape in dim 1 of None!") + raise Exception( + 'The model output tensor to be explained cannot have a static shape in dim 1 of None!' + ) def run(self, out, model_inputs, X): - """ Runs the model while also setting the learning phase flags to False. - """ + """Runs the model while also setting the learning phase flags to False.""" feed_dict = dict(zip(model_inputs, X)) for t in self.learning_phase_flags: feed_dict[t] = False return self.session.run(out, feed_dict) def phi_symbolic(self, i): - """ Get the SHAP value computation graph for a given model output. 
- """ + """Get the SHAP value computation graph for a given model output.""" if self.phi_symbolics[i] is None: + def anon(): - out = self.model_output[:, i] if self.multi_output else self.model_output + out = self.model_output[:, + i] if self.multi_output else self.model_output return tf.gradients(out, self.model_inputs) self.phi_symbolics[i] = self.execute_with_overridden_gradients(anon) @@ -145,10 +161,10 @@ def anon(): return self.phi_symbolics[i] def custom_grad(self, op, *grads): - """ Passes a gradient op creation request to the correct handler. - """ - type_name = op.type[5:] if op.type.startswith("shap_") else op.type - out = op_handlers[type_name](self, op, *grads) # we cut off the shap_ prefex before the lookup + """Passes a gradient op creation request to the correct handler.""" + type_name = op.type[5:] if op.type.startswith('shap_') else op.type + out = op_handlers[type_name]( + self, op, *grads) # we cut off the shap_ prefex before the lookup return out def execute_with_overridden_gradients(self, f): @@ -157,22 +173,22 @@ def execute_with_overridden_gradients(self, f): reg = tf_ops._gradient_registry._registry ops_not_in_registry = ['TensorListReserve'] # NOTE: location_tag taken from tensorflow source for None type ops - location_tag = ("UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN") + location_tag = ('UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN') # TODO: unclear why some ops are not in the registry with TF 2.0 like TensorListReserve for non_reg_ops in ops_not_in_registry: reg[non_reg_ops] = {'type': None, 'location': location_tag} for n in op_handlers: if n in reg: - self.orig_grads[n] = reg[n]["type"] - reg["shap_" + n] = { - "type": self.custom_grad, - "location": reg[n]["location"] + self.orig_grads[n] = reg[n]['type'] + reg['shap_' + n] = { + 'type': self.custom_grad, + 'location': reg[n]['location'] } - reg[n]["type"] = self.custom_grad + reg[n]['type'] = self.custom_grad # In TensorFlow 1.10 they started pruning out nodes that 
they think can't be backpropped # unfortunately that includes the index of embedding layers so we disable that check here - if hasattr(tf_gradients_impl, "_IsBackpropagatable"): + if hasattr(tf_gradients_impl, '_IsBackpropagatable'): orig_IsBackpropagatable = tf_gradients_impl._IsBackpropagatable tf_gradients_impl._IsBackpropagatable = lambda tensor: True @@ -181,20 +197,24 @@ def execute_with_overridden_gradients(self, f): out = f() finally: # reinstate the backpropagatable check - if hasattr(tf_gradients_impl, "_IsBackpropagatable"): + if hasattr(tf_gradients_impl, '_IsBackpropagatable'): tf_gradients_impl._IsBackpropagatable = orig_IsBackpropagatable # restore the original gradient definitions for n in op_handlers: if n in reg: - del reg["shap_" + n] - reg[n]["type"] = self.orig_grads[n] + del reg['shap_' + n] + reg[n]['type'] = self.orig_grads[n] for non_reg_ops in ops_not_in_registry: del reg[non_reg_ops] return out - def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_additivity=True): - """ Return approximate SHAP values for the model applied to the data given by X. + def shap_values(self, + X, + ranked_outputs=None, + output_rank_order='max', + check_additivity=True): + """Return approximate SHAP values for the model applied to the data given by X. Parameters ---------- @@ -228,29 +248,32 @@ def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_add # check if we have multiple inputs if not self.multi_input: if type(X) == list and len(X) != 1: - assert False, "Expected a single tensor as model input!" + assert False, 'Expected a single tensor as model input!' elif type(X) != list: X = [X] else: - assert type(X) == list, "Expected a list of model inputs!" - assert len(self.model_inputs) == len(X), "Number of model inputs (%d) does not match the number given (%d)!" % ( - len(self.model_inputs), len(X)) + assert type(X) == list, 'Expected a list of model inputs!' 
+ assert len(self.model_inputs) == len( + X + ), 'Number of model inputs (%d) does not match the number given (%d)!' % ( + len(self.model_inputs), len(X)) # rank and determine the model outputs that we will explain if ranked_outputs is not None and self.multi_output: model_output_values = self.run(self.model_output, self.model_inputs, X) - if output_rank_order == "max": + if output_rank_order == 'max': model_output_ranks = np.argsort(-model_output_values) - elif output_rank_order == "min": + elif output_rank_order == 'min': model_output_ranks = np.argsort(model_output_values) - elif output_rank_order == "max_abs": + elif output_rank_order == 'max_abs': model_output_ranks = np.argsort(np.abs(model_output_values)) else: - assert False, "output_rank_order must be max, min, or max_abs!" + assert False, 'output_rank_order must be max, min, or max_abs!' model_output_ranks = model_output_ranks[:, :ranked_outputs] else: - model_output_ranks = np.tile(np.arange(len(self.phi_symbolics)), (X[0].shape[0], 1)) + model_output_ranks = np.tile( + np.arange(len(self.phi_symbolics)), (X[0].shape[0], 1)) # compute the attributions output_phis = [] @@ -267,19 +290,27 @@ def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_add bg_data = self.data # tile the inputs to line up with the background data samples - tiled_X = [np.tile(X[l][j:j + 1], (bg_data[l].shape[0],) + tuple([1 for k in range(len(X[l].shape) - 1)])) for l - in range(len(X))] + tiled_X = [ + np.tile(X[l][j:j + 1], (bg_data[l].shape[0],) + + tuple([1 + for k in range(len(X[l].shape) - 1)])) + for l in range(len(X)) + ] # we use the first sample for the current sample and the rest for the references - joint_input = [np.concatenate([tiled_X[l], bg_data[l]], 0) for l in range(len(X))] + joint_input = [ + np.concatenate([tiled_X[l], bg_data[l]], 0) for l in range(len(X)) + ] # run attribution computation graph feature_ind = model_output_ranks[j, i] - sample_phis = 
self.run(self.phi_symbolic(feature_ind), self.model_inputs, joint_input) + sample_phis = self.run( + self.phi_symbolic(feature_ind), self.model_inputs, joint_input) # assign the attributions to the right part of the output arrays for l in range(len(X)): - phis[l][j] = (sample_phis[l][bg_data[l].shape[0]:] * (X[l][j] - bg_data[l])).mean(0) + phis[l][j] = (sample_phis[l][bg_data[l].shape[0]:] * + (X[l][j] - bg_data[l])).mean(0) output_phis.append(phis[0] if not self.multi_input else phis) @@ -288,17 +319,19 @@ def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_add model_output = self.run(self.model_output, self.model_inputs, X) for l in range(len(self.expected_value)): if not self.multi_input: - diffs = model_output[:, l] - self.expected_value[l] - output_phis[l].sum( - axis=tuple(range(1, output_phis[l].ndim))) + diffs = model_output[:, + l] - self.expected_value[l] - output_phis[l].sum( + axis=tuple(range(1, output_phis[l].ndim))) else: diffs = model_output[:, l] - self.expected_value[l] for i in range(len(output_phis[l])): - diffs -= output_phis[l][i].sum(axis=tuple(range(1, output_phis[l][i].ndim))) + diffs -= output_phis[l][i].sum( + axis=tuple(range(1, output_phis[l][i].ndim))) assert np.abs( diffs).max() < 1e-2, "The SHAP explanations do not sum up to the model's output! This is either because of a " \ - "rounding error or because an operator in your computation graph was not fully supported. If " \ - "the sum difference of %f is significant compared the scale of your model outputs please post " \ - "as a github issue, with a reproducible example if possible so we can debug it." % np.abs( + 'rounding error or because an operator in your computation graph was not fully supported. If ' \ + 'the sum difference of %f is significant compared the scale of your model outputs please post ' \ + 'as a github issue, with a reproducible example if possible so we can debug it.' 
% np.abs( diffs).max() if not self.multi_output: @@ -310,21 +343,19 @@ def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_add def _init_between_tensors(self, out_op, model_inputs): # find all the operations in the graph between our inputs and outputs - tensor_blacklist = tensors_blocked_by_false(self.learning_phase_ops) # don't follow learning phase branches - dependence_breakers = [k for k in op_handlers if op_handlers[k] == break_dependence] - back_ops = backward_walk_ops( - [out_op], tensor_blacklist, - dependence_breakers - ) + tensor_blacklist = tensors_blocked_by_false( + self.learning_phase_ops) # don't follow learning phase branches + dependence_breakers = [ + k for k in op_handlers if op_handlers[k] == break_dependence + ] + back_ops = backward_walk_ops([out_op], tensor_blacklist, + dependence_breakers) start_ops = [] for minput in model_inputs: for op in minput.consumers(): start_ops.append(op) self.between_ops = forward_walk_ops( - start_ops, - tensor_blacklist, dependence_breakers, - within_ops=back_ops - ) + start_ops, tensor_blacklist, dependence_breakers, within_ops=back_ops) # note all the tensors that are on the path between the inputs and the output self.between_tensors = {} @@ -340,8 +371,7 @@ def _init_between_tensors(self, out_op, model_inputs): self.used_types[op.type] = True def _variable_inputs(self, op): - """ Return which inputs of this operation are variable (i.e. depend on the model inputs). - """ + """Return which inputs of this operation are variable (i.e. depend on the model inputs).""" if op not in self._vinputs: out = np.zeros(len(op.inputs), dtype=np.bool) for i, t in enumerate(op.inputs): @@ -351,7 +381,7 @@ def _variable_inputs(self, op): def tensors_blocked_by_false(ops): - """ Follows a set of ops assuming their value is False and find blocked Switch paths. + """Follows a set of ops assuming their value is False and find blocked Switch paths. 
This is used to prune away parts of the model graph that are only used during the training phase (like dropout, batch norm, etc.). @@ -359,8 +389,10 @@ def tensors_blocked_by_false(ops): blocked = [] def recurse(op): - if op.type == "Switch": - blocked.append(op.outputs[1]) # the true path is blocked since we assume the ops we trace are False + if op.type == 'Switch': + blocked.append( + op.outputs[1] + ) # the true path is blocked since we assume the ops we trace are False else: for out in op.outputs: for c in out.consumers(): @@ -385,7 +417,8 @@ def backward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist): return found_ops -def forward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist, within_ops): +def forward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist, + within_ops): found_ops = [] op_stack = [op for op in start_ops] while len(op_stack) > 0: @@ -400,6 +433,7 @@ def forward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist, within_ops) def linearity_1d_nonlinearity_2d(input_ind0, input_ind1, op_func): + def handler(explainer, op, *grads): var = explainer._variable_inputs(op) if var[input_ind0] and not var[input_ind1]: @@ -407,14 +441,17 @@ def handler(explainer, op, *grads): elif var[input_ind1] and not var[input_ind0]: return linearity_1d_handler(input_ind1, explainer, op, *grads) elif var[input_ind0] and var[input_ind1]: - return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads) + return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, + op, *grads) else: - return [None for _ in op.inputs] # no inputs vary, we must be hidden by a switch function + return [None for _ in op.inputs + ] # no inputs vary, we must be hidden by a switch function return handler def nonlinearity_1d_nonlinearity_2d(input_ind0, input_ind1, op_func): + def handler(explainer, op, *grads): var = explainer._variable_inputs(op) if var[input_ind0] and not var[input_ind1]: @@ -422,14 +459,17 @@ def 
handler(explainer, op, *grads): elif var[input_ind1] and not var[input_ind0]: return nonlinearity_1d_handler(input_ind1, explainer, op, *grads) elif var[input_ind0] and var[input_ind1]: - return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads) + return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, + op, *grads) else: - return [None for _ in op.inputs] # no inputs vary, we must be hidden by a switch function + return [None for _ in op.inputs + ] # no inputs vary, we must be hidden by a switch function return handler def nonlinearity_1d(input_ind): + def handler(explainer, op, *grads): return nonlinearity_1d_handler(input_ind, explainer, op, *grads) @@ -444,7 +484,8 @@ def nonlinearity_1d_handler(input_ind, explainer, op, *grads): for i in range(len(op_inputs)): if i != input_ind: - assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!" + assert not explainer._variable_inputs( + op)[i], str(i) + 'th input to ' + op.name + ' cannot vary!' 
xin0, rin0 = tf.split(op_inputs[input_ind], 2) xout, rout = tf.split(op.outputs[input_ind], 2) @@ -454,18 +495,18 @@ def nonlinearity_1d_handler(input_ind, explainer, op, *grads): else: dup0 = [2] + [1 for i in delta_in0.shape[1:]] out = [None for _ in op_inputs] - if op.type.startswith("shap_"): + if op.type.startswith('shap_'): op.type = op.type[5:] orig_grad = explainer.orig_grads[op.type](op, grads[0]) out[input_ind] = tf.where( - tf.tile(tf.abs(delta_in0), dup0) < 1e-6, - orig_grad[input_ind] if len(op_inputs) > 1 else orig_grad, - grads[0] * tf.tile((xout - rout) / delta_in0, dup0) - ) + tf.tile(tf.abs(delta_in0), dup0) < 1e-6, + orig_grad[input_ind] if len(op_inputs) > 1 else orig_grad, + grads[0] * tf.tile((xout - rout) / delta_in0, dup0)) return out -def nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads): +def nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, + *grads): assert input_ind0 == 0 and input_ind1 == 1, "TODO: Can't yet handle double inputs that are not first!" 
xout, rout = tf.split(op.outputs[0], 2) in0 = op.inputs[input_ind0] @@ -484,33 +525,37 @@ def nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *gra out1 = grads[0] * tf.tile(out1 / delta_in1, dup0) # Avoid divide by zero nans - out0 = tf.where(tf.abs(tf.tile(delta_in0, dup0)) < 1e-7, tf.zeros_like(out0), out0) - out1 = tf.where(tf.abs(tf.tile(delta_in1, dup0)) < 1e-7, tf.zeros_like(out1), out1) + out0 = tf.where( + tf.abs(tf.tile(delta_in0, dup0)) < 1e-7, tf.zeros_like(out0), out0) + out1 = tf.where( + tf.abs(tf.tile(delta_in1, dup0)) < 1e-7, tf.zeros_like(out1), out1) # see if due to broadcasting our gradient shapes don't match our input shapes if (np.any(np.array(out1.shape) != np.array(in1.shape))): - broadcast_index = np.where(np.array(out1.shape) != np.array(in1.shape))[0][0] + broadcast_index = np.where( + np.array(out1.shape) != np.array(in1.shape))[0][0] out1 = tf.reduce_sum(out1, axis=broadcast_index, keepdims=True) elif (np.any(np.array(out0.shape) != np.array(in0.shape))): - broadcast_index = np.where(np.array(out0.shape) != np.array(in0.shape))[0][0] + broadcast_index = np.where( + np.array(out0.shape) != np.array(in0.shape))[0][0] out0 = tf.reduce_sum(out0, axis=broadcast_index, keepdims=True) return [out0, out1] def softmax(explainer, op, *grads): - """ Just decompose softmax into its components and recurse, we can handle all of them :) + """Just decompose softmax into its components and recurse, we can handle all of them :) - We assume the 'axis' is the last dimension because the TF codebase swaps the 'axis' to - the last dimension before the softmax op if 'axis' is not already the last dimension. 
- We also don't subtract the max before tf.exp for numerical stability since that might - mess up the attributions and it seems like TensorFlow doesn't define softmax that way - (according to the docs) - """ + We assume the 'axis' is the last dimension because the TF codebase swaps the 'axis' to + the last dimension before the softmax op if 'axis' is not already the last dimension. + We also don't subtract the max before tf.exp for numerical stability since that might + mess up the attributions and it seems like TensorFlow doesn't define softmax that way + (according to the docs) + """ in0 = op.inputs[0] - in0_max = tf.reduce_max(in0, axis=-1, keepdims=True, name="in0_max") + in0_max = tf.reduce_max(in0, axis=-1, keepdims=True, name='in0_max') in0_centered = in0 - in0_max - evals = tf.exp(in0_centered, name="custom_exp") + evals = tf.exp(in0_centered, name='custom_exp') rsum = tf.reduce_sum(evals, axis=-1, keepdims=True) div = evals / rsum @@ -534,10 +579,8 @@ def softmax(explainer, op, *grads): delta_in0 = xin0 - rin0 dup0 = [2] + [1 for i in delta_in0.shape[1:]] return tf.where( - tf.tile(tf.abs(delta_in0), dup0) < 1e-6, - out, - out * tf.tile((xin0_centered - rin0_centered) / delta_in0, dup0) - ) + tf.tile(tf.abs(delta_in0), dup0) < 1e-6, out, + out * tf.tile((xin0_centered - rin0_centered) / delta_in0, dup0)) def maxpool(explainer, op, *grads): @@ -547,14 +590,14 @@ def maxpool(explainer, op, *grads): dup0 = [2] + [1 for i in delta_in0.shape[1:]] cross_max = tf.maximum(xout, rout) diffs = tf.concat([cross_max - rout, xout - cross_max], 0) - if op.type.startswith("shap_"): + if op.type.startswith('shap_'): op.type = op.type[5:] - xmax_pos, rmax_pos = tf.split(explainer.orig_grads[op.type](op, grads[0] * diffs), 2) - return tf.tile(tf.where( - tf.abs(delta_in0) < 1e-7, - tf.zeros_like(delta_in0), - (xmax_pos + rmax_pos) / delta_in0 - ), dup0) + xmax_pos, rmax_pos = tf.split( + explainer.orig_grads[op.type](op, grads[0] * diffs), 2) + return tf.tile( + tf.where( + 
tf.abs(delta_in0) < 1e-7, tf.zeros_like(delta_in0), + (xmax_pos + rmax_pos) / delta_in0), dup0) def gather(explainer, op, *grads): @@ -563,35 +606,41 @@ def gather(explainer, op, *grads): # axis = op.inputs[2] var = explainer._variable_inputs(op) if var[1] and not var[0]: - assert len(indices.shape) == 2, "Only scalar indices supported right now in GatherV2!" + assert len(indices.shape + ) == 2, 'Only scalar indices supported right now in GatherV2!' xin1, rin1 = tf.split(tf.cast(op.inputs[1], tf.float32), 2) xout, rout = tf.split(op.outputs[0], 2) dup_in1 = [2] + [1 for i in xin1.shape[1:]] dup_out = [2] + [1 for i in xout.shape[1:]] delta_in1_t = tf.tile(xin1 - rin1, dup_in1) - out_sum = tf.reduce_sum(grads[0] * tf.tile(xout - rout, dup_out), - list(range(len(indices.shape), len(grads[0].shape)))) - if op.type == "ResourceGather": - return [None, tf.where( - tf.abs(delta_in1_t) < 1e-6, - tf.zeros_like(delta_in1_t), - out_sum / delta_in1_t - )] - return [None, tf.where( - tf.abs(delta_in1_t) < 1e-6, - tf.zeros_like(delta_in1_t), - out_sum / delta_in1_t - ), None] + out_sum = tf.reduce_sum( + grads[0] * tf.tile(xout - rout, dup_out), + list(range(len(indices.shape), len(grads[0].shape)))) + if op.type == 'ResourceGather': + return [ + None, + tf.where( + tf.abs(delta_in1_t) < 1e-6, tf.zeros_like(delta_in1_t), + out_sum / delta_in1_t) + ] + return [ + None, + tf.where( + tf.abs(delta_in1_t) < 1e-6, tf.zeros_like(delta_in1_t), + out_sum / delta_in1_t), None + ] elif var[0] and not var[1]: - if op.type.startswith("shap_"): + if op.type.startswith('shap_'): op.type = op.type[5:] - return [explainer.orig_grads[op.type](op, grads[0]), None] # linear in this case + return [explainer.orig_grads[op.type](op, grads[0]), + None] # linear in this case else: - assert False, "Axis not yet supported to be varying for gather op!" + assert False, 'Axis not yet supported to be varying for gather op!' 
def linearity_1d(input_ind): + def handler(explainer, op, *grads): return linearity_1d_handler(input_ind, explainer, op, *grads) @@ -602,13 +651,15 @@ def linearity_1d_handler(input_ind, explainer, op, *grads): # make sure only the given input varies (negative means only that input cannot vary, and is measured from the end of the list) for i in range(len(op.inputs)): if i != input_ind: - assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!" - if op.type.startswith("shap_"): + assert not explainer._variable_inputs( + op)[i], str(i) + 'th input to ' + op.name + ' cannot vary!' + if op.type.startswith('shap_'): op.type = op.type[5:] return explainer.orig_grads[op.type](op, *grads) def linearity_with_excluded(input_inds): + def handler(explainer, op, *grads): return linearity_with_excluded_handler(input_inds, explainer, op, *grads) @@ -619,20 +670,21 @@ def linearity_with_excluded_handler(input_inds, explainer, op, *grads): # make sure the given inputs don't vary (negative is measured from the end of the list) for i in range(len(op.inputs)): if i in input_inds or i - len(op.inputs) in input_inds: - assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!" - if op.type.startswith("shap_"): + assert not explainer._variable_inputs( + op)[i], str(i) + 'th input to ' + op.name + ' cannot vary!' + if op.type.startswith('shap_'): op.type = op.type[5:] return explainer.orig_grads[op.type](op, *grads) def passthrough(explainer, op, *grads): - if op.type.startswith("shap_"): + if op.type.startswith('shap_'): op.type = op.type[5:] return explainer.orig_grads[op.type](op, *grads) def break_dependence(explainer, op, *grads): - """ This function name is used to break attribution dependence in the graph traversal. + """This function name is used to break attribution dependence in the graph traversal. 
These operation types may be connected above input data values in the graph but their outputs don't depend on the input values (for example they just depend on the shape). @@ -643,68 +695,72 @@ def break_dependence(explainer, op, *grads): op_handlers = {} # ops that are always linear -op_handlers["Identity"] = passthrough -op_handlers["StridedSlice"] = passthrough -op_handlers["Squeeze"] = passthrough -op_handlers["ExpandDims"] = passthrough -op_handlers["Pack"] = passthrough -op_handlers["BiasAdd"] = passthrough -op_handlers["Unpack"] = passthrough -op_handlers["Add"] = passthrough -op_handlers["Sub"] = passthrough -op_handlers["Merge"] = passthrough -op_handlers["Sum"] = passthrough -op_handlers["Mean"] = passthrough -op_handlers["Cast"] = passthrough -op_handlers["Transpose"] = passthrough -op_handlers["Enter"] = passthrough -op_handlers["Exit"] = passthrough -op_handlers["NextIteration"] = passthrough -op_handlers["Tile"] = passthrough -op_handlers["TensorArrayScatterV3"] = passthrough -op_handlers["TensorArrayReadV3"] = passthrough -op_handlers["TensorArrayWriteV3"] = passthrough +op_handlers['Identity'] = passthrough +op_handlers['StridedSlice'] = passthrough +op_handlers['Squeeze'] = passthrough +op_handlers['ExpandDims'] = passthrough +op_handlers['Pack'] = passthrough +op_handlers['BiasAdd'] = passthrough +op_handlers['Unpack'] = passthrough +op_handlers['Add'] = passthrough +op_handlers['Sub'] = passthrough +op_handlers['Merge'] = passthrough +op_handlers['Sum'] = passthrough +op_handlers['Mean'] = passthrough +op_handlers['Cast'] = passthrough +op_handlers['Transpose'] = passthrough +op_handlers['Enter'] = passthrough +op_handlers['Exit'] = passthrough +op_handlers['NextIteration'] = passthrough +op_handlers['Tile'] = passthrough +op_handlers['TensorArrayScatterV3'] = passthrough +op_handlers['TensorArrayReadV3'] = passthrough +op_handlers['TensorArrayWriteV3'] = passthrough # ops that don't pass any attributions to their inputs -op_handlers["Shape"] = 
break_dependence -op_handlers["RandomUniform"] = break_dependence -op_handlers["ZerosLike"] = break_dependence +op_handlers['Shape'] = break_dependence +op_handlers['RandomUniform'] = break_dependence +op_handlers['ZerosLike'] = break_dependence # op_handlers["StopGradient"] = break_dependence # this allows us to stop attributions when we want to (like softmax re-centering) # ops that are linear and only allow a single input to vary -op_handlers["Reshape"] = linearity_1d(0) -op_handlers["Pad"] = linearity_1d(0) -op_handlers["ReverseV2"] = linearity_1d(0) -op_handlers["ConcatV2"] = linearity_with_excluded([-1]) -op_handlers["Conv2D"] = linearity_1d(0) -op_handlers["Switch"] = linearity_1d(0) -op_handlers["AvgPool"] = linearity_1d(0) -op_handlers["FusedBatchNorm"] = linearity_1d(0) +op_handlers['Reshape'] = linearity_1d(0) +op_handlers['Pad'] = linearity_1d(0) +op_handlers['ReverseV2'] = linearity_1d(0) +op_handlers['ConcatV2'] = linearity_with_excluded([-1]) +op_handlers['Conv2D'] = linearity_1d(0) +op_handlers['Switch'] = linearity_1d(0) +op_handlers['AvgPool'] = linearity_1d(0) +op_handlers['FusedBatchNorm'] = linearity_1d(0) # ops that are nonlinear and only allow a single input to vary -op_handlers["Relu"] = nonlinearity_1d(0) -op_handlers["Elu"] = nonlinearity_1d(0) -op_handlers["Sigmoid"] = nonlinearity_1d(0) -op_handlers["Tanh"] = nonlinearity_1d(0) -op_handlers["Softplus"] = nonlinearity_1d(0) -op_handlers["Exp"] = nonlinearity_1d(0) -op_handlers["ClipByValue"] = nonlinearity_1d(0) -op_handlers["Rsqrt"] = nonlinearity_1d(0) -op_handlers["Square"] = nonlinearity_1d(0) -op_handlers["Max"] = nonlinearity_1d(0) +op_handlers['Relu'] = nonlinearity_1d(0) +op_handlers['Elu'] = nonlinearity_1d(0) +op_handlers['Sigmoid'] = nonlinearity_1d(0) +op_handlers['Tanh'] = nonlinearity_1d(0) +op_handlers['Softplus'] = nonlinearity_1d(0) +op_handlers['Exp'] = nonlinearity_1d(0) +op_handlers['ClipByValue'] = nonlinearity_1d(0) +op_handlers['Rsqrt'] = nonlinearity_1d(0) 
+op_handlers['Square'] = nonlinearity_1d(0) +op_handlers['Max'] = nonlinearity_1d(0) # ops that are nonlinear and allow two inputs to vary -op_handlers["SquaredDifference"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: (x - y) * (x - y)) -op_handlers["Minimum"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.minimum(x, y)) -op_handlers["Maximum"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.maximum(x, y)) +op_handlers['SquaredDifference'] = nonlinearity_1d_nonlinearity_2d( + 0, 1, lambda x, y: (x - y) * (x - y)) +op_handlers['Minimum'] = nonlinearity_1d_nonlinearity_2d( + 0, 1, lambda x, y: tf.minimum(x, y)) +op_handlers['Maximum'] = nonlinearity_1d_nonlinearity_2d( + 0, 1, lambda x, y: tf.maximum(x, y)) # ops that allow up to two inputs to vary are are linear when only one input varies -op_handlers["Mul"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x * y) -op_handlers["RealDiv"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x / y) -op_handlers["MatMul"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.matmul(x, y)) +op_handlers['Mul'] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x * y) +op_handlers['RealDiv'] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x / y) +op_handlers['MatMul'] = linearity_1d_nonlinearity_2d( + 0, 1, lambda x, y: tf.matmul(x, y)) # ops that need their own custom attribution functions -op_handlers["GatherV2"] = gather -op_handlers["ResourceGather"] = gather -op_handlers["MaxPool"] = maxpool -op_handlers["Softmax"] = softmax +op_handlers['GatherV2'] = gather +op_handlers['ResourceGather'] = gather +op_handlers['MaxPool'] = maxpool +op_handlers['Softmax'] = softmax diff --git a/easy_rec/python/tools/explainer/explainer.py b/easy_rec/python/tools/explainer/explainer.py index a40784458..04d2bc4dc 100644 --- a/easy_rec/python/tools/explainer/explainer.py +++ b/easy_rec/python/tools/explainer/explainer.py @@ -1,24 +1,26 @@ -import tensorflow as tf -from tensorflow.python.platform import gfile -from 
tensorflow.python.saved_model import signature_constants -from easy_rec.python.utils.load_class import get_register_class_meta -from easy_rec.python.utils.config_util import get_configs_from_pipeline_file -from easy_rec.python.utils.input_utils import get_type_defaults -from easy_rec.python.tools.explainer.methods import DeepExplain -# from easy_rec.python.tools.explainer.deep_shap import DeepShap -from easy_rec.python.protos.dataset_pb2 import DatasetConfig import abc import collections -import numpy as np import logging -import six +import os import time + +import numpy as np +import six +import tensorflow as tf from six import moves -import os +from tensorflow.python.platform import gfile +from tensorflow.python.saved_model import signature_constants + +# from easy_rec.python.tools.explainer.deep_shap import DeepShap +from easy_rec.python.protos.dataset_pb2 import DatasetConfig +from easy_rec.python.tools.explainer.methods import DeepExplain +from easy_rec.python.utils.config_util import get_configs_from_pipeline_file +from easy_rec.python.utils.input_utils import get_type_defaults +from easy_rec.python.utils.load_class import get_register_class_meta _EXPLAINER_CLASS_MAP = {} _register_abc_meta = get_register_class_meta( - _EXPLAINER_CLASS_MAP, have_abstract_class=True) + _EXPLAINER_CLASS_MAP, have_abstract_class=True) class Explainer(six.with_metaclass(_register_abc_meta, object)): @@ -48,17 +50,18 @@ def _build_model(self): assert tf.saved_model.loader.maybe_saved_model_directory(model_path), \ 'saved model does not exists in %s' % model_path else: - raise ValueError('currently only savedmodel is supported, path:' + model_path) + raise ValueError('currently only savedmodel is supported, path:' + + model_path) input_fields = _get_input_fields_from_pipeline_config(model_path) self._input_fields_info, self._input_fields = input_fields de = self.deep_explain meta_graph_def = tf.saved_model.loader.load( - de.session, [tf.saved_model.tag_constants.SERVING], 
model_path) + de.session, [tf.saved_model.tag_constants.SERVING], model_path) # parse signature signature_def = meta_graph_def.signature_def[ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] inputs = signature_def.inputs input_info = [] self._is_multi_placeholder = len(inputs.items()) > 1 @@ -76,8 +79,8 @@ def _build_model(self): # in which case, the order of inputs may not be the # same as they are defined, therefore, list input # could not be supported, only dict input could be supported - logging.warning( - 'could not determine input_id from input_name: %s' % input_name) + logging.warning('could not determine input_id from input_name: %s' % + input_name) input_id = gid input_info.append((input_id, name, tensor.dtype)) self._inputs_map[name] = de.graph.get_tensor_by_name(tensor.name) @@ -120,7 +123,8 @@ def default_values(self): default_value = [] for i, (field, name) in enumerate(zip(input_fields, self._input_names)): - assert field == name, "input field `%d` has different names: <%s, %s>" % (i, field, name) + assert field == name, 'input field `%d` has different names: <%s, %s>' % ( + i, field, name) value = self._get_defaults(field) # default_value.append(np.array([value])) # for deep_shap default_value.append(np.array(value)) # for deep_shap @@ -134,20 +138,21 @@ def _get_defaults(self, col_name, col_type='string'): else: defaults = {'string': '', 'double': 0.0, 'bigint': 0} assert col_type in defaults, 'invalid col_type: %s, col_type: %s' % ( - col_name, col_type) + col_name, col_type) default_val = defaults[col_type] logging.info( - 'col_name: %s, default_val: %s.[not defined in saved_model_dir/assets/pipeline.config]' - % (col_name, default_val)) + 'col_name: %s, default_val: %s.[not defined in saved_model_dir/assets/pipeline.config]' + % (col_name, default_val)) return default_val def str_to_number(self, values): - assert len(values) == len(self._input_fields), "value count %d is not equal 
to the number of input fields %d" % ( - len(values), len(self._input_fields) - ) + assert len(values) == len( + self._input_fields + ), 'value count %d is not equal to the number of input fields %d' % ( + len(values), len(self._input_fields)) result = [] for i, name in enumerate(self._input_names): - assert name in self._input_fields_info, "input `%s` not in pipeline config" % name + assert name in self._input_fields_info, 'input `%s` not in pipeline config' % name idx = self._input_fields.index(name) input_type, default_val = self._input_fields_info[name] if input_type in {DatasetConfig.INT32, DatasetConfig.INT64}: @@ -177,24 +182,28 @@ def get_explainer(self, output_cols=None): tmp_cols.append(tmp_keys[0].strip()) self._output_cols = tmp_cols if len(self._output_cols) > 1: - logging.warning('Only one output can be supported currently, use the first one: %s', self._output_cols[0]) + logging.warning( + 'Only one output can be supported currently, use the first one: %s', + self._output_cols[0]) output_name = self._output_cols[0] assert output_name in self.output_names, 'invalid output name `%s` not in model outputs `%s`' % ( - output_name, ','.join(self.output_names)) + output_name, ','.join(self.output_names)) if output_name is None: output = self._outputs_map.values()[0] elif type(output_name) in {str, unicode}: output = self._outputs_map[output_name] else: - raise Exception('unsupported type of output_name: ' + str(type(output_name))) + raise Exception('unsupported type of output_name: ' + + str(type(output_name))) def_vals = self.default_values() # print('default values (%d):' % len(def_vals), def_vals) inputs = [self._inputs_map[name] for name in self._input_names] # e = DeepShap(inputs, output, def_vals, session=self._session) # self._explainer = e - e = self.deep_explain.get_explainer(self.method, output, inputs, baseline=def_vals) + e = self.deep_explain.get_explainer( + self.method, output, inputs, baseline=def_vals) return e @property @@ -236,6 +245,7 @@ 
def feature_importance(self, class OdpsExplainer(Explainer): + def feature_importance(self, input_path, output_path, @@ -247,17 +257,24 @@ def feature_importance(self, input_cols = self.input_names input_dim = len(input_cols) if reserved_cols: - reserved_cols = [x.strip() for x in reserved_cols.split(',') if x.strip() not in input_cols] + reserved_cols = [ + x.strip() + for x in reserved_cols.split(',') + if x.strip() not in input_cols + ] input_cols.extend(reserved_cols) selected_cols = ','.join(input_cols) - print("selected_cols: " + selected_cols) + print('selected_cols: ' + selected_cols) explainer = self.get_explainer(output_cols) - print("reference value:", explainer.expected_value) + print('reference value:', explainer.expected_value) import common_io - reader = common_io.table.TableReader(input_path, selected_cols=selected_cols, - slice_id=slice_id, slice_count=slice_num) + reader = common_io.table.TableReader( + input_path, + selected_cols=selected_cols, + slice_id=slice_id, + slice_count=slice_num) reserved_cols_idx = [] if reserved_cols: @@ -302,13 +319,15 @@ def feature_importance(self, class OdpsRtpExplainer(Explainer): + def __init__(self, deep_explain, model_path, method_name): - super(OdpsRtpExplainer, self).__init__(deep_explain, model_path, method_name) + super(OdpsRtpExplainer, self).__init__(deep_explain, model_path, + method_name) pipeline_path = os.path.join(model_path, 'assets/pipeline.config') if not gfile.Exists(pipeline_path): logging.warning( - '%s not exists, default values maybe inconsistent with the values used in training.' - % pipeline_path) + '%s not exists, default values maybe inconsistent with the values used in training.' 
+ % pipeline_path) return pipeline_config = get_configs_from_pipeline_file(pipeline_path) self._fg_separator = pipeline_config.data_config.separator @@ -325,19 +344,20 @@ def __init__(self, deep_explain, model_path, method_name): self._effective_fields = [] for fc in feature_configs: for input_name in fc.input_names: - assert input_name in self._input_fields, 'invalid input_name in %s' % str(fc) + assert input_name in self._input_fields, 'invalid input_name in %s' % str( + fc) if input_name not in self._effective_fields: self._effective_fields.append(input_name) self._effective_fids = [ - self._input_fields.index(x) for x in self._effective_fields + self._input_fields.index(x) for x in self._effective_fields ] # sort fids from small to large self._effective_fids = list(set(self._effective_fids)) self._effective_fields = [ - self._input_fields[x] for x in self._effective_fids + self._input_fields[x] for x in self._effective_fids ] - logging.info( - "raw input fields: %d, effective fields: %d" % (len(self._input_fields), len(self._effective_fields))) + logging.info('raw input fields: %d, effective fields: %d' % + (len(self._input_fields), len(self._effective_fields))) def feature_importance(self, input_path, @@ -352,14 +372,17 @@ def feature_importance(self, if 'features' not in input_cols: input_cols.append('features') selected_cols = ','.join(input_cols) - print("selected_cols: " + selected_cols) + print('selected_cols: ' + selected_cols) explainer = self.get_explainer(output_cols) - print("reference value:", explainer.expected_value) + print('reference value:', explainer.expected_value) import common_io - reader = common_io.table.TableReader(input_path, selected_cols=selected_cols, - slice_id=slice_id, slice_count=slice_num) + reader = common_io.table.TableReader( + input_path, + selected_cols=selected_cols, + slice_id=slice_id, + slice_count=slice_num) sum_t0, sum_t1, sum_t2 = 0, 0, 0 writer = common_io.table.TableWriter(output_path, slice_id=slice_id) @@ -373,9 
+396,11 @@ def feature_importance(self, for j in range(len(records)): if reserved_dim > 0: reserved.append(records[j][:reserved_dim]) - inputs.append(self.str_to_number(records[j][-1].decode('utf-8').split(self._fg_separator))) + inputs.append( + self.str_to_number(records[j][-1].decode('utf-8').split( + self._fg_separator))) inputs = list(np.array(inputs).T) - print("inputs:", inputs) + print('inputs:', inputs) # sv = explainer.shap_values(inputs, check_additivity=False) ret = explainer.run(inputs, batch_size=len(records)) ret = np.array(ret) @@ -406,8 +431,8 @@ def _get_input_fields_from_pipeline_config(model_path): pipeline_path = os.path.join(model_path, 'assets/pipeline.config') if not gfile.Exists(pipeline_path): logging.warning( - '%s not exists, default values maybe inconsistent with the values used in training.' - % pipeline_path) + '%s not exists, default values maybe inconsistent with the values used in training.' + % pipeline_path) return {}, [] pipeline_config = get_configs_from_pipeline_file(pipeline_path) data_config = pipeline_config.data_config @@ -418,11 +443,15 @@ def _get_input_fields_from_pipeline_config(model_path): input_fields = data_config.input_fields input_fields_info = { - input_field.input_name: - (input_field.input_type, input_field.default_val) - for input_field in input_fields if input_field.input_name not in labels + input_field.input_name: (input_field.input_type, input_field.default_val) + for input_field in input_fields + if input_field.input_name not in labels } - input_fields_list = [input_field.input_name for input_field in input_fields if input_field.input_name not in labels] + input_fields_list = [ + input_field.input_name + for input_field in input_fields + if input_field.input_name not in labels + ] return input_fields_info, input_fields_list @@ -448,12 +477,11 @@ def search_pb(directory, use_latest=False): if use_latest: logging.info('find %d models: %s' % (len(dir_list), ','.join(dir_list))) dir_list = sorted( - 
dir_list, - key=lambda x: int(x.split('/')[(-2 if (x[-1] == '/') else -1)])) + dir_list, + key=lambda x: int(x.split('/')[(-2 if (x[-1] == '/') else -1)])) return dir_list[-1] else: - raise ValueError('multiple saved model found in directory %s' % - directory) + raise ValueError('multiple saved model found in directory %s' % directory) return dir_list[0] @@ -490,17 +518,17 @@ def run(FLAGS): gpu_options = tf.GPUOptions(allow_growth=True) session_config = tf.ConfigProto( - gpu_options=gpu_options, - allow_soft_placement=True) + gpu_options=gpu_options, allow_soft_placement=True) session = tf.Session(config=session_config) worker_count = len(FLAGS.worker_hosts.split(',')) with DeepExplain(session=session) as de: e = OdpsRtpExplainer(de, model_path, 'deeplift') - e.feature_importance(FLAGS.explain_tables if FLAGS.explain_tables else FLAGS.tables, - FLAGS.outputs, - reserved_cols=FLAGS.reserved_cols, - output_cols=FLAGS.output_cols, - batch_size=FLAGS.batch_size, - slice_id=FLAGS.task_index, - slice_num=worker_count) + e.feature_importance( + FLAGS.explain_tables if FLAGS.explain_tables else FLAGS.tables, + FLAGS.outputs, + reserved_cols=FLAGS.reserved_cols, + output_cols=FLAGS.output_cols, + batch_size=FLAGS.batch_size, + slice_id=FLAGS.task_index, + slice_num=worker_count) diff --git a/easy_rec/python/tools/explainer/feature_importance.py b/easy_rec/python/tools/explainer/feature_importance.py index 034f3c0da..7085274ab 100644 --- a/easy_rec/python/tools/explainer/feature_importance.py +++ b/easy_rec/python/tools/explainer/feature_importance.py @@ -1,9 +1,13 @@ from __future__ import print_function -from easy_rec.python.tools.explainer.explainer import run + import tensorflow as tf + +from easy_rec.python.tools.explainer.explainer import run + flags = tf.app.flags -flags.DEFINE_string('saved_model_dir', '', 'directory where saved_model.pb exists') +flags.DEFINE_string('saved_model_dir', '', + 'directory where saved_model.pb exists') 
flags.DEFINE_string('explain_tables', '', 'tables used for explaination') flags.DEFINE_string('background_table', '', 'tables used for expected value') flags.DEFINE_string('tables', '', 'tables passed by pai command') @@ -18,7 +22,8 @@ 'output_cols', None, 'output columns, such as: score float. multiple columns are separated by ,') flags.DEFINE_integer('batch_size', 1024, 'predict batch size') -flags.DEFINE_string('worker_hosts', '', 'Comma-separated list of hostname:port pairs') +flags.DEFINE_string('worker_hosts', '', + 'Comma-separated list of hostname:port pairs') flags.DEFINE_integer('task_index', 0, 'Index of task within the job') FLAGS = flags.FLAGS @@ -28,7 +33,7 @@ def main(_): for k in FLAGS: if k in ('h', 'help', 'helpshort', 'helpfull'): continue - print("%s=%s" % (k, FLAGS[k].value)) + print('%s=%s' % (k, FLAGS[k].value)) # worker_count = len(FLAGS.worker_hosts.split(',')) # e = create_explainer(FLAGS.saved_model_dir) diff --git a/easy_rec/python/tools/explainer/methods.py b/easy_rec/python/tools/explainer/methods.py index aa7192acc..38c53be55 100644 --- a/easy_rec/python/tools/explainer/methods.py +++ b/easy_rec/python/tools/explainer/methods.py @@ -2,60 +2,62 @@ from __future__ import division from __future__ import print_function +import logging import sys +import warnings +from collections import OrderedDict + import numpy as np -from skimage.util import view_as_windows -import warnings, logging import tensorflow as tf +from skimage.util import view_as_windows from tensorflow.python.framework import ops -from tensorflow.python.ops import nn_grad, math_grad -from collections import OrderedDict -from easy_rec.python.tools.explainer.utils import make_batches, slice_arrays, to_list, unpack_singleton +from tensorflow.python.ops import math_grad +from tensorflow.python.ops import nn_grad + +from easy_rec.python.tools.explainer.utils import make_batches +from easy_rec.python.tools.explainer.utils import slice_arrays +from 
easy_rec.python.tools.explainer.utils import to_list +from easy_rec.python.tools.explainer.utils import unpack_singleton -SUPPORTED_ACTIVATIONS = [ - 'Relu', 'Elu', 'Sigmoid', 'Tanh', 'Softplus' -] +SUPPORTED_ACTIVATIONS = ['Relu', 'Elu', 'Sigmoid', 'Tanh', 'Softplus'] -UNSUPPORTED_ACTIVATIONS = [ - 'CRelu', 'Relu6', 'Softsign' -] +UNSUPPORTED_ACTIVATIONS = ['CRelu', 'Relu6', 'Softsign'] _ENABLED_METHOD_CLASS = None _GRAD_OVERRIDE_CHECKFLAG = 0 - # ----------------------------------------------------------------------------- # UTILITY FUNCTIONS # ----------------------------------------------------------------------------- def activation(type): - """ - Returns Tensorflow's activation op, given its type - :param type: string - :return: op - """ - if type not in SUPPORTED_ACTIVATIONS: - warnings.warn('Activation function (%s) not supported' % type) - f = getattr(tf.nn, type.lower()) - return f + """Returns Tensorflow's activation op, given its type. + + :param type: string + :return: op + """ + if type not in SUPPORTED_ACTIVATIONS: + warnings.warn('Activation function (%s) not supported' % type) + f = getattr(tf.nn, type.lower()) + return f def original_grad(op, grad): - """ - Return original Tensorflow gradient for an op - :param op: op - :param grad: Tensor - :return: Tensor - """ - if op.type not in SUPPORTED_ACTIVATIONS: - warnings.warn('Activation function (%s) not supported' % op.type) - opname = '_%sGrad' % op.type - if hasattr(nn_grad, opname): - f = getattr(nn_grad, opname) - else: - f = getattr(math_grad, opname) - return f(op, grad) + """Return original Tensorflow gradient for an op. 
+ + :param op: op + :param grad: Tensor + :return: Tensor + """ + if op.type not in SUPPORTED_ACTIVATIONS: + warnings.warn('Activation function (%s) not supported' % op.type) + opname = '_%sGrad' % op.type + if hasattr(nn_grad, opname): + f = getattr(nn_grad, opname) + else: + f = getattr(math_grad, opname) + return f(op, grad) # ----------------------------------------------------------------------------- @@ -64,172 +66,194 @@ def original_grad(op, grad): class AttributionMethod(object): - """ - Attribution method base class - """ - def __init__(self, T, X, session, keras_learning_phase=None): - self.T = T # target Tensor - self.X = X # input Tensor - self.Y_shape = [None,] + T.get_shape().as_list()[1:] - # Most often T contains multiple output units. In this case, it is often necessary to select - # a single unit to compute contributions for. This can be achieved passing 'ys' as weight for the output Tensor. - self.Y = tf.placeholder(tf.float32, self.Y_shape) - # placeholder_from_data(ys) if ys is not None else 1.0 # Tensor that represents weights for T - self.T = self.T * self.Y - self.symbolic_attribution = None - self.session = session - self.keras_learning_phase = keras_learning_phase - self.has_multiple_inputs = type(self.X) is list or type(self.X) is tuple - logging.info('Model with multiple inputs: %s' % self.has_multiple_inputs) - - # Set baseline - # TODO: now this sets a baseline also for those methods that does not require it - self._set_check_baseline() - - # References - self._init_references() - - # Create symbolic explanation once during construction (affects only gradient-based methods) - self.explain_symbolic() - - def explain_symbolic(self): - return None - - def run(self, xs, ys=None, batch_size=None): - pass - - def _init_references(self): - pass - - def _check_input_compatibility(self, xs, ys=None, batch_size=None): + """Attribution method base class.""" + + def __init__(self, T, X, session, keras_learning_phase=None): + self.T = T # target 
Tensor + self.X = X # input Tensor + self.Y_shape = [ + None, + ] + T.get_shape().as_list()[1:] + # Most often T contains multiple output units. In this case, it is often necessary to select + # a single unit to compute contributions for. This can be achieved passing 'ys' as weight for the output Tensor. + self.Y = tf.placeholder(tf.float32, self.Y_shape) + # placeholder_from_data(ys) if ys is not None else 1.0 # Tensor that represents weights for T + self.T = self.T * self.Y + self.symbolic_attribution = None + self.session = session + self.keras_learning_phase = keras_learning_phase + self.has_multiple_inputs = type(self.X) is list or type(self.X) is tuple + logging.info('Model with multiple inputs: %s' % self.has_multiple_inputs) + + # Set baseline + # TODO: now this sets a baseline also for those methods that does not require it + self._set_check_baseline() + + # References + self._init_references() + + # Create symbolic explanation once during construction (affects only gradient-based methods) + self.explain_symbolic() + + def explain_symbolic(self): + return None + + def run(self, xs, ys=None, batch_size=None): + pass + + def _init_references(self): + pass + + def _check_input_compatibility(self, xs, ys=None, batch_size=None): + if ys is not None: + if not self.has_multiple_inputs and len(xs) != len(ys): + raise RuntimeError( + 'When provided, ys must have the same batch size as xs (xs has batch size {} and ys {})' + .format(len(xs), len(ys))) + elif self.has_multiple_inputs and np.all([len(i) != len(ys) for i in xs]): + raise RuntimeError( + 'When provided, ys must have the same batch size as all elements of xs' + ) + if batch_size is not None and batch_size > 0: + if self.T.shape[0].value is not None and self.T.shape[ + 0].value is not batch_size: + raise RuntimeError( + 'When using batch evaluation, the first dimension of the target tensor ' + 'must be compatible with the batch size. 
Found %s instead' % + self.T.shape[0].value) + if isinstance(self.X, list): + for x in self.X: + if x.shape[0].value is not None and x.shape[0].value is not batch_size: + raise RuntimeError( + 'When using batch evaluation, the first dimension of the input tensor ' + 'must be compatible with the batch size. Found %s instead' % + x.shape[0].value) + else: + if self.X.shape[0].value is not None and self.X.shape[ + 0].value is not batch_size: + raise RuntimeError( + 'When using batch evaluation, the first dimension of the input tensor ' + 'must be compatible with the batch size. Found %s instead' % + self.X.shape[0].value) + + def _session_run_batch(self, T, xs, ys=None): + feed_dict = {} + if self.has_multiple_inputs: + for k, v in zip(self.X, xs): + feed_dict[k] = v + else: + feed_dict[self.X] = xs + + # If ys is not passed, produce a vector of ones that will be broadcasted to all batch samples + feed_dict[self.Y] = ys if ys is not None else np.ones([ + 1, + ] + self.Y_shape[1:]) + + if self.keras_learning_phase is not None: + feed_dict[self.keras_learning_phase] = 0 + return self.session.run(T, feed_dict) + + def _session_run(self, T, xs, ys=None, batch_size=None): + num_samples = len(xs) + if self.has_multiple_inputs is True: + num_samples = len(xs[0]) + if len(xs) != len(self.X): + raise RuntimeError( + 'List of input tensors and input data have different lengths (%s and %s)' + % (str(len(xs)), str(len(self.X)))) + if batch_size is not None: + for xi in xs: + if len(xi) != num_samples: + raise RuntimeError( + 'Evaluation in batches requires all inputs to have ' + 'the same number of samples') + + if batch_size is None or batch_size <= 0 or num_samples <= batch_size: + return self._session_run_batch(T, xs, ys) + else: + outs = [] + batches = make_batches(num_samples, batch_size) + for batch_index, (batch_start, batch_end) in enumerate(batches): + # Get a batch from data + xs_batch = slice_arrays(xs, batch_start, batch_end) + # If the target tensor has one entry for 
each sample, we need to batch it as well + ys_batch = None if ys is not None: - if not self.has_multiple_inputs and len(xs) != len(ys): - raise RuntimeError('When provided, ys must have the same batch size as xs (xs has batch size {} and ys {})'.format(len(xs), len(ys))) - elif self.has_multiple_inputs and np.all([len(i) != len(ys) for i in xs]): - raise RuntimeError('When provided, ys must have the same batch size as all elements of xs') - if batch_size is not None and batch_size > 0: - if self.T.shape[0].value is not None and self.T.shape[0].value is not batch_size: - raise RuntimeError('When using batch evaluation, the first dimension of the target tensor ' - 'must be compatible with the batch size. Found %s instead' % self.T.shape[0].value) - if isinstance(self.X, list): - for x in self.X: - if x.shape[0].value is not None and x.shape[0].value is not batch_size: - raise RuntimeError('When using batch evaluation, the first dimension of the input tensor ' - 'must be compatible with the batch size. Found %s instead' % x.shape[ - 0].value) - else: - if self.X.shape[0].value is not None and self.X.shape[0].value is not batch_size: - raise RuntimeError('When using batch evaluation, the first dimension of the input tensor ' - 'must be compatible with the batch size. 
Found %s instead' % self.X.shape[0].value) - - def _session_run_batch(self, T, xs, ys=None): - feed_dict = {} - if self.has_multiple_inputs: - for k, v in zip(self.X, xs): - feed_dict[k] = v - else: - feed_dict[self.X] = xs - - # If ys is not passed, produce a vector of ones that will be broadcasted to all batch samples - feed_dict[self.Y] = ys if ys is not None else np.ones([1,] + self.Y_shape[1:]) - - if self.keras_learning_phase is not None: - feed_dict[self.keras_learning_phase] = 0 - return self.session.run(T, feed_dict) - - def _session_run(self, T, xs, ys=None, batch_size=None): - num_samples = len(xs) - if self.has_multiple_inputs is True: - num_samples = len(xs[0]) - if len(xs) != len(self.X): - raise RuntimeError('List of input tensors and input data have different lengths (%s and %s)' - % (str(len(xs)), str(len(self.X)))) - if batch_size is not None: - for xi in xs: - if len(xi) != num_samples: - raise RuntimeError('Evaluation in batches requires all inputs to have ' - 'the same number of samples') - - if batch_size is None or batch_size <= 0 or num_samples <= batch_size: - return self._session_run_batch(T, xs, ys) - else: - outs = [] - batches = make_batches(num_samples, batch_size) - for batch_index, (batch_start, batch_end) in enumerate(batches): - # Get a batch from data - xs_batch = slice_arrays(xs, batch_start, batch_end) - # If the target tensor has one entry for each sample, we need to batch it as well - ys_batch = None - if ys is not None: - ys_batch = slice_arrays(ys, batch_start, batch_end) - batch_outs = self._session_run_batch(T, xs_batch, ys_batch) - batch_outs = to_list(batch_outs) - if batch_index == 0: - # Pre-allocate the results arrays. 
- for batch_out in batch_outs: - shape = (num_samples,) + batch_out.shape[1:] - outs.append(np.zeros(shape, dtype=batch_out.dtype)) - for i, batch_out in enumerate(batch_outs): - outs[i][batch_start:batch_end] = batch_out - return unpack_singleton(outs) - - def _set_check_baseline(self): - # Do nothing for those methods that have no baseline required - if not hasattr(self, "baseline"): - return - - if self.baseline is None: - if self.has_multiple_inputs: - self.baseline = [np.zeros([1,] + xi.get_shape().as_list()[1:]) for xi in self.X] - else: - self.baseline = np.zeros([1,] + self.X.get_shape().as_list()[1:]) + ys_batch = slice_arrays(ys, batch_start, batch_end) + batch_outs = self._session_run_batch(T, xs_batch, ys_batch) + batch_outs = to_list(batch_outs) + if batch_index == 0: + # Pre-allocate the results arrays. + for batch_out in batch_outs: + shape = (num_samples,) + batch_out.shape[1:] + outs.append(np.zeros(shape, dtype=batch_out.dtype)) + for i, batch_out in enumerate(batch_outs): + outs[i][batch_start:batch_end] = batch_out + return unpack_singleton(outs) + + def _set_check_baseline(self): + # Do nothing for those methods that have no baseline required + if not hasattr(self, 'baseline'): + return + + if self.baseline is None: + if self.has_multiple_inputs: + self.baseline = [ + np.zeros([ + 1, + ] + xi.get_shape().as_list()[1:]) for xi in self.X + ] + else: + self.baseline = np.zeros([ + 1, + ] + self.X.get_shape().as_list()[1:]) + else: + if self.has_multiple_inputs: + for i, xi in enumerate(self.X): + if list(self.baseline[i].shape) == xi.get_shape().as_list()[1:]: + self.baseline[i] = np.expand_dims(self.baseline[i], 0) + else: + raise RuntimeError( + 'Baseline shape %s does not match expected shape %s' % + (self.baseline[i].shape, xi.get_shape().as_list()[1:])) + else: + if list(self.baseline.shape) == self.X.get_shape().as_list()[1:]: + self.baseline = np.expand_dims(self.baseline, 0) else: - if self.has_multiple_inputs: - for i, xi in 
enumerate(self.X): - if list(self.baseline[i].shape) == xi.get_shape().as_list()[1:]: - self.baseline[i] = np.expand_dims(self.baseline[i], 0) - else: - raise RuntimeError('Baseline shape %s does not match expected shape %s' - % (self.baseline[i].shape, xi.get_shape().as_list()[1:])) - else: - if list(self.baseline.shape) == self.X.get_shape().as_list()[1:]: - self.baseline = np.expand_dims(self.baseline, 0) - else: - raise RuntimeError('Baseline shape %s does not match expected shape %s' - % (self.baseline.shape, self.X.get_shape().as_list()[1:])) + raise RuntimeError( + 'Baseline shape %s does not match expected shape %s' % + (self.baseline.shape, self.X.get_shape().as_list()[1:])) class GradientBasedMethod(AttributionMethod): - """ - Base class for gradient-based attribution methods - """ - def get_symbolic_attribution(self): - return tf.gradients(self.T, self.X) + """Base class for gradient-based attribution methods.""" + + def get_symbolic_attribution(self): + return tf.gradients(self.T, self.X) - def explain_symbolic(self): - if self.symbolic_attribution is None: - self.symbolic_attribution = self.get_symbolic_attribution() - return self.symbolic_attribution + def explain_symbolic(self): + if self.symbolic_attribution is None: + self.symbolic_attribution = self.get_symbolic_attribution() + return self.symbolic_attribution - def run(self, xs, ys=None, batch_size=None): - self._check_input_compatibility(xs, ys, batch_size) - results = self._session_run(self.explain_symbolic(), xs, ys, batch_size) - return results[0] if not self.has_multiple_inputs else results + def run(self, xs, ys=None, batch_size=None): + self._check_input_compatibility(xs, ys, batch_size) + results = self._session_run(self.explain_symbolic(), xs, ys, batch_size) + return results[0] if not self.has_multiple_inputs else results - @classmethod - def nonlinearity_grad_override(cls, op, grad): - return original_grad(op, grad) + @classmethod + def nonlinearity_grad_override(cls, op, grad): + 
return original_grad(op, grad) class PerturbationBasedMethod(AttributionMethod): - """ - Base class for perturbation-based attribution methods - """ - def __init__(self, T, X, session, keras_learning_phase): - super(PerturbationBasedMethod, self).__init__(T, X, session, keras_learning_phase) - self.base_activation = None + """Base class for perturbation-based attribution methods.""" + def __init__(self, T, X, session, keras_learning_phase): + super(PerturbationBasedMethod, self).__init__(T, X, session, + keras_learning_phase) + self.base_activation = None # ----------------------------------------------------------------------------- @@ -242,13 +266,14 @@ def __init__(self, T, X, session, keras_learning_phase): class DummyZero(GradientBasedMethod): - def get_symbolic_attribution(self,): - return tf.gradients(self.T, self.X) + def get_symbolic_attribution(self,): + return tf.gradients(self.T, self.X) + + @classmethod + def nonlinearity_grad_override(cls, op, grad): + input = op.inputs[0] + return tf.zeros_like(input) - @classmethod - def nonlinearity_grad_override(cls, op, grad): - input = op.inputs[0] - return tf.zeros_like(input) """ Saliency maps @@ -258,8 +283,8 @@ def nonlinearity_grad_override(cls, op, grad): class Saliency(GradientBasedMethod): - def get_symbolic_attribution(self): - return [tf.abs(g) for g in tf.gradients(self.T, self.X)] + def get_symbolic_attribution(self): + return [tf.abs(g) for g in tf.gradients(self.T, self.X)] """ @@ -270,10 +295,12 @@ def get_symbolic_attribution(self): class GradientXInput(GradientBasedMethod): - def get_symbolic_attribution(self): - return [g * x for g, x in zip( + def get_symbolic_attribution(self): + return [ + g * x for g, x in zip( tf.gradients(self.T, self.X), - self.X if self.has_multiple_inputs else [self.X])] + self.X if self.has_multiple_inputs else [self.X]) + ] """ @@ -284,28 +311,38 @@ def get_symbolic_attribution(self): class IntegratedGradients(GradientBasedMethod): - def __init__(self, T, X, session, 
keras_learning_phase, steps=100, baseline=None): - self.steps = steps - self.baseline = baseline - super(IntegratedGradients, self).__init__(T, X, session, keras_learning_phase) - - def run(self, xs, ys=None, batch_size=None): - self._check_input_compatibility(xs, ys, batch_size) - - gradient = None - for alpha in list(np.linspace(1. / self.steps, 1.0, self.steps)): - xs_mod = [b + (x - b) * alpha for x, b in zip(xs, self.baseline)] if self.has_multiple_inputs \ - else self.baseline + (xs - self.baseline) * alpha - _attr = self._session_run(self.explain_symbolic(), xs_mod, ys, batch_size) - if gradient is None: gradient = _attr - else: gradient = [g + a for g, a in zip(gradient, _attr)] - - results = [g * (x - b) / self.steps for g, x, b in zip( - gradient, - xs if self.has_multiple_inputs else [xs], - self.baseline if self.has_multiple_inputs else [self.baseline])] - - return results[0] if not self.has_multiple_inputs else results + def __init__(self, + T, + X, + session, + keras_learning_phase, + steps=100, + baseline=None): + self.steps = steps + self.baseline = baseline + super(IntegratedGradients, self).__init__(T, X, session, + keras_learning_phase) + + def run(self, xs, ys=None, batch_size=None): + self._check_input_compatibility(xs, ys, batch_size) + + gradient = None + for alpha in list(np.linspace(1. 
/ self.steps, 1.0, self.steps)): + xs_mod = [b + (x - b) * alpha for x, b in zip(xs, self.baseline)] if self.has_multiple_inputs \ + else self.baseline + (xs - self.baseline) * alpha + _attr = self._session_run(self.explain_symbolic(), xs_mod, ys, batch_size) + if gradient is None: + gradient = _attr + else: + gradient = [g + a for g, a in zip(gradient, _attr)] + + results = [ + g * (x - b) / self.steps for g, x, b in zip( + gradient, xs if self.has_multiple_inputs else [xs], + self.baseline if self.has_multiple_inputs else [self.baseline]) + ] + + return results[0] if not self.has_multiple_inputs else results """ @@ -315,25 +352,29 @@ def run(self, xs, ys=None, batch_size=None): class EpsilonLRP(GradientBasedMethod): - eps = None + eps = None - def __init__(self, T, X, session, keras_learning_phase, epsilon=1e-4): - assert epsilon > 0.0, 'LRP epsilon must be greater than zero' - global eps - eps = epsilon - super(EpsilonLRP, self).__init__(T, X, session, keras_learning_phase) + def __init__(self, T, X, session, keras_learning_phase, epsilon=1e-4): + assert epsilon > 0.0, 'LRP epsilon must be greater than zero' + global eps + eps = epsilon + super(EpsilonLRP, self).__init__(T, X, session, keras_learning_phase) - def get_symbolic_attribution(self): - return [g * x for g, x in zip( + def get_symbolic_attribution(self): + return [ + g * x for g, x in zip( tf.gradients(self.T, self.X), - self.X if self.has_multiple_inputs else [self.X])] + self.X if self.has_multiple_inputs else [self.X]) + ] + + @classmethod + def nonlinearity_grad_override(cls, op, grad): + output = op.outputs[0] + input = op.inputs[0] + return grad * output / ( + input + eps * + tf.where(input >= 0, tf.ones_like(input), -1 * tf.ones_like(input))) - @classmethod - def nonlinearity_grad_override(cls, op, grad): - output = op.outputs[0] - input = op.inputs[0] - return grad * output / (input + eps * - tf.where(input >= 0, tf.ones_like(input), -1 * tf.ones_like(input))) """ DeepLIFT @@ -344,45 +385,48 @@ 
def nonlinearity_grad_override(cls, op, grad): class DeepLIFTRescale(GradientBasedMethod): - _deeplift_ref = {} + _deeplift_ref = {} - def __init__(self, T, X, session, keras_learning_phase, baseline=None): - self.baseline = baseline - super(DeepLIFTRescale, self).__init__(T, X, session, keras_learning_phase) + def __init__(self, T, X, session, keras_learning_phase, baseline=None): + self.baseline = baseline + super(DeepLIFTRescale, self).__init__(T, X, session, keras_learning_phase) - def get_symbolic_attribution(self): - return [g * (x - b) for g, x, b in zip( + def get_symbolic_attribution(self): + return [ + g * (x - b) for g, x, b in zip( tf.gradients(self.T, self.X), self.X if self.has_multiple_inputs else [self.X], - self.baseline if self.has_multiple_inputs else [self.baseline])] - - @classmethod - def nonlinearity_grad_override(cls, op, grad): - output = op.outputs[0] - input = op.inputs[0] - ref_input = cls._deeplift_ref[op.name] - ref_output = activation(op.type)(ref_input) - delta_out = output - ref_output - delta_in = input - ref_input - instant_grad = activation(op.type)(0.5 * (ref_input + input)) - return tf.where(tf.abs(delta_in) > 1e-5, grad * delta_out / delta_in, - original_grad(instant_grad.op, grad)) - - def _init_references(self): - # print ('DeepLIFT: computing references...') - sys.stdout.flush() - self._deeplift_ref.clear() - ops = [] - g = tf.get_default_graph() - for op in g.get_operations(): - if len(op.inputs) > 0 and not op.name.startswith('gradients'): - if op.type in SUPPORTED_ACTIVATIONS: - ops.append(op) - YR = self._session_run([o.inputs[0] for o in ops], self.baseline) - for (r, op) in zip(YR, ops): - self._deeplift_ref[op.name] = r - # print('DeepLIFT: references ready') - sys.stdout.flush() + self.baseline if self.has_multiple_inputs else [self.baseline]) + ] + + @classmethod + def nonlinearity_grad_override(cls, op, grad): + output = op.outputs[0] + input = op.inputs[0] + ref_input = cls._deeplift_ref[op.name] + ref_output = 
activation(op.type)(ref_input) + delta_out = output - ref_output + delta_in = input - ref_input + instant_grad = activation(op.type)(0.5 * (ref_input + input)) + return tf.where( + tf.abs(delta_in) > 1e-5, grad * delta_out / delta_in, + original_grad(instant_grad.op, grad)) + + def _init_references(self): + # print ('DeepLIFT: computing references...') + sys.stdout.flush() + self._deeplift_ref.clear() + ops = [] + g = tf.get_default_graph() + for op in g.get_operations(): + if len(op.inputs) > 0 and not op.name.startswith('gradients'): + if op.type in SUPPORTED_ACTIVATIONS: + ops.append(op) + YR = self._session_run([o.inputs[0] for o in ops], self.baseline) + for (r, op) in zip(YR, ops): + self._deeplift_ref[op.name] = r + # print('DeepLIFT: references ready') + sys.stdout.flush() """ @@ -401,58 +445,70 @@ def _init_references(self): class Occlusion(PerturbationBasedMethod): - def __init__(self, T, X, session, keras_learning_phase, window_shape=None, step=None): - super(Occlusion, self).__init__(T, X, session, keras_learning_phase) - if self.has_multiple_inputs: - raise RuntimeError('Multiple inputs not yet supported for perturbation methods') - - input_shape = X[0].get_shape().as_list() - if window_shape is not None: - assert len(window_shape) == len(input_shape), \ - 'window_shape must have length of input (%d)' % len(input_shape) - self.window_shape = tuple(window_shape) - else: - self.window_shape = (1,) * len(input_shape) + def __init__(self, + T, + X, + session, + keras_learning_phase, + window_shape=None, + step=None): + super(Occlusion, self).__init__(T, X, session, keras_learning_phase) + if self.has_multiple_inputs: + raise RuntimeError( + 'Multiple inputs not yet supported for perturbation methods') + + input_shape = X[0].get_shape().as_list() + if window_shape is not None: + assert len(window_shape) == len(input_shape), \ + 'window_shape must have length of input (%d)' % len(input_shape) + self.window_shape = tuple(window_shape) + else: + 
self.window_shape = (1,) * len(input_shape) - if step is not None: - assert isinstance(step, int) or len(step) == len(input_shape), \ - 'step must be integer or tuple with the length of input (%d)' % len(input_shape) - self.step = step - else: - self.step = 1 - self.replace_value = 0.0 - logging.info('Input shape: %s; window_shape %s; step %s' % (input_shape, self.window_shape, self.step)) - - def run(self, xs, ys=None, batch_size=None): - self._check_input_compatibility(xs, ys, batch_size) - input_shape = xs.shape[1:] - batch_size = xs.shape[0] - total_dim = np.asscalar(np.prod(input_shape)) - - # Create mask - index_matrix = np.arange(total_dim).reshape(input_shape) - idx_patches = view_as_windows(index_matrix, self.window_shape, self.step).reshape((-1,) + self.window_shape) - heatmap = np.zeros_like(xs, dtype=np.float32).reshape((-1), total_dim) - w = np.zeros_like(heatmap) - - # Compute original output - eval0 = self._session_run(self.T, xs, ys, batch_size) - - # Start perturbation loop - for i, p in enumerate(idx_patches): - mask = np.ones(input_shape).flatten() - mask[p.flatten()] = self.replace_value - masked_xs = mask.reshape((1,) + input_shape) * xs - delta = eval0 - self._session_run(self.T, masked_xs, ys, batch_size) - delta_aggregated = np.sum(delta.reshape((batch_size, -1)), -1, keepdims=True) - heatmap[:, p.flatten()] += delta_aggregated - w[:, p.flatten()] += p.size - - attribution = np.reshape(heatmap / w, xs.shape) - if np.isnan(attribution).any(): - warnings.warn('Attributions generated by Occlusion method contain nans, ' - 'probably because window_shape and step do not allow to cover the all input.') - return attribution + if step is not None: + assert isinstance(step, int) or len(step) == len(input_shape), \ + 'step must be integer or tuple with the length of input (%d)' % len(input_shape) + self.step = step + else: + self.step = 1 + self.replace_value = 0.0 + logging.info('Input shape: %s; window_shape %s; step %s' % + (input_shape, 
self.window_shape, self.step)) + + def run(self, xs, ys=None, batch_size=None): + self._check_input_compatibility(xs, ys, batch_size) + input_shape = xs.shape[1:] + batch_size = xs.shape[0] + total_dim = np.asscalar(np.prod(input_shape)) + + # Create mask + index_matrix = np.arange(total_dim).reshape(input_shape) + idx_patches = view_as_windows(index_matrix, self.window_shape, + self.step).reshape((-1,) + self.window_shape) + heatmap = np.zeros_like(xs, dtype=np.float32).reshape((-1), total_dim) + w = np.zeros_like(heatmap) + + # Compute original output + eval0 = self._session_run(self.T, xs, ys, batch_size) + + # Start perturbation loop + for i, p in enumerate(idx_patches): + mask = np.ones(input_shape).flatten() + mask[p.flatten()] = self.replace_value + masked_xs = mask.reshape((1,) + input_shape) * xs + delta = eval0 - self._session_run(self.T, masked_xs, ys, batch_size) + delta_aggregated = np.sum( + delta.reshape((batch_size, -1)), -1, keepdims=True) + heatmap[:, p.flatten()] += delta_aggregated + w[:, p.flatten()] += p.size + + attribution = np.reshape(heatmap / w, xs.shape) + if np.isnan(attribution).any(): + warnings.warn( + 'Attributions generated by Occlusion method contain nans, ' + 'probably because window_shape and step do not allow to cover the all input.' + ) + return attribution """ @@ -460,7 +516,7 @@ def run(self, xs, ys=None, batch_size=None): Computes approximate Shapley Values using "Polynomial calculation of the Shapley value based on sampling", Castro et al, 2009 (https://www.sciencedirect.com/science/article/pii/S0305054808000804) samples : integer (default 5) -Defined the number of samples for each input feature. +Defined the number of samples for each input feature. Notice that evaluating a model samples * n_input_feature times might take a while. sampling_dims : list of dimension indexes to run sampling on (feature dimensions). By default, all dimensions except the batch dimension will be sampled. 
@@ -471,61 +527,72 @@ def run(self, xs, ys=None, batch_size=None): class ShapleySampling(PerturbationBasedMethod): - def __init__(self, T, X, session, keras_learning_phase, samples=5, sampling_dims=None): - super(ShapleySampling, self).__init__(T, X, session, keras_learning_phase) - if self.has_multiple_inputs: - raise RuntimeError('Multiple inputs not yet supported for perturbation methods') - dims = len(X.shape) - if sampling_dims is not None: - if not 0 < len(sampling_dims) <= (dims - 1): - raise RuntimeError('sampling_dims must be a list containing 1 to %d elements' % (dims-1)) - if 0 in sampling_dims: - raise RuntimeError('Cannot sample batch dimension: remove 0 from sampling_dims') - if any([x < 1 or x > dims-1 for x in sampling_dims]): - raise RuntimeError('Invalid value in sampling_dims') - else: - sampling_dims = list(range(1, dims)) - - self.samples = samples - self.sampling_dims = sampling_dims - - def run(self, xs, ys=None, batch_size=None): - xs_shape = list(xs.shape) - batch_size = xs.shape[0] - n_features = int(np.asscalar(np.prod([xs.shape[i] for i in self.sampling_dims]))) - result = np.zeros((xs_shape[0], n_features)) - - run_shape = list(xs_shape) # a copy - run_shape = np.delete(run_shape, self.sampling_dims).tolist() - run_shape.insert(1, -1) - - reconstruction_shape = [xs_shape[0]] - for j in self.sampling_dims: - reconstruction_shape.append(xs_shape[j]) - - for r in range(self.samples): - p = np.random.permutation(n_features) - x = xs.copy().reshape(run_shape) - y = None - for i in p: - if y is None: - y = self._session_run(self.T, x.reshape(xs_shape), ys, batch_size) - x[:, i] = 0 - y0 = self._session_run(self.T, x.reshape(xs_shape), ys, batch_size) - delta = y - y0 - delta_aggregated = np.sum(delta.reshape((batch_size, -1)), -1, keepdims=False) - result[:, i] += delta_aggregated - y = y0 - - shapley = result / self.samples - return shapley.reshape(reconstruction_shape) + def __init__(self, + T, + X, + session, + keras_learning_phase, + 
samples=5, + sampling_dims=None): + super(ShapleySampling, self).__init__(T, X, session, keras_learning_phase) + if self.has_multiple_inputs: + raise RuntimeError( + 'Multiple inputs not yet supported for perturbation methods') + dims = len(X.shape) + if sampling_dims is not None: + if not 0 < len(sampling_dims) <= (dims - 1): + raise RuntimeError( + 'sampling_dims must be a list containing 1 to %d elements' % + (dims - 1)) + if 0 in sampling_dims: + raise RuntimeError( + 'Cannot sample batch dimension: remove 0 from sampling_dims') + if any([x < 1 or x > dims - 1 for x in sampling_dims]): + raise RuntimeError('Invalid value in sampling_dims') + else: + sampling_dims = list(range(1, dims)) + + self.samples = samples + self.sampling_dims = sampling_dims + + def run(self, xs, ys=None, batch_size=None): + xs_shape = list(xs.shape) + batch_size = xs.shape[0] + n_features = int( + np.asscalar(np.prod([xs.shape[i] for i in self.sampling_dims]))) + result = np.zeros((xs_shape[0], n_features)) + + run_shape = list(xs_shape) # a copy + run_shape = np.delete(run_shape, self.sampling_dims).tolist() + run_shape.insert(1, -1) + + reconstruction_shape = [xs_shape[0]] + for j in self.sampling_dims: + reconstruction_shape.append(xs_shape[j]) + + for r in range(self.samples): + p = np.random.permutation(n_features) + x = xs.copy().reshape(run_shape) + y = None + for i in p: + if y is None: + y = self._session_run(self.T, x.reshape(xs_shape), ys, batch_size) + x[:, i] = 0 + y0 = self._session_run(self.T, x.reshape(xs_shape), ys, batch_size) + delta = y - y0 + delta_aggregated = np.sum( + delta.reshape((batch_size, -1)), -1, keepdims=False) + result[:, i] += delta_aggregated + y = y0 + + shapley = result / self.samples + return shapley.reshape(reconstruction_shape) # ----------------------------------------------------------------------------- # END ATTRIBUTION METHODS # ----------------------------------------------------------------------------- - attribution_methods = 
OrderedDict({ 'zero': (DummyZero, 0), 'saliency': (Saliency, 1), @@ -538,104 +605,117 @@ def run(self, xs, ys=None, batch_size=None): }) - -@ops.RegisterGradient("DeepExplainGrad") +@ops.RegisterGradient('DeepExplainGrad') def deepexplain_grad(op, grad): - global _ENABLED_METHOD_CLASS, _GRAD_OVERRIDE_CHECKFLAG - _GRAD_OVERRIDE_CHECKFLAG = 1 - if _ENABLED_METHOD_CLASS is not None \ - and issubclass(_ENABLED_METHOD_CLASS, GradientBasedMethod): - return _ENABLED_METHOD_CLASS.nonlinearity_grad_override(op, grad) - else: - return original_grad(op, grad) + global _ENABLED_METHOD_CLASS, _GRAD_OVERRIDE_CHECKFLAG + _GRAD_OVERRIDE_CHECKFLAG = 1 + if _ENABLED_METHOD_CLASS is not None \ + and issubclass(_ENABLED_METHOD_CLASS, GradientBasedMethod): + return _ENABLED_METHOD_CLASS.nonlinearity_grad_override(op, grad) + else: + return original_grad(op, grad) class DeepExplain(object): - def __init__(self, graph=None, session=tf.get_default_session()): - self.method = None - self.batch_size = None - self.session = session - self.graph = session.graph if graph is None else graph - self.graph_context = self.graph.as_default() - self.override_context = self.graph.gradient_override_map(self.get_override_map()) - self.keras_phase_placeholder = None - self.context_on = False - if self.session is None: - raise RuntimeError('DeepExplain: could not retrieve a session. 
Use DeepExplain(session=your_session).') - - def __enter__(self): - # Override gradient of all ops created in context - self.graph_context.__enter__() - self.override_context.__enter__() - self.context_on = True - return self - - def __exit__(self, type, value, traceback): - self.graph_context.__exit__(type, value, traceback) - self.override_context.__exit__(type, value, traceback) - self.context_on = False - - def get_explainer(self, method, T, X, **kwargs): - if not self.context_on: - raise RuntimeError('Explain can be called only within a DeepExplain context.') - global _ENABLED_METHOD_CLASS, _GRAD_OVERRIDE_CHECKFLAG - self.method = method - if self.method in attribution_methods: - method_class, method_flag = attribution_methods[self.method] - else: - raise RuntimeError('Method must be in %s' % list(attribution_methods.keys())) - if isinstance(X, list): - for x in X: - if 'tensor' not in str(type(x)).lower(): - raise RuntimeError('If a list, X must contain only Tensorflow Tensor objects') - else: - if 'tensor' not in str(type(X)).lower(): - raise RuntimeError('X must be a Tensorflow Tensor object or a list of them') - - if 'tensor' not in str(type(T)).lower(): - raise RuntimeError('T must be a Tensorflow Tensor object') - - logging.info('DeepExplain: running "%s" explanation method (%d)' % (self.method, method_flag)) - self._check_ops() - _GRAD_OVERRIDE_CHECKFLAG = 0 - - _ENABLED_METHOD_CLASS = method_class - method = _ENABLED_METHOD_CLASS(T, X, - self.session, - keras_learning_phase=self.keras_phase_placeholder, - **kwargs) - - if issubclass(_ENABLED_METHOD_CLASS, GradientBasedMethod) and _GRAD_OVERRIDE_CHECKFLAG == 0: - warnings.warn('DeepExplain detected you are trying to use an attribution method that requires ' - 'gradient override but the original gradient was used instead. You might have forgot to ' - '(re)create your graph within the DeepExlain context. 
Results are not reliable!') - _ENABLED_METHOD_CLASS = None - _GRAD_OVERRIDE_CHECKFLAG = 0 - self.keras_phase_placeholder = None - return method - - def explain(self, method, T, X, xs, ys=None, batch_size=None, **kwargs): - explainer = self.get_explainer(method, T, X, **kwargs) - return explainer.run(xs, ys, batch_size) - - @staticmethod - def get_override_map(): - return dict((a, 'DeepExplainGrad') for a in SUPPORTED_ACTIVATIONS) - - def _check_ops(self): - """ - Heuristically check if any op is in the list of unsupported activation functions. - This does not cover all cases where explanation methods would fail, and must be improved in the future. - Also, check if the placeholder named 'keras_learning_phase' exists in the graph. This is used by Keras - and needs to be passed in feed_dict. - :return: - """ - g = tf.get_default_graph() - for op in g.get_operations(): - if len(op.inputs) > 0 and not op.name.startswith('gradients'): - if op.type in UNSUPPORTED_ACTIVATIONS: - warnings.warn('Detected unsupported activation (%s). ' - 'This might lead to unexpected or wrong results.' % op.type) - elif 'keras_learning_phase' in op.name: - self.keras_phase_placeholder = op.outputs[0] \ No newline at end of file + def __init__(self, graph=None, session=tf.get_default_session()): + self.method = None + self.batch_size = None + self.session = session + self.graph = session.graph if graph is None else graph + self.graph_context = self.graph.as_default() + self.override_context = self.graph.gradient_override_map( + self.get_override_map()) + self.keras_phase_placeholder = None + self.context_on = False + if self.session is None: + raise RuntimeError( + 'DeepExplain: could not retrieve a session. Use DeepExplain(session=your_session).' 
+ ) + + def __enter__(self): + # Override gradient of all ops created in context + self.graph_context.__enter__() + self.override_context.__enter__() + self.context_on = True + return self + + def __exit__(self, type, value, traceback): + self.graph_context.__exit__(type, value, traceback) + self.override_context.__exit__(type, value, traceback) + self.context_on = False + + def get_explainer(self, method, T, X, **kwargs): + if not self.context_on: + raise RuntimeError( + 'Explain can be called only within a DeepExplain context.') + global _ENABLED_METHOD_CLASS, _GRAD_OVERRIDE_CHECKFLAG + self.method = method + if self.method in attribution_methods: + method_class, method_flag = attribution_methods[self.method] + else: + raise RuntimeError('Method must be in %s' % + list(attribution_methods.keys())) + if isinstance(X, list): + for x in X: + if 'tensor' not in str(type(x)).lower(): + raise RuntimeError( + 'If a list, X must contain only Tensorflow Tensor objects') + else: + if 'tensor' not in str(type(X)).lower(): + raise RuntimeError( + 'X must be a Tensorflow Tensor object or a list of them') + + if 'tensor' not in str(type(T)).lower(): + raise RuntimeError('T must be a Tensorflow Tensor object') + + logging.info('DeepExplain: running "%s" explanation method (%d)' % + (self.method, method_flag)) + self._check_ops() + _GRAD_OVERRIDE_CHECKFLAG = 0 + + _ENABLED_METHOD_CLASS = method_class + method = _ENABLED_METHOD_CLASS( + T, + X, + self.session, + keras_learning_phase=self.keras_phase_placeholder, + **kwargs) + + if issubclass(_ENABLED_METHOD_CLASS, + GradientBasedMethod) and _GRAD_OVERRIDE_CHECKFLAG == 0: + warnings.warn( + 'DeepExplain detected you are trying to use an attribution method that requires ' + 'gradient override but the original gradient was used instead. You might have forgot to ' + '(re)create your graph within the DeepExlain context. Results are not reliable!' 
+ ) + _ENABLED_METHOD_CLASS = None + _GRAD_OVERRIDE_CHECKFLAG = 0 + self.keras_phase_placeholder = None + return method + + def explain(self, method, T, X, xs, ys=None, batch_size=None, **kwargs): + explainer = self.get_explainer(method, T, X, **kwargs) + return explainer.run(xs, ys, batch_size) + + @staticmethod + def get_override_map(): + return dict((a, 'DeepExplainGrad') for a in SUPPORTED_ACTIVATIONS) + + def _check_ops(self): + """Heuristically check if any op is in the list of unsupported activation functions. + + This does not cover all cases where explanation methods would fail, and must be improved in the future. + Also, check if the placeholder named 'keras_learning_phase' exists in the graph. This is used by Keras + and needs to be passed in feed_dict. + :return: + """ + g = tf.get_default_graph() + for op in g.get_operations(): + if len(op.inputs) > 0 and not op.name.startswith('gradients'): + if op.type in UNSUPPORTED_ACTIVATIONS: + warnings.warn('Detected unsupported activation (%s). ' + 'This might lead to unexpected or wrong results.' % + op.type) + elif 'keras_learning_phase' in op.name: + self.keras_phase_placeholder = op.outputs[0] diff --git a/easy_rec/python/tools/explainer/utils.py b/easy_rec/python/tools/explainer/utils.py index b697bf230..574d067a8 100644 --- a/easy_rec/python/tools/explainer/utils.py +++ b/easy_rec/python/tools/explainer/utils.py @@ -7,63 +7,64 @@ def make_batches(size, batch_size): - """Returns a list of batch indices (tuples of indices). - # Arguments - size: Integer, total size of the data to slice into batches. - batch_size: Integer, batch size. - # Returns - A list of tuples of array indices. - """ - num_batches = (size + batch_size - 1) // batch_size # round up - return [(i * batch_size, min(size, (i + 1) * batch_size)) - for i in range(num_batches)] + """Returns a list of batch indices (tuples of indices). + + # Arguments + size: Integer, total size of the data to slice into batches. 
+ batch_size: Integer, batch size. + # Returns + A list of tuples of array indices. + """ + num_batches = (size + batch_size - 1) // batch_size # round up + return [(i * batch_size, min(size, (i + 1) * batch_size)) + for i in range(num_batches)] def to_list(x, allow_tuple=False): - """Normalizes a list/tensor into a list. - If a tensor is passed, we return - a list of size 1 containing the tensor. - # Arguments - x: target object to be normalized. - allow_tuple: If False and x is a tuple, - it will be converted into a list - with a single element (the tuple). - Else converts the tuple to a list. - # Returns - A list. - """ - if isinstance(x, list): - return x - if allow_tuple and isinstance(x, tuple): - return list(x) - return [x] + """Normalizes a list/tensor into a list. If a tensor is passed, we return a list of size 1 containing the tensor. + + # Arguments + x: target object to be normalized. + allow_tuple: If False and x is a tuple, + it will be converted into a list + with a single element (the tuple). + Else converts the tuple to a list. + # Returns + A list. + """ + if isinstance(x, list): + return x + if allow_tuple and isinstance(x, tuple): + return list(x) + return [x] def unpack_singleton(x): - """Gets the equivalent np-array if the iterable has only one value. - Otherwise return the iterable. - # Argument - x: A list or tuple. - # Returns - The same iterable or the iterable converted to a np-array. - """ - if len(x) == 1: - return np.array(x) - return x + """Gets the equivalent np-array if the iterable has only one value. Otherwise return the iterable. + + # Argument + x: A list or tuple. + # Returns + The same iterable or the iterable converted to a np-array. + """ + if len(x) == 1: + return np.array(x) + return x def slice_arrays(arrays, start=None, stop=None): - """Slices an array or list of arrays. 
- """ - if arrays is None: - return [None] - elif isinstance(arrays, list): - return [None if x is None else x[start:stop] for x in arrays] - else: - return arrays[start:stop] + """Slices an array or list of arrays.""" + if arrays is None: + return [None] + elif isinstance(arrays, list): + return [None if x is None else x[start:stop] for x in arrays] + else: + return arrays[start:stop] def placeholder_from_data(numpy_array): - if numpy_array is None: - return None - return tf.placeholder('float', [None,] + list(numpy_array.shape[1:])) + if numpy_array is None: + return None + return tf.placeholder('float', [ + None, + ] + list(numpy_array.shape[1:])) diff --git a/easy_rec/python/utils/activation.py b/easy_rec/python/utils/activation.py index f52a012ae..89044f7a3 100644 --- a/easy_rec/python/utils/activation.py +++ b/easy_rec/python/utils/activation.py @@ -57,7 +57,7 @@ def gelu(x, name='gelu'): """ with tf.name_scope(name): cdf = 0.5 * (1.0 + tf.tanh( - (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) + (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) return x * cdf diff --git a/setup.cfg b/setup.cfg index b180b9fb1..82650a70f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ multi_line_output = 7 force_single_line = true known_standard_library = setuptools known_first_party = easy_rec -known_third_party = absl,common_io,docutils,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml +known_third_party = absl,common_io,docutils,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,skimage,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml no_lines_before = LOCALFOLDER default_section = THIRDPARTY skip = easy_rec/python/protos From 8509174b4346c5c1b4e87fc5d6799272a28d29ff Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 8 May 2023 20:08:32 +0800 Subject: [PATCH 23/54] [feat]: add const feature column --- 
easy_rec/python/feature_column/feature_column.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/easy_rec/python/feature_column/feature_column.py b/easy_rec/python/feature_column/feature_column.py index 8f4a88913..1f62faef1 100644 --- a/easy_rec/python/feature_column/feature_column.py +++ b/easy_rec/python/feature_column/feature_column.py @@ -423,8 +423,11 @@ def parse_const_feature(self, config): """ feature_name = config.feature_name if config.HasField('feature_name') \ else config.input_names[0] + dim = config.raw_input_dim + if config.HasField('embedding_dim'): + dim = config.embedding_dim fc = feature_column.constant_numeric_column( - feature_name, shape=(config.embedding_dim,), feature_name=feature_name) + feature_name, shape=(dim,), feature_name=feature_name) if self.is_wide(config): self._wide_columns[feature_name] = fc if self.is_deep(config): From eba4219c82d784cb48e32aa17d34ab3ed6d4a366 Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 9 May 2023 14:56:30 +0800 Subject: [PATCH 24/54] [feat]: add feature selection tool --- easy_rec/python/compat/sort_ops.py | 217 +++++++++++++++++++++ easy_rec/python/input/input.py | 10 +- easy_rec/python/layers/fscd_layer.py | 66 ++++--- easy_rec/python/tools/feature_selection.py | 93 +++++++++ easy_rec/python/utils/tf_utils.py | 13 ++ 5 files changed, 363 insertions(+), 36 deletions(-) create mode 100644 easy_rec/python/compat/sort_ops.py diff --git a/easy_rec/python/compat/sort_ops.py b/easy_rec/python/compat/sort_ops.py new file mode 100644 index 000000000..f7c5bf3a5 --- /dev/null +++ b/easy_rec/python/compat/sort_ops.py @@ -0,0 +1,217 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Support for sorting tensors. + +@@argsort +@@sort +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops as framework_ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.util.tf_export import tf_export + + +@tf_export('sort') +def sort(values, axis=-1, direction='ASCENDING', name=None): + """Sorts a tensor. + + Usage: + + ```python + import tensorflow as tf + a = [1, 10, 26.9, 2.8, 166.32, 62.3] + b = tf.sort(a,axis=-1,direction='ASCENDING',name=None) + c = tf.keras.backend.eval(b) + # Here, c = [ 1. 2.8 10. 26.9 62.3 166.32] + ``` + + Args: + values: 1-D or higher numeric `Tensor`. + axis: The axis along which to sort. The default is -1, which sorts the last + axis. + direction: The direction in which to sort the values (`'ASCENDING'` or + `'DESCENDING'`). + name: Optional name for the operation. + + Returns: + A `Tensor` with the same dtype and shape as `values`, with the elements + sorted along the given `axis`. + + Raises: + ValueError: If axis is not a constant scalar, or the direction is invalid. 
+ """ + with framework_ops.name_scope(name, 'sort'): + return _sort_or_argsort(values, axis, direction, return_argsort=False) + + +@tf_export('argsort') +def argsort(values, axis=-1, direction='ASCENDING', stable=False, name=None): + """Returns the indices of a tensor that give its sorted order along an axis. + + For a 1D tensor, `tf.gather(values, tf.argsort(values))` is equivalent to + `tf.sort(values)`. For higher dimensions, the output has the same shape as + `values`, but along the given axis, values represent the index of the sorted + element in that slice of the tensor at the given position. + + Usage: + + ```python + import tensorflow as tf + a = [1, 10, 26.9, 2.8, 166.32, 62.3] + b = tf.argsort(a,axis=-1,direction='ASCENDING',stable=False,name=None) + c = tf.keras.backend.eval(b) + # Here, c = [0 3 1 2 5 4] + ``` + + Args: + values: 1-D or higher numeric `Tensor`. + axis: The axis along which to sort. The default is -1, which sorts the last + axis. + direction: The direction in which to sort the values (`'ASCENDING'` or + `'DESCENDING'`). + stable: If True, equal elements in the original tensor will not be + re-ordered in the returned order. Unstable sort is not yet implemented, + but will eventually be the default for performance reasons. If you require + a stable order, pass `stable=True` for forwards compatibility. + name: Optional name for the operation. + + Returns: + An int32 `Tensor` with the same shape as `values`. The indices that would + sort each slice of the given `values` along the given `axis`. + + Raises: + ValueError: If axis is not a constant scalar, or the direction is invalid. + """ + del stable # Unused. + with framework_ops.name_scope(name, 'argsort'): + return _sort_or_argsort(values, axis, direction, return_argsort=True) + + +def _sort_or_argsort(values, axis, direction, return_argsort): + """Internal sort/argsort implementation. + + Args: + values: The input values. + axis: The axis along which to sort. 
+ direction: 'ASCENDING' or 'DESCENDING'. + return_argsort: Whether to return the argsort result. + + Returns: + Either the sorted values, or the indices of the sorted values in the + original tensor. See the `sort` and `argsort` docstrings. + + Raises: + ValueError: If axis is not a constant scalar, or the direction is invalid. + """ + if direction not in _SORT_IMPL: + raise ValueError('%s should be one of %s' % (direction, ', '.join( + sorted(_SORT_IMPL.keys())))) + # Axis must be an integer, not a Tensor. + axis = framework_ops.convert_to_tensor(axis, name='axis') + axis_static = tensor_util.constant_value(axis) + if axis.shape.ndims != 0 or axis_static is None: + raise ValueError('axis must be a constant scalar') + axis_static = int(axis_static) # Avoids NumPy casting error + + values = framework_ops.convert_to_tensor(values, name='values') + + return _SORT_IMPL[direction](values, axis_static, return_argsort) + + +def _descending_sort(values, axis, return_argsort=False): + """Sorts values in reverse using `top_k`. + + Args: + values: Tensor of numeric values. + axis: Index of the axis which values should be sorted along. + return_argsort: If False, return the sorted values. If True, return the + indices that would sort the values. + + Returns: + The sorted values. + """ + k = array_ops.shape(values)[axis] + rank = array_ops.rank(values) + static_rank = values.shape.ndims + # Fast path: sorting the last axis. + if axis == -1 or axis + 1 == values.get_shape().ndims: + top_k_input = values + transposition = None + else: + # Otherwise, transpose the array. Swap axes `axis` and `rank - 1`. + if axis < 0: + # Calculate the actual axis index if counting from the end. Use the static + # rank if available, or else make the axis back into a tensor. + axis += static_rank or rank + if static_rank is not None: + # Prefer to calculate the transposition array in NumPy and make it a + # constant. 
+ transposition = constant_op.constant( + np.r_[ + # Axes up to axis are unchanged. + np.arange(axis), + # Swap axis and rank - 1. + [static_rank - 1], + # Axes in [axis + 1, rank - 1) are unchanged. + np.arange(axis + 1, static_rank - 1), + # Swap axis and rank - 1. + [axis]], + name='transposition') + else: + # Generate the transposition array from the tensors. + transposition = array_ops.concat( + [ + # Axes up to axis are unchanged. + math_ops.range(axis), + # Swap axis and rank - 1. + [rank - 1], + # Axes in [axis + 1, rank - 1) are unchanged. + math_ops.range(axis + 1, rank - 1), + # Swap axis and rank - 1. + [axis] + ], + axis=0) + top_k_input = array_ops.transpose(values, transposition) + + values, indices = nn_ops.top_k(top_k_input, k) + return_value = indices if return_argsort else values + if transposition is not None: + # transposition contains a single cycle of length 2 (swapping 2 elements), + # so it is an involution (it is its own inverse). + return_value = array_ops.transpose(return_value, transposition) + return return_value + + +def _ascending_sort(values, axis, return_argsort=False): + # Negate the values to get the ascending order from descending sort. + values_or_indices = _descending_sort(-values, axis, return_argsort) + # If not argsort, negate the values again. 
+ return values_or_indices if return_argsort else -values_or_indices + + +_SORT_IMPL = { + 'ASCENDING': _ascending_sort, + 'DESCENDING': _descending_sort, +} diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index d4a990c35..686355ac0 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -18,6 +18,7 @@ from easy_rec.python.utils.input_utils import get_type_defaults from easy_rec.python.utils.load_class import get_register_class_meta from easy_rec.python.utils.load_class import load_by_path +from easy_rec.python.utils.tf_utils import get_config_type from easy_rec.python.utils.tf_utils import get_tf_type if tf.__version__ >= '2.0': @@ -280,8 +281,9 @@ def create_multi_placeholders(self, export_config): logging.info('multi value input_name: %s, dtype: %s' % (input_name, tf_type)) if input_name in erase_features: - def_val = self.get_type_defaults(tf_type, self._input_field_defaults[fid]) - finput = tf.placeholder_with_default(def_val, [None, None], name=placeholder_name) + conf_type = get_config_type(tf_type) + def_val = self.get_type_defaults(conf_type, self._input_field_defaults[fid]) + finput = tf.placeholder_with_default([def_val], [None, None], name=placeholder_name) else: finput = tf.placeholder(tf_type, [None, None], name=placeholder_name) else: @@ -289,8 +291,8 @@ def create_multi_placeholders(self, export_config): tf_type = get_tf_type(ftype) logging.info('input_name: %s, dtype: %s' % (input_name, tf_type)) if input_name in erase_features: - def_val = self.get_type_defaults(tf_type, self._input_field_defaults[fid]) - finput = tf.placeholder_with_default(def_val, [None], name=placeholder_name) + def_val = self.get_type_defaults(ftype, self._input_field_defaults[fid]) + finput = tf.placeholder_with_default([def_val], [None], name=placeholder_name) else: finput = tf.placeholder(tf_type, [None], name=placeholder_name) inputs[input_name] = finput diff --git a/easy_rec/python/layers/fscd_layer.py 
b/easy_rec/python/layers/fscd_layer.py index 78849f162..a99e8aa4b 100644 --- a/easy_rec/python/layers/fscd_layer.py +++ b/easy_rec/python/layers/fscd_layer.py @@ -1,14 +1,16 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. +import logging +from collections import OrderedDict import math import json import numpy as np -import six import tensorflow as tf from tensorflow.python.framework.meta_graph import read_meta_graph_file from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA from easy_rec.python.compat.feature_column.feature_column_v2 import SharedEmbeddingColumn # NOQA +from easy_rec.python.compat.sort_ops import argsort if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -28,7 +30,7 @@ def sigmoid(x): return 1. / (1. + math.exp(-x)) -def get_top_and_bottom_features(pipeline_config, top_k): +def get_feature_importance(pipeline_config, feature_group_name=None): assert pipeline_config.model_config.HasField( 'variational_dropout'), 'variational_dropout must be in model_config' @@ -41,29 +43,50 @@ def get_top_and_bottom_features(pipeline_config, top_k): features = json.loads(col_def) features_map.update(features) - top_features = set() + feature_importance = OrderedDict() tf.logging.info('Reading checkpoint from %s ...' 
% checkpoint_path) reader = tf.train.NewCheckpointReader(checkpoint_path) for feature_group in pipeline_config.model_config.feature_groups: group_name = feature_group.group_name - delta_name = 'fscd_delta_%s' % group_name - if not reader.has_tensor(delta_name): + if feature_group_name is not None and feature_group_name != group_name: continue assert group_name in features_map, "%s not in feature map" % group_name feature_dims = features_map[group_name] + + delta_name = 'fscd_delta_%s' % group_name + if not reader.has_tensor(delta_name): + logging.warn("feature group `%s` doesn't be involved in FSCD layer") + for feature, dim in feature_dims: + feature_importance[feature] = 1.0 + continue + delta = reader.get_tensor(delta_name) - values, indices = tf.nn.top_k(delta, top_k) + indices = argsort(delta, direction='DESCENDING') + keep_prob = tf.nn.sigmoid(delta) with tf.Session() as sess: idx = indices.eval(session=sess) + probs = keep_prob.eval(session=sess) for i in idx: feature = feature_dims[i][0] - top_features.add(feature) + if feature in feature_importance: + raw = feature_importance[feature] + if probs[i] > raw: + logging.info("%s importance change from %d to %d", feature, raw, probs[i]) + feature_importance[feature] = probs[i] + else: + feature_importance[feature] = probs[i] + return feature_importance + +def get_top_and_bottom_features(pipeline_config, top_k): + feature_score = get_feature_importance(pipeline_config) + top_features = set() bottom_features = set() - for group_name, features in six.iteritems(features_map): - for name, dim in features: - if name not in top_features: - bottom_features.add(name) + for feature, score in feature_score.iteritems(): + if len(top_features) < top_k: + top_features.add(feature) + else: + bottom_features.add(feature) print("selected top %d features:" % top_k, ','.join(top_features)) print("removed bottom features:", ','.join(bottom_features)) @@ -127,31 +150,10 @@ def compute_regular_params(self, cols_to_feature): 
"dimension:", dim, "c:", c, "theta:", theta, "alpha:", alpha) return alphas - # def mask_bottom_features(self, cols_to_feature, top_k): - # feature_map = tf.get_collection('variational_dropout') - # features = feature_map[self.name] - # - # delta_name = 'fscd_delta_%s' % self.name - # graph = tf.get_default_graph() - # delta = graph.get_tensor_by_name(delta_name) - # values, indices = tf.nn.top_k(delta, top_k) - # - # output_tensors = [] - # feature_columns = cols_to_feature.keys() - # for column in sorted(feature_columns, key=lambda x: x.name): - # value = cols_to_feature[column] - # output_tensors.append(value) - # return tf.concat(output_tensors, 1) - def __call__(self, cols_to_feature): """ cols_to_feature: an ordered dict mapping feature_column to feature_values """ - # if self._config.HasField('fine_tune_use_top_k_features'): - # k = self._config.fine_tune_use_top_k_features - # assert k > 0, 'config `fine_tune_use_top_k_features` must be large than 0' - # return self.mask_bottom_features(cols_to_feature, k) - feature_dimension = [] output_tensors = [] alphas = [] diff --git a/easy_rec/python/tools/feature_selection.py b/easy_rec/python/tools/feature_selection.py index 05b193897..cbe717351 100644 --- a/easy_rec/python/tools/feature_selection.py +++ b/easy_rec/python/tools/feature_selection.py @@ -294,6 +294,90 @@ def _visualize_feature_importance(self, feature_importance, group_name): plt.savefig(f, format='png') +class FSCD(object): + def __init__(self, + config_path, + output_dir, + topk, + checkpoint_path=None, + fg_path=None, + visualize=False): + self._config_path = config_path + self._output_dir = output_dir + self._topk = topk + if not tf.gfile.Exists(self._output_dir): + tf.gfile.MakeDirs(self._output_dir) + self._checkpoint_path = checkpoint_path + self._fg_path = fg_path + self._visualize = visualize + + def process(self): + tf.logging.info('Loading delta of FSCD layer ...') + config = config_util.get_configs_from_pipeline_file(self._config_path) + 
assert config.model_config.HasField( + 'variational_dropout'), 'variational_dropout must be in model_config' + + feature_importance_map = {} + from easy_rec.python.layers.fscd_layer import get_feature_importance + for feature_group in config.model_config.feature_groups: + group_name = feature_group.group_name + tf.logging.info('Calculating %s feature importance ...' % group_name) + feature_importance = get_feature_importance(config, group_name) + feature_importance_map[group_name] = feature_importance + + tf.logging.info('Dump %s feature importance to csv ...' % group_name) + self._dump_to_csv(feature_importance, group_name) + + if self._visualize: + tf.logging.info('Visualizing %s feature importance ...' % group_name) + self._visualize_feature_importance(feature_importance, group_name) + + tf.logging.info('Processing model config ...') + self._process_config(feature_importance_map) + + def _dump_to_csv(self, feature_importance, group_name): + """Dump feature importance data to a csv file.""" + with tf.gfile.Open( + os.path.join(self._output_dir, + 'feature_importance_%s.csv' % group_name), 'w') as f: + df = pd.DataFrame( + columns=['feature_name', 'importance'], + data=[list(kv) for kv in feature_importance.items()]) + df.to_csv(f, encoding='gbk') + + def _visualize_feature_importance(self, feature_importance, group_name): + """Draw feature importance histogram.""" + df = pd.DataFrame( + columns=['feature_name', 'importance'], + data=[list(kv) for kv in feature_importance.items()]) + df['color'] = ['red' if x < 0.5 else 'green' for x in df['importance']] + df.sort_values('importance', inplace=True, ascending=False) + df.reset_index(inplace=True) + # Draw plot + plt.figure(figsize=(90, 200), dpi=100) + plt.hlines(y=df.index, xmin=0, xmax=df.mean_drop_p) + for x, y, tex in zip(df.mean_drop_p, df.index, df.mean_drop_p): + plt.text( + x, + y, + round(tex, 2), + horizontalalignment='right' if x < 0 else 'left', + verticalalignment='center', + fontdict={ + 'color': 
'red' if x < 0 else 'green', + 'size': 14 + }) + # Decorations + plt.yticks(df.index, df.feature_name, fontsize=20) + plt.title('Feature Importance', fontdict={'size': 30}) + plt.grid(linestyle='--', alpha=0.5) + plt.xlim(0, 1) + with tf.gfile.GFile( + os.path.join(self._output_dir, + 'feature_importance_pic_%s.png' % group_name), 'wb') as f: + plt.savefig(f, format='png') + + if __name__ == '__main__': if FLAGS.model_type == 'variational_dropout': fs = VariationalDropoutFS( @@ -304,6 +388,15 @@ def _visualize_feature_importance(self, feature_importance, group_name): fg_path=FLAGS.fg_path, visualize=FLAGS.visualize) fs.process() + elif FLAGS.model_type == 'fscd': + fs = FSCD( + FLAGS.config_path, + FLAGS.output_dir, + FLAGS.topk, + checkpoint_path=FLAGS.checkpoint_path, + fg_path=FLAGS.fg_path, + visualize=FLAGS.visualize) + fs.process() else: raise ValueError('Unknown feature selection model type %s' % FLAGS.model_type) diff --git a/easy_rec/python/utils/tf_utils.py b/easy_rec/python/utils/tf_utils.py index 20e19496c..e1026c132 100644 --- a/easy_rec/python/utils/tf_utils.py +++ b/easy_rec/python/utils/tf_utils.py @@ -33,3 +33,16 @@ def get_col_type(tf_type): } assert tf_type in type_map, 'invalid type: %s' % tf_type return type_map[tf_type] + + +def get_config_type(tf_type): + type_map = { + tf.int32: DatasetConfig.INT32, + tf.int64: DatasetConfig.INT64, + tf.string: DatasetConfig.STRING, + tf.bool: DatasetConfig.BOOL, + tf.float32: DatasetConfig.FLOAT, + tf.double: DatasetConfig.DOUBLE + } + assert tf_type in type_map, 'invalid type: %s' % tf_type + return type_map[tf_type] From c27c1d88bc7adaf1ddc8239454d5c6fc5bdf61ff Mon Sep 17 00:00:00 2001 From: weisu Date: Thu, 11 May 2023 20:12:41 +0800 Subject: [PATCH 25/54] [feat]: add feature selection tool --- easy_rec/python/builders/loss_builder.py | 11 ++- .../feature_column/feature_column_v2.py | 31 ++----- easy_rec/python/compat/sort_ops.py | 47 +++++----- easy_rec/python/inference/predictor.py | 3 + 
easy_rec/python/input/input.py | 57 ++++++++---- easy_rec/python/layers/fscd_layer.py | 65 +++++++------ easy_rec/python/loss/jrc_loss.py | 13 ++- easy_rec/python/tools/feature_selection.py | 93 ++++++++++++++++--- easy_rec/python/tools/view_saved_model.py | 38 ++++++++ setup.cfg | 2 +- 10 files changed, 255 insertions(+), 105 deletions(-) create mode 100644 easy_rec/python/tools/view_saved_model.py diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index 7459372a5..e1b32fde1 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -42,11 +42,16 @@ def build(loss_type, labels=label, predictions=pred, weights=loss_weight, **kwargs) elif loss_type == LossType.JRC_LOSS: alpha = 0.5 if loss_param is None else loss_param.alpha - auto_weight = False if loss_param is None else not loss_param.HasField( - 'alpha') + auto = False if loss_param is None else not loss_param.HasField('alpha') session = kwargs.get('session_ids', None) return jrc_loss( - label, pred, session, alpha, auto_weight=auto_weight, name=loss_name) + label, + pred, + session, + alpha, + auto_weight=auto, + sample_weights=loss_weight, + name=loss_name) elif loss_type == LossType.PAIR_WISE_LOSS: session = kwargs.get('session_ids', None) margin = 0 if loss_param is None else loss_param.margin diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index a17ce8fdc..578b0a50a 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -1329,11 +1329,10 @@ def numeric_column(key, def constant_numeric_column(key, - shape=(1,), - default_value=None, - dtype=dtypes.float32, - normalizer_fn=None, - feature_name=None): + shape=(1,), + default_value=None, + dtype=dtypes.float32, + feature_name=None): """Represents real valued or numerical features. 
Example: @@ -1368,12 +1367,6 @@ def constant_numeric_column(key, the shape of the `default_value` should be equal to the given `shape`. dtype: defines the type of values. Default value is `tf.float32`. Must be a non-quantized, real integer or floating point type. - normalizer_fn: If not `None`, a function that can be used to normalize the - value of the tensor after `default_value` is applied for parsing. - Normalizer function takes the input `Tensor` as its argument, and returns - the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that - even though the most common use case of this function is normalization, it - can be used for any kind of Tensorflow transformations. Returns: A `NumericColumn`. @@ -1391,18 +1384,13 @@ def constant_numeric_column(key, 'dtype: {}, key: {}'.format(dtype, key)) default_value = fc_utils.check_default_value(shape, default_value, dtype, key) - if normalizer_fn is not None and not callable(normalizer_fn): - raise TypeError( - 'normalizer_fn must be a callable. 
Given: {}'.format(normalizer_fn)) - fc_utils.assert_key_is_string(key) return ConstantNumericColumn( feature_name=feature_name, key=key, shape=shape, default_value=default_value, - dtype=dtype, - normalizer_fn=normalizer_fn) + dtype=dtype) def bucketized_column(source_column, boundaries): @@ -2701,7 +2689,7 @@ class ConstantNumericColumn( fc_old._DenseColumn, # pylint: disable=protected-access collections.namedtuple('ConstantNumericColumn', ('feature_name', 'key', 'shape', 'default_value', - 'dtype', 'normalizer_fn'))): + 'dtype'))): """see `numeric_column`.""" @property @@ -2734,8 +2722,11 @@ def _parse_example_spec(self): return self.parse_example_spec def _transform_input_tensor(self, input_tensor): + shape = [1] + list(self.shape) def_val = 0 if self.default_value is None else self.default_value - return tf.constant(def_val, dtypes.float32, self.shape) + row = tf.constant(def_val, dtypes.float32, shape) + batch_size = tf.shape(input_tensor)[0] + return tf.tile(row, [batch_size, 1]) @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, _FEATURE_COLUMN_DEPRECATION) @@ -2746,8 +2737,6 @@ def _transform_feature(self, inputs): def transform_feature(self, transformation_cache, state_manager): """See `FeatureColumn` base class. - In this case, we apply the `normalizer_fn` to the input tensor. - Args: transformation_cache: A `FeatureTransformationCache` object to access features. 
diff --git a/easy_rec/python/compat/sort_ops.py b/easy_rec/python/compat/sort_ops.py index f7c5bf3a5..bd7f92ab1 100644 --- a/easy_rec/python/compat/sort_ops.py +++ b/easy_rec/python/compat/sort_ops.py @@ -23,7 +23,6 @@ from __future__ import print_function import numpy as np - from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops as framework_ops from tensorflow.python.framework import tensor_util @@ -126,8 +125,8 @@ def _sort_or_argsort(values, axis, direction, return_argsort): ValueError: If axis is not a constant scalar, or the direction is invalid. """ if direction not in _SORT_IMPL: - raise ValueError('%s should be one of %s' % (direction, ', '.join( - sorted(_SORT_IMPL.keys())))) + raise ValueError('%s should be one of %s' % + (direction, ', '.join(sorted(_SORT_IMPL.keys())))) # Axis must be an integer, not a Tensor. axis = framework_ops.convert_to_tensor(axis, name='axis') axis_static = tensor_util.constant_value(axis) @@ -169,30 +168,30 @@ def _descending_sort(values, axis, return_argsort=False): # Prefer to calculate the transposition array in NumPy and make it a # constant. transposition = constant_op.constant( - np.r_[ - # Axes up to axis are unchanged. - np.arange(axis), - # Swap axis and rank - 1. - [static_rank - 1], - # Axes in [axis + 1, rank - 1) are unchanged. - np.arange(axis + 1, static_rank - 1), - # Swap axis and rank - 1. - [axis]], - name='transposition') + np.r_[ + # Axes up to axis are unchanged. + np.arange(axis), + # Swap axis and rank - 1. + [static_rank - 1], + # Axes in [axis + 1, rank - 1) are unchanged. + np.arange(axis + 1, static_rank - 1), + # Swap axis and rank - 1. + [axis]], + name='transposition') else: # Generate the transposition array from the tensors. transposition = array_ops.concat( - [ - # Axes up to axis are unchanged. - math_ops.range(axis), - # Swap axis and rank - 1. - [rank - 1], - # Axes in [axis + 1, rank - 1) are unchanged. 
- math_ops.range(axis + 1, rank - 1), - # Swap axis and rank - 1. - [axis] - ], - axis=0) + [ + # Axes up to axis are unchanged. + math_ops.range(axis), + # Swap axis and rank - 1. + [rank - 1], + # Axes in [axis + 1, rank - 1) are unchanged. + math_ops.range(axis + 1, rank - 1), + # Swap axis and rank - 1. + [axis] + ], + axis=0) top_k_input = array_ops.transpose(values, transposition) values, indices = nn_ops.top_k(top_k_input, k) diff --git a/easy_rec/python/inference/predictor.py b/easy_rec/python/inference/predictor.py index dba53f967..e39592c18 100644 --- a/easy_rec/python/inference/predictor.py +++ b/easy_rec/python/inference/predictor.py @@ -222,6 +222,9 @@ def _build_model(self): logging.info('Load input binding: %s -> %s' % (name, tensor.name)) input_name = tensor.name input_name, _ = input_name.split(':') + input_op = self._graph.get_operation_by_name(input_name) + if input_op.type == "PlaceholderWithDefault": + continue try: input_id = input_name.split('_')[-1] input_id = int(input_id) diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index 686355ac0..2775ad1ac 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -94,12 +94,14 @@ def __init__(self, # from the types defined in input_fields # it is used in create_multi_placeholders self._multi_value_types = {} - + self._const_features = set() self._normalizer_fn = {} for fc in self._feature_configs: for input_name in fc.input_names: assert input_name in self._input_fields, 'invalid input_name in %s' % str( fc) + if fc.feature_type == fc.ConstFeature: + self._const_features.add(input_name) if input_name not in self._effective_fields: self._effective_fields.append(input_name) @@ -227,17 +229,17 @@ def should_stop(self, curr_epoch): return total_epoch is not None and curr_epoch >= total_epoch def get_erase_features(self): - if self._pipeline_config is None: - return set() + if len(self._const_features) == 0: + return self._const_features - config = 
self._pipeline_config.model_config.variational_dropout - if config is None: - return set() + for fc in self._feature_configs: + if fc.feature_type == fc.ConstFeature: + continue + for input_name in fc.input_names: + if input_name in self._const_features: + self._const_features.remove(input_name) - top_k = config.fine_tune_use_top_k_features - from easy_rec.python.layers.fscd_layer import get_top_and_bottom_features - _, erase_features = get_top_and_bottom_features(self._pipeline_config, top_k) - return erase_features + return self._const_features def create_multi_placeholders(self, export_config): """Create multiply placeholders on export, one for each feature. @@ -282,8 +284,10 @@ def create_multi_placeholders(self, export_config): (input_name, tf_type)) if input_name in erase_features: conf_type = get_config_type(tf_type) - def_val = self.get_type_defaults(conf_type, self._input_field_defaults[fid]) - finput = tf.placeholder_with_default([def_val], [None, None], name=placeholder_name) + def_val = self.get_type_defaults(conf_type, + self._input_field_defaults[fid]) + finput = tf.placeholder_with_default([def_val], [None, None], + name=placeholder_name) else: finput = tf.placeholder(tf_type, [None, None], name=placeholder_name) else: @@ -291,8 +295,10 @@ def create_multi_placeholders(self, export_config): tf_type = get_tf_type(ftype) logging.info('input_name: %s, dtype: %s' % (input_name, tf_type)) if input_name in erase_features: - def_val = self.get_type_defaults(ftype, self._input_field_defaults[fid]) - finput = tf.placeholder_with_default([def_val], [None], name=placeholder_name) + def_val = self.get_type_defaults(ftype, + self._input_field_defaults[fid]) + finput = tf.placeholder_with_default([def_val], [None], + name=placeholder_name) else: finput = tf.placeholder(tf_type, [None], name=placeholder_name) inputs[input_name] = finput @@ -500,10 +506,19 @@ def _parse_id_feature(self, fc, parsed_dict, field_dict): tf.int32, name='%s_str_2_int' % input_0) - def 
_parse_const_feature(self, fc, parsed_dict, field_dict): + def _parse_const_feature(self, fc, parsed_dict, field_dict, batch_size): input_0 = fc.input_names[0] + input_tensor = field_dict[input_0] + + def expand_input(): + multiples = [1] * input_tensor.shape.ndims + multiples[0] = batch_size + return tf.tile(input_tensor, multiples) + + input_tensor = tf.cond(tf.equal(tf.shape(input_tensor)[0], batch_size), + lambda: input_tensor, expand_input) feature_name = fc.feature_name if fc.HasField('feature_name') else input_0 - parsed_dict[feature_name] = field_dict[input_0] + parsed_dict[feature_name] = input_tensor def _parse_raw_feature(self, fc, parsed_dict, field_dict): input_0 = fc.input_names[0] @@ -795,6 +810,14 @@ def _preprocess(self, field_dict): parsed_dict[k] = v self._appended_fields.append(k) + batch_size = 1 + for fc in self._feature_configs: + feature_type = fc.feature_type + if feature_type != fc.ConstFeature: + input_0 = fc.input_names[0] + batch_size = tf.shape(field_dict[input_0])[0] + break + for fc in self._feature_configs: feature_name = fc.feature_name feature_type = fc.feature_type @@ -813,7 +836,7 @@ def _preprocess(self, field_dict): elif feature_type == fc.ExprFeature: self._parse_expr_feature(fc, parsed_dict, field_dict) elif feature_type == fc.ConstFeature: - self._parse_const_feature(fc, parsed_dict, field_dict) + self._parse_const_feature(fc, parsed_dict, field_dict, batch_size) else: feature_name = fc.feature_name if fc.HasField( 'feature_name') else fc.input_names[0] diff --git a/easy_rec/python/layers/fscd_layer.py b/easy_rec/python/layers/fscd_layer.py index a99e8aa4b..163cf18f7 100644 --- a/easy_rec/python/layers/fscd_layer.py +++ b/easy_rec/python/layers/fscd_layer.py @@ -1,16 +1,19 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
+import json import logging -from collections import OrderedDict import math -import json +from collections import OrderedDict + import numpy as np import tensorflow as tf from tensorflow.python.framework.meta_graph import read_meta_graph_file + +from easy_rec.python.compat.sort_ops import argsort + from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA from easy_rec.python.compat.feature_column.feature_column_v2 import SharedEmbeddingColumn # NOQA -from easy_rec.python.compat.sort_ops import argsort if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -32,14 +35,14 @@ def sigmoid(x): def get_feature_importance(pipeline_config, feature_group_name=None): assert pipeline_config.model_config.HasField( - 'variational_dropout'), 'variational_dropout must be in model_config' + 'variational_dropout'), 'variational_dropout must be in model_config' checkpoint_path = tf.train.latest_checkpoint(pipeline_config.model_dir) meta_graph_def = read_meta_graph_file(checkpoint_path + '.meta') features_map = dict() for col_def in meta_graph_def.collection_def[ - 'variational_dropout'].bytes_list.value: + 'variational_dropout'].bytes_list.value: features = json.loads(col_def) features_map.update(features) @@ -50,7 +53,12 @@ def get_feature_importance(pipeline_config, feature_group_name=None): group_name = feature_group.group_name if feature_group_name is not None and feature_group_name != group_name: continue - assert group_name in features_map, "%s not in feature map" % group_name + # assert group_name in features_map, "%s not in feature map" % group_name + if group_name not in features_map: + # for now, sequence feature groups are not supported + logging.warn('%s not in feature map' % group_name) + continue + feature_dims = features_map[group_name] delta_name = 'fscd_delta_%s' % group_name @@ -71,26 +79,27 @@ def get_feature_importance(pipeline_config, 
feature_group_name=None): if feature in feature_importance: raw = feature_importance[feature] if probs[i] > raw: - logging.info("%s importance change from %d to %d", feature, raw, probs[i]) + logging.info('%s importance change from %d to %d', feature, raw, + probs[i]) feature_importance[feature] = probs[i] else: feature_importance[feature] = probs[i] return feature_importance -def get_top_and_bottom_features(pipeline_config, top_k): - feature_score = get_feature_importance(pipeline_config) - top_features = set() - bottom_features = set() - for feature, score in feature_score.iteritems(): - if len(top_features) < top_k: - top_features.add(feature) - else: - bottom_features.add(feature) - - print("selected top %d features:" % top_k, ','.join(top_features)) - print("removed bottom features:", ','.join(bottom_features)) - return top_features, bottom_features +# def get_top_and_bottom_features(pipeline_config, top_k): +# feature_score = get_feature_importance(pipeline_config) +# top_features = set() +# bottom_features = set() +# for feature, score in feature_score.iteritems(): +# if len(top_features) < top_k: +# top_features.add(feature) +# else: +# bottom_features.add(feature) +# +# print("selected top %d features:" % top_k, ','.join(top_features)) +# print("removed bottom features:", ','.join(bottom_features)) +# return top_features, bottom_features class FSCDLayer(object): @@ -114,10 +123,10 @@ def __init__(self, def compute_dropout_mask(self, n, temperature=0.1): delta_name = 'fscd_delta_%s' % self.name delta = tf.get_variable( - name=delta_name, - shape=[n], - dtype=tf.float32, - initializer=tf.constant_initializer(0.)) + name=delta_name, + shape=[n], + dtype=tf.float32, + initializer=tf.constant_initializer(0.)) delta = tf.nn.sigmoid(delta) EPSILON = np.finfo(float).eps @@ -146,8 +155,9 @@ def compute_regular_params(self, cols_to_feature): theta = 1.0 - sig_c alpha = math.log(sig_c) - math.log(theta) alphas[fc] = alpha - print(str(fc.raw_name), "complexity:", 
complexity, "cardinality:", cardinal, - "dimension:", dim, "c:", c, "theta:", theta, "alpha:", alpha) + print( + str(fc.raw_name), 'complexity:', complexity, 'cardinality:', cardinal, + 'dimension:', dim, 'c:', c, 'theta:', theta, 'alpha:', alpha) return alphas def __call__(self, cols_to_feature): @@ -171,7 +181,8 @@ def __call__(self, cols_to_feature): feature_dimension.append((column.raw_name, int(value.shape[-1]))) output_features = tf.concat(output_tensors, 1) - tf.add_to_collection('variational_dropout', json.dumps({self.name: feature_dimension})) + tf.add_to_collection('variational_dropout', + json.dumps({self.name: feature_dimension})) batch_size = tf.shape(output_features)[0] t_alpha = tf.convert_to_tensor(alphas, dtype=tf.float32) diff --git a/easy_rec/python/loss/jrc_loss.py b/easy_rec/python/loss/jrc_loss.py index fc8266b2c..fc77bda86 100644 --- a/easy_rec/python/loss/jrc_loss.py +++ b/easy_rec/python/loss/jrc_loss.py @@ -13,6 +13,7 @@ def jrc_loss(labels, session_ids, alpha=0.5, auto_weight=False, + sample_weights=1.0, name=''): """Joint Optimization of Ranking and Calibration with Contextualized Hybrid Model. @@ -24,13 +25,16 @@ def jrc_loss(labels, session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. user_id alpha: the weight to balance ranking loss and calibration loss auto_weight: bool, whether to learn loss weight between ranking loss and calibration loss + sample_weights: Coefficients for the loss. This must be scalar or broadcastable to + `labels` (i.e. same rank and each dimension is either 1 or the same). 
name: the name of loss """ loss_name = name if name else 'jrc_loss' logging.info('[{}] alpha: {}, auto_weight: {}'.format(loss_name, alpha, auto_weight)) - ce_loss = tf.losses.sparse_softmax_cross_entropy(labels, logits) + ce_loss = tf.losses.sparse_softmax_cross_entropy( + labels, logits, weights=sample_weights) labels = tf.expand_dims(labels, 1) # [B, 1] labels = tf.concat([1 - labels, labels], axis=1) # [B, 2] @@ -54,6 +58,13 @@ def jrc_loss(labels, y_neg, y_pos = y[:, :, 0], y[:, :, 1] l_neg, l_pos = logits[:, :, 0], logits[:, :, 1] + if tf.is_numeric_tensor(sample_weights): + logging.info('[%s] use sample weight' % loss_name) + weights = tf.expand_dims(tf.cast(sample_weights, tf.float32), 0) + pairwise_weights = tf.tile(weights, tf.stack([batch_size, 1])) + y_pos *= pairwise_weights + y_neg *= pairwise_weights + # Compute list-wise generative loss -log p(x|y, z) loss_pos = -tf.reduce_sum(y_pos * tf.nn.log_softmax(l_pos, axis=0), axis=0) loss_neg = -tf.reduce_sum(y_neg * tf.nn.log_softmax(l_neg, axis=0), axis=0) diff --git a/easy_rec/python/tools/feature_selection.py b/easy_rec/python/tools/feature_selection.py index cbe717351..065993652 100644 --- a/easy_rec/python/tools/feature_selection.py +++ b/easy_rec/python/tools/feature_selection.py @@ -10,6 +10,7 @@ import tensorflow as tf from tensorflow.python.framework.meta_graph import read_meta_graph_file +from easy_rec.python.protos.feature_config_pb2 import FeatureConfig from easy_rec.python.utils import config_util if tf.__version__ >= '2.0': @@ -19,8 +20,9 @@ matplotlib.use('Agg') # NOQA import matplotlib.pyplot as plt # NOQA -tf.app.flags.DEFINE_string('model_type', 'variational_dropout', - 'feature selection model type') +tf.app.flags.DEFINE_enum('model_type', 'variational_dropout', + ['variational_dropout', 'fscd'], + 'feature selection model type') tf.app.flags.DEFINE_string('config_path', '', 'feature selection model config path') tf.app.flags.DEFINE_string('checkpoint_path', None, @@ -295,6 +297,7 @@ def 
_visualize_feature_importance(self, feature_importance, group_name): class FSCD(object): + def __init__(self, config_path, output_dir, @@ -318,11 +321,16 @@ def process(self): 'variational_dropout'), 'variational_dropout must be in model_config' feature_importance_map = {} + white_feature_group = set() from easy_rec.python.layers.fscd_layer import get_feature_importance for feature_group in config.model_config.feature_groups: group_name = feature_group.group_name tf.logging.info('Calculating %s feature importance ...' % group_name) feature_importance = get_feature_importance(config, group_name) + if len(feature_importance) == 0: + tf.logging.info('No feature importance in group %s' % group_name) + white_feature_group.add(group_name) + continue feature_importance_map[group_name] = feature_importance tf.logging.info('Dump %s feature importance to csv ...' % group_name) @@ -333,7 +341,7 @@ def process(self): self._visualize_feature_importance(feature_importance, group_name) tf.logging.info('Processing model config ...') - self._process_config(feature_importance_map) + self._process_config(feature_importance_map, white_feature_group) def _dump_to_csv(self, feature_importance, group_name): """Dump feature importance data to a csv file.""" @@ -355,8 +363,8 @@ def _visualize_feature_importance(self, feature_importance, group_name): df.reset_index(inplace=True) # Draw plot plt.figure(figsize=(90, 200), dpi=100) - plt.hlines(y=df.index, xmin=0, xmax=df.mean_drop_p) - for x, y, tex in zip(df.mean_drop_p, df.index, df.mean_drop_p): + plt.hlines(y=df.index, xmin=0, xmax=df.importance) + for x, y, tex in zip(df.importance, df.index, df.importance): plt.text( x, y, @@ -377,6 +385,69 @@ def _visualize_feature_importance(self, feature_importance, group_name): 'feature_importance_pic_%s.png' % group_name), 'wb') as f: plt.savefig(f, format='png') + def _process_config(self, feature_importance_map, white_feature_group): + """Process model config and fg config with feature 
selection.""" + excluded_features = set() + for group_name, feature_importance in feature_importance_map.items(): + for i, (feature_name, _) in enumerate(feature_importance.items()): + if i >= self._topk: + excluded_features.add(feature_name) + + config = config_util.get_configs_from_pipeline_file(self._config_path) + # keep sequence features and side-infos + sequence_features = set() + for feature_group in config.model_config.feature_groups: + for sequence_feature in feature_group.sequence_features: + for seq_att_map in sequence_feature.seq_att_map: + for key in seq_att_map.key: + sequence_features.add(key) + for hist_seq in seq_att_map.hist_seq: + sequence_features.add(hist_seq) + # compat with din + for sequence_feature in config.model_config.seq_att_groups: + for seq_att_map in sequence_feature.seq_att_map: + for key in seq_att_map.key: + sequence_features.add(key) + for hist_seq in seq_att_map.hist_seq: + sequence_features.add(hist_seq) + # sequence feature group + for feature_group in config.model_config.feature_groups: + group_name = feature_group.group_name + if group_name not in white_feature_group: + continue + for feature_name in feature_group.feature_names: + sequence_features.add(feature_name) + + excluded_features = excluded_features - sequence_features + + for feature_config in config_util.get_compatible_feature_configs(config): + feature_name = feature_config.input_names[0] + if feature_config.HasField('feature_name'): + feature_name = feature_config.feature_name + if feature_name in excluded_features: + feature_config.feature_type = FeatureConfig.FeatureType.ConstFeature + + config.model_config.ClearField('variational_dropout') + config_util.save_message( + config, + os.path.join(self._output_dir, os.path.basename(self._config_path))) + + if self._fg_path is not None and len(self._fg_path) > 0: + with tf.gfile.Open(self._fg_path) as f: + fg_json = json.load(f, object_pairs_hook=OrderedDict) + features = [] + for feature in fg_json['features']: + if 
'feature_name' in feature: + if feature['feature_name'] not in excluded_features: + features.append(feature) + else: + features.append(feature) + fg_json['features'] = features + + fg_file = os.path.join(self._output_dir, os.path.basename(self._fg_path)) + with tf.gfile.Open(fg_file, 'w') as f: + json.dump(fg_json, f, indent=4) + if __name__ == '__main__': if FLAGS.model_type == 'variational_dropout': @@ -390,12 +461,12 @@ def _visualize_feature_importance(self, feature_importance, group_name): fs.process() elif FLAGS.model_type == 'fscd': fs = FSCD( - FLAGS.config_path, - FLAGS.output_dir, - FLAGS.topk, - checkpoint_path=FLAGS.checkpoint_path, - fg_path=FLAGS.fg_path, - visualize=FLAGS.visualize) + FLAGS.config_path, + FLAGS.output_dir, + FLAGS.topk, + checkpoint_path=FLAGS.checkpoint_path, + fg_path=FLAGS.fg_path, + visualize=FLAGS.visualize) fs.process() else: raise ValueError('Unknown feature selection model type %s' % diff --git a/easy_rec/python/tools/view_saved_model.py b/easy_rec/python/tools/view_saved_model.py new file mode 100644 index 000000000..a3c01909b --- /dev/null +++ b/easy_rec/python/tools/view_saved_model.py @@ -0,0 +1,38 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import argparse +import logging + +from google.protobuf import text_format +from tensorflow.python.platform.gfile import GFile +from tensorflow.core.protobuf import saved_model_pb2 + +logging.basicConfig( + format='[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d : %(message)s', + level=logging.INFO) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--input', type=str, default=None, help='saved model path') + parser.add_argument('--output', type=str, default=None, help='saved model save path') + args = parser.parse_args() + + assert args.input is not None and args.output is not None + + logging.info('saved_model_path: %s' % args.input) + + saved_model = saved_model_pb2.SavedModel() + if args.input.endswith('.pb'): + with GFile(args.input, 'rb') as fin: + saved_model.ParseFromString(fin.read()) + else: + with GFile(args.input, 'r') as fin: + text_format.Merge(fin.read(), saved_model) + + if args.output.endswith('.pbtxt'): + with GFile(args.output, 'w') as fout: + fout.write(text_format.MessageToString(saved_model, as_utf8=True)) + else: + with GFile(args.output, 'wb') as fout: + fout.write(saved_model.SerializeToString()) diff --git a/setup.cfg b/setup.cfg index 2303ef802..cd2b0ac0c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ multi_line_output = 7 force_single_line = true known_standard_library = setuptools known_first_party = easy_rec -known_third_party = absl,common_io,docutils,eas_prediction,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml +known_third_party = absl,common_io,docutils,eas_prediction,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,skimage,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml no_lines_before = LOCALFOLDER default_section = THIRDPARTY skip = easy_rec/python/protos From 524ce671c7445f4842e557a42fbb472f66de337b Mon Sep 17 00:00:00 2001 From: weisu 
Date: Mon, 15 May 2023 14:25:45 +0800 Subject: [PATCH 26/54] [feat]: add feature selection tool --- .../feature_column/feature_column_v2.py | 4 +- easy_rec/python/layers/fscd_layer.py | 15 ------- easy_rec/python/model/multi_task_model.py | 42 ++++++++++++++----- easy_rec/python/model/rank_model.py | 34 ++++++++++----- easy_rec/python/protos/easy_rec_model.proto | 9 +++- 5 files changed, 65 insertions(+), 39 deletions(-) diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index 578b0a50a..eb952e7be 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -1338,7 +1338,7 @@ def constant_numeric_column(key, Example: ```python - price = numeric_column('price') + price = constant_numeric_column('price') columns = [price, ...] features = tf.io.parse_example(..., features=make_parse_example_spec(columns)) dense_tensor = input_layer(features, columns) @@ -1369,7 +1369,7 @@ def constant_numeric_column(key, non-quantized, real integer or floating point type. Returns: - A `NumericColumn`. + A `ConstantNumericColumn`. 
Raises: TypeError: if any dimension in shape is not an int diff --git a/easy_rec/python/layers/fscd_layer.py b/easy_rec/python/layers/fscd_layer.py index 163cf18f7..2b1071787 100644 --- a/easy_rec/python/layers/fscd_layer.py +++ b/easy_rec/python/layers/fscd_layer.py @@ -87,21 +87,6 @@ def get_feature_importance(pipeline_config, feature_group_name=None): return feature_importance -# def get_top_and_bottom_features(pipeline_config, top_k): -# feature_score = get_feature_importance(pipeline_config) -# top_features = set() -# bottom_features = set() -# for feature, score in feature_score.iteritems(): -# if len(top_features) < top_k: -# top_features.add(feature) -# else: -# bottom_features.add(feature) -# -# print("selected top %d features:" % top_k, ','.join(top_features)) -# print("removed bottom features:", ','.join(bottom_features)) -# return top_features, bottom_features - - class FSCDLayer(object): """Rank features by variational dropout. diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index 43e5663ce..a6bd1b29d 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -88,6 +88,17 @@ def build_metric_graph(self, eval_config): def build_loss_graph(self): """Build loss graph for multi task model.""" + strategy = self._base_model_config.loss_weight_strategy + loss_weight_arr = [1.0] * len(self._task_towers) + if strategy == self._base_model_config.Random: + num = 0 + for task_tower_cfg in self._task_towers: + losses = task_tower_cfg.losses + num += 1 if len(losses) == 0 else len(losses) + weights = tf.random_normal([num]) + loss_weight_arr = tf.nn.softmax(weights) + + offset = 0 for task_tower_cfg in self._task_towers: tower_name = task_tower_cfg.tower_name loss_weight = task_tower_cfg.weight @@ -111,8 +122,12 @@ def build_loss_graph(self): loss_weight=loss_weight, num_class=task_tower_cfg.num_class, suffix='_%s' % tower_name) + if strategy == 
self._base_model_config.Random: + for loss_name in loss_dict.keys(): + loss_dict[loss_name] = loss_dict[loss_name] * loss_weight_arr[offset] + offset += 1 else: - for loss in losses: + for i, loss in enumerate(losses): loss_param = loss.WhichOneof('loss_param') if loss_param is not None: loss_param = getattr(loss, loss_param) @@ -125,19 +140,26 @@ def build_loss_graph(self): loss_name=loss.loss_name, loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): - if loss.learn_loss_weight: - uncertainty = tf.Variable( + if strategy == self._base_model_config.Fixed: + loss_dict[loss_name] = loss_value * loss.weight + elif strategy == self._base_model_config.Uncertainty: + if loss.learn_loss_weight: + uncertainty = tf.Variable( 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) - tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) - if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: - loss_dict[loss_name] = 0.5 * tf.exp( + tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) + if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: + loss_dict[loss_name] = 0.5 * tf.exp( -uncertainty) * loss_value + 0.5 * uncertainty - else: - loss_dict[loss_name] = tf.exp( + else: + loss_dict[loss_name] = tf.exp( -uncertainty) * loss_value + 0.5 * uncertainty + else: + loss_dict[loss_name] = loss_value * loss.weight + elif strategy == self._base_model_config.Random: + loss_dict[loss_name] = loss_value * loss_weight_arr[i + offset] else: - loss_dict[loss_name] = loss_value * loss.weight - + raise ValueError("Unsupported loss weight strategy: " + strategy.Name) + offset += len(losses) self._loss_dict.update(loss_dict) kd_loss_dict = loss_builder.build_kd_loss(self.kd, self._prediction_dict, diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index 25eff23ea..e4a38fa2d 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -193,7 +193,12 @@ def 
build_loss_graph(self): loss_weight=self._sample_weight, num_class=self._num_class) else: - for loss in self._losses: + strategy = self._base_model_config.loss_weight_strategy + loss_weight = [1.0] + if strategy == self._base_model_config.Random and len(self._losses) > 1: + weights = tf.random_normal([len(self._losses)]) + loss_weight = tf.nn.softmax(weights) + for i, loss in enumerate(self._losses): loss_param = loss.WhichOneof('loss_param') if loss_param is not None: loss_param = getattr(loss, loss_param) @@ -205,18 +210,25 @@ def build_loss_graph(self): loss_name=loss.loss_name, loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): - if loss.learn_loss_weight: - uncertainty = tf.Variable( - 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) - tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) - if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: - loss_dict[loss_name] = 0.5 * tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + if strategy == self._base_model_config.Fixed: + loss_dict[loss_name] = loss_value * loss.weight + elif strategy == self._base_model_config.Uncertainty: + if loss.learn_loss_weight: + uncertainty = tf.Variable( + 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) + tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) + if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: + loss_dict[loss_name] = 0.5 * tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty + else: + loss_dict[loss_name] = tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty else: - loss_dict[loss_name] = tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + loss_dict[loss_name] = loss_value * loss.weight + elif strategy == self._base_model_config.Random: + loss_dict[loss_name] = loss_value * loss_weight[i] else: - loss_dict[loss_name] = loss_value * loss.weight + raise ValueError("Unsupported loss weight strategy: " + strategy.Name) self._loss_dict.update(loss_dict) diff 
--git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 42f454d95..770611880 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -103,6 +103,13 @@ message EasyRecModel { repeated Loss losses = 15; + enum LossWeightStrategy { + Fixed = 0; + Uncertainty = 1; + Random = 2; + } + required LossWeightStrategy loss_weight_strategy = 16 [default = Fixed]; + // dnn layers after sequence feature - optional DNN sequence_dnn = 16; + optional DNN sequence_dnn = 17; } From 3a8d7329a378f41b4bf48d3b8e007f799006cca6 Mon Sep 17 00:00:00 2001 From: weisu Date: Thu, 25 May 2023 11:15:06 +0800 Subject: [PATCH 27/54] [feat]: add fibinet & masknet --- .../compat/feature_column/feature_column.py | 24 ++- .../feature_column/feature_column_v2.py | 6 +- easy_rec/python/inference/predictor.py | 2 +- easy_rec/python/input/input.py | 5 +- easy_rec/python/layers/common_layers.py | 171 ++++++++++++++++++ easy_rec/python/layers/fibinet.py | 53 ++++++ easy_rec/python/layers/fscd_layer.py | 57 ++++-- easy_rec/python/layers/input_layer.py | 10 +- easy_rec/python/layers/mask_net.py | 73 ++++++++ easy_rec/python/model/dbmtl.py | 13 ++ easy_rec/python/model/easy_rec_model.py | 3 +- easy_rec/python/model/multi_task_model.py | 19 +- easy_rec/python/model/rank_model.py | 3 +- easy_rec/python/protos/dbmtl.proto | 6 + easy_rec/python/protos/easy_rec_model.proto | 2 + easy_rec/python/protos/fibinet.proto | 15 ++ easy_rec/python/protos/masknet.proto | 17 ++ .../python/protos/variational_dropout.proto | 6 +- easy_rec/python/tools/feature_selection.py | 2 +- easy_rec/python/tools/view_saved_model.py | 7 +- 20 files changed, 452 insertions(+), 42 deletions(-) create mode 100644 easy_rec/python/layers/fibinet.py create mode 100644 easy_rec/python/layers/mask_net.py create mode 100644 easy_rec/python/protos/fibinet.proto create mode 100644 easy_rec/python/protos/masknet.proto diff --git 
a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index 56d3357c7..09d791386 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -167,6 +167,7 @@ from easy_rec.python.compat import embedding_ops as ev_embedding_ops from easy_rec.python.compat.feature_column import utils as fc_utils +from easy_rec.python.layers.common_layers import layer_norm def _internal_input_layer(features, @@ -177,7 +178,8 @@ def _internal_input_layer(features, scope=None, cols_to_output_tensors=None, from_template=False, - feature_name_to_output_tensors=None): + feature_name_to_output_tensors=None, + do_normalize=False): """See input_layer, `scope` is a name or variable scope to use.""" feature_columns = _normalize_feature_columns(feature_columns) for column in feature_columns: @@ -208,6 +210,18 @@ def _get_logits(): # pylint: disable=missing-docstring batch_size = array_ops.shape(tensor)[0] output_tensor = array_ops.reshape( tensor, shape=(batch_size, num_elements)) + if do_normalize: + from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn, NumericColumn, \ + WeightedCategoricalColumn + from tensorflow.python.layers.normalization import batch_normalization + if isinstance(column, EmbeddingColumn) or isinstance(column, _SharedEmbeddingColumn): + fc = column.categorical_column + if isinstance(fc, WeightedCategoricalColumn) and fc.weight_feature_key.endswith('_raw_proj_val'): + output_tensor = layer_norm(output_tensor, name='ln_' + column.name) + else: + output_tensor = batch_normalization(output_tensor, name='bn_'+column.name) + elif isinstance(column, NumericColumn) and int(column.shape[-1]) > 1: + output_tensor = layer_norm(output_tensor, name='ln_' + column.name) output_tensors.append(output_tensor) if cols_to_vars is not None: # Retrieve any variables created (some _DenseColumn's don't create @@ -239,7 +253,8 @@ def 
input_layer(features, trainable=True, cols_to_vars=None, cols_to_output_tensors=None, - feature_name_to_output_tensors=None): + feature_name_to_output_tensors=None, + do_normalize=False): """Returns a dense `Tensor` as input layer based on given `feature_columns`. Generally a single example in training data is described with FeatureColumns. @@ -287,6 +302,8 @@ def input_layer(features, cols_to_output_tensors: If not `None`, must be a dictionary that will be filled with a mapping from '_FeatureColumn' to the associated output `Tensor`s. + do_normalize: Whether to do layer normalization for numerical features and + batch normalization operation for categorical features. Returns: A `Tensor` which represents input layer of a model. Its shape @@ -303,7 +320,8 @@ def input_layer(features, trainable=trainable, cols_to_vars=cols_to_vars, cols_to_output_tensors=cols_to_output_tensors, - feature_name_to_output_tensors=feature_name_to_output_tensors) + feature_name_to_output_tensors=feature_name_to_output_tensors, + do_normalize=do_normalize) # TODO(akshayka): InputLayer should be a subclass of Layer, and it diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index eb952e7be..c264c30c2 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -2687,9 +2687,9 @@ def _normalize_feature_columns(feature_columns): class ConstantNumericColumn( DenseColumn, fc_old._DenseColumn, # pylint: disable=protected-access - collections.namedtuple('ConstantNumericColumn', - ('feature_name', 'key', 'shape', 'default_value', - 'dtype'))): + collections.namedtuple( + 'ConstantNumericColumn', + ('feature_name', 'key', 'shape', 'default_value', 'dtype'))): """see `numeric_column`.""" @property diff --git a/easy_rec/python/inference/predictor.py b/easy_rec/python/inference/predictor.py index e39592c18..e17871892 100644 --- 
a/easy_rec/python/inference/predictor.py +++ b/easy_rec/python/inference/predictor.py @@ -223,7 +223,7 @@ def _build_model(self): input_name = tensor.name input_name, _ = input_name.split(':') input_op = self._graph.get_operation_by_name(input_name) - if input_op.type == "PlaceholderWithDefault": + if input_op.type == 'PlaceholderWithDefault': continue try: input_id = input_name.split('_')[-1] diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index 2775ad1ac..d2325e680 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -515,8 +515,9 @@ def expand_input(): multiples[0] = batch_size return tf.tile(input_tensor, multiples) - input_tensor = tf.cond(tf.equal(tf.shape(input_tensor)[0], batch_size), - lambda: input_tensor, expand_input) + input_tensor = tf.cond( + tf.equal(tf.shape(input_tensor)[0], batch_size), lambda: input_tensor, + expand_input) feature_name = fc.feature_name if fc.HasField('feature_name') else input_0 parsed_dict[feature_name] = input_tensor diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index 165fce5e1..e3bb65f64 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -1,8 +1,12 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
+import itertools +import logging import tensorflow as tf +from easy_rec.python.compat.layers import layer_norm as tf_layer_norm + if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -61,3 +65,170 @@ def text_cnn(x, pool_flat = tf.concat( pooled_outputs, 1) # shape: (batch_size, num_filters * len(filter_sizes)) return pool_flat + + +def layer_norm(input_tensor, name=None, reuse=None): + """Run layer normalization on the last dimension of the tensor.""" + return tf_layer_norm( + inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, reuse=reuse, scope=name) + + +class SENet(object): + """ + SENet+ Layer,支持不同field的embedding dimension不等 + arxiv: 2209.05016 + """ + + def __init__(self, reduction_ratio, num_groups, name='SENet'): + self.reduction_ratio = reduction_ratio + self.num_groups = num_groups + self.name = name + + def __call__(self, embedding_list): + """ + + :param embedding_list: [embedding_1,...,embedding_i,...,embedding_f],f为field的数目,embedding_i is [bs, dim] + :return: + """ + print("SENET layer with %d inputs" % len(embedding_list)) + for emb in embedding_list: + assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors' + + field_size = len(embedding_list) + feature_size_list = [emb.shape.as_list()[-1] for emb in embedding_list] + + # Squeeze + g = self.num_groups + # embedding dimension 必须能被 g 整除 + group_embs = [ + tf.reshape(emb, [-1, g, tf.shape(emb)[-1] // g]) + for emb in embedding_list + ] + + squeezed = [] + for emb in group_embs: + squeezed.append(tf.reduce_max(emb, axis=-1)) + squeezed.append(tf.reduce_mean(emb, axis=-1)) + z = tf.concat(squeezed, axis=1) # [bs, field_size * num_groups * 2] + + # Excitation + reduction_size = max(1, field_size * g * 2 // self.reduction_ratio) + + initializer = tf.glorot_normal_initializer() + a1 = tf.layers.dense( + z, + reduction_size, + kernel_initializer=initializer, + activation=tf.nn.relu, + name='%s/W1' % self.name) + a2 = tf.layers.dense( + a1, + sum(feature_size_list), + 
kernel_initializer=initializer, + name='%s/W2' % self.name) + + # Re-weight & Fuse + a = tf.split(a2, feature_size_list, axis=1) + senet_like_embeddings = [ + layer_norm(emb * w + emb) for emb, w in zip(embedding_list, a) + ] + return tf.concat(senet_like_embeddings, axis=-1) + + +def _full_interaction(v_i, v_j): + # [bs, 1, dim] x [bs, dim, 1] = [bs, 1] + interaction = tf.matmul( + tf.expand_dims(v_i, axis=1), tf.expand_dims(v_j, axis=-1)) + return tf.squeeze(interaction, axis=1) + + +class BiLinear(object): + + def __init__(self, + output_size, + bilinear_type, + bilinear_plus=True, + name='bilinear'): + """双线性特征交互层,支持不同field embeddings的size不等. + + arxiv: 2209.05016 + :param output_size: 输出的size + :param bilinear_type: ['all', 'each', 'interaction'],支持其中一种 + :param bilinear_plus: 是否使用bi-linear+ + """ + self.name = name + self.bilinear_type = bilinear_type.lower() + self.output_size = output_size + + if bilinear_type not in ['all', 'each', 'interaction']: + raise NotImplementedError( + "bilinear_type only support: ['all', 'each', 'interaction']") + + if bilinear_plus: + self.func = _full_interaction + else: + self.func = tf.multiply + + def __call__(self, embeddings): + print("Bilinear Layer with %d inputs" % len(embeddings)) + if len(embeddings) > 200: + logging.warn("There are too many inputs for bilinear layer: %d" % len(embeddings)) + equal_dim = True + _dim = embeddings[0].shape[-1] + for emb in embeddings: + assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors' + if emb.shape[-1] != _dim: + equal_dim = False + if not equal_dim and self.bilinear_type != 'interaction': + raise ValueError('all embedding dimensions must be same when use bilinear type: interaction') + dim = int(_dim) + + field_size = len(embeddings) + initializer = tf.glorot_normal_initializer() + + # bi-linear+: p的维度为[bs, f*(f-1)/2] + # bi-linear: + # 当equal_dim=True时,p的维度为[bs, f*(f-1)/2*k],k为embeddings的size + # 当equal_dim=False时,p的维度为[bs, 
(k_2+k_3+...+k_f)+...+(k_i+k_{i+1}+...+k_f)+...+k_f], + # 其中 k_i为第i个field的embedding的size + if self.bilinear_type == 'all': + v_dot = [ + tf.layers.dense( + v_i, + dim, + kernel_initializer=initializer, + name='%s/all' % self.name, + reuse=tf.AUTO_REUSE) for v_i in embeddings[:-1] + ] + p = [ + self.func(v_dot[i], embeddings[j]) + for i, j in itertools.combinations(range(field_size), 2) + ] + elif self.bilinear_type == 'each': + v_dot = [ + tf.layers.dense( + v_i, + dim, + kernel_initializer=initializer, + name='%s/each_%d' % (self.name, i), + reuse=tf.AUTO_REUSE) for i, v_i in enumerate(embeddings[:-1]) + ] + p = [ + self.func(v_dot[i], embeddings[j]) + for i, j in itertools.combinations(range(field_size), 2) + ] + else: # interaction + p = [ + self.func( + tf.layers.dense( + embeddings[i], + embeddings[j].shape.as_list()[-1], + kernel_initializer=initializer, + name='%s/interaction_%d_%d' % (self.name, i, j), + reuse=tf.AUTO_REUSE), embeddings[j]) + for i, j in itertools.combinations(range(field_size), 2) + ] + + output = tf.layers.dense( + tf.concat(p, axis=-1), self.output_size, kernel_initializer=initializer) + return output diff --git a/easy_rec/python/layers/fibinet.py b/easy_rec/python/layers/fibinet.py new file mode 100644 index 000000000..9a419e004 --- /dev/null +++ b/easy_rec/python/layers/fibinet.py @@ -0,0 +1,53 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import tensorflow as tf +from easy_rec.python.layers.common_layers import SENet +from easy_rec.python.layers.common_layers import BiLinear +from easy_rec.python.layers import dnn + + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class FiBiNetLayer(object): + """FiBiNet++:Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction. + + This is almost an exact implementation of the original FiBiNet++ model. 
+ See the original paper: + https://arxiv.org/pdf/2209.05016.pdf + """ + + def __init__(self, fibinet_config, features, input_layer): + self._config = fibinet_config + self._input_layer = input_layer + self._features = features + + def __call__(self, group_name, is_training, l2_reg=0, *args, **kwargs): + feature_list = [] + _, group_features = self._input_layer(self._features, group_name) + senet = SENet(reduction_ratio=self._config.senet_reduction_ratio, + num_groups=self._config.num_senet_squeeze_group, + name='%s_senet' % group_name) + senet_output = senet(group_features) + feature_list.append(senet_output) + + if self._config.bilinear_type != 'none': + bilinear = BiLinear(output_size=self._config.bilinear_output_units, + bilinear_type=self._config.bilinear_type, + bilinear_plus=self._config.use_bilinear_plus, + name='%s_bilinear' % group_name) + bilinear_output = bilinear(group_features) + feature_list.append(bilinear_output) + + if len(feature_list) > 1: + feature = tf.concat(feature_list, axis=-1) + else: + feature = feature_list[0] + + final_dnn = dnn.DNN( + self._config.mlp, + l2_reg, + name='%s_fibinet_mlp' % group_name, + is_training=is_training) + return final_dnn(feature) diff --git a/easy_rec/python/layers/fscd_layer.py b/easy_rec/python/layers/fscd_layer.py index 2b1071787..ec115f547 100644 --- a/easy_rec/python/layers/fscd_layer.py +++ b/easy_rec/python/layers/fscd_layer.py @@ -35,14 +35,14 @@ def sigmoid(x): def get_feature_importance(pipeline_config, feature_group_name=None): assert pipeline_config.model_config.HasField( - 'variational_dropout'), 'variational_dropout must be in model_config' + 'variational_dropout'), 'variational_dropout must be in model_config' checkpoint_path = tf.train.latest_checkpoint(pipeline_config.model_dir) meta_graph_def = read_meta_graph_file(checkpoint_path + '.meta') features_map = dict() for col_def in meta_graph_def.collection_def[ - 'variational_dropout'].bytes_list.value: + 'variational_dropout'].bytes_list.value: 
features = json.loads(col_def) features_map.update(features) @@ -105,24 +105,30 @@ def __init__(self, self.name = name self.feature_complexity = get_feature_complexity(feature_configs) - def compute_dropout_mask(self, n, temperature=0.1): + def compute_dropout_mask(self, n): delta_name = 'fscd_delta_%s' % self.name delta = tf.get_variable( - name=delta_name, - shape=[n], - dtype=tf.float32, - initializer=tf.constant_initializer(0.)) + name=delta_name, + shape=[n], + dtype=tf.float32, + initializer=tf.constant_initializer(0.)) delta = tf.nn.sigmoid(delta) + epsilon = np.finfo(float).eps + max_keep_ratio = self._config.max_keep_ratio + min_keep_ratio = self._config.min_keep_ratio + if max_keep_ratio >= 1.0: + max_keep_ratio = 1.0 - epsilon + if min_keep_ratio <= 0.0: + min_keep_ratio = epsilon + delta = tf.clip_by_value(delta, min_keep_ratio, max_keep_ratio) - EPSILON = np.finfo(float).eps unif_noise = tf.random_uniform([n], dtype=tf.float32, seed=None, name='uniform_noise') - approx = ( - tf.log(delta + EPSILON) - tf.log(1. - delta + EPSILON) + - tf.log(unif_noise + EPSILON) - tf.log(1. - unif_noise + EPSILON)) - return tf.sigmoid(approx / temperature) + approx = (tf.log(delta) - tf.log(1. - delta) + + tf.log(unif_noise) - tf.log(1. 
- unif_noise)) + return tf.sigmoid(approx / self._config.temperature), delta def compute_regular_params(self, cols_to_feature): alphas = {} @@ -141,8 +147,8 @@ def compute_regular_params(self, cols_to_feature): alpha = math.log(sig_c) - math.log(theta) alphas[fc] = alpha print( - str(fc.raw_name), 'complexity:', complexity, 'cardinality:', cardinal, - 'dimension:', dim, 'c:', c, 'theta:', theta, 'alpha:', alpha) + str(fc.raw_name), 'complexity:', complexity, 'cardinality:', cardinal, + 'dimension:', dim, 'c:', c, 'theta:', theta, 'alpha:', alpha) return alphas def __call__(self, cols_to_feature): @@ -152,14 +158,21 @@ def __call__(self, cols_to_feature): feature_dimension = [] output_tensors = [] alphas = [] - z = self.compute_dropout_mask(len(cols_to_feature)) # keep ratio + z, delta = self.compute_dropout_mask(len(cols_to_feature)) # keep ratio + tf.summary.histogram('fscd_keep_ratio', delta) + tf.summary.histogram('fscd_keep_mask', z) regular = self.compute_regular_params(cols_to_feature) + feature_columns = cols_to_feature.keys() for column in sorted(feature_columns, key=lambda x: x.name): value = cols_to_feature[column] alpha = regular[column] i = len(output_tensors) - out = value * z[i] if self.is_training else value + if self.is_training: + scaled_value = tf.div(value, delta[i]) + out = tf.multiply(scaled_value, z[i], name='fscd_dropout') + else: + out = value cols_to_feature[column] = out output_tensors.append(out) alphas.append(alpha) @@ -175,3 +188,15 @@ def __call__(self, cols_to_feature): tf.add_to_collection('variational_dropout_loss', loss) return output_features + + +# def dropout(p): +# u = np.random.uniform() +# x = math.log(p) - math.log(1-p) + math.log(u) - math.log(1-u) +# z = sigmoid(x/0.1) +# return z +# +# +# if __name__ == '__main__': +# for i in range(100): +# print(dropout(0.5)) diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index 7e28458d5..ced65c0cf 100644 --- 
a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -38,7 +38,8 @@ def __init__(self, ev_params=None, embedding_regularizer=None, kernel_regularizer=None, - is_training=False): + is_training=False, + do_feature_normalize=False): self._feature_configs = feature_configs self._feature_groups = { x.group_name: FeatureGroup(x) for x in feature_groups_config @@ -66,6 +67,7 @@ def __init__(self, self._kernel_regularizer = kernel_regularizer self._is_training = is_training self._variational_dropout_config = variational_dropout_config + self._do_feature_normalize = do_feature_normalize def has_group(self, group_name): return group_name in self._feature_groups @@ -135,7 +137,8 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): features, group_columns, cols_to_output_tensors=cols_to_output_tensors, - feature_name_to_output_tensors=feature_name_to_output_tensors) + feature_name_to_output_tensors=feature_name_to_output_tensors, + do_normalize=self._do_feature_normalize) group_features = [cols_to_output_tensors[x] for x in group_columns] for col, val in cols_to_output_tensors.items(): @@ -185,7 +188,8 @@ def single_call_input_layer(self, features, group_columns, cols_to_output_tensors=cols_to_output_tensors, - feature_name_to_output_tensors=feature_name_to_output_tensors) + feature_name_to_output_tensors=feature_name_to_output_tensors, + do_normalize=self._do_feature_normalize) embedding_reg_lst = [] builder = feature_column._LazyBuilder(features) diff --git a/easy_rec/python/layers/mask_net.py b/easy_rec/python/layers/mask_net.py new file mode 100644 index 000000000..fe4816fe8 --- /dev/null +++ b/easy_rec/python/layers/mask_net.py @@ -0,0 +1,73 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import tensorflow as tf + +from easy_rec.python.layers import dnn +from easy_rec.python.layers.common_layers import layer_norm + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class MaskBlock(object): + def __init__(self, mask_block_config): + self.mask_block_config = mask_block_config + + def __call__(self, net, mask_input): + mask_input_dim = int(mask_input.shape[-1]) + if self.mask_block_config.HasField('reduction_factor'): + aggregation_size = int(mask_input_dim * self.mask_block_config.reduction_factor) + elif self.mask_block_config.HasField('aggregation_size') is not None: + aggregation_size = self.mask_block_config.aggregation_size + else: + raise ValueError("Need one of reduction factor or aggregation size for MaskBlock.") + + if self.mask_block_config.input_layer_norm: + input_name = net.name.replace(':', '_') + net = layer_norm(net, reuse=tf.AUTO_REUSE, name='ln_' + input_name) + + # initializer = tf.initializers.variance_scaling() + initializer = tf.glorot_uniform_initializer() + mask = tf.layers.dense(mask_input, aggregation_size, + activation=tf.nn.relu, + kernel_initializer=initializer) + mask = tf.layers.dense(mask, net.shape[-1]) + masked_net = net * mask + + output_size = self.mask_block_config.output_size + hidden_layer_output = tf.layers.dense(masked_net, output_size) + return layer_norm(hidden_layer_output) + + +class MaskNet(object): + def __init__(self, mask_net_config, name='mask_net'): + self.mask_net_config = mask_net_config + self.name = name + + def __call__(self, inputs, is_training, l2_reg=None): + conf = self.mask_net_config + if conf.use_parallel: + mask_outputs = [] + for block_conf in self.mask_net_config.mask_blocks: + mask_layer = MaskBlock(block_conf) + mask_outputs.append(mask_layer(mask_input=inputs, net=inputs)) + all_mask_outputs = tf.concat(mask_outputs, axis=1) + + if conf.HasField('mlp'): + mlp = dnn.DNN(conf.mlp, l2_reg, name='%s/mlp' % self.name, is_training=is_training) + output = mlp(all_mask_outputs) + else: + 
output = all_mask_outputs + return output + else: + net = inputs + for block_conf in self.mask_net_config.mask_blocks: + mask_layer = MaskBlock(block_conf) + net = mask_layer(net=net, mask_input=inputs) + + if conf.HasField('mlp'): + mlp = dnn.DNN(conf.mlp, l2_reg, name='%s/mlp' % self.name, is_training=is_training) + output = mlp(net) + else: + output = net + return output diff --git a/easy_rec/python/model/dbmtl.py b/easy_rec/python/model/dbmtl.py index 3639bf029..e829ba57f 100644 --- a/easy_rec/python/model/dbmtl.py +++ b/easy_rec/python/model/dbmtl.py @@ -6,6 +6,8 @@ from easy_rec.python.layers import dnn from easy_rec.python.layers import mmoe from easy_rec.python.layers import uniter +from easy_rec.python.layers import fibinet +from easy_rec.python.layers import mask_net from easy_rec.python.model.multi_task_model import MultiTaskModel from easy_rec.python.protos.dbmtl_pb2 import DBMTL as DBMTLConfig @@ -37,6 +39,13 @@ def __init__(self, features, self._model_config.bottom_uniter, self._input_layer) + elif self._model_config.HasField('bottom_fibinet'): + self._fibinet_layer = fibinet.FiBiNetLayer(self._model_config.bottom_fibinet, + features, + self._input_layer) + elif self._model_config.HasField('bottom_mask_net'): + self._mask_net_layer = mask_net.MaskNet(self._model_config.bottom_mask_net) + self._features, _ = self._input_layer(self._feature_dict, 'all') else: self._features, _ = self._input_layer(self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) @@ -60,6 +69,10 @@ def build_predict_graph(self): bottom_fea = self._cmbf_layer(self._is_training, l2_reg=self._l2_reg) elif self._model_config.HasField('bottom_uniter'): bottom_fea = self._uniter_layer(self._is_training, l2_reg=self._l2_reg) + elif self._model_config.HasField('bottom_fibinet'): + bottom_fea = self._fibinet_layer('all', self._is_training, l2_reg=self._l2_reg) + elif self._model_config.HasField('bottom_mask_net'): + bottom_fea = self._mask_net_layer(self._features, 
self._is_training, l2_reg=self._l2_reg) elif self._model_config.HasField('bottom_dnn'): bottom_dnn = dnn.DNN( self._model_config.bottom_dnn, diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index 6483877b7..4a7ad6330 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -104,7 +104,8 @@ def build_input_layer(self, model_config, feature_configs): kernel_regularizer=self._l2_reg, variational_dropout_config=model_config.variational_dropout if model_config.HasField('variational_dropout') else None, - is_training=self._is_training) + is_training=self._is_training, + do_feature_normalize=model_config.do_feature_normalize) def get_sequence_encoding(self, group_name=None, is_training=True): if group_name is not None: diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index a6bd1b29d..06dc53f8a 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -124,7 +124,8 @@ def build_loss_graph(self): suffix='_%s' % tower_name) if strategy == self._base_model_config.Random: for loss_name in loss_dict.keys(): - loss_dict[loss_name] = loss_dict[loss_name] * loss_weight_arr[offset] + loss_dict[ + loss_name] = loss_dict[loss_name] * loss_weight_arr[offset] offset += 1 else: for i, loss in enumerate(losses): @@ -145,20 +146,24 @@ def build_loss_graph(self): elif strategy == self._base_model_config.Uncertainty: if loss.learn_loss_weight: uncertainty = tf.Variable( - 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) - tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) - if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: + 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) + tf.summary.scalar('loss/%s_uncertainty' % loss_name, + uncertainty) + if loss.loss_type in { + LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS + }: loss_dict[loss_name] = 0.5 * tf.exp( - 
-uncertainty) * loss_value + 0.5 * uncertainty + -uncertainty) * loss_value + 0.5 * uncertainty else: loss_dict[loss_name] = tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + -uncertainty) * loss_value + 0.5 * uncertainty else: loss_dict[loss_name] = loss_value * loss.weight elif strategy == self._base_model_config.Random: loss_dict[loss_name] = loss_value * loss_weight_arr[i + offset] else: - raise ValueError("Unsupported loss weight strategy: " + strategy.Name) + raise ValueError('Unsupported loss weight strategy: ' + + strategy.Name) offset += len(losses) self._loss_dict.update(loss_dict) diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index e4a38fa2d..4f4368b9f 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -228,7 +228,8 @@ def build_loss_graph(self): elif strategy == self._base_model_config.Random: loss_dict[loss_name] = loss_value * loss_weight[i] else: - raise ValueError("Unsupported loss weight strategy: " + strategy.Name) + raise ValueError('Unsupported loss weight strategy: ' + + strategy.Name) self._loss_dict.update(loss_dict) diff --git a/easy_rec/python/protos/dbmtl.proto b/easy_rec/python/protos/dbmtl.proto index 2b1f981aa..5c7152ee1 100644 --- a/easy_rec/python/protos/dbmtl.proto +++ b/easy_rec/python/protos/dbmtl.proto @@ -4,12 +4,18 @@ package protos; import "easy_rec/python/protos/dnn.proto"; import "easy_rec/python/protos/tower.proto"; import "easy_rec/python/protos/layer.proto"; +import "easy_rec/python/protos/fibinet.proto"; +import "easy_rec/python/protos/masknet.proto"; message DBMTL { // shared bottom cmbf layer optional CMBFTower bottom_cmbf = 101; // shared bottom uniter layer optional UniterTower bottom_uniter = 102; + // shared bottom fibinet layer + optional FiBiNetTower bottom_fibinet = 103; + // shared bottom mask net layer + optional MaskNet bottom_mask_net = 104; // shared bottom dnn layer optional DNN bottom_dnn = 1; // mmoe expert dnn 
layer definition diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 770611880..f28180e10 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -112,4 +112,6 @@ message EasyRecModel { // dnn layers after sequence feature optional DNN sequence_dnn = 17; + + optional bool do_feature_normalize = 18; } diff --git a/easy_rec/python/protos/fibinet.proto b/easy_rec/python/protos/fibinet.proto new file mode 100644 index 000000000..b13fd7cba --- /dev/null +++ b/easy_rec/python/protos/fibinet.proto @@ -0,0 +1,15 @@ +syntax = "proto2"; +package protos; + +import "easy_rec/python/protos/dnn.proto"; + +message FiBiNetTower { + required string bilinear_type = 1 [default = 'interaction']; + required bool use_bilinear_plus = 2 [default = true]; + required uint32 bilinear_output_units = 3; + + required uint32 senet_reduction_ratio = 4 [default = 3]; + optional uint32 num_senet_squeeze_group = 5 [default = 2]; + + required DNN mlp = 6; +} diff --git a/easy_rec/python/protos/masknet.proto b/easy_rec/python/protos/masknet.proto new file mode 100644 index 000000000..c9b0b703a --- /dev/null +++ b/easy_rec/python/protos/masknet.proto @@ -0,0 +1,17 @@ +syntax = "proto2"; +package protos; + +import "easy_rec/python/protos/dnn.proto"; + +message MaskBlock { + optional float reduction_factor = 1; + required uint32 output_size = 2; + optional uint32 aggregation_size = 3; + optional bool input_layer_norm = 4 [default = true]; +} + +message MaskNet { + repeated MaskBlock mask_blocks = 1; + required bool use_parallel = 2 [default = true]; + optional DNN mlp = 3; +} \ No newline at end of file diff --git a/easy_rec/python/protos/variational_dropout.proto b/easy_rec/python/protos/variational_dropout.proto index c643b3d2e..e76a0fb3b 100644 --- a/easy_rec/python/protos/variational_dropout.proto +++ b/easy_rec/python/protos/variational_dropout.proto @@ -13,5 +13,9 @@ message 
VariationalDropoutLayer { optional float feature_complexity_weight = 4 [default = 1.0]; optional float feature_dimension_weight = 5 [default = 1e-2]; optional float feature_cardinality_weight = 6 [default = 1e-7]; - optional uint32 fine_tune_use_top_k_features = 7; + // temperature + optional float temperature = 7 [default = 0.1]; + + optional float min_keep_ratio = 8 [default = 1e-3]; + optional float max_keep_ratio = 9 [default = 1.0]; } diff --git a/easy_rec/python/tools/feature_selection.py b/easy_rec/python/tools/feature_selection.py index 065993652..bd31fef9b 100644 --- a/easy_rec/python/tools/feature_selection.py +++ b/easy_rec/python/tools/feature_selection.py @@ -423,7 +423,7 @@ def _process_config(self, feature_importance_map, white_feature_group): for feature_config in config_util.get_compatible_feature_configs(config): feature_name = feature_config.input_names[0] if feature_config.HasField('feature_name'): - feature_name = feature_config.feature_name + feature_name = feature_config.feature_name if feature_name in excluded_features: feature_config.feature_type = FeatureConfig.FeatureType.ConstFeature diff --git a/easy_rec/python/tools/view_saved_model.py b/easy_rec/python/tools/view_saved_model.py index a3c01909b..022bcf1aa 100644 --- a/easy_rec/python/tools/view_saved_model.py +++ b/easy_rec/python/tools/view_saved_model.py @@ -4,8 +4,8 @@ import logging from google.protobuf import text_format -from tensorflow.python.platform.gfile import GFile from tensorflow.core.protobuf import saved_model_pb2 +from tensorflow.python.platform.gfile import GFile logging.basicConfig( format='[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d : %(message)s', @@ -15,7 +15,8 @@ parser = argparse.ArgumentParser() parser.add_argument( '--input', type=str, default=None, help='saved model path') - parser.add_argument('--output', type=str, default=None, help='saved model save path') + parser.add_argument( + '--output', type=str, default=None, help='saved model save path') 
args = parser.parse_args() assert args.input is not None and args.output is not None @@ -29,7 +30,7 @@ else: with GFile(args.input, 'r') as fin: text_format.Merge(fin.read(), saved_model) - + if args.output.endswith('.pbtxt'): with GFile(args.output, 'w') as fout: fout.write(text_format.MessageToString(saved_model, as_utf8=True)) From 48601c7f3559f9456c8cca3c436de9317d81dcc5 Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 9 Jun 2023 18:02:06 +0800 Subject: [PATCH 28/54] [feat]: add backbone network --- easy_rec/python/builders/loss_builder.py | 9 +- easy_rec/python/compat/array_ops.py | 229 ++++++++++++++++++ .../compat/feature_column/feature_column.py | 23 +- easy_rec/python/layers/backbone.py | 195 +++++++++++++++ easy_rec/python/layers/common_layers.py | 68 +++--- easy_rec/python/layers/dnn.py | 11 +- easy_rec/python/layers/fibinet.py | 49 ++-- easy_rec/python/layers/fscd_layer.py | 25 +- easy_rec/python/layers/mask_net.py | 58 +++-- easy_rec/python/layers/numerical_embedding.py | 39 +++ easy_rec/python/loss/info_nce_loss.py | 41 ++++ easy_rec/python/loss/jrc_loss.py | 57 ++++- easy_rec/python/model/dbmtl.py | 92 +++---- easy_rec/python/model/easy_rec_model.py | 35 ++- easy_rec/python/protos/backbone.proto | 44 ++++ easy_rec/python/protos/cmbf.proto | 43 +++- easy_rec/python/protos/dbmtl.proto | 20 +- easy_rec/python/protos/easy_rec_model.proto | 7 +- easy_rec/python/protos/fibinet.proto | 22 +- easy_rec/python/protos/layer.proto | 65 ----- easy_rec/python/protos/loss.proto | 2 + easy_rec/python/protos/masknet.proto | 2 +- easy_rec/python/protos/uniter.proto | 26 +- easy_rec/python/utils/dag.py | 205 ++++++++++++++++ setup.cfg | 2 +- 25 files changed, 1114 insertions(+), 255 deletions(-) create mode 100644 easy_rec/python/compat/array_ops.py create mode 100644 easy_rec/python/layers/backbone.py create mode 100644 easy_rec/python/layers/numerical_embedding.py create mode 100644 easy_rec/python/loss/info_nce_loss.py create mode 100644 
easy_rec/python/protos/backbone.proto create mode 100644 easy_rec/python/utils/dag.py diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index e1b32fde1..ec4ab57c8 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -41,16 +41,17 @@ def build(loss_type, return tf.losses.mean_squared_error( labels=label, predictions=pred, weights=loss_weight, **kwargs) elif loss_type == LossType.JRC_LOSS: - alpha = 0.5 if loss_param is None else loss_param.alpha - auto = False if loss_param is None else not loss_param.HasField('alpha') session = kwargs.get('session_ids', None) + if loss_param is None: + return jrc_loss(label, pred, session, name=loss_name) return jrc_loss( label, pred, session, - alpha, - auto_weight=auto, + loss_param.alpha, + loss_weight_strategy=loss_param.loss_weight_strategy, sample_weights=loss_weight, + same_label_loss=loss_param.same_label_loss, name=loss_name) elif loss_type == LossType.PAIR_WISE_LOSS: session = kwargs.get('session_ids', None) diff --git a/easy_rec/python/compat/array_ops.py b/easy_rec/python/compat/array_ops.py new file mode 100644 index 000000000..3e8929ceb --- /dev/null +++ b/easy_rec/python/compat/array_ops.py @@ -0,0 +1,229 @@ +import numpy as np +import tensorflow as tf +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import gen_math_ops + + +def convert_to_int_tensor(tensor, name, dtype=tf.int32): + """Converts the given value to an integer Tensor.""" + tensor = ops.convert_to_tensor(tensor, name=name, preferred_dtype=dtype) + if tensor.dtype.is_integer: + tensor = gen_math_ops.cast(tensor, dtype) + else: + raise TypeError('%s must be an integer tensor; dtype=%s' % + (name, tensor.dtype)) + return tensor + + +def _with_nonzero_rank(data): + """If `data` is scalar, then add a dimension; otherwise return 
as-is.""" + if data.shape.ndims is not None: + if data.shape.ndims == 0: + return tf.stack([data]) + else: + return data + else: + data_shape = tf.shape(data) + data_ndims = tf.rank(data) + return tf.reshape(data, tf.concat([[1], data_shape], axis=0)[-data_ndims:]) + + +def get_positive_axis(axis, ndims): + """Validate an `axis` parameter, and normalize it to be positive. + + If `ndims` is known (i.e., not `None`), then check that `axis` is in the + range `-ndims <= axis < ndims`, and return `axis` (if `axis >= 0`) or + `axis + ndims` (otherwise). + If `ndims` is not known, and `axis` is positive, then return it as-is. + If `ndims` is not known, and `axis` is negative, then report an error. + + Args: + axis: An integer constant + ndims: An integer constant, or `None` + + Returns: + The normalized `axis` value. + + Raises: + ValueError: If `axis` is out-of-bounds, or if `axis` is negative and + `ndims is None`. + """ + if not isinstance(axis, int): + raise TypeError('axis must be an int; got %s' % type(axis).__name__) + if ndims is not None: + if 0 <= axis < ndims: + return axis + elif -ndims <= axis < 0: + return axis + ndims + else: + raise ValueError('axis=%s out of bounds: expected %s<=axis<%s' % + (axis, -ndims, ndims)) + elif axis < 0: + raise ValueError('axis may only be negative if ndims is statically known.') + return axis + + +def tile_one_dimension(data, axis, multiple): + """Tiles a single dimension of a tensor.""" + # Assumes axis is a nonnegative int. + if data.shape.ndims is not None: + multiples = [1] * data.shape.ndims + multiples[axis] = multiple + else: + ones_value = tf.ones(tf.rank(data), tf.int32) + multiples = tf.concat( + [ones_value[:axis], [multiple], ones_value[axis + 1:]], axis=0) + return tf.tile(data, multiples) + + +def _all_dimensions(x): + """Returns a 1D-tensor listing all dimensions in x.""" + # Fast path: avoid creating Rank and Range ops if ndims is known. 
+ if isinstance(x, ops.Tensor) and x.get_shape().ndims is not None: + return constant_op.constant(np.arange(x.get_shape().ndims), dtype=tf.int32) + if (isinstance(x, sparse_tensor.SparseTensor) and + x.dense_shape.get_shape().is_fully_defined()): + r = x.dense_shape.get_shape().dims[0].value # sparse.dense_shape is 1-D. + return constant_op.constant(np.arange(r), dtype=tf.int32) + + # Otherwise, we rely on `range` and `rank` to do the right thing at runtime. + return gen_math_ops._range(0, tf.rank(x), 1) + + +# This op is intended to exactly match the semantics of numpy.repeat, with +# one exception: numpy.repeat has special (and somewhat non-intuitive) behavior +# when axis is not specified. Rather than implement that special behavior, we +# simply make `axis` be a required argument. +# +# External (OSS) `tf.repeat` feature request: +# https://github.com/tensorflow/tensorflow/issues/8246 +def repeat_with_axis(data, repeats, axis, name=None): + """Repeats elements of `data`. + + Args: + data: An `N`-dimensional tensor. + repeats: A 1-D integer tensor specifying how many times each element in + `axis` should be repeated. `len(repeats)` must equal `data.shape[axis]`. + Supports broadcasting from a scalar value. + axis: `int`. The axis along which to repeat values. Must be less than + `max(N, 1)`. + name: A name for the operation. + + Returns: + A tensor with `max(N, 1)` dimensions. Has the same shape as `data`, + except that dimension `axis` has size `sum(repeats)`. 
+ #### Examples: + ```python + >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0) + ['a', 'a', 'a', 'c', 'c'] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0) + [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1) + [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]] + ``` + """ + if not isinstance(axis, int): + raise TypeError('axis must be an int; got %s' % type(axis).__name__) + + with ops.name_scope(name, 'Repeat', [data, repeats]): + data = ops.convert_to_tensor(data, name='data') + repeats = convert_to_int_tensor(repeats, name='repeats') + repeats.shape.with_rank_at_most(1) + + # If `data` is a scalar, then upgrade it to a vector. + data = _with_nonzero_rank(data) + data_shape = tf.shape(data) + + # If `axis` is negative, then convert it to a positive value. + axis = get_positive_axis(axis, data.shape.ndims) + + # Check data Tensor shapes. + if repeats.shape.ndims == 1: + data.shape.dims[axis].assert_is_compatible_with(repeats.shape[0]) + + # If we know that `repeats` is a scalar, then we can just tile & reshape. + if repeats.shape.ndims == 0: + expanded = tf.expand_dims(data, axis + 1) + tiled = tile_one_dimension(expanded, axis + 1, repeats) + result_shape = tf.concat([data_shape[:axis], [-1], data_shape[axis + 1:]], + axis=0) + return tf.reshape(tiled, result_shape) + + # Broadcast the `repeats` tensor so rank(repeats) == axis + 1. + if repeats.shape.ndims != axis + 1: + repeats_shape = tf.shape(repeats) + repeats_ndims = tf.rank(repeats) + broadcast_shape = tf.concat( + [data_shape[:axis + 1 - repeats_ndims], repeats_shape], axis=0) + repeats = tf.broadcast_to(repeats, broadcast_shape) + repeats.set_shape([None] * (axis + 1)) + + # Create a "sequence mask" based on `repeats`, where slices across `axis` + # contain one `True` value for each repetition. E.g., if + # `repeats = [3, 1, 2]`, then `mask = [[1, 1, 1], [1, 0, 0], [1, 1, 0]]`. 
+ max_repeat = gen_math_ops.maximum( + 0, gen_math_ops._max(repeats, _all_dimensions(repeats))) + mask = tf.sequence_mask(repeats, max_repeat) + + # Add a new dimension around each value that needs to be repeated, and + # then tile that new dimension to match the maximum number of repetitions. + expanded = tf.expand_dims(data, axis + 1) + tiled = tile_one_dimension(expanded, axis + 1, max_repeat) + + # Use `boolean_mask` to discard the extra repeated values. This also + # flattens all dimensions up through `axis`. + masked = tf.boolean_mask(tiled, mask) + + # Reshape the output tensor to add the outer dimensions back. + if axis == 0: + result = masked + else: + result_shape = tf.concat([data_shape[:axis], [-1], data_shape[axis + 1:]], + axis=0) + result = tf.reshape(masked, result_shape) + + # Preserve shape information. + if data.shape.ndims is not None: + new_axis_size = 0 if repeats.shape[0] == 0 else None + result.set_shape(data.shape[:axis].concatenate( + [new_axis_size]).concatenate(data.shape[axis + 1:])) + + return result + + +def repeat(input, repeats, axis=None, name=None): # pylint: disable=redefined-builtin + """Repeat elements of `input` + + Args: + input: An `N`-dimensional Tensor. + repeats: An 1-D `int` Tensor. The number of repetitions for each element. + repeats is broadcasted to fit the shape of the given axis. `len(repeats)` + must equal `input.shape[axis]` if axis is not None. + axis: An int. The axis along which to repeat values. By default (axis=None), + use the flattened input array, and return a flat output array. + name: A name for the operation. + + Returns: + A Tensor which has the same shape as `input`, except along the given axis. + If axis is None then the output array is flattened to match the flattened + input array. 
+ #### Examples: + ```python + >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0) + ['a', 'a', 'a', 'c', 'c'] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0) + [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1) + [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]] + >>> repeat(3, repeats=4) + [3, 3, 3, 3] + >>> repeat([[1,2], [3,4]], repeats=2) + [1, 1, 2, 2, 3, 3, 4, 4] + ``` + """ + if axis is None: + input = tf.reshape(input, [-1]) + axis = 0 + return repeat_with_axis(input, repeats, axis, name) diff --git a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index 09d791386..7d8419528 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -211,15 +211,19 @@ def _get_logits(): # pylint: disable=missing-docstring output_tensor = array_ops.reshape( tensor, shape=(batch_size, num_elements)) if do_normalize: - from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn, NumericColumn, \ - WeightedCategoricalColumn + from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn,\ + NumericColumn, WeightedCategoricalColumn from tensorflow.python.layers.normalization import batch_normalization - if isinstance(column, EmbeddingColumn) or isinstance(column, _SharedEmbeddingColumn): + if isinstance(column, EmbeddingColumn) or isinstance( + column, _SharedEmbeddingColumn): fc = column.categorical_column - if isinstance(fc, WeightedCategoricalColumn) and fc.weight_feature_key.endswith('_raw_proj_val'): - output_tensor = layer_norm(output_tensor, name='ln_' + column.name) + if isinstance(fc, WeightedCategoricalColumn + ) and fc.weight_feature_key.endswith('_raw_proj_val'): + output_tensor = layer_norm( + output_tensor, name='ln_' + column.name) else: - output_tensor = batch_normalization(output_tensor, name='bn_'+column.name) + output_tensor = 
batch_normalization( + output_tensor, name='bn_' + column.name) elif isinstance(column, NumericColumn) and int(column.shape[-1]) > 1: output_tensor = layer_norm(output_tensor, name='ln_' + column.name) output_tensors.append(output_tensor) @@ -2552,9 +2556,10 @@ def raw_name(self): @property def cardinality(self): - from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn, BucketizedColumn, \ - WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, CrossedColumn, IdentityCategoricalColumn, \ - VocabularyListCategoricalColumn, VocabularyFileCategoricalColumn + from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn,\ + BucketizedColumn, WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, \ + CrossedColumn, IdentityCategoricalColumn, VocabularyListCategoricalColumn,\ + VocabularyFileCategoricalColumn fc = self.categorical_column if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py new file mode 100644 index 000000000..285ff80c5 --- /dev/null +++ b/easy_rec/python/layers/backbone.py @@ -0,0 +1,195 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import logging + +import tensorflow as tf + +from easy_rec.python.utils.dag import DAG +from easy_rec.python.layers import dnn +from easy_rec.python.layers.common_layers import layer_norm, SENet +from easy_rec.python.layers.numerical_embedding import NumericalEmbedding +from easy_rec.python.layers.fibinet import FiBiNetLayer +from easy_rec.python.layers.mask_net import MaskNet + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class EnhancedInputLayer(object): + def __init__(self, config, input_layer, feature_dict): + if config.do_batch_norm and config.do_layer_norm: + raise ValueError('can not do batch norm and layer norm for input layer at the same time') + self._config = config + self._input_layer = input_layer + self._feature_dict = feature_dict + + def __call__(self, feature_group, is_training, *args, **kwargs): + features, feature_list = self._input_layer(self._feature_dict, feature_group) + num_features = len(feature_list) + + do_feature_dropout = 0.0 < self._config.feature_dropout_rate < 1.0 + if self._config.output_feature_list or do_feature_dropout: + if self._config.do_layer_norm or self._config.do_batch_norm: + for i in range(num_features): + fea = feature_list[i] + if self._config.do_batch_norm: + fea = tf.layers.batch_normalization(fea, training=is_training) + elif self._config.do_layer_norm: + fea = layer_norm(fea) + feature_list[i] = fea + elif self._config.do_batch_norm: + features = tf.layers.batch_normalization(features, training=is_training) + elif self._config.do_layer_norm: + features = layer_norm(features) + + if do_feature_dropout and is_training: + keep_prob = 1.0 - self._config.feature_dropout_rate + bern = tf.distributions.Bernoulli(probs=keep_prob) + mask = bern.sample(num_features) + for i in range(num_features): + fea = tf.div(feature_list[i], keep_prob) * mask[i] + feature_list[i] = fea + features = tf.concat(feature_list, axis=-1) + + do_dropout = 0.0 < self._config.dropout_rate < 1.0 + if self._config.output_feature_list: + if 
do_dropout: + for i in range(num_features): + fea = feature_list[i] + fea = tf.layers.dropout(fea, self._config.dropout_rate, training=is_training) + feature_list[i] = fea + return feature_list + if do_dropout: + return tf.layers.dropout(features, self._config.dropout_rate, training=is_training) + return features + + +class Backbone(object): + def __init__(self, config, model, features, input_layer, l2_reg=None): + self._model = model + self._config = config + self._features = features + self._input_layer = input_layer + self._l2_reg = l2_reg + self._dag = DAG() + self._name_to_blocks = {} + for block in config.blocks: + self._name_to_blocks[block.name] = block + self._dag.add_node(block.name) + assert len(self._name_to_blocks) > 0, 'there must be more than one block in backbone' + for block in config.blocks: + assert len(block.inputs) > 0, 'there is no input for block: %s' % block.name + for node in block.inputs: + if node in self._name_to_blocks: + self._dag.add_edge(node, block.name) + + def block_input(self, config, block_outputs): + inputs = [] + for input_name in config.inputs: + if input_name in block_outputs: + input_feature = block_outputs[input_name] + else: + input_feature, _ = self._input_layer(self._features, input_name) + inputs.append(input_feature) + return concat_inputs(inputs, config.name) + + def __call__(self, is_training, *args, **kwargs): + block_outputs = {} + blocks = self._dag.topological_sort() + logging.info("backbone topological: " + ','.join(blocks)) + for block in blocks: + config = self._name_to_blocks[block] + layer = config.WhichOneof('layer') + if layer == 'input_layer': + assert len(config.inputs) == 1, 'only one input needed for input_layer: ' + block.name + conf = config.input_layer + input_layer = EnhancedInputLayer(conf, self._input_layer, self._features) + output = input_layer(config.inputs[0], is_training) + block_outputs[block] = output + elif layer == 'numerical_embedding': + conf = config.numerical_embedding + num_emb = 
NumericalEmbedding(conf.embedding_dim, stddev=conf.coef_stddev, + scope='%s_numerical_embedding' % block) + input_feature = self.block_input(config, block_outputs) + block_outputs[block] = num_emb(input_feature) + elif layer == 'mlp': + mlp = dnn.DNN( + config.mlp, + self._l2_reg, + name='%s_mlp' % block, + is_training=is_training) + input_feature = self.block_input(config, block_outputs) + output = mlp(input_feature) + block_outputs[block] = output + elif layer == 'sequence_encoder': + block_outputs[block] = self.sequence_encoder(config, is_training) + elif layer == 'masknet': + conf = config.masknet + mask_net = MaskNet( + conf, + name=block, + reuse=tf.AUTO_REUSE) + input_feature = self.block_input(config, block_outputs) + output = mask_net( + input_feature, is_training, l2_reg=self._l2_reg) + block_outputs[block] = output + elif layer == 'senet': + conf = config.senet + senet = SENet(conf, name=block) + input_feature = self.block_input(config, block_outputs) + output = senet(input_feature) + block_outputs[block] = output + elif layer == 'fibinet': + conf = config.fibinet + fibinet = FiBiNetLayer(conf, name=block) + input_feature = self.block_input(config, block_outputs) + output = fibinet(input_feature, is_training, l2_reg=self._l2_reg) + block_outputs[block] = output + else: + raise ValueError('Unsupported backbone layer:' + layer) + + temp = [] + for output in self._config.concat_blocks: + if output in block_outputs: + temp.append(block_outputs[output]) + else: + raise ValueError('No output `%s` of backbone to be concat' % output) + + output = concat_inputs(temp) + if self._config.HasField('top_mlp'): + final_dnn = dnn.DNN( + self._config.top_mlp, + self._l2_reg, + name='backbone_top_mlp', + is_training=is_training) + output = final_dnn(output) + return output + + def sequence_encoder(self, config, is_training): + encodings = [] + for seq_input in config.inputs: + encoding = self._model.get_sequence_encoding(seq_input, is_training) + 
encodings.append(encoding) + encoding = concat_inputs(encodings) + conf = config.sequence_encoder + if conf.HasField('mlp'): + sequence_dnn = dnn.DNN( + conf.mlp, + self._l2_reg, + name='%s_seq_dnn' % config.name, + is_training=is_training) + encoding = sequence_dnn(encoding) + return encoding + + +def concat_inputs(inputs, msg=''): + if len(inputs) > 1: + if type(inputs[0]) == list: + from functools import reduce + return reduce(lambda x, y: x + y, inputs) + return tf.concat(inputs, axis=-1) + if len(inputs) == 1: + return inputs[0] + raise ValueError('no inputs to be concat:' + msg) + + diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index e3bb65f64..892e75550 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -70,49 +70,52 @@ def text_cnn(x, def layer_norm(input_tensor, name=None, reuse=None): """Run layer normalization on the last dimension of the tensor.""" return tf_layer_norm( - inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, reuse=reuse, scope=name) + inputs=input_tensor, + begin_norm_axis=-1, + begin_params_axis=-1, + reuse=reuse, + scope=name) class SENet(object): + """SENet+ Layer used in FiBiNET,支持不同field的embedding dimension不等. 
+ + arxiv: 2209.05016 """ - SENet+ Layer,支持不同field的embedding dimension不等 - arxiv: 2209.05016 - """ - def __init__(self, reduction_ratio, num_groups, name='SENet'): - self.reduction_ratio = reduction_ratio - self.num_groups = num_groups + def __init__(self, config, name='SENet'): + self.config = config self.name = name def __call__(self, embedding_list): - """ - - :param embedding_list: [embedding_1,...,embedding_i,...,embedding_f],f为field的数目,embedding_i is [bs, dim] - :return: - """ - print("SENET layer with %d inputs" % len(embedding_list)) + """embedding_list: - A list of 2D tensor with shape: ``(batch_size,embedding_size)``.""" + print('SENET layer with %d inputs' % len(embedding_list)) + g = self.config.num_squeeze_group for emb in embedding_list: assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors' + dim = int(emb.shape[-1]) + assert dim >= g and dim % g == 0, 'field embedding dimension %d must be divisible by %d' % ( + dim, g) field_size = len(embedding_list) feature_size_list = [emb.shape.as_list()[-1] for emb in embedding_list] # Squeeze - g = self.num_groups # embedding dimension 必须能被 g 整除 group_embs = [ - tf.reshape(emb, [-1, g, tf.shape(emb)[-1] // g]) + tf.reshape(emb, [-1, g, int(emb.shape[-1]) // g]) for emb in embedding_list ] squeezed = [] for emb in group_embs: - squeezed.append(tf.reduce_max(emb, axis=-1)) - squeezed.append(tf.reduce_mean(emb, axis=-1)) + squeezed.append(tf.reduce_max(emb, axis=-1)) # [B, g] + squeezed.append(tf.reduce_mean(emb, axis=-1)) # [B, g] z = tf.concat(squeezed, axis=1) # [bs, field_size * num_groups * 2] # Excitation - reduction_size = max(1, field_size * g * 2 // self.reduction_ratio) + r = self.config.reduction_ratio + reduction_size = max(1, field_size * g * 2 // r) initializer = tf.glorot_normal_initializer() a1 = tf.layers.dense( @@ -121,18 +124,24 @@ def __call__(self, embedding_list): kernel_initializer=initializer, activation=tf.nn.relu, name='%s/W1' % self.name) - a2 = tf.layers.dense( + 
weights = tf.layers.dense( a1, sum(feature_size_list), kernel_initializer=initializer, name='%s/W2' % self.name) - # Re-weight & Fuse - a = tf.split(a2, feature_size_list, axis=1) - senet_like_embeddings = [ - layer_norm(emb * w + emb) for emb, w in zip(embedding_list, a) - ] - return tf.concat(senet_like_embeddings, axis=-1) + # Re-weight + inputs = tf.concat(embedding_list, axis=-1) + output = inputs * weights + + # Fuse, add skip-connection + if self.config.use_skip_connection: + output += inputs + + # Layer Normalization + if self.config.use_output_layer_norm: + output = layer_norm(output) + return output def _full_interaction(v_i, v_j): @@ -170,9 +179,10 @@ def __init__(self, self.func = tf.multiply def __call__(self, embeddings): - print("Bilinear Layer with %d inputs" % len(embeddings)) + print('Bilinear Layer with %d inputs' % len(embeddings)) if len(embeddings) > 200: - logging.warn("There are too many inputs for bilinear layer: %d" % len(embeddings)) + logging.warn('There are too many inputs for bilinear layer: %d' % + len(embeddings)) equal_dim = True _dim = embeddings[0].shape[-1] for emb in embeddings: @@ -180,7 +190,9 @@ def __call__(self, embeddings): if emb.shape[-1] != _dim: equal_dim = False if not equal_dim and self.bilinear_type != 'interaction': - raise ValueError('all embedding dimensions must be same when use bilinear type: interaction') + raise ValueError( + 'all embedding dimensions must be same when not use bilinear type: interaction' + ) dim = int(_dim) field_size = len(embeddings) diff --git a/easy_rec/python/layers/dnn.py b/easy_rec/python/layers/dnn.py index 7a57f5661..e09891845 100644 --- a/easy_rec/python/layers/dnn.py +++ b/easy_rec/python/layers/dnn.py @@ -18,7 +18,8 @@ def __init__(self, name='dnn', is_training=False, last_layer_no_activation=False, - last_layer_no_batch_norm=False): + last_layer_no_batch_norm=False, + reuse=None): """Initializes a `DNN` Layer. 
Args: @@ -28,6 +29,7 @@ def __init__(self, is_training: train phase or not, impact batch_norm and dropout last_layer_no_activation: in last layer, use or not use activation last_layer_no_batch_norm: in last layer, use or not use batch norm + reuse: Boolean, whether to reuse the weights of a previous layer by the same name. """ self._config = dnn_config self._l2_reg = l2_reg @@ -38,6 +40,7 @@ def __init__(self, self._config.activation, training=is_training) self._last_layer_no_activation = last_layer_no_activation self._last_layer_no_batch_norm = last_layer_no_batch_norm + self._reuse = reuse @property def hidden_units(self): @@ -59,14 +62,16 @@ def __call__(self, deep_fea, hidden_layer_feature_output=False): units=unit, kernel_regularizer=self._l2_reg, activation=None, - name='%s/dnn_%d' % (self._name, i)) + name='%s/dnn_%d' % (self._name, i), + reuse=self._reuse) if self._config.use_bn and ((i + 1 < hidden_units_len) or not self._last_layer_no_batch_norm): deep_fea = tf.layers.batch_normalization( deep_fea, training=self._is_training, trainable=True, - name='%s/dnn_%d/bn' % (self._name, i)) + name='%s/dnn_%d/bn' % (self._name, i), + reuse=self._reuse) if (i + 1 < hidden_units_len) or not self._last_layer_no_activation: deep_fea = self.activation( deep_fea, name='%s/dnn_%d/act' % (self._name, i)) diff --git a/easy_rec/python/layers/fibinet.py b/easy_rec/python/layers/fibinet.py index 9a419e004..d112561ff 100644 --- a/easy_rec/python/layers/fibinet.py +++ b/easy_rec/python/layers/fibinet.py @@ -1,10 +1,10 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
import tensorflow as tf -from easy_rec.python.layers.common_layers import SENet -from easy_rec.python.layers.common_layers import BiLinear -from easy_rec.python.layers import dnn +from easy_rec.python.layers import dnn +from easy_rec.python.layers.common_layers import BiLinear +from easy_rec.python.layers.common_layers import SENet if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -18,26 +18,25 @@ class FiBiNetLayer(object): https://arxiv.org/pdf/2209.05016.pdf """ - def __init__(self, fibinet_config, features, input_layer): + def __init__(self, fibinet_config, name='fibinet'): self._config = fibinet_config - self._input_layer = input_layer - self._features = features + self.name = name - def __call__(self, group_name, is_training, l2_reg=0, *args, **kwargs): + def __call__(self, inputs, is_training, l2_reg=None, *args, **kwargs): feature_list = [] - _, group_features = self._input_layer(self._features, group_name) - senet = SENet(reduction_ratio=self._config.senet_reduction_ratio, - num_groups=self._config.num_senet_squeeze_group, - name='%s_senet' % group_name) - senet_output = senet(group_features) + + senet = SENet(self._config.senet, name='%s_senet' % self.name) + senet_output = senet(inputs) feature_list.append(senet_output) - if self._config.bilinear_type != 'none': - bilinear = BiLinear(output_size=self._config.bilinear_output_units, - bilinear_type=self._config.bilinear_type, - bilinear_plus=self._config.use_bilinear_plus, - name='%s_bilinear' % group_name) - bilinear_output = bilinear(group_features) + if self._config.HasField('bilinear'): + conf = self._config.bilinear + bilinear = BiLinear( + output_size=conf.output_units, + bilinear_type=conf.type, + bilinear_plus=conf.use_plus, + name='%s_bilinear' % self.name) + bilinear_output = bilinear(inputs) feature_list.append(bilinear_output) if len(feature_list) > 1: @@ -45,9 +44,11 @@ def __call__(self, group_name, is_training, l2_reg=0, *args, **kwargs): else: feature = feature_list[0] - final_dnn = dnn.DNN( 
- self._config.mlp, - l2_reg, - name='%s_fibinet_mlp' % group_name, - is_training=is_training) - return final_dnn(feature) + if self._config.HasField('mlp'): + final_dnn = dnn.DNN( + self._config.mlp, + l2_reg, + name='%s_fibinet_mlp' % self.name, + is_training=is_training) + feature = final_dnn(feature) + return feature diff --git a/easy_rec/python/layers/fscd_layer.py b/easy_rec/python/layers/fscd_layer.py index ec115f547..daccf750e 100644 --- a/easy_rec/python/layers/fscd_layer.py +++ b/easy_rec/python/layers/fscd_layer.py @@ -35,14 +35,14 @@ def sigmoid(x): def get_feature_importance(pipeline_config, feature_group_name=None): assert pipeline_config.model_config.HasField( - 'variational_dropout'), 'variational_dropout must be in model_config' + 'variational_dropout'), 'variational_dropout must be in model_config' checkpoint_path = tf.train.latest_checkpoint(pipeline_config.model_dir) meta_graph_def = read_meta_graph_file(checkpoint_path + '.meta') features_map = dict() for col_def in meta_graph_def.collection_def[ - 'variational_dropout'].bytes_list.value: + 'variational_dropout'].bytes_list.value: features = json.loads(col_def) features_map.update(features) @@ -108,10 +108,10 @@ def __init__(self, def compute_dropout_mask(self, n): delta_name = 'fscd_delta_%s' % self.name delta = tf.get_variable( - name=delta_name, - shape=[n], - dtype=tf.float32, - initializer=tf.constant_initializer(0.)) + name=delta_name, + shape=[n], + dtype=tf.float32, + initializer=tf.constant_initializer(0.)) delta = tf.nn.sigmoid(delta) epsilon = np.finfo(float).eps max_keep_ratio = self._config.max_keep_ratio @@ -126,8 +126,9 @@ def compute_dropout_mask(self, n): dtype=tf.float32, seed=None, name='uniform_noise') - approx = (tf.log(delta) - tf.log(1. - delta) + - tf.log(unif_noise) - tf.log(1. - unif_noise)) + approx = ( + tf.log(delta) - tf.log(1. - delta) + tf.log(unif_noise) - + tf.log(1. 
- unif_noise)) return tf.sigmoid(approx / self._config.temperature), delta def compute_regular_params(self, cols_to_feature): @@ -147,14 +148,12 @@ def compute_regular_params(self, cols_to_feature): alpha = math.log(sig_c) - math.log(theta) alphas[fc] = alpha print( - str(fc.raw_name), 'complexity:', complexity, 'cardinality:', cardinal, - 'dimension:', dim, 'c:', c, 'theta:', theta, 'alpha:', alpha) + str(fc.raw_name), 'complexity:', complexity, 'cardinality:', cardinal, + 'dimension:', dim, 'c:', c, 'theta:', theta, 'alpha:', alpha) return alphas def __call__(self, cols_to_feature): - """ - cols_to_feature: an ordered dict mapping feature_column to feature_values - """ + """cols_to_feature: an ordered dict mapping feature_column to feature_values.""" feature_dimension = [] output_tensors = [] alphas = [] diff --git a/easy_rec/python/layers/mask_net.py b/easy_rec/python/layers/mask_net.py index fe4816fe8..fbe75c13c 100644 --- a/easy_rec/python/layers/mask_net.py +++ b/easy_rec/python/layers/mask_net.py @@ -10,17 +10,22 @@ class MaskBlock(object): - def __init__(self, mask_block_config): + + def __init__(self, mask_block_config, name='mask_block', reuse=None): self.mask_block_config = mask_block_config + self.name = name + self.reuse = reuse def __call__(self, net, mask_input): mask_input_dim = int(mask_input.shape[-1]) if self.mask_block_config.HasField('reduction_factor'): - aggregation_size = int(mask_input_dim * self.mask_block_config.reduction_factor) + aggregation_size = int(mask_input_dim * + self.mask_block_config.reduction_factor) elif self.mask_block_config.HasField('aggregation_size') is not None: aggregation_size = self.mask_block_config.aggregation_size else: - raise ValueError("Need one of reduction factor or aggregation size for MaskBlock.") + raise ValueError( + 'Need one of reduction factor or aggregation size for MaskBlock.') if self.mask_block_config.input_layer_norm: input_name = net.name.replace(':', '_') @@ -28,45 +33,66 @@ def __call__(self, 
net, mask_input): # initializer = tf.initializers.variance_scaling() initializer = tf.glorot_uniform_initializer() - mask = tf.layers.dense(mask_input, aggregation_size, - activation=tf.nn.relu, - kernel_initializer=initializer) - mask = tf.layers.dense(mask, net.shape[-1]) + mask = tf.layers.dense( + mask_input, + aggregation_size, + activation=tf.nn.relu, + kernel_initializer=initializer, + name='%s/hidden' % self.name, + reuse=self.reuse) + mask = tf.layers.dense( + mask, net.shape[-1], name='%s/mask' % self.name, reuse=self.reuse) masked_net = net * mask output_size = self.mask_block_config.output_size - hidden_layer_output = tf.layers.dense(masked_net, output_size) - return layer_norm(hidden_layer_output) + hidden_layer_output = tf.layers.dense( + masked_net, output_size, name='%s/output' % self.name, reuse=self.reuse) + return layer_norm( + hidden_layer_output, name='%s/ln_output' % self.name, reuse=self.reuse) class MaskNet(object): - def __init__(self, mask_net_config, name='mask_net'): + + def __init__(self, mask_net_config, name='mask_net', reuse=None): self.mask_net_config = mask_net_config self.name = name + self.reuse = reuse def __call__(self, inputs, is_training, l2_reg=None): conf = self.mask_net_config if conf.use_parallel: mask_outputs = [] - for block_conf in self.mask_net_config.mask_blocks: - mask_layer = MaskBlock(block_conf) + for i, block_conf in enumerate(self.mask_net_config.mask_blocks): + mask_layer = MaskBlock( + block_conf, name='%s/block_%d' % (self.name, i), reuse=self.reuse) mask_outputs.append(mask_layer(mask_input=inputs, net=inputs)) all_mask_outputs = tf.concat(mask_outputs, axis=1) if conf.HasField('mlp'): - mlp = dnn.DNN(conf.mlp, l2_reg, name='%s/mlp' % self.name, is_training=is_training) + mlp = dnn.DNN( + conf.mlp, + l2_reg, + name='%s/mlp' % self.name, + is_training=is_training, + reuse=self.reuse) output = mlp(all_mask_outputs) else: output = all_mask_outputs return output else: net = inputs - for block_conf in 
self.mask_net_config.mask_blocks: - mask_layer = MaskBlock(block_conf) + for i, block_conf in enumerate(self.mask_net_config.mask_blocks): + mask_layer = MaskBlock( + block_conf, name='%s/block_%d' % (self.name, i), reuse=self.reuse) net = mask_layer(net=net, mask_input=inputs) if conf.HasField('mlp'): - mlp = dnn.DNN(conf.mlp, l2_reg, name='%s/mlp' % self.name, is_training=is_training) + mlp = dnn.DNN( + conf.mlp, + l2_reg, + name='%s/mlp' % self.name, + is_training=is_training, + reuse=self.reuse) output = mlp(net) else: output = net diff --git a/easy_rec/python/layers/numerical_embedding.py b/easy_rec/python/layers/numerical_embedding.py new file mode 100644 index 000000000..420716254 --- /dev/null +++ b/easy_rec/python/layers/numerical_embedding.py @@ -0,0 +1,39 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import math + +import tensorflow as tf +from easy_rec.python.compat.array_ops import repeat +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class NumericalEmbedding(object): + + def __init__(self, embedding_dim, scope='numerical_embedding', stddev=1.0): + self.embedding_dim = embedding_dim + self.scope = scope + self.initializer = tf.random_normal_initializer(stddev=stddev) + + def __call__(self, inputs, *args, **kwargs): + if inputs.shape.ndims != 2: + raise ValueError('inputs of NumericalEmbedding must have 2 dimensions.') + + num_features = int(inputs.shape[-1]) + with tf.variable_scope(self.scope): + c = tf.get_variable( + 'coef', + shape=[1, num_features * self.embedding_dim], + initializer=self.initializer) + + features = repeat(inputs, self.embedding_dim, axis=1) + v = features * c * 2 * math.pi + sin_v = tf.split(tf.sin(v), num_features, axis=1) + cos_v = tf.split(tf.cos(v), num_features, axis=1) + + embeddings = [] + for val in zip(sin_v, cos_v): + embedding = tf.concat(val, axis=1) + embedding = tf.layers.dense(embedding, int(embedding.shape[-1]), activation=tf.nn.relu) + embeddings.append(embedding) + 
+ return tf.concat(embeddings, axis=1) diff --git a/easy_rec/python/loss/info_nce_loss.py b/easy_rec/python/loss/info_nce_loss.py new file mode 100644 index 000000000..3fd6b6b18 --- /dev/null +++ b/easy_rec/python/loss/info_nce_loss.py @@ -0,0 +1,41 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import tensorflow as tf + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +def info_nce_loss(query, positive, temperature=0.1): + """Calculates the InfoNCE loss for self-supervised learning. + + This contrastive loss enforces the embeddings of similar (positive) samples to be close + and those of different (negative) samples to be distant. + A query embedding is compared with one positive key and with one or more negative keys. + + References: + https://arxiv.org/abs/1807.03748v2 + https://arxiv.org/abs/2010.05113 + """ + # Check input dimensionality. + if query.shape.ndims != 2: + raise ValueError('<query> must have 2 dimensions.') + if positive.shape.ndims != 2: + raise ValueError('<positive> must have 2 dimensions.') + # Embedding vectors should have same number of components. + if query.shape[-1] != positive.shape[-1]: + raise ValueError( + 'Vectors of <query> and <positive> should have the same number of components.' + ) + + # Negative keys are implicitly off-diagonal positive keys. 
+ + # Cosine between all combinations + logits = tf.matmul(query, positive, transpose_b=True) + logits /= temperature + + # Positive keys are the entries on the diagonal + batch_size = tf.shape(query)[0] + labels = tf.range(batch_size) + + return tf.losses.sparse_softmax_cross_entropy(labels, logits) diff --git a/easy_rec/python/loss/jrc_loss.py b/easy_rec/python/loss/jrc_loss.py index fc77bda86..778068e7e 100644 --- a/easy_rec/python/loss/jrc_loss.py +++ b/easy_rec/python/loss/jrc_loss.py @@ -12,8 +12,9 @@ def jrc_loss(labels, logits, session_ids, alpha=0.5, - auto_weight=False, + loss_weight_strategy='fixed', sample_weights=1.0, + same_label_loss=True, name=''): """Joint Optimization of Ranking and Calibration with Contextualized Hybrid Model. @@ -24,14 +25,15 @@ def jrc_loss(labels, logits: a `Tensor` with shape [batch_size, 2]. e.g. the value of last neuron before activation. session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. user_id alpha: the weight to balance ranking loss and calibration loss - auto_weight: bool, whether to learn loss weight between ranking loss and calibration loss + loss_weight_strategy: str, the loss weight strategy to balancing between ce_loss and ge_loss sample_weights: Coefficients for the loss. This must be scalar or broadcastable to `labels` (i.e. same rank and each dimension is either 1 or the same). + same_label_loss: enable ge_loss for sample with same label in a session or not. 
name: the name of loss """ loss_name = name if name else 'jrc_loss' - logging.info('[{}] alpha: {}, auto_weight: {}'.format(loss_name, alpha, - auto_weight)) + logging.info('[{}] alpha: {}, loss_weight_strategy: {}'.format( + loss_name, alpha, loss_weight_strategy)) ce_loss = tf.losses.sparse_softmax_cross_entropy( labels, logits, weights=sample_weights) @@ -66,12 +68,48 @@ def jrc_loss(labels, y_neg *= pairwise_weights # Compute list-wise generative loss -log p(x|y, z) - loss_pos = -tf.reduce_sum(y_pos * tf.nn.log_softmax(l_pos, axis=0), axis=0) - loss_neg = -tf.reduce_sum(y_neg * tf.nn.log_softmax(l_neg, axis=0), axis=0) - ge_loss = tf.reduce_mean((loss_pos + loss_neg) / tf.reduce_sum(mask, axis=0)) + if same_label_loss: + logging.info('[%s] enable same_label_loss' % loss_name) + loss_pos = -tf.reduce_sum(y_pos * tf.nn.log_softmax(l_pos, axis=0), axis=0) + loss_neg = -tf.reduce_sum(y_neg * tf.nn.log_softmax(l_neg, axis=0), axis=0) + ge_loss = tf.reduce_mean( + (loss_pos + loss_neg) / tf.reduce_sum(mask, axis=0)) + else: + logging.info('[%s] disable same_label_loss' % loss_name) + diag = tf.one_hot(tf.range(batch_size), batch_size) + l_pos = l_pos + (1 - diag) * y_pos * -1e9 + l_neg = l_neg + (1 - diag) * y_neg * -1e9 + loss_pos = -tf.linalg.diag_part(y_pos * tf.nn.log_softmax(l_pos, axis=0)) + loss_neg = -tf.linalg.diag_part(y_neg * tf.nn.log_softmax(l_neg, axis=0)) + ge_loss = tf.reduce_mean(loss_pos + loss_neg) + + tf.summary.scalar('loss/%s_ce' % loss_name, ce_loss) + tf.summary.scalar('loss/%s_ge' % loss_name, ge_loss) # The final JRC model - if auto_weight: + if loss_weight_strategy == 'fixed': + loss = alpha * ce_loss + (1 - alpha) * ge_loss + elif loss_weight_strategy == 'random_uniform': + weight = tf.random_uniform([]) + loss = weight * ce_loss + (1 - weight) * ge_loss + tf.summary.scalar('loss/%s_ce_weight' % loss_name, weight) + tf.summary.scalar('loss/%s_ge_weight' % loss_name, 1 - weight) + elif loss_weight_strategy == 'random_normal': + weights = 
tf.random_normal([2]) + loss_weight = tf.nn.softmax(weights) + loss = loss_weight[0] * ce_loss + loss_weight[1] * ge_loss + tf.summary.scalar('loss/%s_ce_weight' % loss_name, loss_weight[0]) + tf.summary.scalar('loss/%s_ge_weight' % loss_name, loss_weight[1]) + elif loss_weight_strategy == 'random_bernoulli': + bern = tf.distributions.Bernoulli(probs=0.5, dtype=tf.float32) + weights = bern.sample(2) + loss_weight = tf.cond( + tf.equal(tf.reduce_sum(weights), 1), lambda: weights, + lambda: tf.convert_to_tensor([0.5, 0.5])) + loss = loss_weight[0] * ce_loss + loss_weight[1] * ge_loss + tf.summary.scalar('loss/%s_ce_weight' % loss_name, loss_weight[0]) + tf.summary.scalar('loss/%s_ge_weight' % loss_name, loss_weight[1]) + elif loss_weight_strategy == 'uncertainty': uncertainty1 = tf.Variable( 0, name='%s_ranking_loss_weight' % loss_name, dtype=tf.float32) tf.summary.scalar('loss/%s_ranking_uncertainty' % loss_name, uncertainty1) @@ -82,5 +120,6 @@ def jrc_loss(labels, loss = tf.exp(-uncertainty1) * ce_loss + 0.5 * uncertainty1 loss += tf.exp(-uncertainty2) * ge_loss + 0.5 * uncertainty2 else: - loss = alpha * ce_loss + (1 - alpha) * ge_loss + raise ValueError('Unsupported loss weight strategy `%s` for jrc loss' % + loss_weight_strategy) return loss diff --git a/easy_rec/python/model/dbmtl.py b/easy_rec/python/model/dbmtl.py index e829ba57f..a1ebbf14b 100644 --- a/easy_rec/python/model/dbmtl.py +++ b/easy_rec/python/model/dbmtl.py @@ -6,8 +6,6 @@ from easy_rec.python.layers import dnn from easy_rec.python.layers import mmoe from easy_rec.python.layers import uniter -from easy_rec.python.layers import fibinet -from easy_rec.python.layers import mask_net from easy_rec.python.model.multi_task_model import MultiTaskModel from easy_rec.python.protos.dbmtl_pb2 import DBMTL as DBMTLConfig @@ -39,54 +37,56 @@ def __init__(self, features, self._model_config.bottom_uniter, self._input_layer) - elif self._model_config.HasField('bottom_fibinet'): - self._fibinet_layer = 
fibinet.FiBiNetLayer(self._model_config.bottom_fibinet, - features, - self._input_layer) - elif self._model_config.HasField('bottom_mask_net'): - self._mask_net_layer = mask_net.MaskNet(self._model_config.bottom_mask_net) - self._features, _ = self._input_layer(self._feature_dict, 'all') - else: - self._features, _ = self._input_layer(self._feature_dict, 'all') + elif not self.has_backbone: + self._features, self._feature_list = self._input_layer( + self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) def build_predict_graph(self): - if self._model_config.use_input_batch_norm: - self._features = tf.layers.batch_normalization( - self._features, - training=self._is_training, - trainable=True, - name='input_bn') - if self._model_config.HasField('input_dropout_rate'): - drop_rate = self._model_config.input_dropout_rate - self._features = tf.layers.dropout( - self._features, - rate=drop_rate, - training=self._is_training, - name='input_dropout') - - if self._model_config.HasField('bottom_cmbf'): - bottom_fea = self._cmbf_layer(self._is_training, l2_reg=self._l2_reg) - elif self._model_config.HasField('bottom_uniter'): - bottom_fea = self._uniter_layer(self._is_training, l2_reg=self._l2_reg) - elif self._model_config.HasField('bottom_fibinet'): - bottom_fea = self._fibinet_layer('all', self._is_training, l2_reg=self._l2_reg) - elif self._model_config.HasField('bottom_mask_net'): - bottom_fea = self._mask_net_layer(self._features, self._is_training, l2_reg=self._l2_reg) - elif self._model_config.HasField('bottom_dnn'): - bottom_dnn = dnn.DNN( - self._model_config.bottom_dnn, - self._l2_reg, - name='bottom_dnn', - is_training=self._is_training) - bottom_fea = bottom_dnn(self._features) - else: - bottom_fea = self._features + # if self._model_config.use_self_supervised_learning: + # bern = tf.distributions.Bernoulli(probs=0.5) + # num_features = len(self._feature_list) + # mask = bern.sample(num_features) + # left_features, right_features = [], [] + 
# for i in range(num_features): + # fea = self._feature_list[i] + # zero = tf.zeros_like(fea) + # left, right = tf.cond( + # tf.equal(mask[i], 1), lambda: (fea, zero), lambda: (zero, fea)) + # left_features.append(left) + # right_features.append(right) + # left_feature = tf.concat(left_features, axis=-1) + # right_feature = tf.concat(right_features, axis=-1) + # if self._model_config.HasField('bottom_mask_net'): + # left_encoding = self._mask_net_layer( + # left_feature, self._is_training, l2_reg=self._l2_reg) + # right_encoding = self._mask_net_layer( + # right_feature, self._is_training, l2_reg=self._l2_reg) + # else: + # raise ValueError( + # 'Unsupported bottom layer when use self supervised learning') + # + # loss = info_nce_loss( + # left_encoding, + # right_encoding, + # temperature=self._model_config.ssl_loss_temperature) + # self._loss_dict['ssl_loss'] = loss * self._model_config.ssl_loss_weight - if self._model_config.use_sequence_encoder: - seq_encoding = self.get_sequence_encoding(is_training=self._is_training) - if seq_encoding is not None: - bottom_fea = tf.concat([bottom_fea, seq_encoding], axis=-1) + bottom_fea = self.backbone + if bottom_fea is None: + if self._model_config.HasField('bottom_cmbf'): + bottom_fea = self._cmbf_layer(self._is_training, l2_reg=self._l2_reg) + elif self._model_config.HasField('bottom_uniter'): + bottom_fea = self._uniter_layer(self._is_training, l2_reg=self._l2_reg) + elif self._model_config.HasField('bottom_dnn'): + bottom_dnn = dnn.DNN( + self._model_config.bottom_dnn, + self._l2_reg, + name='bottom_dnn', + is_training=self._is_training) + bottom_fea = bottom_dnn(self._features) + else: + bottom_fea = self._features # MMOE block if self._model_config.HasField('expert_dnn'): diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index 4a7ad6330..c6d864498 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -11,7 +11,7 @@ from 
tensorflow.python.ops.variables import PartitionedVariable from easy_rec.python.compat import regularizers -from easy_rec.python.layers import dnn +from easy_rec.python.layers.backbone import Backbone from easy_rec.python.layers import input_layer from easy_rec.python.layers.sequence_encoder import SequenceEncoder from easy_rec.python.utils import constant @@ -66,6 +66,22 @@ def __init__(self, model_config.feature_groups, self._l2_reg) self._sequence_encoding_by_group_name = {} + if model_config.HasField('backbone'): + self._backbone = Backbone(model_config.backbone, self, features, + input_layer=self._input_layer, + l2_reg=self._l2_reg) + else: + self._backbone = None + + @property + def has_backbone(self): + return self._base_model_config.HasField('backbone') + + @property + def backbone(self): + if self._backbone: + return self._backbone(self._is_training) + return None @property def embedding_regularization(self): @@ -104,8 +120,7 @@ def build_input_layer(self, model_config, feature_configs): kernel_regularizer=self._l2_reg, variational_dropout_config=model_config.variational_dropout if model_config.HasField('variational_dropout') else None, - is_training=self._is_training, - do_feature_normalize=model_config.do_feature_normalize) + is_training=self._is_training) def get_sequence_encoding(self, group_name=None, is_training=True): if group_name is not None: @@ -143,13 +158,13 @@ def get_sequence_encoding(self, group_name=None, is_training=True): else: return None - if self._base_model_config.HasField('sequence_dnn'): - sequence_dnn = dnn.DNN( - self._base_model_config.sequence_dnn, - self._l2_reg, - name='sequence_dnn', - is_training=self._is_training) - encoding = sequence_dnn(encoding) + # if self._base_model_config.HasField('sequence_dnn'): + # sequence_dnn = dnn.DNN( + # self._base_model_config.sequence_dnn, + # self._l2_reg, + # name='sequence_dnn', + # is_training=self._is_training) + # encoding = sequence_dnn(encoding) return encoding @abstractmethod diff 
--git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto new file mode 100644 index 000000000..7b128afe4 --- /dev/null +++ b/easy_rec/python/protos/backbone.proto @@ -0,0 +1,44 @@ +syntax = "proto2"; +package protos; + +import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/fibinet.proto"; +import "easy_rec/python/protos/masknet.proto"; + +message NumericalEmbedding { + required uint32 embedding_dim = 1; + required float coef_stddev = 2 [default = 1.0]; +} + +message SequenceLayer { + optional DNN mlp = 1; +} + +message InputLayer { + optional bool do_batch_norm = 1; + optional bool do_layer_norm = 2; + optional float dropout_rate = 3; + optional float feature_dropout_rate = 4; + optional bool output_feature_list = 5; +} + +message Block { + required string name = 1; + // the input names of feature groups or other blocks + repeated string inputs = 2; + oneof layer { + InputLayer input_layer = 100; + NumericalEmbedding numerical_embedding = 101; + SequenceLayer sequence_encoder = 102; + MaskNet masknet = 103; + SENet senet = 104; + FiBiNetTower fibinet = 105; + DNN mlp = 106; + } +} + +message BackboneTower { + repeated Block blocks = 1; + repeated string concat_blocks = 2; + optional DNN top_mlp = 3; +} \ No newline at end of file diff --git a/easy_rec/python/protos/cmbf.proto b/easy_rec/python/protos/cmbf.proto index 598bf1ecf..34e082115 100644 --- a/easy_rec/python/protos/cmbf.proto +++ b/easy_rec/python/protos/cmbf.proto @@ -1,9 +1,50 @@ syntax = "proto2"; package protos; -import "easy_rec/python/protos/layer.proto"; import "easy_rec/python/protos/dnn.proto"; +message CMBFTower { + // The number of heads of cross modal fusion layer + required uint32 multi_head_num = 1 [default = 1]; + // The number of heads of image feature learning layer + required uint32 image_multi_head_num = 101 [default = 1]; + // The number of heads of text feature learning layer + required uint32 text_multi_head_num = 102 [default = 1]; + 
// The dimension of text heads + required uint32 text_head_size = 2; + // The dimension of image heads + required uint32 image_head_size = 3 [default = 64]; + // The number of patches of image feature, take effect when there is only one image feature + required uint32 image_feature_patch_num = 4 [default = 1]; + // Do dimension reduce to this size for image feature before single modal learning module + required uint32 image_feature_dim = 5 [default = 0]; + // The number of self attention layers for image features + required uint32 image_self_attention_layer_num = 6 [default = 0]; + // The number of self attention layers for text features + required uint32 text_self_attention_layer_num = 7 [default = 1]; + // The number of cross modal layers + required uint32 cross_modal_layer_num = 8 [default = 1]; + // The dimension of image cross modal heads + required uint32 image_cross_head_size = 9; + // The dimension of text cross modal heads + required uint32 text_cross_head_size = 10; + // Dropout probability for hidden layers + required float hidden_dropout_prob = 11 [default = 0.0]; + // Dropout probability of the attention probabilities + required float attention_probs_dropout_prob = 12 [default = 0.0]; + + // Whether to add embeddings for different text sequence features + required bool use_token_type = 13 [default = false]; + // Whether to add position embeddings for the position of each token in the text sequence + required bool use_position_embeddings = 14 [default = true]; + // Maximum sequence length that might ever be used with this model + required uint32 max_position_embeddings = 15 [default = 0]; + // Dropout probability for text sequence embeddings + required float text_seq_emb_dropout_prob = 16 [default = 0.1]; + // dnn layers for other features + optional DNN other_feature_dnn = 17; +} + message CMBF { required CMBFTower config = 1; diff --git a/easy_rec/python/protos/dbmtl.proto b/easy_rec/python/protos/dbmtl.proto index 5c7152ee1..a9c4a2e74 100644 --- 
a/easy_rec/python/protos/dbmtl.proto +++ b/easy_rec/python/protos/dbmtl.proto @@ -3,19 +3,14 @@ package protos; import "easy_rec/python/protos/dnn.proto"; import "easy_rec/python/protos/tower.proto"; -import "easy_rec/python/protos/layer.proto"; -import "easy_rec/python/protos/fibinet.proto"; -import "easy_rec/python/protos/masknet.proto"; +import "easy_rec/python/protos/cmbf.proto"; +import "easy_rec/python/protos/uniter.proto"; message DBMTL { // shared bottom cmbf layer optional CMBFTower bottom_cmbf = 101; // shared bottom uniter layer optional UniterTower bottom_uniter = 102; - // shared bottom fibinet layer - optional FiBiNetTower bottom_fibinet = 103; - // shared bottom mask net layer - optional MaskNet bottom_mask_net = 104; // shared bottom dnn layer optional DNN bottom_dnn = 1; // mmoe expert dnn layer definition @@ -26,10 +21,9 @@ message DBMTL { repeated BayesTaskTower task_towers = 4; // l2 regularization optional float l2_regularization = 5 [default = 1e-4]; - // Whether to user sequence encoder - required bool use_sequence_encoder = 6 [default = false]; - // Whether to user sequence encoder - required bool use_input_batch_norm = 7 [default = false]; - // input layer dropout rate - optional float input_dropout_rate = 8 [default = 0]; + + // Whether to use self supervised learning + required bool use_self_supervised_learning = 9 [default = false]; + optional float ssl_loss_weight = 10 [default = 1.0]; + optional float ssl_loss_temperature = 11 [default = 0.1]; } diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index f28180e10..faa78a0bf 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -1,9 +1,9 @@ syntax = "proto2"; package protos; +import "easy_rec/python/protos/backbone.proto"; import "easy_rec/python/protos/fm.proto"; import "easy_rec/python/protos/deepfm.proto"; -import "easy_rec/python/protos/dnn.proto"; import 
"easy_rec/python/protos/wide_and_deep.proto"; import "easy_rec/python/protos/multi_tower.proto"; import "easy_rec/python/protos/dlrm.proto"; @@ -110,8 +110,7 @@ message EasyRecModel { } required LossWeightStrategy loss_weight_strategy = 16 [default = Fixed]; - // dnn layers after sequence feature - optional DNN sequence_dnn = 17; + optional BackboneTower backbone = 17; - optional bool do_feature_normalize = 18; + // optional bool do_feature_normalize = 18; } diff --git a/easy_rec/python/protos/fibinet.proto b/easy_rec/python/protos/fibinet.proto index b13fd7cba..124bebfe4 100644 --- a/easy_rec/python/protos/fibinet.proto +++ b/easy_rec/python/protos/fibinet.proto @@ -3,13 +3,21 @@ package protos; import "easy_rec/python/protos/dnn.proto"; -message FiBiNetTower { - required string bilinear_type = 1 [default = 'interaction']; - required bool use_bilinear_plus = 2 [default = true]; - required uint32 bilinear_output_units = 3; +message SENet { + required uint32 reduction_ratio = 1 [default = 4]; + optional uint32 num_squeeze_group = 2 [default = 2]; + optional bool use_skip_connection = 3 [default = true]; + optional bool use_output_layer_norm = 4 [default = true]; +} - required uint32 senet_reduction_ratio = 4 [default = 3]; - optional uint32 num_senet_squeeze_group = 5 [default = 2]; +message Bilinear { + required string type = 1 [default = 'interaction']; + required bool use_plus = 2 [default = true]; + required uint32 output_units = 3; +} - required DNN mlp = 6; +message FiBiNetTower { + optional Bilinear bilinear = 1; + required SENet senet = 2; + optional DNN mlp = 8; } diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index 4ddacac5e..b2ac0d789 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -8,72 +8,7 @@ message HighWayTower { required uint32 emb_size = 2; } -message CMBFTower { - // The number of heads of cross modal fusion layer - required uint32 multi_head_num = 1 [default = 1]; - 
// The number of heads of image feature learning layer - required uint32 image_multi_head_num = 101 [default = 1]; - // The number of heads of text feature learning layer - required uint32 text_multi_head_num = 102 [default = 1]; - // The dimension of text heads - required uint32 text_head_size = 2; - // The dimension of image heads - required uint32 image_head_size = 3 [default = 64]; - // The number of patches of image feature, take effect when there is only one image feature - required uint32 image_feature_patch_num = 4 [default = 1]; - // Do dimension reduce to this size for image feature before single modal learning module - required uint32 image_feature_dim = 5 [default = 0]; - // The number of self attention layers for image features - required uint32 image_self_attention_layer_num = 6 [default = 0]; - // The number of self attention layers for text features - required uint32 text_self_attention_layer_num = 7 [default = 1]; - // The number of cross modal layers - required uint32 cross_modal_layer_num = 8 [default = 1]; - // The dimension of image cross modal heads - required uint32 image_cross_head_size = 9; - // The dimension of text cross modal heads - required uint32 text_cross_head_size = 10; - // Dropout probability for hidden layers - required float hidden_dropout_prob = 11 [default = 0.0]; - // Dropout probability of the attention probabilities - required float attention_probs_dropout_prob = 12 [default = 0.0]; - // Whether to add embeddings for different text sequence features - required bool use_token_type = 13 [default = false]; - // Whether to add position embeddings for the position of each token in the text sequence - required bool use_position_embeddings = 14 [default = true]; - // Maximum sequence length that might ever be used with this model - required uint32 max_position_embeddings = 15 [default = 0]; - // Dropout probability for text sequence embeddings - required float text_seq_emb_dropout_prob = 16 [default = 0.1]; - // dnn layers for 
other features - optional DNN other_feature_dnn = 17; -} - -message UniterTower { - // Size of the encoder layers and the pooler layer - required uint32 hidden_size = 1; - // Number of hidden layers in the Transformer encoder - required uint32 num_hidden_layers = 2; - // Number of attention heads for each attention layer in the Transformer encoder - required uint32 num_attention_heads = 3; - // The size of the "intermediate" (i.e. feed-forward) layer in the Transformer encoder - required uint32 intermediate_size = 4; - // The non-linear activation function (function or string) in the encoder and pooler. - required string hidden_act = 5 [default = 'gelu']; // "gelu", "relu", "tanh" and "swish" are supported. - // The dropout probability for all fully connected layers in the embeddings, encoder, and pooler - required float hidden_dropout_prob = 6 [default = 0.1]; - // The dropout ratio for the attention probabilities - required float attention_probs_dropout_prob = 7 [default = 0.1]; - // The maximum sequence length that this model might ever be used with - required uint32 max_position_embeddings = 8 [default = 512]; - // Whether to add position embeddings for the position of each token in the text sequence - required bool use_position_embeddings = 9 [default = true]; - // The stddev of the truncated_normal_initializer for initializing all weight matrices - required float initializer_range = 10 [default = 0.02]; - // dnn layers for other features - optional DNN other_feature_dnn = 11; -} message SequenceEncoder { // encoder parameters diff --git a/easy_rec/python/protos/loss.proto b/easy_rec/python/protos/loss.proto index c5b74f47d..5c913bf6e 100644 --- a/easy_rec/python/protos/loss.proto +++ b/easy_rec/python/protos/loss.proto @@ -93,4 +93,6 @@ message PairwiseLogisticLoss { message JRCLoss { required string session_name = 1; optional float alpha = 2 [default = 0.5]; + optional bool same_label_loss = 3 [default = true]; + required string loss_weight_strategy = 4 
[default = 'fixed']; } diff --git a/easy_rec/python/protos/masknet.proto b/easy_rec/python/protos/masknet.proto index c9b0b703a..3feba334e 100644 --- a/easy_rec/python/protos/masknet.proto +++ b/easy_rec/python/protos/masknet.proto @@ -14,4 +14,4 @@ message MaskNet { repeated MaskBlock mask_blocks = 1; required bool use_parallel = 2 [default = true]; optional DNN mlp = 3; -} \ No newline at end of file +} diff --git a/easy_rec/python/protos/uniter.proto b/easy_rec/python/protos/uniter.proto index 7e78ad23e..9efc1dc9e 100644 --- a/easy_rec/python/protos/uniter.proto +++ b/easy_rec/python/protos/uniter.proto @@ -1,9 +1,33 @@ syntax = "proto2"; package protos; -import "easy_rec/python/protos/layer.proto"; import "easy_rec/python/protos/dnn.proto"; +message UniterTower { + // Size of the encoder layers and the pooler layer + required uint32 hidden_size = 1; + // Number of hidden layers in the Transformer encoder + required uint32 num_hidden_layers = 2; + // Number of attention heads for each attention layer in the Transformer encoder + required uint32 num_attention_heads = 3; + // The size of the "intermediate" (i.e. feed-forward) layer in the Transformer encoder + required uint32 intermediate_size = 4; + // The non-linear activation function (function or string) in the encoder and pooler. + required string hidden_act = 5 [default = 'gelu']; // "gelu", "relu", "tanh" and "swish" are supported. 
+ // The dropout probability for all fully connected layers in the embeddings, encoder, and pooler + required float hidden_dropout_prob = 6 [default = 0.1]; + // The dropout ratio for the attention probabilities + required float attention_probs_dropout_prob = 7 [default = 0.1]; + // The maximum sequence length that this model might ever be used with + required uint32 max_position_embeddings = 8 [default = 512]; + // Whether to add position embeddings for the position of each token in the text sequence + required bool use_position_embeddings = 9 [default = true]; + // The stddev of the truncated_normal_initializer for initializing all weight matrices + required float initializer_range = 10 [default = 0.02]; + // dnn layers for other features + optional DNN other_feature_dnn = 11; +} + message Uniter { required UniterTower config = 1; diff --git a/easy_rec/python/utils/dag.py b/easy_rec/python/utils/dag.py new file mode 100644 index 000000000..5063c8473 --- /dev/null +++ b/easy_rec/python/utils/dag.py @@ -0,0 +1,205 @@ +from collections import OrderedDict, defaultdict +from copy import copy, deepcopy + + +class DAG(object): + """ Directed acyclic graph implementation. """ + + def __init__(self): + """ Construct a new DAG with no nodes or edges. """ + self.reset_graph() + + def add_node(self, node_name, graph=None): + """ Add a node if it does not exist yet, or error out. """ + if not graph: + graph = self.graph + if node_name in graph: + raise KeyError('node %s already exists' % node_name) + graph[node_name] = set() + + def add_node_if_not_exists(self, node_name, graph=None): + try: + self.add_node(node_name, graph=graph) + except KeyError: + pass + + def delete_node(self, node_name, graph=None): + """ Deletes this node and all edges referencing it. 
""" + if not graph: + graph = self.graph + if node_name not in graph: + raise KeyError('node %s does not exist' % node_name) + graph.pop(node_name) + + for node, edges in graph.items(): + if node_name in edges: + edges.remove(node_name) + + def delete_node_if_exists(self, node_name, graph=None): + try: + self.delete_node(node_name, graph=graph) + except KeyError: + pass + + def add_edge(self, ind_node, dep_node, graph=None): + """ Add an edge (dependency) between the specified nodes. """ + if not graph: + graph = self.graph + if ind_node not in graph or dep_node not in graph: + raise KeyError('one or more nodes do not exist in graph') + test_graph = deepcopy(graph) + test_graph[ind_node].add(dep_node) + is_valid, message = self.validate(test_graph) + if is_valid: + graph[ind_node].add(dep_node) + else: + raise Exception() + + def delete_edge(self, ind_node, dep_node, graph=None): + """ Delete an edge from the graph. """ + if not graph: + graph = self.graph + if dep_node not in graph.get(ind_node, []): + raise KeyError('this edge does not exist in graph') + graph[ind_node].remove(dep_node) + + def rename_edges(self, old_task_name, new_task_name, graph=None): + """ Change references to a task in existing edges. """ + if not graph: + graph = self.graph + for node, edges in graph.items(): + + if node == old_task_name: + graph[new_task_name] = copy(edges) + del graph[old_task_name] + + else: + if old_task_name in edges: + edges.remove(old_task_name) + edges.add(new_task_name) + + def predecessors(self, node, graph=None): + """ Returns a list of all predecessors of the given node """ + if graph is None: + graph = self.graph + return [key for key in graph if node in graph[key]] + + def downstream(self, node, graph=None): + """ Returns a list of all nodes this node has edges towards. 
""" + if graph is None: + graph = self.graph + if node not in graph: + raise KeyError('node %s is not in graph' % node) + return list(graph[node]) + + def all_downstreams(self, node, graph=None): + """Returns a list of all nodes ultimately downstream + of the given node in the dependency graph, in + topological order.""" + if graph is None: + graph = self.graph + nodes = [node] + nodes_seen = set() + i = 0 + while i < len(nodes): + downstreams = self.downstream(nodes[i], graph) + for downstream_node in downstreams: + if downstream_node not in nodes_seen: + nodes_seen.add(downstream_node) + nodes.append(downstream_node) + i += 1 + return list( + filter( + lambda node: node in nodes_seen, + self.topological_sort(graph=graph) + ) + ) + + def all_leaves(self, graph=None): + """ Return a list of all leaves (nodes with no downstreams) """ + if graph is None: + graph = self.graph + return [key for key in graph if not graph[key]] + + def from_dict(self, graph_dict): + """ Reset the graph and build it from the passed dictionary. + The dictionary takes the form of {node_name: [directed edges]} + """ + + self.reset_graph() + for new_node in graph_dict.keys(): + self.add_node(new_node) + for ind_node, dep_nodes in graph_dict.items(): + if not isinstance(dep_nodes, list): + raise TypeError('dict values must be lists') + for dep_node in dep_nodes: + self.add_edge(ind_node, dep_node) + + def reset_graph(self): + """ Restore the graph to an empty state. """ + self.graph = OrderedDict() + + def ind_nodes(self, graph=None): + """ Returns a list of all nodes in the graph with no dependencies. """ + if graph is None: + graph = self.graph + + dependent_nodes = set( + node for dependents in graph.values() for node in dependents + ) + return [node for node in graph.keys() if node not in dependent_nodes] + + def validate(self, graph=None): + """ Returns (Boolean, message) of whether DAG is valid. 
""" + graph = graph if graph is not None else self.graph + if len(self.ind_nodes(graph)) == 0: + return False, 'no independent nodes detected' + try: + self.topological_sort(graph) + except ValueError: + return False, 'failed topological sort' + return True, 'valid' + + def topological_sort(self, graph=None): + """ Returns a topological ordering of the DAG. + Raises an error if this is not possible (graph is not valid). + """ + if graph is None: + graph = self.graph + result = [] + in_degree = defaultdict(lambda: 0) + + for u in graph: + for v in graph[u]: + in_degree[v] += 1 + ready = [node for node in graph if not in_degree[node]] + + while ready: + u = ready.pop() + result.append(u) + for v in graph[u]: + in_degree[v] -= 1 + if in_degree[v] == 0: + ready.append(v) + + if len(result) == len(graph): + return result + else: + raise ValueError('graph is not acyclic') + + def size(self): + return len(self.graph) + + +if __name__ == '__main__': + dag = DAG() + dag.add_node("a") + dag.add_node("b") + dag.add_node("c") + dag.add_node("d") + dag.add_edge("a", "b") + dag.add_edge("a", "d") + dag.add_edge("b", "c") + print(dag.topological_sort()) + print(dag.graph) + print(dag.all_downstreams("b")) \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index f99acc17b..cd2b0ac0c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ multi_line_output = 7 force_single_line = true known_standard_library = setuptools known_first_party = easy_rec -known_third_party = absl,common_io,distutils,docutils,eas_prediction,easyrec_request,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml +known_third_party = absl,common_io,docutils,eas_prediction,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,six,skimage,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml no_lines_before = LOCALFOLDER default_section = THIRDPARTY skip = easy_rec/python/protos From 
5a47eb822c523dfcfb1c6e4d40cb9b1e2564b914 Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 12 Jun 2023 14:24:00 +0800 Subject: [PATCH 29/54] [feat]: add backbone network --- easy_rec/python/layers/backbone.py | 27 ++++++-- easy_rec/python/layers/common_layers.py | 5 +- easy_rec/python/layers/mask_net.py | 12 ++-- easy_rec/python/layers/numerical_embedding.py | 49 ++++++++++++-- easy_rec/python/model/rank_model.py | 9 +++ easy_rec/python/protos/backbone.proto | 29 +++----- easy_rec/python/protos/easy_rec_model.proto | 3 +- easy_rec/python/protos/feature_config.proto | 2 +- easy_rec/python/protos/layer.proto | 67 ++++++------------- easy_rec/python/protos/seq_encoder.proto | 53 +++++++++++++++ 10 files changed, 172 insertions(+), 84 deletions(-) create mode 100644 easy_rec/python/protos/seq_encoder.proto diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 285ff80c5..3e95ba709 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -6,8 +6,8 @@ from easy_rec.python.utils.dag import DAG from easy_rec.python.layers import dnn -from easy_rec.python.layers.common_layers import layer_norm, SENet -from easy_rec.python.layers.numerical_embedding import NumericalEmbedding +from easy_rec.python.layers.common_layers import layer_norm, SENet, highway +from easy_rec.python.layers.numerical_embedding import PeriodicEmbedding, AutoDisEmbedding from easy_rec.python.layers.fibinet import FiBiNetLayer from easy_rec.python.layers.mask_net import MaskNet @@ -97,6 +97,7 @@ def __call__(self, is_training, *args, **kwargs): block_outputs = {} blocks = self._dag.topological_sort() logging.info("backbone topological: " + ','.join(blocks)) + print("backbone topological: " + ','.join(blocks)) for block in blocks: config = self._name_to_blocks[block] layer = config.WhichOneof('layer') @@ -106,12 +107,26 @@ def __call__(self, is_training, *args, **kwargs): input_layer = EnhancedInputLayer(conf, self._input_layer, 
self._features) output = input_layer(config.inputs[0], is_training) block_outputs[block] = output - elif layer == 'numerical_embedding': - conf = config.numerical_embedding - num_emb = NumericalEmbedding(conf.embedding_dim, stddev=conf.coef_stddev, - scope='%s_numerical_embedding' % block) + elif layer == 'periodic_embedding': + conf = config.periodic_embedding + num_emb = PeriodicEmbedding(conf.embedding_dim, stddev=conf.coef_stddev, scope=block) input_feature = self.block_input(config, block_outputs) block_outputs[block] = num_emb(input_feature) + elif layer == 'auto_dis_embedding': + conf = config.auto_dis_embedding + num_emb = AutoDisEmbedding(conf, scope=block) + input_feature = self.block_input(config, block_outputs) + block_outputs[block] = num_emb(input_feature) + elif layer == 'highway': + conf = config.highway + input_feature = self.block_input(config, block_outputs) + highway_fea = highway( + input_feature, + conf.emb_size, + activation=conf.activation, + dropout=conf.dropout_rate, + scope=block) + block_outputs[block] = highway_fea(input_feature) elif layer == 'mlp': mlp = dnn.DNN( config.mlp, diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index 892e75550..be4615699 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -2,10 +2,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import itertools import logging - +import six import tensorflow as tf from easy_rec.python.compat.layers import layer_norm as tf_layer_norm +from easy_rec.python.utils.activation import get_activation if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -18,6 +19,8 @@ def highway(x, scope='highway', dropout=0.0, reuse=None): + if isinstance(activation, six.string_types): + activation = get_activation(activation) with tf.variable_scope(scope, reuse): if size is None: size = x.shape.as_list()[-1] diff --git a/easy_rec/python/layers/mask_net.py b/easy_rec/python/layers/mask_net.py index fbe75c13c..034cd6018 100644 --- a/easy_rec/python/layers/mask_net.py +++ b/easy_rec/python/layers/mask_net.py @@ -45,15 +45,19 @@ def __call__(self, net, mask_input): masked_net = net * mask output_size = self.mask_block_config.output_size - hidden_layer_output = tf.layers.dense( - masked_net, output_size, name='%s/output' % self.name, reuse=self.reuse) - return layer_norm( - hidden_layer_output, name='%s/ln_output' % self.name, reuse=self.reuse) + hidden = tf.layers.dense( + masked_net, output_size, use_bias=False, name='%s/output' % self.name, reuse=self.reuse) + ln_hidden = layer_norm(hidden, name='%s/ln_output' % self.name, reuse=self.reuse) + return tf.nn.relu(ln_hidden) class MaskNet(object): def __init__(self, mask_net_config, name='mask_net', reuse=None): + """MaskNet: Introducing Feature-Wise Multiplication to CTR Ranking Models by Instance-Guided Mask. 
+ + Refer: https://arxiv.org/pdf/2102.07619.pdf + """ self.mask_net_config = mask_net_config self.name = name self.reuse = reuse diff --git a/easy_rec/python/layers/numerical_embedding.py b/easy_rec/python/layers/numerical_embedding.py index 420716254..26e9f63a3 100644 --- a/easy_rec/python/layers/numerical_embedding.py +++ b/easy_rec/python/layers/numerical_embedding.py @@ -8,16 +8,20 @@ tf = tf.compat.v1 -class NumericalEmbedding(object): +class PeriodicEmbedding(object): - def __init__(self, embedding_dim, scope='numerical_embedding', stddev=1.0): - self.embedding_dim = embedding_dim + def __init__(self, embedding_dim, scope='periodic_embedding', stddev=1.0): + """On Embeddings for Numerical Features in Tabular Deep Learning. + + Refer: https://arxiv.org/pdf/2203.05556.pdf + """ + self.embedding_dim = embedding_dim // 2 self.scope = scope self.initializer = tf.random_normal_initializer(stddev=stddev) def __call__(self, inputs, *args, **kwargs): if inputs.shape.ndims != 2: - raise ValueError('inputs of NumericalEmbedding must have 2 dimensions.') + raise ValueError('inputs of PeriodicEmbedding must have 2 dimensions.') num_features = int(inputs.shape[-1]) with tf.variable_scope(self.scope): @@ -37,3 +41,40 @@ def __call__(self, inputs, *args, **kwargs): embedding = tf.layers.dense(embedding, int(embedding.shape[-1]), activation=tf.nn.relu) embeddings.append(embedding) return tf.concat(embeddings, axis=1) + + +class AutoDisEmbedding(object): + def __init__(self, config, scope='auto_dis'): + """An Embedding Learning Framework for Numerical Features in CTR Prediction. 
+ + Refer: https://arxiv.org/pdf/2012.08986v2.pdf + """ + self.config = config + self.emb_dim = config.embedding_dim + self.num_bins = config.num_bins + self.scope = scope + + def __call__(self, inputs, *args, **kwargs): + if inputs.shape.ndims != 2: + raise ValueError('inputs of PeriodicEmbedding must have 2 dimensions.') + + num_features = int(inputs.shape[-1]) + with tf.variable_scope(self.scope): + meta_emb = tf.get_variable('meta_embedding', shape=[1, num_features, self.num_bins, self.emb_dim]) + w = tf.get_variable('project_w', shape=[1, num_features, self.num_bins]) + mat = tf.get_variable('project_mat', shape=[1, num_features, self.num_bins, self.num_bins]) + + x = tf.expand_dims(inputs, axis=-1) # [B, num_fea, 1] + hidden = tf.nn.leaky_relu(w * x) # [B, num_fea, num_bin] + + y = tf.matmul(mat, tf.expand_dims(hidden, axis=-1)) # [B, num_fea, num_bin, 1] + y = tf.squeeze(y, axis=3) # [B, num_fea, num_bin] + + # keep_prob(float): if dropout_flag is True, keep_prob rate to keep connect; (float, keep_prob=0.8) + alpha = self.config.keep_prob + x_bar = y + alpha * hidden # [B, num_fea, num_bin] + x_hat = tf.nn.softmax(x_bar / self.config.temperature) # [B, num_fea, num_bin] + + emb = tf.matmul(tf.expand_dims(x_hat, axis=2), meta_emb) # [B, num_fea, 1, emb_dim] + # emb = tf.squeeze(emb, axis=2) # [B, num_fea, emb_dim] + return tf.reshape(emb, [-1, self.emb_dim * num_features]) # [B, num_fea*emb_dim] diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index 4f4368b9f..2b4ccfd21 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -29,6 +29,15 @@ def __init__(self, if self._labels is not None: self._label_name = list(self._labels.keys())[0] + def build_predict_graph(self): + if not self.has_backbone: + raise NotImplementedError('method `build_predict_graph` must be implemented when backbone network do not exits') + + net = self.backbone + output = tf.layers.dense(net, self._num_class, 
name='output') + self._add_to_prediction_dict(output) + return self._prediction_dict + def _output_to_prediction_impl(self, output, loss_type, diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto index 7b128afe4..f17b22a10 100644 --- a/easy_rec/python/protos/backbone.proto +++ b/easy_rec/python/protos/backbone.proto @@ -2,38 +2,29 @@ syntax = "proto2"; package protos; import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/layer.proto"; import "easy_rec/python/protos/fibinet.proto"; import "easy_rec/python/protos/masknet.proto"; -message NumericalEmbedding { - required uint32 embedding_dim = 1; - required float coef_stddev = 2 [default = 1.0]; -} message SequenceLayer { optional DNN mlp = 1; } -message InputLayer { - optional bool do_batch_norm = 1; - optional bool do_layer_norm = 2; - optional float dropout_rate = 3; - optional float feature_dropout_rate = 4; - optional bool output_feature_list = 5; -} - message Block { required string name = 1; // the input names of feature groups or other blocks repeated string inputs = 2; oneof layer { - InputLayer input_layer = 100; - NumericalEmbedding numerical_embedding = 101; - SequenceLayer sequence_encoder = 102; - MaskNet masknet = 103; - SENet senet = 104; - FiBiNetTower fibinet = 105; - DNN mlp = 106; + InputLayer input_layer = 101; + DNN mlp = 102; + PeriodicEmbedding periodic_embedding = 103; + AutoDisEmbedding auto_dis_embedding = 104; + SequenceLayer sequence_encoder = 105; + HighWayTower highway = 106; + MaskNet masknet = 107; + SENet senet = 108; + FiBiNetTower fibinet = 109; } } diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index faa78a0bf..c6a03c403 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -56,6 +56,7 @@ message EasyRecModel { // model parameters oneof model { + RankModel rank_model = 100; DummyModel dummy = 101; WideAndDeep 
wide_and_deep = 102; DeepFM deepfm = 103; @@ -111,6 +112,4 @@ message EasyRecModel { required LossWeightStrategy loss_weight_strategy = 16 [default = Fixed]; optional BackboneTower backbone = 17; - - // optional bool do_feature_normalize = 18; } diff --git a/easy_rec/python/protos/feature_config.proto b/easy_rec/python/protos/feature_config.proto index 17e501361..e8b3b2c4f 100644 --- a/easy_rec/python/protos/feature_config.proto +++ b/easy_rec/python/protos/feature_config.proto @@ -3,7 +3,7 @@ package protos; import "easy_rec/python/protos/hyperparams.proto"; import "easy_rec/python/protos/dnn.proto"; -import "easy_rec/python/protos/layer.proto"; +import "easy_rec/python/protos/seq_encoder.proto"; enum WideOrDeep { DEEP = 0; WIDE = 1; diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index b2ac0d789..5c7bb81a1 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -3,56 +3,29 @@ package protos; import "easy_rec/python/protos/dnn.proto"; -message HighWayTower { - required string input = 1; - required uint32 emb_size = 2; +message InputLayer { + optional bool do_batch_norm = 1; + optional bool do_layer_norm = 2; + optional float dropout_rate = 3; + optional float feature_dropout_rate = 4; + optional bool output_feature_list = 5; } - - -message SequenceEncoder { - // encoder parameters - oneof encoder { - BSTEncoder bst = 101; - DINEncoder din = 102; - } - required bool force_share_embeddings = 1 [default = true]; +message HighWayTower { + optional string input = 1; + required uint32 emb_size = 2; + required string activation = 3 [default = 'gelu']; + optional float dropout_rate = 4; } -message BSTEncoder { - // Size of the encoder layers and the pooler layer - required uint32 hidden_size = 1; - // Number of hidden layers in the Transformer encoder - required uint32 num_hidden_layers = 2; - // Number of attention heads for each attention layer in the Transformer encoder - required uint32 
num_attention_heads = 3; - // The size of the "intermediate" (i.e. feed-forward) layer in the Transformer encoder - required uint32 intermediate_size = 4; - // The non-linear activation function (function or string) in the encoder and pooler. - required string hidden_act = 5 [default = 'gelu']; // "gelu", "relu", "tanh" and "swish" are supported. - // The dropout probability for all fully connected layers in the embeddings, encoder, and pooler - required float hidden_dropout_prob = 6 [default = 0.1]; - // The dropout ratio for the attention probabilities - required float attention_probs_dropout_prob = 7 [default = 0.1]; - // The maximum sequence length that this model might ever be used with - required uint32 max_position_embeddings = 8 [default = 512]; - // Whether to add position embeddings for the position of each token in the text sequence - required bool use_position_embeddings = 9 [default = true]; - // The stddev of the truncated_normal_initializer for initializing all weight matrices - required float initializer_range = 10 [default = 0.02]; - // need contrastive learning - required bool need_contrastive_learning = 11 [default = false]; - // the weight of contrastive learning loss - optional float contrastive_loss_weight = 12 [default = 1.0]; - // whether need auto learn contrastive loss weight - optional bool auto_contrastive_loss_weight = 13 [default = false]; +message PeriodicEmbedding { + required uint32 embedding_dim = 1; + required float coef_stddev = 2 [default = 1.0]; } -message DINEncoder { - // din attention layer - required DNN attention_dnn = 1; - // whether to keep target item feature - required bool need_target_feature = 2 [default = true]; - // option: softmax, sigmoid - required string attention_normalizer = 3 [default = 'softmax']; -} +message AutoDisEmbedding { + required uint32 embedding_dim = 1; + required uint32 num_bins = 2; + required float keep_prob = 3 [default = 0.8]; + required float temperature = 4; +} \ No newline at end of file 
diff --git a/easy_rec/python/protos/seq_encoder.proto b/easy_rec/python/protos/seq_encoder.proto new file mode 100644 index 000000000..7a608af18 --- /dev/null +++ b/easy_rec/python/protos/seq_encoder.proto @@ -0,0 +1,53 @@ +syntax = "proto2"; +package protos; + +import "easy_rec/python/protos/dnn.proto"; + + +message SequenceEncoder { + // Encoder parameters; the oneof allows at most one of bst / din to be set + oneof encoder { + BSTEncoder bst = 101; + DINEncoder din = 102; + } + required bool force_share_embeddings = 1 [default = true]; +} + +message BSTEncoder { + // Size of the encoder layers and the pooler layer + required uint32 hidden_size = 1; + // Number of hidden layers in the Transformer encoder + required uint32 num_hidden_layers = 2; + // Number of attention heads for each attention layer in the Transformer encoder + required uint32 num_attention_heads = 3; + // The size of the "intermediate" (i.e. feed-forward) layer in the Transformer encoder + required uint32 intermediate_size = 4; + // The non-linear activation function (function or string) in the encoder and pooler. + required string hidden_act = 5 [default = 'gelu']; // "gelu", "relu", "tanh" and "swish" are supported.
+ // The dropout probability for all fully connected layers in the embeddings, encoder, and pooler + required float hidden_dropout_prob = 6 [default = 0.1]; + // The dropout ratio for the attention probabilities + required float attention_probs_dropout_prob = 7 [default = 0.1]; + // The maximum sequence length that this model might ever be used with + required uint32 max_position_embeddings = 8 [default = 512]; + // Whether to add position embeddings for the position of each token in the text sequence + required bool use_position_embeddings = 9 [default = true]; + // The stddev of the truncated_normal_initializer for initializing all weight matrices + required float initializer_range = 10 [default = 0.02]; + // Whether contrastive learning is needed + required bool need_contrastive_learning = 11 [default = false]; + // The weight of the contrastive learning loss + optional float contrastive_loss_weight = 12 [default = 1.0]; + // Whether to automatically learn the contrastive loss weight + optional bool auto_contrastive_loss_weight = 13 [default = false]; +} + +message DINEncoder { + // DNN config of the DIN attention layer + required DNN attention_dnn = 1; + // Whether to keep the target item feature + required bool need_target_feature = 2 [default = true]; + // Attention normalizer; options: softmax, sigmoid + required string attention_normalizer = 3 [default = 'softmax']; +} + From b1cb609d02876a4e82b84d6cd2451663573580f2 Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 12 Jun 2023 16:09:20 +0800 Subject: [PATCH 30/54] [feat]: add backbone network --- easy_rec/python/layers/fibinet.py | 2 +- easy_rec/python/protos/easy_rec_model.proto | 5 +- easy_rec/python/protos/fibinet.proto | 2 +- examples/configs/fibinet_on_movielens.config | 197 +++++++++++++++++++ examples/configs/masknet_on_movielens.config | 194 ++++++++++++++++++ examples/readme.md | 2 + 6 files changed, 399 insertions(+), 3 deletions(-) create mode 100644 examples/configs/fibinet_on_movielens.config create mode 100644 examples/configs/masknet_on_movielens.config diff --git
a/easy_rec/python/layers/fibinet.py b/easy_rec/python/layers/fibinet.py index d112561ff..4ba15789e 100644 --- a/easy_rec/python/layers/fibinet.py +++ b/easy_rec/python/layers/fibinet.py @@ -32,7 +32,7 @@ def __call__(self, inputs, is_training, l2_reg=None, *args, **kwargs): if self._config.HasField('bilinear'): conf = self._config.bilinear bilinear = BiLinear( - output_size=conf.output_units, + output_size=conf.num_output_units, bilinear_type=conf.type, bilinear_plus=conf.use_plus, name='%s_bilinear' % self.name) diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index c6a03c403..49a5a9592 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -25,9 +25,12 @@ import "easy_rec/python/protos/loss.proto"; import "easy_rec/python/protos/rocket_launching.proto"; import "easy_rec/python/protos/variational_dropout.proto"; import "easy_rec/python/protos/multi_tower_recall.proto"; + // for input performance test message DummyModel { - +} +// configure backbone network in a free style way +message RankModel { } // for knowledge distillation diff --git a/easy_rec/python/protos/fibinet.proto b/easy_rec/python/protos/fibinet.proto index 124bebfe4..1d48448eb 100644 --- a/easy_rec/python/protos/fibinet.proto +++ b/easy_rec/python/protos/fibinet.proto @@ -13,7 +13,7 @@ message SENet { message Bilinear { required string type = 1 [default = 'interaction']; required bool use_plus = 2 [default = true]; - required uint32 output_units = 3; + required uint32 num_output_units = 3; } message FiBiNetTower { diff --git a/examples/configs/fibinet_on_movielens.config b/examples/configs/fibinet_on_movielens.config new file mode 100644 index 000000000..8508172c6 --- /dev/null +++ b/examples/configs/fibinet_on_movielens.config @@ -0,0 +1,197 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: 
"examples/ckpt/fibinet_on_movieslen_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + 
feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: "emb_list" + inputs: "all" + input_layer { + do_batch_norm: true + output_feature_list: true + } + } + blocks { + name: "fibinet" + inputs: "emb_list" + fibinet { + senet { + reduction_ratio: 4 + } + bilinear { + type: 'each' + num_output_units: 512 + } + mlp { + hidden_units: [512, 256] + } + } + } + concat_blocks: ['fibinet'] + } + rank_model { + } + embedding_regularization: 1e-4 +} +export_config { + multi_placeholder: false +} diff --git a/examples/configs/masknet_on_movielens.config b/examples/configs/masknet_on_movielens.config new file mode 100644 index 000000000..4c7f507b9 --- /dev/null +++ b/examples/configs/masknet_on_movielens.config @@ -0,0 +1,194 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/masknet_on_movieslen_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + 
auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + 
model_class: 'RankModel' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: "mask_net" + inputs: "all" + masknet { + mask_blocks { + aggregation_size: 512 + output_size: 256 + } + mask_blocks { + aggregation_size: 512 + output_size: 256 + } + mask_blocks { + aggregation_size: 512 + output_size: 256 + } + mlp { + hidden_units: [512, 256] + } + } + } + concat_blocks: ['mask_net'] + } + rank_model { + } + embedding_regularization: 1e-4 +} +export_config { + multi_placeholder: false +} diff --git a/examples/readme.md b/examples/readme.md index 4861b0b42..8fa32e511 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -207,6 +207,8 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee | DeepFM | 1 | 0.8688 | | DCN | 1 | 0.8576 | | AutoInt | 1 | 0.8513 | + | MaskNet | 1 | 0.8872 | + | FibiNet | 1 | 0.8879 | - Criteo-Research From 383cbed66851329960de1a140e5e133584cdcc07 Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 12 Jun 2023 22:18:12 +0800 Subject: [PATCH 31/54] [feat]: add test config for backbone network --- easy_rec/python/layers/backbone.py | 38 +- easy_rec/python/layers/fm.py | 36 +- easy_rec/python/protos/backbone.proto | 2 + easy_rec/python/protos/easy_rec_model.proto | 1 + .../configs/deepfm_backbone_on_criteo.config | 560 ++++++++++++++++++ ...pfm_backbone_on_criteo_with_autodis.config | 560 ++++++++++++++++++ .../deepfm_backbone_on_movielens.config | 194 ++++++ examples/configs/deepfm_on_criteo.config | 26 +- examples/data/criteo/download_and_process.sh | 3 +- examples/data/criteo/process_criteo_kaggle.py | 5 +- examples/rank_model/readme.md | 4 +- examples/readme.md | 4 + 12 files changed, 1395 insertions(+), 38 deletions(-) create mode 100644 examples/configs/deepfm_backbone_on_criteo.config create 
mode 100644 examples/configs/deepfm_backbone_on_criteo_with_autodis.config create mode 100644 examples/configs/deepfm_backbone_on_movielens.config diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 3e95ba709..8caa31b80 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -10,6 +10,7 @@ from easy_rec.python.layers.numerical_embedding import PeriodicEmbedding, AutoDisEmbedding from easy_rec.python.layers.fibinet import FiBiNetLayer from easy_rec.python.layers.mask_net import MaskNet +from easy_rec.python.layers.fm import FMLayer if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -96,8 +97,8 @@ def block_input(self, config, block_outputs): def __call__(self, is_training, *args, **kwargs): block_outputs = {} blocks = self._dag.topological_sort() - logging.info("backbone topological: " + ','.join(blocks)) - print("backbone topological: " + ','.join(blocks)) + logging.info("backbone topological order: " + ','.join(blocks)) + print("backbone topological order: " + ','.join(blocks)) for block in blocks: config = self._name_to_blocks[block] layer = config.WhichOneof('layer') @@ -108,60 +109,59 @@ def __call__(self, is_training, *args, **kwargs): output = input_layer(config.inputs[0], is_training) block_outputs[block] = output elif layer == 'periodic_embedding': + input_feature = self.block_input(config, block_outputs) conf = config.periodic_embedding num_emb = PeriodicEmbedding(conf.embedding_dim, stddev=conf.coef_stddev, scope=block) - input_feature = self.block_input(config, block_outputs) block_outputs[block] = num_emb(input_feature) elif layer == 'auto_dis_embedding': - conf = config.auto_dis_embedding - num_emb = AutoDisEmbedding(conf, scope=block) input_feature = self.block_input(config, block_outputs) + num_emb = AutoDisEmbedding(config.auto_dis_embedding, scope=block) block_outputs[block] = num_emb(input_feature) elif layer == 'highway': - conf = config.highway input_feature = 
self.block_input(config, block_outputs) - highway_fea = highway( + conf = config.highway + highway_layer = highway( input_feature, conf.emb_size, activation=conf.activation, dropout=conf.dropout_rate, scope=block) - block_outputs[block] = highway_fea(input_feature) + block_outputs[block] = highway_layer(input_feature) elif layer == 'mlp': + input_feature = self.block_input(config, block_outputs) mlp = dnn.DNN( config.mlp, self._l2_reg, name='%s_mlp' % block, is_training=is_training) - input_feature = self.block_input(config, block_outputs) - output = mlp(input_feature) - block_outputs[block] = output + block_outputs[block] = mlp(input_feature) elif layer == 'sequence_encoder': block_outputs[block] = self.sequence_encoder(config, is_training) elif layer == 'masknet': - conf = config.masknet + input_feature = self.block_input(config, block_outputs) mask_net = MaskNet( - conf, + config.masknet, name=block, reuse=tf.AUTO_REUSE) - input_feature = self.block_input(config, block_outputs) output = mask_net( input_feature, is_training, l2_reg=self._l2_reg) block_outputs[block] = output elif layer == 'senet': - conf = config.senet - senet = SENet(conf, name=block) input_feature = self.block_input(config, block_outputs) + senet = SENet(config.senet, name=block) output = senet(input_feature) block_outputs[block] = output elif layer == 'fibinet': - conf = config.fibinet - fibinet = FiBiNetLayer(conf, name=block) input_feature = self.block_input(config, block_outputs) + fibinet = FiBiNetLayer(config.fibinet, name=block) output = fibinet(input_feature, is_training, l2_reg=self._l2_reg) block_outputs[block] = output + elif layer == 'fm': + input_feature = self.block_input(config, block_outputs) + fm = FMLayer() + block_outputs[block] = fm(input_feature) else: - raise ValueError('Unsupported backbone layer:' + layer) + raise NotImplementedError('Unsupported backbone layer:' + layer) temp = [] for output in self._config.concat_blocks: diff --git a/easy_rec/python/layers/fm.py 
b/easy_rec/python/layers/fm.py
index c638456a4..198d6b8d6 100644
--- a/easy_rec/python/layers/fm.py
+++ b/easy_rec/python/layers/fm.py
@@ -19,9 +19,41 @@ def __init__(self, name='fm'):
 
   def __call__(self, fm_fea):
     with tf.name_scope(self._name):
-      fm_feas = tf.concat(fm_fea, axis=1)
-      fm_feas = tf.expand_dims(fm_feas, axis=1)
+      fm_feas = [tf.expand_dims(x, axis=1) for x in fm_fea]
+      fm_feas = tf.concat(fm_feas, axis=1)
       sum_square = tf.square(tf.reduce_sum(fm_feas, 1))
       square_sum = tf.reduce_sum(tf.square(fm_feas), 1)
       y_v = 0.5 * tf.subtract(sum_square, square_sum)
     return y_v
+
+
+class FMLayer(object):
+  """FM layer: pairwise (order-2) feature interactions, no linear term or bias.
+
+  Input shape
+    - A list or tuple of 2D tensors with shape: ``(batch_size,embedding_size)``.
+    - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)``
+  Output shape
+    - 2D tensor with shape: ``(batch_size, 1)``.
+  References
+    - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf)
+  """
+  def __call__(self, inputs):
+    # isinstance (not an exact type check) so tuples and list subclasses take the list-of-2D-tensors path.
+    if isinstance(inputs, (list, tuple)):
+      emb_dims = {int(x.shape[-1]) for x in inputs}
+      assert len(emb_dims) == 1, 'all embedding dim must be the same in FM layer:' + ','.join([str(d) for d in emb_dims])
+      num_fea = len(inputs)
+      emb_dim = emb_dims.pop()
+      fea = tf.concat(inputs, axis=-1)
+      fea = tf.reshape(fea, [-1, num_fea, emb_dim])
+    else:
+      assert inputs.shape.ndims == 3, 'input of FM layer must be a 3D tensor or a list of 2D tensors'
+      fea = inputs
+
+    # Pairwise term: 0.5 * sum over the embedding axis of ((sum over fields)^2 - sum over fields of squares).
+    square_of_sum = tf.square(tf.reduce_sum(fea, axis=1, keepdims=True))
+    sum_of_square = tf.reduce_sum(fea * fea, axis=1, keepdims=True)
+    cross_term = square_of_sum - sum_of_square
+    cross_term = 0.5 * tf.reduce_sum(cross_term, axis=2, keepdims=False)
+    return cross_term
diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto
index f17b22a10..3dc86cebb 100644
--- a/easy_rec/python/protos/backbone.proto
+++
b/easy_rec/python/protos/backbone.proto @@ -2,6 +2,7 @@ syntax = "proto2"; package protos; import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/fm.proto"; import "easy_rec/python/protos/layer.proto"; import "easy_rec/python/protos/fibinet.proto"; import "easy_rec/python/protos/masknet.proto"; @@ -25,6 +26,7 @@ message Block { MaskNet masknet = 107; SENet senet = 108; FiBiNetTower fibinet = 109; + FM fm = 110; } } diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 49a5a9592..3f4f851b9 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -31,6 +31,7 @@ message DummyModel { } // configure backbone network in a free style way message RankModel { + optional float l2_regularization = 1; } // for knowledge distillation diff --git a/examples/configs/deepfm_backbone_on_criteo.config b/examples/configs/deepfm_backbone_on_criteo.config new file mode 100644 index 000000000..a0982a16e --- /dev/null +++ b/examples/configs/deepfm_backbone_on_criteo.config @@ -0,0 +1,560 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/deepfm_criteo_ckpt" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 1000 + sync_replicas: True + num_steps: 20000 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" 
+ input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: 
STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + embedding_dim:16 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + embedding_dim:16 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + 
input_names: "F9" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: 
IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: "features" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + 
feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'emb_list' + inputs: 'features' + input_layer { + output_feature_list: true + } + } + blocks { + name: 'fm' + inputs: 'emb_list' + fm {} + } + blocks { + name: 'deep' + inputs: 'features' + mlp { + hidden_units: [256, 128, 64] + } + } + concat_blocks: ['fm', 'deep'] + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config new file mode 100644 index 000000000..1dcdf7512 --- /dev/null +++ b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config @@ -0,0 +1,560 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/deepfm_autodis_criteo_ckpt" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 1000 + sync_replicas: True + num_steps: 20000 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + 
input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: 
STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + embedding_dim:16 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + embedding_dim:16 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + embedding_dim:16 + 
feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + 
features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: "features" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: 
"C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'emb_list' + inputs: 'features' + input_layer { + output_feature_list: true + } + } + blocks { + name: 'fm' + inputs: 'emb_list' + fm {} + } + blocks { + name: 'deep' + inputs: 'features' + mlp { + hidden_units: [256, 128, 64] + } + } + concat_blocks: ['fm', 'deep'] + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/deepfm_backbone_on_movielens.config b/examples/configs/deepfm_backbone_on_movielens.config new file mode 100644 index 000000000..46a79d83b --- /dev/null +++ b/examples/configs/deepfm_backbone_on_movielens.config @@ -0,0 +1,194 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/deepfm_backbone_movieslen_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { 
+ input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [8, 4, 4] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: 'features' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + 
feature_names: 'year' + feature_names: 'genres' + feature_names: 'title' + wide_deep: DEEP + } + backbone { + blocks { + name: 'emb_list' + inputs: 'features' + input_layer { + output_feature_list: true + } + } + blocks { + name: 'fm' + inputs: 'emb_list' + fm {} + } + blocks { + name: 'deep' + inputs: 'features' + mlp { + hidden_units: [256, 128, 64] + } + } + concat_blocks: ['fm', 'deep'] + } + rank_model { + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} +export_config { + multi_placeholder: false +} diff --git a/examples/configs/deepfm_on_criteo.config b/examples/configs/deepfm_on_criteo.config index c482cf246..fc8537f0d 100644 --- a/examples/configs/deepfm_on_criteo.config +++ b/examples/configs/deepfm_on_criteo.config @@ -241,91 +241,91 @@ data_config { feature_config: { features: { input_names: "F1" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val:0.0 max_val: 5775.0 } features: { input_names: "F2" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: -3.0 max_val: 257675.0 } features: { input_names: "F3" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 65535.0 } features: { input_names: "F4" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 969.0 } features: { input_names: "F5" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 23159456.0 } features: { input_names: "F6" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 431037.0 } features: { input_names: "F7" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 56311.0 } features: { input_names: "F8" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 6047.0 } features: { input_names: "F9" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 29019.0 } features: { input_names: "F10" - embedding_dim:10 + embedding_dim:16 
feature_type: RawFeature min_val: 0.0 max_val: 46.0 } features: { input_names: "F11" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 231.0 } features: { input_names: "F12" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 4008.0 } features: { input_names: "F13" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 7393.0 diff --git a/examples/data/criteo/download_and_process.sh b/examples/data/criteo/download_and_process.sh index 30061a862..f0cc8aef9 100644 --- a/examples/data/criteo/download_and_process.sh +++ b/examples/data/criteo/download_and_process.sh @@ -1,6 +1,7 @@ #! /bin/bash if [ "$(uname)" == "Darwin" ]; then - curl -O https://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/criteo_kaggle/kaggle-display-advertising-challenge-dataset.tar.gz + #curl -O https://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/criteo_kaggle/kaggle-display-advertising-challenge-dataset.tar.gz + wget -c https://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/criteo_kaggle/kaggle-display-advertising-challenge-dataset.tar.gz elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then wget -c https://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/criteo_kaggle/kaggle-display-advertising-challenge-dataset.tar.gz elif [ "$(expr substr $(uname -s) 1 10)" == "MINGW32_NT" ]; then diff --git a/examples/data/criteo/process_criteo_kaggle.py b/examples/data/criteo/process_criteo_kaggle.py index 60b7d9776..5b9cb4f34 100644 --- a/examples/data/criteo/process_criteo_kaggle.py +++ b/examples/data/criteo/process_criteo_kaggle.py @@ -11,8 +11,9 @@ samples_num = data_train.shape[0] print('samples_num:', samples_num, round(samples_num * 0.9)) -data_train[:round(samples_num * 0.9)].to_csv( +train_num = int(round(samples_num * 0.9)) +data_train[:train_num].to_csv( r'criteo_train_data', index=False, sep='\t', mode='a', header=False) -data_train[round(samples_num * 0.9):].to_csv( +data_train[train_num:].to_csv( 
r'criteo_test_data', index=False, sep='\t', mode='a', header=False) print('Done.') diff --git a/examples/rank_model/readme.md b/examples/rank_model/readme.md index 15d3f4dca..f6a2ba791 100644 --- a/examples/rank_model/readme.md +++ b/examples/rank_model/readme.md @@ -32,10 +32,12 @@ | MovieLens-1M | DeepFM | 0.8688 | | MovieLens-1M | DCN | 0.8576 | | MovieLens-1M | AutoInt | 0.8513 | +| MovieLens-1M | MaskNet | 0.8872 | +| MovieLens-1M | FibiNet | 0.8879 | # Criteo Research Kaggle 数据集 -在MovieLens-1M 数据集中, 我们提供了2个模型上的demo示例。 +在 `Criteo Research Kaggle` 数据集中, 我们提供了2个模型上的demo示例。 [FM](fm.md) / [DeepFM](deepfm.md) diff --git a/examples/readme.md b/examples/readme.md index 8fa32e511..b95adc8b1 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -77,6 +77,10 @@ EasyRec的模型训练和评估都是基于config配置文件的,配置文件 - [autoint_on_movielens.config](configs/autoint_on_movielens.config) +- [masknet_on_movielens.config](configs/masknet_on_movielens.config) + +- [fibinet_on_movielens.config](configs/fibinet_on_movielens.config) + - [fm_on_criteo.config](configs/fm_on_criteo.config) - [deepfm_on_criteo.config](configs/deepfm_on_criteo.config) From 1114aab534cbe991de9f061c6b1e1cca6d8bd5b3 Mon Sep 17 00:00:00 2001 From: weisu Date: Wed, 14 Jun 2023 17:06:35 +0800 Subject: [PATCH 32/54] [feat]: add more backbone blocks --- easy_rec/python/compat/array_ops.py | 2 +- .../compat/feature_column/feature_column.py | 895 +++++++++--------- .../feature_column/feature_column_v2.py | 10 + easy_rec/python/input/input.py | 7 +- easy_rec/python/layers/backbone.py | 239 +++-- easy_rec/python/layers/common_layers.py | 83 ++ easy_rec/python/layers/fibinet.py | 8 +- easy_rec/python/layers/fm.py | 51 +- easy_rec/python/layers/input_layer.py | 32 +- easy_rec/python/layers/mask_net.py | 9 +- easy_rec/python/layers/numerical_embedding.py | 152 ++- easy_rec/python/model/easy_rec_estimator.py | 3 +- easy_rec/python/model/easy_rec_model.py | 18 +- easy_rec/python/model/rank_model.py | 206 ++-- 
easy_rec/python/protos/backbone.proto | 44 +- easy_rec/python/protos/dnn.proto | 13 + easy_rec/python/protos/easy_rec_model.proto | 4 +- easy_rec/python/protos/fm.proto | 1 + easy_rec/python/protos/layer.proto | 23 +- easy_rec/python/protos/seq_encoder.proto | 1 - easy_rec/python/train_eval.py | 5 + easy_rec/python/utils/__init__.py | 17 + easy_rec/python/utils/dag.py | 398 ++++---- easy_rec/python/utils/tf_utils.py | 36 + .../configs/deepfm_backbone_on_criteo.config | 95 +- ...pfm_backbone_on_criteo_with_autodis.config | 119 +-- ...fm_backbone_on_criteo_with_periodic.config | 571 +++++++++++ .../configs/dlrm_backbone_on_criteo.config | 566 +++++++++++ examples/readme.md | 8 + 29 files changed, 2586 insertions(+), 1030 deletions(-) create mode 100644 examples/configs/deepfm_backbone_on_criteo_with_periodic.config create mode 100644 examples/configs/dlrm_backbone_on_criteo.config diff --git a/easy_rec/python/compat/array_ops.py b/easy_rec/python/compat/array_ops.py index 3e8929ceb..d788bc8c1 100644 --- a/easy_rec/python/compat/array_ops.py +++ b/easy_rec/python/compat/array_ops.py @@ -194,7 +194,7 @@ def repeat_with_axis(data, repeats, axis, name=None): def repeat(input, repeats, axis=None, name=None): # pylint: disable=redefined-builtin - """Repeat elements of `input` + """Repeat elements of `input`. Args: input: An `N`-dimensional Tensor. 
diff --git a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index 7d8419528..d0f23dfbb 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -179,15 +179,15 @@ def _internal_input_layer(features, cols_to_output_tensors=None, from_template=False, feature_name_to_output_tensors=None, - do_normalize=False): + sort_feature_columns_by_name=True): """See input_layer, `scope` is a name or variable scope to use.""" feature_columns = _normalize_feature_columns(feature_columns) for column in feature_columns: if not isinstance(column, _DenseColumn): raise ValueError( - 'Items of feature_columns must be a _DenseColumn. ' - 'You can wrap a categorical column with an ' - 'embedding_column or indicator_column. Given: {}'.format(column)) + 'Items of feature_columns must be a _DenseColumn. ' + 'You can wrap a categorical column with an ' + 'embedding_column or indicator_column. 
Given: {}'.format(column)) weight_collections = list(weight_collections or []) if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections: weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES) @@ -197,42 +197,28 @@ def _internal_input_layer(features, def _get_logits(): # pylint: disable=missing-docstring builder = _LazyBuilder(features) output_tensors = [] - ordered_columns = [] - for column in sorted(feature_columns, key=lambda x: x.name): - ordered_columns.append(column) + if sort_feature_columns_by_name: + ordered_columns = sorted(feature_columns, key=lambda x: x.name) + else: + ordered_columns = feature_columns + for column in ordered_columns: with variable_scope.variable_scope( None, default_name=column._var_scope_name): # pylint: disable=protected-access tensor = column._get_dense_tensor( # pylint: disable=protected-access - builder, - weight_collections=weight_collections, - trainable=trainable) + builder, + weight_collections=weight_collections, + trainable=trainable) num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access batch_size = array_ops.shape(tensor)[0] output_tensor = array_ops.reshape( - tensor, shape=(batch_size, num_elements)) - if do_normalize: - from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn,\ - NumericColumn, WeightedCategoricalColumn - from tensorflow.python.layers.normalization import batch_normalization - if isinstance(column, EmbeddingColumn) or isinstance( - column, _SharedEmbeddingColumn): - fc = column.categorical_column - if isinstance(fc, WeightedCategoricalColumn - ) and fc.weight_feature_key.endswith('_raw_proj_val'): - output_tensor = layer_norm( - output_tensor, name='ln_' + column.name) - else: - output_tensor = batch_normalization( - output_tensor, name='bn_' + column.name) - elif isinstance(column, NumericColumn) and int(column.shape[-1]) > 1: - output_tensor = layer_norm(output_tensor, name='ln_' + column.name) + tensor, shape=(batch_size, 
num_elements)) output_tensors.append(output_tensor) if cols_to_vars is not None: # Retrieve any variables created (some _DenseColumn's don't create # variables, in which case an empty list is returned). cols_to_vars[column] = ops.get_collection( - ops.GraphKeys.GLOBAL_VARIABLES, - scope=variable_scope.get_variable_scope().name) + ops.GraphKeys.GLOBAL_VARIABLES, + scope=variable_scope.get_variable_scope().name) if cols_to_output_tensors is not None: cols_to_output_tensors[column] = output_tensor if feature_name_to_output_tensors is not None: @@ -258,7 +244,7 @@ def input_layer(features, cols_to_vars=None, cols_to_output_tensors=None, feature_name_to_output_tensors=None, - do_normalize=False): + sort_feature_columns_by_name=True): """Returns a dense `Tensor` as input layer based on given `feature_columns`. Generally a single example in training data is described with FeatureColumns. @@ -306,8 +292,7 @@ def input_layer(features, cols_to_output_tensors: If not `None`, must be a dictionary that will be filled with a mapping from '_FeatureColumn' to the associated output `Tensor`s. - do_normalize: Whether to do layer normalization for numerical features and - batch normalization operation for categorical features. + sort_feature_columns_by_name: whether to sort feature columns Returns: A `Tensor` which represents input layer of a model. Its shape @@ -318,14 +303,14 @@ def input_layer(features, ValueError: if an item in `feature_columns` is not a `_DenseColumn`. 
""" return _internal_input_layer( - features, - feature_columns, - weight_collections=weight_collections, - trainable=trainable, - cols_to_vars=cols_to_vars, - cols_to_output_tensors=cols_to_output_tensors, - feature_name_to_output_tensors=feature_name_to_output_tensors, - do_normalize=do_normalize) + features, + feature_columns, + weight_collections=weight_collections, + trainable=trainable, + cols_to_vars=cols_to_vars, + cols_to_output_tensors=cols_to_output_tensors, + feature_name_to_output_tensors=feature_name_to_output_tensors, + sort_feature_columns_by_name=sort_feature_columns_by_name) # TODO(akshayka): InputLayer should be a subclass of Layer, and it @@ -349,17 +334,17 @@ def __init__(self, self._cols_to_vars = cols_to_vars self._name = name self._input_layer_template = template.make_template( - self._name, _internal_input_layer, create_scope_now_=create_scope_now) + self._name, _internal_input_layer, create_scope_now_=create_scope_now) self._scope = self._input_layer_template.variable_scope def __call__(self, features): return self._input_layer_template( - features=features, - feature_columns=self._feature_columns, - weight_collections=self._weight_collections, - trainable=self._trainable, - cols_to_vars=None, - from_template=True) + features=features, + feature_columns=self._feature_columns, + weight_collections=self._weight_collections, + trainable=self._trainable, + cols_to_vars=None, + from_template=True) @property def name(self): @@ -515,12 +500,12 @@ def linear_model(features, with variable_scope.variable_scope(None, 'linear_model') as vs: model_name = _strip_leading_slashes(vs.name) linear_model_layer = _LinearModel( - feature_columns=feature_columns, - units=units, - sparse_combiner=sparse_combiner, - weight_collections=weight_collections, - trainable=trainable, - name=model_name) + feature_columns=feature_columns, + units=units, + sparse_combiner=sparse_combiner, + weight_collections=weight_collections, + trainable=trainable, + name=model_name) 
retval = linear_model_layer(features) # pylint: disable=not-callable if cols_to_vars is not None: cols_to_vars.update(linear_model_layer.cols_to_vars()) @@ -564,7 +549,7 @@ def __init__(self, name=None, **kwargs): super(_FCLinearWrapper, self).__init__( - trainable=trainable, name=name, **kwargs) + trainable=trainable, name=name, **kwargs) self._feature_column = feature_column self._units = units self._sparse_combiner = sparse_combiner @@ -573,30 +558,30 @@ def __init__(self, def build(self, _): if isinstance(self._feature_column, _CategoricalColumn): weight = self.add_variable( - name='weights', - shape=(self._feature_column._num_buckets, self._units), # pylint: disable=protected-access - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) + name='weights', + shape=(self._feature_column._num_buckets, self._units), # pylint: disable=protected-access + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) else: num_elements = self._feature_column._variable_shape.num_elements() # pylint: disable=protected-access weight = self.add_variable( - name='weights', - shape=[num_elements, self._units], - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) + name='weights', + shape=[num_elements, self._units], + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) _add_to_collections(weight, self._weight_collections) self._weight_var = weight self.built = True def call(self, builder): weighted_sum = _create_weighted_sum( - column=self._feature_column, - builder=builder, - units=self._units, - sparse_combiner=self._sparse_combiner, - weight_collections=self._weight_collections, - trainable=self.trainable, - weight_var=self._weight_var) + column=self._feature_column, + builder=builder, + units=self._units, + sparse_combiner=self._sparse_combiner, + weight_collections=self._weight_collections, + trainable=self.trainable, + weight_var=self._weight_var) return weighted_sum @@ -615,10 +600,10 @@ def __init__(self, 
def build(self, _): self._bias_variable = self.add_variable( - 'bias_weights', - shape=[self._units], - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) + 'bias_weights', + shape=[self._units], + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) _add_to_collections(self._bias_variable, self._weight_collections) self.built = True @@ -674,11 +659,11 @@ def __init__(self, column_layers[column_name] = column_layer self._column_layers = self._add_layers(column_layers) self._bias_layer = _BiasLayer( - units=units, - trainable=trainable, - weight_collections=self._weight_collections, - name='bias_layer', - **kwargs) + units=units, + trainable=trainable, + weight_collections=self._weight_collections, + name='bias_layer', + **kwargs) self._cols_to_vars = {} def cols_to_vars(self): @@ -694,8 +679,8 @@ def call(self, features): for column in self._feature_columns: if not isinstance(column, (_DenseColumn, _CategoricalColumn)): raise ValueError( - 'Items of feature_columns must be either a ' - '_DenseColumn or _CategoricalColumn. Given: {}'.format(column)) + 'Items of feature_columns must be either a ' + '_DenseColumn or _CategoricalColumn. 
Given: {}'.format(column)) weighted_sums = [] ordered_columns = [] builder = _LazyBuilder(features) @@ -705,17 +690,17 @@ def call(self, features): weighted_sum = layer(builder) weighted_sums.append(weighted_sum) self._cols_to_vars[column] = ops.get_collection( - ops.GraphKeys.GLOBAL_VARIABLES, scope=layer.scope_name) + ops.GraphKeys.GLOBAL_VARIABLES, scope=layer.scope_name) _verify_static_batch_size_equality(weighted_sums, ordered_columns) predictions_no_bias = math_ops.add_n( - weighted_sums, name='weighted_sum_no_bias') + weighted_sums, name='weighted_sum_no_bias') predictions = nn_ops.bias_add( - predictions_no_bias, - self._bias_layer( # pylint: disable=not-callable - builder, - scope=variable_scope.get_variable_scope()), # pylint: disable=not-callable - name='weighted_sum') + predictions_no_bias, + self._bias_layer( # pylint: disable=not-callable + builder, + scope=variable_scope.get_variable_scope()), # pylint: disable=not-callable + name='weighted_sum') bias = self._bias_layer.variables[0] self._cols_to_vars['bias'] = _get_expanded_variable_list(bias) return predictions @@ -920,31 +905,31 @@ def model_fn(features, ...): if (initializer is not None) and (not callable(initializer)): raise ValueError('initializer must be callable if specified. 
' 'Embedding of column_name: {}'.format( - categorical_column.name)) + categorical_column.name)) if initializer is None: initializer = init_ops.truncated_normal_initializer( - mean=0.0, stddev=0.01 / math.sqrt(dimension)) + mean=0.0, stddev=0.01 / math.sqrt(dimension)) embedding_shape = categorical_column._num_buckets, dimension # pylint: disable=protected-access def _creator(weight_collections, scope): embedding_column_layer = _EmbeddingColumnLayer( - embedding_shape=embedding_shape, - initializer=initializer, - weight_collections=weight_collections, - trainable=trainable, - name='embedding_column_layer') + embedding_shape=embedding_shape, + initializer=initializer, + weight_collections=weight_collections, + trainable=trainable, + name='embedding_column_layer') return embedding_column_layer(None, scope=scope) # pylint: disable=not-callable return _EmbeddingColumn( - categorical_column=categorical_column, - dimension=dimension, - combiner=combiner, - layer_creator=_creator, - ckpt_to_load_from=ckpt_to_load_from, - tensor_name_in_ckpt=tensor_name_in_ckpt, - max_norm=max_norm, - trainable=trainable) + categorical_column=categorical_column, + dimension=dimension, + combiner=combiner, + layer_creator=_creator, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable) def _numeric_column(key, @@ -1011,15 +996,15 @@ def _numeric_column(key, if normalizer_fn is not None and not callable(normalizer_fn): raise TypeError( - 'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn)) + 'normalizer_fn must be a callable. 
Given: {}'.format(normalizer_fn)) fc_utils.assert_key_is_string(key) return _NumericColumn( - key, - shape=shape, - default_value=default_value, - dtype=dtype, - normalizer_fn=normalizer_fn) + key, + shape=shape, + default_value=default_value, + dtype=dtype, + normalizer_fn=normalizer_fn) def _bucketized_column(source_column, boundaries): @@ -1090,8 +1075,8 @@ def _bucketized_column(source_column, boundaries): """ if not isinstance(source_column, _NumericColumn): raise ValueError( - 'source_column must be a column generated with numeric_column(). ' - 'Given: {}'.format(source_column)) + 'source_column must be a column generated with numeric_column(). ' + 'Given: {}'.format(source_column)) if len(source_column.shape) > 1: raise ValueError('source_column must be one-dimensional column. ' 'Given: {}'.format(source_column)) @@ -1154,7 +1139,7 @@ def _categorical_column_with_hash_bucket(key, if hash_bucket_size < 1: raise ValueError('hash_bucket_size must be at least 1. ' 'hash_bucket_size: {}, key: {}'.format( - hash_bucket_size, key)) + hash_bucket_size, key)) fc_utils.assert_key_is_string(key) fc_utils.assert_string_or_int(dtype, prefix='column_name: {}'.format(key)) @@ -1256,8 +1241,8 @@ def _categorical_column_with_vocabulary_file(key, with gfile.GFile(vocabulary_file) as f: vocabulary_size = sum(1 for _ in f) logging.info( - 'vocabulary_size = %d in %s is inferred from the number of elements ' - 'in the vocabulary_file %s.', vocabulary_size, key, vocabulary_file) + 'vocabulary_size = %d in %s is inferred from the number of elements ' + 'in the vocabulary_file %s.', vocabulary_size, key, vocabulary_file) # `vocabulary_size` isn't required for lookup, but it is for `_num_buckets`. 
if vocabulary_size < 1: @@ -1265,20 +1250,20 @@ def _categorical_column_with_vocabulary_file(key, if num_oov_buckets: if default_value is not None: raise ValueError( - 'Can\'t specify both num_oov_buckets and default_value in {}.'.format( - key)) + 'Can\'t specify both num_oov_buckets and default_value in {}.'.format( + key)) if num_oov_buckets < 0: raise ValueError('Invalid num_oov_buckets {} in {}.'.format( - num_oov_buckets, key)) + num_oov_buckets, key)) fc_utils.assert_string_or_int(dtype, prefix='column_name: {}'.format(key)) fc_utils.assert_key_is_string(key) return _VocabularyFileCategoricalColumn( - key=key, - vocabulary_file=vocabulary_file, - vocabulary_size=vocabulary_size, - num_oov_buckets=0 if num_oov_buckets is None else num_oov_buckets, - default_value=-1 if default_value is None else default_value, - dtype=dtype) + key=key, + vocabulary_file=vocabulary_file, + vocabulary_size=vocabulary_size, + num_oov_buckets=0 if num_oov_buckets is None else num_oov_buckets, + default_value=-1 if default_value is None else default_value, + dtype=dtype) def _categorical_column_with_vocabulary_list(key, @@ -1363,38 +1348,38 @@ def _categorical_column_with_vocabulary_list(key, """ if (vocabulary_list is None) or (len(vocabulary_list) < 1): raise ValueError( - 'vocabulary_list {} must be non-empty, column_name: {}'.format( - vocabulary_list, key)) + 'vocabulary_list {} must be non-empty, column_name: {}'.format( + vocabulary_list, key)) if len(set(vocabulary_list)) != len(vocabulary_list): raise ValueError( - 'Duplicate keys in vocabulary_list {}, column_name: {}'.format( - vocabulary_list, key)) + 'Duplicate keys in vocabulary_list {}, column_name: {}'.format( + vocabulary_list, key)) vocabulary_dtype = dtypes.as_dtype(np.array(vocabulary_list).dtype) if num_oov_buckets: if default_value != -1: raise ValueError( - 'Can\'t specify both num_oov_buckets and default_value in {}.'.format( - key)) + 'Can\'t specify both num_oov_buckets and default_value in {}.'.format( + 
key)) if num_oov_buckets < 0: raise ValueError('Invalid num_oov_buckets {} in {}.'.format( - num_oov_buckets, key)) + num_oov_buckets, key)) fc_utils.assert_string_or_int( - vocabulary_dtype, prefix='column_name: {} vocabulary'.format(key)) + vocabulary_dtype, prefix='column_name: {} vocabulary'.format(key)) if dtype is None: dtype = vocabulary_dtype elif dtype.is_integer != vocabulary_dtype.is_integer: raise ValueError( - 'dtype {} and vocabulary dtype {} do not match, column_name: {}'.format( - dtype, vocabulary_dtype, key)) + 'dtype {} and vocabulary dtype {} do not match, column_name: {}'.format( + dtype, vocabulary_dtype, key)) fc_utils.assert_string_or_int(dtype, prefix='column_name: {}'.format(key)) fc_utils.assert_key_is_string(key) return _VocabularyListCategoricalColumn( - key=key, - vocabulary_list=tuple(vocabulary_list), - dtype=dtype, - default_value=default_value, - num_oov_buckets=num_oov_buckets) + key=key, + vocabulary_list=tuple(vocabulary_list), + dtype=dtype, + default_value=default_value, + num_oov_buckets=num_oov_buckets) def _categorical_column_with_identity(key, num_buckets, default_value=None): @@ -1453,15 +1438,15 @@ def _categorical_column_with_identity(key, num_buckets, default_value=None): """ if num_buckets < 1: raise ValueError('num_buckets {} < 1, column_name {}'.format( - num_buckets, key)) + num_buckets, key)) if (default_value is not None) and ((default_value < 0) or (default_value >= num_buckets)): raise ValueError( - 'default_value {} not in range [0, {}), column_name {}'.format( - default_value, num_buckets, key)) + 'default_value {} not in range [0, {}), column_name {}'.format( + default_value, num_buckets, key)) fc_utils.assert_key_is_string(key) return _IdentityCategoricalColumn( - key=key, num_buckets=num_buckets, default_value=default_value) + key=key, num_buckets=num_buckets, default_value=default_value) def _indicator_column(categorical_column): @@ -1568,9 +1553,9 @@ def _weighted_categorical_column(categorical_column, 
if (dtype is None) or not (dtype.is_integer or dtype.is_floating): raise ValueError('dtype {} is not convertible to float.'.format(dtype)) return _WeightedCategoricalColumn( - categorical_column=categorical_column, - weight_feature_key=weight_feature_key, - dtype=dtype) + categorical_column=categorical_column, + weight_feature_key=weight_feature_key, + dtype=dtype) def _crossed_column(keys, hash_bucket_size, hash_key=None): @@ -1682,21 +1667,21 @@ def _crossed_column(keys, hash_bucket_size, hash_key=None): 'hash_bucket_size: {}'.format(hash_bucket_size)) if not keys or len(keys) < 2: raise ValueError( - 'keys must be a list with length > 1. Given: {}'.format(keys)) + 'keys must be a list with length > 1. Given: {}'.format(keys)) for key in keys: if (not isinstance(key, six.string_types) and not isinstance(key, _CategoricalColumn)): raise ValueError( - 'Unsupported key type. All keys must be either string, or ' - 'categorical column except _HashedCategoricalColumn. ' - 'Given: {}'.format(key)) + 'Unsupported key type. All keys must be either string, or ' + 'categorical column except _HashedCategoricalColumn. ' + 'Given: {}'.format(key)) if isinstance(key, _HashedCategoricalColumn): raise ValueError( - 'categorical_column_with_hash_bucket is not supported for crossing. ' - 'Hashing before crossing will increase probability of collision. ' - 'Instead, use the feature name as a string. Given: {}'.format(key)) + 'categorical_column_with_hash_bucket is not supported for crossing. ' + 'Hashing before crossing will increase probability of collision. ' + 'Instead, use the feature name as a string. Given: {}'.format(key)) return _CrossedColumn( - keys=tuple(keys), hash_bucket_size=hash_bucket_size, hash_key=hash_key) + keys=tuple(keys), hash_bucket_size=hash_bucket_size, hash_key=hash_key) # TODO(rohanj): Clearly define semantics of this layer. @@ -1725,7 +1710,7 @@ def __init__(self, **kwargs: keyword named properties. 
""" super(_EmbeddingColumnLayer, self).__init__( - trainable=trainable, name=name, **kwargs) + trainable=trainable, name=name, **kwargs) self._embedding_shape = embedding_shape self._initializer = initializer self._weight_collections = weight_collections @@ -1741,11 +1726,11 @@ def set_weight_collections(self, weight_collections): def build(self, _): self._embedding_weight_var = self.add_variable( - name='embedding_weights', - shape=self._embedding_shape, - dtype=dtypes.float32, - initializer=self._initializer, - trainable=self.trainable) + name='embedding_weights', + shape=self._embedding_shape, + dtype=dtypes.float32, + initializer=self._initializer, + trainable=self.trainable) if self._weight_collections and not context.executing_eagerly(): _add_to_collections(self._embedding_weight_var, self._weight_collections) self.built = True @@ -1891,21 +1876,21 @@ def _create_weighted_sum(column, """Creates a weighted sum for a dense/categorical column for linear_model.""" if isinstance(column, _CategoricalColumn): return _create_categorical_column_weighted_sum( - column=column, - builder=builder, - units=units, - sparse_combiner=sparse_combiner, - weight_collections=weight_collections, - trainable=trainable, - weight_var=weight_var) + column=column, + builder=builder, + units=units, + sparse_combiner=sparse_combiner, + weight_collections=weight_collections, + trainable=trainable, + weight_var=weight_var) else: return _create_dense_column_weighted_sum( - column=column, - builder=builder, - units=units, - weight_collections=weight_collections, - trainable=trainable, - weight_var=weight_var) + column=column, + builder=builder, + units=units, + weight_collections=weight_collections, + trainable=trainable, + weight_var=weight_var) def _create_dense_column_weighted_sum(column, @@ -1916,9 +1901,9 @@ def _create_dense_column_weighted_sum(column, weight_var=None): """Create a weighted sum of a dense column for linear_model.""" tensor = column._get_dense_tensor( # pylint: 
disable=protected-access - builder, - weight_collections=weight_collections, - trainable=trainable) + builder, + weight_collections=weight_collections, + trainable=trainable) num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access batch_size = array_ops.shape(tensor)[0] tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements)) @@ -1926,11 +1911,11 @@ def _create_dense_column_weighted_sum(column, weight = weight_var else: weight = variable_scope.get_variable( - name='weights', - shape=[num_elements, units], - initializer=init_ops.zeros_initializer(), - trainable=trainable, - collections=weight_collections) + name='weights', + shape=[num_elements, units], + initializer=init_ops.zeros_initializer(), + trainable=trainable, + collections=weight_collections) return math_ops.matmul(tensor, weight, name='weighted_sum') @@ -1944,7 +1929,7 @@ class _CategoricalColumn(_FeatureColumn): """ IdWeightPair = collections.namedtuple( # pylint: disable=invalid-name - 'IdWeightPair', ['id_tensor', 'weight_tensor']) + 'IdWeightPair', ['id_tensor', 'weight_tensor']) @abc.abstractproperty def _num_buckets(self): @@ -2014,39 +1999,39 @@ def _create_categorical_column_weighted_sum(column, sparse_combiner = "sum". 
""" sparse_tensors = column._get_sparse_tensors( # pylint: disable=protected-access - builder, - weight_collections=weight_collections, - trainable=trainable) + builder, + weight_collections=weight_collections, + trainable=trainable) id_tensor = sparse_ops.sparse_reshape( - sparse_tensors.id_tensor, - [array_ops.shape(sparse_tensors.id_tensor)[0], -1]) + sparse_tensors.id_tensor, + [array_ops.shape(sparse_tensors.id_tensor)[0], -1]) weight_tensor = sparse_tensors.weight_tensor if weight_tensor is not None: weight_tensor = sparse_ops.sparse_reshape( - weight_tensor, [array_ops.shape(weight_tensor)[0], -1]) + weight_tensor, [array_ops.shape(weight_tensor)[0], -1]) if weight_var is not None: weight = weight_var else: weight = variable_scope.get_variable( - name='weights', - shape=(column._num_buckets, units), # pylint: disable=protected-access - initializer=init_ops.zeros_initializer(), - trainable=trainable, - collections=weight_collections) + name='weights', + shape=(column._num_buckets, units), # pylint: disable=protected-access + initializer=init_ops.zeros_initializer(), + trainable=trainable, + collections=weight_collections) return embedding_ops.safe_embedding_lookup_sparse( - weight, - id_tensor, - sparse_weights=weight_tensor, - combiner=sparse_combiner, - name='weighted_sum') + weight, + id_tensor, + sparse_weights=weight_tensor, + combiner=sparse_combiner, + name='weighted_sum') class _SequenceDenseColumn(_FeatureColumn): """Represents dense sequence data.""" TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name - 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) + 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) @abc.abstractmethod def _get_sequence_dense_tensor(self, @@ -2162,7 +2147,7 @@ def _get_raw_feature_as_tensor(self, key): """ raw_feature = self._features[key] feature_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( - raw_feature) + raw_feature) def expand_dims(input_tensor): 
# Input_tensor must have rank 1. @@ -2176,20 +2161,20 @@ def expand_dims(input_tensor): if rank is not None: if rank == 0: raise ValueError( - 'Feature (key: {}) cannot have rank 0. Give: {}'.format( - key, feature_tensor)) + 'Feature (key: {}) cannot have rank 0. Give: {}'.format( + key, feature_tensor)) return feature_tensor if rank != 1 else expand_dims(feature_tensor) # Handle dynamic rank. with ops.control_dependencies([ - check_ops.assert_positive( - array_ops.rank(feature_tensor), - message='Feature (key: {}) cannot have rank 0. Given: {}'.format( - key, feature_tensor)) + check_ops.assert_positive( + array_ops.rank(feature_tensor), + message='Feature (key: {}) cannot have rank 0. Given: {}'.format( + key, feature_tensor)) ]): return control_flow_ops.cond( - math_ops.equal(1, array_ops.rank(feature_tensor)), - lambda: expand_dims(feature_tensor), lambda: feature_tensor) + math_ops.equal(1, array_ops.rank(feature_tensor)), + lambda: expand_dims(feature_tensor), lambda: feature_tensor) # TODO(ptucker): Move to third_party/tensorflow/python/ops/sparse_ops.py @@ -2224,7 +2209,7 @@ def _to_sparse_input_and_drop_ignore_values(input_tensor, ignore_value=None): ValueError: when `input_tensor`'s rank is `None`. """ input_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( - input_tensor) + input_tensor) if isinstance(input_tensor, sparse_tensor_lib.SparseTensor): return input_tensor with ops.name_scope(None, 'to_sparse_input', ( @@ -2243,14 +2228,14 @@ def _to_sparse_input_and_drop_ignore_values(input_tensor, ignore_value=None): # default value for that type. 
ignore_value = input_tensor.dtype.as_numpy_dtype() ignore_value = math_ops.cast( - ignore_value, input_tensor.dtype, name='ignore_value') + ignore_value, input_tensor.dtype, name='ignore_value') indices = array_ops.where( - math_ops.not_equal(input_tensor, ignore_value), name='indices') + math_ops.not_equal(input_tensor, ignore_value), name='indices') return sparse_tensor_lib.SparseTensor( - indices=indices, - values=array_ops.gather_nd(input_tensor, indices, name='values'), - dense_shape=array_ops.shape( - input_tensor, out_type=dtypes.int64, name='dense_shape')) + indices=indices, + values=array_ops.gather_nd(input_tensor, indices, name='values'), + dense_shape=array_ops.shape( + input_tensor, out_type=dtypes.int64, name='dense_shape')) def _normalize_feature_columns(feature_columns): @@ -2299,10 +2284,10 @@ def _normalize_feature_columns(feature_columns): class _NumericColumn( - _DenseColumn, - collections.namedtuple( - '_NumericColumn', - ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])): + _DenseColumn, + collections.namedtuple( + '_NumericColumn', + ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])): """see `numeric_column`.""" @property @@ -2312,17 +2297,17 @@ def name(self): @property def _parse_example_spec(self): return { - self.key: - parsing_ops.FixedLenFeature(self.shape, self.dtype, - self.default_value) + self.key: + parsing_ops.FixedLenFeature(self.shape, self.dtype, + self.default_value) } def _transform_feature(self, inputs): input_tensor = inputs.get(self.key) if isinstance(input_tensor, sparse_tensor_lib.SparseTensor): raise ValueError( - 'The corresponding Tensor of numerical column must be a Tensor. ' - 'SparseTensor is not supported. key: {}'.format(self.key)) + 'The corresponding Tensor of numerical column must be a Tensor. ' + 'SparseTensor is not supported. 
key: {}'.format(self.key)) if self.normalizer_fn is not None: input_tensor = self.normalizer_fn(input_tensor) return math_ops.cast(input_tensor, dtypes.float32) @@ -2374,23 +2359,23 @@ def _parse_example_spec(self): def _transform_feature(self, inputs): source_tensor = inputs.get(self.source_column) return math_ops._bucketize( # pylint: disable=protected-access - source_tensor, - boundaries=self.boundaries) + source_tensor, + boundaries=self.boundaries) @property def _variable_shape(self): return tensor_shape.TensorShape( - tuple(self.source_column.shape) + (len(self.boundaries) + 1,)) + tuple(self.source_column.shape) + (len(self.boundaries) + 1,)) def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): del weight_collections del trainable input_tensor = inputs.get(self) return array_ops.one_hot( - indices=math_ops.cast(input_tensor, dtypes.int64), - depth=len(self.boundaries) + 1, - on_value=1., - off_value=0.) + indices=math_ops.cast(input_tensor, dtypes.int64), + depth=len(self.boundaries) + 1, + on_value=1., + off_value=0.) @property def _num_buckets(self): @@ -2408,9 +2393,9 @@ def _get_sparse_tensors(self, source_dimension = self.source_column.shape[0] i1 = array_ops.reshape( - array_ops.tile( - array_ops.expand_dims(math_ops.range(0, batch_size), 1), - [1, source_dimension]), (-1,)) + array_ops.tile( + array_ops.expand_dims(math_ops.range(0, batch_size), 1), + [1, source_dimension]), (-1,)) i2 = array_ops.tile(math_ops.range(0, source_dimension), [batch_size]) # Flatten the bucket indices and unique them across dimensions # E.g. 
2nd dimension indices will range from k to 2*k-1 with k buckets @@ -2419,20 +2404,20 @@ def _get_sparse_tensors(self, (-1,)) + (len(self.boundaries) + 1) * i2) indices = math_ops.cast( - array_ops.transpose(array_ops.stack((i1, i2))), dtypes.int64) + array_ops.transpose(array_ops.stack((i1, i2))), dtypes.int64) dense_shape = math_ops.cast( - array_ops.stack([batch_size, source_dimension]), dtypes.int64) + array_ops.stack([batch_size, source_dimension]), dtypes.int64) sparse_tensor = sparse_tensor_lib.SparseTensor( - indices=indices, values=bucket_indices, dense_shape=dense_shape) + indices=indices, values=bucket_indices, dense_shape=dense_shape) return _CategoricalColumn.IdWeightPair(sparse_tensor, None) class _EmbeddingColumn( - _DenseColumn, _SequenceDenseColumn, - collections.namedtuple( - '_EmbeddingColumn', - ('categorical_column', 'dimension', 'combiner', 'layer_creator', - 'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))): + _DenseColumn, _SequenceDenseColumn, + collections.namedtuple( + '_EmbeddingColumn', + ('categorical_column', 'dimension', 'combiner', 'layer_creator', + 'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))): """See `embedding_column`.""" @property @@ -2461,47 +2446,47 @@ def _get_dense_tensor_internal(self, """Private method that follows the signature of _get_dense_tensor.""" # Get sparse IDs and weights. 
sparse_tensors = self.categorical_column._get_sparse_tensors( # pylint: disable=protected-access - inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs, + weight_collections=weight_collections, + trainable=trainable) sparse_ids = sparse_tensors.id_tensor sparse_weights = sparse_tensors.weight_tensor embedding_weights = self.layer_creator( - weight_collections=weight_collections, - scope=variable_scope.get_variable_scope()) + weight_collections=weight_collections, + scope=variable_scope.get_variable_scope()) if self.ckpt_to_load_from is not None: to_restore = embedding_weights if isinstance(to_restore, variables.PartitionedVariable): to_restore = to_restore._get_variable_list() # pylint: disable=protected-access checkpoint_utils.init_from_checkpoint( - self.ckpt_to_load_from, {self.tensor_name_in_ckpt: to_restore}) + self.ckpt_to_load_from, {self.tensor_name_in_ckpt: to_restore}) # Return embedding lookup result. return embedding_ops.safe_embedding_lookup_sparse( - embedding_weights=embedding_weights, - sparse_ids=sparse_ids, - sparse_weights=sparse_weights, - combiner=self.combiner, - name='%s_weights' % self.name, - max_norm=self.max_norm) + embedding_weights=embedding_weights, + sparse_ids=sparse_ids, + sparse_weights=sparse_weights, + combiner=self.combiner, + name='%s_weights' % self.name, + max_norm=self.max_norm) def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): if isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In embedding_column: {}. ' - 'categorical_column must not be of type _SequenceCategoricalColumn. ' - 'Suggested fix A: If you wish to use input_layer, use a ' - 'non-sequence categorical_column_with_*. ' - 'Suggested fix B: If you wish to create sequence input, use ' - 'sequence_input_layer instead of input_layer. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In embedding_column: {}. 
' + 'categorical_column must not be of type _SequenceCategoricalColumn. ' + 'Suggested fix A: If you wish to use input_layer, use a ' + 'non-sequence categorical_column_with_*. ' + 'Suggested fix B: If you wish to create sequence input, use ' + 'sequence_input_layer instead of input_layer. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) return self._get_dense_tensor_internal( - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) def _get_sequence_dense_tensor(self, inputs, @@ -2509,22 +2494,22 @@ def _get_sequence_dense_tensor(self, trainable=None): if not isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In embedding_column: {}. ' - 'categorical_column must be of type _SequenceCategoricalColumn ' - 'to use sequence_input_layer. ' - 'Suggested fix: Use one of sequence_categorical_column_with_*. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In embedding_column: {}. ' + 'categorical_column must be of type _SequenceCategoricalColumn ' + 'to use sequence_input_layer. ' + 'Suggested fix: Use one of sequence_categorical_column_with_*. 
' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) dense_tensor = self._get_dense_tensor_internal( # pylint: disable=protected-access - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access sequence_length = fc_utils.sequence_length_from_sparse_tensor( - sparse_tensors.id_tensor) + sparse_tensors.id_tensor) return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=sequence_length) def _get_graph_for_variable(var): @@ -2535,13 +2520,13 @@ def _get_graph_for_variable(var): class _SharedEmbeddingColumn( - _DenseColumn, _SequenceDenseColumn, - collections.namedtuple( - '_SharedEmbeddingColumn', - ('categorical_column', 'dimension', 'combiner', 'initializer', - 'shared_embedding_collection_name', 'ckpt_to_load_from', - 'tensor_name_in_ckpt', 'max_norm', 'trainable', 'partitioner', - 'ev_params'))): + _DenseColumn, _SequenceDenseColumn, + collections.namedtuple( + '_SharedEmbeddingColumn', + ('categorical_column', 'dimension', 'combiner', 'initializer', + 'shared_embedding_collection_name', 'ckpt_to_load_from', + 'tensor_name_in_ckpt', 'max_norm', 'trainable', 'partitioner', + 'ev_params'))): """See `embedding_column`.""" @property @@ -2556,9 +2541,9 @@ def raw_name(self): @property def cardinality(self): - from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn,\ + from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn, \ BucketizedColumn, WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, \ - CrossedColumn, IdentityCategoricalColumn, VocabularyListCategoricalColumn,\ + CrossedColumn, IdentityCategoricalColumn, 
VocabularyListCategoricalColumn, \ VocabularyFileCategoricalColumn fc = self.categorical_column @@ -2621,66 +2606,66 @@ def _get_dense_tensor_internal(self, with ops.name_scope(None, default_name=self.name): # Get sparse IDs and weights. sparse_tensors = self.categorical_column._get_sparse_tensors( # pylint: disable=protected-access - inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs, + weight_collections=weight_collections, + trainable=trainable) sparse_ids = sparse_tensors.id_tensor sparse_weights = sparse_tensors.weight_tensor embedding_shape = (self.categorical_column._num_buckets, self.dimension) # pylint: disable=protected-access shared_embedding_collection = ops.get_collection( - self.shared_embedding_collection_name) + self.shared_embedding_collection_name) if shared_embedding_collection: if len(shared_embedding_collection) > 1: raise ValueError( - 'Collection {} can only contain one variable. ' - 'Suggested fix A: Choose a unique name for this collection. ' - 'Suggested fix B: Do not add any variables to this collection. ' - 'The feature_column library already adds a variable under the ' - 'hood.'.format(shared_embedding_collection)) + 'Collection {} can only contain one variable. ' + 'Suggested fix A: Choose a unique name for this collection. ' + 'Suggested fix B: Do not add any variables to this collection. ' + 'The feature_column library already adds a variable under the ' + 'hood.'.format(shared_embedding_collection)) embedding_weights = shared_embedding_collection[0] if embedding_weights.get_shape( ) != embedding_shape and not self.ev_params is not None: # noqa : E714 raise ValueError( - 'Shared embedding collection {} contains variable {} of ' - 'unexpected shape {}. Expected shape is {}. ' - 'Suggested fix A: Choose a unique name for this collection. ' - 'Suggested fix B: Do not add any variables to this collection. 
' - 'The feature_column library already adds a variable under the ' - 'hood.'.format(self.shared_embedding_collection_name, - embedding_weights.name, - embedding_weights.get_shape(), embedding_shape)) + 'Shared embedding collection {} contains variable {} of ' + 'unexpected shape {}. Expected shape is {}. ' + 'Suggested fix A: Choose a unique name for this collection. ' + 'Suggested fix B: Do not add any variables to this collection. ' + 'The feature_column library already adds a variable under the ' + 'hood.'.format(self.shared_embedding_collection_name, + embedding_weights.name, + embedding_weights.get_shape(), embedding_shape)) else: if self.ev_params is None: embedding_weights = variable_scope.get_variable( - name='embedding_weights', - shape=embedding_shape, - dtype=dtypes.float32, - initializer=self.initializer, - trainable=self.trainable and trainable, - partitioner=self.partitioner, - collections=weight_collections) + name='embedding_weights', + shape=embedding_shape, + dtype=dtypes.float32, + initializer=self.initializer, + trainable=self.trainable and trainable, + partitioner=self.partitioner, + collections=weight_collections) else: # at eval or inference time, it is necessary to set # the initializers to zeros, so that new key will # get zero embedding import os if os.environ.get('tf.estimator.mode', '') != \ - os.environ.get('tf.estimator.ModeKeys.TRAIN', 'train'): + os.environ.get('tf.estimator.ModeKeys.TRAIN', 'train'): initializer = init_ops.zeros_initializer() else: initializer = self.initializer embedding_weights = variable_scope.get_embedding_variable( - name='embedding_weights', - embedding_dim=self.dimension, - initializer=initializer, - trainable=self.trainable and trainable, - partitioner=self.partitioner, - collections=weight_collections, - steps_to_live=self.ev_params.steps_to_live - if self.ev_params is not None else None, - filter_options=variables.CounterFilterOptions( - self.ev_params.filter_freq)) + name='embedding_weights', + 
embedding_dim=self.dimension, + initializer=initializer, + trainable=self.trainable and trainable, + partitioner=self.partitioner, + collections=weight_collections, + steps_to_live=self.ev_params.steps_to_live + if self.ev_params is not None else None, + filter_options=variables.CounterFilterOptions( + self.ev_params.filter_freq)) ops.add_to_collection(self.shared_embedding_collection_name, embedding_weights) @@ -2689,41 +2674,41 @@ def _get_dense_tensor_internal(self, if isinstance(to_restore, variables.PartitionedVariable): to_restore = to_restore._get_variable_list() # pylint: disable=protected-access checkpoint_utils.init_from_checkpoint( - self.ckpt_to_load_from, {self.tensor_name_in_ckpt: to_restore}) + self.ckpt_to_load_from, {self.tensor_name_in_ckpt: to_restore}) # Return embedding lookup result. if self.ev_params is not None: return ev_embedding_ops.safe_embedding_lookup_sparse( - embedding_weights=embedding_weights, - sparse_ids=sparse_ids, - sparse_weights=sparse_weights, - combiner=self.combiner, - name='%s_weights' % self.name, - max_norm=self.max_norm) + embedding_weights=embedding_weights, + sparse_ids=sparse_ids, + sparse_weights=sparse_weights, + combiner=self.combiner, + name='%s_weights' % self.name, + max_norm=self.max_norm) else: return embedding_ops.safe_embedding_lookup_sparse( - embedding_weights=embedding_weights, - sparse_ids=sparse_ids, - sparse_weights=sparse_weights, - combiner=self.combiner, - name='%s_weights' % self.name, - max_norm=self.max_norm) + embedding_weights=embedding_weights, + sparse_ids=sparse_ids, + sparse_weights=sparse_weights, + combiner=self.combiner, + name='%s_weights' % self.name, + max_norm=self.max_norm) def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): if isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In embedding_column: {}. ' - 'categorical_column must not be of type _SequenceCategoricalColumn. 
' - 'Suggested fix A: If you wish to use input_layer, use a ' - 'non-sequence categorical_column_with_*. ' - 'Suggested fix B: If you wish to create sequence input, use ' - 'sequence_input_layer instead of input_layer. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In embedding_column: {}. ' + 'categorical_column must not be of type _SequenceCategoricalColumn. ' + 'Suggested fix A: If you wish to use input_layer, use a ' + 'non-sequence categorical_column_with_*. ' + 'Suggested fix B: If you wish to create sequence input, use ' + 'sequence_input_layer instead of input_layer. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) return self._get_dense_tensor_internal( - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) def _get_sequence_dense_tensor(self, inputs, @@ -2731,21 +2716,21 @@ def _get_sequence_dense_tensor(self, trainable=None): if not isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In embedding_column: {}. ' - 'categorical_column must be of type _SequenceCategoricalColumn ' - 'to use sequence_input_layer. ' - 'Suggested fix: Use one of sequence_categorical_column_with_*. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In embedding_column: {}. ' + 'categorical_column must be of type _SequenceCategoricalColumn ' + 'to use sequence_input_layer. ' + 'Suggested fix: Use one of sequence_categorical_column_with_*. 
' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) dense_tensor = self._get_dense_tensor_internal( # pylint: disable=protected-access - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access sequence_length = fc_utils.sequence_length_from_sparse_tensor( - sparse_tensors.id_tensor) + sparse_tensors.id_tensor) return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=sequence_length) def _check_shape(shape, key): @@ -2766,8 +2751,8 @@ def _check_shape(shape, key): class _HashedCategoricalColumn(_CategoricalColumn, collections.namedtuple( - '_HashedCategoricalColumn', - ['key', 'hash_bucket_size', 'dtype'])): + '_HashedCategoricalColumn', + ['key', 'hash_bucket_size', 'dtype'])): """see `categorical_column_with_hash_bucket`.""" @property @@ -2788,14 +2773,14 @@ def _transform_feature(self, inputs): raise ValueError('SparseColumn input must be a SparseTensor.') fc_utils.assert_string_or_int( - input_tensor.dtype, - prefix='column_name: {} input_tensor'.format(self.key)) + input_tensor.dtype, + prefix='column_name: {} input_tensor'.format(self.key)) if self.dtype.is_integer != input_tensor.dtype.is_integer: raise ValueError( - 'Column dtype and SparseTensors dtype must be compatible. ' - 'key: {}, column dtype: {}, tensor dtype: {}'.format( - self.key, self.dtype, input_tensor.dtype)) + 'Column dtype and SparseTensors dtype must be compatible. 
' + 'key: {}, column dtype: {}, tensor dtype: {}'.format( + self.key, self.dtype, input_tensor.dtype)) if self.dtype == dtypes.string: sparse_values = input_tensor.values @@ -2803,7 +2788,7 @@ def _transform_feature(self, inputs): sparse_values = string_ops.as_string(input_tensor.values) sparse_id_values = string_ops.string_to_hash_bucket_fast( - sparse_values, self.hash_bucket_size, name='lookup') + sparse_values, self.hash_bucket_size, name='lookup') return sparse_tensor_lib.SparseTensor(input_tensor.indices, sparse_id_values, input_tensor.dense_shape) @@ -2821,10 +2806,10 @@ def _get_sparse_tensors(self, class _VocabularyFileCategoricalColumn( - _CategoricalColumn, - collections.namedtuple('_VocabularyFileCategoricalColumn', - ('key', 'vocabulary_file', 'vocabulary_size', - 'num_oov_buckets', 'dtype', 'default_value'))): + _CategoricalColumn, + collections.namedtuple('_VocabularyFileCategoricalColumn', + ('key', 'vocabulary_file', 'vocabulary_size', + 'num_oov_buckets', 'dtype', 'default_value'))): """See `categorical_column_with_vocabulary_file`.""" @property @@ -2840,13 +2825,13 @@ def _transform_feature(self, inputs): if self.dtype.is_integer != input_tensor.dtype.is_integer: raise ValueError( - 'Column dtype and SparseTensors dtype must be compatible. ' - 'key: {}, column dtype: {}, tensor dtype: {}'.format( - self.key, self.dtype, input_tensor.dtype)) + 'Column dtype and SparseTensors dtype must be compatible. 
' + 'key: {}, column dtype: {}, tensor dtype: {}'.format( + self.key, self.dtype, input_tensor.dtype)) fc_utils.assert_string_or_int( - input_tensor.dtype, - prefix='column_name: {} input_tensor'.format(self.key)) + input_tensor.dtype, + prefix='column_name: {} input_tensor'.format(self.key)) key_dtype = self.dtype if input_tensor.dtype.is_integer: @@ -2855,12 +2840,12 @@ def _transform_feature(self, inputs): input_tensor = math_ops.cast(input_tensor, dtypes.int64) return lookup_ops.index_table_from_file( - vocabulary_file=self.vocabulary_file, - num_oov_buckets=self.num_oov_buckets, - vocab_size=self.vocabulary_size, - default_value=self.default_value, - key_dtype=key_dtype, - name='{}_lookup'.format(self.key)).lookup(input_tensor) + vocabulary_file=self.vocabulary_file, + num_oov_buckets=self.num_oov_buckets, + vocab_size=self.vocabulary_size, + default_value=self.default_value, + key_dtype=key_dtype, + name='{}_lookup'.format(self.key)).lookup(input_tensor) @property def _num_buckets(self): @@ -2875,10 +2860,10 @@ def _get_sparse_tensors(self, class _VocabularyListCategoricalColumn( - _CategoricalColumn, - collections.namedtuple( - '_VocabularyListCategoricalColumn', - ('key', 'vocabulary_list', 'dtype', 'default_value', 'num_oov_buckets')) + _CategoricalColumn, + collections.namedtuple( + '_VocabularyListCategoricalColumn', + ('key', 'vocabulary_list', 'dtype', 'default_value', 'num_oov_buckets')) ): """See `categorical_column_with_vocabulary_list`.""" @@ -2895,13 +2880,13 @@ def _transform_feature(self, inputs): if self.dtype.is_integer != input_tensor.dtype.is_integer: raise ValueError( - 'Column dtype and SparseTensors dtype must be compatible. ' - 'key: {}, column dtype: {}, tensor dtype: {}'.format( - self.key, self.dtype, input_tensor.dtype)) + 'Column dtype and SparseTensors dtype must be compatible. 
' + 'key: {}, column dtype: {}, tensor dtype: {}'.format( + self.key, self.dtype, input_tensor.dtype)) fc_utils.assert_string_or_int( - input_tensor.dtype, - prefix='column_name: {} input_tensor'.format(self.key)) + input_tensor.dtype, + prefix='column_name: {} input_tensor'.format(self.key)) key_dtype = self.dtype if input_tensor.dtype.is_integer: @@ -2910,11 +2895,11 @@ def _transform_feature(self, inputs): input_tensor = math_ops.cast(input_tensor, dtypes.int64) return lookup_ops.index_table_from_tensor( - vocabulary_list=tuple(self.vocabulary_list), - default_value=self.default_value, - num_oov_buckets=self.num_oov_buckets, - dtype=key_dtype, - name='{}_lookup'.format(self.key)).lookup(input_tensor) + vocabulary_list=tuple(self.vocabulary_list), + default_value=self.default_value, + num_oov_buckets=self.num_oov_buckets, + dtype=key_dtype, + name='{}_lookup'.format(self.key)).lookup(input_tensor) @property def _num_buckets(self): @@ -2930,8 +2915,8 @@ def _get_sparse_tensors(self, class _IdentityCategoricalColumn(_CategoricalColumn, collections.namedtuple( - '_IdentityCategoricalColumn', - ('key', 'num_buckets', 'default_value'))): + '_IdentityCategoricalColumn', + ('key', 'num_buckets', 'default_value'))): """See `categorical_column_with_identity`.""" @property @@ -2947,37 +2932,37 @@ def _transform_feature(self, inputs): if not input_tensor.dtype.is_integer: raise ValueError('Invalid input, not integer. key: {} dtype: {}'.format( - self.key, input_tensor.dtype)) + self.key, input_tensor.dtype)) values = math_ops.cast(input_tensor.values, dtypes.int64, name='values') num_buckets = math_ops.cast( - self.num_buckets, dtypes.int64, name='num_buckets') + self.num_buckets, dtypes.int64, name='num_buckets') zero = math_ops.cast(0, dtypes.int64, name='zero') if self.default_value is None: # Fail if values are out-of-range. 
assert_less = check_ops.assert_less( - values, - num_buckets, - data=(values, num_buckets), - name='assert_less_than_num_buckets') + values, + num_buckets, + data=(values, num_buckets), + name='assert_less_than_num_buckets') assert_greater = check_ops.assert_greater_equal( - values, zero, data=(values,), name='assert_greater_or_equal_0') + values, zero, data=(values,), name='assert_greater_or_equal_0') with ops.control_dependencies((assert_less, assert_greater)): values = array_ops.identity(values) else: # Assign default for out-of-range values. values = array_ops.where( - math_ops.logical_or( - values < zero, values >= num_buckets, name='out_of_range'), - array_ops.fill( - dims=array_ops.shape(values), - value=math_ops.cast(self.default_value, dtypes.int64), - name='default_values'), values) + math_ops.logical_or( + values < zero, values >= num_buckets, name='out_of_range'), + array_ops.fill( + dims=array_ops.shape(values), + value=math_ops.cast(self.default_value, dtypes.int64), + name='default_values'), values) return sparse_tensor_lib.SparseTensor( - indices=input_tensor.indices, - values=values, - dense_shape=input_tensor.dense_shape) + indices=input_tensor.indices, + values=values, + dense_shape=input_tensor.dense_shape) @property def _num_buckets(self): @@ -2992,10 +2977,10 @@ def _get_sparse_tensors(self, class _WeightedCategoricalColumn( - _CategoricalColumn, - collections.namedtuple( - '_WeightedCategoricalColumn', - ('categorical_column', 'weight_feature_key', 'dtype'))): + _CategoricalColumn, + collections.namedtuple( + '_WeightedCategoricalColumn', + ('categorical_column', 'weight_feature_key', 'dtype'))): """See `weighted_categorical_column`.""" @property @@ -3008,7 +2993,7 @@ def _parse_example_spec(self): config = self.categorical_column._parse_example_spec # pylint: disable=protected-access if self.weight_feature_key in config: raise ValueError('Parse config {} already exists for {}.'.format( - config[self.weight_feature_key], 
self.weight_feature_key)) + config[self.weight_feature_key], self.weight_feature_key)) config[self.weight_feature_key] = parsing_ops.VarLenFeature(self.dtype) return config @@ -3021,14 +3006,14 @@ def _transform_feature(self, inputs): if weight_tensor is None: raise ValueError('Missing weights {}.'.format(self.weight_feature_key)) weight_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( - weight_tensor) + weight_tensor) if self.dtype != weight_tensor.dtype.base_dtype: raise ValueError('Bad dtype, expected {}, but got {}.'.format( - self.dtype, weight_tensor.dtype)) + self.dtype, weight_tensor.dtype)) if not isinstance(weight_tensor, sparse_tensor_lib.SparseTensor): # The weight tensor can be a regular Tensor. In this case, sparsify it. weight_tensor = _to_sparse_input_and_drop_ignore_values( - weight_tensor, ignore_value=0.0) + weight_tensor, ignore_value=0.0) if not weight_tensor.dtype.is_floating: weight_tensor = math_ops.cast(weight_tensor, dtypes.float32) return (inputs.get(self.categorical_column), weight_tensor) @@ -3044,9 +3029,9 @@ def _get_sparse_tensors(self, class _CrossedColumn( - _CategoricalColumn, - collections.namedtuple('_CrossedColumn', - ['keys', 'hash_bucket_size', 'hash_key'])): + _CategoricalColumn, + collections.namedtuple('_CrossedColumn', + ['keys', 'hash_bucket_size', 'hash_key'])): """See `crossed_column`.""" @property @@ -3078,16 +3063,16 @@ def _transform_feature(self, inputs): ids_and_weights = key._get_sparse_tensors(inputs) # pylint: disable=protected-access if ids_and_weights.weight_tensor is not None: raise ValueError( - 'crossed_column does not support weight_tensor, but the given ' - 'column populates weight_tensor. ' - 'Given column: {}'.format(key.name)) + 'crossed_column does not support weight_tensor, but the given ' + 'column populates weight_tensor. ' + 'Given column: {}'.format(key.name)) feature_tensors.append(ids_and_weights.id_tensor) else: raise ValueError('Unsupported column type. 
Given: {}'.format(key)) return sparse_ops.sparse_cross_hashed( - inputs=feature_tensors, - num_buckets=self.hash_bucket_size, - hash_key=self.hash_key) + inputs=feature_tensors, + num_buckets=self.hash_bucket_size, + hash_key=self.hash_key) @property def _num_buckets(self): @@ -3152,9 +3137,9 @@ def _transform_feature(self, inputs): # If the underlying column is weighted, return the input as a dense tensor. if weight_tensor is not None: weighted_column = sparse_ops.sparse_merge( - sp_ids=id_tensor, - sp_values=weight_tensor, - vocab_size=int(self._variable_shape[-1])) + sp_ids=id_tensor, + sp_values=weight_tensor, + vocab_size=int(self._variable_shape[-1])) # Remove (?, -1) index. weighted_column = sparse_ops.sparse_slice(weighted_column, [0, 0], weighted_column.dense_shape) @@ -3165,15 +3150,15 @@ def _transform_feature(self, inputs): weighted_column.dense_shape) dense_id_tensor = sparse_ops.sparse_tensor_to_dense( - id_tensor, default_value=-1) + id_tensor, default_value=-1) # One hot must be float for tf.concat reasons since all other inputs to # input_layer are float32. one_hot_id_tensor = array_ops.one_hot( - dense_id_tensor, - depth=self._variable_shape[-1], - on_value=1.0, - off_value=0.0) + dense_id_tensor, + depth=self._variable_shape[-1], + on_value=1.0, + off_value=0.0) # Reduce to get a multi-hot per example. return math_ops.reduce_sum(one_hot_id_tensor, axis=[-2]) @@ -3209,14 +3194,14 @@ def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): del trainable if isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In indicator_column: {}. ' - 'categorical_column must not be of type _SequenceCategoricalColumn. ' - 'Suggested fix A: If you wish to use input_layer, use a ' - 'non-sequence categorical_column_with_*. ' - 'Suggested fix B: If you wish to create sequence input, use ' - 'sequence_input_layer instead of input_layer. 
' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In indicator_column: {}. ' + 'categorical_column must not be of type _SequenceCategoricalColumn. ' + 'Suggested fix A: If you wish to use input_layer, use a ' + 'non-sequence categorical_column_with_*. ' + 'Suggested fix B: If you wish to create sequence input, use ' + 'sequence_input_layer instead of input_layer. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) # Feature has been already transformed. Return the intermediate # representation created by _transform_feature. return inputs.get(self) @@ -3231,20 +3216,20 @@ def _get_sequence_dense_tensor(self, del trainable if not isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In indicator_column: {}. ' - 'categorical_column must be of type _SequenceCategoricalColumn ' - 'to use sequence_input_layer. ' - 'Suggested fix: Use one of sequence_categorical_column_with_*. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In indicator_column: {}. ' + 'categorical_column must be of type _SequenceCategoricalColumn ' + 'to use sequence_input_layer. ' + 'Suggested fix: Use one of sequence_categorical_column_with_*. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) # Feature has been already transformed. Return the intermediate # representation created by _transform_feature. 
dense_tensor = inputs.get(self) sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access sequence_length = fc_utils.sequence_length_from_sparse_tensor( - sparse_tensors.id_tensor) + sparse_tensors.id_tensor) return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=sequence_length) def _verify_static_batch_size_equality(tensors, columns): @@ -3267,16 +3252,16 @@ def _verify_static_batch_size_equality(tensors, columns): expected_batch_size = tensors[i].shape.dims[0] elif not expected_batch_size.is_compatible_with(tensors[i].shape.dims[0]): raise ValueError( - 'Batch size (first dimension) of each feature must be same. ' - 'Batch size of columns ({}, {}): ({}, {})'.format( - columns[bath_size_column_index].name, columns[i].name, - expected_batch_size, tensors[i].shape.dims[0])) + 'Batch size (first dimension) of each feature must be same. ' + 'Batch size of columns ({}, {}): ({}, {})'.format( + columns[bath_size_column_index].name, columns[i].name, + expected_batch_size, tensors[i].shape.dims[0])) class _SequenceCategoricalColumn(_CategoricalColumn, collections.namedtuple( - '_SequenceCategoricalColumn', - ['categorical_column'])): + '_SequenceCategoricalColumn', + ['categorical_column'])): """Represents sequences of categorical data.""" @property diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index c264c30c2..0ca532bea 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -5451,3 +5451,13 @@ def deserialize_feature_columns(configs, custom_objects=None): deserialize_feature_column(c, custom_objects, columns_by_name) for c in configs ] + + +def is_embedding_column(fc): + if isinstance(fc, EmbeddingColumn): + return True + if isinstance(fc, 
fc_old._SharedEmbeddingColumn): + return True + if isinstance(fc, SharedEmbeddingColumn): + return True + return False diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index d2325e680..5cdaa1dd1 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -6,6 +6,7 @@ import six import tensorflow as tf +from tensorflow.python.framework import ops from tensorflow.python.platform import gfile from easy_rec.python.core import sampler as sampler_lib @@ -1012,10 +1013,12 @@ def _input_fn(mode=None, params=None, config=None): return dataset elif mode is None: # serving_input_receiver_fn for export SavedModel if export_config.multi_placeholder: - inputs, features = self.create_multi_placeholders(export_config) + with ops.device('/CPU:0'): + inputs, features = self.create_multi_placeholders(export_config) return tf.estimator.export.ServingInputReceiver(features, inputs) else: - inputs, features = self.create_placeholders(export_config) + with ops.device('/CPU:0'): + inputs, features = self.create_placeholders(export_config) print('built feature placeholders. 
features: {}'.format( features.keys())) return tf.estimator.export.ServingInputReceiver(features, inputs) diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 8caa31b80..fa604926d 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -4,68 +4,23 @@ import tensorflow as tf -from easy_rec.python.utils.dag import DAG from easy_rec.python.layers import dnn -from easy_rec.python.layers.common_layers import layer_norm, SENet, highway -from easy_rec.python.layers.numerical_embedding import PeriodicEmbedding, AutoDisEmbedding +from easy_rec.python.layers.common_layers import SENet, EnhancedInputLayer +from easy_rec.python.layers.common_layers import highway, Concatenate from easy_rec.python.layers.fibinet import FiBiNetLayer +from easy_rec.python.layers.fm import FM, FMLayer from easy_rec.python.layers.mask_net import MaskNet -from easy_rec.python.layers.fm import FMLayer +from easy_rec.python.layers.numerical_embedding import AutoDisEmbedding +from easy_rec.python.layers.numerical_embedding import PeriodicEmbedding +from easy_rec.python.utils.dag import DAG +from easy_rec.python.utils.tf_utils import add_op, dot_op if tf.__version__ >= '2.0': tf = tf.compat.v1 -class EnhancedInputLayer(object): - def __init__(self, config, input_layer, feature_dict): - if config.do_batch_norm and config.do_layer_norm: - raise ValueError('can not do batch norm and layer norm for input layer at the same time') - self._config = config - self._input_layer = input_layer - self._feature_dict = feature_dict - - def __call__(self, feature_group, is_training, *args, **kwargs): - features, feature_list = self._input_layer(self._feature_dict, feature_group) - num_features = len(feature_list) - - do_feature_dropout = 0.0 < self._config.feature_dropout_rate < 1.0 - if self._config.output_feature_list or do_feature_dropout: - if self._config.do_layer_norm or self._config.do_batch_norm: - for i in range(num_features): - fea = 
feature_list[i] - if self._config.do_batch_norm: - fea = tf.layers.batch_normalization(fea, training=is_training) - elif self._config.do_layer_norm: - fea = layer_norm(fea) - feature_list[i] = fea - elif self._config.do_batch_norm: - features = tf.layers.batch_normalization(features, training=is_training) - elif self._config.do_layer_norm: - features = layer_norm(features) - - if do_feature_dropout and is_training: - keep_prob = 1.0 - self._config.feature_dropout_rate - bern = tf.distributions.Bernoulli(probs=keep_prob) - mask = bern.sample(num_features) - for i in range(num_features): - fea = tf.div(feature_list[i], keep_prob) * mask[i] - feature_list[i] = fea - features = tf.concat(feature_list, axis=-1) - - do_dropout = 0.0 < self._config.dropout_rate < 1.0 - if self._config.output_feature_list: - if do_dropout: - for i in range(num_features): - fea = feature_list[i] - fea = tf.layers.dropout(fea, self._config.dropout_rate, training=is_training) - feature_list[i] = fea - return feature_list - if do_dropout: - return tf.layers.dropout(features, self._config.dropout_rate, training=is_training) - return features - - class Backbone(object): + def __init__(self, config, model, features, input_layer, l2_reg=None): self._model = model self._config = config @@ -77,14 +32,15 @@ def __init__(self, config, model, features, input_layer, l2_reg=None): for block in config.blocks: self._name_to_blocks[block.name] = block self._dag.add_node(block.name) - assert len(self._name_to_blocks) > 0, 'there must be more than one block in backbone' + num_blocks = len(self._name_to_blocks) + assert num_blocks > 0, 'there must be at least one block in backbone' for block in config.blocks: - assert len(block.inputs) > 0, 'there is no input for block: %s' % block.name + assert len(block.inputs) > 0, 'no input for block: %s' % block.name for node in block.inputs: if node in self._name_to_blocks: self._dag.add_edge(node, block.name) - def block_input(self, config, block_outputs): + def 
block_input(self, config, block_outputs, output_list=False): inputs = [] for input_name in config.inputs: if input_name in block_outputs: @@ -92,26 +48,37 @@ def block_input(self, config, block_outputs): else: input_feature, _ = self._input_layer(self._features, input_name) inputs.append(input_feature) - return concat_inputs(inputs, config.name) + + if output_list: + output = inputs + else: + output = concat_inputs(inputs, config.input_concat_axis, config.name) + + if config.HasField('extra_input_fn'): + fn = eval(config.extra_input_fn) + output = fn(output) + return output def __call__(self, is_training, *args, **kwargs): block_outputs = {} blocks = self._dag.topological_sort() - logging.info("backbone topological order: " + ','.join(blocks)) - print("backbone topological order: " + ','.join(blocks)) + logging.info('backbone topological order: ' + ','.join(blocks)) + print('backbone topological order: ' + ','.join(blocks)) for block in blocks: config = self._name_to_blocks[block] layer = config.WhichOneof('layer') if layer == 'input_layer': - assert len(config.inputs) == 1, 'only one input needed for input_layer: ' + block.name + if len(config.inputs) != 1: + raise ValueError('only one input allowed for input_layer: ' + + block.name) conf = config.input_layer - input_layer = EnhancedInputLayer(conf, self._input_layer, self._features) + input_layer = EnhancedInputLayer(conf, self._input_layer, + self._features) output = input_layer(config.inputs[0], is_training) block_outputs[block] = output elif layer == 'periodic_embedding': input_feature = self.block_input(config, block_outputs) - conf = config.periodic_embedding - num_emb = PeriodicEmbedding(conf.embedding_dim, stddev=conf.coef_stddev, scope=block) + num_emb = PeriodicEmbedding(config.periodic_embedding, scope=block) block_outputs[block] = num_emb(input_feature) elif layer == 'auto_dis_embedding': input_feature = self.block_input(config, block_outputs) @@ -121,30 +88,28 @@ def __call__(self, is_training, *args, 
**kwargs): input_feature = self.block_input(config, block_outputs) conf = config.highway highway_layer = highway( - input_feature, - conf.emb_size, - activation=conf.activation, - dropout=conf.dropout_rate, - scope=block) + input_feature, + conf.emb_size, + activation=conf.activation, + dropout=conf.dropout_rate, + scope=block) block_outputs[block] = highway_layer(input_feature) elif layer == 'mlp': input_feature = self.block_input(config, block_outputs) mlp = dnn.DNN( - config.mlp, - self._l2_reg, - name='%s_mlp' % block, - is_training=is_training) + config.mlp, + self._l2_reg, + name='%s_mlp' % block, + is_training=is_training, + last_layer_no_activation=config.mlp.last_layer_no_activation, + last_layer_no_batch_norm=config.mlp.last_layer_no_batch_norm) block_outputs[block] = mlp(input_feature) elif layer == 'sequence_encoder': block_outputs[block] = self.sequence_encoder(config, is_training) elif layer == 'masknet': input_feature = self.block_input(config, block_outputs) - mask_net = MaskNet( - config.masknet, - name=block, - reuse=tf.AUTO_REUSE) - output = mask_net( - input_feature, is_training, l2_reg=self._l2_reg) + mask_net = MaskNet(config.masknet, name=block, reuse=tf.AUTO_REUSE) + output = mask_net(input_feature, is_training, l2_reg=self._l2_reg) block_outputs[block] = output elif layer == 'senet': input_feature = self.block_input(config, block_outputs) @@ -158,8 +123,28 @@ def __call__(self, is_training, *args, **kwargs): block_outputs[block] = output elif layer == 'fm': input_feature = self.block_input(config, block_outputs) - fm = FMLayer() + fm = FMLayer(config.fm, name=block) block_outputs[block] = fm(input_feature) + elif layer == 'concat': + input_feature = self.block_input(config, block_outputs) + concat = Concatenate(config.concat) + block_outputs[block] = concat(input_feature) + elif layer == 'reshape': + input_feature = self.block_input(config, block_outputs) + block_outputs[block] = tf.reshape(input_feature, list(config.reshape.dims)) + elif 
layer == 'add': + input_feature = self.block_input(config, block_outputs, output_list=True) + block_outputs[block] = add_op(input_feature) + elif layer == 'dot': + input_feature = self.block_input(config, block_outputs) + block_outputs[block] = dot_op(input_feature) + elif layer == 'Lambda': + input_feature = self.block_input(config, block_outputs) + fn = eval(config.Lambda.expression) + block_outputs[block] = fn(input_feature) + elif layer == 'chain': + input_feature = self.block_input(config, block_outputs) + block_outputs[block] = op_chain(input_feature, config.chain.ops) else: raise NotImplementedError('Unsupported backbone layer:' + layer) @@ -170,13 +155,17 @@ def __call__(self, is_training, *args, **kwargs): else: raise ValueError('No output `%s` of backbone to be concat' % output) - output = concat_inputs(temp) + output = concat_inputs(temp, msg='backbone') if self._config.HasField('top_mlp'): + no_act = self._config.top_mlp.last_layer_no_activation + no_bn = self._config.top_mlp.last_layer_no_batch_norm final_dnn = dnn.DNN( - self._config.top_mlp, - self._l2_reg, - name='backbone_top_mlp', - is_training=is_training) + self._config.top_mlp, + self._l2_reg, + name='backbone_top_mlp', + is_training=is_training, + last_layer_no_activation=no_act, + last_layer_no_batch_norm=no_bn) output = final_dnn(output) return output @@ -189,22 +178,90 @@ def sequence_encoder(self, config, is_training): conf = config.sequence_encoder if conf.HasField('mlp'): sequence_dnn = dnn.DNN( - conf.mlp, - self._l2_reg, - name='%s_seq_dnn' % config.name, - is_training=is_training) + conf.mlp, + self._l2_reg, + name='%s_seq_dnn' % config.name, + is_training=is_training) encoding = sequence_dnn(encoding) return encoding -def concat_inputs(inputs, msg=''): +def concat_inputs(inputs, axis=-1, msg=''): if len(inputs) > 1: - if type(inputs[0]) == list: + if all(map(lambda x: type(x) == list, inputs)): + # merge multiple lists into a list from functools import reduce return reduce(lambda x, 
y: x + y, inputs) - return tf.concat(inputs, axis=-1) + + if axis != -1: + logging.info('concat inputs %s axis=%d' % (msg, axis)) + return tf.concat(inputs, axis=axis) + if len(inputs) == 1: return inputs[0] raise ValueError('no inputs to be concat:' + msg) +def op_chain(inputs, ops): + output = inputs + for op in ops: + op_name = op.WhichOneOf('Op') + output = run_op(output, op_name, op, block='op_chain') + return output + + +def run_op(inputs, op_name, config, block='', is_training=False, l2_reg=None): + if op_name == 'periodic_embedding': + num_emb = PeriodicEmbedding(config.periodic_embedding, scope=block) + return num_emb(inputs) + elif op_name == 'auto_dis_embedding': + num_emb = AutoDisEmbedding(config.auto_dis_embedding, scope=block) + return num_emb(inputs) + elif op_name == 'highway': + conf = config.highway + highway_op_name = highway( + inputs, + conf.emb_size, + activation=conf.activation, + dropout=conf.dropout_rate, + scope=block) + return highway_op_name(inputs) + elif op_name == 'mlp': + mlp = dnn.DNN( + config.mlp, + l2_reg, + name='%s_mlp' % block, + is_training=is_training, + last_layer_no_activation=config.mlp.last_layer_no_activation, + last_layer_no_batch_norm=config.mlp.last_layer_no_batch_norm) + return mlp(inputs) + elif op_name == 'masknet': + mask_net = MaskNet(config.masknet, name=block, reuse=tf.AUTO_REUSE) + output = mask_net(inputs, is_training, l2_reg=l2_reg) + return output + elif op_name == 'senet': + senet = SENet(config.senet, name=block) + output = senet(inputs) + return output + elif op_name == 'fibinet': + fibinet = FiBiNetLayer(config.fibinet, name=block) + output = fibinet(inputs, is_training, l2_reg=l2_reg) + return output + elif op_name == 'fm': + fm = FMLayer(config.fm, name=block) + return fm(inputs) + if op_name == 'Lambda': + fn = eval(config.Lambda.expression) + output = fn(inputs) + elif op_name == 'concat': + concat = Concatenate(config.concat) + output = concat(inputs) + elif op_name == 'reshape': + output = 
tf.reshape(inputs, list(config.reshape.dims)) + elif op_name == 'add': + output = add_op(inputs) + elif op_name == 'dot': + output = dot_op(inputs) + else: + raise NotImplementedError('Unsupported op:' + op_name) + return output diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index be4615699..a453141f9 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -2,6 +2,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import itertools import logging + import six import tensorflow as tf @@ -80,6 +81,88 @@ def layer_norm(input_tensor, name=None, reuse=None): scope=name) +class EnhancedInputLayer(object): + + def __init__(self, config, input_layer, feature_dict): + if config.do_batch_norm and config.do_layer_norm: + raise ValueError( + 'can not do batch norm and layer norm for input layer at the same time' + ) + self._config = config + self._input_layer = input_layer + self._feature_dict = feature_dict + + def __call__(self, feature_group, is_training, *args, **kwargs): + features, feature_list = self._input_layer(self._feature_dict, + feature_group) + num_features = len(feature_list) + + do_feature_dropout = 0.0 < self._config.feature_dropout_rate < 1.0 + if self._config.output_feature_list or do_feature_dropout: + if self._config.do_layer_norm or self._config.do_batch_norm: + for i in range(num_features): + fea = feature_list[i] + if self._config.do_batch_norm: + fea = tf.layers.batch_normalization(fea, training=is_training) + elif self._config.do_layer_norm: + fea = layer_norm(fea) + feature_list[i] = fea + elif self._config.do_batch_norm: + features = tf.layers.batch_normalization(features, training=is_training) + elif self._config.do_layer_norm: + features = layer_norm(features) + + if do_feature_dropout and is_training: + keep_prob = 1.0 - self._config.feature_dropout_rate + bern = tf.distributions.Bernoulli(probs=keep_prob) + mask = bern.sample(num_features) + for i in 
range(num_features): + fea = tf.div(feature_list[i], keep_prob) * mask[i] + feature_list[i] = fea + features = tf.concat(feature_list, axis=-1) + + do_dropout = 0.0 < self._config.dropout_rate < 1.0 + if self._config.output_feature_list: + if do_dropout: + for i in range(num_features): + fea = feature_list[i] + fea = tf.layers.dropout( + fea, self._config.dropout_rate, training=is_training) + feature_list[i] = fea + if self._config.output_3d_tensor: + for i in range(num_features): + feature_list[i] = tf.expand_dims(feature_list[i], axis=1) + return tf.concat(feature_list, axis=1) + return feature_list + + if do_dropout: + features = tf.layers.dropout( + features, self._config.dropout_rate, training=is_training) + + if self._config.output_3d_tensor: + dim = int(feature_list[0].shape[-1]) + return tf.reshape(features, [-1, num_features, dim]) + return features + + +class Concatenate(object): + + def __init__(self, config): + self.config = config + + def __call__(self, inputs, *args, **kwargs): + if self.config.HasField('expand_dim_before'): + dim = self.config.expand_dim_before + output = tf.stack(inputs, axis=dim) + else: + output = tf.concat(inputs, axis=self.config.axis) + + if self.config.HasField('expand_dim_after'): + dim = self.config.expand_dim_after + output = tf.expand_dims(output, dim) + return output + + class SENet(object): """SENet+ Layer used in FiBiNET,支持不同field的embedding dimension不等. 
diff --git a/easy_rec/python/layers/fibinet.py b/easy_rec/python/layers/fibinet.py index 4ba15789e..77b6da4a5 100644 --- a/easy_rec/python/layers/fibinet.py +++ b/easy_rec/python/layers/fibinet.py @@ -46,9 +46,9 @@ def __call__(self, inputs, is_training, l2_reg=None, *args, **kwargs): if self._config.HasField('mlp'): final_dnn = dnn.DNN( - self._config.mlp, - l2_reg, - name='%s_fibinet_mlp' % self.name, - is_training=is_training) + self._config.mlp, + l2_reg, + name='%s_fibinet_mlp' % self.name, + is_training=is_training) feature = final_dnn(feature) return feature diff --git a/easy_rec/python/layers/fm.py b/easy_rec/python/layers/fm.py index 198d6b8d6..87d621d57 100644 --- a/easy_rec/python/layers/fm.py +++ b/easy_rec/python/layers/fm.py @@ -19,8 +19,7 @@ def __init__(self, name='fm'): def __call__(self, fm_fea): with tf.name_scope(self._name): - fm_feas = [tf.expand_dims(x, axis=1) for x in fm_fea] - fm_feas = tf.concat(fm_feas, axis=1) + fm_feas = tf.stack(fm_fea, axis=1) sum_square = tf.square(tf.reduce_sum(fm_feas, 1)) square_sum = tf.reduce_sum(tf.square(fm_feas), 1) y_v = 0.5 * tf.subtract(sum_square, square_sum) @@ -28,32 +27,42 @@ def __call__(self, fm_fea): class FMLayer(object): - """Factorization Machine models pairwise (order-2) feature interactions - without linear term and bias. - Input shape + """Factorization Machine models pairwise (order-2) feature interactions without linear term and bias. + + References + - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) + """ + def __init__(self, config, name='fm'): + self.name = name + self.config = config + + def __call__(self, inputs): + """FM layer. + + Input shape. - List of 2D tensor with shape: ``(batch_size,embedding_size)``. - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` Output shape - 2D tensor with shape: ``(batch_size, 1)``. 
- References - - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) - """ - def __call__(self, inputs): + """ if type(inputs) == list: - emb_dims = set() - for x in inputs: - emb_dims.add(int(x.shape[-1])) - assert len(emb_dims) == 1, 'all embedding dim must be the same in FM layer:' + ','.join([str(d) for d in emb_dims]) - num_fea = len(inputs) - emb_dim = emb_dims.pop() - fea = tf.concat(inputs, axis=-1) - fea = tf.reshape(fea, [-1, num_fea, emb_dim]) + emb_dims = set(map(lambda x: int(x.shape[-1]), inputs)) + if len(emb_dims) != 1: + dims = ','.join([str(d) for d in emb_dims]) + raise ValueError('all embedding dim must be equal in FM layer:' + dims) + + with tf.name_scope(self.name): + fea = tf.stack(inputs, axis=1) else: assert inputs.shape.ndims == 3, 'input of FM layer must be a 3D tensor or a list of 2D tensors' fea = inputs - square_of_sum = tf.square(tf.reduce_sum(fea, axis=1, keepdims=True)) - sum_of_square = tf.reduce_sum(fea * fea, axis=1, keepdims=True) - cross_term = square_of_sum - sum_of_square - cross_term = 0.5 * tf.reduce_sum(cross_term, axis=2, keepdims=False) + with tf.name_scope(self.name): + square_of_sum = tf.square(tf.reduce_sum(fea, axis=1)) + sum_of_square = tf.reduce_sum(fea * fea, axis=1) + cross_term = square_of_sum - sum_of_square + if self.config.use_variant: + cross_term = 0.5 * cross_term + else: + cross_term = 0.5 * tf.reduce_sum(cross_term, axis=-1) return cross_term diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index ced65c0cf..33cd681ad 100644 --- a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -4,6 +4,7 @@ from collections import OrderedDict import tensorflow as tf +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import variable_scope @@ -16,12 +17,9 @@ from easy_rec.python.layers.common_layers import text_cnn from 
easy_rec.python.layers.fscd_layer import FSCDLayer from easy_rec.python.protos.feature_config_pb2 import WideOrDeep -from easy_rec.python.utils import shape_utils +from easy_rec.python.utils import shape_utils, conditional -from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA -from easy_rec.python.compat.feature_column.feature_column_v2 import SharedEmbeddingColumn # NOQA - -from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA +from easy_rec.python.compat.feature_column.feature_column_v2 import is_embedding_column class InputLayer(object): @@ -39,7 +37,7 @@ def __init__(self, embedding_regularizer=None, kernel_regularizer=None, is_training=False, - do_feature_normalize=False): + is_predicting=False): self._feature_configs = feature_configs self._feature_groups = { x.group_name: FeatureGroup(x) for x in feature_groups_config @@ -66,8 +64,8 @@ def __init__(self, self._embedding_regularizer = embedding_regularizer self._kernel_regularizer = kernel_regularizer self._is_training = is_training + self._is_predicting = is_predicting self._variational_dropout_config = variational_dropout_config - self._do_feature_normalize = do_feature_normalize def has_group(self, group_name): return group_name in self._feature_groups @@ -97,7 +95,8 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): feature_name_to_output_tensors = {} negative_sampler = self._feature_groups[group_name]._config.negative_sampler if is_combine: - concat_features, group_features = self.single_call_input_layer( + with conditional(self._is_predicting, ops.device('/CPU:0')): + concat_features, group_features = self.single_call_input_layer( features, group_name, feature_name_to_output_tensors) if group_name in self._group_name_to_seq_features: # for target attention @@ -121,7 +120,7 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): return concat_features, group_features else: # 
return sequence feature in raw format instead of combine them if self._variational_dropout_config is not None: - logging.warn( + logging.warning( 'variational dropout is not supported in not combined mode now.') feature_group = self._feature_groups[group_name] @@ -138,13 +137,11 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): group_columns, cols_to_output_tensors=cols_to_output_tensors, feature_name_to_output_tensors=feature_name_to_output_tensors, - do_normalize=self._do_feature_normalize) + sort_feature_columns_by_name=False) group_features = [cols_to_output_tensors[x] for x in group_columns] for col, val in cols_to_output_tensors.items(): - if isinstance(col, EmbeddingColumn) or isinstance( - col, _SharedEmbeddingColumn) or isinstance( - col, SharedEmbeddingColumn): + if is_embedding_column(col): embedding_reg_lst.append(val) builder = feature_column._LazyBuilder(features) @@ -188,8 +185,7 @@ def single_call_input_layer(self, features, group_columns, cols_to_output_tensors=cols_to_output_tensors, - feature_name_to_output_tensors=feature_name_to_output_tensors, - do_normalize=self._do_feature_normalize) + feature_name_to_output_tensors=feature_name_to_output_tensors) embedding_reg_lst = [] builder = feature_column._LazyBuilder(features) @@ -197,7 +193,8 @@ def single_call_input_layer(self, for column in sorted(group_seq_columns, key=lambda x: x.name): with variable_scope.variable_scope( None, default_name=column._var_scope_name): - seq_feature, seq_len = column._get_sequence_dense_tensor(builder) + with conditional(self._is_predicting, ops.device('/CPU:0')): + seq_feature, seq_len = column._get_sequence_dense_tensor(builder) embedding_reg_lst.append(seq_feature) sequence_combiner = column.sequence_combiner @@ -265,8 +262,7 @@ def single_call_input_layer(self, [cols_to_output_tensors[x] for x in group_seq_columns] for fc, val in cols_to_output_tensors.items(): - if isinstance(fc, EmbeddingColumn) or isinstance( - fc, 
_SharedEmbeddingColumn) or isinstance(fc, SharedEmbeddingColumn): + if is_embedding_column(fc): embedding_reg_lst.append(val) if embedding_reg_lst: diff --git a/easy_rec/python/layers/mask_net.py b/easy_rec/python/layers/mask_net.py index 034cd6018..2ec3f5799 100644 --- a/easy_rec/python/layers/mask_net.py +++ b/easy_rec/python/layers/mask_net.py @@ -46,8 +46,13 @@ def __call__(self, net, mask_input): output_size = self.mask_block_config.output_size hidden = tf.layers.dense( - masked_net, output_size, use_bias=False, name='%s/output' % self.name, reuse=self.reuse) - ln_hidden = layer_norm(hidden, name='%s/ln_output' % self.name, reuse=self.reuse) + masked_net, + output_size, + use_bias=False, + name='%s/output' % self.name, + reuse=self.reuse) + ln_hidden = layer_norm( + hidden, name='%s/ln_output' % self.name, reuse=self.reuse) return tf.nn.relu(ln_hidden) diff --git a/easy_rec/python/layers/numerical_embedding.py b/easy_rec/python/layers/numerical_embedding.py index 26e9f63a3..1c45fa361 100644 --- a/easy_rec/python/layers/numerical_embedding.py +++ b/easy_rec/python/layers/numerical_embedding.py @@ -3,21 +3,110 @@ import math import tensorflow as tf -from easy_rec.python.compat.array_ops import repeat + +from easy_rec.python.utils.activation import get_activation + if tf.__version__ >= '2.0': tf = tf.compat.v1 -class PeriodicEmbedding(object): +class NLinear(object): + """N linear layers for N token (feature) embeddings. + + To understand this module, let's revise `tf.layers.dense`. When `tf.layers.dense` is + applied to three-dimensional inputs of the shape + ``(batch_size, n_tokens, d_embedding)``, then the same linear transformation is + applied to each of ``n_tokens`` token (feature) embeddings. + + By contrast, `NLinear` allocates one linear layer per token (``n_tokens`` layers in total). + One such layer can be represented as ``tf.layers.dense(d_in, d_out)``. 
+ So, the i-th linear transformation is applied to the i-th token embedding, as + illustrated in the following pseudocode:: + + layers = [tf.layers.dense(d_in, d_out) for _ in range(n_tokens)] + x = tf.random.normal(batch_size, n_tokens, d_in) + result = tf.stack([layers[i](x[:, i]) for i in range(n_tokens)], 1) + + Examples: + .. testcode:: + + batch_size = 2 + n_features = 3 + d_embedding_in = 4 + d_embedding_out = 5 + x = tf.random.normal(batch_size, n_features, d_embedding_in) + m = NLinear(n_features, d_embedding_in, d_embedding_out) + assert m(x).shape == (batch_size, n_features, d_embedding_out) + """ - def __init__(self, embedding_dim, scope='periodic_embedding', stddev=1.0): - """On Embeddings for Numerical Features in Tabular Deep Learning. + def __init__(self, n_tokens, d_in, d_out, bias=True, scope='nd_linear'): + """Init with input shapes. - Refer: https://arxiv.org/pdf/2203.05556.pdf + Args: + n_tokens: the number of tokens (features) + d_in: the input dimension + d_out: the output dimension + bias: indicates if the underlying linear layers have biases """ - self.embedding_dim = embedding_dim // 2 + with tf.variable_scope(scope): + self.weight = tf.get_variable( + 'weights', [1, n_tokens, d_in, d_out], dtype=tf.float32) + if bias: + initializer = tf.constant_initializer(0.0) + self.bias = tf.get_variable( + 'bias', [1, n_tokens, d_out], + dtype=tf.float32, + initializer=initializer) + else: + self.bias = None + + def __call__(self, x, *args, **kwargs): + if x.shape.ndims != 3: + raise ValueError( + 'The input must have three dimensions (batch_size, n_tokens, d_embedding)' + ) + if x.shape[2] != self.weight.shape[2]: + raise ValueError('invalid input embedding dimension %d, expect %d' % + (int(x.shape[2]), int(self.weight.shape[2]))) + + x = x[..., None] * self.weight # [B, N, D, D_out] + x = tf.reduce_sum(x, axis=-2) # [B, N, D_out] + if self.bias is not None: + x = x + self.bias + return x + + +class PeriodicEmbedding(object): + """Periodic 
embeddings for numerical features described in [1]. + + References: + * [1] Yury Gorishniy, Ivan Rubachev, Artem Babenko, + "On Embeddings for Numerical Features in Tabular Deep Learning", 2022 + https://arxiv.org/pdf/2203.05556.pdf + """ + + def __init__(self, config, scope='periodic_embedding'): + """Init with a pb config. + + Args: + config: pb config + config.embedding_dim: the embedding size, must be an even positive integer. + config.sigma: the scale of the weight initialization. + **This is a super important parameter which significantly affects performance**. + Its optimal value can be dramatically different for different datasets, so + no "default value" can exist for this parameter, and it must be tuned for + each dataset. In the original paper, during hyperparameter tuning, this + parameter was sampled from the distribution ``LogUniform[1e-2, 1e2]``. + A similar grid would be ``[1e-2, 1e-1, 1e0, 1e1, 1e2]``. + If possible, add more intermidiate values to this grid. + config.output_3d_tensor: whether to output a 3d tensor + """ + self.config = config + if config.embedding_dim % 2: + raise ValueError('embedding_dim must be even') + self.emb_dim = config.embedding_dim // 2 self.scope = scope - self.initializer = tf.random_normal_initializer(stddev=stddev) + self.initializer = tf.random_normal_initializer(stddev=config.sigma) def __call__(self, inputs, *args, **kwargs): if inputs.shape.ndims != 2: @@ -26,24 +115,29 @@ def __call__(self, inputs, *args, **kwargs): num_features = int(inputs.shape[-1]) with tf.variable_scope(self.scope): c = tf.get_variable( - 'coef', - shape=[1, num_features * self.embedding_dim], + 'coefficients', + shape=[1, num_features, self.emb_dim], initializer=self.initializer) - features = repeat(inputs, self.embedding_dim, axis=1) - v = features * c * 2 * math.pi - sin_v = tf.split(tf.sin(v), num_features, axis=1) - cos_v = tf.split(tf.cos(v), num_features, axis=1) + features = inputs[..., None] # [B, N, 1] + v = 2 * math.pi * c * 
features # [B, N, E] + emb = tf.concat([tf.sin(v), tf.cos(v)], axis=-1) # [B, N, 2E] + + dim = self.config.embedding_dim + if self.config.add_linear_layer: + linear = NLinear(num_features, dim, dim) + emb = linear(emb) + act = get_activation(self.config.linear_activation) + if callable(act): + emb = act(emb) - embeddings = [] - for val in zip(sin_v, cos_v): - embedding = tf.concat(val, axis=1) - embedding = tf.layers.dense(embedding, int(embedding.shape[-1]), activation=tf.nn.relu) - embeddings.append(embedding) - return tf.concat(embeddings, axis=1) + if self.config.output_3d_tensor: + return emb + return tf.reshape(emb, [-1, num_features * dim]) class AutoDisEmbedding(object): + def __init__(self, config, scope='auto_dis'): """An Embedding Learning Framework for Numerical Features in CTR Prediction. @@ -60,21 +154,29 @@ def __call__(self, inputs, *args, **kwargs): num_features = int(inputs.shape[-1]) with tf.variable_scope(self.scope): - meta_emb = tf.get_variable('meta_embedding', shape=[1, num_features, self.num_bins, self.emb_dim]) + meta_emb = tf.get_variable( + 'meta_embedding', + shape=[1, num_features, self.num_bins, self.emb_dim]) w = tf.get_variable('project_w', shape=[1, num_features, self.num_bins]) - mat = tf.get_variable('project_mat', shape=[1, num_features, self.num_bins, self.num_bins]) + mat = tf.get_variable( + 'project_mat', shape=[1, num_features, self.num_bins, self.num_bins]) x = tf.expand_dims(inputs, axis=-1) # [B, num_fea, 1] hidden = tf.nn.leaky_relu(w * x) # [B, num_fea, num_bin] - y = tf.matmul(mat, tf.expand_dims(hidden, axis=-1)) # [B, num_fea, num_bin, 1] + y = tf.matmul(mat, hidden[..., None]) # [B, num_fea, num_bin, 1] y = tf.squeeze(y, axis=3) # [B, num_fea, num_bin] # keep_prob(float): if dropout_flag is True, keep_prob rate to keep connect; (float, keep_prob=0.8) alpha = self.config.keep_prob x_bar = y + alpha * hidden # [B, num_fea, num_bin] - x_hat = tf.nn.softmax(x_bar / self.config.temperature) # [B, num_fea, num_bin] + t = 
self.config.temperature + x_hat = tf.nn.softmax(x_bar / t) # [B, num_fea, num_bin] - emb = tf.matmul(tf.expand_dims(x_hat, axis=2), meta_emb) # [B, num_fea, 1, emb_dim] + emb = tf.matmul(x_hat[:, :, None, :], meta_emb) # [B, num_fea, 1, emb_dim] # emb = tf.squeeze(emb, axis=2) # [B, num_fea, emb_dim] - return tf.reshape(emb, [-1, self.emb_dim * num_features]) # [B, num_fea*emb_dim] + if self.config.output_3d_tensor: + return tf.reshape( + emb, [-1, num_features, self.emb_dim]) # [B, num_fea, emb_dim] + return tf.reshape( + emb, [-1, self.emb_dim * num_features]) # [B, num_fea*emb_dim] diff --git a/easy_rec/python/model/easy_rec_estimator.py b/easy_rec/python/model/easy_rec_estimator.py index 51ecad09f..9cbd28b6c 100644 --- a/easy_rec/python/model/easy_rec_estimator.py +++ b/easy_rec/python/model/easy_rec_estimator.py @@ -514,7 +514,8 @@ def _export_model_fn(self, features, labels, run_config, params): self.feature_configs, features, labels=None, - is_training=False) + is_training=False, + is_predicting=True) model.build_predict_graph() export_config = self._pipeline_config.export_config diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index c6d864498..331d0282e 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -11,8 +11,8 @@ from tensorflow.python.ops.variables import PartitionedVariable from easy_rec.python.compat import regularizers -from easy_rec.python.layers.backbone import Backbone from easy_rec.python.layers import input_layer +from easy_rec.python.layers.backbone import Backbone from easy_rec.python.layers.sequence_encoder import SequenceEncoder from easy_rec.python.utils import constant from easy_rec.python.utils import estimator_utils @@ -34,10 +34,12 @@ def __init__(self, feature_configs, features, labels=None, - is_training=False): + is_training=False, + is_predicting=False): self._base_model_config = model_config self._model_config = model_config 
self._is_training = is_training + self._is_predicting = is_predicting self._feature_dict = features # embedding variable parameters @@ -67,9 +69,12 @@ def __init__(self, self._l2_reg) self._sequence_encoding_by_group_name = {} if model_config.HasField('backbone'): - self._backbone = Backbone(model_config.backbone, self, features, - input_layer=self._input_layer, - l2_reg=self._l2_reg) + self._backbone = Backbone( + model_config.backbone, + self, + features, + input_layer=self._input_layer, + l2_reg=self._l2_reg) else: self._backbone = None @@ -120,7 +125,8 @@ def build_input_layer(self, model_config, feature_configs): kernel_regularizer=self._l2_reg, variational_dropout_config=model_config.variational_dropout if model_config.HasField('variational_dropout') else None, - is_training=self._is_training) + is_training=self._is_training, + is_predicting=self._is_predicting) def get_sequence_encoding(self, group_name=None, is_training=True): if group_name is not None: diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index 2b4ccfd21..0285f225c 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -31,10 +31,22 @@ def __init__(self, def build_predict_graph(self): if not self.has_backbone: - raise NotImplementedError('method `build_predict_graph` must be implemented when backbone network do not exits') + raise NotImplementedError( + 'method `build_predict_graph` must be implemented when backbone network do not exits' + ) + output = self.backbone + + model_config = getattr(self._base_model_config, + self._base_model_config.WhichOneof('model')) + if hasattr(model_config, 'add_head_logits_layer') and \ + model_config.HasField('add_head_logits_layer'): + add_head_logits_layer = model_config.add_head_logits_layer + else: + add_head_logits_layer = True + if add_head_logits_layer: + logging.info('add head logits layer for rank model') + output = tf.layers.dense(output, self._num_class, name='output') - 
net = self.backbone - output = tf.layers.dense(net, self._num_class, name='output') self._add_to_prediction_dict(output) return self._prediction_dict @@ -45,9 +57,9 @@ def _output_to_prediction_impl(self, suffix=''): prediction_dict = {} binary_loss_type = { - LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, - LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, - LossType.PAIRWISE_LOGISTIC_LOSS + LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS } if loss_type in binary_loss_type: assert num_class == 1, 'num_class must be 1 when loss type is %s' % loss_type.name @@ -74,9 +86,9 @@ def _output_to_prediction_impl(self, prediction_dict['logits' + suffix] = output prediction_dict['probs' + suffix] = probs prediction_dict['logits' + suffix + '_y'] = math_ops.reduce_max( - output, axis=1) + output, axis=1) prediction_dict['probs' + suffix + '_y'] = math_ops.reduce_max( - probs, axis=1) + probs, axis=1) prediction_dict['y' + suffix] = tf.argmax(output, axis=1) elif loss_type == LossType.L2_LOSS: output = tf.squeeze(output, axis=1) @@ -89,12 +101,12 @@ def _output_to_prediction_impl(self, def _add_to_prediction_dict(self, output): if len(self._losses) == 0: prediction_dict = self._output_to_prediction_impl( - output, loss_type=self._loss_type, num_class=self._num_class) + output, loss_type=self._loss_type, num_class=self._num_class) self._prediction_dict.update(prediction_dict) else: for loss in self._losses: prediction_dict = self._output_to_prediction_impl( - output, loss_type=loss.loss_type, num_class=self._num_class) + output, loss_type=loss.loss_type, num_class=self._num_class) self._prediction_dict.update(prediction_dict) def build_rtp_output_dict(self): @@ -106,9 +118,9 @@ def build_rtp_output_dict(self): op = tf.get_default_graph().get_operation_by_name('rank_predict') if len(op.outputs) != 1: raise ValueError( - ('failed to build RTP rank_predict output: op 
{}[{}] has output ' + - 'size {}, however 1 is expected.').format(op.name, op.type, - len(op.outputs))) + ('failed to build RTP rank_predict output: op {}[{}] has output ' + + 'size {}, however 1 is expected.').format(op.name, op.type, + len(op.outputs))) rank_predict = op.outputs[0] except KeyError: forwarded = None @@ -116,32 +128,32 @@ def build_rtp_output_dict(self): if len(self._losses) > 0: loss_types = {loss.loss_type for loss in self._losses} binary_loss_set = { - LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, - LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, - LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, - LossType.JRC_LOSS + LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, + LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, + LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, + LossType.JRC_LOSS } if loss_types & binary_loss_set: if 'probs' in self._prediction_dict: forwarded = self._prediction_dict['probs'] else: raise ValueError( - 'failed to build RTP rank_predict output: classification model ' + - "expect 'probs' prediction, which is not found. Please check if" + - ' build_predict_graph() is called.') + 'failed to build RTP rank_predict output: classification model ' + + "expect 'probs' prediction, which is not found. Please check if" + + ' build_predict_graph() is called.') elif loss_types & {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: if 'y' in self._prediction_dict: forwarded = self._prediction_dict['y'] else: raise ValueError( - 'failed to build RTP rank_predict output: regression model expect' - + - "'y' prediction, which is not found. Please check if build_predic" - + 't_graph() is called.') + 'failed to build RTP rank_predict output: regression model expect' + + + "'y' prediction, which is not found. 
Please check if build_predic" + + 't_graph() is called.') else: logging.warning( - 'failed to build RTP rank_predict: unsupported loss type {}'.format( - loss_types)) + 'failed to build RTP rank_predict: unsupported loss type {}'.format( + loss_types)) if forwarded is not None: rank_predict = tf.identity(forwarded, name='rank_predict') if rank_predict is not None: @@ -158,9 +170,9 @@ def _build_loss_impl(self, loss_param=None): loss_dict = {} binary_loss_type = { - LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, - LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, - LossType.PAIRWISE_LOGISTIC_LOSS, LossType.JRC_LOSS + LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS, LossType.JRC_LOSS } if loss_type == LossType.CLASSIFICATION: loss_name = loss_name if loss_name else 'cross_entropy_loss' + suffix @@ -184,23 +196,23 @@ def _build_loss_impl(self, if hasattr(loss_param, 'session_name'): kwargs['session_ids'] = self._feature_dict[loss_param.session_name] loss_dict[loss_name] = loss_builder.build( - loss_type, - self._labels[label_name], - pred, - loss_weight, - num_class, - loss_param=loss_param, - **kwargs) + loss_type, + self._labels[label_name], + pred, + loss_weight, + num_class, + loss_param=loss_param, + **kwargs) return loss_dict def build_loss_graph(self): loss_dict = {} if len(self._losses) == 0: loss_dict = self._build_loss_impl( - self._loss_type, - label_name=self._label_name, - loss_weight=self._sample_weight, - num_class=self._num_class) + self._loss_type, + label_name=self._label_name, + loss_weight=self._sample_weight, + num_class=self._num_class) else: strategy = self._base_model_config.loss_weight_strategy loss_weight = [1.0] @@ -212,26 +224,26 @@ def build_loss_graph(self): if loss_param is not None: loss_param = getattr(loss, loss_param) loss_ops = self._build_loss_impl( - loss.loss_type, - label_name=self._label_name, - 
loss_weight=self._sample_weight, - num_class=self._num_class, - loss_name=loss.loss_name, - loss_param=loss_param) + loss.loss_type, + label_name=self._label_name, + loss_weight=self._sample_weight, + num_class=self._num_class, + loss_name=loss.loss_name, + loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): if strategy == self._base_model_config.Fixed: loss_dict[loss_name] = loss_value * loss.weight elif strategy == self._base_model_config.Uncertainty: if loss.learn_loss_weight: uncertainty = tf.Variable( - 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) + 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: loss_dict[loss_name] = 0.5 * tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + -uncertainty) * loss_value + 0.5 * uncertainty else: loss_dict[loss_name] = tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + -uncertainty) * loss_value + 0.5 * uncertainty else: loss_dict[loss_name] = loss_value * loss.weight elif strategy == self._base_model_config.Random: @@ -260,10 +272,10 @@ def _build_metric_impl(self, from easy_rec.python.core.easyrec_metrics import metrics_tf from easy_rec.python.core import metrics as metrics_lib binary_loss_set = { - LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, - LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, - LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, - LossType.JRC_LOSS + LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, + LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, + LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, + LossType.JRC_LOSS } metric_dict = {} if metric.WhichOneof('metric') == 'auc': @@ -271,15 +283,15 @@ def _build_metric_impl(self, if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) metric_dict['auc' + suffix] = metrics_tf.auc( - label, - 
self._prediction_dict['probs' + suffix], - num_thresholds=metric.auc.num_thresholds) + label, + self._prediction_dict['probs' + suffix], + num_thresholds=metric.auc.num_thresholds) elif num_class == 2: label = tf.to_int64(self._labels[label_name]) metric_dict['auc' + suffix] = metrics_tf.auc( - label, - self._prediction_dict['probs' + suffix][:, 1], - num_thresholds=metric.auc.num_thresholds) + label, + self._prediction_dict['probs' + suffix][:, 1], + num_thresholds=metric.auc.num_thresholds) else: raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'gauc': @@ -289,20 +301,20 @@ def _build_metric_impl(self, uids = self._feature_dict[metric.gauc.uid_field] if isinstance(uids, tf.sparse.SparseTensor): uids = tf.sparse_to_dense( - uids.indices, uids.dense_shape, uids.values, default_value='') + uids.indices, uids.dense_shape, uids.values, default_value='') uids = tf.reshape(uids, [-1]) metric_dict['gauc' + suffix] = metrics_lib.gauc( - label, - self._prediction_dict['probs' + suffix], - uids=uids, - reduction=metric.gauc.reduction) + label, + self._prediction_dict['probs' + suffix], + uids=uids, + reduction=metric.gauc.reduction) elif num_class == 2: label = tf.to_int64(self._labels[label_name]) metric_dict['gauc' + suffix] = metrics_lib.gauc( - label, - self._prediction_dict['probs' + suffix][:, 1], - uids=self._feature_dict[metric.gauc.uid_field], - reduction=metric.gauc.reduction) + label, + self._prediction_dict['probs' + suffix][:, 1], + uids=self._feature_dict[metric.gauc.uid_field], + reduction=metric.gauc.reduction) else: raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'session_auc': @@ -310,17 +322,17 @@ def _build_metric_impl(self, if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) metric_dict['session_auc' + suffix] = metrics_lib.session_auc( - label, - self._prediction_dict['probs' + suffix], - 
session_ids=self._feature_dict[metric.session_auc.session_id_field], - reduction=metric.session_auc.reduction) + label, + self._prediction_dict['probs' + suffix], + session_ids=self._feature_dict[metric.session_auc.session_id_field], + reduction=metric.session_auc.reduction) elif num_class == 2: label = tf.to_int64(self._labels[label_name]) metric_dict['session_auc' + suffix] = metrics_lib.session_auc( - label, - self._prediction_dict['probs' + suffix][:, 1], - session_ids=self._feature_dict[metric.session_auc.session_id_field], - reduction=metric.session_auc.reduction) + label, + self._prediction_dict['probs' + suffix][:, 1], + session_ids=self._feature_dict[metric.session_auc.session_id_field], + reduction=metric.session_auc.reduction) else: raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'max_f1': @@ -328,11 +340,11 @@ def _build_metric_impl(self, if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) metric_dict['max_f1' + suffix] = metrics_lib.max_f1( - label, self._prediction_dict['logits' + suffix]) + label, self._prediction_dict['logits' + suffix]) elif num_class == 2: label = tf.to_int64(self._labels[label_name]) metric_dict['max_f1' + suffix] = metrics_lib.max_f1( - label, self._prediction_dict['logits' + suffix][:, 1]) + label, self._prediction_dict['logits' + suffix][:, 1]) else: raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'recall_at_topk': @@ -340,18 +352,18 @@ def _build_metric_impl(self, assert num_class > 1 label = tf.to_int64(self._labels[label_name]) metric_dict['recall_at_topk' + suffix] = metrics_tf.recall_at_k( - label, self._prediction_dict['logits' + suffix], - metric.recall_at_topk.topk) + label, self._prediction_dict['logits' + suffix], + metric.recall_at_topk.topk) elif metric.WhichOneof('metric') == 'mean_absolute_error': label = tf.to_float(self._labels[label_name]) if loss_type & {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: 
metric_dict['mean_absolute_error' + suffix] = metrics_tf.mean_absolute_error( - label, self._prediction_dict['y' + suffix]) + label, self._prediction_dict['y' + suffix]) elif loss_type & {LossType.CLASSIFICATION} and num_class == 1: metric_dict['mean_absolute_error' + suffix] = metrics_tf.mean_absolute_error( - label, self._prediction_dict['probs' + suffix]) + label, self._prediction_dict['probs' + suffix]) else: assert False, 'mean_absolute_error is not supported for this model' elif metric.WhichOneof('metric') == 'mean_squared_error': @@ -359,11 +371,11 @@ def _build_metric_impl(self, if loss_type & {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: metric_dict['mean_squared_error' + suffix] = metrics_tf.mean_squared_error( - label, self._prediction_dict['y' + suffix]) + label, self._prediction_dict['y' + suffix]) elif num_class == 1 and loss_type & binary_loss_set: metric_dict['mean_squared_error' + suffix] = metrics_tf.mean_squared_error( - label, self._prediction_dict['probs' + suffix]) + label, self._prediction_dict['probs' + suffix]) else: assert False, 'mean_squared_error is not supported for this model' elif metric.WhichOneof('metric') == 'root_mean_squared_error': @@ -371,11 +383,11 @@ def _build_metric_impl(self, if loss_type & {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: metric_dict['root_mean_squared_error' + suffix] = metrics_tf.root_mean_squared_error( - label, self._prediction_dict['y' + suffix]) + label, self._prediction_dict['y' + suffix]) elif loss_type & {LossType.CLASSIFICATION} and num_class == 1: metric_dict['root_mean_squared_error' + suffix] = metrics_tf.root_mean_squared_error( - label, self._prediction_dict['probs' + suffix]) + label, self._prediction_dict['probs' + suffix]) else: assert False, 'root_mean_squared_error is not supported for this model' elif metric.WhichOneof('metric') == 'accuracy': @@ -383,7 +395,7 @@ def _build_metric_impl(self, assert num_class > 1 label = tf.to_int64(self._labels[label_name]) metric_dict['accuracy' + 
suffix] = metrics_tf.accuracy( - label, self._prediction_dict['y' + suffix]) + label, self._prediction_dict['y' + suffix]) return metric_dict def build_metric_graph(self, eval_config): @@ -393,18 +405,18 @@ def build_metric_graph(self, eval_config): loss_types = {loss.loss_type for loss in self._losses} for metric in eval_config.metrics_set: metric_dict.update( - self._build_metric_impl( - metric, - loss_type=loss_types, - label_name=self._label_name, - num_class=self._num_class)) + self._build_metric_impl( + metric, + loss_type=loss_types, + label_name=self._label_name, + num_class=self._num_class)) return metric_dict def _get_outputs_impl(self, loss_type, num_class=1, suffix=''): binary_loss_set = { - LossType.F1_REWEIGHTED_LOSS, LossType.JRC_LOSS, LossType.PAIR_WISE_LOSS, - LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, - LossType.PAIRWISE_LOGISTIC_LOSS + LossType.F1_REWEIGHTED_LOSS, LossType.JRC_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS } if loss_type in binary_loss_set: return ['probs' + suffix, 'logits' + suffix] @@ -413,8 +425,8 @@ def _get_outputs_impl(self, loss_type, num_class=1, suffix=''): return ['probs' + suffix, 'logits' + suffix] else: return [ - 'y' + suffix, 'probs' + suffix, 'logits' + suffix, - 'probs' + suffix + '_y', 'logits' + suffix + '_y' + 'y' + suffix, 'probs' + suffix, 'logits' + suffix, + 'probs' + suffix + '_y', 'logits' + suffix + '_y' ] elif loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: return ['y' + suffix] diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto index 3dc86cebb..b77be93be 100644 --- a/easy_rec/python/protos/backbone.proto +++ b/easy_rec/python/protos/backbone.proto @@ -9,16 +9,46 @@ import "easy_rec/python/protos/masknet.proto"; message SequenceLayer { - optional DNN mlp = 1; + optional MLP mlp = 1; +} + +message Lambda { + required string expression = 1; +} + +message 
Operator { + oneof Op { + MLP mlp = 102; + PeriodicEmbedding periodic_embedding = 103; + AutoDisEmbedding auto_dis_embedding = 104; + SequenceLayer sequence_encoder = 105; + HighWayTower highway = 106; + MaskNet masknet = 107; + SENet senet = 108; + FiBiNetTower fibinet = 109; + FM fm = 110; + Concatenate concat = 111; + Reshape reshape = 112; + Add add = 113; + Dot dot = 114; + Lambda Lambda = 115; + OpChain chain = 116; + } +} + +message OpChain { + repeated Operator ops = 1; } message Block { required string name = 1; // the input names of feature groups or other blocks repeated string inputs = 2; + optional int32 input_concat_axis = 3 [default = -1]; + optional string extra_input_fn = 4; oneof layer { InputLayer input_layer = 101; - DNN mlp = 102; + MLP mlp = 102; PeriodicEmbedding periodic_embedding = 103; AutoDisEmbedding auto_dis_embedding = 104; SequenceLayer sequence_encoder = 105; @@ -27,11 +57,17 @@ message Block { SENet senet = 108; FiBiNetTower fibinet = 109; FM fm = 110; + Concatenate concat = 111; + Reshape reshape = 112; + Add add = 113; + Dot dot = 114; + Lambda Lambda = 115; + OpChain chain = 116; } } message BackboneTower { repeated Block blocks = 1; repeated string concat_blocks = 2; - optional DNN top_mlp = 3; -} \ No newline at end of file + optional MLP top_mlp = 3; +} diff --git a/easy_rec/python/protos/dnn.proto b/easy_rec/python/protos/dnn.proto index 021d34dbb..1564394eb 100644 --- a/easy_rec/python/protos/dnn.proto +++ b/easy_rec/python/protos/dnn.proto @@ -12,3 +12,16 @@ message DNN { // use batch normalization optional bool use_bn = 4 [default = true]; } + +message MLP { + // hidden units for each layer + repeated uint32 hidden_units = 1; + // ratio of dropout + repeated float dropout_ratio = 2; + // activation function + optional string activation = 3 [default = 'tf.nn.relu']; + // use batch normalization + optional bool use_bn = 4 [default = true]; + optional bool last_layer_no_activation = 5 [default = false]; + optional bool 
last_layer_no_batch_norm = 6 [default = false]; +} \ No newline at end of file diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 3f4f851b9..940ee88f3 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -31,7 +31,8 @@ message DummyModel { } // configure backbone network in a free style way message RankModel { - optional float l2_regularization = 1; + optional float l2_regularization = 1; + optional bool add_head_logits_layer = 2 [default=true]; } // for knowledge distillation @@ -49,7 +50,6 @@ message KD { optional float loss_weight = 4 [default=1.0]; // only for loss_type == CROSS_ENTROPY_LOSS optional float temperature = 5 [default=1.0]; - } message EasyRecModel { diff --git a/easy_rec/python/protos/fm.proto b/easy_rec/python/protos/fm.proto index c90af8cab..31d8f27d7 100644 --- a/easy_rec/python/protos/fm.proto +++ b/easy_rec/python/protos/fm.proto @@ -2,5 +2,6 @@ syntax = "proto2"; package protos; message FM { + optional bool use_variant = 1; optional float l2_regularization = 5 [default = 1e-4]; } diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index 5c7bb81a1..576bfdf4f 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -9,6 +9,7 @@ message InputLayer { optional float dropout_rate = 3; optional float feature_dropout_rate = 4; optional bool output_feature_list = 5; + optional bool output_3d_tensor = 6; } message HighWayTower { @@ -20,7 +21,10 @@ message HighWayTower { message PeriodicEmbedding { required uint32 embedding_dim = 1; - required float coef_stddev = 2 [default = 1.0]; + required float sigma = 2; + optional bool add_linear_layer = 3 [default = true]; + optional string linear_activation = 4 [default = 'relu']; + optional bool output_3d_tensor = 5; } message AutoDisEmbedding { @@ -28,4 +32,21 @@ message AutoDisEmbedding { required uint32 num_bins = 2; required 
float keep_prob = 3 [default = 0.8]; required float temperature = 4; + optional bool output_3d_tensor = 5; +} + +message Concatenate { + required int32 axis = 1; + optional int32 expand_dim_before = 2; + optional int32 expand_dim_after = 3; +} + +message Reshape { + repeated int32 dims = 1; +} + +message Add { +} + +message Dot { } \ No newline at end of file diff --git a/easy_rec/python/protos/seq_encoder.proto b/easy_rec/python/protos/seq_encoder.proto index 7a608af18..f02490238 100644 --- a/easy_rec/python/protos/seq_encoder.proto +++ b/easy_rec/python/protos/seq_encoder.proto @@ -50,4 +50,3 @@ message DINEncoder { // option: softmax, sigmoid required string attention_normalizer = 3 [default = 'softmax']; } - diff --git a/easy_rec/python/train_eval.py b/easy_rec/python/train_eval.py index bdb65eb0a..51c904451 100644 --- a/easy_rec/python/train_eval.py +++ b/easy_rec/python/train_eval.py @@ -95,8 +95,13 @@ help='is use check mode') parser.add_argument( '--selected_cols', type=str, default=None, help='select input columns') + parser.add_argument( + '--gpu', type=str, default=None, help='gpu id') args, extra_args = parser.parse_known_args() + if args.gpu is not None: + os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu + edit_config_json = {} if args.edit_config_json: edit_config_json = json.loads(args.edit_config_json) diff --git a/easy_rec/python/utils/__init__.py b/easy_rec/python/utils/__init__.py index e69de29bb..8a9b460ac 100644 --- a/easy_rec/python/utils/__init__.py +++ b/easy_rec/python/utils/__init__.py @@ -0,0 +1,17 @@ + +class conditional(object): + """Wrap another context manager and enter it only if condition is true. 
+ """ + + def __init__(self, condition, contextmanager): + self.condition = condition + self.contextmanager = contextmanager + + def __enter__(self): + """Conditionally enter a context manager.""" + if self.condition: + return self.contextmanager.__enter__() + + def __exit__(self, *args): + if self.condition: + return self.contextmanager.__exit__(*args) diff --git a/easy_rec/python/utils/dag.py b/easy_rec/python/utils/dag.py index 5063c8473..00646f732 100644 --- a/easy_rec/python/utils/dag.py +++ b/easy_rec/python/utils/dag.py @@ -1,205 +1,205 @@ -from collections import OrderedDict, defaultdict -from copy import copy, deepcopy +from collections import OrderedDict +from collections import defaultdict +from copy import copy +from copy import deepcopy class DAG(object): - """ Directed acyclic graph implementation. """ - - def __init__(self): - """ Construct a new DAG with no nodes or edges. """ - self.reset_graph() - - def add_node(self, node_name, graph=None): - """ Add a node if it does not exist yet, or error out. """ - if not graph: - graph = self.graph - if node_name in graph: - raise KeyError('node %s already exists' % node_name) - graph[node_name] = set() - - def add_node_if_not_exists(self, node_name, graph=None): - try: - self.add_node(node_name, graph=graph) - except KeyError: - pass - - def delete_node(self, node_name, graph=None): - """ Deletes this node and all edges referencing it. """ - if not graph: - graph = self.graph - if node_name not in graph: - raise KeyError('node %s does not exist' % node_name) - graph.pop(node_name) - - for node, edges in graph.items(): - if node_name in edges: - edges.remove(node_name) - - def delete_node_if_exists(self, node_name, graph=None): - try: - self.delete_node(node_name, graph=graph) - except KeyError: - pass - - def add_edge(self, ind_node, dep_node, graph=None): - """ Add an edge (dependency) between the specified nodes. 
""" - if not graph: - graph = self.graph - if ind_node not in graph or dep_node not in graph: - raise KeyError('one or more nodes do not exist in graph') - test_graph = deepcopy(graph) - test_graph[ind_node].add(dep_node) - is_valid, message = self.validate(test_graph) - if is_valid: - graph[ind_node].add(dep_node) - else: - raise Exception() - - def delete_edge(self, ind_node, dep_node, graph=None): - """ Delete an edge from the graph. """ - if not graph: - graph = self.graph - if dep_node not in graph.get(ind_node, []): - raise KeyError('this edge does not exist in graph') - graph[ind_node].remove(dep_node) - - def rename_edges(self, old_task_name, new_task_name, graph=None): - """ Change references to a task in existing edges. """ - if not graph: - graph = self.graph - for node, edges in graph.items(): - - if node == old_task_name: - graph[new_task_name] = copy(edges) - del graph[old_task_name] - - else: - if old_task_name in edges: - edges.remove(old_task_name) - edges.add(new_task_name) - - def predecessors(self, node, graph=None): - """ Returns a list of all predecessors of the given node """ - if graph is None: - graph = self.graph - return [key for key in graph if node in graph[key]] - - def downstream(self, node, graph=None): - """ Returns a list of all nodes this node has edges towards. 
""" - if graph is None: - graph = self.graph - if node not in graph: - raise KeyError('node %s is not in graph' % node) - return list(graph[node]) - - def all_downstreams(self, node, graph=None): - """Returns a list of all nodes ultimately downstream - of the given node in the dependency graph, in - topological order.""" - if graph is None: - graph = self.graph - nodes = [node] - nodes_seen = set() - i = 0 - while i < len(nodes): - downstreams = self.downstream(nodes[i], graph) - for downstream_node in downstreams: - if downstream_node not in nodes_seen: - nodes_seen.add(downstream_node) - nodes.append(downstream_node) - i += 1 - return list( - filter( - lambda node: node in nodes_seen, - self.topological_sort(graph=graph) - ) - ) - - def all_leaves(self, graph=None): - """ Return a list of all leaves (nodes with no downstreams) """ - if graph is None: - graph = self.graph - return [key for key in graph if not graph[key]] - - def from_dict(self, graph_dict): - """ Reset the graph and build it from the passed dictionary. - The dictionary takes the form of {node_name: [directed edges]} - """ - - self.reset_graph() - for new_node in graph_dict.keys(): - self.add_node(new_node) - for ind_node, dep_nodes in graph_dict.items(): - if not isinstance(dep_nodes, list): - raise TypeError('dict values must be lists') - for dep_node in dep_nodes: - self.add_edge(ind_node, dep_node) - - def reset_graph(self): - """ Restore the graph to an empty state. """ - self.graph = OrderedDict() - - def ind_nodes(self, graph=None): - """ Returns a list of all nodes in the graph with no dependencies. """ - if graph is None: - graph = self.graph - - dependent_nodes = set( - node for dependents in graph.values() for node in dependents - ) - return [node for node in graph.keys() if node not in dependent_nodes] - - def validate(self, graph=None): - """ Returns (Boolean, message) of whether DAG is valid. 
""" - graph = graph if graph is not None else self.graph - if len(self.ind_nodes(graph)) == 0: - return False, 'no independent nodes detected' - try: - self.topological_sort(graph) - except ValueError: - return False, 'failed topological sort' - return True, 'valid' - - def topological_sort(self, graph=None): - """ Returns a topological ordering of the DAG. - Raises an error if this is not possible (graph is not valid). - """ - if graph is None: - graph = self.graph - result = [] - in_degree = defaultdict(lambda: 0) - - for u in graph: - for v in graph[u]: - in_degree[v] += 1 - ready = [node for node in graph if not in_degree[node]] - - while ready: - u = ready.pop() - result.append(u) - for v in graph[u]: - in_degree[v] -= 1 - if in_degree[v] == 0: - ready.append(v) - - if len(result) == len(graph): - return result - else: - raise ValueError('graph is not acyclic') - - def size(self): - return len(self.graph) + """Directed acyclic graph implementation.""" + + def __init__(self): + """Construct a new DAG with no nodes or edges.""" + self.reset_graph() + + def add_node(self, node_name, graph=None): + """Add a node if it does not exist yet, or error out.""" + if not graph: + graph = self.graph + if node_name in graph: + raise KeyError('node %s already exists' % node_name) + graph[node_name] = set() + + def add_node_if_not_exists(self, node_name, graph=None): + try: + self.add_node(node_name, graph=graph) + except KeyError: + pass + + def delete_node(self, node_name, graph=None): + """Deletes this node and all edges referencing it.""" + if not graph: + graph = self.graph + if node_name not in graph: + raise KeyError('node %s does not exist' % node_name) + graph.pop(node_name) + + for node, edges in graph.items(): + if node_name in edges: + edges.remove(node_name) + + def delete_node_if_exists(self, node_name, graph=None): + try: + self.delete_node(node_name, graph=graph) + except KeyError: + pass + + def add_edge(self, ind_node, dep_node, graph=None): + """Add an edge 
(dependency) between the specified nodes.""" + if not graph: + graph = self.graph + if ind_node not in graph or dep_node not in graph: + raise KeyError('one or more nodes do not exist in graph') + test_graph = deepcopy(graph) + test_graph[ind_node].add(dep_node) + is_valid, message = self.validate(test_graph) + if is_valid: + graph[ind_node].add(dep_node) + else: + raise Exception() + + def delete_edge(self, ind_node, dep_node, graph=None): + """Delete an edge from the graph.""" + if not graph: + graph = self.graph + if dep_node not in graph.get(ind_node, []): + raise KeyError('this edge does not exist in graph') + graph[ind_node].remove(dep_node) + + def rename_edges(self, old_task_name, new_task_name, graph=None): + """Change references to a task in existing edges.""" + if not graph: + graph = self.graph + for node, edges in graph.items(): + + if node == old_task_name: + graph[new_task_name] = copy(edges) + del graph[old_task_name] + + else: + if old_task_name in edges: + edges.remove(old_task_name) + edges.add(new_task_name) + + def predecessors(self, node, graph=None): + """Returns a list of all predecessors of the given node.""" + if graph is None: + graph = self.graph + return [key for key in graph if node in graph[key]] + + def downstream(self, node, graph=None): + """Returns a list of all nodes this node has edges towards.""" + if graph is None: + graph = self.graph + if node not in graph: + raise KeyError('node %s is not in graph' % node) + return list(graph[node]) + + def all_downstreams(self, node, graph=None): + """Returns a list of all nodes ultimately downstream of the given node in the dependency graph. + + in topological order. 
+ """ + if graph is None: + graph = self.graph + nodes = [node] + nodes_seen = set() + i = 0 + while i < len(nodes): + downstreams = self.downstream(nodes[i], graph) + for downstream_node in downstreams: + if downstream_node not in nodes_seen: + nodes_seen.add(downstream_node) + nodes.append(downstream_node) + i += 1 + return list( + filter(lambda node: node in nodes_seen, + self.topological_sort(graph=graph))) + + def all_leaves(self, graph=None): + """Return a list of all leaves (nodes with no downstreams).""" + if graph is None: + graph = self.graph + return [key for key in graph if not graph[key]] + + def from_dict(self, graph_dict): + """Reset the graph and build it from the passed dictionary. + + The dictionary takes the form of {node_name: [directed edges]} + """ + self.reset_graph() + for new_node in graph_dict.keys(): + self.add_node(new_node) + for ind_node, dep_nodes in graph_dict.items(): + if not isinstance(dep_nodes, list): + raise TypeError('dict values must be lists') + for dep_node in dep_nodes: + self.add_edge(ind_node, dep_node) + + def reset_graph(self): + """Restore the graph to an empty state.""" + self.graph = OrderedDict() + + def ind_nodes(self, graph=None): + """Returns a list of all nodes in the graph with no dependencies.""" + if graph is None: + graph = self.graph + + dependent_nodes = set( + node for dependents in graph.values() for node in dependents) + return [node for node in graph.keys() if node not in dependent_nodes] + + def validate(self, graph=None): + """Returns (Boolean, message) of whether DAG is valid.""" + graph = graph if graph is not None else self.graph + if len(self.ind_nodes(graph)) == 0: + return False, 'no independent nodes detected' + try: + self.topological_sort(graph) + except ValueError: + return False, 'failed topological sort' + return True, 'valid' + + def topological_sort(self, graph=None): + """Returns a topological ordering of the DAG. + + Raises an error if this is not possible (graph is not valid). 
+ """ + if graph is None: + graph = self.graph + result = [] + in_degree = defaultdict(lambda: 0) + + for u in graph: + for v in graph[u]: + in_degree[v] += 1 + ready = [node for node in graph if not in_degree[node]] + + while ready: + u = ready.pop() + result.append(u) + for v in graph[u]: + in_degree[v] -= 1 + if in_degree[v] == 0: + ready.append(v) + + if len(result) == len(graph): + return result + else: + raise ValueError('graph is not acyclic') + + def size(self): + return len(self.graph) if __name__ == '__main__': - dag = DAG() - dag.add_node("a") - dag.add_node("b") - dag.add_node("c") - dag.add_node("d") - dag.add_edge("a", "b") - dag.add_edge("a", "d") - dag.add_edge("b", "c") - print(dag.topological_sort()) - print(dag.graph) - print(dag.all_downstreams("b")) \ No newline at end of file + dag = DAG() + dag.add_node('a') + dag.add_node('b') + dag.add_node('c') + dag.add_node('d') + dag.add_edge('a', 'b') + dag.add_edge('a', 'd') + dag.add_edge('b', 'c') + print(dag.topological_sort()) + print(dag.graph) + print(dag.all_downstreams('b')) diff --git a/easy_rec/python/utils/tf_utils.py b/easy_rec/python/utils/tf_utils.py index e1026c132..efcd7df12 100644 --- a/easy_rec/python/utils/tf_utils.py +++ b/easy_rec/python/utils/tf_utils.py @@ -46,3 +46,39 @@ def get_config_type(tf_type): } assert tf_type in type_map, 'invalid type: %s' % tf_type return type_map[tf_type] + + +def add_op(inputs): + if not isinstance(inputs, list): + return inputs + if len(inputs) == 1: + if isinstance(inputs[0], list): + return tf.keras.layers.Add()(inputs[0]) + return inputs[0] + return tf.keras.layers.Add()(inputs) + + +def dot_op(features): + """Compute inner dot between any two pair tensors. + + Args: + features: + - List of 2D tensor with shape: ``(batch_size,embedding_size)``. + - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` + Return: + - 2D tensor with shape: ``(batch_size, 1)``. 
+ """ + if isinstance(features, (list, tuple)): + features = tf.stack(features, axis=1) + assert features.shape.ndims == 3, 'input of dot func must be a 3D tensor or a list of 2D tensors' + + batch_size = tf.shape(features)[0] + matrixdot = tf.matmul(features, features, transpose_b=True) + feature_dim = matrixdot.shape[-1] + + ones_mat = tf.ones_like(matrixdot) + lower_tri_mat = ones_mat - tf.linalg.band_part(ones_mat, 0, -1) + lower_tri_mask = tf.cast(lower_tri_mat, tf.bool) + result = tf.boolean_mask(matrixdot, lower_tri_mask) + output_dim = feature_dim * (feature_dim - 1) // 2 + return tf.reshape(result, (batch_size, output_dim)) diff --git a/examples/configs/deepfm_backbone_on_criteo.config b/examples/configs/deepfm_backbone_on_criteo.config index a0982a16e..c94838daf 100644 --- a/examples/configs/deepfm_backbone_on_criteo.config +++ b/examples/configs/deepfm_backbone_on_criteo.config @@ -1,25 +1,21 @@ train_input_path: "examples/data/criteo/criteo_train_data" eval_input_path: "examples/data/criteo/criteo_test_data" -model_dir: "examples/ckpt/deepfm_criteo_ckpt" +model_dir: "examples/ckpt/deepfm_backbone_criteo" train_config { log_step_count_steps: 500 optimizer_config: { adam_optimizer: { learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.001 - decay_steps: 1000 - decay_factor: 0.5 - min_learning_rate: 0.00001 + constant_learning_rate { + learning_rate: 0.001 } } } use_moving_average: false } - save_checkpoints_steps: 1000 + save_checkpoints_steps: 20000 sync_replicas: True - num_steps: 20000 } eval_config { @@ -241,110 +237,110 @@ data_config { feature_config: { features: { input_names: "F1" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val:0.0 max_val: 5775.0 } features: { input_names: "F2" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: -3.0 max_val: 257675.0 } features: { input_names: "F3" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 65535.0 
} features: { input_names: "F4" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 969.0 } features: { input_names: "F5" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 23159456.0 } features: { input_names: "F6" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 431037.0 } features: { input_names: "F7" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 56311.0 } features: { input_names: "F8" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 6047.0 } features: { input_names: "F9" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 29019.0 } features: { input_names: "F10" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 46.0 } features: { input_names: "F11" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 231.0 } features: { input_names: "F12" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 4008.0 } features: { input_names: "F13" - embedding_dim:16 + embedding_dim: 16 feature_type: RawFeature min_val: 0.0 max_val: 7393.0 } features: { input_names: "C1" - hash_bucket_size: 1000000 + hash_bucket_size: 2000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C2" - hash_bucket_size: 1000000 + hash_bucket_size: 1000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C3" - hash_bucket_size: 1000000 + hash_bucket_size: 2500000 feature_type: IdFeature embedding_dim: 16 } @@ -356,132 +352,132 @@ feature_config: { } features: { input_names: "C5" - hash_bucket_size: 1000000 + hash_bucket_size: 500 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C6" - hash_bucket_size: 1000000 + hash_bucket_size: 50 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C7" - hash_bucket_size: 1000000 + 
hash_bucket_size: 13000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C8" - hash_bucket_size: 1000000 + hash_bucket_size: 1000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C9" - hash_bucket_size: 1000000 + hash_bucket_size: 10 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C10" - hash_bucket_size: 1000000 + hash_bucket_size: 100000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C11" - hash_bucket_size: 1000000 + hash_bucket_size: 6000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C12" - hash_bucket_size: 1000000 + hash_bucket_size: 2000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C13" - hash_bucket_size: 1000000 + hash_bucket_size: 4000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C14" - hash_bucket_size: 1000000 + hash_bucket_size: 100 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C15" - hash_bucket_size: 1000000 + hash_bucket_size: 20000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C16" - hash_bucket_size: 1000000 + hash_bucket_size: 1250000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C17" - hash_bucket_size: 1000000 + hash_bucket_size: 50 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C18" - hash_bucket_size: 1000000 + hash_bucket_size: 6000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C19" - hash_bucket_size: 1000000 + hash_bucket_size: 3000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C20" - hash_bucket_size: 1000000 + hash_bucket_size: 10 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C21" - hash_bucket_size: 1000000 + hash_bucket_size: 1250000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C22" - hash_bucket_size: 1000000 + hash_bucket_size: 50 feature_type: IdFeature embedding_dim: 16 } features: 
{ input_names: "C23" - hash_bucket_size: 1000000 + hash_bucket_size: 50 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C24" - hash_bucket_size: 1000000 + hash_bucket_size: 280000 feature_type: IdFeature embedding_dim: 16 }features: { input_names: "C25" - hash_bucket_size: 1000000 + hash_bucket_size: 200 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C26" - hash_bucket_size: 1000000 + hash_bucket_size: 150000 feature_type: IdFeature embedding_dim: 16 } @@ -542,7 +538,9 @@ model_config: { blocks { name: 'fm' inputs: 'emb_list' - fm {} + fm { + use_variant: true + } } blocks { name: 'deep' @@ -552,6 +550,9 @@ model_config: { } } concat_blocks: ['fm', 'deep'] + top_mlp { + hidden_units: [256, 128, 64] + } } rank_model { l2_regularization: 1e-5 diff --git a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config index 1dcdf7512..04dde5589 100644 --- a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config +++ b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config @@ -1,25 +1,21 @@ train_input_path: "examples/data/criteo/criteo_train_data" eval_input_path: "examples/data/criteo/criteo_test_data" -model_dir: "examples/ckpt/deepfm_autodis_criteo_ckpt" +model_dir: "examples/ckpt/deepfm_autodis_criteo" train_config { log_step_count_steps: 500 optimizer_config: { adam_optimizer: { learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.001 - decay_steps: 1000 - decay_factor: 0.5 - min_learning_rate: 0.00001 + constant_learning_rate { + learning_rate: 0.001 } } } use_moving_average: false } - save_checkpoints_steps: 1000 + save_checkpoints_steps: 20000 sync_replicas: True - num_steps: 20000 } eval_config { @@ -241,110 +237,97 @@ data_config { feature_config: { features: { input_names: "F1" - embedding_dim:16 feature_type: RawFeature min_val:0.0 max_val: 5775.0 } features: { input_names: "F2" - embedding_dim:16 
feature_type: RawFeature min_val: -3.0 max_val: 257675.0 } features: { input_names: "F3" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 65535.0 } features: { input_names: "F4" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 969.0 } features: { input_names: "F5" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 23159456.0 } features: { input_names: "F6" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 431037.0 } features: { input_names: "F7" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 56311.0 } features: { input_names: "F8" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 6047.0 } features: { input_names: "F9" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 29019.0 } features: { input_names: "F10" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 46.0 } features: { input_names: "F11" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 231.0 } features: { input_names: "F12" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 4008.0 } features: { input_names: "F13" - embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 7393.0 } features: { input_names: "C1" - hash_bucket_size: 1000000 + hash_bucket_size: 2000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C2" - hash_bucket_size: 1000000 + hash_bucket_size: 1000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C3" - hash_bucket_size: 1000000 + hash_bucket_size: 2500000 feature_type: IdFeature embedding_dim: 16 } @@ -356,132 +339,132 @@ feature_config: { } features: { input_names: "C5" - hash_bucket_size: 1000000 + hash_bucket_size: 500 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C6" - hash_bucket_size: 1000000 + hash_bucket_size: 50 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C7" - hash_bucket_size: 1000000 + hash_bucket_size: 13000 feature_type: 
IdFeature embedding_dim: 16 } features: { input_names: "C8" - hash_bucket_size: 1000000 + hash_bucket_size: 1000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C9" - hash_bucket_size: 1000000 + hash_bucket_size: 10 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C10" - hash_bucket_size: 1000000 + hash_bucket_size: 100000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C11" - hash_bucket_size: 1000000 + hash_bucket_size: 6000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C12" - hash_bucket_size: 1000000 + hash_bucket_size: 2000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C13" - hash_bucket_size: 1000000 + hash_bucket_size: 4000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C14" - hash_bucket_size: 1000000 + hash_bucket_size: 100 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C15" - hash_bucket_size: 1000000 + hash_bucket_size: 20000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C16" - hash_bucket_size: 1000000 + hash_bucket_size: 1250000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C17" - hash_bucket_size: 1000000 + hash_bucket_size: 50 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C18" - hash_bucket_size: 1000000 + hash_bucket_size: 6000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C19" - hash_bucket_size: 1000000 + hash_bucket_size: 3000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C20" - hash_bucket_size: 1000000 + hash_bucket_size: 10 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C21" - hash_bucket_size: 1000000 + hash_bucket_size: 1250000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C22" - hash_bucket_size: 1000000 + hash_bucket_size: 50 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C23" - 
hash_bucket_size: 1000000 + hash_bucket_size: 50 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C24" - hash_bucket_size: 1000000 + hash_bucket_size: 280000 feature_type: IdFeature embedding_dim: 16 }features: { input_names: "C25" - hash_bucket_size: 1000000 + hash_bucket_size: 200 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C26" - hash_bucket_size: 1000000 + hash_bucket_size: 150000 feature_type: IdFeature embedding_dim: 16 } @@ -489,7 +472,7 @@ feature_config: { model_config: { model_class: 'RankModel' feature_groups: { - group_name: "features" + group_name: "numerical_features" feature_names: "F1" feature_names: "F2" feature_names: "F3" @@ -503,6 +486,10 @@ model_config: { feature_names: "F11" feature_names: "F12" feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "categorical_features" feature_names: "C1" feature_names: "C2" feature_names: "C3" @@ -533,25 +520,51 @@ model_config: { } backbone { blocks { - name: 'emb_list' - inputs: 'features' + name: 'cat_emb' + inputs: 'categorical_features' input_layer { - output_feature_list: true + output_3d_tensor: true + } + } + blocks { + name: 'num_emb' + inputs: 'numerical_features' + auto_dis_embedding { + embedding_dim: 16 + num_bins: 20 + temperature: 0.815 + output_3d_tensor: true } } blocks { name: 'fm' - inputs: 'emb_list' - fm {} + inputs: 'cat_emb' + inputs: 'num_emb' + input_concat_axis: 1 + fm { + use_variant: true + } + } + blocks { + name: 'cat_and_num' + inputs: 'cat_emb' + inputs: 'num_emb' + input_concat_axis: 1 + reshape { + dims: [-1, 624] + } } blocks { name: 'deep' - inputs: 'features' + inputs: 'cat_and_num' mlp { hidden_units: [256, 128, 64] } } concat_blocks: ['fm', 'deep'] + top_mlp { + hidden_units: [256, 128, 64] + } } rank_model { l2_regularization: 1e-5 diff --git a/examples/configs/deepfm_backbone_on_criteo_with_periodic.config b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config new file mode 100644 index 
000000000..2affcc9ae --- /dev/null +++ b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config @@ -0,0 +1,571 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/deepfm_periodic_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + constant_learning_rate { + learning_rate: 0.001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + 
input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + 
feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 2000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 2500000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 500 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 50 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 13000 + feature_type: IdFeature + 
embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 10 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 100000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 6000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 2000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 4000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 100 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 20000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1250000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 50 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 6000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 3000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 10 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1250000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 50 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 50 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 280000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 200 + feature_type: IdFeature + embedding_dim: 16 + } 
+ features: { + input_names: "C26" + hash_bucket_size: 150000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: "numerical_features" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "categorical_features" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'cat_emb' + inputs: 'categorical_features' + input_layer { + output_3d_tensor: true + } + } + blocks { + name: 'num_emb' + inputs: 'numerical_features' + periodic_embedding { + embedding_dim: 16 + output_3d_tensor: true + } + } + blocks { + name: 'fm' + inputs: 'cat_emb' + inputs: 'num_emb' + input_concat_axis: 1 + fm { + use_variant: true + } + } + blocks { + name: 'cat_and_num' + inputs: 'cat_emb' + inputs: 'num_emb' + input_concat_axis: 1 + reshape { + dims: [-1, 624] + } + } + blocks { + name: 'deep' + inputs: 'cat_and_num' + mlp { + hidden_units: [256, 128, 64] + } + } + concat_blocks: ['fm', 'deep'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git 
a/examples/configs/dlrm_backbone_on_criteo.config b/examples/configs/dlrm_backbone_on_criteo.config new file mode 100644 index 000000000..7d698e858 --- /dev/null +++ b/examples/configs/dlrm_backbone_on_criteo.config @@ -0,0 +1,566 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_backbone_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + constant_learning_rate { + learning_rate: 0.001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + 
default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + 
prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 2000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 2500000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 500 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 50 + feature_type: IdFeature + embedding_dim: 
16 + } + features: { + input_names: "C7" + hash_bucket_size: 13000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 10 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 100000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 6000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 2000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 4000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 100 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 20000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1250000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 50 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 6000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 3000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 10 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1250000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 50 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 50 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 280000 + feature_type: IdFeature + embedding_dim: 16 + }features: { 
+ input_names: "C25" + hash_bucket_size: 200 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 150000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'bottom_mlp' + inputs: 'dense' + mlp { + hidden_units: [64, 32, 16] + } + } + blocks { + name: 'bottom_list' + inputs: 'bottom_mlp' + Lambda { + expression: 'lambda x: [x]' + } + } + blocks { + name: 'sparse_features' + inputs: 'sparse' + input_layer { + output_feature_list: true + } + } + blocks { + name: 'dot' + inputs: 'bottom_list' + inputs: 'sparse_features' + dot { } + } + blocks { + name: 'dot_and_dense' + inputs: 'bottom_mlp' + inputs: 'dot' + concat { + axis: 1 + } + } + concat_blocks: ['dot_and_dense'] + top_mlp { + hidden_units: [128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/readme.md b/examples/readme.md index 
b95adc8b1..286b292b1 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -73,6 +73,8 @@ EasyRec的模型训练和评估都是基于config配置文件的,配置文件 - [deepfm_on_movielens.config](configs/deepfm_on_movielens.config) +- [deepfm_backbone_on_movielens.config](configs/deepfm_backbone_on_movielens.config) + - [dcn_on_movielens.config](configs/dcn_on_movielens.config) - [autoint_on_movielens.config](configs/autoint_on_movielens.config) @@ -85,6 +87,8 @@ EasyRec的模型训练和评估都是基于config配置文件的,配置文件 - [deepfm_on_criteo.config](configs/deepfm_on_criteo.config) +- [deepfm_backbone_on_criteo.config](configs/deepfm_backbone_on_criteo.config) + **召回任务** - [dssm_on_books.config](configs/dssm_on_books.config) @@ -209,6 +213,7 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee | --------- | ----- | ------ | | Wide&Deep | 1 | 0.8558 | | DeepFM | 1 | 0.8688 | + | DeepFM(Backbone)|1| 0.8876 | | DCN | 1 | 0.8576 | | AutoInt | 1 | 0.8513 | | MaskNet | 1 | 0.8872 | @@ -220,6 +225,9 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee | ------ | ----- | ------ | | FM | 1 | 0.7577 | | DeepFM | 1 | 0.7967 | + | DeepFM(backbone)| 1 | 0.7965 | + | DeepFM(periodic)| 1 | 0.7982 | + | DeepFM(autodis) | 1 | 0.7983 | ### 召回模型 From 96d502e44cd53cdc92946ffb11069e41d13f835b Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 16 Jun 2023 08:58:50 +0800 Subject: [PATCH 33/54] [feat]: add more backbone blocks --- .../compat/feature_column/feature_column.py | 859 +++++++++--------- easy_rec/python/layers/backbone.py | 222 +++-- easy_rec/python/layers/common_layers.py | 68 +- easy_rec/python/layers/fm.py | 5 +- easy_rec/python/layers/input_layer.py | 7 +- easy_rec/python/layers/keras/__init__.py | 1 + easy_rec/python/layers/keras/dcn.py | 182 ++++ .../python/layers/keras/dot_interaction.py | 92 ++ easy_rec/python/layers/numerical_embedding.py | 46 +- easy_rec/python/model/easy_rec_model.py | 9 +- easy_rec/python/model/rank_model.py | 190 ++-- 
easy_rec/python/protos/backbone.proto | 59 +- easy_rec/python/protos/dnn.proto | 2 +- easy_rec/python/protos/easy_rec_model.proto | 1 + easy_rec/python/protos/layer.proto | 9 +- easy_rec/python/train_eval.py | 5 +- easy_rec/python/utils/__init__.py | 24 +- easy_rec/python/utils/load_class.py | 27 + easy_rec/python/utils/tf_utils.py | 2 +- .../configs/deepfm_backbone_on_criteo.config | 136 ++- ...pfm_backbone_on_criteo_with_autodis.config | 259 +++++- ...fm_backbone_on_criteo_with_periodic.config | 259 +++++- .../configs/dlrm_backbone_on_criteo.config | 97 +- examples/configs/dlrm_on_criteo.config | 534 +++++++++++ .../dlrm_on_criteo_with_autodis.config | 578 ++++++++++++ .../configs/dlrm_standard_on_criteo.config | 560 ++++++++++++ examples/data/criteo/process_criteo_kaggle.py | 6 + examples/readme.md | 36 +- 28 files changed, 3363 insertions(+), 912 deletions(-) create mode 100644 easy_rec/python/layers/keras/__init__.py create mode 100644 easy_rec/python/layers/keras/dcn.py create mode 100644 easy_rec/python/layers/keras/dot_interaction.py create mode 100644 examples/configs/dlrm_on_criteo.config create mode 100644 examples/configs/dlrm_on_criteo_with_autodis.config create mode 100644 examples/configs/dlrm_standard_on_criteo.config diff --git a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index d0f23dfbb..27557e9a7 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -167,7 +167,6 @@ from easy_rec.python.compat import embedding_ops as ev_embedding_ops from easy_rec.python.compat.feature_column import utils as fc_utils -from easy_rec.python.layers.common_layers import layer_norm def _internal_input_layer(features, @@ -185,9 +184,9 @@ def _internal_input_layer(features, for column in feature_columns: if not isinstance(column, _DenseColumn): raise ValueError( - 'Items of feature_columns must be a _DenseColumn. 
' - 'You can wrap a categorical column with an ' - 'embedding_column or indicator_column. Given: {}'.format(column)) + 'Items of feature_columns must be a _DenseColumn. ' + 'You can wrap a categorical column with an ' + 'embedding_column or indicator_column. Given: {}'.format(column)) weight_collections = list(weight_collections or []) if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections: weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES) @@ -205,20 +204,20 @@ def _get_logits(): # pylint: disable=missing-docstring with variable_scope.variable_scope( None, default_name=column._var_scope_name): # pylint: disable=protected-access tensor = column._get_dense_tensor( # pylint: disable=protected-access - builder, - weight_collections=weight_collections, - trainable=trainable) + builder, + weight_collections=weight_collections, + trainable=trainable) num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access batch_size = array_ops.shape(tensor)[0] output_tensor = array_ops.reshape( - tensor, shape=(batch_size, num_elements)) + tensor, shape=(batch_size, num_elements)) output_tensors.append(output_tensor) if cols_to_vars is not None: # Retrieve any variables created (some _DenseColumn's don't create # variables, in which case an empty list is returned). cols_to_vars[column] = ops.get_collection( - ops.GraphKeys.GLOBAL_VARIABLES, - scope=variable_scope.get_variable_scope().name) + ops.GraphKeys.GLOBAL_VARIABLES, + scope=variable_scope.get_variable_scope().name) if cols_to_output_tensors is not None: cols_to_output_tensors[column] = output_tensor if feature_name_to_output_tensors is not None: @@ -303,14 +302,14 @@ def input_layer(features, ValueError: if an item in `feature_columns` is not a `_DenseColumn`. 
""" return _internal_input_layer( - features, - feature_columns, - weight_collections=weight_collections, - trainable=trainable, - cols_to_vars=cols_to_vars, - cols_to_output_tensors=cols_to_output_tensors, - feature_name_to_output_tensors=feature_name_to_output_tensors, - sort_feature_columns_by_name=sort_feature_columns_by_name) + features, + feature_columns, + weight_collections=weight_collections, + trainable=trainable, + cols_to_vars=cols_to_vars, + cols_to_output_tensors=cols_to_output_tensors, + feature_name_to_output_tensors=feature_name_to_output_tensors, + sort_feature_columns_by_name=sort_feature_columns_by_name) # TODO(akshayka): InputLayer should be a subclass of Layer, and it @@ -334,17 +333,17 @@ def __init__(self, self._cols_to_vars = cols_to_vars self._name = name self._input_layer_template = template.make_template( - self._name, _internal_input_layer, create_scope_now_=create_scope_now) + self._name, _internal_input_layer, create_scope_now_=create_scope_now) self._scope = self._input_layer_template.variable_scope def __call__(self, features): return self._input_layer_template( - features=features, - feature_columns=self._feature_columns, - weight_collections=self._weight_collections, - trainable=self._trainable, - cols_to_vars=None, - from_template=True) + features=features, + feature_columns=self._feature_columns, + weight_collections=self._weight_collections, + trainable=self._trainable, + cols_to_vars=None, + from_template=True) @property def name(self): @@ -500,12 +499,12 @@ def linear_model(features, with variable_scope.variable_scope(None, 'linear_model') as vs: model_name = _strip_leading_slashes(vs.name) linear_model_layer = _LinearModel( - feature_columns=feature_columns, - units=units, - sparse_combiner=sparse_combiner, - weight_collections=weight_collections, - trainable=trainable, - name=model_name) + feature_columns=feature_columns, + units=units, + sparse_combiner=sparse_combiner, + weight_collections=weight_collections, + 
trainable=trainable, + name=model_name) retval = linear_model_layer(features) # pylint: disable=not-callable if cols_to_vars is not None: cols_to_vars.update(linear_model_layer.cols_to_vars()) @@ -549,7 +548,7 @@ def __init__(self, name=None, **kwargs): super(_FCLinearWrapper, self).__init__( - trainable=trainable, name=name, **kwargs) + trainable=trainable, name=name, **kwargs) self._feature_column = feature_column self._units = units self._sparse_combiner = sparse_combiner @@ -558,30 +557,30 @@ def __init__(self, def build(self, _): if isinstance(self._feature_column, _CategoricalColumn): weight = self.add_variable( - name='weights', - shape=(self._feature_column._num_buckets, self._units), # pylint: disable=protected-access - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) + name='weights', + shape=(self._feature_column._num_buckets, self._units), # pylint: disable=protected-access + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) else: num_elements = self._feature_column._variable_shape.num_elements() # pylint: disable=protected-access weight = self.add_variable( - name='weights', - shape=[num_elements, self._units], - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) + name='weights', + shape=[num_elements, self._units], + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) _add_to_collections(weight, self._weight_collections) self._weight_var = weight self.built = True def call(self, builder): weighted_sum = _create_weighted_sum( - column=self._feature_column, - builder=builder, - units=self._units, - sparse_combiner=self._sparse_combiner, - weight_collections=self._weight_collections, - trainable=self.trainable, - weight_var=self._weight_var) + column=self._feature_column, + builder=builder, + units=self._units, + sparse_combiner=self._sparse_combiner, + weight_collections=self._weight_collections, + trainable=self.trainable, + weight_var=self._weight_var) return weighted_sum 
@@ -600,10 +599,10 @@ def __init__(self, def build(self, _): self._bias_variable = self.add_variable( - 'bias_weights', - shape=[self._units], - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) + 'bias_weights', + shape=[self._units], + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) _add_to_collections(self._bias_variable, self._weight_collections) self.built = True @@ -659,11 +658,11 @@ def __init__(self, column_layers[column_name] = column_layer self._column_layers = self._add_layers(column_layers) self._bias_layer = _BiasLayer( - units=units, - trainable=trainable, - weight_collections=self._weight_collections, - name='bias_layer', - **kwargs) + units=units, + trainable=trainable, + weight_collections=self._weight_collections, + name='bias_layer', + **kwargs) self._cols_to_vars = {} def cols_to_vars(self): @@ -679,8 +678,8 @@ def call(self, features): for column in self._feature_columns: if not isinstance(column, (_DenseColumn, _CategoricalColumn)): raise ValueError( - 'Items of feature_columns must be either a ' - '_DenseColumn or _CategoricalColumn. Given: {}'.format(column)) + 'Items of feature_columns must be either a ' + '_DenseColumn or _CategoricalColumn. 
Given: {}'.format(column)) weighted_sums = [] ordered_columns = [] builder = _LazyBuilder(features) @@ -690,17 +689,17 @@ def call(self, features): weighted_sum = layer(builder) weighted_sums.append(weighted_sum) self._cols_to_vars[column] = ops.get_collection( - ops.GraphKeys.GLOBAL_VARIABLES, scope=layer.scope_name) + ops.GraphKeys.GLOBAL_VARIABLES, scope=layer.scope_name) _verify_static_batch_size_equality(weighted_sums, ordered_columns) predictions_no_bias = math_ops.add_n( - weighted_sums, name='weighted_sum_no_bias') + weighted_sums, name='weighted_sum_no_bias') predictions = nn_ops.bias_add( - predictions_no_bias, - self._bias_layer( # pylint: disable=not-callable - builder, - scope=variable_scope.get_variable_scope()), # pylint: disable=not-callable - name='weighted_sum') + predictions_no_bias, + self._bias_layer( # pylint: disable=not-callable + builder, + scope=variable_scope.get_variable_scope()), # pylint: disable=not-callable + name='weighted_sum') bias = self._bias_layer.variables[0] self._cols_to_vars['bias'] = _get_expanded_variable_list(bias) return predictions @@ -905,31 +904,31 @@ def model_fn(features, ...): if (initializer is not None) and (not callable(initializer)): raise ValueError('initializer must be callable if specified. 
' 'Embedding of column_name: {}'.format( - categorical_column.name)) + categorical_column.name)) if initializer is None: initializer = init_ops.truncated_normal_initializer( - mean=0.0, stddev=0.01 / math.sqrt(dimension)) + mean=0.0, stddev=0.01 / math.sqrt(dimension)) embedding_shape = categorical_column._num_buckets, dimension # pylint: disable=protected-access def _creator(weight_collections, scope): embedding_column_layer = _EmbeddingColumnLayer( - embedding_shape=embedding_shape, - initializer=initializer, - weight_collections=weight_collections, - trainable=trainable, - name='embedding_column_layer') + embedding_shape=embedding_shape, + initializer=initializer, + weight_collections=weight_collections, + trainable=trainable, + name='embedding_column_layer') return embedding_column_layer(None, scope=scope) # pylint: disable=not-callable return _EmbeddingColumn( - categorical_column=categorical_column, - dimension=dimension, - combiner=combiner, - layer_creator=_creator, - ckpt_to_load_from=ckpt_to_load_from, - tensor_name_in_ckpt=tensor_name_in_ckpt, - max_norm=max_norm, - trainable=trainable) + categorical_column=categorical_column, + dimension=dimension, + combiner=combiner, + layer_creator=_creator, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable) def _numeric_column(key, @@ -996,15 +995,15 @@ def _numeric_column(key, if normalizer_fn is not None and not callable(normalizer_fn): raise TypeError( - 'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn)) + 'normalizer_fn must be a callable. 
Given: {}'.format(normalizer_fn)) fc_utils.assert_key_is_string(key) return _NumericColumn( - key, - shape=shape, - default_value=default_value, - dtype=dtype, - normalizer_fn=normalizer_fn) + key, + shape=shape, + default_value=default_value, + dtype=dtype, + normalizer_fn=normalizer_fn) def _bucketized_column(source_column, boundaries): @@ -1075,8 +1074,8 @@ def _bucketized_column(source_column, boundaries): """ if not isinstance(source_column, _NumericColumn): raise ValueError( - 'source_column must be a column generated with numeric_column(). ' - 'Given: {}'.format(source_column)) + 'source_column must be a column generated with numeric_column(). ' + 'Given: {}'.format(source_column)) if len(source_column.shape) > 1: raise ValueError('source_column must be one-dimensional column. ' 'Given: {}'.format(source_column)) @@ -1139,7 +1138,7 @@ def _categorical_column_with_hash_bucket(key, if hash_bucket_size < 1: raise ValueError('hash_bucket_size must be at least 1. ' 'hash_bucket_size: {}, key: {}'.format( - hash_bucket_size, key)) + hash_bucket_size, key)) fc_utils.assert_key_is_string(key) fc_utils.assert_string_or_int(dtype, prefix='column_name: {}'.format(key)) @@ -1241,8 +1240,8 @@ def _categorical_column_with_vocabulary_file(key, with gfile.GFile(vocabulary_file) as f: vocabulary_size = sum(1 for _ in f) logging.info( - 'vocabulary_size = %d in %s is inferred from the number of elements ' - 'in the vocabulary_file %s.', vocabulary_size, key, vocabulary_file) + 'vocabulary_size = %d in %s is inferred from the number of elements ' + 'in the vocabulary_file %s.', vocabulary_size, key, vocabulary_file) # `vocabulary_size` isn't required for lookup, but it is for `_num_buckets`. 
if vocabulary_size < 1: @@ -1250,20 +1249,20 @@ def _categorical_column_with_vocabulary_file(key, if num_oov_buckets: if default_value is not None: raise ValueError( - 'Can\'t specify both num_oov_buckets and default_value in {}.'.format( - key)) + 'Can\'t specify both num_oov_buckets and default_value in {}.'.format( + key)) if num_oov_buckets < 0: raise ValueError('Invalid num_oov_buckets {} in {}.'.format( - num_oov_buckets, key)) + num_oov_buckets, key)) fc_utils.assert_string_or_int(dtype, prefix='column_name: {}'.format(key)) fc_utils.assert_key_is_string(key) return _VocabularyFileCategoricalColumn( - key=key, - vocabulary_file=vocabulary_file, - vocabulary_size=vocabulary_size, - num_oov_buckets=0 if num_oov_buckets is None else num_oov_buckets, - default_value=-1 if default_value is None else default_value, - dtype=dtype) + key=key, + vocabulary_file=vocabulary_file, + vocabulary_size=vocabulary_size, + num_oov_buckets=0 if num_oov_buckets is None else num_oov_buckets, + default_value=-1 if default_value is None else default_value, + dtype=dtype) def _categorical_column_with_vocabulary_list(key, @@ -1348,38 +1347,38 @@ def _categorical_column_with_vocabulary_list(key, """ if (vocabulary_list is None) or (len(vocabulary_list) < 1): raise ValueError( - 'vocabulary_list {} must be non-empty, column_name: {}'.format( - vocabulary_list, key)) + 'vocabulary_list {} must be non-empty, column_name: {}'.format( + vocabulary_list, key)) if len(set(vocabulary_list)) != len(vocabulary_list): raise ValueError( - 'Duplicate keys in vocabulary_list {}, column_name: {}'.format( - vocabulary_list, key)) + 'Duplicate keys in vocabulary_list {}, column_name: {}'.format( + vocabulary_list, key)) vocabulary_dtype = dtypes.as_dtype(np.array(vocabulary_list).dtype) if num_oov_buckets: if default_value != -1: raise ValueError( - 'Can\'t specify both num_oov_buckets and default_value in {}.'.format( - key)) + 'Can\'t specify both num_oov_buckets and default_value in {}.'.format( + 
key)) if num_oov_buckets < 0: raise ValueError('Invalid num_oov_buckets {} in {}.'.format( - num_oov_buckets, key)) + num_oov_buckets, key)) fc_utils.assert_string_or_int( - vocabulary_dtype, prefix='column_name: {} vocabulary'.format(key)) + vocabulary_dtype, prefix='column_name: {} vocabulary'.format(key)) if dtype is None: dtype = vocabulary_dtype elif dtype.is_integer != vocabulary_dtype.is_integer: raise ValueError( - 'dtype {} and vocabulary dtype {} do not match, column_name: {}'.format( - dtype, vocabulary_dtype, key)) + 'dtype {} and vocabulary dtype {} do not match, column_name: {}'.format( + dtype, vocabulary_dtype, key)) fc_utils.assert_string_or_int(dtype, prefix='column_name: {}'.format(key)) fc_utils.assert_key_is_string(key) return _VocabularyListCategoricalColumn( - key=key, - vocabulary_list=tuple(vocabulary_list), - dtype=dtype, - default_value=default_value, - num_oov_buckets=num_oov_buckets) + key=key, + vocabulary_list=tuple(vocabulary_list), + dtype=dtype, + default_value=default_value, + num_oov_buckets=num_oov_buckets) def _categorical_column_with_identity(key, num_buckets, default_value=None): @@ -1438,15 +1437,15 @@ def _categorical_column_with_identity(key, num_buckets, default_value=None): """ if num_buckets < 1: raise ValueError('num_buckets {} < 1, column_name {}'.format( - num_buckets, key)) + num_buckets, key)) if (default_value is not None) and ((default_value < 0) or (default_value >= num_buckets)): raise ValueError( - 'default_value {} not in range [0, {}), column_name {}'.format( - default_value, num_buckets, key)) + 'default_value {} not in range [0, {}), column_name {}'.format( + default_value, num_buckets, key)) fc_utils.assert_key_is_string(key) return _IdentityCategoricalColumn( - key=key, num_buckets=num_buckets, default_value=default_value) + key=key, num_buckets=num_buckets, default_value=default_value) def _indicator_column(categorical_column): @@ -1553,9 +1552,9 @@ def _weighted_categorical_column(categorical_column, 
if (dtype is None) or not (dtype.is_integer or dtype.is_floating): raise ValueError('dtype {} is not convertible to float.'.format(dtype)) return _WeightedCategoricalColumn( - categorical_column=categorical_column, - weight_feature_key=weight_feature_key, - dtype=dtype) + categorical_column=categorical_column, + weight_feature_key=weight_feature_key, + dtype=dtype) def _crossed_column(keys, hash_bucket_size, hash_key=None): @@ -1667,21 +1666,21 @@ def _crossed_column(keys, hash_bucket_size, hash_key=None): 'hash_bucket_size: {}'.format(hash_bucket_size)) if not keys or len(keys) < 2: raise ValueError( - 'keys must be a list with length > 1. Given: {}'.format(keys)) + 'keys must be a list with length > 1. Given: {}'.format(keys)) for key in keys: if (not isinstance(key, six.string_types) and not isinstance(key, _CategoricalColumn)): raise ValueError( - 'Unsupported key type. All keys must be either string, or ' - 'categorical column except _HashedCategoricalColumn. ' - 'Given: {}'.format(key)) + 'Unsupported key type. All keys must be either string, or ' + 'categorical column except _HashedCategoricalColumn. ' + 'Given: {}'.format(key)) if isinstance(key, _HashedCategoricalColumn): raise ValueError( - 'categorical_column_with_hash_bucket is not supported for crossing. ' - 'Hashing before crossing will increase probability of collision. ' - 'Instead, use the feature name as a string. Given: {}'.format(key)) + 'categorical_column_with_hash_bucket is not supported for crossing. ' + 'Hashing before crossing will increase probability of collision. ' + 'Instead, use the feature name as a string. Given: {}'.format(key)) return _CrossedColumn( - keys=tuple(keys), hash_bucket_size=hash_bucket_size, hash_key=hash_key) + keys=tuple(keys), hash_bucket_size=hash_bucket_size, hash_key=hash_key) # TODO(rohanj): Clearly define semantics of this layer. @@ -1710,7 +1709,7 @@ def __init__(self, **kwargs: keyword named properties. 
""" super(_EmbeddingColumnLayer, self).__init__( - trainable=trainable, name=name, **kwargs) + trainable=trainable, name=name, **kwargs) self._embedding_shape = embedding_shape self._initializer = initializer self._weight_collections = weight_collections @@ -1726,11 +1725,11 @@ def set_weight_collections(self, weight_collections): def build(self, _): self._embedding_weight_var = self.add_variable( - name='embedding_weights', - shape=self._embedding_shape, - dtype=dtypes.float32, - initializer=self._initializer, - trainable=self.trainable) + name='embedding_weights', + shape=self._embedding_shape, + dtype=dtypes.float32, + initializer=self._initializer, + trainable=self.trainable) if self._weight_collections and not context.executing_eagerly(): _add_to_collections(self._embedding_weight_var, self._weight_collections) self.built = True @@ -1876,21 +1875,21 @@ def _create_weighted_sum(column, """Creates a weighted sum for a dense/categorical column for linear_model.""" if isinstance(column, _CategoricalColumn): return _create_categorical_column_weighted_sum( - column=column, - builder=builder, - units=units, - sparse_combiner=sparse_combiner, - weight_collections=weight_collections, - trainable=trainable, - weight_var=weight_var) + column=column, + builder=builder, + units=units, + sparse_combiner=sparse_combiner, + weight_collections=weight_collections, + trainable=trainable, + weight_var=weight_var) else: return _create_dense_column_weighted_sum( - column=column, - builder=builder, - units=units, - weight_collections=weight_collections, - trainable=trainable, - weight_var=weight_var) + column=column, + builder=builder, + units=units, + weight_collections=weight_collections, + trainable=trainable, + weight_var=weight_var) def _create_dense_column_weighted_sum(column, @@ -1901,9 +1900,9 @@ def _create_dense_column_weighted_sum(column, weight_var=None): """Create a weighted sum of a dense column for linear_model.""" tensor = column._get_dense_tensor( # pylint: 
disable=protected-access - builder, - weight_collections=weight_collections, - trainable=trainable) + builder, + weight_collections=weight_collections, + trainable=trainable) num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access batch_size = array_ops.shape(tensor)[0] tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements)) @@ -1911,11 +1910,11 @@ def _create_dense_column_weighted_sum(column, weight = weight_var else: weight = variable_scope.get_variable( - name='weights', - shape=[num_elements, units], - initializer=init_ops.zeros_initializer(), - trainable=trainable, - collections=weight_collections) + name='weights', + shape=[num_elements, units], + initializer=init_ops.zeros_initializer(), + trainable=trainable, + collections=weight_collections) return math_ops.matmul(tensor, weight, name='weighted_sum') @@ -1929,7 +1928,7 @@ class _CategoricalColumn(_FeatureColumn): """ IdWeightPair = collections.namedtuple( # pylint: disable=invalid-name - 'IdWeightPair', ['id_tensor', 'weight_tensor']) + 'IdWeightPair', ['id_tensor', 'weight_tensor']) @abc.abstractproperty def _num_buckets(self): @@ -1999,39 +1998,39 @@ def _create_categorical_column_weighted_sum(column, sparse_combiner = "sum". 
""" sparse_tensors = column._get_sparse_tensors( # pylint: disable=protected-access - builder, - weight_collections=weight_collections, - trainable=trainable) + builder, + weight_collections=weight_collections, + trainable=trainable) id_tensor = sparse_ops.sparse_reshape( - sparse_tensors.id_tensor, - [array_ops.shape(sparse_tensors.id_tensor)[0], -1]) + sparse_tensors.id_tensor, + [array_ops.shape(sparse_tensors.id_tensor)[0], -1]) weight_tensor = sparse_tensors.weight_tensor if weight_tensor is not None: weight_tensor = sparse_ops.sparse_reshape( - weight_tensor, [array_ops.shape(weight_tensor)[0], -1]) + weight_tensor, [array_ops.shape(weight_tensor)[0], -1]) if weight_var is not None: weight = weight_var else: weight = variable_scope.get_variable( - name='weights', - shape=(column._num_buckets, units), # pylint: disable=protected-access - initializer=init_ops.zeros_initializer(), - trainable=trainable, - collections=weight_collections) + name='weights', + shape=(column._num_buckets, units), # pylint: disable=protected-access + initializer=init_ops.zeros_initializer(), + trainable=trainable, + collections=weight_collections) return embedding_ops.safe_embedding_lookup_sparse( - weight, - id_tensor, - sparse_weights=weight_tensor, - combiner=sparse_combiner, - name='weighted_sum') + weight, + id_tensor, + sparse_weights=weight_tensor, + combiner=sparse_combiner, + name='weighted_sum') class _SequenceDenseColumn(_FeatureColumn): """Represents dense sequence data.""" TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name - 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) + 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) @abc.abstractmethod def _get_sequence_dense_tensor(self, @@ -2147,7 +2146,7 @@ def _get_raw_feature_as_tensor(self, key): """ raw_feature = self._features[key] feature_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( - raw_feature) + raw_feature) def expand_dims(input_tensor): 
# Input_tensor must have rank 1. @@ -2161,20 +2160,20 @@ def expand_dims(input_tensor): if rank is not None: if rank == 0: raise ValueError( - 'Feature (key: {}) cannot have rank 0. Give: {}'.format( - key, feature_tensor)) + 'Feature (key: {}) cannot have rank 0. Give: {}'.format( + key, feature_tensor)) return feature_tensor if rank != 1 else expand_dims(feature_tensor) # Handle dynamic rank. with ops.control_dependencies([ - check_ops.assert_positive( - array_ops.rank(feature_tensor), - message='Feature (key: {}) cannot have rank 0. Given: {}'.format( - key, feature_tensor)) + check_ops.assert_positive( + array_ops.rank(feature_tensor), + message='Feature (key: {}) cannot have rank 0. Given: {}'.format( + key, feature_tensor)) ]): return control_flow_ops.cond( - math_ops.equal(1, array_ops.rank(feature_tensor)), - lambda: expand_dims(feature_tensor), lambda: feature_tensor) + math_ops.equal(1, array_ops.rank(feature_tensor)), + lambda: expand_dims(feature_tensor), lambda: feature_tensor) # TODO(ptucker): Move to third_party/tensorflow/python/ops/sparse_ops.py @@ -2209,7 +2208,7 @@ def _to_sparse_input_and_drop_ignore_values(input_tensor, ignore_value=None): ValueError: when `input_tensor`'s rank is `None`. """ input_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( - input_tensor) + input_tensor) if isinstance(input_tensor, sparse_tensor_lib.SparseTensor): return input_tensor with ops.name_scope(None, 'to_sparse_input', ( @@ -2228,14 +2227,14 @@ def _to_sparse_input_and_drop_ignore_values(input_tensor, ignore_value=None): # default value for that type. 
ignore_value = input_tensor.dtype.as_numpy_dtype() ignore_value = math_ops.cast( - ignore_value, input_tensor.dtype, name='ignore_value') + ignore_value, input_tensor.dtype, name='ignore_value') indices = array_ops.where( - math_ops.not_equal(input_tensor, ignore_value), name='indices') + math_ops.not_equal(input_tensor, ignore_value), name='indices') return sparse_tensor_lib.SparseTensor( - indices=indices, - values=array_ops.gather_nd(input_tensor, indices, name='values'), - dense_shape=array_ops.shape( - input_tensor, out_type=dtypes.int64, name='dense_shape')) + indices=indices, + values=array_ops.gather_nd(input_tensor, indices, name='values'), + dense_shape=array_ops.shape( + input_tensor, out_type=dtypes.int64, name='dense_shape')) def _normalize_feature_columns(feature_columns): @@ -2284,10 +2283,10 @@ def _normalize_feature_columns(feature_columns): class _NumericColumn( - _DenseColumn, - collections.namedtuple( - '_NumericColumn', - ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])): + _DenseColumn, + collections.namedtuple( + '_NumericColumn', + ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])): """see `numeric_column`.""" @property @@ -2297,17 +2296,17 @@ def name(self): @property def _parse_example_spec(self): return { - self.key: - parsing_ops.FixedLenFeature(self.shape, self.dtype, - self.default_value) + self.key: + parsing_ops.FixedLenFeature(self.shape, self.dtype, + self.default_value) } def _transform_feature(self, inputs): input_tensor = inputs.get(self.key) if isinstance(input_tensor, sparse_tensor_lib.SparseTensor): raise ValueError( - 'The corresponding Tensor of numerical column must be a Tensor. ' - 'SparseTensor is not supported. key: {}'.format(self.key)) + 'The corresponding Tensor of numerical column must be a Tensor. ' + 'SparseTensor is not supported. 
key: {}'.format(self.key)) if self.normalizer_fn is not None: input_tensor = self.normalizer_fn(input_tensor) return math_ops.cast(input_tensor, dtypes.float32) @@ -2359,23 +2358,23 @@ def _parse_example_spec(self): def _transform_feature(self, inputs): source_tensor = inputs.get(self.source_column) return math_ops._bucketize( # pylint: disable=protected-access - source_tensor, - boundaries=self.boundaries) + source_tensor, + boundaries=self.boundaries) @property def _variable_shape(self): return tensor_shape.TensorShape( - tuple(self.source_column.shape) + (len(self.boundaries) + 1,)) + tuple(self.source_column.shape) + (len(self.boundaries) + 1,)) def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): del weight_collections del trainable input_tensor = inputs.get(self) return array_ops.one_hot( - indices=math_ops.cast(input_tensor, dtypes.int64), - depth=len(self.boundaries) + 1, - on_value=1., - off_value=0.) + indices=math_ops.cast(input_tensor, dtypes.int64), + depth=len(self.boundaries) + 1, + on_value=1., + off_value=0.) @property def _num_buckets(self): @@ -2393,9 +2392,9 @@ def _get_sparse_tensors(self, source_dimension = self.source_column.shape[0] i1 = array_ops.reshape( - array_ops.tile( - array_ops.expand_dims(math_ops.range(0, batch_size), 1), - [1, source_dimension]), (-1,)) + array_ops.tile( + array_ops.expand_dims(math_ops.range(0, batch_size), 1), + [1, source_dimension]), (-1,)) i2 = array_ops.tile(math_ops.range(0, source_dimension), [batch_size]) # Flatten the bucket indices and unique them across dimensions # E.g. 
2nd dimension indices will range from k to 2*k-1 with k buckets @@ -2404,20 +2403,20 @@ def _get_sparse_tensors(self, (-1,)) + (len(self.boundaries) + 1) * i2) indices = math_ops.cast( - array_ops.transpose(array_ops.stack((i1, i2))), dtypes.int64) + array_ops.transpose(array_ops.stack((i1, i2))), dtypes.int64) dense_shape = math_ops.cast( - array_ops.stack([batch_size, source_dimension]), dtypes.int64) + array_ops.stack([batch_size, source_dimension]), dtypes.int64) sparse_tensor = sparse_tensor_lib.SparseTensor( - indices=indices, values=bucket_indices, dense_shape=dense_shape) + indices=indices, values=bucket_indices, dense_shape=dense_shape) return _CategoricalColumn.IdWeightPair(sparse_tensor, None) class _EmbeddingColumn( - _DenseColumn, _SequenceDenseColumn, - collections.namedtuple( - '_EmbeddingColumn', - ('categorical_column', 'dimension', 'combiner', 'layer_creator', - 'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))): + _DenseColumn, _SequenceDenseColumn, + collections.namedtuple( + '_EmbeddingColumn', + ('categorical_column', 'dimension', 'combiner', 'layer_creator', + 'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))): """See `embedding_column`.""" @property @@ -2446,47 +2445,47 @@ def _get_dense_tensor_internal(self, """Private method that follows the signature of _get_dense_tensor.""" # Get sparse IDs and weights. 
sparse_tensors = self.categorical_column._get_sparse_tensors( # pylint: disable=protected-access - inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs, + weight_collections=weight_collections, + trainable=trainable) sparse_ids = sparse_tensors.id_tensor sparse_weights = sparse_tensors.weight_tensor embedding_weights = self.layer_creator( - weight_collections=weight_collections, - scope=variable_scope.get_variable_scope()) + weight_collections=weight_collections, + scope=variable_scope.get_variable_scope()) if self.ckpt_to_load_from is not None: to_restore = embedding_weights if isinstance(to_restore, variables.PartitionedVariable): to_restore = to_restore._get_variable_list() # pylint: disable=protected-access checkpoint_utils.init_from_checkpoint( - self.ckpt_to_load_from, {self.tensor_name_in_ckpt: to_restore}) + self.ckpt_to_load_from, {self.tensor_name_in_ckpt: to_restore}) # Return embedding lookup result. return embedding_ops.safe_embedding_lookup_sparse( - embedding_weights=embedding_weights, - sparse_ids=sparse_ids, - sparse_weights=sparse_weights, - combiner=self.combiner, - name='%s_weights' % self.name, - max_norm=self.max_norm) + embedding_weights=embedding_weights, + sparse_ids=sparse_ids, + sparse_weights=sparse_weights, + combiner=self.combiner, + name='%s_weights' % self.name, + max_norm=self.max_norm) def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): if isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In embedding_column: {}. ' - 'categorical_column must not be of type _SequenceCategoricalColumn. ' - 'Suggested fix A: If you wish to use input_layer, use a ' - 'non-sequence categorical_column_with_*. ' - 'Suggested fix B: If you wish to create sequence input, use ' - 'sequence_input_layer instead of input_layer. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In embedding_column: {}. 
' + 'categorical_column must not be of type _SequenceCategoricalColumn. ' + 'Suggested fix A: If you wish to use input_layer, use a ' + 'non-sequence categorical_column_with_*. ' + 'Suggested fix B: If you wish to create sequence input, use ' + 'sequence_input_layer instead of input_layer. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) return self._get_dense_tensor_internal( - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) def _get_sequence_dense_tensor(self, inputs, @@ -2494,22 +2493,22 @@ def _get_sequence_dense_tensor(self, trainable=None): if not isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In embedding_column: {}. ' - 'categorical_column must be of type _SequenceCategoricalColumn ' - 'to use sequence_input_layer. ' - 'Suggested fix: Use one of sequence_categorical_column_with_*. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In embedding_column: {}. ' + 'categorical_column must be of type _SequenceCategoricalColumn ' + 'to use sequence_input_layer. ' + 'Suggested fix: Use one of sequence_categorical_column_with_*. 
' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) dense_tensor = self._get_dense_tensor_internal( # pylint: disable=protected-access - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access sequence_length = fc_utils.sequence_length_from_sparse_tensor( - sparse_tensors.id_tensor) + sparse_tensors.id_tensor) return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=sequence_length) def _get_graph_for_variable(var): @@ -2520,13 +2519,13 @@ def _get_graph_for_variable(var): class _SharedEmbeddingColumn( - _DenseColumn, _SequenceDenseColumn, - collections.namedtuple( - '_SharedEmbeddingColumn', - ('categorical_column', 'dimension', 'combiner', 'initializer', - 'shared_embedding_collection_name', 'ckpt_to_load_from', - 'tensor_name_in_ckpt', 'max_norm', 'trainable', 'partitioner', - 'ev_params'))): + _DenseColumn, _SequenceDenseColumn, + collections.namedtuple( + '_SharedEmbeddingColumn', + ('categorical_column', 'dimension', 'combiner', 'initializer', + 'shared_embedding_collection_name', 'ckpt_to_load_from', + 'tensor_name_in_ckpt', 'max_norm', 'trainable', 'partitioner', + 'ev_params'))): """See `embedding_column`.""" @property @@ -2606,45 +2605,45 @@ def _get_dense_tensor_internal(self, with ops.name_scope(None, default_name=self.name): # Get sparse IDs and weights. 
sparse_tensors = self.categorical_column._get_sparse_tensors( # pylint: disable=protected-access - inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs, + weight_collections=weight_collections, + trainable=trainable) sparse_ids = sparse_tensors.id_tensor sparse_weights = sparse_tensors.weight_tensor embedding_shape = (self.categorical_column._num_buckets, self.dimension) # pylint: disable=protected-access shared_embedding_collection = ops.get_collection( - self.shared_embedding_collection_name) + self.shared_embedding_collection_name) if shared_embedding_collection: if len(shared_embedding_collection) > 1: raise ValueError( - 'Collection {} can only contain one variable. ' - 'Suggested fix A: Choose a unique name for this collection. ' - 'Suggested fix B: Do not add any variables to this collection. ' - 'The feature_column library already adds a variable under the ' - 'hood.'.format(shared_embedding_collection)) + 'Collection {} can only contain one variable. ' + 'Suggested fix A: Choose a unique name for this collection. ' + 'Suggested fix B: Do not add any variables to this collection. ' + 'The feature_column library already adds a variable under the ' + 'hood.'.format(shared_embedding_collection)) embedding_weights = shared_embedding_collection[0] if embedding_weights.get_shape( ) != embedding_shape and not self.ev_params is not None: # noqa : E714 raise ValueError( - 'Shared embedding collection {} contains variable {} of ' - 'unexpected shape {}. Expected shape is {}. ' - 'Suggested fix A: Choose a unique name for this collection. ' - 'Suggested fix B: Do not add any variables to this collection. ' - 'The feature_column library already adds a variable under the ' - 'hood.'.format(self.shared_embedding_collection_name, - embedding_weights.name, - embedding_weights.get_shape(), embedding_shape)) + 'Shared embedding collection {} contains variable {} of ' + 'unexpected shape {}. Expected shape is {}. 
' + 'Suggested fix A: Choose a unique name for this collection. ' + 'Suggested fix B: Do not add any variables to this collection. ' + 'The feature_column library already adds a variable under the ' + 'hood.'.format(self.shared_embedding_collection_name, + embedding_weights.name, + embedding_weights.get_shape(), embedding_shape)) else: if self.ev_params is None: embedding_weights = variable_scope.get_variable( - name='embedding_weights', - shape=embedding_shape, - dtype=dtypes.float32, - initializer=self.initializer, - trainable=self.trainable and trainable, - partitioner=self.partitioner, - collections=weight_collections) + name='embedding_weights', + shape=embedding_shape, + dtype=dtypes.float32, + initializer=self.initializer, + trainable=self.trainable and trainable, + partitioner=self.partitioner, + collections=weight_collections) else: # at eval or inference time, it is necessary to set # the initializers to zeros, so that new key will @@ -2656,16 +2655,16 @@ def _get_dense_tensor_internal(self, else: initializer = self.initializer embedding_weights = variable_scope.get_embedding_variable( - name='embedding_weights', - embedding_dim=self.dimension, - initializer=initializer, - trainable=self.trainable and trainable, - partitioner=self.partitioner, - collections=weight_collections, - steps_to_live=self.ev_params.steps_to_live - if self.ev_params is not None else None, - filter_options=variables.CounterFilterOptions( - self.ev_params.filter_freq)) + name='embedding_weights', + embedding_dim=self.dimension, + initializer=initializer, + trainable=self.trainable and trainable, + partitioner=self.partitioner, + collections=weight_collections, + steps_to_live=self.ev_params.steps_to_live + if self.ev_params is not None else None, + filter_options=variables.CounterFilterOptions( + self.ev_params.filter_freq)) ops.add_to_collection(self.shared_embedding_collection_name, embedding_weights) @@ -2674,41 +2673,41 @@ def _get_dense_tensor_internal(self, if 
isinstance(to_restore, variables.PartitionedVariable): to_restore = to_restore._get_variable_list() # pylint: disable=protected-access checkpoint_utils.init_from_checkpoint( - self.ckpt_to_load_from, {self.tensor_name_in_ckpt: to_restore}) + self.ckpt_to_load_from, {self.tensor_name_in_ckpt: to_restore}) # Return embedding lookup result. if self.ev_params is not None: return ev_embedding_ops.safe_embedding_lookup_sparse( - embedding_weights=embedding_weights, - sparse_ids=sparse_ids, - sparse_weights=sparse_weights, - combiner=self.combiner, - name='%s_weights' % self.name, - max_norm=self.max_norm) + embedding_weights=embedding_weights, + sparse_ids=sparse_ids, + sparse_weights=sparse_weights, + combiner=self.combiner, + name='%s_weights' % self.name, + max_norm=self.max_norm) else: return embedding_ops.safe_embedding_lookup_sparse( - embedding_weights=embedding_weights, - sparse_ids=sparse_ids, - sparse_weights=sparse_weights, - combiner=self.combiner, - name='%s_weights' % self.name, - max_norm=self.max_norm) + embedding_weights=embedding_weights, + sparse_ids=sparse_ids, + sparse_weights=sparse_weights, + combiner=self.combiner, + name='%s_weights' % self.name, + max_norm=self.max_norm) def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): if isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In embedding_column: {}. ' - 'categorical_column must not be of type _SequenceCategoricalColumn. ' - 'Suggested fix A: If you wish to use input_layer, use a ' - 'non-sequence categorical_column_with_*. ' - 'Suggested fix B: If you wish to create sequence input, use ' - 'sequence_input_layer instead of input_layer. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In embedding_column: {}. ' + 'categorical_column must not be of type _SequenceCategoricalColumn. 
' + 'Suggested fix A: If you wish to use input_layer, use a ' + 'non-sequence categorical_column_with_*. ' + 'Suggested fix B: If you wish to create sequence input, use ' + 'sequence_input_layer instead of input_layer. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) return self._get_dense_tensor_internal( - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) def _get_sequence_dense_tensor(self, inputs, @@ -2716,21 +2715,21 @@ def _get_sequence_dense_tensor(self, trainable=None): if not isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In embedding_column: {}. ' - 'categorical_column must be of type _SequenceCategoricalColumn ' - 'to use sequence_input_layer. ' - 'Suggested fix: Use one of sequence_categorical_column_with_*. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In embedding_column: {}. ' + 'categorical_column must be of type _SequenceCategoricalColumn ' + 'to use sequence_input_layer. ' + 'Suggested fix: Use one of sequence_categorical_column_with_*. 
' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) dense_tensor = self._get_dense_tensor_internal( # pylint: disable=protected-access - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access sequence_length = fc_utils.sequence_length_from_sparse_tensor( - sparse_tensors.id_tensor) + sparse_tensors.id_tensor) return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=sequence_length) def _check_shape(shape, key): @@ -2751,8 +2750,8 @@ def _check_shape(shape, key): class _HashedCategoricalColumn(_CategoricalColumn, collections.namedtuple( - '_HashedCategoricalColumn', - ['key', 'hash_bucket_size', 'dtype'])): + '_HashedCategoricalColumn', + ['key', 'hash_bucket_size', 'dtype'])): """see `categorical_column_with_hash_bucket`.""" @property @@ -2773,14 +2772,14 @@ def _transform_feature(self, inputs): raise ValueError('SparseColumn input must be a SparseTensor.') fc_utils.assert_string_or_int( - input_tensor.dtype, - prefix='column_name: {} input_tensor'.format(self.key)) + input_tensor.dtype, + prefix='column_name: {} input_tensor'.format(self.key)) if self.dtype.is_integer != input_tensor.dtype.is_integer: raise ValueError( - 'Column dtype and SparseTensors dtype must be compatible. ' - 'key: {}, column dtype: {}, tensor dtype: {}'.format( - self.key, self.dtype, input_tensor.dtype)) + 'Column dtype and SparseTensors dtype must be compatible. 
' + 'key: {}, column dtype: {}, tensor dtype: {}'.format( + self.key, self.dtype, input_tensor.dtype)) if self.dtype == dtypes.string: sparse_values = input_tensor.values @@ -2788,7 +2787,7 @@ def _transform_feature(self, inputs): sparse_values = string_ops.as_string(input_tensor.values) sparse_id_values = string_ops.string_to_hash_bucket_fast( - sparse_values, self.hash_bucket_size, name='lookup') + sparse_values, self.hash_bucket_size, name='lookup') return sparse_tensor_lib.SparseTensor(input_tensor.indices, sparse_id_values, input_tensor.dense_shape) @@ -2806,10 +2805,10 @@ def _get_sparse_tensors(self, class _VocabularyFileCategoricalColumn( - _CategoricalColumn, - collections.namedtuple('_VocabularyFileCategoricalColumn', - ('key', 'vocabulary_file', 'vocabulary_size', - 'num_oov_buckets', 'dtype', 'default_value'))): + _CategoricalColumn, + collections.namedtuple('_VocabularyFileCategoricalColumn', + ('key', 'vocabulary_file', 'vocabulary_size', + 'num_oov_buckets', 'dtype', 'default_value'))): """See `categorical_column_with_vocabulary_file`.""" @property @@ -2825,13 +2824,13 @@ def _transform_feature(self, inputs): if self.dtype.is_integer != input_tensor.dtype.is_integer: raise ValueError( - 'Column dtype and SparseTensors dtype must be compatible. ' - 'key: {}, column dtype: {}, tensor dtype: {}'.format( - self.key, self.dtype, input_tensor.dtype)) + 'Column dtype and SparseTensors dtype must be compatible. 
' + 'key: {}, column dtype: {}, tensor dtype: {}'.format( + self.key, self.dtype, input_tensor.dtype)) fc_utils.assert_string_or_int( - input_tensor.dtype, - prefix='column_name: {} input_tensor'.format(self.key)) + input_tensor.dtype, + prefix='column_name: {} input_tensor'.format(self.key)) key_dtype = self.dtype if input_tensor.dtype.is_integer: @@ -2840,12 +2839,12 @@ def _transform_feature(self, inputs): input_tensor = math_ops.cast(input_tensor, dtypes.int64) return lookup_ops.index_table_from_file( - vocabulary_file=self.vocabulary_file, - num_oov_buckets=self.num_oov_buckets, - vocab_size=self.vocabulary_size, - default_value=self.default_value, - key_dtype=key_dtype, - name='{}_lookup'.format(self.key)).lookup(input_tensor) + vocabulary_file=self.vocabulary_file, + num_oov_buckets=self.num_oov_buckets, + vocab_size=self.vocabulary_size, + default_value=self.default_value, + key_dtype=key_dtype, + name='{}_lookup'.format(self.key)).lookup(input_tensor) @property def _num_buckets(self): @@ -2860,10 +2859,10 @@ def _get_sparse_tensors(self, class _VocabularyListCategoricalColumn( - _CategoricalColumn, - collections.namedtuple( - '_VocabularyListCategoricalColumn', - ('key', 'vocabulary_list', 'dtype', 'default_value', 'num_oov_buckets')) + _CategoricalColumn, + collections.namedtuple( + '_VocabularyListCategoricalColumn', + ('key', 'vocabulary_list', 'dtype', 'default_value', 'num_oov_buckets')) ): """See `categorical_column_with_vocabulary_list`.""" @@ -2880,13 +2879,13 @@ def _transform_feature(self, inputs): if self.dtype.is_integer != input_tensor.dtype.is_integer: raise ValueError( - 'Column dtype and SparseTensors dtype must be compatible. ' - 'key: {}, column dtype: {}, tensor dtype: {}'.format( - self.key, self.dtype, input_tensor.dtype)) + 'Column dtype and SparseTensors dtype must be compatible. 
' + 'key: {}, column dtype: {}, tensor dtype: {}'.format( + self.key, self.dtype, input_tensor.dtype)) fc_utils.assert_string_or_int( - input_tensor.dtype, - prefix='column_name: {} input_tensor'.format(self.key)) + input_tensor.dtype, + prefix='column_name: {} input_tensor'.format(self.key)) key_dtype = self.dtype if input_tensor.dtype.is_integer: @@ -2895,11 +2894,11 @@ def _transform_feature(self, inputs): input_tensor = math_ops.cast(input_tensor, dtypes.int64) return lookup_ops.index_table_from_tensor( - vocabulary_list=tuple(self.vocabulary_list), - default_value=self.default_value, - num_oov_buckets=self.num_oov_buckets, - dtype=key_dtype, - name='{}_lookup'.format(self.key)).lookup(input_tensor) + vocabulary_list=tuple(self.vocabulary_list), + default_value=self.default_value, + num_oov_buckets=self.num_oov_buckets, + dtype=key_dtype, + name='{}_lookup'.format(self.key)).lookup(input_tensor) @property def _num_buckets(self): @@ -2915,8 +2914,8 @@ def _get_sparse_tensors(self, class _IdentityCategoricalColumn(_CategoricalColumn, collections.namedtuple( - '_IdentityCategoricalColumn', - ('key', 'num_buckets', 'default_value'))): + '_IdentityCategoricalColumn', + ('key', 'num_buckets', 'default_value'))): """See `categorical_column_with_identity`.""" @property @@ -2932,37 +2931,37 @@ def _transform_feature(self, inputs): if not input_tensor.dtype.is_integer: raise ValueError('Invalid input, not integer. key: {} dtype: {}'.format( - self.key, input_tensor.dtype)) + self.key, input_tensor.dtype)) values = math_ops.cast(input_tensor.values, dtypes.int64, name='values') num_buckets = math_ops.cast( - self.num_buckets, dtypes.int64, name='num_buckets') + self.num_buckets, dtypes.int64, name='num_buckets') zero = math_ops.cast(0, dtypes.int64, name='zero') if self.default_value is None: # Fail if values are out-of-range. 
assert_less = check_ops.assert_less( - values, - num_buckets, - data=(values, num_buckets), - name='assert_less_than_num_buckets') + values, + num_buckets, + data=(values, num_buckets), + name='assert_less_than_num_buckets') assert_greater = check_ops.assert_greater_equal( - values, zero, data=(values,), name='assert_greater_or_equal_0') + values, zero, data=(values,), name='assert_greater_or_equal_0') with ops.control_dependencies((assert_less, assert_greater)): values = array_ops.identity(values) else: # Assign default for out-of-range values. values = array_ops.where( - math_ops.logical_or( - values < zero, values >= num_buckets, name='out_of_range'), - array_ops.fill( - dims=array_ops.shape(values), - value=math_ops.cast(self.default_value, dtypes.int64), - name='default_values'), values) + math_ops.logical_or( + values < zero, values >= num_buckets, name='out_of_range'), + array_ops.fill( + dims=array_ops.shape(values), + value=math_ops.cast(self.default_value, dtypes.int64), + name='default_values'), values) return sparse_tensor_lib.SparseTensor( - indices=input_tensor.indices, - values=values, - dense_shape=input_tensor.dense_shape) + indices=input_tensor.indices, + values=values, + dense_shape=input_tensor.dense_shape) @property def _num_buckets(self): @@ -2977,10 +2976,10 @@ def _get_sparse_tensors(self, class _WeightedCategoricalColumn( - _CategoricalColumn, - collections.namedtuple( - '_WeightedCategoricalColumn', - ('categorical_column', 'weight_feature_key', 'dtype'))): + _CategoricalColumn, + collections.namedtuple( + '_WeightedCategoricalColumn', + ('categorical_column', 'weight_feature_key', 'dtype'))): """See `weighted_categorical_column`.""" @property @@ -2993,7 +2992,7 @@ def _parse_example_spec(self): config = self.categorical_column._parse_example_spec # pylint: disable=protected-access if self.weight_feature_key in config: raise ValueError('Parse config {} already exists for {}.'.format( - config[self.weight_feature_key], 
self.weight_feature_key)) + config[self.weight_feature_key], self.weight_feature_key)) config[self.weight_feature_key] = parsing_ops.VarLenFeature(self.dtype) return config @@ -3006,14 +3005,14 @@ def _transform_feature(self, inputs): if weight_tensor is None: raise ValueError('Missing weights {}.'.format(self.weight_feature_key)) weight_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( - weight_tensor) + weight_tensor) if self.dtype != weight_tensor.dtype.base_dtype: raise ValueError('Bad dtype, expected {}, but got {}.'.format( - self.dtype, weight_tensor.dtype)) + self.dtype, weight_tensor.dtype)) if not isinstance(weight_tensor, sparse_tensor_lib.SparseTensor): # The weight tensor can be a regular Tensor. In this case, sparsify it. weight_tensor = _to_sparse_input_and_drop_ignore_values( - weight_tensor, ignore_value=0.0) + weight_tensor, ignore_value=0.0) if not weight_tensor.dtype.is_floating: weight_tensor = math_ops.cast(weight_tensor, dtypes.float32) return (inputs.get(self.categorical_column), weight_tensor) @@ -3029,9 +3028,9 @@ def _get_sparse_tensors(self, class _CrossedColumn( - _CategoricalColumn, - collections.namedtuple('_CrossedColumn', - ['keys', 'hash_bucket_size', 'hash_key'])): + _CategoricalColumn, + collections.namedtuple('_CrossedColumn', + ['keys', 'hash_bucket_size', 'hash_key'])): """See `crossed_column`.""" @property @@ -3063,16 +3062,16 @@ def _transform_feature(self, inputs): ids_and_weights = key._get_sparse_tensors(inputs) # pylint: disable=protected-access if ids_and_weights.weight_tensor is not None: raise ValueError( - 'crossed_column does not support weight_tensor, but the given ' - 'column populates weight_tensor. ' - 'Given column: {}'.format(key.name)) + 'crossed_column does not support weight_tensor, but the given ' + 'column populates weight_tensor. ' + 'Given column: {}'.format(key.name)) feature_tensors.append(ids_and_weights.id_tensor) else: raise ValueError('Unsupported column type. 
Given: {}'.format(key)) return sparse_ops.sparse_cross_hashed( - inputs=feature_tensors, - num_buckets=self.hash_bucket_size, - hash_key=self.hash_key) + inputs=feature_tensors, + num_buckets=self.hash_bucket_size, + hash_key=self.hash_key) @property def _num_buckets(self): @@ -3137,9 +3136,9 @@ def _transform_feature(self, inputs): # If the underlying column is weighted, return the input as a dense tensor. if weight_tensor is not None: weighted_column = sparse_ops.sparse_merge( - sp_ids=id_tensor, - sp_values=weight_tensor, - vocab_size=int(self._variable_shape[-1])) + sp_ids=id_tensor, + sp_values=weight_tensor, + vocab_size=int(self._variable_shape[-1])) # Remove (?, -1) index. weighted_column = sparse_ops.sparse_slice(weighted_column, [0, 0], weighted_column.dense_shape) @@ -3150,15 +3149,15 @@ def _transform_feature(self, inputs): weighted_column.dense_shape) dense_id_tensor = sparse_ops.sparse_tensor_to_dense( - id_tensor, default_value=-1) + id_tensor, default_value=-1) # One hot must be float for tf.concat reasons since all other inputs to # input_layer are float32. one_hot_id_tensor = array_ops.one_hot( - dense_id_tensor, - depth=self._variable_shape[-1], - on_value=1.0, - off_value=0.0) + dense_id_tensor, + depth=self._variable_shape[-1], + on_value=1.0, + off_value=0.0) # Reduce to get a multi-hot per example. return math_ops.reduce_sum(one_hot_id_tensor, axis=[-2]) @@ -3194,14 +3193,14 @@ def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): del trainable if isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In indicator_column: {}. ' - 'categorical_column must not be of type _SequenceCategoricalColumn. ' - 'Suggested fix A: If you wish to use input_layer, use a ' - 'non-sequence categorical_column_with_*. ' - 'Suggested fix B: If you wish to create sequence input, use ' - 'sequence_input_layer instead of input_layer. 
' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In indicator_column: {}. ' + 'categorical_column must not be of type _SequenceCategoricalColumn. ' + 'Suggested fix A: If you wish to use input_layer, use a ' + 'non-sequence categorical_column_with_*. ' + 'Suggested fix B: If you wish to create sequence input, use ' + 'sequence_input_layer instead of input_layer. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) # Feature has been already transformed. Return the intermediate # representation created by _transform_feature. return inputs.get(self) @@ -3216,20 +3215,20 @@ def _get_sequence_dense_tensor(self, del trainable if not isinstance(self.categorical_column, _SequenceCategoricalColumn): raise ValueError( - 'In indicator_column: {}. ' - 'categorical_column must be of type _SequenceCategoricalColumn ' - 'to use sequence_input_layer. ' - 'Suggested fix: Use one of sequence_categorical_column_with_*. ' - 'Given (type {}): {}'.format(self.name, type(self.categorical_column), - self.categorical_column)) + 'In indicator_column: {}. ' + 'categorical_column must be of type _SequenceCategoricalColumn ' + 'to use sequence_input_layer. ' + 'Suggested fix: Use one of sequence_categorical_column_with_*. ' + 'Given (type {}): {}'.format(self.name, type(self.categorical_column), + self.categorical_column)) # Feature has been already transformed. Return the intermediate # representation created by _transform_feature. 
dense_tensor = inputs.get(self) sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) # pylint: disable=protected-access sequence_length = fc_utils.sequence_length_from_sparse_tensor( - sparse_tensors.id_tensor) + sparse_tensors.id_tensor) return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=sequence_length) def _verify_static_batch_size_equality(tensors, columns): @@ -3252,16 +3251,16 @@ def _verify_static_batch_size_equality(tensors, columns): expected_batch_size = tensors[i].shape.dims[0] elif not expected_batch_size.is_compatible_with(tensors[i].shape.dims[0]): raise ValueError( - 'Batch size (first dimension) of each feature must be same. ' - 'Batch size of columns ({}, {}): ({}, {})'.format( - columns[bath_size_column_index].name, columns[i].name, - expected_batch_size, tensors[i].shape.dims[0])) + 'Batch size (first dimension) of each feature must be same. ' + 'Batch size of columns ({}, {}): ({}, {})'.format( + columns[bath_size_column_index].name, columns[i].name, + expected_batch_size, tensors[i].shape.dims[0])) class _SequenceCategoricalColumn(_CategoricalColumn, collections.namedtuple( - '_SequenceCategoricalColumn', - ['categorical_column'])): + '_SequenceCategoricalColumn', + ['categorical_column'])): """Represents sequences of categorical data.""" @property diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index fa604926d..82d42508c 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -5,15 +5,20 @@ import tensorflow as tf from easy_rec.python.layers import dnn -from easy_rec.python.layers.common_layers import SENet, EnhancedInputLayer -from easy_rec.python.layers.common_layers import highway, Concatenate +from easy_rec.python.layers.common_layers import Concatenate +from easy_rec.python.layers.common_layers import EnhancedInputLayer +from 
easy_rec.python.layers.common_layers import SENet +from easy_rec.python.layers.common_layers import highway from easy_rec.python.layers.fibinet import FiBiNetLayer -from easy_rec.python.layers.fm import FM, FMLayer +from easy_rec.python.layers.fm import FMLayer from easy_rec.python.layers.mask_net import MaskNet from easy_rec.python.layers.numerical_embedding import AutoDisEmbedding from easy_rec.python.layers.numerical_embedding import PeriodicEmbedding +from easy_rec.python.protos import backbone_pb2 +from easy_rec.python.protos import layer_pb2 from easy_rec.python.utils.dag import DAG -from easy_rec.python.utils.tf_utils import add_op, dot_op +from easy_rec.python.utils.tf_utils import add_op +from easy_rec.python.utils.tf_utils import dot_op if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -29,24 +34,67 @@ def __init__(self, config, model, features, input_layer, l2_reg=None): self._l2_reg = l2_reg self._dag = DAG() self._name_to_blocks = {} + input_feature_groups = set() for block in config.blocks: - self._name_to_blocks[block.name] = block self._dag.add_node(block.name) - num_blocks = len(self._name_to_blocks) + self._name_to_blocks[block.name] = block + layer = block.WhichOneof('layer') + if layer == 'input_layer': + if len(block.inputs) != 0: + raise ValueError('no input allowed for input_layer: ' + block.name) + input_name = block.name + if input_name in input_feature_groups: + raise ValueError('input `%s` already exists in other block' % + input_name) + else: + input_feature_groups.add(input_name) + + num_groups = len(input_feature_groups) + num_blocks = len(self._name_to_blocks) - num_groups assert num_blocks > 0, 'there must be at least one block in backbone' + for block in config.blocks: + layer = block.WhichOneof('layer') + if layer == 'input_layer': + continue + if block.name in input_feature_groups: + raise KeyError('block name can not be one of feature groups:' + + block.name) assert len(block.inputs) > 0, 'no input for block: %s' % block.name - 
for node in block.inputs: - if node in self._name_to_blocks: - self._dag.add_edge(node, block.name) + + for input_node in block.inputs: + input_name = input_node.name + if input_name in self._name_to_blocks: + assert input_name != block.name, 'input name can not equal to block name:' + input_name + self._dag.add_edge(input_name, block.name) + elif input_name not in input_feature_groups: + if input_layer.has_group(input_name): + logging.info('adding an input_layer block: ' + input_name) + new_block = backbone_pb2.Block() + new_block.name = input_name + new_block.input_layer.CopyFrom(layer_pb2.InputLayer()) + self._name_to_blocks[input_name] = new_block + self._dag.add_node(input_name) + self._dag.add_edge(input_name, block.name) + input_feature_groups.add(block.name) + else: + raise KeyError( + 'invalid input name `%s`, must be the name of either a feature group or an another block' + % input_name) + num_groups = len(input_feature_groups) + assert num_groups > 0, 'there must be at least one input layer' def block_input(self, config, block_outputs, output_list=False): inputs = [] - for input_name in config.inputs: + for input_node in config.inputs: + input_name = input_node.name if input_name in block_outputs: input_feature = block_outputs[input_name] else: - input_feature, _ = self._input_layer(self._features, input_name) + raise KeyError('input name `%s` does not exists' % input_name) + if input_node.HasField('input_fn'): + fn = eval(input_node.input_fn) + input_feature = fn(input_feature) inputs.append(input_feature) if output_list: @@ -67,14 +115,12 @@ def __call__(self, is_training, *args, **kwargs): for block in blocks: config = self._name_to_blocks[block] layer = config.WhichOneof('layer') - if layer == 'input_layer': - if len(config.inputs) != 1: - raise ValueError('only one input allowed for input_layer: ' + - block.name) + if layer is None: # identity layer + block_outputs[block] = self.block_input(config, block_outputs) + elif layer == 'input_layer': conf 
= config.input_layer - input_layer = EnhancedInputLayer(conf, self._input_layer, - self._features) - output = input_layer(config.inputs[0], is_training) + input_fn = EnhancedInputLayer(conf, self._input_layer, self._features) + output = input_fn(block, is_training) block_outputs[block] = output elif layer == 'periodic_embedding': input_feature = self.block_input(config, block_outputs) @@ -131,9 +177,11 @@ def __call__(self, is_training, *args, **kwargs): block_outputs[block] = concat(input_feature) elif layer == 'reshape': input_feature = self.block_input(config, block_outputs) - block_outputs[block] = tf.reshape(input_feature, list(config.reshape.dims)) + block_outputs[block] = tf.reshape(input_feature, + list(config.reshape.dims)) elif layer == 'add': - input_feature = self.block_input(config, block_outputs, output_list=True) + input_feature = self.block_input( + config, block_outputs, output_list=True) block_outputs[block] = add_op(input_feature) elif layer == 'dot': input_feature = self.block_input(config, block_outputs) @@ -142,9 +190,9 @@ def __call__(self, is_training, *args, **kwargs): input_feature = self.block_input(config, block_outputs) fn = eval(config.Lambda.expression) block_outputs[block] = fn(input_feature) - elif layer == 'chain': - input_feature = self.block_input(config, block_outputs) - block_outputs[block] = op_chain(input_feature, config.chain.ops) + # elif layer == 'chain': + # input_feature = self.block_input(config, block_outputs) + # block_outputs[block] = op_chain(input_feature, config.chain.ops) else: raise NotImplementedError('Unsupported backbone layer:' + layer) @@ -154,8 +202,8 @@ def __call__(self, is_training, *args, **kwargs): temp.append(block_outputs[output]) else: raise ValueError('No output `%s` of backbone to be concat' % output) - output = concat_inputs(temp, msg='backbone') + if self._config.HasField('top_mlp'): no_act = self._config.top_mlp.last_layer_no_activation no_bn = self._config.top_mlp.last_layer_no_batch_norm @@ 
-202,66 +250,66 @@ def concat_inputs(inputs, axis=-1, msg=''): raise ValueError('no inputs to be concat:' + msg) -def op_chain(inputs, ops): - output = inputs - for op in ops: - op_name = op.WhichOneOf('Op') - output = run_op(output, op_name, op, block='op_chain') - return output - - -def run_op(inputs, op_name, config, block='', is_training=False, l2_reg=None): - if op_name == 'periodic_embedding': - num_emb = PeriodicEmbedding(config.periodic_embedding, scope=block) - return num_emb(inputs) - elif op_name == 'auto_dis_embedding': - num_emb = AutoDisEmbedding(config.auto_dis_embedding, scope=block) - return num_emb(inputs) - elif op_name == 'highway': - conf = config.highway - highway_op_name = highway( - inputs, - conf.emb_size, - activation=conf.activation, - dropout=conf.dropout_rate, - scope=block) - return highway_op_name(inputs) - elif op_name == 'mlp': - mlp = dnn.DNN( - config.mlp, - l2_reg, - name='%s_mlp' % block, - is_training=is_training, - last_layer_no_activation=config.mlp.last_layer_no_activation, - last_layer_no_batch_norm=config.mlp.last_layer_no_batch_norm) - return mlp(inputs) - elif op_name == 'masknet': - mask_net = MaskNet(config.masknet, name=block, reuse=tf.AUTO_REUSE) - output = mask_net(inputs, is_training, l2_reg=l2_reg) - return output - elif op_name == 'senet': - senet = SENet(config.senet, name=block) - output = senet(inputs) - return output - elif op_name == 'fibinet': - fibinet = FiBiNetLayer(config.fibinet, name=block) - output = fibinet(inputs, is_training, l2_reg=l2_reg) - return output - elif op_name == 'fm': - fm = FMLayer(config.fm, name=block) - return fm(inputs) - if op_name == 'Lambda': - fn = eval(config.Lambda.expression) - output = fn(inputs) - elif op_name == 'concat': - concat = Concatenate(config.concat) - output = concat(inputs) - elif op_name == 'reshape': - output = tf.reshape(inputs, list(config.reshape.dims)) - elif op_name == 'add': - output = add_op(inputs) - elif op_name == 'dot': - output = dot_op(inputs) - 
else: - raise NotImplementedError('Unsupported op:' + op_name) - return output +# def op_chain(inputs, ops): +# output = inputs +# for op in ops: +# op_name = op.WhichOneOf('Op') +# output = run_op(output, op_name, op, block='op_chain') +# return output +# +# +# def run_op(inputs, op_name, config, block='', is_training=False, l2_reg=None): +# if op_name == 'periodic_embedding': +# num_emb = PeriodicEmbedding(config.periodic_embedding, scope=block) +# return num_emb(inputs) +# elif op_name == 'auto_dis_embedding': +# num_emb = AutoDisEmbedding(config.auto_dis_embedding, scope=block) +# return num_emb(inputs) +# elif op_name == 'highway': +# conf = config.highway +# highway_op_name = highway( +# inputs, +# conf.emb_size, +# activation=conf.activation, +# dropout=conf.dropout_rate, +# scope=block) +# return highway_op_name(inputs) +# elif op_name == 'mlp': +# mlp = dnn.DNN( +# config.mlp, +# l2_reg, +# name='%s_mlp' % block, +# is_training=is_training, +# last_layer_no_activation=config.mlp.last_layer_no_activation, +# last_layer_no_batch_norm=config.mlp.last_layer_no_batch_norm) +# return mlp(inputs) +# elif op_name == 'masknet': +# mask_net = MaskNet(config.masknet, name=block, reuse=tf.AUTO_REUSE) +# output = mask_net(inputs, is_training, l2_reg=l2_reg) +# return output +# elif op_name == 'senet': +# senet = SENet(config.senet, name=block) +# output = senet(inputs) +# return output +# elif op_name == 'fibinet': +# fibinet = FiBiNetLayer(config.fibinet, name=block) +# output = fibinet(inputs, is_training, l2_reg=l2_reg) +# return output +# elif op_name == 'fm': +# fm = FMLayer(config.fm, name=block) +# return fm(inputs) +# if op_name == 'Lambda': +# fn = eval(config.Lambda.expression) +# output = fn(inputs) +# elif op_name == 'concat': +# concat = Concatenate(config.concat) +# output = concat(inputs) +# elif op_name == 'reshape': +# output = tf.reshape(inputs, list(config.reshape.dims)) +# elif op_name == 'add': +# output = add_op(inputs) +# elif op_name == 'dot': 
+# output = dot_op(inputs) +# else: +# raise NotImplementedError('Unsupported op:' + op_name) +# return output diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index a453141f9..f06723f68 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -82,6 +82,7 @@ def layer_norm(input_tensor, name=None, reuse=None): class EnhancedInputLayer(object): + """Enhance the raw input layer.""" def __init__(self, config, input_layer, feature_dict): if config.do_batch_norm and config.do_layer_norm: @@ -92,56 +93,49 @@ def __init__(self, config, input_layer, feature_dict): self._input_layer = input_layer self._feature_dict = feature_dict - def __call__(self, feature_group, is_training, *args, **kwargs): - features, feature_list = self._input_layer(self._feature_dict, - feature_group) + def __call__(self, group, is_training, *args, **kwargs): + features, feature_list = self._input_layer(self._feature_dict, group) num_features = len(feature_list) - do_feature_dropout = 0.0 < self._config.feature_dropout_rate < 1.0 - if self._config.output_feature_list or do_feature_dropout: - if self._config.do_layer_norm or self._config.do_batch_norm: - for i in range(num_features): - fea = feature_list[i] - if self._config.do_batch_norm: - fea = tf.layers.batch_normalization(fea, training=is_training) - elif self._config.do_layer_norm: - fea = layer_norm(fea) - feature_list[i] = fea - elif self._config.do_batch_norm: - features = tf.layers.batch_normalization(features, training=is_training) - elif self._config.do_layer_norm: - features = layer_norm(features) - - if do_feature_dropout and is_training: + do_ln = self._config.do_layer_norm + do_bn = self._config.do_batch_norm + do_feature_dropout = is_training and 0.0 < self._config.feature_dropout_rate < 1.0 + if do_feature_dropout: keep_prob = 1.0 - self._config.feature_dropout_rate bern = tf.distributions.Bernoulli(probs=keep_prob) mask = 
bern.sample(num_features) - for i in range(num_features): - fea = tf.div(feature_list[i], keep_prob) * mask[i] - feature_list[i] = fea - features = tf.concat(feature_list, axis=-1) + elif do_bn: + features = tf.layers.batch_normalization(features, training=is_training) + elif do_ln: + features = layer_norm(features) do_dropout = 0.0 < self._config.dropout_rate < 1.0 - if self._config.output_feature_list: - if do_dropout: - for i in range(num_features): - fea = feature_list[i] + if do_feature_dropout or do_ln or do_bn or do_dropout: + for i in range(num_features): + fea = feature_list[i] + if self._config.do_batch_norm: + fea = tf.layers.batch_normalization(fea, training=is_training) + elif self._config.do_layer_norm: + fea = layer_norm(fea) + if do_dropout: fea = tf.layers.dropout( fea, self._config.dropout_rate, training=is_training) - feature_list[i] = fea - if self._config.output_3d_tensor: - for i in range(num_features): - feature_list[i] = tf.expand_dims(feature_list[i], axis=1) - return tf.concat(feature_list, axis=1) - return feature_list + if do_feature_dropout: + fea = tf.div(fea, keep_prob) * mask[i] + feature_list[i] = fea + if do_feature_dropout: + features = tf.concat(feature_list, axis=-1) - if do_dropout: + if do_dropout and not do_feature_dropout: features = tf.layers.dropout( features, self._config.dropout_rate, training=is_training) - if self._config.output_3d_tensor: - dim = int(feature_list[0].shape[-1]) - return tf.reshape(features, [-1, num_features, dim]) + if self._config.only_output_feature_list: + return feature_list + if self._config.only_output_3d_tensor: + return tf.stack(feature_list, axis=1) + if self._config.output_2d_tensor_and_feature_list: + return features, feature_list return features diff --git a/easy_rec/python/layers/fm.py b/easy_rec/python/layers/fm.py index 87d621d57..7b0742f6d 100644 --- a/easy_rec/python/layers/fm.py +++ b/easy_rec/python/layers/fm.py @@ -32,6 +32,7 @@ class FMLayer(object): References - [Factorization 
Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) """ + def __init__(self, config, name='fm'): self.name = name self.config = config @@ -59,8 +60,8 @@ def __call__(self, inputs): with tf.name_scope(self.name): square_of_sum = tf.square(tf.reduce_sum(fea, axis=1)) - sum_of_square = tf.reduce_sum(fea * fea, axis=1) - cross_term = square_of_sum - sum_of_square + sum_of_square = tf.reduce_sum(tf.square(fea), axis=1) + cross_term = tf.subtract(square_of_sum, sum_of_square) if self.config.use_variant: cross_term = 0.5 * cross_term else: diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index 33cd681ad..df1a17b25 100644 --- a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -17,9 +17,10 @@ from easy_rec.python.layers.common_layers import text_cnn from easy_rec.python.layers.fscd_layer import FSCDLayer from easy_rec.python.protos.feature_config_pb2 import WideOrDeep -from easy_rec.python.utils import shape_utils, conditional +from easy_rec.python.utils import conditional +from easy_rec.python.utils import shape_utils -from easy_rec.python.compat.feature_column.feature_column_v2 import is_embedding_column +from easy_rec.python.compat.feature_column.feature_column_v2 import is_embedding_column # NOQA class InputLayer(object): @@ -97,7 +98,7 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): if is_combine: with conditional(self._is_predicting, ops.device('/CPU:0')): concat_features, group_features = self.single_call_input_layer( - features, group_name, feature_name_to_output_tensors) + features, group_name, feature_name_to_output_tensors) if group_name in self._group_name_to_seq_features: # for target attention group_seq_arr = self._group_name_to_seq_features[group_name] diff --git a/easy_rec/python/layers/keras/__init__.py b/easy_rec/python/layers/keras/__init__.py new file mode 100644 index 000000000..c4006b39c --- /dev/null +++ 
b/easy_rec/python/layers/keras/__init__.py @@ -0,0 +1 @@ +from .dot_interaction import DotInteraction diff --git a/easy_rec/python/layers/keras/dcn.py b/easy_rec/python/layers/keras/dcn.py new file mode 100644 index 000000000..2f35bdc5d --- /dev/null +++ b/easy_rec/python/layers/keras/dcn.py @@ -0,0 +1,182 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +"""Implements `Cross` Layer, the cross layer in Deep & Cross Network (DCN).""" + +import tensorflow as tf + + +class Cross(tf.keras.layers.Layer): + """Cross Layer in Deep & Cross Network to learn explicit feature interactions. + + A layer that creates explicit and bounded-degree feature interactions + efficiently. The `call` method accepts `inputs` as a tuple of size 2 + tensors. The first input `x0` is the base layer that contains the original + features (usually the embedding layer); the second input `xi` is the output + of the previous `Cross` layer in the stack, i.e., the i-th `Cross` + layer. For the first `Cross` layer in the stack, x0 = xi. + + The output is x_{i+1} = x0 .* (W * xi + bias + diag_scale * xi) + xi, + where .* designates elementwise multiplication, W could be a full-rank + matrix, or a low-rank matrix U*V to reduce the computational cost, and + diag_scale increases the diagonal of W to improve training stability ( + especially for the low-rank case). + + References: + 1. [R. Wang et al.](https://arxiv.org/pdf/2008.13535.pdf) + See Eq. (1) for full-rank and Eq. (2) for low-rank version. + 2. [R. Wang et al.](https://arxiv.org/pdf/1708.05123.pdf) + + Example: + + ```python + # after embedding layer in a functional model: + input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64) + x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6) + x1 = Cross()(x0, x0) + x2 = Cross()(x0, x1) + logits = tf.keras.layers.Dense(units=10)(x2) + model = tf.keras.Model(input, logits) + ``` + + Args: + projection_dim: project dimension to reduce the computational cost. 
+ Default is `None` such that a full (`input_dim` by `input_dim`) matrix + W is used. If enabled, a low-rank matrix W = U*V will be used, where U + is of size `input_dim` by `projection_dim` and V is of size + `projection_dim` by `input_dim`. `projection_dim` need to be smaller + than `input_dim`/2 to improve the model efficiency. In practice, we've + observed that `projection_dim` = d/4 consistently preserved the + accuracy of a full-rank version. + diag_scale: a non-negative float used to increase the diagonal of the + kernel W by `diag_scale`, that is, W + diag_scale * I, where I is an + identity matrix. + use_bias: whether to add a bias term for this layer. If set to False, + no bias term will be used. + preactivation: Activation applied to output matrix of the layer, before + multiplication with the input. Can be used to control the scale of the + layer's outputs and improve stability. + kernel_initializer: Initializer to use on the kernel matrix. + bias_initializer: Initializer to use on the bias vector. + kernel_regularizer: Regularizer to use on the kernel matrix. + bias_regularizer: Regularizer to use on bias vector. + + Input shape: A tuple of 2 (batch_size, `input_dim`) dimensional inputs. + Output shape: A single (batch_size, `input_dim`) dimensional output. 
+ """ + + def __init__(self, config, **kwargs): + super(Cross, self).__init__(**kwargs) + self._projection_dim = config.projection_dim + self._diag_scale = config.diag_scale + self._use_bias = config.use_bias + self._preactivation = tf.keras.activations.get(config.preactivation) + self._kernel_initializer = tf.keras.initializers.get(config.kernel_initializer) + self._bias_initializer = tf.keras.initializers.get(config.bias_initializer) + self._kernel_regularizer = tf.keras.regularizers.get(config.kernel_regularizer) + self._bias_regularizer = tf.keras.regularizers.get(config.bias_regularizer) + self._input_dim = None + self._supports_masking = True + + if self._diag_scale < 0: # pytype: disable=unsupported-operands + raise ValueError( + "`diag_scale` should be non-negative. Got `diag_scale` = {}".format( + self._diag_scale)) + + def build(self, input_shape): + last_dim = input_shape[-1] + + if self._projection_dim is None: + self._dense = tf.keras.layers.Dense( + last_dim, + kernel_initializer=_clone_initializer(self._kernel_initializer), + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + use_bias=self._use_bias, + dtype=self.dtype, + activation=self._preactivation, + ) + else: + self._dense_u = tf.keras.layers.Dense( + self._projection_dim, + kernel_initializer=_clone_initializer(self._kernel_initializer), + kernel_regularizer=self._kernel_regularizer, + use_bias=False, + dtype=self.dtype, + ) + self._dense_v = tf.keras.layers.Dense( + last_dim, + kernel_initializer=_clone_initializer(self._kernel_initializer), + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + use_bias=self._use_bias, + dtype=self.dtype, + activation=self._preactivation, + ) + self.built = True + + def call(self, inputs, **kwargs): + """Computes the feature cross. 
+ + Args: + inputs: The input tensor(x0, x) + - x0: The input tensor + - x: Optional second input tensor. If provided, the layer will compute + crosses between x0 and x; if not provided, the layer will compute + crosses between x0 and itself. + + Returns: + Tensor of crosses. + """ + if isinstance(inputs, (list, tuple)): + x0, x = inputs + else: + x0, x = inputs, inputs + + if not self.built: + self.build(x0.shape) + + if x0.shape[-1] != x.shape[-1]: + raise ValueError( + "`x0` and `x` dimension mismatch! Got `x0` dimension {}, and x " + "dimension {}. This case is not supported yet.".format( + x0.shape[-1], x.shape[-1])) + + if self._projection_dim is None: + prod_output = self._dense(x) + else: + prod_output = self._dense_v(self._dense_u(x)) + + prod_output = tf.cast(prod_output, self.compute_dtype) + + if self._diag_scale: + prod_output = prod_output + self._diag_scale * x + + return x0 * prod_output + x + + def get_config(self): + config = { + "projection_dim": + self._projection_dim, + "diag_scale": + self._diag_scale, + "use_bias": + self._use_bias, + "preactivation": + tf.keras.activations.serialize(self._preactivation), + "kernel_initializer": + tf.keras.initializers.serialize(self._kernel_initializer), + "bias_initializer": + tf.keras.initializers.serialize(self._bias_initializer), + "kernel_regularizer": + tf.keras.regularizers.serialize(self._kernel_regularizer), + "bias_regularizer": + tf.keras.regularizers.serialize(self._bias_regularizer), + } + base_config = super(Cross, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def _clone_initializer(initializer): + return initializer.__class__.from_config(initializer.get_config()) diff --git a/easy_rec/python/layers/keras/dot_interaction.py b/easy_rec/python/layers/keras/dot_interaction.py new file mode 100644 index 000000000..50a3966af --- /dev/null +++ b/easy_rec/python/layers/keras/dot_interaction.py @@ -0,0 +1,92 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) 
Alibaba, Inc. and its affiliates. +"""Implements `Dot Interaction` Layer of DLRM model.""" + +import tensorflow as tf + + +class DotInteraction(tf.keras.layers.Layer): + """Dot interaction layer. + + See theory in the DLRM paper: https://arxiv.org/pdf/1906.00091.pdf, + section 2.1.3. Sparse activations and dense activations are combined. + Dot interaction is applied to a batch of input Tensors [e1,...,e_k] of the + same dimension and the output is a batch of Tensors with all distinct pairwise + dot products of the form dot(e_i, e_j) for i <= j if self_interaction is + True, otherwise dot(e_i, e_j) for i < j. + + Attributes: + self_interaction: Boolean indicating if features should self-interact. + If it is True, then the diagonal entries of the interaction matrix are + also taken. + skip_gather: An optimization flag. If it's set then the upper triangle part + of the dot interaction matrix dot(e_i, e_j) is set to 0. The resulting + activations will be of dimension [num_features * num_features] from which + half will be zeros. Otherwise activations will be only lower triangle part + of the interaction matrix. The latter saves space but is much slower. + name: String name of the layer. + """ + + def __init__(self, + config, + self_interaction=False, + skip_gather=False, + name=None, + **kwargs): + self._self_interaction = config.self_interaction + self._skip_gather = config.skip_gather + super(DotInteraction, self).__init__(name=name, **kwargs) + + def call(self, inputs, **kwargs): + """Performs the interaction operation on the tensors in the list. + + The tensors represent transformed dense features and embedded categorical + features. + Pre-condition: The tensors should all have the same shape. + + Args: + inputs: List of features with shapes [batch_size, feature_dim]. + + Returns: + activations: Tensor representing interacted features. 
It has a dimension + `num_features * num_features` if skip_gather is True, otherwise + `num_features * (num_features + 1) / 2` if self_interaction is True and + `num_features * (num_features - 1) / 2` if self_interaction is False. + """ + num_features = len(inputs) + batch_size = tf.shape(inputs[0])[0] + feature_dim = tf.shape(inputs[0])[1] + # concat_features shape: batch_size, num_features, feature_dim + try: + concat_features = tf.concat(inputs, axis=-1) + concat_features = tf.reshape(concat_features, + [batch_size, -1, feature_dim]) + except (ValueError, tf.errors.InvalidArgumentError) as e: + raise ValueError('Input tensors` dimensions must be equal, original' + 'error message: {}'.format(e)) + + # Interact features, select lower-triangular portion, and re-shape. + xactions = tf.matmul(concat_features, concat_features, transpose_b=True) + ones = tf.ones_like(xactions) + if self._self_interaction: + # Selecting lower-triangular portion including the diagonal. + lower_tri_mask = tf.linalg.band_part(ones, -1, 0) + upper_tri_mask = ones - lower_tri_mask + out_dim = num_features * (num_features + 1) // 2 + else: + # Selecting lower-triangular portion not including the diagonal. + upper_tri_mask = tf.linalg.band_part(ones, 0, -1) + lower_tri_mask = ones - upper_tri_mask + out_dim = num_features * (num_features - 1) // 2 + + if self._skip_gather: + # Setting upper triangle part of the interaction matrix to zeros. 
+ activations = tf.where( + condition=tf.cast(upper_tri_mask, tf.bool), + x=tf.zeros_like(xactions), + y=xactions) + out_dim = num_features * num_features + else: + activations = tf.boolean_mask(xactions, lower_tri_mask) + activations = tf.reshape(activations, (batch_size, out_dim)) + return activations diff --git a/easy_rec/python/layers/numerical_embedding.py b/easy_rec/python/layers/numerical_embedding.py index 1c45fa361..6b571a3ad 100644 --- a/easy_rec/python/layers/numerical_embedding.py +++ b/easy_rec/python/layers/numerical_embedding.py @@ -47,6 +47,7 @@ def __init__(self, n_tokens, d_in, d_out, bias=True, scope='nd_linear'): d_in: the input dimension d_out: the output dimension bias: indicates if the underlying linear layers have biases + scope: variable scope name """ with tf.variable_scope(scope): self.weight = tf.get_variable( @@ -100,6 +101,7 @@ def __init__(self, config, scope='periodic_embedding'): A similar grid would be ``[1e-2, 1e-1, 1e0, 1e1, 1e2]``. If possible, add more intermidiate values to this grid. config.output_3d_tensor: whether to output a 3d tensor + scope: variable scope name """ self.config = config if config.embedding_dim % 2: @@ -130,19 +132,22 @@ def __call__(self, inputs, *args, **kwargs): act = get_activation(self.config.linear_activation) if callable(act): emb = act(emb) + output = tf.reshape(emb, [-1, num_features * dim]) + if self.config.output_tensor_list: + return output, tf.unstack(emb, axis=1) if self.config.output_3d_tensor: - return emb - return tf.reshape(emb, [-1, num_features * dim]) + return output, emb + return output class AutoDisEmbedding(object): + """An Embedding Learning Framework for Numerical Features in CTR Prediction. - def __init__(self, config, scope='auto_dis'): - """An Embedding Learning Framework for Numerical Features in CTR Prediction. 
+ Refer: https://arxiv.org/pdf/2012.08986v2.pdf + """ - Refer: https://arxiv.org/pdf/2012.08986v2.pdf - """ + def __init__(self, config, scope='auto_dis'): self.config = config self.emb_dim = config.embedding_dim self.num_bins = config.num_bins @@ -161,22 +166,25 @@ def __call__(self, inputs, *args, **kwargs): mat = tf.get_variable( 'project_mat', shape=[1, num_features, self.num_bins, self.num_bins]) - x = tf.expand_dims(inputs, axis=-1) # [B, num_fea, 1] - hidden = tf.nn.leaky_relu(w * x) # [B, num_fea, num_bin] + x = tf.expand_dims(inputs, axis=-1) # [B, N, 1] + hidden = tf.nn.leaky_relu(w * x) # [B, N, num_bin] - y = tf.matmul(mat, hidden[..., None]) # [B, num_fea, num_bin, 1] - y = tf.squeeze(y, axis=3) # [B, num_fea, num_bin] + y = tf.matmul(mat, hidden[..., None]) # [B, N, num_bin, 1] + y = tf.squeeze(y, axis=3) # [B, N, num_bin] - # keep_prob(float): if dropout_flag is True, keep_prob rate to keep connect; (float, keep_prob=0.8) + # keep_prob(float): if dropout_flag is True, keep_prob rate to keep connect alpha = self.config.keep_prob - x_bar = y + alpha * hidden # [B, num_fea, num_bin] + x_bar = y + alpha * hidden # [B, N, num_bin] t = self.config.temperature - x_hat = tf.nn.softmax(x_bar / t) # [B, num_fea, num_bin] + x_hat = tf.nn.softmax(x_bar / t) # [B, N, num_bin] + + emb = tf.matmul(x_hat[:, :, None, :], meta_emb) # [B, N, 1, D] + emb = tf.squeeze(emb, axis=2) # [B, N, D] + output = tf.reshape(emb, [-1, self.emb_dim * num_features]) # [B, N*D] + + if self.config.output_tensor_list: + return output, tf.unstack(emb, axis=1) - emb = tf.matmul(x_hat[:, :, None, :], meta_emb) # [B, num_fea, 1, emb_dim] - # emb = tf.squeeze(emb, axis=2) # [B, num_fea, emb_dim] if self.config.output_3d_tensor: - return tf.reshape( - emb, [-1, num_features, self.emb_dim]) # [B, num_fea, emb_dim] - return tf.reshape( - emb, [-1, self.emb_dim * num_features]) # [B, num_fea*emb_dim] + return output, emb + return output diff --git a/easy_rec/python/model/easy_rec_model.py 
b/easy_rec/python/model/easy_rec_model.py index 331d0282e..f1a3189f2 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -50,7 +50,7 @@ def __init__(self, self._emb_reg = regularizers.l2_regularizer(self.embedding_regularization) self._l2_reg = regularizers.l2_regularizer(self.l2_regularization) # only used by model with wide feature groups, e.g. WideAndDeep - self._wide_output_dim = -1 + self._wide_output_dim = self.get_wide_output_dim() self._feature_configs = feature_configs self.build_input_layer(model_config, feature_configs) @@ -115,6 +115,13 @@ def l2_regularization(self): l2_regularization = model_config.l2_regularization return l2_regularization + def get_wide_output_dim(self): + model_config = getattr(self._base_model_config, + self._base_model_config.WhichOneof('model')) + if hasattr(model_config, 'wide_output_dim'): + return model_config.wide_output_dim + return -1 + def build_input_layer(self, model_config, feature_configs): self._input_layer = input_layer.InputLayer( feature_configs, diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index 0285f225c..7d6b9e877 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -32,7 +32,7 @@ def __init__(self, def build_predict_graph(self): if not self.has_backbone: raise NotImplementedError( - 'method `build_predict_graph` must be implemented when backbone network do not exits' + 'method `build_predict_graph` must be implemented when backbone network do not exits' ) output = self.backbone @@ -57,9 +57,9 @@ def _output_to_prediction_impl(self, suffix=''): prediction_dict = {} binary_loss_type = { - LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, - LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, - LossType.PAIRWISE_LOGISTIC_LOSS + LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS 
} if loss_type in binary_loss_type: assert num_class == 1, 'num_class must be 1 when loss type is %s' % loss_type.name @@ -86,9 +86,9 @@ def _output_to_prediction_impl(self, prediction_dict['logits' + suffix] = output prediction_dict['probs' + suffix] = probs prediction_dict['logits' + suffix + '_y'] = math_ops.reduce_max( - output, axis=1) + output, axis=1) prediction_dict['probs' + suffix + '_y'] = math_ops.reduce_max( - probs, axis=1) + probs, axis=1) prediction_dict['y' + suffix] = tf.argmax(output, axis=1) elif loss_type == LossType.L2_LOSS: output = tf.squeeze(output, axis=1) @@ -101,12 +101,12 @@ def _output_to_prediction_impl(self, def _add_to_prediction_dict(self, output): if len(self._losses) == 0: prediction_dict = self._output_to_prediction_impl( - output, loss_type=self._loss_type, num_class=self._num_class) + output, loss_type=self._loss_type, num_class=self._num_class) self._prediction_dict.update(prediction_dict) else: for loss in self._losses: prediction_dict = self._output_to_prediction_impl( - output, loss_type=loss.loss_type, num_class=self._num_class) + output, loss_type=loss.loss_type, num_class=self._num_class) self._prediction_dict.update(prediction_dict) def build_rtp_output_dict(self): @@ -118,9 +118,9 @@ def build_rtp_output_dict(self): op = tf.get_default_graph().get_operation_by_name('rank_predict') if len(op.outputs) != 1: raise ValueError( - ('failed to build RTP rank_predict output: op {}[{}] has output ' + - 'size {}, however 1 is expected.').format(op.name, op.type, - len(op.outputs))) + ('failed to build RTP rank_predict output: op {}[{}] has output ' + + 'size {}, however 1 is expected.').format(op.name, op.type, + len(op.outputs))) rank_predict = op.outputs[0] except KeyError: forwarded = None @@ -128,32 +128,32 @@ def build_rtp_output_dict(self): if len(self._losses) > 0: loss_types = {loss.loss_type for loss in self._losses} binary_loss_set = { - LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, - LossType.PAIR_WISE_LOSS, 
LossType.BINARY_FOCAL_LOSS, - LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, - LossType.JRC_LOSS + LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, + LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, + LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, + LossType.JRC_LOSS } if loss_types & binary_loss_set: if 'probs' in self._prediction_dict: forwarded = self._prediction_dict['probs'] else: raise ValueError( - 'failed to build RTP rank_predict output: classification model ' + - "expect 'probs' prediction, which is not found. Please check if" + - ' build_predict_graph() is called.') + 'failed to build RTP rank_predict output: classification model ' + + "expect 'probs' prediction, which is not found. Please check if" + + ' build_predict_graph() is called.') elif loss_types & {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: if 'y' in self._prediction_dict: forwarded = self._prediction_dict['y'] else: raise ValueError( - 'failed to build RTP rank_predict output: regression model expect' - + - "'y' prediction, which is not found. Please check if build_predic" - + 't_graph() is called.') + 'failed to build RTP rank_predict output: regression model expect' + + + "'y' prediction, which is not found. 
Please check if build_predic" + + 't_graph() is called.') else: logging.warning( - 'failed to build RTP rank_predict: unsupported loss type {}'.format( - loss_types)) + 'failed to build RTP rank_predict: unsupported loss type {}'.format( + loss_types)) if forwarded is not None: rank_predict = tf.identity(forwarded, name='rank_predict') if rank_predict is not None: @@ -170,9 +170,9 @@ def _build_loss_impl(self, loss_param=None): loss_dict = {} binary_loss_type = { - LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, - LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, - LossType.PAIRWISE_LOGISTIC_LOSS, LossType.JRC_LOSS + LossType.F1_REWEIGHTED_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS, LossType.JRC_LOSS } if loss_type == LossType.CLASSIFICATION: loss_name = loss_name if loss_name else 'cross_entropy_loss' + suffix @@ -196,23 +196,23 @@ def _build_loss_impl(self, if hasattr(loss_param, 'session_name'): kwargs['session_ids'] = self._feature_dict[loss_param.session_name] loss_dict[loss_name] = loss_builder.build( - loss_type, - self._labels[label_name], - pred, - loss_weight, - num_class, - loss_param=loss_param, - **kwargs) + loss_type, + self._labels[label_name], + pred, + loss_weight, + num_class, + loss_param=loss_param, + **kwargs) return loss_dict def build_loss_graph(self): loss_dict = {} if len(self._losses) == 0: loss_dict = self._build_loss_impl( - self._loss_type, - label_name=self._label_name, - loss_weight=self._sample_weight, - num_class=self._num_class) + self._loss_type, + label_name=self._label_name, + loss_weight=self._sample_weight, + num_class=self._num_class) else: strategy = self._base_model_config.loss_weight_strategy loss_weight = [1.0] @@ -224,26 +224,26 @@ def build_loss_graph(self): if loss_param is not None: loss_param = getattr(loss, loss_param) loss_ops = self._build_loss_impl( - loss.loss_type, - label_name=self._label_name, - 
loss_weight=self._sample_weight, - num_class=self._num_class, - loss_name=loss.loss_name, - loss_param=loss_param) + loss.loss_type, + label_name=self._label_name, + loss_weight=self._sample_weight, + num_class=self._num_class, + loss_name=loss.loss_name, + loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): if strategy == self._base_model_config.Fixed: loss_dict[loss_name] = loss_value * loss.weight elif strategy == self._base_model_config.Uncertainty: if loss.learn_loss_weight: uncertainty = tf.Variable( - 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) + 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: loss_dict[loss_name] = 0.5 * tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + -uncertainty) * loss_value + 0.5 * uncertainty else: loss_dict[loss_name] = tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + -uncertainty) * loss_value + 0.5 * uncertainty else: loss_dict[loss_name] = loss_value * loss.weight elif strategy == self._base_model_config.Random: @@ -272,10 +272,10 @@ def _build_metric_impl(self, from easy_rec.python.core.easyrec_metrics import metrics_tf from easy_rec.python.core import metrics as metrics_lib binary_loss_set = { - LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, - LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, - LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, - LossType.JRC_LOSS + LossType.CLASSIFICATION, LossType.F1_REWEIGHTED_LOSS, + LossType.PAIR_WISE_LOSS, LossType.BINARY_FOCAL_LOSS, + LossType.PAIRWISE_FOCAL_LOSS, LossType.PAIRWISE_LOGISTIC_LOSS, + LossType.JRC_LOSS } metric_dict = {} if metric.WhichOneof('metric') == 'auc': @@ -283,15 +283,15 @@ def _build_metric_impl(self, if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) metric_dict['auc' + suffix] = metrics_tf.auc( - label, - 
self._prediction_dict['probs' + suffix], - num_thresholds=metric.auc.num_thresholds) + label, + self._prediction_dict['probs' + suffix], + num_thresholds=metric.auc.num_thresholds) elif num_class == 2: label = tf.to_int64(self._labels[label_name]) metric_dict['auc' + suffix] = metrics_tf.auc( - label, - self._prediction_dict['probs' + suffix][:, 1], - num_thresholds=metric.auc.num_thresholds) + label, + self._prediction_dict['probs' + suffix][:, 1], + num_thresholds=metric.auc.num_thresholds) else: raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'gauc': @@ -301,20 +301,20 @@ def _build_metric_impl(self, uids = self._feature_dict[metric.gauc.uid_field] if isinstance(uids, tf.sparse.SparseTensor): uids = tf.sparse_to_dense( - uids.indices, uids.dense_shape, uids.values, default_value='') + uids.indices, uids.dense_shape, uids.values, default_value='') uids = tf.reshape(uids, [-1]) metric_dict['gauc' + suffix] = metrics_lib.gauc( - label, - self._prediction_dict['probs' + suffix], - uids=uids, - reduction=metric.gauc.reduction) + label, + self._prediction_dict['probs' + suffix], + uids=uids, + reduction=metric.gauc.reduction) elif num_class == 2: label = tf.to_int64(self._labels[label_name]) metric_dict['gauc' + suffix] = metrics_lib.gauc( - label, - self._prediction_dict['probs' + suffix][:, 1], - uids=self._feature_dict[metric.gauc.uid_field], - reduction=metric.gauc.reduction) + label, + self._prediction_dict['probs' + suffix][:, 1], + uids=self._feature_dict[metric.gauc.uid_field], + reduction=metric.gauc.reduction) else: raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'session_auc': @@ -322,17 +322,17 @@ def _build_metric_impl(self, if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) metric_dict['session_auc' + suffix] = metrics_lib.session_auc( - label, - self._prediction_dict['probs' + suffix], - 
session_ids=self._feature_dict[metric.session_auc.session_id_field], - reduction=metric.session_auc.reduction) + label, + self._prediction_dict['probs' + suffix], + session_ids=self._feature_dict[metric.session_auc.session_id_field], + reduction=metric.session_auc.reduction) elif num_class == 2: label = tf.to_int64(self._labels[label_name]) metric_dict['session_auc' + suffix] = metrics_lib.session_auc( - label, - self._prediction_dict['probs' + suffix][:, 1], - session_ids=self._feature_dict[metric.session_auc.session_id_field], - reduction=metric.session_auc.reduction) + label, + self._prediction_dict['probs' + suffix][:, 1], + session_ids=self._feature_dict[metric.session_auc.session_id_field], + reduction=metric.session_auc.reduction) else: raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'max_f1': @@ -340,11 +340,11 @@ def _build_metric_impl(self, if num_class == 1 or loss_type & {LossType.JRC_LOSS}: label = tf.to_int64(self._labels[label_name]) metric_dict['max_f1' + suffix] = metrics_lib.max_f1( - label, self._prediction_dict['logits' + suffix]) + label, self._prediction_dict['logits' + suffix]) elif num_class == 2: label = tf.to_int64(self._labels[label_name]) metric_dict['max_f1' + suffix] = metrics_lib.max_f1( - label, self._prediction_dict['logits' + suffix][:, 1]) + label, self._prediction_dict['logits' + suffix][:, 1]) else: raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'recall_at_topk': @@ -352,18 +352,18 @@ def _build_metric_impl(self, assert num_class > 1 label = tf.to_int64(self._labels[label_name]) metric_dict['recall_at_topk' + suffix] = metrics_tf.recall_at_k( - label, self._prediction_dict['logits' + suffix], - metric.recall_at_topk.topk) + label, self._prediction_dict['logits' + suffix], + metric.recall_at_topk.topk) elif metric.WhichOneof('metric') == 'mean_absolute_error': label = tf.to_float(self._labels[label_name]) if loss_type & {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: 
metric_dict['mean_absolute_error' + suffix] = metrics_tf.mean_absolute_error( - label, self._prediction_dict['y' + suffix]) + label, self._prediction_dict['y' + suffix]) elif loss_type & {LossType.CLASSIFICATION} and num_class == 1: metric_dict['mean_absolute_error' + suffix] = metrics_tf.mean_absolute_error( - label, self._prediction_dict['probs' + suffix]) + label, self._prediction_dict['probs' + suffix]) else: assert False, 'mean_absolute_error is not supported for this model' elif metric.WhichOneof('metric') == 'mean_squared_error': @@ -371,11 +371,11 @@ def _build_metric_impl(self, if loss_type & {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: metric_dict['mean_squared_error' + suffix] = metrics_tf.mean_squared_error( - label, self._prediction_dict['y' + suffix]) + label, self._prediction_dict['y' + suffix]) elif num_class == 1 and loss_type & binary_loss_set: metric_dict['mean_squared_error' + suffix] = metrics_tf.mean_squared_error( - label, self._prediction_dict['probs' + suffix]) + label, self._prediction_dict['probs' + suffix]) else: assert False, 'mean_squared_error is not supported for this model' elif metric.WhichOneof('metric') == 'root_mean_squared_error': @@ -383,11 +383,11 @@ def _build_metric_impl(self, if loss_type & {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: metric_dict['root_mean_squared_error' + suffix] = metrics_tf.root_mean_squared_error( - label, self._prediction_dict['y' + suffix]) + label, self._prediction_dict['y' + suffix]) elif loss_type & {LossType.CLASSIFICATION} and num_class == 1: metric_dict['root_mean_squared_error' + suffix] = metrics_tf.root_mean_squared_error( - label, self._prediction_dict['probs' + suffix]) + label, self._prediction_dict['probs' + suffix]) else: assert False, 'root_mean_squared_error is not supported for this model' elif metric.WhichOneof('metric') == 'accuracy': @@ -395,7 +395,7 @@ def _build_metric_impl(self, assert num_class > 1 label = tf.to_int64(self._labels[label_name]) metric_dict['accuracy' + 
suffix] = metrics_tf.accuracy( - label, self._prediction_dict['y' + suffix]) + label, self._prediction_dict['y' + suffix]) return metric_dict def build_metric_graph(self, eval_config): @@ -405,18 +405,18 @@ def build_metric_graph(self, eval_config): loss_types = {loss.loss_type for loss in self._losses} for metric in eval_config.metrics_set: metric_dict.update( - self._build_metric_impl( - metric, - loss_type=loss_types, - label_name=self._label_name, - num_class=self._num_class)) + self._build_metric_impl( + metric, + loss_type=loss_types, + label_name=self._label_name, + num_class=self._num_class)) return metric_dict def _get_outputs_impl(self, loss_type, num_class=1, suffix=''): binary_loss_set = { - LossType.F1_REWEIGHTED_LOSS, LossType.JRC_LOSS, LossType.PAIR_WISE_LOSS, - LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, - LossType.PAIRWISE_LOGISTIC_LOSS + LossType.F1_REWEIGHTED_LOSS, LossType.JRC_LOSS, LossType.PAIR_WISE_LOSS, + LossType.BINARY_FOCAL_LOSS, LossType.PAIRWISE_FOCAL_LOSS, + LossType.PAIRWISE_LOGISTIC_LOSS } if loss_type in binary_loss_set: return ['probs' + suffix, 'logits' + suffix] @@ -425,8 +425,8 @@ def _get_outputs_impl(self, loss_type, num_class=1, suffix=''): return ['probs' + suffix, 'logits' + suffix] else: return [ - 'y' + suffix, 'probs' + suffix, 'logits' + suffix, - 'probs' + suffix + '_y', 'logits' + suffix + '_y' + 'y' + suffix, 'probs' + suffix, 'logits' + suffix, + 'probs' + suffix + '_y', 'logits' + suffix + '_y' ] elif loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: return ['y' + suffix] diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto index b77be93be..b37b14b2c 100644 --- a/easy_rec/python/protos/backbone.proto +++ b/easy_rec/python/protos/backbone.proto @@ -16,37 +16,24 @@ message Lambda { required string expression = 1; } -message Operator { - oneof Op { - MLP mlp = 102; - PeriodicEmbedding periodic_embedding = 103; - AutoDisEmbedding auto_dis_embedding = 104; - 
SequenceLayer sequence_encoder = 105; - HighWayTower highway = 106; - MaskNet masknet = 107; - SENet senet = 108; - FiBiNetTower fibinet = 109; - FM fm = 110; - Concatenate concat = 111; - Reshape reshape = 112; - Add add = 113; - Dot dot = 114; - Lambda Lambda = 115; - OpChain chain = 116; - } +message Input { + required string name = 1; + optional string input_fn = 2; } -message OpChain { - repeated Operator ops = 1; +message KerasLayer { + required string class_name = 1; + optional Any params = 2; } message Block { required string name = 1; // the input names of feature groups or other blocks - repeated string inputs = 2; + repeated Input inputs = 2; optional int32 input_concat_axis = 3 [default = -1]; optional string extra_input_fn = 4; oneof layer { + Lambda Lambda = 100; InputLayer input_layer = 101; MLP mlp = 102; PeriodicEmbedding periodic_embedding = 103; @@ -57,12 +44,11 @@ message Block { SENet senet = 108; FiBiNetTower fibinet = 109; FM fm = 110; - Concatenate concat = 111; - Reshape reshape = 112; + // Concatenate concat = 111; + // Reshape reshape = 112; Add add = 113; Dot dot = 114; - Lambda Lambda = 115; - OpChain chain = 116; + //OpChain chain = 116; } } @@ -71,3 +57,26 @@ message BackboneTower { repeated string concat_blocks = 2; optional MLP top_mlp = 3; } + +//message Operator { +// oneof Op { +// MLP mlp = 102; +// PeriodicEmbedding periodic_embedding = 103; +// AutoDisEmbedding auto_dis_embedding = 104; +// HighWayTower highway = 106; +// MaskNet masknet = 107; +// SENet senet = 108; +// FiBiNetTower fibinet = 109; +// FM fm = 110; +// Concatenate concat = 111; +// Reshape reshape = 112; +// Add add = 113; +// Dot dot = 114; +// Lambda Lambda = 115; +// OpChain chain = 116; +// } +//} +// +//message OpChain { +// repeated Operator ops = 1; +//} diff --git a/easy_rec/python/protos/dnn.proto b/easy_rec/python/protos/dnn.proto index 1564394eb..00fe79d82 100644 --- a/easy_rec/python/protos/dnn.proto +++ b/easy_rec/python/protos/dnn.proto @@ -24,4 
+24,4 @@ message MLP { optional bool use_bn = 4 [default = true]; optional bool last_layer_no_activation = 5 [default = false]; optional bool last_layer_no_batch_norm = 6 [default = false]; -} \ No newline at end of file +} diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 940ee88f3..48c6f4f8d 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -33,6 +33,7 @@ message DummyModel { message RankModel { optional float l2_regularization = 1; optional bool add_head_logits_layer = 2 [default=true]; + optional uint32 wide_output_dim = 3; } // for knowledge distillation diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index 576bfdf4f..e7ad65460 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -8,8 +8,9 @@ message InputLayer { optional bool do_layer_norm = 2; optional float dropout_rate = 3; optional float feature_dropout_rate = 4; - optional bool output_feature_list = 5; - optional bool output_3d_tensor = 6; + optional bool only_output_feature_list = 5; + optional bool only_output_3d_tensor = 6; + optional bool output_2d_tensor_and_feature_list = 7; } message HighWayTower { @@ -25,6 +26,7 @@ message PeriodicEmbedding { optional bool add_linear_layer = 3 [default = true]; optional string linear_activation = 4 [default = 'relu']; optional bool output_3d_tensor = 5; + optional bool output_tensor_list = 6; } message AutoDisEmbedding { @@ -33,6 +35,7 @@ message AutoDisEmbedding { required float keep_prob = 3 [default = 0.8]; required float temperature = 4; optional bool output_3d_tensor = 5; + optional bool output_tensor_list = 6; } message Concatenate { @@ -49,4 +52,4 @@ message Add { } message Dot { -} \ No newline at end of file +} diff --git a/easy_rec/python/train_eval.py b/easy_rec/python/train_eval.py index 51c904451..f12784ac1 100644 --- a/easy_rec/python/train_eval.py +++ 
b/easy_rec/python/train_eval.py @@ -95,12 +95,11 @@ help='is use check mode') parser.add_argument( '--selected_cols', type=str, default=None, help='select input columns') - parser.add_argument( - '--gpu', type=str, default=None, help='gpu id') + parser.add_argument('--gpu', type=str, default=None, help='gpu id') args, extra_args = parser.parse_known_args() if args.gpu is not None: - os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu edit_config_json = {} if args.edit_config_json: diff --git a/easy_rec/python/utils/__init__.py b/easy_rec/python/utils/__init__.py index 8a9b460ac..09dc89476 100644 --- a/easy_rec/python/utils/__init__.py +++ b/easy_rec/python/utils/__init__.py @@ -1,17 +1,15 @@ - class conditional(object): - """Wrap another context manager and enter it only if condition is true. - """ + """Wrap another context manager and enter it only if condition is true.""" - def __init__(self, condition, contextmanager): - self.condition = condition - self.contextmanager = contextmanager + def __init__(self, condition, contextmanager): + self.condition = condition + self.contextmanager = contextmanager - def __enter__(self): - """Conditionally enter a context manager.""" - if self.condition: - return self.contextmanager.__enter__() + def __enter__(self): + """Conditionally enter a context manager.""" + if self.condition: + return self.contextmanager.__enter__() - def __exit__(self, *args): - if self.condition: - return self.contextmanager.__exit__(*args) + def __exit__(self, *args): + if self.condition: + return self.contextmanager.__exit__(*args) diff --git a/easy_rec/python/utils/load_class.py b/easy_rec/python/utils/load_class.py index 2da1e4e41..efd2cc9cb 100644 --- a/easy_rec/python/utils/load_class.py +++ b/easy_rec/python/utils/load_class.py @@ -220,3 +220,30 @@ def create_class(cls, name): return newclass return RegisterABCMeta + + +def load_keras_layer(name): + """Load keras layer class. 
+ + Args: + name: keras layer name + + Return: + modules or functions or classes + """ + name = name.strip() + if name == '' or name is None: + return None + + path = 'easy_rec.python.layers.keras.' + name + try: + return pydoc.locate(path) + except pydoc.ErrorDuringImport: + path = 'tensorflow.keras.layers.' + name + try: + return pydoc.locate(path) + except pydoc.ErrorDuringImport: + print('load keras layer %s failed' % name) + logging.error('load keras layer %s failed: %s' % + (name, traceback.format_exc())) + return None diff --git a/easy_rec/python/utils/tf_utils.py b/easy_rec/python/utils/tf_utils.py index efcd7df12..e4d39c012 100644 --- a/easy_rec/python/utils/tf_utils.py +++ b/easy_rec/python/utils/tf_utils.py @@ -62,7 +62,7 @@ def dot_op(features): """Compute inner dot between any two pair tensors. Args: - features: + features: must be one of - List of 2D tensor with shape: ``(batch_size,embedding_size)``. - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` Return: diff --git a/examples/configs/deepfm_backbone_on_criteo.config b/examples/configs/deepfm_backbone_on_criteo.config index c94838daf..467d8ad55 100644 --- a/examples/configs/deepfm_backbone_on_criteo.config +++ b/examples/configs/deepfm_backbone_on_criteo.config @@ -1,14 +1,17 @@ train_input_path: "examples/data/criteo/criteo_train_data" eval_input_path: "examples/data/criteo/criteo_test_data" -model_dir: "examples/ckpt/deepfm_backbone_criteo" +model_dir: "examples/ckpt/deepfm_backbone_criteo_w" train_config { log_step_count_steps: 500 optimizer_config: { adam_optimizer: { learning_rate: { - constant_learning_rate { - learning_rate: 0.001 + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 } } } @@ -328,19 +331,19 @@ feature_config: { } features: { input_names: "C1" - hash_bucket_size: 2000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C2" - 
hash_bucket_size: 1000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C3" - hash_bucket_size: 2500000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } @@ -352,132 +355,132 @@ feature_config: { } features: { input_names: "C5" - hash_bucket_size: 500 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C6" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C7" - hash_bucket_size: 13000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C8" - hash_bucket_size: 1000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C9" - hash_bucket_size: 10 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C10" - hash_bucket_size: 100000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C11" - hash_bucket_size: 6000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C12" - hash_bucket_size: 2000000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C13" - hash_bucket_size: 4000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C14" - hash_bucket_size: 100 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C15" - hash_bucket_size: 20000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C16" - hash_bucket_size: 1250000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C17" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C18" - hash_bucket_size: 6000 + 
hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C19" - hash_bucket_size: 3000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C20" - hash_bucket_size: 10 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C21" - hash_bucket_size: 1250000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C22" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C23" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C24" - hash_bucket_size: 280000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 }features: { input_names: "C25" - hash_bucket_size: 200 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C26" - hash_bucket_size: 150000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } @@ -485,7 +488,7 @@ feature_config: { model_config: { model_class: 'RankModel' feature_groups: { - group_name: "features" + group_name: "deep_features" feature_names: "F1" feature_names: "F2" feature_names: "F3" @@ -527,35 +530,98 @@ model_config: { feature_names: "C26" wide_deep:DEEP } + feature_groups: { + group_name: "wide_features" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + 
feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:WIDE + } backbone { blocks { - name: 'emb_list' - inputs: 'features' + name: 'wide_features' input_layer { - output_feature_list: true + } + } + blocks { + name: 'wide_logit' + inputs { + name: 'wide_features' + } + Lambda { + expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' + } + } + blocks { + name: 'deep_features' + input_layer { + output_2d_tensor_and_feature_list: true } } blocks { name: 'fm' - inputs: 'emb_list' + inputs { + name: 'deep_features' + input_fn: 'lambda x: x[1]' + } fm { use_variant: true } } blocks { name: 'deep' - inputs: 'features' + inputs { + name: 'deep_features' + input_fn: 'lambda x: x[0]' + } mlp { hidden_units: [256, 128, 64] } } - concat_blocks: ['fm', 'deep'] + concat_blocks: ['wide_logit', 'fm', 'deep'] top_mlp { hidden_units: [256, 128, 64] } } rank_model { l2_regularization: 1e-5 + wide_output_dim: 1 } embedding_regularization: 1e-5 } diff --git a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config index 04dde5589..970508598 100644 --- a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config +++ b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config @@ -7,8 +7,11 @@ train_config { optimizer_config: { adam_optimizer: { learning_rate: { - constant_learning_rate { - learning_rate: 0.001 + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 } } } @@ -315,19 +318,19 @@ feature_config: { } features: { input_names: "C1" - hash_bucket_size: 2000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: 
{ input_names: "C2" - hash_bucket_size: 1000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C3" - hash_bucket_size: 2500000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } @@ -339,135 +342,239 @@ feature_config: { } features: { input_names: "C5" - hash_bucket_size: 500 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C6" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C7" - hash_bucket_size: 13000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C8" - hash_bucket_size: 1000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C9" - hash_bucket_size: 10 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C10" - hash_bucket_size: 100000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C11" - hash_bucket_size: 6000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C12" - hash_bucket_size: 2000000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C13" - hash_bucket_size: 4000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C14" - hash_bucket_size: 100 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C15" - hash_bucket_size: 20000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C16" - hash_bucket_size: 1250000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C17" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C18" - 
hash_bucket_size: 6000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C19" - hash_bucket_size: 3000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C20" - hash_bucket_size: 10 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C21" - hash_bucket_size: 1250000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C22" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C23" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C24" - hash_bucket_size: 280000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 }features: { input_names: "C25" - hash_bucket_size: 200 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C26" - hash_bucket_size: 150000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } + features: { + feature_name: "D1" + input_names: "F1" + embedding_dim:16 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + feature_name: "D2" + input_names: "F2" + embedding_dim:16 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + feature_name: "D3" + input_names: "F3" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + feature_name: "D4" + input_names: "F4" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + feature_name: "D5" + input_names: "F5" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + feature_name: "D6" + input_names: "F6" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + feature_name: "D7" + 
input_names: "F7" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + feature_name: "D8" + input_names: "F8" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + feature_name: "D9" + input_names: "F9" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + feature_name: "D10" + input_names: "F10" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + feature_name: "D11" + input_names: "F11" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + feature_name: "D12" + input_names: "F12" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + feature_name: "D13" + input_names: "F13" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } } model_config: { model_class: 'RankModel' @@ -518,56 +625,114 @@ model_config: { feature_names: "C26" wide_deep:DEEP } + feature_groups: { + group_name: "wide_features" + feature_names: "D1" + feature_names: "D2" + feature_names: "D3" + feature_names: "D4" + feature_names: "D5" + feature_names: "D6" + feature_names: "D7" + feature_names: "D8" + feature_names: "D9" + feature_names: "D10" + feature_names: "D11" + feature_names: "D12" + feature_names: "D13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + 
wide_deep:WIDE + } backbone { blocks { - name: 'cat_emb' - inputs: 'categorical_features' - input_layer { - output_3d_tensor: true + name: 'wide_logit' + inputs { + name: 'wide_features' + } + Lambda { + expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' } } blocks { name: 'num_emb' - inputs: 'numerical_features' + inputs { + name: 'numerical_features' + } auto_dis_embedding { embedding_dim: 16 num_bins: 20 temperature: 0.815 - output_3d_tensor: true + output_tensor_list: true } } blocks { - name: 'fm' - inputs: 'cat_emb' - inputs: 'num_emb' - input_concat_axis: 1 - fm { - use_variant: true + name: 'categorical_features' + input_layer { + output_2d_tensor_and_feature_list: true } } blocks { - name: 'cat_and_num' - inputs: 'cat_emb' - inputs: 'num_emb' - input_concat_axis: 1 - reshape { - dims: [-1, 624] + name: 'fm' + inputs { + name: 'categorical_features' + input_fn: 'lambda x: x[1]' + } + inputs { + name: 'num_emb' + input_fn: 'lambda x: x[1]' + } + fm { + use_variant: true } } blocks { name: 'deep' - inputs: 'cat_and_num' + inputs { + name: 'categorical_features' + input_fn: 'lambda x: x[0]' + } + inputs { + name: 'num_emb' + input_fn: 'lambda x: x[0]' + } mlp { hidden_units: [256, 128, 64] } } - concat_blocks: ['fm', 'deep'] + # omitting 'wide_logit' here may give better performance + concat_blocks: ['wide_logit', 'fm', 'deep'] top_mlp { hidden_units: [256, 128, 64] } } rank_model { l2_regularization: 1e-5 + wide_output_dim: 1 } embedding_regularization: 1e-5 } diff --git a/examples/configs/deepfm_backbone_on_criteo_with_periodic.config b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config index 2affcc9ae..82dd01998 100644 --- a/examples/configs/deepfm_backbone_on_criteo_with_periodic.config +++ b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config @@ -7,8 +7,11 @@ train_config { optimizer_config: { adam_optimizer: { learning_rate: { - constant_learning_rate { - learning_rate: 0.001 + exponential_decay_learning_rate { +
initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 } } } @@ -315,19 +318,19 @@ feature_config: { } features: { input_names: "C1" - hash_bucket_size: 2000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C2" - hash_bucket_size: 1000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C3" - hash_bucket_size: 2500000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } @@ -339,135 +342,239 @@ feature_config: { } features: { input_names: "C5" - hash_bucket_size: 500 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C6" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C7" - hash_bucket_size: 13000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C8" - hash_bucket_size: 1000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C9" - hash_bucket_size: 10 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C10" - hash_bucket_size: 100000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C11" - hash_bucket_size: 6000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C12" - hash_bucket_size: 2000000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C13" - hash_bucket_size: 4000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C14" - hash_bucket_size: 100 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C15" - hash_bucket_size: 20000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { 
input_names: "C16" - hash_bucket_size: 1250000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C17" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C18" - hash_bucket_size: 6000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C19" - hash_bucket_size: 3000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C20" - hash_bucket_size: 10 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C21" - hash_bucket_size: 1250000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C22" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C23" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C24" - hash_bucket_size: 280000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 }features: { input_names: "C25" - hash_bucket_size: 200 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C26" - hash_bucket_size: 150000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } + features: { + feature_name: "D1" + input_names: "F1" + embedding_dim:16 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + feature_name: "D2" + input_names: "F2" + embedding_dim:16 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + feature_name: "D3" + input_names: "F3" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + feature_name: "D4" + input_names: "F4" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + feature_name: "D5" + input_names: 
"F5" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + feature_name: "D6" + input_names: "F6" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + feature_name: "D7" + input_names: "F7" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + feature_name: "D8" + input_names: "F8" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + feature_name: "D9" + input_names: "F9" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + feature_name: "D10" + input_names: "F10" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + feature_name: "D11" + input_names: "F11" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + feature_name: "D12" + input_names: "F12" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + feature_name: "D13" + input_names: "F13" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } } model_config: { model_class: 'RankModel' @@ -518,54 +625,112 @@ model_config: { feature_names: "C26" wide_deep:DEEP } + feature_groups: { + group_name: "wide_features" + feature_names: "D1" + feature_names: "D2" + feature_names: "D3" + feature_names: "D4" + feature_names: "D5" + feature_names: "D6" + feature_names: "D7" + feature_names: "D8" + feature_names: "D9" + feature_names: "D10" + feature_names: "D11" + feature_names: "D12" + feature_names: "D13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: 
"C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:WIDE + } backbone { blocks { - name: 'cat_emb' - inputs: 'categorical_features' - input_layer { - output_3d_tensor: true + name: 'wide_logit' + inputs { + name: 'wide_features' + } + Lambda { + expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' } } blocks { name: 'num_emb' - inputs: 'numerical_features' + inputs { + name: 'numerical_features' + } periodic_embedding { embedding_dim: 16 - output_3d_tensor: true + sigma: 0.005 + output_tensor_list: true } } blocks { - name: 'fm' - inputs: 'cat_emb' - inputs: 'num_emb' - input_concat_axis: 1 - fm { - use_variant: true + name: 'categorical_features' + input_layer { + output_2d_tensor_and_feature_list: true } } blocks { - name: 'cat_and_num' - inputs: 'cat_emb' - inputs: 'num_emb' - input_concat_axis: 1 - reshape { - dims: [-1, 624] + name: 'fm' + inputs { + name: 'categorical_features' + input_fn: 'lambda x: x[1]' + } + inputs { + name: 'num_emb' + input_fn: 'lambda x: x[1]' + } + fm { + use_variant: true } } blocks { name: 'deep' - inputs: 'cat_and_num' + inputs { + name: 'categorical_features' + input_fn: 'lambda x: x[0]' + } + inputs { + name: 'num_emb' + input_fn: 'lambda x: x[0]' + } mlp { hidden_units: [256, 128, 64] } } - concat_blocks: ['fm', 'deep'] + concat_blocks: ['wide_logit', 'fm', 'deep'] top_mlp { hidden_units: [256, 128, 64] } } rank_model { l2_regularization: 1e-5 + wide_output_dim: 1 } embedding_regularization: 1e-5 } diff --git a/examples/configs/dlrm_backbone_on_criteo.config b/examples/configs/dlrm_backbone_on_criteo.config index 7d698e858..e87acef39 100644 --- a/examples/configs/dlrm_backbone_on_criteo.config +++ b/examples/configs/dlrm_backbone_on_criteo.config @@ -1,3 +1,4 @@ +# align with raw dlrm model 
train_input_path: "examples/data/criteo/criteo_train_data" eval_input_path: "examples/data/criteo/criteo_test_data" model_dir: "examples/ckpt/dlrm_backbone_criteo" @@ -7,8 +8,11 @@ train_config { optimizer_config: { adam_optimizer: { learning_rate: { - constant_learning_rate { - learning_rate: 0.001 + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 } } } @@ -315,19 +319,19 @@ feature_config: { } features: { input_names: "C1" - hash_bucket_size: 2000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C2" - hash_bucket_size: 1000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C3" - hash_bucket_size: 2500000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } @@ -339,132 +343,132 @@ feature_config: { } features: { input_names: "C5" - hash_bucket_size: 500 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C6" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C7" - hash_bucket_size: 13000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C8" - hash_bucket_size: 1000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C9" - hash_bucket_size: 10 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C10" - hash_bucket_size: 100000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C11" - hash_bucket_size: 6000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C12" - hash_bucket_size: 2000000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C13" - hash_bucket_size: 4000 + 
hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C14" - hash_bucket_size: 100 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C15" - hash_bucket_size: 20000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C16" - hash_bucket_size: 1250000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C17" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C18" - hash_bucket_size: 6000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C19" - hash_bucket_size: 3000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C20" - hash_bucket_size: 10 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C21" - hash_bucket_size: 1250000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C22" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C23" - hash_bucket_size: 50 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C24" - hash_bucket_size: 280000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 }features: { input_names: "C25" - hash_bucket_size: 200 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } features: { input_names: "C26" - hash_bucket_size: 150000 + hash_bucket_size: 1000000 feature_type: IdFeature embedding_dim: 16 } @@ -521,42 +525,41 @@ model_config: { backbone { blocks { name: 'bottom_mlp' - inputs: 'dense' + inputs { + name: 'dense' + } mlp { hidden_units: [64, 32, 16] } } blocks { - name: 'bottom_list' - inputs: 'bottom_mlp' - Lambda { - expression: 'lambda x: [x]' - } - } 
- blocks { - name: 'sparse_features' - inputs: 'sparse' + name: 'sparse' input_layer { - output_feature_list: true + output_2d_tensor_and_feature_list: true } } blocks { name: 'dot' - inputs: 'bottom_list' - inputs: 'sparse_features' + inputs { + name: 'bottom_mlp' + input_fn: 'lambda x: [x]' + } + inputs { + name: 'sparse' + input_fn: 'lambda x: x[1]' + } dot { } } blocks { - name: 'dot_and_dense' - inputs: 'bottom_mlp' - inputs: 'dot' - concat { - axis: 1 + name: 'sparse_2d' + inputs { + name: 'sparse' + input_fn: 'lambda x: x[0]' } } - concat_blocks: ['dot_and_dense'] + concat_blocks: ['sparse_2d', 'dot'] top_mlp { - hidden_units: [128, 64] + hidden_units: [256, 128, 64] } } rank_model { diff --git a/examples/configs/dlrm_on_criteo.config b/examples/configs/dlrm_on_criteo.config new file mode 100644 index 000000000..e6c45d574 --- /dev/null +++ b/examples/configs/dlrm_on_criteo.config @@ -0,0 +1,534 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_criteo_ckpt" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + 
input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + 
input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + 
min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + 
feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'DLRM' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + 
feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + dlrm { + bot_dnn { + hidden_units: [64, 32, 16] + } + top_dnn { + hidden_units: [256, 128, 64] + } + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/dlrm_on_criteo_with_autodis.config b/examples/configs/dlrm_on_criteo_with_autodis.config new file mode 100644 index 000000000..eb81e0a05 --- /dev/null +++ b/examples/configs/dlrm_on_criteo_with_autodis.config @@ -0,0 +1,578 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_autodis_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" 
+ } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + 
input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + 
hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + 
features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'num_emb' + inputs { + name: 'dense' + } + auto_dis_embedding { + embedding_dim: 16 + num_bins: 20 + temperature: 0.815 + output_tensor_list: true + } + } + blocks { + name: 'sparse' + input_layer { + output_2d_tensor_and_feature_list: true 
+ } + } + blocks { + name: 'dot' + inputs { + name: 'num_emb' + input_fn: 'lambda x: x[1]' + } + inputs { + name: 'sparse' + input_fn: 'lambda x: x[1]' + } + dot { } + } + blocks { + name: 'sparse_2d' + inputs { + name: 'sparse' + input_fn: 'lambda x: x[0]' + } + } + blocks { + name: 'num_emb_2d' + inputs { + name: 'num_emb' + input_fn: 'lambda x: x[0]' + } + } + concat_blocks: ['num_emb_2d', 'dot', 'sparse_2d'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/dlrm_standard_on_criteo.config b/examples/configs/dlrm_standard_on_criteo.config new file mode 100644 index 000000000..131a94607 --- /dev/null +++ b/examples/configs/dlrm_standard_on_criteo.config @@ -0,0 +1,560 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_standard_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: 
FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + 
default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + 
features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature 
+ embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks 
{ + name: 'bottom_mlp' + inputs { + name: 'dense' + } + mlp { + hidden_units: [64, 32, 16] + } + } + blocks { + name: 'sparse' + input_layer { + only_output_feature_list: true + } + } + blocks { + name: 'dot' + inputs { + name: 'bottom_mlp' + input_fn: 'lambda x: [x]' + } + inputs { + name: 'sparse' + } + dot { } + } + concat_blocks: ['bottom_mlp', 'dot'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/data/criteo/process_criteo_kaggle.py b/examples/data/criteo/process_criteo_kaggle.py index 5b9cb4f34..e610e33a6 100644 --- a/examples/data/criteo/process_criteo_kaggle.py +++ b/examples/data/criteo/process_criteo_kaggle.py @@ -5,6 +5,12 @@ target_columns = ['label'] columns = target_columns + dense_features + category_features +# data_train = pd.read_csv( +# 'criteo_train_data', sep='\t', names=columns) +# +# for col in category_features: +# print(col, data_train[col].nunique()) + data_train = pd.read_csv( 'criteo_kaggle_display/train.txt', sep='\t', names=columns) diff --git a/examples/readme.md b/examples/readme.md index 286b292b1..94643541e 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -209,25 +209,29 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee - MovieLens-1M - | Model | Epoch | AUC | - | --------- | ----- | ------ | - | Wide&Deep | 1 | 0.8558 | - | DeepFM | 1 | 0.8688 | - | DeepFM(Backbone)|1| 0.8876 | - | DCN | 1 | 0.8576 | - | AutoInt | 1 | 0.8513 | - | MaskNet | 1 | 0.8872 | - | FibiNet | 1 | 0.8879 | + | Model | Epoch | AUC | + | ---------------- | ----- | ------ | + | Wide&Deep | 1 | 0.8558 | + | DeepFM | 1 | 0.8688 | + | DeepFM(Backbone) | 1 | 0.8876 | + | DCN | 1 | 0.8576 | + | AutoInt | 1 | 0.8513 | + | MaskNet | 1 | 0.8872 | + | FibiNet | 1 | 0.8879 | - Criteo-Research - | Model | Epoch | AUC | - | ------ | ----- | ------ | - | FM | 1 | 0.7577 | - | DeepFM | 1 | 0.7967 | - | DeepFM(backbone)| 1 | 
0.7965 | - | DeepFM(periodic)| 1 | 0.7982 | - | DeepFM(autodis) | 1 | 0.7983 | + | Model | Epoch | AUC | + | ----------------- | ----- | ------ | + | FM | 1 | 0.7577 | + | DeepFM | 1 | 0.7970 | + | DeepFM (backbone) | 1 | 0.7970 | + | DeepFM (periodic) | 1 | 0.7980 | + | DeepFM (autodis) | 1 | 0.7979 | + | DLRM | 1 | 0.79785 | + | DLRM (backbone) | 1 | 0.7993 | + | DLRM (standard) | 1 | 0.7949 | + | DLRM (autodis) | 1 | 0.7984 | ### 召回模型 From 5cf7d8f205328a2aa58d38cccbb199ee1502e483 Mon Sep 17 00:00:00 2001 From: weisu Date: Sun, 18 Jun 2023 16:57:39 +0800 Subject: [PATCH 34/54] [feat]: add more backbone blocks --- easy_rec/python/layers/backbone.py | 307 ++++----- easy_rec/python/layers/common_layers.py | 176 +----- easy_rec/python/layers/fibinet.py | 54 -- easy_rec/python/layers/fm.py | 43 -- easy_rec/python/layers/keras/__init__.py | 12 + easy_rec/python/layers/keras/blocks.py | 117 ++++ easy_rec/python/layers/{ => keras}/bst.py | 24 +- easy_rec/python/layers/keras/dcn.py | 154 ++--- easy_rec/python/layers/{ => keras}/din.py | 37 +- .../python/layers/keras/dot_interaction.py | 35 +- easy_rec/python/layers/keras/fibinet.py | 229 +++++++ easy_rec/python/layers/keras/fm.py | 46 ++ easy_rec/python/layers/keras/mask_net.py | 102 +++ .../layers/{ => keras}/numerical_embedding.py | 100 +-- easy_rec/python/layers/mask_net.py | 108 ---- easy_rec/python/layers/sequence_encoder.py | 4 +- easy_rec/python/layers/utils.py | 57 ++ easy_rec/python/model/easy_rec_model.py | 102 +-- easy_rec/python/model/rank_model.py | 20 +- easy_rec/python/protos/backbone.proto | 77 +-- easy_rec/python/protos/dnn.proto | 10 +- easy_rec/python/protos/easy_rec_model.proto | 5 +- easy_rec/python/protos/feature_config.proto | 2 +- easy_rec/python/protos/fibinet.proto | 23 - easy_rec/python/protos/keras_layer.proto | 26 + easy_rec/python/protos/layer.proto | 42 +- easy_rec/python/protos/masknet.proto | 17 - easy_rec/python/utils/load_class.py | 20 +- easy_rec/python/utils/tf_utils.py | 65 +- 
.../configs/deepfm_backbone_on_criteo.config | 26 +- .../deepfm_backbone_on_movielens.config | 72 ++- examples/configs/deepfm_on_movielens.config | 2 +- .../configs/dlrm_backbone_on_criteo.config | 11 +- .../dlrm_on_criteo_with_autodis.config | 17 +- .../dlrm_on_criteo_with_periodic.config | 591 ++++++++++++++++++ .../configs/dlrm_standard_on_criteo.config | 11 +- examples/configs/fibinet_on_movielens.config | 37 +- examples/configs/masknet_on_movielens.config | 37 +- examples/readme.md | 25 +- 39 files changed, 1798 insertions(+), 1045 deletions(-) delete mode 100644 easy_rec/python/layers/fibinet.py create mode 100644 easy_rec/python/layers/keras/blocks.py rename easy_rec/python/layers/{ => keras}/bst.py (89%) rename easy_rec/python/layers/{ => keras}/din.py (67%) create mode 100644 easy_rec/python/layers/keras/fibinet.py create mode 100644 easy_rec/python/layers/keras/fm.py create mode 100644 easy_rec/python/layers/keras/mask_net.py rename easy_rec/python/layers/{ => keras}/numerical_embedding.py (64%) delete mode 100644 easy_rec/python/layers/mask_net.py delete mode 100644 easy_rec/python/protos/fibinet.proto create mode 100644 easy_rec/python/protos/keras_layer.proto delete mode 100644 easy_rec/python/protos/masknet.proto create mode 100644 examples/configs/dlrm_on_criteo_with_periodic.config diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 82d42508c..139e31fee 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -4,36 +4,53 @@ import tensorflow as tf -from easy_rec.python.layers import dnn -from easy_rec.python.layers.common_layers import Concatenate from easy_rec.python.layers.common_layers import EnhancedInputLayer -from easy_rec.python.layers.common_layers import SENet -from easy_rec.python.layers.common_layers import highway -from easy_rec.python.layers.fibinet import FiBiNetLayer -from easy_rec.python.layers.fm import FMLayer -from easy_rec.python.layers.mask_net import 
MaskNet -from easy_rec.python.layers.numerical_embedding import AutoDisEmbedding -from easy_rec.python.layers.numerical_embedding import PeriodicEmbedding +from easy_rec.python.layers.keras import MLP +from easy_rec.python.layers.utils import Parameter from easy_rec.python.protos import backbone_pb2 -from easy_rec.python.protos import layer_pb2 from easy_rec.python.utils.dag import DAG -from easy_rec.python.utils.tf_utils import add_op -from easy_rec.python.utils.tf_utils import dot_op +from easy_rec.python.utils.load_class import load_keras_layer +from google.protobuf import struct_pb2 if tf.__version__ >= '2.0': tf = tf.compat.v1 +def block_input(config, block_outputs): + inputs = [] + for input_node in config.inputs: + input_name = input_node.name + if input_name in block_outputs: + input_feature = block_outputs[input_name] + else: + raise KeyError('input name `%s` does not exists' % input_name) + if input_node.HasField('input_fn'): + fn = eval(input_node.input_fn) + input_feature = fn(input_feature) + inputs.append(input_feature) + + if config.merge_inputs_into_list: + output = inputs + else: + output = concat_inputs(inputs, config.input_concat_axis, config.name) + + if config.HasField('extra_input_fn'): + fn = eval(config.extra_input_fn) + output = fn(output) + return output + + class Backbone(object): + """Configurable Backbone Network.""" - def __init__(self, config, model, features, input_layer, l2_reg=None): - self._model = model + def __init__(self, config, features, input_layer, l2_reg=None): self._config = config self._features = features self._input_layer = input_layer self._l2_reg = l2_reg self._dag = DAG() self._name_to_blocks = {} + self.loss_dict = {} input_feature_groups = set() for block in config.blocks: self._dag.add_node(block.name) @@ -43,6 +60,10 @@ def __init__(self, config, model, features, input_layer, l2_reg=None): if len(block.inputs) != 0: raise ValueError('no input allowed for input_layer: ' + block.name) input_name = block.name + if 
not input_layer.has_group(input_name): + raise KeyError( + 'input_layer\'s name must be one of feature group, invalid: ' + + input_name) if input_name in input_feature_groups: raise ValueError('input `%s` already exists in other block' % input_name) @@ -72,7 +93,7 @@ def __init__(self, config, model, features, input_layer, l2_reg=None): logging.info('adding an input_layer block: ' + input_name) new_block = backbone_pb2.Block() new_block.name = input_name - new_block.input_layer.CopyFrom(layer_pb2.InputLayer()) + new_block.input_layer.CopyFrom(backbone_pb2.InputLayer()) self._name_to_blocks[input_name] = new_block self._dag.add_node(input_name) self._dag.add_edge(input_name, block.name) @@ -84,30 +105,7 @@ def __init__(self, config, model, features, input_layer, l2_reg=None): num_groups = len(input_feature_groups) assert num_groups > 0, 'there must be at least one input layer' - def block_input(self, config, block_outputs, output_list=False): - inputs = [] - for input_node in config.inputs: - input_name = input_node.name - if input_name in block_outputs: - input_feature = block_outputs[input_name] - else: - raise KeyError('input name `%s` does not exists' % input_name) - if input_node.HasField('input_fn'): - fn = eval(input_node.input_fn) - input_feature = fn(input_feature) - inputs.append(input_feature) - - if output_list: - output = inputs - else: - output = concat_inputs(inputs, config.input_concat_axis, config.name) - - if config.HasField('extra_input_fn'): - fn = eval(config.extra_input_fn) - output = fn(output) - return output - - def __call__(self, is_training, *args, **kwargs): + def __call__(self, is_training, **kwargs): block_outputs = {} blocks = self._dag.topological_sort() logging.info('backbone topological order: ' + ','.join(blocks)) @@ -116,85 +114,20 @@ def __call__(self, is_training, *args, **kwargs): config = self._name_to_blocks[block] layer = config.WhichOneof('layer') if layer is None: # identity layer - block_outputs[block] = 
self.block_input(config, block_outputs) + block_outputs[block] = block_input(config, block_outputs) elif layer == 'input_layer': conf = config.input_layer input_fn = EnhancedInputLayer(conf, self._input_layer, self._features) output = input_fn(block, is_training) block_outputs[block] = output - elif layer == 'periodic_embedding': - input_feature = self.block_input(config, block_outputs) - num_emb = PeriodicEmbedding(config.periodic_embedding, scope=block) - block_outputs[block] = num_emb(input_feature) - elif layer == 'auto_dis_embedding': - input_feature = self.block_input(config, block_outputs) - num_emb = AutoDisEmbedding(config.auto_dis_embedding, scope=block) - block_outputs[block] = num_emb(input_feature) - elif layer == 'highway': - input_feature = self.block_input(config, block_outputs) - conf = config.highway - highway_layer = highway( - input_feature, - conf.emb_size, - activation=conf.activation, - dropout=conf.dropout_rate, - scope=block) - block_outputs[block] = highway_layer(input_feature) - elif layer == 'mlp': - input_feature = self.block_input(config, block_outputs) - mlp = dnn.DNN( - config.mlp, - self._l2_reg, - name='%s_mlp' % block, - is_training=is_training, - last_layer_no_activation=config.mlp.last_layer_no_activation, - last_layer_no_batch_norm=config.mlp.last_layer_no_batch_norm) - block_outputs[block] = mlp(input_feature) - elif layer == 'sequence_encoder': - block_outputs[block] = self.sequence_encoder(config, is_training) - elif layer == 'masknet': - input_feature = self.block_input(config, block_outputs) - mask_net = MaskNet(config.masknet, name=block, reuse=tf.AUTO_REUSE) - output = mask_net(input_feature, is_training, l2_reg=self._l2_reg) - block_outputs[block] = output - elif layer == 'senet': - input_feature = self.block_input(config, block_outputs) - senet = SENet(config.senet, name=block) - output = senet(input_feature) - block_outputs[block] = output - elif layer == 'fibinet': - input_feature = self.block_input(config, 
block_outputs) - fibinet = FiBiNetLayer(config.fibinet, name=block) - output = fibinet(input_feature, is_training, l2_reg=self._l2_reg) + elif layer == 'sequential': + inputs = block_input(config, block_outputs) + layers = config.sequential.layers + output = self.call_sequential_layers(inputs, layers, block, is_training) block_outputs[block] = output - elif layer == 'fm': - input_feature = self.block_input(config, block_outputs) - fm = FMLayer(config.fm, name=block) - block_outputs[block] = fm(input_feature) - elif layer == 'concat': - input_feature = self.block_input(config, block_outputs) - concat = Concatenate(config.concat) - block_outputs[block] = concat(input_feature) - elif layer == 'reshape': - input_feature = self.block_input(config, block_outputs) - block_outputs[block] = tf.reshape(input_feature, - list(config.reshape.dims)) - elif layer == 'add': - input_feature = self.block_input( - config, block_outputs, output_list=True) - block_outputs[block] = add_op(input_feature) - elif layer == 'dot': - input_feature = self.block_input(config, block_outputs) - block_outputs[block] = dot_op(input_feature) - elif layer == 'Lambda': - input_feature = self.block_input(config, block_outputs) - fn = eval(config.Lambda.expression) - block_outputs[block] = fn(input_feature) - # elif layer == 'chain': - # input_feature = self.block_input(config, block_outputs) - # block_outputs[block] = op_chain(input_feature, config.chain.ops) else: - raise NotImplementedError('Unsupported backbone layer:' + layer) + inputs = block_input(config, block_outputs) + block_outputs[block] = self.call_layer(inputs, config, block, is_training) temp = [] for output in self._config.concat_blocks: @@ -205,33 +138,52 @@ def __call__(self, is_training, *args, **kwargs): output = concat_inputs(temp, msg='backbone') if self._config.HasField('top_mlp'): - no_act = self._config.top_mlp.last_layer_no_activation - no_bn = self._config.top_mlp.last_layer_no_batch_norm - final_dnn = dnn.DNN( - 
self._config.top_mlp, - self._l2_reg, - name='backbone_top_mlp', - is_training=is_training, - last_layer_no_activation=no_act, - last_layer_no_batch_norm=no_bn) - output = final_dnn(output) + params = Parameter.make_from_pb(self._config.top_mlp) + params.l2_regularizer = self._l2_reg + final_mlp = MLP(params, name='backbone_top_mlp') + output = final_mlp(output, training=is_training) return output - def sequence_encoder(self, config, is_training): - encodings = [] - for seq_input in config.inputs: - encoding = self._model.get_sequence_encoding(seq_input, is_training) - encodings.append(encoding) - encoding = concat_inputs(encodings) - conf = config.sequence_encoder - if conf.HasField('mlp'): - sequence_dnn = dnn.DNN( - conf.mlp, - self._l2_reg, - name='%s_seq_dnn' % config.name, - is_training=is_training) - encoding = sequence_dnn(encoding) - return encoding + def call_keras_layer(self, layer_conf, inputs, name, training): + layer_cls, customize = load_keras_layer(layer_conf.class_name) + if layer_cls is None: + raise ValueError('Invalid keras layer class name: ' + + layer_conf.class_name) + + param_type = layer_conf.WhichOneof('params') + if customize: + if param_type is None or param_type == 'st_params': + params = Parameter(layer_conf.st_params, True, l2_reg=self._l2_reg) + else: + pb_params = getattr(layer_conf, param_type) + params = Parameter(pb_params, False, l2_reg=self._l2_reg) + layer = layer_cls(params, name=name) + kwargs = {'loss_dict': self.loss_dict} + return layer(inputs, training=training, **kwargs) + else: # internal keras layer + if param_type is None: + layer = layer_cls(name=name) + else: + assert param_type == 'st_params', 'internal keras layer only support st_params' + kwargs = convert_to_dict(layer_conf.st_params) + layer = layer_cls(name=name, **kwargs) + return layer(inputs, training=training) + + def call_sequential_layers(self, inputs, layers, name, training): + output = inputs + for layer in layers: + output = self.call_layer(output, 
layer, name, training) + return output + + def call_layer(self, inputs, config, name, training): + layer_name = config.WhichOneof('layer') + if layer_name == 'keras_layer': + return self.call_keras_layer(config.keras_layer, inputs, name, training) + if layer_name == 'lambda': + conf = getattr(config, 'lambda') + fn = eval(conf.expression) + return fn(inputs) + raise NotImplementedError('Unsupported backbone layer:' + layer_name) def concat_inputs(inputs, axis=-1, msg=''): @@ -250,66 +202,23 @@ def concat_inputs(inputs, axis=-1, msg=''): raise ValueError('no inputs to be concat:' + msg) -# def op_chain(inputs, ops): -# output = inputs -# for op in ops: -# op_name = op.WhichOneOf('Op') -# output = run_op(output, op_name, op, block='op_chain') -# return output -# -# -# def run_op(inputs, op_name, config, block='', is_training=False, l2_reg=None): -# if op_name == 'periodic_embedding': -# num_emb = PeriodicEmbedding(config.periodic_embedding, scope=block) -# return num_emb(inputs) -# elif op_name == 'auto_dis_embedding': -# num_emb = AutoDisEmbedding(config.auto_dis_embedding, scope=block) -# return num_emb(inputs) -# elif op_name == 'highway': -# conf = config.highway -# highway_op_name = highway( -# inputs, -# conf.emb_size, -# activation=conf.activation, -# dropout=conf.dropout_rate, -# scope=block) -# return highway_op_name(inputs) -# elif op_name == 'mlp': -# mlp = dnn.DNN( -# config.mlp, -# l2_reg, -# name='%s_mlp' % block, -# is_training=is_training, -# last_layer_no_activation=config.mlp.last_layer_no_activation, -# last_layer_no_batch_norm=config.mlp.last_layer_no_batch_norm) -# return mlp(inputs) -# elif op_name == 'masknet': -# mask_net = MaskNet(config.masknet, name=block, reuse=tf.AUTO_REUSE) -# output = mask_net(inputs, is_training, l2_reg=l2_reg) -# return output -# elif op_name == 'senet': -# senet = SENet(config.senet, name=block) -# output = senet(inputs) -# return output -# elif op_name == 'fibinet': -# fibinet = FiBiNetLayer(config.fibinet, 
name=block) -# output = fibinet(inputs, is_training, l2_reg=l2_reg) -# return output -# elif op_name == 'fm': -# fm = FMLayer(config.fm, name=block) -# return fm(inputs) -# if op_name == 'Lambda': -# fn = eval(config.Lambda.expression) -# output = fn(inputs) -# elif op_name == 'concat': -# concat = Concatenate(config.concat) -# output = concat(inputs) -# elif op_name == 'reshape': -# output = tf.reshape(inputs, list(config.reshape.dims)) -# elif op_name == 'add': -# output = add_op(inputs) -# elif op_name == 'dot': -# output = dot_op(inputs) -# else: -# raise NotImplementedError('Unsupported op:' + op_name) -# return output +def format_value(value): + value_type = type(value) + if value_type in (unicode, str): + return str(value) + if value_type == float: + int_v = int(value) + return int_v if int_v == value else value + if value_type == struct_pb2.ListValue: + return map(format_value, value) + if value_type == struct_pb2.Struct: + return convert_to_dict(value) + return value + + +def convert_to_dict(struct): + kwargs = {} + for key, value in struct.items(): + kwargs[str(key)] = format_value(value) + return kwargs + diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index f06723f68..810654cf3 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -1,7 +1,5 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
-import itertools -import logging import six import tensorflow as tf @@ -94,6 +92,11 @@ def __init__(self, config, input_layer, feature_dict): self._feature_dict = feature_dict def __call__(self, group, is_training, *args, **kwargs): + if self._config.output_seq_and_normal_feature: + seq_features, target_feature, target_features = self._input_layer( + self._feature_dict, group, is_combine=False) + return seq_features, target_features + features, feature_list = self._input_layer(self._feature_dict, group) num_features = len(feature_list) @@ -155,172 +158,3 @@ def __call__(self, inputs, *args, **kwargs): dim = self.config.expand_dim_after output = tf.expand_dims(output, dim) return output - - -class SENet(object): - """SENet+ Layer used in FiBiNET,支持不同field的embedding dimension不等. - - arxiv: 2209.05016 - """ - - def __init__(self, config, name='SENet'): - self.config = config - self.name = name - - def __call__(self, embedding_list): - """embedding_list: - A list of 2D tensor with shape: ``(batch_size,embedding_size)``.""" - print('SENET layer with %d inputs' % len(embedding_list)) - g = self.config.num_squeeze_group - for emb in embedding_list: - assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors' - dim = int(emb.shape[-1]) - assert dim >= g and dim % g == 0, 'field embedding dimension %d must be divisible by %d' % ( - dim, g) - - field_size = len(embedding_list) - feature_size_list = [emb.shape.as_list()[-1] for emb in embedding_list] - - # Squeeze - # embedding dimension 必须能被 g 整除 - group_embs = [ - tf.reshape(emb, [-1, g, int(emb.shape[-1]) // g]) - for emb in embedding_list - ] - - squeezed = [] - for emb in group_embs: - squeezed.append(tf.reduce_max(emb, axis=-1)) # [B, g] - squeezed.append(tf.reduce_mean(emb, axis=-1)) # [B, g] - z = tf.concat(squeezed, axis=1) # [bs, field_size * num_groups * 2] - - # Excitation - r = self.config.reduction_ratio - reduction_size = max(1, field_size * g * 2 // r) - - initializer = 
tf.glorot_normal_initializer() - a1 = tf.layers.dense( - z, - reduction_size, - kernel_initializer=initializer, - activation=tf.nn.relu, - name='%s/W1' % self.name) - weights = tf.layers.dense( - a1, - sum(feature_size_list), - kernel_initializer=initializer, - name='%s/W2' % self.name) - - # Re-weight - inputs = tf.concat(embedding_list, axis=-1) - output = inputs * weights - - # Fuse, add skip-connection - if self.config.use_skip_connection: - output += inputs - - # Layer Normalization - if self.config.use_output_layer_norm: - output = layer_norm(output) - return output - - -def _full_interaction(v_i, v_j): - # [bs, 1, dim] x [bs, dim, 1] = [bs, 1] - interaction = tf.matmul( - tf.expand_dims(v_i, axis=1), tf.expand_dims(v_j, axis=-1)) - return tf.squeeze(interaction, axis=1) - - -class BiLinear(object): - - def __init__(self, - output_size, - bilinear_type, - bilinear_plus=True, - name='bilinear'): - """双线性特征交互层,支持不同field embeddings的size不等. - - arxiv: 2209.05016 - :param output_size: 输出的size - :param bilinear_type: ['all', 'each', 'interaction'],支持其中一种 - :param bilinear_plus: 是否使用bi-linear+ - """ - self.name = name - self.bilinear_type = bilinear_type.lower() - self.output_size = output_size - - if bilinear_type not in ['all', 'each', 'interaction']: - raise NotImplementedError( - "bilinear_type only support: ['all', 'each', 'interaction']") - - if bilinear_plus: - self.func = _full_interaction - else: - self.func = tf.multiply - - def __call__(self, embeddings): - print('Bilinear Layer with %d inputs' % len(embeddings)) - if len(embeddings) > 200: - logging.warn('There are too many inputs for bilinear layer: %d' % - len(embeddings)) - equal_dim = True - _dim = embeddings[0].shape[-1] - for emb in embeddings: - assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors' - if emb.shape[-1] != _dim: - equal_dim = False - if not equal_dim and self.bilinear_type != 'interaction': - raise ValueError( - 'all embedding dimensions must be same when not use 
bilinear type: interaction' - ) - dim = int(_dim) - - field_size = len(embeddings) - initializer = tf.glorot_normal_initializer() - - # bi-linear+: p的维度为[bs, f*(f-1)/2] - # bi-linear: - # 当equal_dim=True时,p的维度为[bs, f*(f-1)/2*k],k为embeddings的size - # 当equal_dim=False时,p的维度为[bs, (k_2+k_3+...+k_f)+...+(k_i+k_{i+1}+...+k_f)+...+k_f], - # 其中 k_i为第i个field的embedding的size - if self.bilinear_type == 'all': - v_dot = [ - tf.layers.dense( - v_i, - dim, - kernel_initializer=initializer, - name='%s/all' % self.name, - reuse=tf.AUTO_REUSE) for v_i in embeddings[:-1] - ] - p = [ - self.func(v_dot[i], embeddings[j]) - for i, j in itertools.combinations(range(field_size), 2) - ] - elif self.bilinear_type == 'each': - v_dot = [ - tf.layers.dense( - v_i, - dim, - kernel_initializer=initializer, - name='%s/each_%d' % (self.name, i), - reuse=tf.AUTO_REUSE) for i, v_i in enumerate(embeddings[:-1]) - ] - p = [ - self.func(v_dot[i], embeddings[j]) - for i, j in itertools.combinations(range(field_size), 2) - ] - else: # interaction - p = [ - self.func( - tf.layers.dense( - embeddings[i], - embeddings[j].shape.as_list()[-1], - kernel_initializer=initializer, - name='%s/interaction_%d_%d' % (self.name, i, j), - reuse=tf.AUTO_REUSE), embeddings[j]) - for i, j in itertools.combinations(range(field_size), 2) - ] - - output = tf.layers.dense( - tf.concat(p, axis=-1), self.output_size, kernel_initializer=initializer) - return output diff --git a/easy_rec/python/layers/fibinet.py b/easy_rec/python/layers/fibinet.py deleted file mode 100644 index 77b6da4a5..000000000 --- a/easy_rec/python/layers/fibinet.py +++ /dev/null @@ -1,54 +0,0 @@ -# -*- encoding:utf-8 -*- -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import tensorflow as tf - -from easy_rec.python.layers import dnn -from easy_rec.python.layers.common_layers import BiLinear -from easy_rec.python.layers.common_layers import SENet - -if tf.__version__ >= '2.0': - tf = tf.compat.v1 - - -class FiBiNetLayer(object): - """FiBiNet++:Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction. - - This is almost an exact implementation of the original FiBiNet++ model. - See the original paper: - https://arxiv.org/pdf/2209.05016.pdf - """ - - def __init__(self, fibinet_config, name='fibinet'): - self._config = fibinet_config - self.name = name - - def __call__(self, inputs, is_training, l2_reg=None, *args, **kwargs): - feature_list = [] - - senet = SENet(self._config.senet, name='%s_senet' % self.name) - senet_output = senet(inputs) - feature_list.append(senet_output) - - if self._config.HasField('bilinear'): - conf = self._config.bilinear - bilinear = BiLinear( - output_size=conf.num_output_units, - bilinear_type=conf.type, - bilinear_plus=conf.use_plus, - name='%s_bilinear' % self.name) - bilinear_output = bilinear(inputs) - feature_list.append(bilinear_output) - - if len(feature_list) > 1: - feature = tf.concat(feature_list, axis=-1) - else: - feature = feature_list[0] - - if self._config.HasField('mlp'): - final_dnn = dnn.DNN( - self._config.mlp, - l2_reg, - name='%s_fibinet_mlp' % self.name, - is_training=is_training) - feature = final_dnn(feature) - return feature diff --git a/easy_rec/python/layers/fm.py b/easy_rec/python/layers/fm.py index 7b0742f6d..1929e00aa 100644 --- a/easy_rec/python/layers/fm.py +++ b/easy_rec/python/layers/fm.py @@ -24,46 +24,3 @@ def __call__(self, fm_fea): square_sum = tf.reduce_sum(tf.square(fm_feas), 1) y_v = 0.5 * tf.subtract(sum_square, square_sum) return y_v - - -class FMLayer(object): - """Factorization Machine models pairwise (order-2) feature interactions without linear term and bias. 
- - References - - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) - """ - - def __init__(self, config, name='fm'): - self.name = name - self.config = config - - def __call__(self, inputs): - """FM layer. - - Input shape. - - List of 2D tensor with shape: ``(batch_size,embedding_size)``. - - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` - Output shape - - 2D tensor with shape: ``(batch_size, 1)``. - """ - if type(inputs) == list: - emb_dims = set(map(lambda x: int(x.shape[-1]), inputs)) - if len(emb_dims) != 1: - dims = ','.join([str(d) for d in emb_dims]) - raise ValueError('all embedding dim must be equal in FM layer:' + dims) - - with tf.name_scope(self.name): - fea = tf.stack(inputs, axis=1) - else: - assert inputs.shape.ndims == 3, 'input of FM layer must be a 3D tensor or a list of 2D tensors' - fea = inputs - - with tf.name_scope(self.name): - square_of_sum = tf.square(tf.reduce_sum(fea, axis=1)) - sum_of_square = tf.reduce_sum(tf.square(fea), axis=1) - cross_term = tf.subtract(square_of_sum, sum_of_square) - if self.config.use_variant: - cross_term = 0.5 * cross_term - else: - cross_term = 0.5 * tf.reduce_sum(cross_term, axis=-1) - return cross_term diff --git a/easy_rec/python/layers/keras/__init__.py b/easy_rec/python/layers/keras/__init__.py index c4006b39c..d0dda33cf 100644 --- a/easy_rec/python/layers/keras/__init__.py +++ b/easy_rec/python/layers/keras/__init__.py @@ -1 +1,13 @@ +from .blocks import MLP, Highway +from .bst import BST +from .din import DIN +from .dcn import Cross from .dot_interaction import DotInteraction +from .fibinet import BiLinear +from .fibinet import FiBiNet +from .fibinet import SENet +from .fm import FM +from .mask_net import MaskBlock +from .mask_net import MaskNet +from .numerical_embedding import AutoDisEmbedding +from .numerical_embedding import PeriodicEmbedding diff --git a/easy_rec/python/layers/keras/blocks.py b/easy_rec/python/layers/keras/blocks.py new 
file mode 100644 index 000000000..507723017 --- /dev/null +++ b/easy_rec/python/layers/keras/blocks.py @@ -0,0 +1,117 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +"""Convenience blocks for building models.""" +import logging +from easy_rec.python.utils.activation import get_activation +import tensorflow as tf + + +class MLP(tf.keras.layers.Layer): + """Sequential multi-layer perceptron (MLP) block. + + Attributes: + units: Sequential list of layer sizes. + use_bias: Whether to include a bias term. + activation: Type of activation to use on all except the last layer. + final_activation: Type of activation to use on last layer. + **kwargs: Extra args passed to the Keras Layer base class. + """ + + def __init__(self, params, name='mlp', **kwargs): + super(MLP, self).__init__(name=name, **kwargs) + params.check_required('hidden_units') + use_bn = params.get_or_default('use_bn', True) + use_final_bn = params.get_or_default('use_final_bn', True) + use_bias = params.get_or_default('use_bias', True) + dropout_rate = list(params.get_or_default('dropout_ratio', [])) + activation = params.get_or_default('activation', 'relu') + initializer = params.get_or_default('initializer', 'he_uniform') + final_activation = params.get_or_default('final_activation', None) + use_bn_after_act = params.get_or_default('use_bn_after_activation', False) + units = list(params.hidden_units) + logging.info( + 'MLP(%s) units: %s, dropout: %r, activate=%s, use_bn=%r, final_bn=%r,' + ' final_activate=%s, bias=%r, initializer=%s, bn_after_activation=%r' + % (name, units, dropout_rate, activation, use_bn, use_final_bn, + final_activation, use_bias, initializer, use_bn_after_act)) + + num_dropout = len(dropout_rate) + self._sub_layers = [] + for i, num_units in enumerate(units[:-1]): + name = 'dnn_%d' % i + drop_rate = dropout_rate[i] if i < num_dropout else 0.0 + self.add_rich_layer(num_units, use_bn, drop_rate, activation, initializer, + use_bias, use_bn_after_act, 
name, params.l2_regularizer) + + n = len(units) - 1 + drop_rate = dropout_rate[n] if num_dropout > n else 0.0 + name = 'dnn_%d' % n + self.add_rich_layer(units[-1], use_final_bn, drop_rate, final_activation, + initializer, use_bias, use_bn_after_act, name, params.l2_regularizer) + + def add_rich_layer(self, + num_units, + use_bn, + dropout_rate, + activation, + initializer, + use_bias=True, + use_bn_after_activation=False, + name='mlp', + l2_reg=None): + act_fn = get_activation(activation) + if use_bn and not use_bn_after_activation: + dense = tf.keras.layers.Dense( + units=num_units, + use_bias=use_bias, + kernel_initializer=initializer, + kernel_regularizer=l2_reg, + name=name) + self._sub_layers.append(dense) + # bn = tf.keras.layers.BatchNormalization(name='%s/bn' % name) + # keras BN layer have a stale issue on some versions of tf + bn = lambda x, training: tf.layers.batch_normalization(x, training=training, name='%s/bn' % name) + self._sub_layers.append(bn) + act = tf.keras.layers.Activation(act_fn, name='%s/act' % name) + self._sub_layers.append(act) + else: + dense = tf.keras.layers.Dense( + num_units, + activation=act_fn, + use_bias=use_bias, + kernel_initializer=initializer, + kernel_regularizer=l2_reg, + name=name) + self._sub_layers.append(dense) + if use_bn and use_bn_after_activation: + bn = lambda x, training: tf.layers.batch_normalization(x, training=training, name='%s/bn' % name) + self._sub_layers.append(bn) + + if 0.0 < dropout_rate < 1.0: + dropout = tf.keras.layers.Dropout(dropout_rate, name='%s/dropout' % name) + self._sub_layers.append(dropout) + elif dropout_rate >= 1.0: + raise ValueError('invalid dropout_ratio: %.3f' % dropout_rate) + + def call(self, x, training=None, **kwargs): + """Performs the forward computation of the block.""" + for layer in self._sub_layers: + x = layer(x, training=training) + return x + + +class Highway(tf.keras.layers.Layer): + def __init__(self, params, name='highway', **kwargs): + super(Highway, 
self).__init__(name, **kwargs) + params.check_required('emb_size') + self.emb_size = params.emb_size + self.num_layers = params.get_or_default('num_layers', 1) + self.activation = params.get_or_default('activation', 'gelu') + self.dropout_rate = params.get_or_default('dropout_rate', 0.0) + + def call(self, inputs, training=None, **kwargs): + from easy_rec.python.layers.common_layers import highway + return highway(inputs, self.emb_size, + activation=self.activation, + num_layers=self.num_layers, + dropout=self.dropout_rate if training else 0.0) diff --git a/easy_rec/python/layers/bst.py b/easy_rec/python/layers/keras/bst.py similarity index 89% rename from easy_rec/python/layers/bst.py rename to easy_rec/python/layers/keras/bst.py index 9f2f78030..9492fda07 100644 --- a/easy_rec/python/layers/bst.py +++ b/easy_rec/python/layers/keras/bst.py @@ -7,17 +7,15 @@ from easy_rec.python.loss.nce_loss import nce_loss from easy_rec.python.utils.activation import get_activation from easy_rec.python.utils.shape_utils import get_shape_list +from tensorflow.python.keras.layers import Layer -# from tensorflow.python.keras.layers import Layer +class BST(Layer): -class BST(object): - - def __init__(self, config, l2_reg, name='bst', **kwargs): - # super(BST, self).__init__(name=name, **kwargs) - self.name = name + def __init__(self, params, name='bst', l2_reg=None, **kwargs): + super(BST, self).__init__(name=name, **kwargs) self.l2_reg = l2_reg - self.config = config + self.config = params.get_pb_config() def encode(self, seq_input, max_position): seq_fea = multihead_cross_attention.embedding_postprocessor( @@ -44,15 +42,16 @@ def encode(self, seq_input, max_position): hidden_dropout_prob=self.config.hidden_dropout_prob, attention_probs_dropout_prob=self.config.attention_probs_dropout_prob, initializer_range=self.config.initializer_range, - name=self.name + '/bst', + name=self.name + '/transformer', reuse=tf.AUTO_REUSE) # attention_fea shape: [batch_size, seq_length, hidden_size] 
out_fea = attention_fea[:, 0, :] # target feature print('bst output shape:', out_fea.shape) return out_fea - def __call__(self, inputs, training=None, **kwargs): - seq_features, target_feature = inputs + def call(self, inputs, training=None, **kwargs): + seq_features, target_features = inputs + assert len(seq_features) > 0, '[%s] sequence feature is empty' % self.name if not training: self.config.hidden_dropout_prob = 0.0 self.config.attention_probs_dropout_prob = 0.0 @@ -70,7 +69,7 @@ def __call__(self, inputs, training=None, **kwargs): with tf.control_dependencies([valid_len]): # seq_input: [batch_size, seq_len, embed_size] seq_input = tf.concat(seq_embeds, axis=-1) - if target_feature is not None: + if len(target_features) > 0: max_position += 1 seq_embed_size = seq_input.shape.as_list()[-1] @@ -97,7 +96,8 @@ def __call__(self, inputs, training=None, **kwargs): loss_dict['%s_contrastive_loss' % self.name] = loss # tf.summary.scalar('loss/%s_contrastive_loss' % self.name, loss) - if target_feature is not None: + if len(target_features) > 0: + target_feature = tf.concat(target_features, axis=-1) target_size = target_feature.shape.as_list()[-1] assert seq_embed_size == target_size, 'the embedding size of sequence and target item is not equal' \ ' in feature group:' + self.name diff --git a/easy_rec/python/layers/keras/dcn.py b/easy_rec/python/layers/keras/dcn.py index 2f35bdc5d..5fe4d4c42 100644 --- a/easy_rec/python/layers/keras/dcn.py +++ b/easy_rec/python/layers/keras/dcn.py @@ -8,78 +8,84 @@ class Cross(tf.keras.layers.Layer): """Cross Layer in Deep & Cross Network to learn explicit feature interactions. - A layer that creates explicit and bounded-degree feature interactions - efficiently. The `call` method accepts `inputs` as a tuple of size 2 - tensors. 
The first input `x0` is the base layer that contains the original - features (usually the embedding layer); the second input `xi` is the output - of the previous `Cross` layer in the stack, i.e., the i-th `Cross` - layer. For the first `Cross` layer in the stack, x0 = xi. - - The output is x_{i+1} = x0 .* (W * xi + bias + diag_scale * xi) + xi, - where .* designates elementwise multiplication, W could be a full-rank - matrix, or a low-rank matrix U*V to reduce the computational cost, and - diag_scale increases the diagonal of W to improve training stability ( - especially for the low-rank case). - - References: - 1. [R. Wang et al.](https://arxiv.org/pdf/2008.13535.pdf) - See Eq. (1) for full-rank and Eq. (2) for low-rank version. - 2. [R. Wang et al.](https://arxiv.org/pdf/1708.05123.pdf) - - Example: - - ```python - # after embedding layer in a functional model: - input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64) - x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6) - x1 = Cross()(x0, x0) - x2 = Cross()(x0, x1) - logits = tf.keras.layers.Dense(units=10)(x2) - model = tf.keras.Model(input, logits) - ``` - - Args: - projection_dim: project dimension to reduce the computational cost. - Default is `None` such that a full (`input_dim` by `input_dim`) matrix - W is used. If enabled, a low-rank matrix W = U*V will be used, where U - is of size `input_dim` by `projection_dim` and V is of size - `projection_dim` by `input_dim`. `projection_dim` need to be smaller - than `input_dim`/2 to improve the model efficiency. In practice, we've - observed that `projection_dim` = d/4 consistently preserved the - accuracy of a full-rank version. - diag_scale: a non-negative float used to increase the diagonal of the - kernel W by `diag_scale`, that is, W + diag_scale * I, where I is an - identity matrix. - use_bias: whether to add a bias term for this layer. If set to False, - no bias term will be used. 
- preactivation: Activation applied to output matrix of the layer, before - multiplication with the input. Can be used to control the scale of the - layer's outputs and improve stability. - kernel_initializer: Initializer to use on the kernel matrix. - bias_initializer: Initializer to use on the bias vector. - kernel_regularizer: Regularizer to use on the kernel matrix. - bias_regularizer: Regularizer to use on bias vector. - - Input shape: A tuple of 2 (batch_size, `input_dim`) dimensional inputs. - Output shape: A single (batch_size, `input_dim`) dimensional output. + A layer that creates explicit and bounded-degree feature interactions + efficiently. The `call` method accepts `inputs` as a tuple of size 2 + tensors. The first input `x0` is the base layer that contains the original + features (usually the embedding layer); the second input `xi` is the output + of the previous `Cross` layer in the stack, i.e., the i-th `Cross` + layer. For the first `Cross` layer in the stack, x0 = xi. + + The output is x_{i+1} = x0 .* (W * xi + bias + diag_scale * xi) + xi, + where .* designates elementwise multiplication, W could be a full-rank + matrix, or a low-rank matrix U*V to reduce the computational cost, and + diag_scale increases the diagonal of W to improve training stability ( + especially for the low-rank case). + + References: + 1. [R. Wang et al.](https://arxiv.org/pdf/2008.13535.pdf) + See Eq. (1) for full-rank and Eq. (2) for low-rank version. + 2. [R. Wang et al.](https://arxiv.org/pdf/1708.05123.pdf) + + Example: + + ```python + # after embedding layer in a functional model: + input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64) + x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6) + x1 = Cross()(x0, x0) + x2 = Cross()(x0, x1) + logits = tf.keras.layers.Dense(units=10)(x2) + model = tf.keras.Model(input, logits) + ``` + + Args: + projection_dim: project dimension to reduce the computational cost. 
+ Default is `None` such that a full (`input_dim` by `input_dim`) matrix + W is used. If enabled, a low-rank matrix W = U*V will be used, where U + is of size `input_dim` by `projection_dim` and V is of size + `projection_dim` by `input_dim`. `projection_dim` need to be smaller + than `input_dim`/2 to improve the model efficiency. In practice, we've + observed that `projection_dim` = d/4 consistently preserved the + accuracy of a full-rank version. + diag_scale: a non-negative float used to increase the diagonal of the + kernel W by `diag_scale`, that is, W + diag_scale * I, where I is an + identity matrix. + use_bias: whether to add a bias term for this layer. If set to False, + no bias term will be used. + preactivation: Activation applied to output matrix of the layer, before + multiplication with the input. Can be used to control the scale of the + layer's outputs and improve stability. + kernel_initializer: Initializer to use on the kernel matrix. + bias_initializer: Initializer to use on the bias vector. + kernel_regularizer: Regularizer to use on the kernel matrix. + bias_regularizer: Regularizer to use on bias vector. + + Input shape: A tuple of 2 (batch_size, `input_dim`) dimensional inputs. + Output shape: A single (batch_size, `input_dim`) dimensional output. 
""" - def __init__(self, config, **kwargs): + def __init__(self, params, **kwargs): super(Cross, self).__init__(**kwargs) - self._projection_dim = config.projection_dim - self._diag_scale = config.diag_scale - self._use_bias = config.use_bias - self._preactivation = tf.keras.activations.get(config.preactivation) - self._kernel_initializer = tf.keras.initializers.get(config.kernel_initializer) - self._bias_initializer = tf.keras.initializers.get(config.bias_initializer) - self._kernel_regularizer = tf.keras.regularizers.get(config.kernel_regularizer) - self._bias_regularizer = tf.keras.regularizers.get(config.bias_regularizer) + self._projection_dim = params.get_or_default('projection_dim', None) + self._diag_scale = params.get_or_default('diag_scale', 0.0) + self._use_bias = params.get_or_default('use_bias', True) + preactivation = params.get_or_default('preactivation', None) + self._preactivation = tf.keras.activations.get(preactivation) + kernel_initializer = params.get_or_default('kernel_initializer', + 'truncated_normal') + self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) + bias_initializer = params.get_or_default('bias_initializer', 'zeros') + self._bias_initializer = tf.keras.initializers.get(bias_initializer) + kernel_regularizer = params.get_or_default('kernel_regularizer', None) + self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) + bias_regularizer = params.get_or_default('bias_regularizer', None) + self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer) self._input_dim = None self._supports_masking = True if self._diag_scale < 0: # pytype: disable=unsupported-operands raise ValueError( - "`diag_scale` should be non-negative. Got `diag_scale` = {}".format( + '`diag_scale` should be non-negative. 
Got `diag_scale` = {}'.format( self._diag_scale)) def build(self, input_shape): @@ -139,8 +145,8 @@ def call(self, inputs, **kwargs): if x0.shape[-1] != x.shape[-1]: raise ValueError( - "`x0` and `x` dimension mismatch! Got `x0` dimension {}, and x " - "dimension {}. This case is not supported yet.".format( + '`x0` and `x` dimension mismatch! Got `x0` dimension {}, and x ' + 'dimension {}. This case is not supported yet.'.format( x0.shape[-1], x.shape[-1])) if self._projection_dim is None: @@ -157,21 +163,21 @@ def call(self, inputs, **kwargs): def get_config(self): config = { - "projection_dim": + 'projection_dim': self._projection_dim, - "diag_scale": + 'diag_scale': self._diag_scale, - "use_bias": + 'use_bias': self._use_bias, - "preactivation": + 'preactivation': tf.keras.activations.serialize(self._preactivation), - "kernel_initializer": + 'kernel_initializer': tf.keras.initializers.serialize(self._kernel_initializer), - "bias_initializer": + 'bias_initializer': tf.keras.initializers.serialize(self._bias_initializer), - "kernel_regularizer": + 'kernel_regularizer': tf.keras.regularizers.serialize(self._kernel_regularizer), - "bias_regularizer": + 'bias_regularizer': tf.keras.regularizers.serialize(self._bias_regularizer), } base_config = super(Cross, self).get_config() diff --git a/easy_rec/python/layers/din.py b/easy_rec/python/layers/keras/din.py similarity index 67% rename from easy_rec/python/layers/din.py rename to easy_rec/python/layers/keras/din.py index 18505bd44..686d23e00 100644 --- a/easy_rec/python/layers/din.py +++ b/easy_rec/python/layers/keras/din.py @@ -7,32 +7,33 @@ from easy_rec.python.layers import dnn from easy_rec.python.utils.shape_utils import get_shape_list -# from tensorflow.python.keras.layers import Layer +from tensorflow.python.keras.layers import Layer -class DIN(object): +class DIN(Layer): - def __init__(self, config, l2_reg, name='din', **kwargs): - # super(DIN, self).__init__(name=name, **kwargs) - self.name = name + def 
__init__(self, params, name='din', l2_reg=None, **kwargs): + super(DIN, self).__init__(name=name, **kwargs) self.l2_reg = l2_reg - self.config = config + self.config = params.get_pb_config() - def __call__(self, inputs, training=None, **kwargs): - seq_features, target_feature = inputs + def call(self, inputs, training=None, **kwargs): + seq_features, target_features = inputs + assert len(seq_features) > 0, '[%s] sequence feature is empty' % self.name + assert len(target_features) > 0, '[%s] target feature is empty' % self.name + + query = tf.concat(target_features, axis=-1) seq_input = [seq_fea for seq_fea, _ in seq_features] keys = tf.concat(seq_input, axis=-1) - query = target_feature - target_emb_size = target_feature.shape.as_list()[-1] + query_emb_size = int(query.shape[-1]) seq_emb_size = keys.shape.as_list()[-1] - if target_emb_size != seq_emb_size: + if query_emb_size != seq_emb_size: logging.info( ' the embedding size of sequence [%d] and target item [%d] is not equal' - ' in feature group: %s', seq_emb_size, target_emb_size, self.name) - if target_emb_size < seq_emb_size: - query = tf.pad(target_feature, - [[0, 0], [0, seq_emb_size - target_emb_size]]) + ' in feature group: %s', seq_emb_size, query_emb_size, self.name) + if query_emb_size < seq_emb_size: + query = tf.pad(query, [[0, 0], [0, seq_emb_size - query_emb_size]]) else: assert False, 'the embedding size of target item is larger than the one of sequence' @@ -64,10 +65,10 @@ def __call__(self, inputs, training=None, **kwargs): raise ValueError('unsupported attention normalizer: ' + self.config.attention_normalizer) - if target_emb_size < seq_emb_size: - keys = keys[:, :, :target_emb_size] # [B, L, E] + if query_emb_size < seq_emb_size: + keys = keys[:, :, :query_emb_size] # [B, L, E] output = tf.squeeze(tf.matmul(scores, keys), axis=[1]) if self.config.need_target_feature: - output = tf.concat([output, target_feature], axis=-1) + output = tf.concat([output, query], axis=-1) print('din output 
shape:', output.shape) return output diff --git a/easy_rec/python/layers/keras/dot_interaction.py b/easy_rec/python/layers/keras/dot_interaction.py index 50a3966af..7ec47c5ad 100644 --- a/easy_rec/python/layers/keras/dot_interaction.py +++ b/easy_rec/python/layers/keras/dot_interaction.py @@ -27,14 +27,9 @@ class DotInteraction(tf.keras.layers.Layer): name: String name of the layer. """ - def __init__(self, - config, - self_interaction=False, - skip_gather=False, - name=None, - **kwargs): - self._self_interaction = config.self_interaction - self._skip_gather = config.skip_gather + def __init__(self, params, name=None, **kwargs): + self._self_interaction = params.get_or_default('self_interaction', False) + self._skip_gather = params.get_or_default('skip_gather', False) super(DotInteraction, self).__init__(name=name, **kwargs) def call(self, inputs, **kwargs): @@ -53,20 +48,22 @@ def call(self, inputs, **kwargs): `num_features * (num_features + 1) / 2` if self_interaction is True and `num_features * (num_features - 1) / 2` if self_interaction is False. 
""" - num_features = len(inputs) - batch_size = tf.shape(inputs[0])[0] - feature_dim = tf.shape(inputs[0])[1] - # concat_features shape: batch_size, num_features, feature_dim - try: - concat_features = tf.concat(inputs, axis=-1) - concat_features = tf.reshape(concat_features, - [batch_size, -1, feature_dim]) - except (ValueError, tf.errors.InvalidArgumentError) as e: - raise ValueError('Input tensors` dimensions must be equal, original' - 'error message: {}'.format(e)) + if isinstance(inputs, (list, tuple)): + # concat_features shape: batch_size, num_features, feature_dim + try: + concat_features = tf.stack(inputs, axis=1) + except (ValueError, tf.errors.InvalidArgumentError) as e: + raise ValueError('Input tensors` dimensions must be equal, original' + 'error message: {}'.format(e)) + else: + assert inputs.shape.ndims == 3, 'input of dot func must be a 3D tensor or a list of 2D tensors' + concat_features = inputs + + batch_size = tf.shape(concat_features)[0] # Interact features, select lower-triangular portion, and re-shape. xactions = tf.matmul(concat_features, concat_features, transpose_b=True) + num_features = xactions.shape[-1] ones = tf.ones_like(xactions) if self._self_interaction: # Selecting lower-triangular portion including the diagonal. diff --git a/easy_rec/python/layers/keras/fibinet.py b/easy_rec/python/layers/keras/fibinet.py new file mode 100644 index 000000000..dc1f7d003 --- /dev/null +++ b/easy_rec/python/layers/keras/fibinet.py @@ -0,0 +1,229 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import itertools +import logging + +import tensorflow as tf + +from easy_rec.python.layers import dnn +from easy_rec.python.layers.common_layers import layer_norm +from easy_rec.python.layers.keras.blocks import MLP +from easy_rec.python.layers.utils import Parameter + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class SENet(tf.keras.layers.Layer): + """SENet+ Layer used in FiBiNET,支持不同field的embedding dimension不等. 
+
+  arxiv: 2209.05016
+  """
+
+  def __init__(self, params, name='SENet', **kwargs):
+    super(SENet, self).__init__(name=name, **kwargs)
+    self.config = params.get_pb_config()
+
+  def call(self, inputs, **kwargs):
+    """Apply SENet+ to `inputs`, a list of 2D tensors with shape ``(batch_size,embedding_size)``."""
+    print('SENET layer with %d inputs' % len(inputs))
+    g = self.config.num_squeeze_group
+    for emb in inputs:
+      assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors'
+      dim = int(emb.shape[-1])
+      assert dim >= g and dim % g == 0, 'field embedding dimension %d must be divisible by %d' % (
+          dim, g)
+
+    field_size = len(inputs)
+    feature_size_list = [emb.shape.as_list()[-1] for emb in inputs]
+
+    # Squeeze
+    # every embedding dimension must be divisible by g (asserted above)
+    group_embs = [
+        tf.reshape(emb, [-1, g, int(emb.shape[-1]) // g]) for emb in inputs
+    ]
+
+    squeezed = []
+    for emb in group_embs:
+      squeezed.append(tf.reduce_max(emb, axis=-1))  # [B, g]
+      squeezed.append(tf.reduce_mean(emb, axis=-1))  # [B, g]
+    z = tf.concat(squeezed, axis=1)  # [bs, field_size * num_groups * 2]
+
+    # Excitation
+    r = self.config.reduction_ratio
+    reduction_size = max(1, field_size * g * 2 // r)
+
+    initializer = tf.glorot_normal_initializer()
+    a1 = tf.layers.dense(
+        z,
+        reduction_size,
+        kernel_initializer=initializer,
+        activation=tf.nn.relu,
+        name='%s/W1' % self.name)
+    weights = tf.layers.dense(
+        a1,
+        sum(feature_size_list),
+        kernel_initializer=initializer,
+        name='%s/W2' % self.name)
+
+    # Re-weight
+    inputs = tf.concat(inputs, axis=-1)
+    output = inputs * weights
+
+    # Fuse, add skip-connection
+    if self.config.use_skip_connection:
+      output += inputs
+
+    # Layer Normalization
+    if self.config.use_output_layer_norm:
+      output = layer_norm(output)
+    return output
+
+
+def _full_interaction(v_i, v_j):
+  # [bs, 1, dim] x [bs, dim, 1] = [bs, 1, 1], squeezed to [bs, 1] below
+  interaction = tf.matmul(
+      tf.expand_dims(v_i, axis=1), tf.expand_dims(v_j, axis=-1))
+  return tf.squeeze(interaction, axis=1)
+
+
+class 
BiLinear(tf.keras.layers.Layer):
+  """Bilinear feature interaction layer; field embeddings may differ in size.
+
+  arxiv: 2209.05016
+
+  Attributes:
+    num_output_units: size of the output
+    type: one of ['all', 'each', 'interaction']
+    use_plus: whether to use bi-linear+
+  """
+
+  def __init__(self, params, name='bilinear', **kwargs):
+    super(BiLinear, self).__init__(name=name, **kwargs)
+    params.check_required(['num_output_units'])
+    bilinear_plus = params.get_or_default('use_plus', True)
+    self.bilinear_type = params.get_or_default('type', 'interaction').lower()
+    self.output_size = params.num_output_units
+
+    if self.bilinear_type not in ['all', 'each', 'interaction']:
+      raise NotImplementedError(
+          "bilinear_type only support: ['all', 'each', 'interaction']")
+
+    if bilinear_plus:
+      self.func = _full_interaction
+    else:
+      self.func = tf.multiply
+
+  def call(self, inputs, **kwargs):
+    embeddings = inputs
+    logging.info('Bilinear Layer with %d inputs' % len(embeddings))
+    if len(embeddings) > 200:
+      logging.warning('There are too many inputs for bilinear layer: %d' %
+                      len(embeddings))
+    equal_dim = True
+    _dim = embeddings[0].shape[-1]
+    for emb in embeddings:
+      assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors'
+      if emb.shape[-1] != _dim:
+        equal_dim = False
+    if not equal_dim and self.bilinear_type != 'interaction':
+      raise ValueError(
+          'all embedding dimensions must be same when not use bilinear type: interaction'
+      )
+    dim = int(_dim)
+
+    field_size = len(embeddings)
+    initializer = tf.glorot_normal_initializer()
+
+    # bi-linear+: p has shape [bs, f*(f-1)/2]
+    # bi-linear:
+    #   when equal_dim=True, p has shape [bs, f*(f-1)/2*k], k being the embedding size
+    #   when equal_dim=False, p has shape [bs, (k_2+k_3+...+k_f)+...+(k_i+k_{i+1}+...+k_f)+...+k_f],
+    #   where k_i is the embedding size of the i-th field
+    if self.bilinear_type == 'all':
+      v_dot = [
+          tf.layers.dense(
+              v_i,
+              dim,
+              kernel_initializer=initializer,
+              name='%s/all' % self.name,
+              reuse=tf.AUTO_REUSE) for v_i in embeddings[:-1]
+      ]
+      p = [
+          self.func(v_dot[i], embeddings[j])
+          for i, j in itertools.combinations(range(field_size), 2)
+      ]
+    elif self.bilinear_type == 'each':
+      v_dot = [
+          tf.layers.dense(
+              v_i,
+              dim,
+              kernel_initializer=initializer,
+              name='%s/each_%d' % (self.name, i),
+              reuse=tf.AUTO_REUSE) for i, v_i in enumerate(embeddings[:-1])
+      ]
+      p = [
+          self.func(v_dot[i], embeddings[j])
+          for i, j in itertools.combinations(range(field_size), 2)
+      ]
+    else:  # interaction
+      p = [
+          self.func(
+              tf.layers.dense(
+                  embeddings[i],
+                  embeddings[j].shape.as_list()[-1],
+                  kernel_initializer=initializer,
+                  name='%s/interaction_%d_%d' % (self.name, i, j),
+                  reuse=tf.AUTO_REUSE), embeddings[j])
+          for i, j in itertools.combinations(range(field_size), 2)
+      ]
+
+    output = tf.layers.dense(
+        tf.concat(p, axis=-1), self.output_size, kernel_initializer=initializer)
+    return output
+
+
+class FiBiNet(tf.keras.layers.Layer):
+  """FiBiNet++:Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction.
+
+  This is almost an exact implementation of the original FiBiNet++ model.
+  See the original paper:
+  https://arxiv.org/pdf/2209.05016.pdf
+  """
+
+  def __init__(self, params, name='fibinet', l2_reg=None, **kwargs):
+    super(FiBiNet, self).__init__(name=name, **kwargs)
+    self._config = params.get_pb_config()
+    if self._config.HasField('mlp'):
+      # self.final_dnn = dnn.DNN(
+      #   self._config.mlp,
+      #   kwargs['l2_reg'] if 'l2_reg' in kwargs else None,
+      #   name='%s_fibinet_mlp' % self.name,
+      #   is_training=False)
+      p = Parameter.make_from_pb(self._config.mlp)
+      self.final_dnn = MLP(p, name=name, l2_reg=l2_reg)
+    else:
+      self.final_dnn = None
+
+  def call(self, inputs, training=None, **kwargs):
+    feature_list = []
+
+    params = Parameter.make_from_pb(self._config.senet)
+    senet = SENet(params, name='%s_senet' % self.name)
+    senet_output = senet(inputs)
+    feature_list.append(senet_output)
+
+    if self._config.HasField('bilinear'):
+      params = Parameter.make_from_pb(self._config.bilinear)
+      bilinear = BiLinear(params, name='%s_bilinear' % self.name)
+      bilinear_output = bilinear(inputs)
+      feature_list.append(bilinear_output)
+
+    if len(feature_list) > 1:
+      feature = tf.concat(feature_list, axis=-1)
+    else:
+      feature = feature_list[0]
+
+    if self.final_dnn is not None:
+      feature = self.final_dnn(feature, training=training)
+    return feature
diff --git a/easy_rec/python/layers/keras/fm.py b/easy_rec/python/layers/keras/fm.py
new file mode 100644
index 000000000..56910541f
--- /dev/null
+++ b/easy_rec/python/layers/keras/fm.py
@@ -0,0 +1,46 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import tensorflow as tf
+
+if tf.__version__ >= '2.0':
+  tf = tf.compat.v1
+
+
+class FM(tf.keras.layers.Layer):
+  """Factorization Machine models pairwise (order-2) feature interactions without linear term and bias.
+
+  References
+    - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf)
+  Input shape.
+    - List of 2D tensor with shape: ``(batch_size,embedding_size)``.
+    - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)``
+  Output shape
+    - 2D tensor with shape: ``(batch_size, 1)``.
+  """
+
+  def __init__(self, params, name='fm', **kwargs):
+    super(FM, self).__init__(name=name, **kwargs)
+    self.use_variant = params.get_or_default('use_variant', False)
+
+  def call(self, inputs, **kwargs):
+    if isinstance(inputs, (list, tuple)):
+      emb_dims = {int(x.shape[-1]) for x in inputs}
+      if len(emb_dims) != 1:
+        dims = ','.join(str(d) for d in sorted(emb_dims))
+        raise ValueError('all embedding dim must be equal in FM layer:' + dims)
+
+      with tf.name_scope(self.name):
+        fea = tf.stack(inputs, axis=1)
+    else:
+      assert inputs.shape.ndims == 3, 'input of FM layer must be a 3D tensor or a list of 2D tensors'
+      fea = inputs
+
+    with tf.name_scope(self.name):
+      square_of_sum = tf.square(tf.reduce_sum(fea, axis=1))
+      sum_of_square = tf.reduce_sum(tf.square(fea), axis=1)
+      cross_term = tf.subtract(square_of_sum, sum_of_square)
+      if self.use_variant:
+        cross_term = 0.5 * cross_term
+      else:
+        cross_term = 0.5 * tf.reduce_sum(cross_term, axis=-1, keepdims=True)
+    return cross_term
diff --git a/easy_rec/python/layers/keras/mask_net.py b/easy_rec/python/layers/keras/mask_net.py
new file mode 100644
index 000000000..8749a1ee8
--- /dev/null
+++ b/easy_rec/python/layers/keras/mask_net.py
@@ -0,0 +1,102 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import tensorflow as tf
+
+from easy_rec.python.layers.common_layers import layer_norm
+from easy_rec.python.layers.keras.blocks import MLP
+from easy_rec.python.layers.utils import Parameter
+
+if tf.__version__ >= '2.0':
+  tf = tf.compat.v1
+
+
+class MaskBlock(tf.keras.layers.Layer):
+
+  def __init__(self, params, name='mask_block', reuse=None, **kwargs):
+    super(MaskBlock, self).__init__(name=name, **kwargs)
+    self.config = params.get_pb_config()
+    self.reuse = reuse
+
+  def call(self, inputs, **kwargs):
+    net, mask_input = inputs
+    mask_input_dim = int(mask_input.shape[-1])
+    if self.config.HasField('reduction_factor'):
+      aggregation_size = int(mask_input_dim * self.config.reduction_factor)
+    elif self.config.HasField('aggregation_size'):
+      aggregation_size = self.config.aggregation_size
+    else:
+      raise ValueError(
+          'Need one of reduction factor or aggregation size for MaskBlock.')
+
+    if self.config.input_layer_norm:
+      input_name = net.name.replace(':', '_')
+      net = layer_norm(net, reuse=tf.AUTO_REUSE, name='ln_' + input_name)
+
+    # initializer = tf.initializers.variance_scaling()
+    initializer = tf.glorot_uniform_initializer()
+    mask = tf.layers.dense(
+        mask_input,
+        aggregation_size,
+        activation=tf.nn.relu,
+        kernel_initializer=initializer,
+        name='%s/hidden' % self.name,
+        reuse=self.reuse)
+    mask = tf.layers.dense(
+        mask, net.shape[-1], name='%s/mask' % self.name, reuse=self.reuse)
+    masked_net = net * mask
+
+    output_size = self.config.output_size
+    hidden = tf.layers.dense(
+        masked_net,
+        output_size,
+        use_bias=False,
+        name='%s/output' % self.name,
+        reuse=self.reuse)
+    ln_hidden = layer_norm(
+        hidden, name='%s/ln_output' % self.name, reuse=self.reuse)
+    return tf.nn.relu(ln_hidden)
+
+
+class MaskNet(tf.keras.layers.Layer):
+  """MaskNet: Introducing Feature-Wise Multiplication to CTR Ranking Models by Instance-Guided Mask.
+
+  Refer: https://arxiv.org/pdf/2102.07619.pdf
+  """
+
+  def __init__(self, params, name='mask_net', l2_reg=None, reuse=None, **kwargs):
+    super(MaskNet, self).__init__(name=name, **kwargs)
+    self.config = params.get_pb_config()
+    self.reuse = reuse  # forwarded to every MaskBlock built in call()
+    self.mlp = None
+    if self.config.HasField('mlp'):
+      p = Parameter.make_from_pb(self.config.mlp)
+      self.mlp = MLP(p, name='%s/mlp' % name, l2_reg=l2_reg)
+
+  def call(self, inputs, training=None, **kwargs):
+    if self.config.use_parallel:
+      mask_outputs = []
+      for i, block_conf in enumerate(self.config.mask_blocks):
+        params = Parameter.make_from_pb(block_conf)
+        mask_layer = MaskBlock(
+            params, name='%s/block_%d' % (self.name, i), reuse=self.reuse)
+        mask_outputs.append(mask_layer((inputs, inputs)))
+      all_mask_outputs = tf.concat(mask_outputs, axis=1)
+
+      if self.mlp is not None:
+        output = self.mlp(all_mask_outputs, training=training)
+      else:
+        output = all_mask_outputs
+      return output
+    else:
+      net = inputs
+      for i, block_conf in enumerate(self.config.mask_blocks):
+        params = Parameter.make_from_pb(block_conf)
+        mask_layer = MaskBlock(
+            params, name='%s/block_%d' % (self.name, i), reuse=self.reuse)
+        net = mask_layer((net, inputs))
+
+      if self.mlp is not None:
+        output = self.mlp(net, training=training)
+      else:
+        output = net
+      return output
diff --git a/easy_rec/python/layers/numerical_embedding.py b/easy_rec/python/layers/keras/numerical_embedding.py
similarity index 64%
rename from easy_rec/python/layers/numerical_embedding.py
rename to easy_rec/python/layers/keras/numerical_embedding.py
index 6b571a3ad..4d6a16ca5 100644
--- a/easy_rec/python/layers/numerical_embedding.py
+++ b/easy_rec/python/layers/keras/numerical_embedding.py
@@ -77,88 +77,95 @@ def __call__(self, x, *args, **kwargs):
     return x
 
 
-class PeriodicEmbedding(object):
+class PeriodicEmbedding(tf.keras.layers.Layer):
   """Periodic embeddings for numerical features described in [1].
References: * [1] Yury Gorishniy, Ivan Rubachev, Artem Babenko, "On Embeddings for Numerical Features in Tabular Deep Learning", 2022 https://arxiv.org/pdf/2203.05556.pdf - """ - def __init__(self, config, scope='periodic_embedding'): - """Init with a pb config. + Attributes: + embedding_dim: the embedding size, must be an even positive integer. + sigma: the scale of the weight initialization. + **This is a super important parameter which significantly affects performance**. + Its optimal value can be dramatically different for different datasets, so + no "default value" can exist for this parameter, and it must be tuned for + each dataset. In the original paper, during hyperparameter tuning, this + parameter was sampled from the distribution ``LogUniform[1e-2, 1e2]``. + A similar grid would be ``[1e-2, 1e-1, 1e0, 1e1, 1e2]``. + If possible, add more intermediate values to this grid. + output_3d_tensor: whether to output a 3d tensor + output_tensor_list: whether to output the list of embedding + """ - Args: - config: pb config - config.embedding_dim: the embedding size, must be an even positive integer. - config.sigma: the scale of the weight initialization. - **This is a super important parameter which significantly affects performance**. - Its optimal value can be dramatically different for different datasets, so - no "default value" can exist for this parameter, and it must be tuned for - each dataset. In the original paper, during hyperparameter tuning, this - parameter was sampled from the distribution ``LogUniform[1e-2, 1e2]``. - A similar grid would be ``[1e-2, 1e-1, 1e0, 1e1, 1e2]``. - If possible, add more intermidiate values to this grid. 
- config.output_3d_tensor: whether to output a 3d tensor - scope: variable scope name - """ - self.config = config - if config.embedding_dim % 2: + def __init__(self, params, name='periodic_embedding', **kwargs): + super(PeriodicEmbedding, self).__init__(name, **kwargs) + params.check_required(['embedding_dim', 'sigma']) + self.embedding_dim = int(params.embedding_dim) + if self.embedding_dim % 2: raise ValueError('embedding_dim must be even') - self.emb_dim = config.embedding_dim // 2 - self.scope = scope - self.initializer = tf.random_normal_initializer(stddev=config.sigma) - - def __call__(self, inputs, *args, **kwargs): + sigma = params.sigma + self.initializer = tf.random_normal_initializer(stddev=sigma) + self.add_linear_layer = params.get_or_default('add_linear_layer', True) + self.linear_activation = params.get_or_default('linear_activation', 'relu') + self.output_tensor_list = params.get_or_default('output_tensor_list', False) + self.output_3d_tensor = params.get_or_default('output_3d_tensor', False) + + def call(self, inputs, **kwargs): if inputs.shape.ndims != 2: raise ValueError('inputs of PeriodicEmbedding must have 2 dimensions.') num_features = int(inputs.shape[-1]) - with tf.variable_scope(self.scope): + emb_dim = self.embedding_dim // 2 + with tf.variable_scope(self.name): c = tf.get_variable( 'coefficients', - shape=[1, num_features, self.emb_dim], + shape=[1, num_features, emb_dim], initializer=self.initializer) features = inputs[..., None] # [B, N, 1] v = 2 * math.pi * c * features # [B, N, E] emb = tf.concat([tf.sin(v), tf.cos(v)], axis=-1) # [B, N, 2E] - dim = self.config.embedding_dim - if self.config.add_linear_layer: + dim = self.embedding_dim + if self.add_linear_layer: linear = NLinear(num_features, dim, dim) emb = linear(emb) - act = get_activation(self.config.linear_activation) + act = get_activation(self.linear_activation) if callable(act): emb = act(emb) output = tf.reshape(emb, [-1, num_features * dim]) - if 
self.config.output_tensor_list: + if self.output_tensor_list: return output, tf.unstack(emb, axis=1) - if self.config.output_3d_tensor: + if self.output_3d_tensor: return output, emb return output -class AutoDisEmbedding(object): +class AutoDisEmbedding(tf.keras.layers.Layer): """An Embedding Learning Framework for Numerical Features in CTR Prediction. Refer: https://arxiv.org/pdf/2012.08986v2.pdf """ - def __init__(self, config, scope='auto_dis'): - self.config = config - self.emb_dim = config.embedding_dim - self.num_bins = config.num_bins - self.scope = scope - - def __call__(self, inputs, *args, **kwargs): + def __init__(self, params, name='auto_dis_embedding', **kwargs): + super(AutoDisEmbedding, self).__init__(name, **kwargs) + params.check_required(['embedding_dim', 'num_bins', 'temperature']) + self.emb_dim = int(params.embedding_dim) + self.num_bins = int(params.num_bins) + self.temperature = params.temperature + self.keep_prob = params.get_or_default('keep_prob', 0.8) + self.output_tensor_list = params.get_or_default('output_tensor_list', False) + self.output_3d_tensor = params.get_or_default('output_3d_tensor', False) + + def call(self, inputs, **kwargs): if inputs.shape.ndims != 2: - raise ValueError('inputs of PeriodicEmbedding must have 2 dimensions.') + raise ValueError('inputs of AutoDisEmbedding must have 2 dimensions.') num_features = int(inputs.shape[-1]) - with tf.variable_scope(self.scope): + with tf.variable_scope(self.name): meta_emb = tf.get_variable( 'meta_embedding', shape=[1, num_features, self.num_bins, self.emb_dim]) @@ -173,18 +180,17 @@ def __call__(self, inputs, *args, **kwargs): y = tf.squeeze(y, axis=3) # [B, N, num_bin] # keep_prob(float): if dropout_flag is True, keep_prob rate to keep connect - alpha = self.config.keep_prob + alpha = self.keep_prob x_bar = y + alpha * hidden # [B, N, num_bin] - t = self.config.temperature - x_hat = tf.nn.softmax(x_bar / t) # [B, N, num_bin] + x_hat = tf.nn.softmax(x_bar / self.temperature) # [B, 
N, num_bin] emb = tf.matmul(x_hat[:, :, None, :], meta_emb) # [B, N, 1, D] emb = tf.squeeze(emb, axis=2) # [B, N, D] output = tf.reshape(emb, [-1, self.emb_dim * num_features]) # [B, N*D] - if self.config.output_tensor_list: + if self.output_tensor_list: return output, tf.unstack(emb, axis=1) - if self.config.output_3d_tensor: + if self.output_3d_tensor: return output, emb return output diff --git a/easy_rec/python/layers/mask_net.py b/easy_rec/python/layers/mask_net.py deleted file mode 100644 index 2ec3f5799..000000000 --- a/easy_rec/python/layers/mask_net.py +++ /dev/null @@ -1,108 +0,0 @@ -# -*- encoding:utf-8 -*- -# Copyright (c) Alibaba, Inc. and its affiliates. -import tensorflow as tf - -from easy_rec.python.layers import dnn -from easy_rec.python.layers.common_layers import layer_norm - -if tf.__version__ >= '2.0': - tf = tf.compat.v1 - - -class MaskBlock(object): - - def __init__(self, mask_block_config, name='mask_block', reuse=None): - self.mask_block_config = mask_block_config - self.name = name - self.reuse = reuse - - def __call__(self, net, mask_input): - mask_input_dim = int(mask_input.shape[-1]) - if self.mask_block_config.HasField('reduction_factor'): - aggregation_size = int(mask_input_dim * - self.mask_block_config.reduction_factor) - elif self.mask_block_config.HasField('aggregation_size') is not None: - aggregation_size = self.mask_block_config.aggregation_size - else: - raise ValueError( - 'Need one of reduction factor or aggregation size for MaskBlock.') - - if self.mask_block_config.input_layer_norm: - input_name = net.name.replace(':', '_') - net = layer_norm(net, reuse=tf.AUTO_REUSE, name='ln_' + input_name) - - # initializer = tf.initializers.variance_scaling() - initializer = tf.glorot_uniform_initializer() - mask = tf.layers.dense( - mask_input, - aggregation_size, - activation=tf.nn.relu, - kernel_initializer=initializer, - name='%s/hidden' % self.name, - reuse=self.reuse) - mask = tf.layers.dense( - mask, net.shape[-1], 
name='%s/mask' % self.name, reuse=self.reuse) - masked_net = net * mask - - output_size = self.mask_block_config.output_size - hidden = tf.layers.dense( - masked_net, - output_size, - use_bias=False, - name='%s/output' % self.name, - reuse=self.reuse) - ln_hidden = layer_norm( - hidden, name='%s/ln_output' % self.name, reuse=self.reuse) - return tf.nn.relu(ln_hidden) - - -class MaskNet(object): - - def __init__(self, mask_net_config, name='mask_net', reuse=None): - """MaskNet: Introducing Feature-Wise Multiplication to CTR Ranking Models by Instance-Guided Mask. - - Refer: https://arxiv.org/pdf/2102.07619.pdf - """ - self.mask_net_config = mask_net_config - self.name = name - self.reuse = reuse - - def __call__(self, inputs, is_training, l2_reg=None): - conf = self.mask_net_config - if conf.use_parallel: - mask_outputs = [] - for i, block_conf in enumerate(self.mask_net_config.mask_blocks): - mask_layer = MaskBlock( - block_conf, name='%s/block_%d' % (self.name, i), reuse=self.reuse) - mask_outputs.append(mask_layer(mask_input=inputs, net=inputs)) - all_mask_outputs = tf.concat(mask_outputs, axis=1) - - if conf.HasField('mlp'): - mlp = dnn.DNN( - conf.mlp, - l2_reg, - name='%s/mlp' % self.name, - is_training=is_training, - reuse=self.reuse) - output = mlp(all_mask_outputs) - else: - output = all_mask_outputs - return output - else: - net = inputs - for i, block_conf in enumerate(self.mask_net_config.mask_blocks): - mask_layer = MaskBlock( - block_conf, name='%s/block_%d' % (self.name, i), reuse=self.reuse) - net = mask_layer(net=net, mask_input=inputs) - - if conf.HasField('mlp'): - mlp = dnn.DNN( - conf.mlp, - l2_reg, - name='%s/mlp' % self.name, - is_training=is_training, - reuse=self.reuse) - output = mlp(net) - else: - output = net - return output diff --git a/easy_rec/python/layers/sequence_encoder.py b/easy_rec/python/layers/sequence_encoder.py index 5286215d4..24dab9754 100644 --- a/easy_rec/python/layers/sequence_encoder.py +++ 
b/easy_rec/python/layers/sequence_encoder.py
@@ -4,8 +4,8 @@
 
 import tensorflow as tf
 
-from easy_rec.python.layers.bst import BST
-from easy_rec.python.layers.din import DIN
+from easy_rec.python.layers.keras.bst import BST
+from easy_rec.python.layers.keras.din import DIN
 from easy_rec.python.protos.feature_config_pb2 import FeatureConfig
 
 if tf.__version__ >= '2.0':
diff --git a/easy_rec/python/layers/utils.py b/easy_rec/python/layers/utils.py
index 43204241c..1ba585e07 100644
--- a/easy_rec/python/layers/utils.py
+++ b/easy_rec/python/layers/utils.py
@@ -158,3 +158,79 @@ def mark_input_src(name, src_desc):
                             'name': name,
                             'src': src_desc
                         }))
+
+
+class Parameter(object):
+  """Uniform accessor over layer parameters.
+
+  Wraps either a dict-like struct (`is_struct=True`, accessed by key) or a
+  protobuf message (`is_struct=False`, accessed by attribute).
+  """
+
+  def __init__(self, params, is_struct, l2_reg=None):
+    self.params = params
+    self.is_struct = is_struct
+    self._l2_reg = l2_reg
+
+  @staticmethod
+  def make_from_pb(config):
+    """Wrap the protobuf message `config` (is_struct=False)."""
+    return Parameter(config, False)
+
+  def get_pb_config(self):
+    """Return the wrapped protobuf message; struct params are rejected."""
+    assert not self.is_struct, 'Struct parameter can not convert to pb config'
+    return self.params
+
+  @property
+  def l2_regularizer(self):
+    return self._l2_reg
+
+  @l2_regularizer.setter
+  def l2_regularizer(self, value):
+    self._l2_reg = value
+
+  def __getattr__(self, key):
+    # Only called when normal lookup fails. Own attributes and dunder names
+    # must not be resolved through `params`: copy/pickle probe attributes on
+    # an uninitialized instance, which would otherwise recurse forever.
+    if key.startswith('__') or key in ('params', 'is_struct', '_l2_reg'):
+      raise AttributeError(key)
+    if self.is_struct:
+      return self.params[key]
+    return getattr(self.params, key)
+
+  def __getitem__(self, key):
+    if self.is_struct:
+      return self.params[key]
+    return getattr(self.params, key)
+
+  def get_or_default(self, key, def_val):
+    """Return the value of `key`, or `def_val` for a struct lacking `key`.
+
+    A protobuf message always yields `getattr(message, key)`; `def_val` is
+    ignored in that case.
+    """
+    if self.is_struct:
+      if key in self.params:
+        return self.params[key]
+      return def_val
+    else:  # pb message
+      return getattr(self.params, key)
+
+  def check_required(self, keys):
+    """Raise KeyError if a struct lacks a key; no-op for pb messages."""
+    if not self.is_struct:
+      return
+    if not isinstance(keys, (list, tuple)):
+      keys = [keys]
+    for key in keys:
+      if key not in self.params:
+        raise KeyError('%s must be set in params' % key)
+
+  def has_field(self, key):
+    """Tell whether `key` is present (struct) or set (pb `HasField`)."""
+    if self.is_struct:
+      return key in self.params
+    else:
+      return self.params.HasField(key)
diff --git a/easy_rec/python/model/easy_rec_model.py 
b/easy_rec/python/model/easy_rec_model.py index f1a3189f2..b114d0788 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -64,14 +64,13 @@ def __init__(self, if constant.SAMPLE_WEIGHT in features: self._sample_weight = features[constant.SAMPLE_WEIGHT] - self._sequence_encoder = SequenceEncoder(self._input_layer, feature_configs, - model_config.feature_groups, - self._l2_reg) - self._sequence_encoding_by_group_name = {} + # self._sequence_encoder = SequenceEncoder(self._input_layer, feature_configs, + # model_config.feature_groups, + # self._l2_reg) + # self._sequence_encoding_by_group_name = {} if model_config.HasField('backbone'): self._backbone = Backbone( model_config.backbone, - self, features, input_layer=self._input_layer, l2_reg=self._l2_reg) @@ -85,7 +84,10 @@ def has_backbone(self): @property def backbone(self): if self._backbone: - return self._backbone(self._is_training) + output = self._backbone(self._is_training) + loss_dict = self._backbone.loss_dict + self._loss_dict.update(loss_dict) + return output return None @property @@ -135,50 +137,50 @@ def build_input_layer(self, model_config, feature_configs): is_training=self._is_training, is_predicting=self._is_predicting) - def get_sequence_encoding(self, group_name=None, is_training=True): - if group_name is not None: - if group_name in self._sequence_encoding_by_group_name: - return self._sequence_encoding_by_group_name[group_name] - encoding = self._sequence_encoder( - self._feature_dict, - group_name, - is_training, - loss_dict=self._loss_dict) - self._sequence_encoding_by_group_name[group_name] = encoding - return encoding - - seq_encoding = [] - for group in self.feature_groups: - if len(group.sequence_encoders) == 0: - continue - group_name = group.group_name - if group_name in self._sequence_encoding_by_group_name: - encoding = self._sequence_encoding_by_group_name[group_name] - else: - encoding = self._sequence_encoder( - self._feature_dict, - 
group_name, - is_training, - loss_dict=self._loss_dict) - self._sequence_encoding_by_group_name[group_name] = encoding - if encoding is not None: - seq_encoding.append(encoding) - - if len(seq_encoding) > 1: - encoding = tf.concat(seq_encoding, axis=-1) - elif len(seq_encoding) == 1: - encoding = seq_encoding[0] - else: - return None - - # if self._base_model_config.HasField('sequence_dnn'): - # sequence_dnn = dnn.DNN( - # self._base_model_config.sequence_dnn, - # self._l2_reg, - # name='sequence_dnn', - # is_training=self._is_training) - # encoding = sequence_dnn(encoding) - return encoding + # def get_sequence_encoding(self, group_name=None, is_training=True): + # if group_name is not None: + # if group_name in self._sequence_encoding_by_group_name: + # return self._sequence_encoding_by_group_name[group_name] + # encoding = self._sequence_encoder( + # self._feature_dict, + # group_name, + # is_training, + # loss_dict=self._loss_dict) + # self._sequence_encoding_by_group_name[group_name] = encoding + # return encoding + # + # seq_encoding = [] + # for group in self.feature_groups: + # if len(group.sequence_encoders) == 0: + # continue + # group_name = group.group_name + # if group_name in self._sequence_encoding_by_group_name: + # encoding = self._sequence_encoding_by_group_name[group_name] + # else: + # encoding = self._sequence_encoder( + # self._feature_dict, + # group_name, + # is_training, + # loss_dict=self._loss_dict) + # self._sequence_encoding_by_group_name[group_name] = encoding + # if encoding is not None: + # seq_encoding.append(encoding) + # + # if len(seq_encoding) > 1: + # encoding = tf.concat(seq_encoding, axis=-1) + # elif len(seq_encoding) == 1: + # encoding = seq_encoding[0] + # else: + # return None + # + # # if self._base_model_config.HasField('sequence_dnn'): + # # sequence_dnn = dnn.DNN( + # # self._base_model_config.sequence_dnn, + # # self._l2_reg, + # # name='sequence_dnn', + # # is_training=self._is_training) + # # encoding = 
sequence_dnn(encoding) + # return encoding @abstractmethod def build_predict_graph(self): diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index 7d6b9e877..a5f447d86 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -35,17 +35,19 @@ def build_predict_graph(self): 'method `build_predict_graph` must be implemented when backbone network do not exits' ) output = self.backbone - - model_config = getattr(self._base_model_config, - self._base_model_config.WhichOneof('model')) - if hasattr(model_config, 'add_head_logits_layer') and \ - model_config.HasField('add_head_logits_layer'): - add_head_logits_layer = model_config.add_head_logits_layer - else: - add_head_logits_layer = True - if add_head_logits_layer: + if int(output.shape[-1]) != self._num_class: logging.info('add head logits layer for rank model') output = tf.layers.dense(output, self._num_class, name='output') + # model_config = getattr(self._base_model_config, + # self._base_model_config.WhichOneof('model')) + # if hasattr(model_config, 'add_head_logits_layer') and \ + # model_config.HasField('add_head_logits_layer'): + # add_head_logits_layer = model_config.add_head_logits_layer + # else: + # add_head_logits_layer = True + # if add_head_logits_layer: + # logging.info('add head logits layer for rank model') + # output = tf.layers.dense(output, self._num_class, name='output') self._add_to_prediction_dict(output) return self._prediction_dict diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto index b37b14b2c..a11944d95 100644 --- a/easy_rec/python/protos/backbone.proto +++ b/easy_rec/python/protos/backbone.proto @@ -2,14 +2,17 @@ syntax = "proto2"; package protos; import "easy_rec/python/protos/dnn.proto"; -import "easy_rec/python/protos/fm.proto"; -import "easy_rec/python/protos/layer.proto"; -import "easy_rec/python/protos/fibinet.proto"; -import "easy_rec/python/protos/masknet.proto"; - - 
-message SequenceLayer { - optional MLP mlp = 1; +import "easy_rec/python/protos/keras_layer.proto"; + +message InputLayer { + optional bool do_batch_norm = 1; + optional bool do_layer_norm = 2; + optional float dropout_rate = 3; + optional float feature_dropout_rate = 4; + optional bool only_output_feature_list = 5; + optional bool only_output_3d_tensor = 6; + optional bool output_2d_tensor_and_feature_list = 7; + optional bool output_seq_and_normal_feature = 8; } message Lambda { @@ -21,34 +24,18 @@ message Input { optional string input_fn = 2; } -message KerasLayer { - required string class_name = 1; - optional Any params = 2; -} - message Block { required string name = 1; // the input names of feature groups or other blocks repeated Input inputs = 2; optional int32 input_concat_axis = 3 [default = -1]; - optional string extra_input_fn = 4; + optional bool merge_inputs_into_list = 4; + optional string extra_input_fn = 5; oneof layer { - Lambda Lambda = 100; InputLayer input_layer = 101; - MLP mlp = 102; - PeriodicEmbedding periodic_embedding = 103; - AutoDisEmbedding auto_dis_embedding = 104; - SequenceLayer sequence_encoder = 105; - HighWayTower highway = 106; - MaskNet masknet = 107; - SENet senet = 108; - FiBiNetTower fibinet = 109; - FM fm = 110; - // Concatenate concat = 111; - // Reshape reshape = 112; - Add add = 113; - Dot dot = 114; - //OpChain chain = 116; + Lambda lambda = 102; + KerasLayer keras_layer = 103; + Sequential sequential = 104; } } @@ -58,25 +45,13 @@ message BackboneTower { optional MLP top_mlp = 3; } -//message Operator { -// oneof Op { -// MLP mlp = 102; -// PeriodicEmbedding periodic_embedding = 103; -// AutoDisEmbedding auto_dis_embedding = 104; -// HighWayTower highway = 106; -// MaskNet masknet = 107; -// SENet senet = 108; -// FiBiNetTower fibinet = 109; -// FM fm = 110; -// Concatenate concat = 111; -// Reshape reshape = 112; -// Add add = 113; -// Dot dot = 114; -// Lambda Lambda = 115; -// OpChain chain = 116; -// } -//} -// 
-//message OpChain { -// repeated Operator ops = 1; -//} +message Layer { + oneof layer { + Lambda lambda = 101; + KerasLayer keras_layer = 102; + } +} + +message Sequential { + repeated Layer layers = 1; +} diff --git a/easy_rec/python/protos/dnn.proto b/easy_rec/python/protos/dnn.proto index 00fe79d82..ff40f0fe4 100644 --- a/easy_rec/python/protos/dnn.proto +++ b/easy_rec/python/protos/dnn.proto @@ -19,9 +19,13 @@ message MLP { // ratio of dropout repeated float dropout_ratio = 2; // activation function - optional string activation = 3 [default = 'tf.nn.relu']; + optional string activation = 3 [default = 'relu']; // use batch normalization optional bool use_bn = 4 [default = true]; - optional bool last_layer_no_activation = 5 [default = false]; - optional bool last_layer_no_batch_norm = 6 [default = false]; + optional bool use_final_bn = 5 [default = true]; + optional string final_activation = 6 [default = 'relu']; + optional bool use_bias = 7 [default = true]; + // kernel_initializer + optional string initializer = 8 [default = 'he_uniform']; + optional bool use_bn_after_activation = 9; } diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 48c6f4f8d..2bb801847 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -29,11 +29,12 @@ import "easy_rec/python/protos/multi_tower_recall.proto"; // for input performance test message DummyModel { } + // configure backbone network in a free style way message RankModel { optional float l2_regularization = 1; - optional bool add_head_logits_layer = 2 [default=true]; - optional uint32 wide_output_dim = 3; + optional uint32 wide_output_dim = 2; + // optional bool add_head_logits_layer = 3 [default=true]; } // for knowledge distillation diff --git a/easy_rec/python/protos/feature_config.proto b/easy_rec/python/protos/feature_config.proto index e8b3b2c4f..ee245b0e9 100644 --- 
a/easy_rec/python/protos/feature_config.proto +++ b/easy_rec/python/protos/feature_config.proto @@ -145,7 +145,7 @@ message FeatureGroupConfig { optional WideOrDeep wide_deep = 3 [default = DEEP]; repeated SeqAttGroupConfig sequence_features = 4; optional bool negative_sampler = 5 [default = false]; - repeated SequenceEncoder sequence_encoders = 6; + // repeated SequenceEncoder sequence_encoders = 6; } message SeqAttMap { diff --git a/easy_rec/python/protos/fibinet.proto b/easy_rec/python/protos/fibinet.proto deleted file mode 100644 index 1d48448eb..000000000 --- a/easy_rec/python/protos/fibinet.proto +++ /dev/null @@ -1,23 +0,0 @@ -syntax = "proto2"; -package protos; - -import "easy_rec/python/protos/dnn.proto"; - -message SENet { - required uint32 reduction_ratio = 1 [default = 4]; - optional uint32 num_squeeze_group = 2 [default = 2]; - optional bool use_skip_connection = 3 [default = true]; - optional bool use_output_layer_norm = 4 [default = true]; -} - -message Bilinear { - required string type = 1 [default = 'interaction']; - required bool use_plus = 2 [default = true]; - required uint32 num_output_units = 3; -} - -message FiBiNetTower { - optional Bilinear bilinear = 1; - required SENet senet = 2; - optional DNN mlp = 8; -} diff --git a/easy_rec/python/protos/keras_layer.proto b/easy_rec/python/protos/keras_layer.proto new file mode 100644 index 000000000..94a3ba801 --- /dev/null +++ b/easy_rec/python/protos/keras_layer.proto @@ -0,0 +1,26 @@ +syntax = "proto2"; +package protos; + +import "google/protobuf/struct.proto"; +import "easy_rec/python/protos/layer.proto"; +import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/fm.proto"; +import "easy_rec/python/protos/seq_encoder.proto"; + +message KerasLayer { + required string class_name = 1; + oneof params { + google.protobuf.Struct st_params = 2; + PeriodicEmbedding periodic_embedding = 3; + AutoDisEmbedding auto_dis_embedding = 4; + FM fm = 5; + MaskBlock mask_block = 6; + MaskNet 
masknet = 7; + SENet senet = 8; + Bilinear bilinear = 9; + FiBiNet fibinet = 10; + MLP mlp = 11; + DINEncoder din = 12; + BSTEncoder bst = 13; + } +} diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index e7ad65460..9a1e40acb 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -3,16 +3,6 @@ package protos; import "easy_rec/python/protos/dnn.proto"; -message InputLayer { - optional bool do_batch_norm = 1; - optional bool do_layer_norm = 2; - optional float dropout_rate = 3; - optional float feature_dropout_rate = 4; - optional bool only_output_feature_list = 5; - optional bool only_output_3d_tensor = 6; - optional bool output_2d_tensor_and_feature_list = 7; -} - message HighWayTower { optional string input = 1; required uint32 emb_size = 2; @@ -38,18 +28,34 @@ message AutoDisEmbedding { optional bool output_tensor_list = 6; } -message Concatenate { - required int32 axis = 1; - optional int32 expand_dim_before = 2; - optional int32 expand_dim_after = 3; +message SENet { + required uint32 reduction_ratio = 1 [default = 4]; + optional uint32 num_squeeze_group = 2 [default = 2]; + optional bool use_skip_connection = 3 [default = true]; + optional bool use_output_layer_norm = 4 [default = true]; +} + +message Bilinear { + required string type = 1 [default = 'interaction']; + required bool use_plus = 2 [default = true]; + required uint32 num_output_units = 3; } -message Reshape { - repeated int32 dims = 1; +message FiBiNet { + optional Bilinear bilinear = 1; + required SENet senet = 2; + optional MLP mlp = 8; } -message Add { +message MaskBlock { + optional float reduction_factor = 1; + required uint32 output_size = 2; + optional uint32 aggregation_size = 3; + optional bool input_layer_norm = 4 [default = true]; } -message Dot { +message MaskNet { + repeated MaskBlock mask_blocks = 1; + required bool use_parallel = 2 [default = true]; + optional MLP mlp = 3; } diff --git 
a/easy_rec/python/protos/masknet.proto b/easy_rec/python/protos/masknet.proto deleted file mode 100644 index 3feba334e..000000000 --- a/easy_rec/python/protos/masknet.proto +++ /dev/null @@ -1,17 +0,0 @@ -syntax = "proto2"; -package protos; - -import "easy_rec/python/protos/dnn.proto"; - -message MaskBlock { - optional float reduction_factor = 1; - required uint32 output_size = 2; - optional uint32 aggregation_size = 3; - optional bool input_layer_norm = 4 [default = true]; -} - -message MaskNet { - repeated MaskBlock mask_blocks = 1; - required bool use_parallel = 2 [default = true]; - optional DNN mlp = 3; -} diff --git a/easy_rec/python/utils/load_class.py b/easy_rec/python/utils/load_class.py index efd2cc9cb..0cf12c26f 100644 --- a/easy_rec/python/utils/load_class.py +++ b/easy_rec/python/utils/load_class.py @@ -229,7 +229,7 @@ def load_keras_layer(name): name: keras layer name Return: - modules or functions or classes + (layer_class, is_customize) """ name = name.strip() if name == '' or name is None: @@ -237,13 +237,13 @@ def load_keras_layer(name): path = 'easy_rec.python.layers.keras.' + name try: - return pydoc.locate(path) - except pydoc.ErrorDuringImport: + cls = pydoc.locate(path) + if cls is not None: + return cls, True path = 'tensorflow.keras.layers.' 
+ name - try: - return pydoc.locate(path) - except pydoc.ErrorDuringImport: - print('load keras layer %s failed' % name) - logging.error('load keras layer %s failed: %s' % - (name, traceback.format_exc())) - return None + return pydoc.locate(path), False + except pydoc.ErrorDuringImport: + print('load keras layer %s failed' % name) + logging.error('load keras layer %s failed: %s' % + (name, traceback.format_exc())) + return None, False diff --git a/easy_rec/python/utils/tf_utils.py b/easy_rec/python/utils/tf_utils.py index e4d39c012..160a2f67a 100644 --- a/easy_rec/python/utils/tf_utils.py +++ b/easy_rec/python/utils/tf_utils.py @@ -48,37 +48,36 @@ def get_config_type(tf_type): return type_map[tf_type] -def add_op(inputs): - if not isinstance(inputs, list): - return inputs - if len(inputs) == 1: - if isinstance(inputs[0], list): - return tf.keras.layers.Add()(inputs[0]) - return inputs[0] - return tf.keras.layers.Add()(inputs) +# def add_op(inputs): +# if not isinstance(inputs, list): +# return inputs +# if len(inputs) == 1: +# if isinstance(inputs[0], list): +# return tf.keras.layers.Add()(inputs[0]) +# return inputs[0] +# return tf.keras.layers.Add()(inputs) - -def dot_op(features): - """Compute inner dot between any two pair tensors. - - Args: - features: must be one of - - List of 2D tensor with shape: ``(batch_size,embedding_size)``. - - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` - Return: - - 2D tensor with shape: ``(batch_size, 1)``. 
- """ - if isinstance(features, (list, tuple)): - features = tf.stack(features, axis=1) - assert features.shape.ndims == 3, 'input of dot func must be a 3D tensor or a list of 2D tensors' - - batch_size = tf.shape(features)[0] - matrixdot = tf.matmul(features, features, transpose_b=True) - feature_dim = matrixdot.shape[-1] - - ones_mat = tf.ones_like(matrixdot) - lower_tri_mat = ones_mat - tf.linalg.band_part(ones_mat, 0, -1) - lower_tri_mask = tf.cast(lower_tri_mat, tf.bool) - result = tf.boolean_mask(matrixdot, lower_tri_mask) - output_dim = feature_dim * (feature_dim - 1) // 2 - return tf.reshape(result, (batch_size, output_dim)) +# def dot_op(features): +# """Compute inner dot between any two pair tensors. +# +# Args: +# features: must be one of +# - List of 2D tensor with shape: ``(batch_size,embedding_size)``. +# - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` +# Return: +# - 2D tensor with shape: ``(batch_size, 1)``. +# """ +# if isinstance(features, (list, tuple)): +# features = tf.stack(features, axis=1) +# assert features.shape.ndims == 3, 'input of dot func must be a 3D tensor or a list of 2D tensors' +# +# batch_size = tf.shape(features)[0] +# matrixdot = tf.matmul(features, features, transpose_b=True) +# feature_dim = matrixdot.shape[-1] +# +# ones_mat = tf.ones_like(matrixdot) +# lower_tri_mat = ones_mat - tf.linalg.band_part(ones_mat, 0, -1) +# lower_tri_mask = tf.cast(lower_tri_mat, tf.bool) +# result = tf.boolean_mask(matrixdot, lower_tri_mask) +# output_dim = feature_dim * (feature_dim - 1) // 2 +# return tf.reshape(result, (batch_size, output_dim)) diff --git a/examples/configs/deepfm_backbone_on_criteo.config b/examples/configs/deepfm_backbone_on_criteo.config index 467d8ad55..9cba3fb82 100644 --- a/examples/configs/deepfm_backbone_on_criteo.config +++ b/examples/configs/deepfm_backbone_on_criteo.config @@ -1,6 +1,6 @@ train_input_path: "examples/data/criteo/criteo_train_data" eval_input_path: 
"examples/data/criteo/criteo_test_data" -model_dir: "examples/ckpt/deepfm_backbone_criteo_w" +model_dir: "examples/ckpt/deepfm_backbone_criteo" train_config { log_step_count_steps: 500 @@ -574,17 +574,12 @@ model_config: { wide_deep:WIDE } backbone { - blocks { - name: 'wide_features' - input_layer { - } - } blocks { name: 'wide_logit' inputs { name: 'wide_features' } - Lambda { + lambda { expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' } } @@ -600,8 +595,14 @@ model_config: { name: 'deep_features' input_fn: 'lambda x: x[1]' } - fm { - use_variant: true + keras_layer { + class_name: 'FM' + st_params { + fields { + key: 'use_variant' + value { bool_value: true } + } + } } } blocks { @@ -610,8 +611,11 @@ model_config: { name: 'deep_features' input_fn: 'lambda x: x[0]' } - mlp { - hidden_units: [256, 128, 64] + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64] + } } } concat_blocks: ['wide_logit', 'fm', 'deep'] diff --git a/examples/configs/deepfm_backbone_on_movielens.config b/examples/configs/deepfm_backbone_on_movielens.config index 46a79d83b..c6bf82151 100644 --- a/examples/configs/deepfm_backbone_on_movielens.config +++ b/examples/configs/deepfm_backbone_on_movielens.config @@ -1,6 +1,6 @@ train_input_path: "examples/data/movielens_1m/movies_train_data" eval_input_path: "examples/data/movielens_1m/movies_test_data" -model_dir: "examples/ckpt/deepfm_backbone_movieslen_ckpt" +model_dir: "examples/ckpt/deepfm_backbone_movieslen" train_config { log_step_count_steps: 100 @@ -17,9 +17,8 @@ train_config { } use_moving_average: false } - save_checkpoints_steps: 100 + save_checkpoints_steps: 2000 sync_replicas: True - num_steps: 2500 } eval_config { @@ -150,6 +149,17 @@ feature_config: { } model_config: { model_class: 'RankModel' + feature_groups: { + group_name: 'wide' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + 
feature_names: 'genres' + wide_deep: WIDE + } feature_groups: { group_name: 'features' feature_names: 'user_id' @@ -164,28 +174,66 @@ model_config: { } backbone { blocks { - name: 'emb_list' - inputs: 'features' + name: 'wide_logit' + inputs { + name: 'wide' + } + lambda { + expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' + } + } + blocks { + name: 'features' input_layer { - output_feature_list: true + output_2d_tensor_and_feature_list: true } } blocks { name: 'fm' - inputs: 'emb_list' - fm {} + inputs { + name: 'features' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'FM' + } } blocks { name: 'deep' - inputs: 'features' - mlp { - hidden_units: [256, 128, 64] + inputs { + name: 'features' + input_fn: 'lambda x: x[0]' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64, 1] + use_final_bn: false + final_activation: 'linear' + } + } + } + blocks { + name: 'add' + inputs { + name: 'wide_logit' + } + inputs { + name: 'fm' + } + inputs { + name: 'deep' + } + merge_inputs_into_list: true + keras_layer { + class_name: 'Add' } } - concat_blocks: ['fm', 'deep'] + concat_blocks: 'add' } rank_model { l2_regularization: 1e-4 + wide_output_dim: 1 } embedding_regularization: 1e-4 } diff --git a/examples/configs/deepfm_on_movielens.config b/examples/configs/deepfm_on_movielens.config index cab092c20..0468ae12f 100644 --- a/examples/configs/deepfm_on_movielens.config +++ b/examples/configs/deepfm_on_movielens.config @@ -137,7 +137,7 @@ feature_config: { sequence_combiner: { text_cnn: { filter_sizes: [2, 3, 4] - num_filters: [16, 8, 8] + num_filters: [8, 4, 4] } } } diff --git a/examples/configs/dlrm_backbone_on_criteo.config b/examples/configs/dlrm_backbone_on_criteo.config index e87acef39..afdc0f784 100644 --- a/examples/configs/dlrm_backbone_on_criteo.config +++ b/examples/configs/dlrm_backbone_on_criteo.config @@ -528,8 +528,11 @@ model_config: { inputs { name: 'dense' } - mlp { - hidden_units: [64, 32, 16] + keras_layer 
{ + class_name: 'MLP' + mlp { + hidden_units: [64, 32, 16] + } } } blocks { @@ -548,7 +551,9 @@ model_config: { name: 'sparse' input_fn: 'lambda x: x[1]' } - dot { } + keras_layer { + class_name: 'DotInteraction' + } } blocks { name: 'sparse_2d' diff --git a/examples/configs/dlrm_on_criteo_with_autodis.config b/examples/configs/dlrm_on_criteo_with_autodis.config index eb81e0a05..151bb4424 100644 --- a/examples/configs/dlrm_on_criteo_with_autodis.config +++ b/examples/configs/dlrm_on_criteo_with_autodis.config @@ -527,11 +527,14 @@ model_config: { inputs { name: 'dense' } - auto_dis_embedding { - embedding_dim: 16 - num_bins: 20 - temperature: 0.815 - output_tensor_list: true + keras_layer { + class_name: 'AutoDisEmbedding' + auto_dis_embedding { + embedding_dim: 16 + num_bins: 40 + temperature: 0.815 + output_tensor_list: true + } } } blocks { @@ -550,7 +553,9 @@ model_config: { name: 'sparse' input_fn: 'lambda x: x[1]' } - dot { } + keras_layer { + class_name: 'DotInteraction' + } } blocks { name: 'sparse_2d' diff --git a/examples/configs/dlrm_on_criteo_with_periodic.config b/examples/configs/dlrm_on_criteo_with_periodic.config new file mode 100644 index 000000000..81d0db1b3 --- /dev/null +++ b/examples/configs/dlrm_on_criteo_with_periodic.config @@ -0,0 +1,591 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_periodic_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: 
"F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + 
input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + 
} + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + 
hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: 
"C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'num_emb' + inputs { + name: 'dense' + } + keras_layer { + class_name: 'PeriodicEmbedding' + st_params { + fields { + key: "output_tensor_list" + value { bool_value: true } + } + fields { + key: "embedding_dim" + value { number_value: 16 } + } + fields { + key: "sigma" + value { number_value: 0.005 } + } + } + } + } + blocks { + name: 'sparse' + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'dot' + inputs { + name: 'num_emb' + input_fn: 'lambda x: x[1]' + } + inputs { + name: 'sparse' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'DotInteraction' + } + } + blocks { + name: 'sparse_2d' + inputs { + name: 'sparse' + input_fn: 'lambda x: x[0]' + } + } + blocks { + name: 'num_emb_2d' + inputs { + name: 'num_emb' + input_fn: 'lambda x: x[0]' + } + } + concat_blocks: ['num_emb_2d', 'dot', 'sparse_2d'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/dlrm_standard_on_criteo.config b/examples/configs/dlrm_standard_on_criteo.config index 131a94607..03e3df7bc 100644 --- a/examples/configs/dlrm_standard_on_criteo.config +++ b/examples/configs/dlrm_standard_on_criteo.config @@ -527,8 +527,11 @@ model_config: { inputs { name: 'dense' } - mlp { - hidden_units: [64, 32, 16] + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [64, 32, 16] + } } } blocks { @@ -546,7 +549,9 @@ model_config: { inputs { 
name: 'sparse' } - dot { } + keras_layer { + class_name: 'DotInteraction' + } } concat_blocks: ['bottom_mlp', 'dot'] top_mlp { diff --git a/examples/configs/fibinet_on_movielens.config b/examples/configs/fibinet_on_movielens.config index 8508172c6..aa6bef7f0 100644 --- a/examples/configs/fibinet_on_movielens.config +++ b/examples/configs/fibinet_on_movielens.config @@ -17,9 +17,8 @@ train_config { } use_moving_average: false } - save_checkpoints_steps: 100 - sync_replicas: True - num_steps: 2500 + save_checkpoints_steps: 2000 + sync_replicas: False } eval_config { @@ -163,26 +162,30 @@ model_config: { } backbone { blocks { - name: "emb_list" - inputs: "all" + name: "all" input_layer { do_batch_norm: true - output_feature_list: true + only_output_feature_list: true } } blocks { name: "fibinet" - inputs: "emb_list" - fibinet { - senet { - reduction_ratio: 4 - } - bilinear { - type: 'each' - num_output_units: 512 - } - mlp { - hidden_units: [512, 256] + inputs { + name: "all" + } + keras_layer { + class_name: 'FiBiNet' + fibinet { + senet { + reduction_ratio: 4 + } + bilinear { + type: 'each' + num_output_units: 512 + } + mlp { + hidden_units: [512, 256] + } } } } diff --git a/examples/configs/masknet_on_movielens.config b/examples/configs/masknet_on_movielens.config index 4c7f507b9..c98e3fbd0 100644 --- a/examples/configs/masknet_on_movielens.config +++ b/examples/configs/masknet_on_movielens.config @@ -164,22 +164,27 @@ model_config: { backbone { blocks { name: "mask_net" - inputs: "all" - masknet { - mask_blocks { - aggregation_size: 512 - output_size: 256 - } - mask_blocks { - aggregation_size: 512 - output_size: 256 - } - mask_blocks { - aggregation_size: 512 - output_size: 256 - } - mlp { - hidden_units: [512, 256] + inputs { + name: "all" + } + keras_layer { + class_name: 'MaskNet' + masknet { + mask_blocks { + aggregation_size: 512 + output_size: 256 + } + mask_blocks { + aggregation_size: 512 + output_size: 256 + } + mask_blocks { + aggregation_size: 512 + 
output_size: 256 + } + mlp { + hidden_units: [512, 256] + } } } } diff --git a/examples/readme.md b/examples/readme.md index 94643541e..d33304faf 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -212,8 +212,8 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee | Model | Epoch | AUC | | ---------------- | ----- | ------ | | Wide&Deep | 1 | 0.8558 | - | DeepFM | 1 | 0.8688 | - | DeepFM(Backbone) | 1 | 0.8876 | + | DeepFM | 1 | 0.8867 | + | DeepFM(Backbone) | 1 | 0.8872 | | DCN | 1 | 0.8576 | | AutoInt | 1 | 0.8513 | | MaskNet | 1 | 0.8872 | @@ -221,17 +221,18 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee - Criteo-Research - | Model | Epoch | AUC | - | ----------------- | ----- | ------ | - | FM | 1 | 0.7577 | - | DeepFM | 1 | 0.7970 | - | DeepFM (backbone) | 1 | 0.7970 | - | DeepFM (periodic) | 1 | 0.7980 | - | DeepFM (autodis) | 1 | 0.7979 | + | Model | Epoch | AUC | + | ----------------- | ----- | ------- | + | FM | 1 | 0.7577 | + | DeepFM | 1 | 0.7970 | + | DeepFM (backbone) | 1 | 0.7970 | + | DeepFM (periodic) | 1 | 0.7980 | + | DeepFM (autodis) | 1 | 0.7979 | | DLRM | 1 | 0.79785 | - | DLRM (backbone) | 1 | 0.7993 | - | DLRM (standard) | 1 | 0.7949 | - | DLRM (autodis) | 1 | 0.7984 | + | DLRM (backbone) | 1 | 0.7993 | + | DLRM (standard) | 1 | 0.7949 | + | DLRM (autodis) | 1 | 0.7989 | + | DLRM (periodic) | 1 | 0.7998 | ### 召回模型 From 9234140a7f8ebee9232b524bbbfddbd68d85a074 Mon Sep 17 00:00:00 2001 From: weisu Date: Sun, 18 Jun 2023 19:28:18 +0800 Subject: [PATCH 35/54] [feat]: add more backbone blocks --- easy_rec/python/layers/backbone.py | 14 +- easy_rec/python/layers/common_layers.py | 2 +- easy_rec/python/layers/keras/__init__.py | 5 +- easy_rec/python/layers/keras/blocks.py | 33 ++- easy_rec/python/layers/keras/bst.py | 2 +- easy_rec/python/layers/keras/dcn.py | 9 +- easy_rec/python/layers/keras/din.py | 3 +- easy_rec/python/utils/load_class.py | 2 +- 
.../configs/dcn_backbone_on_movielens.config | 250 ++++++++++++++++++ examples/readme.md | 1 + 10 files changed, 292 insertions(+), 29 deletions(-) create mode 100644 examples/configs/dcn_backbone_on_movielens.config diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 139e31fee..d5fac8a49 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -3,6 +3,7 @@ import logging import tensorflow as tf +from google.protobuf import struct_pb2 from easy_rec.python.layers.common_layers import EnhancedInputLayer from easy_rec.python.layers.keras import MLP @@ -10,7 +11,6 @@ from easy_rec.python.protos import backbone_pb2 from easy_rec.python.utils.dag import DAG from easy_rec.python.utils.load_class import load_keras_layer -from google.protobuf import struct_pb2 if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -127,7 +127,8 @@ def __call__(self, is_training, **kwargs): block_outputs[block] = output else: inputs = block_input(config, block_outputs) - block_outputs[block] = self.call_layer(inputs, config, block, is_training) + block_outputs[block] = self.call_layer(inputs, config, block, + is_training) temp = [] for output in self._config.concat_blocks: @@ -170,10 +171,10 @@ def call_keras_layer(self, layer_conf, inputs, name, training): return layer(inputs, training=training) def call_sequential_layers(self, inputs, layers, name, training): - output = inputs - for layer in layers: - output = self.call_layer(output, layer, name, training) - return output + output = inputs + for layer in layers: + output = self.call_layer(output, layer, name, training) + return output def call_layer(self, inputs, config, name, training): layer_name = config.WhichOneof('layer') @@ -221,4 +222,3 @@ def convert_to_dict(struct): for key, value in struct.items(): kwargs[str(key)] = format_value(value) return kwargs - diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index 
810654cf3..011efb061 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -94,7 +94,7 @@ def __init__(self, config, input_layer, feature_dict): def __call__(self, group, is_training, *args, **kwargs): if self._config.output_seq_and_normal_feature: seq_features, target_feature, target_features = self._input_layer( - self._feature_dict, group, is_combine=False) + self._feature_dict, group, is_combine=False) return seq_features, target_features features, feature_list = self._input_layer(self._feature_dict, group) diff --git a/easy_rec/python/layers/keras/__init__.py b/easy_rec/python/layers/keras/__init__.py index d0dda33cf..64cacf3c9 100644 --- a/easy_rec/python/layers/keras/__init__.py +++ b/easy_rec/python/layers/keras/__init__.py @@ -1,7 +1,8 @@ -from .blocks import MLP, Highway +from .blocks import MLP +from .blocks import Highway from .bst import BST -from .din import DIN from .dcn import Cross +from .din import DIN from .dot_interaction import DotInteraction from .fibinet import BiLinear from .fibinet import FiBiNet diff --git a/easy_rec/python/layers/keras/blocks.py b/easy_rec/python/layers/keras/blocks.py index 507723017..2c7f08403 100644 --- a/easy_rec/python/layers/keras/blocks.py +++ b/easy_rec/python/layers/keras/blocks.py @@ -2,9 +2,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. """Convenience blocks for building models.""" import logging -from easy_rec.python.utils.activation import get_activation + import tensorflow as tf +from easy_rec.python.utils.activation import get_activation + class MLP(tf.keras.layers.Layer): """Sequential multi-layer perceptron (MLP) block. 
@@ -31,9 +33,9 @@ def __init__(self, params, name='mlp', **kwargs): units = list(params.hidden_units) logging.info( 'MLP(%s) units: %s, dropout: %r, activate=%s, use_bn=%r, final_bn=%r,' - ' final_activate=%s, bias=%r, initializer=%s, bn_after_activation=%r' - % (name, units, dropout_rate, activation, use_bn, use_final_bn, - final_activation, use_bias, initializer, use_bn_after_act)) + ' final_activate=%s, bias=%r, initializer=%s, bn_after_activation=%r' % + (name, units, dropout_rate, activation, use_bn, use_final_bn, + final_activation, use_bias, initializer, use_bn_after_act)) num_dropout = len(dropout_rate) self._sub_layers = [] @@ -41,13 +43,15 @@ def __init__(self, params, name='mlp', **kwargs): name = 'dnn_%d' % i drop_rate = dropout_rate[i] if i < num_dropout else 0.0 self.add_rich_layer(num_units, use_bn, drop_rate, activation, initializer, - use_bias, use_bn_after_act, name, params.l2_regularizer) + use_bias, use_bn_after_act, name, + params.l2_regularizer) n = len(units) - 1 drop_rate = dropout_rate[n] if num_dropout > n else 0.0 name = 'dnn_%d' % n self.add_rich_layer(units[-1], use_final_bn, drop_rate, final_activation, - initializer, use_bias, use_bn_after_act, name, params.l2_regularizer) + initializer, use_bias, use_bn_after_act, name, + params.l2_regularizer) def add_rich_layer(self, num_units, @@ -70,7 +74,8 @@ def add_rich_layer(self, self._sub_layers.append(dense) # bn = tf.keras.layers.BatchNormalization(name='%s/bn' % name) # keras BN layer have a stale issue on some versions of tf - bn = lambda x, training: tf.layers.batch_normalization(x, training=training, name='%s/bn' % name) + bn = lambda x, training: tf.layers.batch_normalization( + x, training=training, name='%s/%s/bn' % (self.name, name)) self._sub_layers.append(bn) act = tf.keras.layers.Activation(act_fn, name='%s/act' % name) self._sub_layers.append(act) @@ -84,7 +89,8 @@ def add_rich_layer(self, name=name) self._sub_layers.append(dense) if use_bn and use_bn_after_activation: - bn = 
lambda x, training: tf.layers.batch_normalization(x, training=training, name='%s/bn' % name) + bn = lambda x, training: tf.layers.batch_normalization( + x, training=training, name='%s/%s/bn' % (self.name, name)) self._sub_layers.append(bn) if 0.0 < dropout_rate < 1.0: @@ -101,6 +107,7 @@ def call(self, x, training=None, **kwargs): class Highway(tf.keras.layers.Layer): + def __init__(self, params, name='highway', **kwargs): super(Highway, self).__init__(name, **kwargs) params.check_required('emb_size') @@ -111,7 +118,9 @@ def __init__(self, params, name='highway', **kwargs): def call(self, inputs, training=None, **kwargs): from easy_rec.python.layers.common_layers import highway - return highway(inputs, self.emb_size, - activation=self.activation, - num_layers=self.num_layers, - dropout=self.dropout_rate if training else 0.0) + return highway( + inputs, + self.emb_size, + activation=self.activation, + num_layers=self.num_layers, + dropout=self.dropout_rate if training else 0.0) diff --git a/easy_rec/python/layers/keras/bst.py b/easy_rec/python/layers/keras/bst.py index 9492fda07..f8b876fb4 100644 --- a/easy_rec/python/layers/keras/bst.py +++ b/easy_rec/python/layers/keras/bst.py @@ -1,13 +1,13 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
import tensorflow as tf +from tensorflow.python.keras.layers import Layer from easy_rec.python.input.augment import input_aug_data from easy_rec.python.layers import multihead_cross_attention from easy_rec.python.loss.nce_loss import nce_loss from easy_rec.python.utils.activation import get_activation from easy_rec.python.utils.shape_utils import get_shape_list -from tensorflow.python.keras.layers import Layer class BST(Layer): diff --git a/easy_rec/python/layers/keras/dcn.py b/easy_rec/python/layers/keras/dcn.py index 5fe4d4c42..9585893e5 100644 --- a/easy_rec/python/layers/keras/dcn.py +++ b/easy_rec/python/layers/keras/dcn.py @@ -4,6 +4,8 @@ import tensorflow as tf +from easy_rec.python.utils.activation import get_activation + class Cross(tf.keras.layers.Layer): """Cross Layer in Deep & Cross Network to learn explicit feature interactions. @@ -70,7 +72,8 @@ def __init__(self, params, **kwargs): self._diag_scale = params.get_or_default('diag_scale', 0.0) self._use_bias = params.get_or_default('use_bias', True) preactivation = params.get_or_default('preactivation', None) - self._preactivation = tf.keras.activations.get(preactivation) + preact = get_activation(preactivation) + self._preactivation = tf.keras.activations.get(preact) kernel_initializer = params.get_or_default('kernel_initializer', 'truncated_normal') self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) @@ -89,7 +92,7 @@ def __init__(self, params, **kwargs): self._diag_scale)) def build(self, input_shape): - last_dim = input_shape[-1] + last_dim = input_shape[0][-1] if self._projection_dim is None: self._dense = tf.keras.layers.Dense( @@ -154,7 +157,7 @@ def call(self, inputs, **kwargs): else: prod_output = self._dense_v(self._dense_u(x)) - prod_output = tf.cast(prod_output, self.compute_dtype) + # prod_output = tf.cast(prod_output, self.compute_dtype) if self._diag_scale: prod_output = prod_output + self._diag_scale * x diff --git a/easy_rec/python/layers/keras/din.py 
b/easy_rec/python/layers/keras/din.py index 686d23e00..cee57ac90 100644 --- a/easy_rec/python/layers/keras/din.py +++ b/easy_rec/python/layers/keras/din.py @@ -3,12 +3,11 @@ import logging import tensorflow as tf +from tensorflow.python.keras.layers import Layer from easy_rec.python.layers import dnn from easy_rec.python.utils.shape_utils import get_shape_list -from tensorflow.python.keras.layers import Layer - class DIN(Layer): diff --git a/easy_rec/python/utils/load_class.py b/easy_rec/python/utils/load_class.py index 0cf12c26f..9ac749c76 100644 --- a/easy_rec/python/utils/load_class.py +++ b/easy_rec/python/utils/load_class.py @@ -245,5 +245,5 @@ def load_keras_layer(name): except pydoc.ErrorDuringImport: print('load keras layer %s failed' % name) logging.error('load keras layer %s failed: %s' % - (name, traceback.format_exc())) + (name, traceback.format_exc())) return None, False diff --git a/examples/configs/dcn_backbone_on_movielens.config b/examples/configs/dcn_backbone_on_movielens.config new file mode 100644 index 000000000..f16337fdd --- /dev/null +++ b/examples/configs/dcn_backbone_on_movielens.config @@ -0,0 +1,250 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/dcn_on_movieslen" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: false +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + 
input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_class: 'RankModel' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { 
+ name: "deep" + inputs { + name: 'all' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64] + } + } + } + blocks { + name: "cross1" + inputs { + name: 'all' + input_fn: 'lambda x: [x, x]' + } + keras_layer { + class_name: 'Cross' + } + } + blocks { + name: "cross2" + inputs { + name: 'all' + } + inputs { + name: 'cross1' + } + merge_inputs_into_list: true + keras_layer { + class_name: 'Cross' + } + } + blocks { + name: "cross3" + inputs { + name: 'all' + } + inputs { + name: 'cross2' + } + merge_inputs_into_list: true + keras_layer { + class_name: 'Cross' + } + } + blocks { + name: "cross4" + inputs { + name: 'all' + } + inputs { + name: 'cross3' + } + merge_inputs_into_list: true + keras_layer { + class_name: 'Cross' + } + } + blocks { + name: "cross5" + inputs { + name: 'all' + } + inputs { + name: 'cross4' + } + merge_inputs_into_list: true + keras_layer { + class_name: 'Cross' + } + } + concat_blocks: ['deep', 'cross5'] + top_mlp { + hidden_units: [64, 32, 16] + } + } + rank_model { + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} +export_config { + multi_placeholder: false +} diff --git a/examples/readme.md b/examples/readme.md index d33304faf..55bfb4cba 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -215,6 +215,7 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee | DeepFM | 1 | 0.8867 | | DeepFM(Backbone) | 1 | 0.8872 | | DCN | 1 | 0.8576 | + | DCN (Backbone) | 1 | 0.8770 | | AutoInt | 1 | 0.8513 | | MaskNet | 1 | 0.8872 | | FibiNet | 1 | 0.8879 | From 7d0e350eac99280cf5bfcc0ef4f7ae1d6618d616 Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 19 Jun 2023 12:59:17 +0800 Subject: [PATCH 36/54] [feat]: add more backbone blocks --- easy_rec/python/layers/backbone.py | 67 +++-- easy_rec/python/model/easy_rec_model.py | 7 +- easy_rec/python/model/esmm.py | 8 +- easy_rec/python/model/mmoe.py | 5 +- easy_rec/python/model/ple.py | 5 +- easy_rec/python/model/simple_multi_task.py | 5 
+- easy_rec/python/protos/backbone.proto | 31 ++- .../configs/dcn_backbone_on_movielens.config | 64 +---- examples/configs/mlp_on_movielens.config | 239 ++++++++++++++++++ ...wide_and_deep_backbone_on_movielens.config | 216 ++++++++++++++++ examples/readme.md | 24 +- 11 files changed, 571 insertions(+), 100 deletions(-) create mode 100644 examples/configs/mlp_on_movielens.config create mode 100644 examples/configs/wide_and_deep_backbone_on_movielens.config diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index d5fac8a49..7eee14a4d 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -2,6 +2,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import logging +import six import tensorflow as tf from google.protobuf import struct_pb2 @@ -9,6 +10,7 @@ from easy_rec.python.layers.keras import MLP from easy_rec.python.layers.utils import Parameter from easy_rec.python.protos import backbone_pb2 +from easy_rec.python.protos import keras_layer_pb2 from easy_rec.python.utils.dag import DAG from easy_rec.python.utils.load_class import load_keras_layer @@ -112,6 +114,14 @@ def __call__(self, is_training, **kwargs): print('backbone topological order: ' + ','.join(blocks)) for block in blocks: config = self._name_to_blocks[block] + if config.layers: # sequential layers + logging.info('call sequential %d layers' % len(config.layers)) + output = block_input(config, block_outputs) + for layer in config.layers: + output = self.call_layer(output, layer, block, is_training) + block_outputs[block] = output + continue + # just one of layer layer = config.WhichOneof('layer') if layer is None: # identity layer block_outputs[block] = block_input(config, block_outputs) @@ -121,14 +131,11 @@ def __call__(self, is_training, **kwargs): output = input_fn(block, is_training) block_outputs[block] = output elif layer == 'sequential': - inputs = block_input(config, block_outputs) - layers = config.sequential.layers - 
output = self.call_sequential_layers(inputs, layers, block, is_training) - block_outputs[block] = output + print(config) else: inputs = block_input(config, block_outputs) - block_outputs[block] = self.call_layer(inputs, config, block, - is_training) + output = self.call_layer(inputs, config, block, is_training) + block_outputs[block] = output temp = [] for output in self._config.concat_blocks: @@ -166,16 +173,19 @@ def call_keras_layer(self, layer_conf, inputs, name, training): layer = layer_cls(name=name) else: assert param_type == 'st_params', 'internal keras layer only support st_params' - kwargs = convert_to_dict(layer_conf.st_params) - layer = layer_cls(name=name, **kwargs) + try: + kwargs = convert_to_dict(layer_conf.st_params) + logging.info('call %s layer with params %r' % + (layer_conf.class_name, kwargs)) + layer = layer_cls(name=name, **kwargs) + except TypeError as e: + logging.warning(e) + args = map(format_value, layer_conf.st_params.values()) + logging.info('try to call %s layer with params %r' % + (layer_conf.class_name, args)) + layer = layer_cls(*args, name=name) return layer(inputs, training=training) - def call_sequential_layers(self, inputs, layers, name, training): - output = inputs - for layer in layers: - output = self.call_layer(output, layer, name, training) - return output - def call_layer(self, inputs, config, name, training): layer_name = config.WhichOneof('layer') if layer_name == 'keras_layer': @@ -184,6 +194,33 @@ def call_layer(self, inputs, config, name, training): conf = getattr(config, 'lambda') fn = eval(conf.expression) return fn(inputs) + if layer_name == 'recurrent': + conf = config.recurrent + fixed_input_index = -1 + if conf.HasField('fixed_input_index'): + fixed_input_index = conf.fixed_input_index + if fixed_input_index >= 0: + assert type(inputs) in (tuple, list), '%s inputs must be a list' + output = inputs + for i in range(conf.num_steps): + name_i = '%s_%d' % (name, i) + output_i = 
self.call_keras_layer(conf.keras_layer, output, name_i, training) + if fixed_input_index >= 0: + j = 0 + for idx in range(len(output)): + if idx == fixed_input_index: + continue + output[idx] = output_i[j] if type(output_i) in (tuple, list) else output_i + j += 1 + else: + output = output_i + if fixed_input_index >= 0: + del output[fixed_input_index] + if len(output) == 1: + return output[0] + return output + return output + raise NotImplementedError('Unsupported backbone layer:' + layer_name) @@ -205,7 +242,7 @@ def concat_inputs(inputs, axis=-1, msg=''): def format_value(value): value_type = type(value) - if value_type in (unicode, str): + if value_type == six.text_type: return str(value) if value_type == float: int_v = int(value) diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index b114d0788..fe9a20ef8 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -68,6 +68,7 @@ def __init__(self, # model_config.feature_groups, # self._l2_reg) # self._sequence_encoding_by_group_name = {} + self._backbone_output = None if model_config.HasField('backbone'): self._backbone = Backbone( model_config.backbone, @@ -83,11 +84,13 @@ def has_backbone(self): @property def backbone(self): + if self._backbone_output: + return self._backbone_output if self._backbone: - output = self._backbone(self._is_training) + self._backbone_output = self._backbone(self._is_training) loss_dict = self._backbone.loss_dict self._loss_dict.update(loss_dict) - return output + return self._backbone_output return None @property diff --git a/easy_rec/python/model/esmm.py b/easy_rec/python/model/esmm.py index c6eaad483..50567ae63 100644 --- a/easy_rec/python/model/esmm.py +++ b/easy_rec/python/model/esmm.py @@ -31,7 +31,9 @@ def __init__(self, self._group_num = len(self._model_config.groups) self._group_features = [] - if self._group_num > 0: + if self.has_backbone: + logging.info('use bottom backbone network') + 
elif self._group_num > 0: logging.info('group_num: {0}'.format(self._group_num)) for group_id in range(self._group_num): group = self._model_config.groups[group_id] @@ -173,7 +175,9 @@ def build_predict_graph(self): Returns: self._prediction_dict: Prediction result of two tasks. """ - if self._group_num > 0: + if self.has_backbone: + all_fea = self.backbone + elif self._group_num > 0: group_fea_arr = [] # Both towers share the underlying network. for group_id in range(self._group_num): diff --git a/easy_rec/python/model/mmoe.py b/easy_rec/python/model/mmoe.py index acf1d6d59..3cc644f6d 100644 --- a/easy_rec/python/model/mmoe.py +++ b/easy_rec/python/model/mmoe.py @@ -26,7 +26,10 @@ def __init__(self, self._model_config = self._model_config.mmoe assert isinstance(self._model_config, MMoEConfig) - self._features, _ = self._input_layer(self._feature_dict, 'all') + if self.has_backbone: + self._features = self.backbone + else: + self._features, _ = self._input_layer(self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) def build_predict_graph(self): diff --git a/easy_rec/python/model/ple.py b/easy_rec/python/model/ple.py index f3ad71215..e04781bcd 100644 --- a/easy_rec/python/model/ple.py +++ b/easy_rec/python/model/ple.py @@ -27,7 +27,10 @@ def __init__(self, self._layer_nums = len(self._model_config.extraction_networks) self._task_nums = len(self._model_config.task_towers) - self._features, _ = self._input_layer(self._feature_dict, 'all') + if self.has_backbone: + self._features = self.backbone + else: + self._features, _ = self._input_layer(self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) def gate(self, selector_fea, vec_feas, name): diff --git a/easy_rec/python/model/simple_multi_task.py b/easy_rec/python/model/simple_multi_task.py index b4c0613bc..05dd7a773 100644 --- a/easy_rec/python/model/simple_multi_task.py +++ b/easy_rec/python/model/simple_multi_task.py @@ -27,7 +27,10 @@ def __init__(self, 
self._model_config = self._model_config.simple_multi_task assert isinstance(self._model_config, SimpleMultiTaskConfig) - self._features, _ = self._input_layer(self._feature_dict, 'all') + if self.has_backbone: + self._features = self.backbone + else: + self._features, _ = self._input_layer(self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) def build_predict_graph(self): diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto index a11944d95..6f292a48d 100644 --- a/easy_rec/python/protos/backbone.proto +++ b/easy_rec/python/protos/backbone.proto @@ -24,6 +24,20 @@ message Input { optional string input_fn = 2; } +message RecurrentLayer { + required uint32 num_steps = 1 [default = 1]; + optional uint32 fixed_input_index = 2; + required KerasLayer keras_layer = 3; +} + +message Layer { + oneof layer { + Lambda lambda = 1; + KerasLayer keras_layer = 2; + RecurrentLayer recurrent = 3; + } +} + message Block { required string name = 1; // the input names of feature groups or other blocks @@ -31,11 +45,15 @@ message Block { optional int32 input_concat_axis = 3 [default = -1]; optional bool merge_inputs_into_list = 4; optional string extra_input_fn = 5; + + // sequential layers + repeated Layer layers = 6; + // only take effect when there are no layers oneof layer { InputLayer input_layer = 101; Lambda lambda = 102; KerasLayer keras_layer = 103; - Sequential sequential = 104; + RecurrentLayer recurrent = 104; } } @@ -44,14 +62,3 @@ message BackboneTower { repeated string concat_blocks = 2; optional MLP top_mlp = 3; } - -message Layer { - oneof layer { - Lambda lambda = 101; - KerasLayer keras_layer = 102; - } -} - -message Sequential { - repeated Layer layers = 1; -} diff --git a/examples/configs/dcn_backbone_on_movielens.config b/examples/configs/dcn_backbone_on_movielens.config index f16337fdd..9c84794dd 100644 --- a/examples/configs/dcn_backbone_on_movielens.config +++ 
b/examples/configs/dcn_backbone_on_movielens.config @@ -174,68 +174,20 @@ model_config: { } } blocks { - name: "cross1" + name: "dcn" inputs { name: 'all' input_fn: 'lambda x: [x, x]' } - keras_layer { - class_name: 'Cross' - } - } - blocks { - name: "cross2" - inputs { - name: 'all' - } - inputs { - name: 'cross1' - } - merge_inputs_into_list: true - keras_layer { - class_name: 'Cross' - } - } - blocks { - name: "cross3" - inputs { - name: 'all' - } - inputs { - name: 'cross2' - } - merge_inputs_into_list: true - keras_layer { - class_name: 'Cross' - } - } - blocks { - name: "cross4" - inputs { - name: 'all' - } - inputs { - name: 'cross3' - } - merge_inputs_into_list: true - keras_layer { - class_name: 'Cross' - } - } - blocks { - name: "cross5" - inputs { - name: 'all' - } - inputs { - name: 'cross4' - } - merge_inputs_into_list: true - keras_layer { - class_name: 'Cross' + recurrent { + num_steps: 3 + fixed_input_index: 0 + keras_layer { + class_name: 'Cross' + } } } - concat_blocks: ['deep', 'cross5'] + concat_blocks: ['deep', 'dcn'] top_mlp { hidden_units: [64, 32, 16] } diff --git a/examples/configs/mlp_on_movielens.config b/examples/configs/mlp_on_movielens.config new file mode 100644 index 000000000..392f392ef --- /dev/null +++ b/examples/configs/mlp_on_movielens.config @@ -0,0 +1,239 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/mlp_movieslen" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + 
input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_class: "RankModel" + feature_groups: { + group_name: 'features' + feature_names: 'user_id' + feature_names: 
'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: 'mlp' + inputs { + name: 'features' + } + layers { + keras_layer { + class_name: 'Dense' + st_params { + fields { + key: 'units' + value: { number_value: 256 } + } + fields { + key: 'activation' + value: { string_value: 'relu' } + } + } + } + } + layers { + keras_layer { + class_name: 'Dropout' + st_params { + fields { + key: 'rate' + value: { number_value: 0.5 } + } + } + } + } + layers { + keras_layer { + class_name: 'Dense' + st_params { + fields { + key: 'units' + value: { number_value: 256 } + } + fields { + key: 'activation' + value: { string_value: 'relu' } + } + } + } + } + layers { + keras_layer { + class_name: 'Dropout' + st_params { + fields { + key: 'rate' + value: { number_value: 0.5 } + } + } + } + } + layers { + keras_layer { + class_name: 'Dense' + st_params { + fields { + key: 'units' + value: { number_value: 1 } + } + } + } + } + } + concat_blocks: 'mlp' + } + rank_model { + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} diff --git a/examples/configs/wide_and_deep_backbone_on_movielens.config b/examples/configs/wide_and_deep_backbone_on_movielens.config new file mode 100644 index 000000000..dddc91888 --- /dev/null +++ b/examples/configs/wide_and_deep_backbone_on_movielens.config @@ -0,0 +1,216 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/wide_and_deep_movieslen" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: True +} + +eval_config { + metrics_set: { 
+ auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { 
+ model_class: "RankModel" + feature_groups: { + group_name: 'wide' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: WIDE + } + feature_groups: { + group_name: 'deep' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: 'wide' + input_layer { + only_output_feature_list: true + } + } + blocks { + name: 'deep_logit' + inputs { + name: 'deep' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 256, 256, 1] + use_final_bn: false + final_activation: 'linear' + } + } + } + blocks { + name: 'final_logit' + inputs { + name: 'wide' + input_fn: 'lambda x: tf.add_n(x)' + } + inputs { + name: 'deep_logit' + } + merge_inputs_into_list: true + keras_layer { + class_name: 'Add' + } + } + concat_blocks: 'final_logit' + } + rank_model { + wide_output_dim: 1 + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} diff --git a/examples/readme.md b/examples/readme.md index 55bfb4cba..ba4f57cce 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -209,16 +209,20 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee - MovieLens-1M - | Model | Epoch | AUC | - | ---------------- | ----- | ------ | - | Wide&Deep | 1 | 0.8558 | - | DeepFM | 1 | 0.8867 | - | DeepFM(Backbone) | 1 | 0.8872 | - | DCN | 1 | 0.8576 | - | DCN (Backbone) | 1 | 0.8770 | - | AutoInt | 1 | 0.8513 | - | MaskNet | 1 | 0.8872 | - | FibiNet | 1 | 0.8879 | + | Model | Epoch | AUC | + | ------------------- | ----- | ------ | + | MLP | 1 | 0.8616 | + | Wide&Deep | 1 | 0.8558 | + | Wide&Deep(Backbone) | 1 | 0.8854 | + | DeepFM | 1 | 0.8867 | + | DeepFM(Backbone) | 1 | 0.8872 | + | DCN | 1 | 0.8576 | + | DCN (Backbone) | 1 | 0.8770 | + | 
AutoInt | 1 | 0.8513 | + | MaskNet | 1 | 0.8872 | + | FibiNet | 1 | 0.8879 | + + 备注:`MovieLens-1M` 数据集较小,评估指标方差较大,以上结果仅供参考。 - Criteo-Research From 136cf37ce92d54fb5255ded0f59d3771c8ee5673 Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 19 Jun 2023 15:14:14 +0800 Subject: [PATCH 37/54] [feat]: format backbone code, add recurrent and sequential layer --- easy_rec/python/layers/backbone.py | 9 +- easy_rec/python/layers/keras/__init__.py | 6 +- .../python/layers/keras/dot_interaction.py | 89 ------------- easy_rec/python/layers/keras/fibinet.py | 66 +++++---- easy_rec/python/layers/keras/fm.py | 46 ------- .../layers/keras/{dcn.py => interaction.py} | 125 +++++++++++++++++- easy_rec/python/layers/keras/mask_net.py | 3 - easy_rec/python/model/cmbf.py | 2 +- .../model/collaborative_metric_learning.py | 86 ++++++------ easy_rec/python/model/dcn.py | 2 +- easy_rec/python/model/deepfm.py | 6 +- easy_rec/python/model/easy_rec_model.py | 1 - easy_rec/python/model/multi_tower.py | 2 +- easy_rec/python/model/multi_tower_bst.py | 2 +- easy_rec/python/model/multi_tower_din.py | 2 +- easy_rec/python/model/multi_tower_recall.py | 2 +- easy_rec/python/model/uniter.py | 2 +- easy_rec/python/model/wide_and_deep.py | 8 +- 18 files changed, 232 insertions(+), 227 deletions(-) delete mode 100644 easy_rec/python/layers/keras/dot_interaction.py delete mode 100644 easy_rec/python/layers/keras/fm.py rename easy_rec/python/layers/keras/{dcn.py => interaction.py} (59%) diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 7eee14a4d..22645bee0 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -10,7 +10,6 @@ from easy_rec.python.layers.keras import MLP from easy_rec.python.layers.utils import Parameter from easy_rec.python.protos import backbone_pb2 -from easy_rec.python.protos import keras_layer_pb2 from easy_rec.python.utils.dag import DAG from easy_rec.python.utils.load_class import load_keras_layer @@ -204,13 
+203,17 @@ def call_layer(self, inputs, config, name, training): output = inputs for i in range(conf.num_steps): name_i = '%s_%d' % (name, i) - output_i = self.call_keras_layer(conf.keras_layer, output, name_i, training) + layer = conf.keras_layer + output_i = self.call_keras_layer(layer, output, name_i, training) if fixed_input_index >= 0: j = 0 for idx in range(len(output)): if idx == fixed_input_index: continue - output[idx] = output_i[j] if type(output_i) in (tuple, list) else output_i + if type(output_i) in (tuple, list): + output[idx] = output_i[j] + else: + output[idx] = output_i j += 1 else: output = output_i diff --git a/easy_rec/python/layers/keras/__init__.py b/easy_rec/python/layers/keras/__init__.py index 64cacf3c9..24f62ffb3 100644 --- a/easy_rec/python/layers/keras/__init__.py +++ b/easy_rec/python/layers/keras/__init__.py @@ -1,13 +1,13 @@ from .blocks import MLP from .blocks import Highway from .bst import BST -from .dcn import Cross from .din import DIN -from .dot_interaction import DotInteraction from .fibinet import BiLinear from .fibinet import FiBiNet from .fibinet import SENet -from .fm import FM +from .interaction import FM +from .interaction import Cross +from .interaction import DotInteraction from .mask_net import MaskBlock from .mask_net import MaskNet from .numerical_embedding import AutoDisEmbedding diff --git a/easy_rec/python/layers/keras/dot_interaction.py b/easy_rec/python/layers/keras/dot_interaction.py deleted file mode 100644 index 7ec47c5ad..000000000 --- a/easy_rec/python/layers/keras/dot_interaction.py +++ /dev/null @@ -1,89 +0,0 @@ -# -*- encoding:utf-8 -*- -# Copyright (c) Alibaba, Inc. and its affiliates. -"""Implements `Dot Interaction` Layer of DLRM model.""" - -import tensorflow as tf - - -class DotInteraction(tf.keras.layers.Layer): - """Dot interaction layer. - - See theory in the DLRM paper: https://arxiv.org/pdf/1906.00091.pdf, - section 2.1.3. Sparse activations and dense activations are combined. 
- Dot interaction is applied to a batch of input Tensors [e1,...,e_k] of the - same dimension and the output is a batch of Tensors with all distinct pairwise - dot products of the form dot(e_i, e_j) for i <= j if self self_interaction is - True, otherwise dot(e_i, e_j) i < j. - - Attributes: - self_interaction: Boolean indicating if features should self-interact. - If it is True, then the diagonal entries of the interaction metric are - also taken. - skip_gather: An optimization flag. If it's set then the upper triangle part - of the dot interaction matrix dot(e_i, e_j) is set to 0. The resulting - activations will be of dimension [num_features * num_features] from which - half will be zeros. Otherwise activations will be only lower triangle part - of the interaction matrix. The later saves space but is much slower. - name: String name of the layer. - """ - - def __init__(self, params, name=None, **kwargs): - self._self_interaction = params.get_or_default('self_interaction', False) - self._skip_gather = params.get_or_default('skip_gather', False) - super(DotInteraction, self).__init__(name=name, **kwargs) - - def call(self, inputs, **kwargs): - """Performs the interaction operation on the tensors in the list. - - The tensors represent as transformed dense features and embedded categorical - features. - Pre-condition: The tensors should all have the same shape. - - Args: - inputs: List of features with shapes [batch_size, feature_dim]. - - Returns: - activations: Tensor representing interacted features. It has a dimension - `num_features * num_features` if skip_gather is True, otherside - `num_features * (num_features + 1) / 2` if self_interaction is True and - `num_features * (num_features - 1) / 2` if self_interaction is False. 
- """ - if isinstance(inputs, (list, tuple)): - # concat_features shape: batch_size, num_features, feature_dim - try: - concat_features = tf.stack(inputs, axis=1) - except (ValueError, tf.errors.InvalidArgumentError) as e: - raise ValueError('Input tensors` dimensions must be equal, original' - 'error message: {}'.format(e)) - else: - assert inputs.shape.ndims == 3, 'input of dot func must be a 3D tensor or a list of 2D tensors' - concat_features = inputs - - batch_size = tf.shape(concat_features)[0] - - # Interact features, select lower-triangular portion, and re-shape. - xactions = tf.matmul(concat_features, concat_features, transpose_b=True) - num_features = xactions.shape[-1] - ones = tf.ones_like(xactions) - if self._self_interaction: - # Selecting lower-triangular portion including the diagonal. - lower_tri_mask = tf.linalg.band_part(ones, -1, 0) - upper_tri_mask = ones - lower_tri_mask - out_dim = num_features * (num_features + 1) // 2 - else: - # Selecting lower-triangular portion not included the diagonal. - upper_tri_mask = tf.linalg.band_part(ones, 0, -1) - lower_tri_mask = ones - upper_tri_mask - out_dim = num_features * (num_features - 1) // 2 - - if self._skip_gather: - # Setting upper triangle part of the interaction matrix to zeros. 
- activations = tf.where( - condition=tf.cast(upper_tri_mask, tf.bool), - x=tf.zeros_like(xactions), - y=xactions) - out_dim = num_features * num_features - else: - activations = tf.boolean_mask(xactions, lower_tri_mask) - activations = tf.reshape(activations, (batch_size, out_dim)) - return activations diff --git a/easy_rec/python/layers/keras/fibinet.py b/easy_rec/python/layers/keras/fibinet.py index dc1f7d003..98cdb3179 100644 --- a/easy_rec/python/layers/keras/fibinet.py +++ b/easy_rec/python/layers/keras/fibinet.py @@ -5,7 +5,6 @@ import tensorflow as tf -from easy_rec.python.layers import dnn from easy_rec.python.layers.common_layers import layer_norm from easy_rec.python.layers.keras.blocks import MLP from easy_rec.python.layers.utils import Parameter @@ -15,9 +14,20 @@ class SENet(tf.keras.layers.Layer): - """SENet+ Layer used in FiBiNET,支持不同field的embedding dimension不等. + """SENET Layer used in FiBiNET. - arxiv: 2209.05016 + Input shape + - A list of 2D tensor with shape: ``(batch_size,embedding_size)``. + The ``embedding_size`` of each field can have different value. + + Output shape + - A 2D tensor with shape: ``(batch_size,sum_of_embedding_size)``. + + References: + 1. [FiBiNET](https://arxiv.org/pdf/1905.09433.pdf) + Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction + 2. 
[FiBiNet++](https://arxiv.org/pdf/2209.05016.pdf) + Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction """ def __init__(self, params, name='SENet', **kwargs): @@ -25,8 +35,6 @@ def __init__(self, params, name='SENet', **kwargs): self.config = params.get_pb_config() def call(self, inputs, **kwargs): - """embedding_list: - A list of 2D tensor with shape: ``(batch_size,embedding_size)``.""" - print('SENET layer with %d inputs' % len(inputs)) g = self.config.num_squeeze_group for emb in inputs: assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors' @@ -88,14 +96,26 @@ def _full_interaction(v_i, v_j): class BiLinear(tf.keras.layers.Layer): - """双线性特征交互层,支持不同field embeddings的size不等. + """BilinearInteraction Layer used in FiBiNET. + + Input shape + - A list of 2D tensor with shape: ``(batch_size,embedding_size)``. + Its length is ``filed_size``. + The ``embedding_size`` of each field can have different value. - arxiv: 2209.05016 + Output shape + - 2D tensor with shape: ``(batch_size,output_size)``. Attributes: - num_output_units: 输出的size - type: ['all', 'each', 'interaction'],支持其中一种 - use_plus: 是否使用bi-linear+ + num_output_units: the number of output units + type: ['all', 'each', 'interaction'], types of bilinear functions used in this layer + use_plus: whether to use bi-linear+ + + References: + 1. [FiBiNET](https://arxiv.org/pdf/1905.09433.pdf) + Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction + 2. [FiBiNet++](https://arxiv.org/pdf/2209.05016.pdf) + Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction """ def __init__(self, params, name='bilinear', **kwargs): @@ -186,36 +206,32 @@ def call(self, inputs, **kwargs): class FiBiNet(tf.keras.layers.Layer): """FiBiNet++:Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction. - This is almost an exact implementation of the original FiBiNet++ model. 
- See the original paper: - https://arxiv.org/pdf/2209.05016.pdf + References: + - [FiBiNet++](https://arxiv.org/pdf/2209.05016.pdf) + Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction """ - def __init__(self, params, name='fibinet', l2_reg=None, **kwargs): + def __init__(self, params, name='fibinet', **kwargs): super(FiBiNet, self).__init__(name, **kwargs) self._config = params.get_pb_config() if self._config.HasField('mlp'): - # self.final_dnn = dnn.DNN( - # self._config.mlp, - # kwargs['l2_reg'] if 'l2_reg' in kwargs else None, - # name='%s_fibinet_mlp' % self.name, - # is_training=False) p = Parameter.make_from_pb(self._config.mlp) - self.final_dnn = MLP(p, name=name, l2_reg=l2_reg) + p.l2_regularizer = params.l2_regularizer + self.final_mlp = MLP(p, name=name) else: - self.final_dnn = None + self.final_mlp = None def call(self, inputs, training=None, **kwargs): feature_list = [] params = Parameter.make_from_pb(self._config.senet) - senet = SENet(params, name='%s_senet' % self.name) + senet = SENet(params, name='%s/senet' % self.name) senet_output = senet(inputs) feature_list.append(senet_output) if self._config.HasField('bilinear'): params = Parameter.make_from_pb(self._config.bilinear) - bilinear = BiLinear(params, name='%s_bilinear' % self.name) + bilinear = BiLinear(params, name='%s/bilinear' % self.name) bilinear_output = bilinear(inputs) feature_list.append(bilinear_output) @@ -224,6 +240,6 @@ def call(self, inputs, training=None, **kwargs): else: feature = feature_list[0] - if self.final_dnn is not None: - feature = self.final_dnn(feature, training=training) + if self.final_mlp is not None: + feature = self.final_mlp(feature, training=training) return feature diff --git a/easy_rec/python/layers/keras/fm.py b/easy_rec/python/layers/keras/fm.py deleted file mode 100644 index 56910541f..000000000 --- a/easy_rec/python/layers/keras/fm.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- encoding:utf-8 -*- -# Copyright (c) Alibaba, Inc. 
and its affiliates. -import tensorflow as tf - -if tf.__version__ >= '2.0': - tf = tf.compat.v1 - - -class FM(tf.keras.layers.Layer): - """Factorization Machine models pairwise (order-2) feature interactions without linear term and bias. - - References - - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) - Input shape. - - List of 2D tensor with shape: ``(batch_size,embedding_size)``. - - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` - Output shape - - 2D tensor with shape: ``(batch_size, 1)``. - """ - - def __init__(self, params, name='fm', **kwargs): - super(FM, self).__init__(name, **kwargs) - self.use_variant = params.get_or_default('use_variant', False) - - def call(self, inputs, **kwargs): - if type(inputs) == list: - emb_dims = set(map(lambda x: int(x.shape[-1]), inputs)) - if len(emb_dims) != 1: - dims = ','.join([str(d) for d in emb_dims]) - raise ValueError('all embedding dim must be equal in FM layer:' + dims) - - with tf.name_scope(self.name): - fea = tf.stack(inputs, axis=1) - else: - assert inputs.shape.ndims == 3, 'input of FM layer must be a 3D tensor or a list of 2D tensors' - fea = inputs - - with tf.name_scope(self.name): - square_of_sum = tf.square(tf.reduce_sum(fea, axis=1)) - sum_of_square = tf.reduce_sum(tf.square(fea), axis=1) - cross_term = tf.subtract(square_of_sum, sum_of_square) - if self.use_variant: - cross_term = 0.5 * cross_term - else: - cross_term = 0.5 * tf.reduce_sum(cross_term, axis=-1, keepdims=True) - return cross_term diff --git a/easy_rec/python/layers/keras/dcn.py b/easy_rec/python/layers/keras/interaction.py similarity index 59% rename from easy_rec/python/layers/keras/dcn.py rename to easy_rec/python/layers/keras/interaction.py index 9585893e5..55f56f7a1 100644 --- a/easy_rec/python/layers/keras/dcn.py +++ b/easy_rec/python/layers/keras/interaction.py @@ -1,12 +1,133 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
-"""Implements `Cross` Layer, the cross layer in Deep & Cross Network (DCN).""" - import tensorflow as tf from easy_rec.python.utils.activation import get_activation +class FM(tf.keras.layers.Layer): + """Factorization Machine models pairwise (order-2) feature interactions without linear term and bias. + + References + - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) + Input shape. + - List of 2D tensor with shape: ``(batch_size,embedding_size)``. + - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` + Output shape + - 2D tensor with shape: ``(batch_size, 1)``. + """ + + def __init__(self, params, name='fm', **kwargs): + super(FM, self).__init__(name, **kwargs) + self.use_variant = params.get_or_default('use_variant', False) + + def call(self, inputs, **kwargs): + if type(inputs) == list: + emb_dims = set(map(lambda x: int(x.shape[-1]), inputs)) + if len(emb_dims) != 1: + dims = ','.join([str(d) for d in emb_dims]) + raise ValueError('all embedding dim must be equal in FM layer:' + dims) + with tf.name_scope(self.name): + fea = tf.stack(inputs, axis=1) + else: + assert inputs.shape.ndims == 3, 'input of FM layer must be a 3D tensor or a list of 2D tensors' + fea = inputs + + with tf.name_scope(self.name): + square_of_sum = tf.square(tf.reduce_sum(fea, axis=1)) + sum_of_square = tf.reduce_sum(tf.square(fea), axis=1) + cross_term = tf.subtract(square_of_sum, sum_of_square) + if self.use_variant: + cross_term = 0.5 * cross_term + else: + cross_term = 0.5 * tf.reduce_sum(cross_term, axis=-1, keepdims=True) + return cross_term + + +class DotInteraction(tf.keras.layers.Layer): + """Dot interaction layer of DLRM model.. + + See theory in the DLRM paper: https://arxiv.org/pdf/1906.00091.pdf, + section 2.1.3. Sparse activations and dense activations are combined. 
+ Dot interaction is applied to a batch of input Tensors [e1,...,e_k] of the + same dimension and the output is a batch of Tensors with all distinct pairwise + dot products of the form dot(e_i, e_j) for i <= j if self_interaction is + True, otherwise dot(e_i, e_j) for i < j. + + Attributes: + self_interaction: Boolean indicating if features should self-interact. + If it is True, then the diagonal entries of the interaction matrix are + also taken. + skip_gather: An optimization flag. If it's set then the upper triangle part + of the dot interaction matrix dot(e_i, e_j) is set to 0. The resulting + activations will be of dimension [num_features * num_features] from which + half will be zeros. Otherwise activations will be only lower triangle part + of the interaction matrix. The latter saves space but is much slower. + name: String name of the layer. + """ + + def __init__(self, params, name=None, **kwargs): + self._self_interaction = params.get_or_default('self_interaction', False) + self._skip_gather = params.get_or_default('skip_gather', False) + super(DotInteraction, self).__init__(name=name, **kwargs) + + def call(self, inputs, **kwargs): + """Performs the interaction operation on the tensors in the list. + + The tensors represent transformed dense features and embedded categorical + features. + Pre-condition: The tensors should all have the same shape. + + Args: + inputs: List of features with shapes [batch_size, feature_dim]. + + Returns: + activations: Tensor representing interacted features. It has a dimension + `num_features * num_features` if skip_gather is True, otherwise + `num_features * (num_features + 1) / 2` if self_interaction is True and + `num_features * (num_features - 1) / 2` if self_interaction is False.
+ """ + if isinstance(inputs, (list, tuple)): + # concat_features shape: batch_size, num_features, feature_dim + try: + concat_features = tf.stack(inputs, axis=1) + except (ValueError, tf.errors.InvalidArgumentError) as e: + raise ValueError('Input tensors` dimensions must be equal, original' + 'error message: {}'.format(e)) + else: + assert inputs.shape.ndims == 3, 'input of dot func must be a 3D tensor or a list of 2D tensors' + concat_features = inputs + + batch_size = tf.shape(concat_features)[0] + + # Interact features, select lower-triangular portion, and re-shape. + xactions = tf.matmul(concat_features, concat_features, transpose_b=True) + num_features = xactions.shape[-1] + ones = tf.ones_like(xactions) + if self._self_interaction: + # Selecting lower-triangular portion including the diagonal. + lower_tri_mask = tf.linalg.band_part(ones, -1, 0) + upper_tri_mask = ones - lower_tri_mask + out_dim = num_features * (num_features + 1) // 2 + else: + # Selecting lower-triangular portion not included the diagonal. + upper_tri_mask = tf.linalg.band_part(ones, 0, -1) + lower_tri_mask = ones - upper_tri_mask + out_dim = num_features * (num_features - 1) // 2 + + if self._skip_gather: + # Setting upper triangle part of the interaction matrix to zeros. + activations = tf.where( + condition=tf.cast(upper_tri_mask, tf.bool), + x=tf.zeros_like(xactions), + y=xactions) + out_dim = num_features * num_features + else: + activations = tf.boolean_mask(xactions, lower_tri_mask) + activations = tf.reshape(activations, (batch_size, out_dim)) + return activations + + class Cross(tf.keras.layers.Layer): """Cross Layer in Deep & Cross Network to learn explicit feature interactions. 
diff --git a/easy_rec/python/layers/keras/mask_net.py b/easy_rec/python/layers/keras/mask_net.py index 8749a1ee8..2e66beb22 100644 --- a/easy_rec/python/layers/keras/mask_net.py +++ b/easy_rec/python/layers/keras/mask_net.py @@ -6,9 +6,6 @@ from easy_rec.python.layers.keras.blocks import MLP from easy_rec.python.layers.utils import Parameter -if tf.__version__ >= '2.0': - tf = tf.compat.v1 - class MaskBlock(tf.keras.layers.Layer): diff --git a/easy_rec/python/model/cmbf.py b/easy_rec/python/model/cmbf.py index 0f0a8f3aa..a11a30582 100644 --- a/easy_rec/python/model/cmbf.py +++ b/easy_rec/python/model/cmbf.py @@ -38,7 +38,7 @@ def __init__(self, def build_predict_graph(self): hidden = self._cmbf_layer(self._is_training, l2_reg=self._l2_reg) - final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(hidden) diff --git a/easy_rec/python/model/collaborative_metric_learning.py b/easy_rec/python/model/collaborative_metric_learning.py index d785e7141..b19537239 100644 --- a/easy_rec/python/model/collaborative_metric_learning.py +++ b/easy_rec/python/model/collaborative_metric_learning.py @@ -48,21 +48,22 @@ def __init__( raise ValueError('unsupported loss type: %s' % LossType.Name(self._loss_type)) - self._highway_features = {} - self._highway_num = len(self._model_config.highway) - for _id in range(self._highway_num): - highway_cfg = self._model_config.highway[_id] - highway_feature, _ = self._input_layer(self._feature_dict, - highway_cfg.input) - self._highway_features[highway_cfg.input] = highway_feature - - self.input_features = [] - if self._model_config.HasField('input'): - input_feature, _ = self._input_layer(self._feature_dict, - self._model_config.input) - self.input_features.append(input_feature) - - self.dnn = copy_obj(self._model_config.dnn) + if not self.has_backbone: + self._highway_features = {} + self._highway_num =
len(self._model_config.highway) + for _id in range(self._highway_num): + highway_cfg = self._model_config.highway[_id] + highway_feature, _ = self._input_layer(self._feature_dict, + highway_cfg.input) + self._highway_features[highway_cfg.input] = highway_feature + + self.input_features = [] + if self._model_config.HasField('input'): + input_feature, _ = self._input_layer(self._feature_dict, + self._model_config.input) + self.input_features.append(input_feature) + + self.dnn = copy_obj(self._model_config.dnn) if self._labels is not None: if self._model_config.HasField('session_id'): @@ -79,32 +80,35 @@ def __init__( self.sample_id = None def build_predict_graph(self): - for _id in range(self._highway_num): - highway_cfg = self._model_config.highway[_id] - highway_fea = tf.layers.batch_normalization( - self._highway_features[highway_cfg.input], - training=self._is_training, - trainable=True, - name='highway_%s_bn' % highway_cfg.input) - highway_fea = highway( - highway_fea, - highway_cfg.emb_size, - activation=gelu, - scope='highway_%s' % _id) - print('highway_fea: ', highway_fea) - self.input_features.append(highway_fea) - - feature = tf.concat(self.input_features, axis=1) - - num_dnn_layer = len(self.dnn.hidden_units) - last_hidden = self.dnn.hidden_units.pop() - dnn_net = dnn.DNN(self.dnn, self._l2_reg, 'dnn', self._is_training) - net_output = dnn_net(feature) - tower_emb = tf.layers.dense( - inputs=net_output, - units=last_hidden, - kernel_regularizer=self._l2_reg, - name='dnn/dnn_%d' % (num_dnn_layer - 1)) + if self.has_backbone: + tower_emb = self.backbone + else: + for _id in range(self._highway_num): + highway_cfg = self._model_config.highway[_id] + highway_fea = tf.layers.batch_normalization( + self._highway_features[highway_cfg.input], + training=self._is_training, + trainable=True, + name='highway_%s_bn' % highway_cfg.input) + highway_fea = highway( + highway_fea, + highway_cfg.emb_size, + activation=gelu, + scope='highway_%s' % _id) + print('highway_fea: 
', highway_fea) + self.input_features.append(highway_fea) + + feature = tf.concat(self.input_features, axis=1) + + num_dnn_layer = len(self.dnn.hidden_units) + last_hidden = self.dnn.hidden_units.pop() + dnn_net = dnn.DNN(self.dnn, self._l2_reg, 'dnn', self._is_training) + net_output = dnn_net(feature) + tower_emb = tf.layers.dense( + inputs=net_output, + units=last_hidden, + kernel_regularizer=self._l2_reg, + name='dnn/dnn_%d' % (num_dnn_layer - 1)) if self._model_config.output_l2_normalized_emb: norm_emb = tf.nn.l2_normalize(tower_emb, axis=-1) diff --git a/easy_rec/python/model/dcn.py b/easy_rec/python/model/dcn.py index fcfa7e780..2a460163a 100644 --- a/easy_rec/python/model/dcn.py +++ b/easy_rec/python/model/dcn.py @@ -60,7 +60,7 @@ def build_predict_graph(self): tower_fea_arr.append(cross_tensor) # final tower all_fea = tf.concat(tower_fea_arr, axis=1) - final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense(all_fea, self._num_class, name='output') diff --git a/easy_rec/python/model/deepfm.py b/easy_rec/python/model/deepfm.py index d1414c050..0ead36e26 100644 --- a/easy_rec/python/model/deepfm.py +++ b/easy_rec/python/model/deepfm.py @@ -39,7 +39,7 @@ def __init__(self, def build_input_layer(self, model_config, feature_configs): # overwrite create input_layer to support wide_output_dim - has_final = len(model_config.deepfm.final_dnn.hidden_units) > 0 + has_final = len(model_config.deepfm.final_dnn.hidden_units) > 0 if not has_final: assert model_config.deepfm.wide_output_dim == model_config.num_class self._wide_output_dim = model_config.deepfm.wide_output_dim @@ -60,9 +60,9 @@ def build_predict_graph(self): deep_fea = deep_layer(self._deep_features) # Final - if len(self._model_config.final_dnn.hidden_units) > 0: + if len(self._model_config.final_dnn.hidden_units) > 0: all_fea =
tf.concat([wide_fea, fm_fea, deep_fea], axis=1) - final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense( diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index fe9a20ef8..cb6c8a802 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -13,7 +13,6 @@ from easy_rec.python.compat import regularizers from easy_rec.python.layers import input_layer from easy_rec.python.layers.backbone import Backbone -from easy_rec.python.layers.sequence_encoder import SequenceEncoder from easy_rec.python.utils import constant from easy_rec.python.utils import estimator_utils from easy_rec.python.utils import restore_filter diff --git a/easy_rec/python/model/multi_tower.py b/easy_rec/python/model/multi_tower.py index 5cdd89ba5..cb0aa6233 100644 --- a/easy_rec/python/model/multi_tower.py +++ b/easy_rec/python/model/multi_tower.py @@ -52,7 +52,7 @@ def build_predict_graph(self): tower_fea_arr.append(tower_fea) all_fea = tf.concat(tower_fea_arr, axis=1) - final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense(all_fea, self._num_class, name='output') diff --git a/easy_rec/python/model/multi_tower_bst.py b/easy_rec/python/model/multi_tower_bst.py index 4cbc9fd29..478d26a6c 100644 --- a/easy_rec/python/model/multi_tower_bst.py +++ b/easy_rec/python/model/multi_tower_bst.py @@ -180,7 +180,7 @@ def build_predict_graph(self): tower_fea_arr.append(tower_fea) all_fea = tf.concat(tower_fea_arr, axis=1) - final_dnn = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', + final_dnn = dnn.DNN(self._model_config.final_dnn, self._l2_reg,
'final_dnn', self._is_training) all_fea = final_dnn(all_fea) output = tf.layers.dense(all_fea, self._num_class, name='output') diff --git a/easy_rec/python/model/multi_tower_din.py b/easy_rec/python/model/multi_tower_din.py index e586da1cf..7a1356caa 100644 --- a/easy_rec/python/model/multi_tower_din.py +++ b/easy_rec/python/model/multi_tower_din.py @@ -120,7 +120,7 @@ def build_predict_graph(self): tower_fea_arr.append(tower_fea) all_fea = tf.concat(tower_fea_arr, axis=1) - final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense(all_fea, self._num_class, name='output') diff --git a/easy_rec/python/model/multi_tower_recall.py b/easy_rec/python/model/multi_tower_recall.py index 8f576944e..101ad36cf 100644 --- a/easy_rec/python/model/multi_tower_recall.py +++ b/easy_rec/python/model/multi_tower_recall.py @@ -57,7 +57,7 @@ def build_predict_graph(self): tower_fea_arr.append(item_tower_emb) all_fea = tf.concat(tower_fea_arr, axis=-1) - final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense(all_fea, 1, name='output') diff --git a/easy_rec/python/model/uniter.py b/easy_rec/python/model/uniter.py index 40dfc8cb1..9479ce639 100644 --- a/easy_rec/python/model/uniter.py +++ b/easy_rec/python/model/uniter.py @@ -37,7 +37,7 @@ def __init__(self, def build_predict_graph(self): hidden = self._uniter_layer(self._is_training, l2_reg=self._l2_reg) - final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(hidden) diff --git a/easy_rec/python/model/wide_and_deep.py 
b/easy_rec/python/model/wide_and_deep.py index f841ed049..e0850abe4 100755 --- a/easy_rec/python/model/wide_and_deep.py +++ b/easy_rec/python/model/wide_and_deep.py @@ -34,7 +34,7 @@ def __init__(self, def build_input_layer(self, model_config, feature_configs): # overwrite create input_layer to support wide_output_dim - has_final = len(model_config.wide_and_deep.final_dnn.hidden_units) > 0 + has_final = len(model_config.wide_and_deep.final_mlp.hidden_units) > 0 self._wide_output_dim = model_config.wide_and_deep.wide_output_dim if not has_final: model_config.wide_and_deep.wide_output_dim = model_config.num_class @@ -55,11 +55,11 @@ def build_predict_graph(self): logging.info('output deep features dimension: %d' % deep_fea.get_shape()[-1]) - has_final = len(self._model_config.final_dnn.hidden_units) > 0 + has_final = len(self._model_config.final_mlp.hidden_units) > 0 print('wide_deep has_final_dnn layers = %d' % has_final) if has_final: all_fea = tf.concat([wide_fea, deep_fea], axis=1) - final_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + final_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_layer(all_fea) output = tf.layers.dense( @@ -87,7 +87,7 @@ def get_grouped_vars(self): Return: list of list of variables. """ - assert len(self._model_config.final_dnn.hidden_units) == 0, \ + assert len(self._model_config.final_mlp.hidden_units) == 0, \ 'if use different optimizers for wide group and deep group, '\ + ' final_dnn should not be set.' 
wide_vars = [] From e795f009b02883234501d498981e17585acf9456 Mon Sep 17 00:00:00 2001 From: weisu Date: Mon, 19 Jun 2023 16:18:07 +0800 Subject: [PATCH 38/54] [feat]: format backbone code, add recurrent and sequential layer --- easy_rec/python/layers/common_layers.py | 6 +++++- easy_rec/python/layers/keras/mask_net.py | 9 +++++---- easy_rec/version.py | 2 +- examples/configs/masknet_on_movielens.config | 3 +-- examples/readme.md | 2 +- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index 011efb061..47f5bcb65 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -91,7 +91,11 @@ def __init__(self, config, input_layer, feature_dict): self._input_layer = input_layer self._feature_dict = feature_dict - def __call__(self, group, is_training, *args, **kwargs): + def __call__(self, group, is_training, **kwargs): + with tf.name_scope('input_' + group): + return self.call(group, is_training) + + def call(self, group, is_training): if self._config.output_seq_and_normal_feature: seq_features, target_feature, target_features = self._input_layer( self._feature_dict, group, is_combine=False) diff --git a/easy_rec/python/layers/keras/mask_net.py b/easy_rec/python/layers/keras/mask_net.py index 2e66beb22..0ba769972 100644 --- a/easy_rec/python/layers/keras/mask_net.py +++ b/easy_rec/python/layers/keras/mask_net.py @@ -60,12 +60,13 @@ class MaskNet(tf.keras.layers.Layer): Refer: https://arxiv.org/pdf/2102.07619.pdf """ - def __init__(self, params, name='mask_net', l2_reg=None, **kwargs): + def __init__(self, params, name='mask_net', **kwargs): super(MaskNet, self).__init__(name, **kwargs) self.config = params.get_pb_config() if self.config.HasField('mlp'): p = Parameter.make_from_pb(self.config.mlp) - self.mlp = MLP(p, name='%s/mlp' % name, l2_reg=l2_reg) + p.l2_regularizer = params.l2_regularizer + self.mlp = MLP(p, name='%s/mlp' % name) 
else: self.mlp = None @@ -75,7 +76,7 @@ def call(self, inputs, training=None, **kwargs): for i, block_conf in enumerate(self.config.mask_blocks): params = Parameter.make_from_pb(block_conf) mask_layer = MaskBlock( - params, name='%s/block_%d' % (self.name, i), reuse=self.reuse) + params, name='%s/block_%d' % (self.name, i)) mask_outputs.append(mask_layer((inputs, inputs))) all_mask_outputs = tf.concat(mask_outputs, axis=1) @@ -89,7 +90,7 @@ def call(self, inputs, training=None, **kwargs): for i, block_conf in enumerate(self.config.mask_blocks): params = Parameter.make_from_pb(block_conf) mask_layer = MaskBlock( - params, name='%s/block_%d' % (self.name, i), reuse=self.reuse) + params, name='%s/block_%d' % (self.name, i)) net = mask_layer((net, inputs)) if self.mlp is not None: diff --git a/easy_rec/version.py b/easy_rec/version.py index f70f1bfba..520cefe3d 100644 --- a/easy_rec/version.py +++ b/easy_rec/version.py @@ -1,3 +1,3 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
-__version__ = '0.6.3' +__version__ = '1.0.0' diff --git a/examples/configs/masknet_on_movielens.config b/examples/configs/masknet_on_movielens.config index c98e3fbd0..dccbbb13e 100644 --- a/examples/configs/masknet_on_movielens.config +++ b/examples/configs/masknet_on_movielens.config @@ -17,9 +17,8 @@ train_config { } use_moving_average: false } - save_checkpoints_steps: 100 + save_checkpoints_steps: 2000 sync_replicas: True - num_steps: 2500 } eval_config { diff --git a/examples/readme.md b/examples/readme.md index ba4f57cce..cbf9be600 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -220,7 +220,7 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee | DCN (Backbone) | 1 | 0.8770 | | AutoInt | 1 | 0.8513 | | MaskNet | 1 | 0.8872 | - | FibiNet | 1 | 0.8879 | + | FibiNet | 1 | 0.8893 | 备注:`MovieLens-1M` 数据集较小,评估指标方差较大,以上结果仅供参考。 From c4f5ea946a2aa4d4c4a8e617febcd39716232178 Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 20 Jun 2023 13:20:49 +0800 Subject: [PATCH 39/54] [feat]: format backbone code, add recurrent and sequential layer --- easy_rec/python/layers/backbone.py | 25 ++++- easy_rec/python/layers/common_layers.py | 2 +- easy_rec/python/layers/keras/blocks.py | 10 +- easy_rec/python/protos/backbone.proto | 9 ++ easy_rec/python/utils/config_util.py | 93 +++++++++++++++++++ ...pfm_backbone_on_criteo_with_autodis.config | 31 ++++--- ...fm_backbone_on_criteo_with_periodic.config | 27 ++++-- 7 files changed, 170 insertions(+), 27 deletions(-) diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 22645bee0..cfc0e3d60 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -129,20 +129,22 @@ def __call__(self, is_training, **kwargs): input_fn = EnhancedInputLayer(conf, self._input_layer, self._features) output = input_fn(block, is_training) block_outputs[block] = output - elif layer == 'sequential': - print(config) else: inputs = block_input(config, 
block_outputs) output = self.call_layer(inputs, config, block, is_training) block_outputs[block] = output - temp = [] + outputs = [] for output in self._config.concat_blocks: if output in block_outputs: - temp.append(block_outputs[output]) + temp = block_outputs[output] + if type(temp) in (tuple, list): + outputs.extend(temp) + else: + outputs.append(temp) else: raise ValueError('No output `%s` of backbone to be concat' % output) - output = concat_inputs(temp, msg='backbone') + output = concat_inputs(outputs, msg='backbone') if self._config.HasField('top_mlp'): params = Parameter.make_from_pb(self._config.top_mlp) @@ -193,6 +195,19 @@ def call_layer(self, inputs, config, name, training): conf = getattr(config, 'lambda') fn = eval(conf.expression) return fn(inputs) + if layer_name == 'repeat': + conf = config.repeat + n_loop = conf.num_repeat + outputs = [] + for i in range(n_loop): + name_i = '%s_%d' % (name, i) + output = self.call_keras_layer(conf.keras_layer, inputs, name_i, training) + outputs.append(output) + if len(outputs) == 1: + return outputs[0] + if conf.HasField('output_concat_axis'): + return tf.concat(outputs, conf.output_concat_axis) + return outputs if layer_name == 'recurrent': conf = config.recurrent fixed_input_index = -1 diff --git a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index 47f5bcb65..dd39d8259 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -109,7 +109,7 @@ def call(self, group, is_training): do_feature_dropout = is_training and 0.0 < self._config.feature_dropout_rate < 1.0 if do_feature_dropout: keep_prob = 1.0 - self._config.feature_dropout_rate - bern = tf.distributions.Bernoulli(probs=keep_prob) + bern = tf.distributions.Bernoulli(probs=keep_prob, dtype=tf.float32) mask = bern.sample(num_features) elif do_bn: features = tf.layers.batch_normalization(features, training=is_training) diff --git a/easy_rec/python/layers/keras/blocks.py 
b/easy_rec/python/layers/keras/blocks.py index 2c7f08403..5c14a07c3 100644 --- a/easy_rec/python/layers/keras/blocks.py +++ b/easy_rec/python/layers/keras/blocks.py @@ -101,8 +101,16 @@ def add_rich_layer(self, def call(self, x, training=None, **kwargs): """Performs the forward computation of the block.""" + from inspect import isfunction for layer in self._sub_layers: - x = layer(x, training=training) + if isfunction(layer): + x = layer(x, training=training) + else: + cls = layer.__class__.__name__ + if cls in ('Dropout', 'BatchNormalization'): + x = layer(x, training=training) + else: + x = layer(x) return x diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto index 6f292a48d..5e2262707 100644 --- a/easy_rec/python/protos/backbone.proto +++ b/easy_rec/python/protos/backbone.proto @@ -30,11 +30,19 @@ message RecurrentLayer { required KerasLayer keras_layer = 3; } +message RepeatLayer { + required uint32 num_repeat = 1 [default = 1]; + // default output the list of multiple outputs + optional int32 output_concat_axis = 2; + required KerasLayer keras_layer = 3; +} + message Layer { oneof layer { Lambda lambda = 1; KerasLayer keras_layer = 2; RecurrentLayer recurrent = 3; + RepeatLayer repeat = 4; } } @@ -54,6 +62,7 @@ message Block { Lambda lambda = 102; KerasLayer keras_layer = 103; RecurrentLayer recurrent = 104; + RepeatLayer repeat = 105; } } diff --git a/easy_rec/python/utils/config_util.py b/easy_rec/python/utils/config_util.py index b63a02f71..67f3bc351 100644 --- a/easy_rec/python/utils/config_util.py +++ b/easy_rec/python/utils/config_util.py @@ -5,6 +5,7 @@ Such as Hyper parameter tuning or automatic feature expanding. 
""" +import argparse import datetime import json import logging @@ -605,3 +606,95 @@ def process_multi_file_input_path(sampler_config_input_path): input_path = sampler_config_input_path return input_path + + +def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): + """Reads config from a file containing pipeline_pb2.EasyRecConfig. + + Args: + pipeline_config_path: Path to pipeline_pb2.EasyRecConfig text + proto. + groups: the names of feature group to be changed + emb_dim: target embedding dimension + + Returns: + Dictionary of configuration objects. Keys are `model`, `train_config`, + `train_input_config`, `eval_config`, `eval_input_config`. Value are the + corresponding config objects. + """ + if isinstance(pipeline_config_path, pipeline_pb2.EasyRecConfig): + return pipeline_config_path + + assert tf.gfile.Exists( + pipeline_config_path + ), 'pipeline_config_path [%s] not exists' % pipeline_config_path + + pipeline_config = pipeline_pb2.EasyRecConfig() + with tf.gfile.GFile(pipeline_config_path, 'r') as f: + config_str = f.read() + if pipeline_config_path.endswith('.config'): + text_format.Merge(config_str, pipeline_config) + elif pipeline_config_path.endswith('.json'): + json_format.Parse(config_str, pipeline_config) + else: + assert False, 'invalid file format(%s), currently support formats: .config(prototxt) .json' % pipeline_config_path + + target_groups = set(groups.split(',')) + features = set() + conf = pipeline_config.model_config + for group in conf.feature_groups: + if group.group_name not in target_groups: + continue + for feature in group.feature_names: + features.add(feature) + + feature_configs = get_compatible_feature_configs(pipeline_config) + for fea_conf in feature_configs: + fea_name = fea_conf.input_names[0] + if fea_conf.HasField('feature_name'): + fea_name = fea_conf.feature_name + if fea_name in features: + fea_conf.embedding_dim = emb_dim + + return pipeline_config + + +if __name__ == '__main__': + parser = 
argparse.ArgumentParser() + parser.add_argument( + '--pipeline_config_path', + type=str, + default=None, + help='Path to pipeline config file.') + parser.add_argument( + '--feature_groups', + type=str, + default=None, + help='The name of feature group to be changed.') + parser.add_argument( + '--embedding_dim', + type=int, + default=None, + help='The embedding dim to be changed to.') + parser.add_argument( + '--save_config_path', + type=str, + default=None, + help='Path to save changed config.') + + args, extra_args = parser.parse_known_args() + if args.pipeline_config_path is None: + raise ValueError('--pipeline_config_path must be set') + if args.save_config_path is None: + raise ValueError('--save_config_path must be set') + if args.feature_groups is None: + raise ValueError('--feature_groups must be set') + if args.embedding_dim is None: + raise ValueError('--embedding_dim must be set') + + # 传入一个不存在的feature group,可以起到format配置文件的效果 + config = change_configured_embedding_dim( + args.pipeline_config_path, + args.feature_groups, + args.embedding_dim) + save_message(config, args.save_config_path) diff --git a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config index 970508598..49fcf8e38 100644 --- a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config +++ b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config @@ -674,7 +674,7 @@ model_config: { inputs { name: 'wide_features' } - Lambda { + lambda { expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' } } @@ -683,11 +683,14 @@ model_config: { inputs { name: 'numerical_features' } - auto_dis_embedding { - embedding_dim: 16 - num_bins: 20 - temperature: 0.815 - output_tensor_list: true + keras_layer { + class_name: 'AutoDisEmbedding' + auto_dis_embedding { + embedding_dim: 16 + num_bins: 20 + temperature: 0.815 + output_tensor_list: true + } } } blocks { @@ -706,8 +709,11 @@ model_config: { name: 'num_emb' 
input_fn: 'lambda x: x[1]' } - fm { - use_variant: true + keras_layer { + class_name: 'FM' + fm { + use_variant: true + } } } blocks { @@ -720,11 +726,14 @@ model_config: { name: 'num_emb' input_fn: 'lambda x: x[0]' } - mlp { - hidden_units: [256, 128, 64] + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64] + } } } - // no wide_logit may have better performance + # no wide_logit may have better performance concat_blocks: ['wide_logit', 'fm', 'deep'] top_mlp { hidden_units: [256, 128, 64] diff --git a/examples/configs/deepfm_backbone_on_criteo_with_periodic.config b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config index 82dd01998..2f2f8435b 100644 --- a/examples/configs/deepfm_backbone_on_criteo_with_periodic.config +++ b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config @@ -674,7 +674,7 @@ model_config: { inputs { name: 'wide_features' } - Lambda { + lambda { expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' } } @@ -683,10 +683,13 @@ model_config: { inputs { name: 'numerical_features' } - periodic_embedding { - embedding_dim: 16 - sigma: 0.005 - output_tensor_list: true + keras_layer { + class_name: 'PeriodicEmbedding' + periodic_embedding { + embedding_dim: 16 + sigma: 0.005 + output_tensor_list: true + } } } blocks { @@ -705,8 +708,11 @@ model_config: { name: 'num_emb' input_fn: 'lambda x: x[1]' } - fm { - use_variant: true + keras_layer { + class_name: 'FM' + fm { + use_variant: true + } } } blocks { @@ -719,8 +725,11 @@ model_config: { name: 'num_emb' input_fn: 'lambda x: x[0]' } - mlp { - hidden_units: [256, 128, 64] + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64] + } } } concat_blocks: ['wide_logit', 'fm', 'deep'] From 1b504a8df374dcb493ca5e9bdf9f1d6df057fbd0 Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 20 Jun 2023 15:11:38 +0800 Subject: [PATCH 40/54] [feat]: add repeat block --- easy_rec/python/model/cmbf.py | 2 +- easy_rec/python/model/dcn.py | 2 +- 
easy_rec/python/model/deepfm.py | 6 +++--- easy_rec/python/model/multi_tower.py | 2 +- easy_rec/python/model/multi_tower_bst.py | 2 +- easy_rec/python/model/multi_tower_din.py | 2 +- easy_rec/python/model/multi_tower_recall.py | 2 +- easy_rec/python/model/uniter.py | 2 +- easy_rec/python/model/wide_and_deep.py | 8 ++++---- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/easy_rec/python/model/cmbf.py b/easy_rec/python/model/cmbf.py index a11a30582..0f0a8f3aa 100644 --- a/easy_rec/python/model/cmbf.py +++ b/easy_rec/python/model/cmbf.py @@ -38,7 +38,7 @@ def __init__(self, def build_predict_graph(self): hidden = self._cmbf_layer(self._is_training, l2_reg=self._l2_reg) - final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(hidden) diff --git a/easy_rec/python/model/dcn.py b/easy_rec/python/model/dcn.py index 2a460163a..fcfa7e780 100644 --- a/easy_rec/python/model/dcn.py +++ b/easy_rec/python/model/dcn.py @@ -60,7 +60,7 @@ def build_predict_graph(self): tower_fea_arr.append(cross_tensor) # final tower all_fea = tf.concat(tower_fea_arr, axis=1) - final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense(all_fea, self._num_class, name='output') diff --git a/easy_rec/python/model/deepfm.py b/easy_rec/python/model/deepfm.py index 0ead36e26..d1414c050 100644 --- a/easy_rec/python/model/deepfm.py +++ b/easy_rec/python/model/deepfm.py @@ -39,7 +39,7 @@ def __init__(self, def build_input_layer(self, model_config, feature_configs): # overwrite create input_layer to support wide_output_dim - has_final = len(model_config.deepfm.final_mlp.hidden_units) > 0 + has_final = len(model_config.deepfm.final_dnn.hidden_units) > 0 if not has_final: assert 
model_config.deepfm.wide_output_dim == model_config.num_class self._wide_output_dim = model_config.deepfm.wide_output_dim @@ -60,9 +60,9 @@ def build_predict_graph(self): deep_fea = deep_layer(self._deep_features) # Final - if len(self._model_config.final_mlp.hidden_units) > 0: + if len(self._model_config.final_dnn.hidden_units) > 0: all_fea = tf.concat([wide_fea, fm_fea, deep_fea], axis=1) - final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense( diff --git a/easy_rec/python/model/multi_tower.py b/easy_rec/python/model/multi_tower.py index cb0aa6233..5cdd89ba5 100644 --- a/easy_rec/python/model/multi_tower.py +++ b/easy_rec/python/model/multi_tower.py @@ -52,7 +52,7 @@ def build_predict_graph(self): tower_fea_arr.append(tower_fea) all_fea = tf.concat(tower_fea_arr, axis=1) - final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense(all_fea, self._num_class, name='output') diff --git a/easy_rec/python/model/multi_tower_bst.py b/easy_rec/python/model/multi_tower_bst.py index 478d26a6c..4cbc9fd29 100644 --- a/easy_rec/python/model/multi_tower_bst.py +++ b/easy_rec/python/model/multi_tower_bst.py @@ -180,7 +180,7 @@ def build_predict_graph(self): tower_fea_arr.append(tower_fea) all_fea = tf.concat(tower_fea_arr, axis=1) - final_dnn = dnn.DNN(self._model_config.final_mlp, self._l2_reg, 'final_dnn', + final_dnn = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn(all_fea) output = tf.layers.dense(all_fea, self._num_class, name='output') diff --git a/easy_rec/python/model/multi_tower_din.py b/easy_rec/python/model/multi_tower_din.py index 7a1356caa..e586da1cf 100644 --- 
a/easy_rec/python/model/multi_tower_din.py +++ b/easy_rec/python/model/multi_tower_din.py @@ -120,7 +120,7 @@ def build_predict_graph(self): tower_fea_arr.append(tower_fea) all_fea = tf.concat(tower_fea_arr, axis=1) - final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense(all_fea, self._num_class, name='output') diff --git a/easy_rec/python/model/multi_tower_recall.py b/easy_rec/python/model/multi_tower_recall.py index 101ad36cf..8f576944e 100644 --- a/easy_rec/python/model/multi_tower_recall.py +++ b/easy_rec/python/model/multi_tower_recall.py @@ -57,7 +57,7 @@ def build_predict_graph(self): tower_fea_arr.append(item_tower_emb) all_fea = tf.concat(tower_fea_arr, axis=-1) - final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(all_fea) output = tf.layers.dense(all_fea, 1, name='output') diff --git a/easy_rec/python/model/uniter.py b/easy_rec/python/model/uniter.py index 9479ce639..40dfc8cb1 100644 --- a/easy_rec/python/model/uniter.py +++ b/easy_rec/python/model/uniter.py @@ -37,7 +37,7 @@ def __init__(self, def build_predict_graph(self): hidden = self._uniter_layer(self._is_training, l2_reg=self._l2_reg) - final_dnn_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_dnn_layer(hidden) diff --git a/easy_rec/python/model/wide_and_deep.py b/easy_rec/python/model/wide_and_deep.py index e0850abe4..f841ed049 100755 --- a/easy_rec/python/model/wide_and_deep.py +++ b/easy_rec/python/model/wide_and_deep.py @@ -34,7 +34,7 @@ def __init__(self, def build_input_layer(self, model_config, feature_configs): # overwrite create 
input_layer to support wide_output_dim - has_final = len(model_config.wide_and_deep.final_mlp.hidden_units) > 0 + has_final = len(model_config.wide_and_deep.final_dnn.hidden_units) > 0 self._wide_output_dim = model_config.wide_and_deep.wide_output_dim if not has_final: model_config.wide_and_deep.wide_output_dim = model_config.num_class @@ -55,11 +55,11 @@ def build_predict_graph(self): logging.info('output deep features dimension: %d' % deep_fea.get_shape()[-1]) - has_final = len(self._model_config.final_mlp.hidden_units) > 0 + has_final = len(self._model_config.final_dnn.hidden_units) > 0 print('wide_deep has_final_dnn layers = %d' % has_final) if has_final: all_fea = tf.concat([wide_fea, deep_fea], axis=1) - final_layer = dnn.DNN(self._model_config.final_mlp, self._l2_reg, + final_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, 'final_dnn', self._is_training) all_fea = final_layer(all_fea) output = tf.layers.dense( @@ -87,7 +87,7 @@ def get_grouped_vars(self): Return: list of list of variables. """ - assert len(self._model_config.final_mlp.hidden_units) == 0, \ + assert len(self._model_config.final_dnn.hidden_units) == 0, \ 'if use different optimizers for wide group and deep group, '\ + ' final_dnn should not be set.' 
wide_vars = [] From 32ff01cad4cf14ea0e936d1f6e7e481201712474 Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 20 Jun 2023 16:59:25 +0800 Subject: [PATCH 41/54] fix bug of no is_predicting argument --- easy_rec/python/model/easy_rec_model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index cb6c8a802..8920f06dc 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -33,12 +33,11 @@ def __init__(self, feature_configs, features, labels=None, - is_training=False, - is_predicting=False): + is_training=False): self._base_model_config = model_config self._model_config = model_config self._is_training = is_training - self._is_predicting = is_predicting + self._is_predicting = labels is None self._feature_dict = features # embedding variable parameters From 0c087d94b7f31e9eb8d46c402ef647de16c49007 Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 20 Jun 2023 17:30:38 +0800 Subject: [PATCH 42/54] fix bug of no is_predicting argument --- .../compat/feature_column/feature_column.py | 6 +-- easy_rec/python/layers/backbone.py | 3 +- easy_rec/python/layers/keras/blocks.py | 14 ++++--- easy_rec/python/layers/keras/mask_net.py | 6 +-- easy_rec/python/model/easy_rec_estimator.py | 3 +- easy_rec/python/utils/config_util.py | 39 +++++++++---------- 6 files changed, 35 insertions(+), 36 deletions(-) diff --git a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index 27557e9a7..d446adb76 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -2541,9 +2541,9 @@ def raw_name(self): @property def cardinality(self): from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn, \ - BucketizedColumn, WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, \ - 
CrossedColumn, IdentityCategoricalColumn, VocabularyListCategoricalColumn, \ - VocabularyFileCategoricalColumn + BucketizedColumn, WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, \ + CrossedColumn, IdentityCategoricalColumn, VocabularyListCategoricalColumn, \ + VocabularyFileCategoricalColumn fc = self.categorical_column if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index cfc0e3d60..b673a209a 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -201,7 +201,8 @@ def call_layer(self, inputs, config, name, training): outputs = [] for i in range(n_loop): name_i = '%s_%d' % (name, i) - output = self.call_keras_layer(conf.keras_layer, inputs, name_i, training) + output = self.call_keras_layer(conf.keras_layer, inputs, name_i, + training) outputs.append(output) if len(outputs) == 1: return outputs[0] diff --git a/easy_rec/python/layers/keras/blocks.py b/easy_rec/python/layers/keras/blocks.py index 5c14a07c3..62063e451 100644 --- a/easy_rec/python/layers/keras/blocks.py +++ b/easy_rec/python/layers/keras/blocks.py @@ -63,6 +63,11 @@ def add_rich_layer(self, use_bn_after_activation=False, name='mlp', l2_reg=None): + + def batch_norm(x, training): + return tf.layers.batch_normalization( + x, training=training, name='%s/%s/bn' % (self.name, name)) + act_fn = get_activation(activation) if use_bn and not use_bn_after_activation: dense = tf.keras.layers.Dense( @@ -72,11 +77,10 @@ def add_rich_layer(self, kernel_regularizer=l2_reg, name=name) self._sub_layers.append(dense) + # bn = tf.keras.layers.BatchNormalization(name='%s/bn' % name) # keras BN layer have a stale issue on some versions of tf - bn = lambda x, training: tf.layers.batch_normalization( - x, training=training, name='%s/%s/bn' % (self.name, name)) - self._sub_layers.append(bn) + self._sub_layers.append(batch_norm) act = 
tf.keras.layers.Activation(act_fn, name='%s/act' % name) self._sub_layers.append(act) else: @@ -89,9 +93,7 @@ def add_rich_layer(self, name=name) self._sub_layers.append(dense) if use_bn and use_bn_after_activation: - bn = lambda x, training: tf.layers.batch_normalization( - x, training=training, name='%s/%s/bn' % (self.name, name)) - self._sub_layers.append(bn) + self._sub_layers.append(batch_norm) if 0.0 < dropout_rate < 1.0: dropout = tf.keras.layers.Dropout(dropout_rate, name='%s/dropout' % name) diff --git a/easy_rec/python/layers/keras/mask_net.py b/easy_rec/python/layers/keras/mask_net.py index 0ba769972..ca939bb7e 100644 --- a/easy_rec/python/layers/keras/mask_net.py +++ b/easy_rec/python/layers/keras/mask_net.py @@ -75,8 +75,7 @@ def call(self, inputs, training=None, **kwargs): mask_outputs = [] for i, block_conf in enumerate(self.config.mask_blocks): params = Parameter.make_from_pb(block_conf) - mask_layer = MaskBlock( - params, name='%s/block_%d' % (self.name, i)) + mask_layer = MaskBlock(params, name='%s/block_%d' % (self.name, i)) mask_outputs.append(mask_layer((inputs, inputs))) all_mask_outputs = tf.concat(mask_outputs, axis=1) @@ -89,8 +88,7 @@ def call(self, inputs, training=None, **kwargs): net = inputs for i, block_conf in enumerate(self.config.mask_blocks): params = Parameter.make_from_pb(block_conf) - mask_layer = MaskBlock( - params, name='%s/block_%d' % (self.name, i)) + mask_layer = MaskBlock(params, name='%s/block_%d' % (self.name, i)) net = mask_layer((net, inputs)) if self.mlp is not None: diff --git a/easy_rec/python/model/easy_rec_estimator.py b/easy_rec/python/model/easy_rec_estimator.py index 9cbd28b6c..51ecad09f 100644 --- a/easy_rec/python/model/easy_rec_estimator.py +++ b/easy_rec/python/model/easy_rec_estimator.py @@ -514,8 +514,7 @@ def _export_model_fn(self, features, labels, run_config, params): self.feature_configs, features, labels=None, - is_training=False, - is_predicting=True) + is_training=False) 
model.build_predict_graph() export_config = self._pipeline_config.export_config diff --git a/easy_rec/python/utils/config_util.py b/easy_rec/python/utils/config_util.py index 67f3bc351..72c050775 100644 --- a/easy_rec/python/utils/config_util.py +++ b/easy_rec/python/utils/config_util.py @@ -662,25 +662,25 @@ def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( - '--pipeline_config_path', - type=str, - default=None, - help='Path to pipeline config file.') + '--pipeline_config_path', + type=str, + default=None, + help='Path to pipeline config file.') parser.add_argument( - '--feature_groups', - type=str, - default=None, - help='The name of feature group to be changed.') + '--feature_groups', + type=str, + default=None, + help='The name of feature group to be changed.') parser.add_argument( - '--embedding_dim', - type=int, - default=None, - help='The embedding dim to be changed to.') + '--embedding_dim', + type=int, + default=None, + help='The embedding dim to be changed to.') parser.add_argument( - '--save_config_path', - type=str, - default=None, - help='Path to save changed config.') + '--save_config_path', + type=str, + default=None, + help='Path to save changed config.') args, extra_args = parser.parse_known_args() if args.pipeline_config_path is None: @@ -693,8 +693,7 @@ def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): raise ValueError('--embedding_dim must be set') # 传入一个不存在的feature group,可以起到format配置文件的效果 - config = change_configured_embedding_dim( - args.pipeline_config_path, - args.feature_groups, - args.embedding_dim) + config = change_configured_embedding_dim(args.pipeline_config_path, + args.feature_groups, + args.embedding_dim) save_message(config, args.save_config_path) From af871b36f236a7eef48e91d17029b823f9e624bf Mon Sep 17 00:00:00 2001 From: weisu Date: Tue, 20 Jun 2023 19:43:23 +0800 Subject: [PATCH 43/54] fix deepfm 
distribute eval test case --- easy_rec/python/model/mind.py | 2 +- easy_rec/python/test/train_eval_test.py | 20 +- easy_rec/python/utils/io_util.py | 2 +- ...equence_feature_aux_hist_seq_taobao.config | 292 ------------------ .../deepfm_on_sequence_feature_taobao.config | 291 ----------------- .../fm_on_sequence_feature_taobao.config | 288 ----------------- 6 files changed, 6 insertions(+), 889 deletions(-) delete mode 100644 samples/model_config/deepfm_on_sequence_feature_aux_hist_seq_taobao.config delete mode 100644 samples/model_config/deepfm_on_sequence_feature_taobao.config delete mode 100644 samples/model_config/fm_on_sequence_feature_taobao.config diff --git a/easy_rec/python/model/mind.py b/easy_rec/python/model/mind.py index c414703d2..270060297 100644 --- a/easy_rec/python/model/mind.py +++ b/easy_rec/python/model/mind.py @@ -32,7 +32,7 @@ def __init__(self, 'invalid model config: %s' % self._model_config.WhichOneof('model') self._model_config = self._model_config.mind - self._hist_seq_features = self._input_layer( + self._hist_seq_features, _, _ = self._input_layer( self._feature_dict, 'hist', is_combine=False) self._user_features, _ = self._input_layer(self._feature_dict, 'user') self._item_features, _ = self._input_layer(self._feature_dict, 'item') diff --git a/easy_rec/python/test/train_eval_test.py b/easy_rec/python/test/train_eval_test.py index cbdf95dd2..8f0f25aa1 100644 --- a/easy_rec/python/test/train_eval_test.py +++ b/easy_rec/python/test/train_eval_test.py @@ -306,10 +306,10 @@ def test_bst(self): 'samples/model_config/bst_on_taobao.config', self._test_dir) self.assertTrue(self._success) - def test_bst_contrastive_learning(self): - self._success = test_utils.test_single_train_eval( - 'samples/model_config/bst_cl_on_taobao.config', self._test_dir) - self.assertTrue(self._success) + # def test_bst_contrastive_learning(self): + # self._success = test_utils.test_single_train_eval( + # 'samples/model_config/bst_cl_on_taobao.config', self._test_dir) 
+ # self.assertTrue(self._success) def test_dcn(self): self._success = test_utils.test_single_train_eval( @@ -800,12 +800,6 @@ def test_sequence_esmm(self): self._test_dir) self.assertTrue(self._success) - def test_sequence_fm(self): - self._success = test_utils.test_single_train_eval( - 'samples/model_config/fm_on_sequence_feature_taobao.config', - self._test_dir) - self.assertTrue(self._success) - def test_sequence_mmoe(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/mmoe_on_sequence_feature_taobao.config', @@ -1036,12 +1030,6 @@ def test_dbmtl_on_multi_numeric_boundary_aux_hist_seq(self): self._test_dir) self.assertTrue(self._success) - def test_deepfm_on_sequence_feature_aux_hist_seq(self): - self._success = test_utils.test_single_train_eval( - 'samples/model_config/deepfm_on_sequence_feature_aux_hist_seq_taobao.config', - self._test_dir) - self.assertTrue(self._success) - @unittest.skipIf(gl is None, 'graphlearn is not installed') def test_multi_tower_recall_neg_sampler_sequence_feature(self): self._success = test_utils.test_single_train_eval( diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index 4c1c28550..091e10e07 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -97,7 +97,7 @@ def download(oss_or_url, dst_dir=''): def create_module_dir(dst_dir): if not os.path.exists(dst_dir): os.makedirs(dst_dir) - with open(os.path.join(dst_dir, 'explainer.py'), 'w') as ofile: + with open(os.path.join(dst_dir, '__init__.py'), 'w') as ofile: ofile.write('\n') diff --git a/samples/model_config/deepfm_on_sequence_feature_aux_hist_seq_taobao.config b/samples/model_config/deepfm_on_sequence_feature_aux_hist_seq_taobao.config deleted file mode 100644 index a663d2f03..000000000 --- a/samples/model_config/deepfm_on_sequence_feature_aux_hist_seq_taobao.config +++ /dev/null @@ -1,292 +0,0 @@ -train_input_path: "data/test/tb_data/taobao_train_data" -eval_input_path: 
"data/test/tb_data/taobao_test_data" -model_dir: "experiments/deepfm_on_taobao_ckpt" - -train_config { - log_step_count_steps: 100 - optimizer_config: { - adam_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.001 - decay_steps: 1000 - decay_factor: 0.5 - min_learning_rate: 0.00001 - } - } - } - use_moving_average: false - } - save_checkpoints_steps: 100 - sync_replicas: True - num_steps: 1000 -} - -eval_config { - metrics_set: { - auc {} - } -} - -data_config { - input_fields { - input_name:'clk' - input_type: INT32 - } - input_fields { - input_name:'buy' - input_type: INT32 - } - input_fields { - input_name: 'pid' - input_type: STRING - } - input_fields { - input_name: 'adgroup_id' - input_type: STRING - } - input_fields { - input_name: 'cate_id' - input_type: STRING - } - input_fields { - input_name: 'campaign_id' - input_type: STRING - } - input_fields { - input_name: 'customer' - input_type: STRING - } - input_fields { - input_name: 'brand' - input_type: STRING - } - input_fields { - input_name: 'user_id' - input_type: STRING - } - input_fields { - input_name: 'cms_segid' - input_type: STRING - } - input_fields { - input_name: 'cms_group_id' - input_type: STRING - } - input_fields { - input_name: 'final_gender_code' - input_type: STRING - } - input_fields { - input_name: 'age_level' - input_type: STRING - } - input_fields { - input_name: 'pvalue_level' - input_type: STRING - } - input_fields { - input_name: 'shopping_level' - input_type: STRING - } - input_fields { - input_name: 'occupation' - input_type: STRING - } - input_fields { - input_name: 'new_user_class_level' - input_type: STRING - } - input_fields { - input_name: 'tag_category_list' - input_type: STRING - } - input_fields { - input_name: 'tag_brand_list' - input_type: STRING - } - input_fields { - input_name: 'price' - input_type: INT32 - } - - label_fields: 'clk' - batch_size: 4096 - num_epochs: 10000 - prefetch_size: 32 - input_type: CSVInput -} - 
-feature_configs : { - input_names: 'pid' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'adgroup_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'cate_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10000 -} -feature_configs : { - input_names: 'campaign_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'customer' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'brand' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'user_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'cms_segid' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100 -} -feature_configs : { - input_names: 'cms_group_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100 -} -feature_configs : { - input_names: 'final_gender_code' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'age_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'pvalue_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'shopping_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'occupation' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'new_user_class_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'tag_category_list' - feature_type: SequenceFeature - separator: '|' - hash_bucket_size: 10000 - embedding_dim: 16 -} 
-feature_configs : { - input_names: 'tag_brand_list' - feature_type: SequenceFeature - separator: '|' - hash_bucket_size: 100000 - embedding_dim: 16 -} -feature_configs : { - input_names: 'price' - feature_type: IdFeature - embedding_dim: 16 - num_buckets: 50 -} - -model_config: { - model_class: 'DeepFM' - feature_groups: { - group_name: 'wide' - feature_names: 'user_id' - feature_names: 'cms_segid' - feature_names: 'cms_group_id' - feature_names: 'age_level' - feature_names: 'pvalue_level' - feature_names: 'shopping_level' - feature_names: 'occupation' - feature_names: 'new_user_class_level' - feature_names: 'adgroup_id' - feature_names: 'cate_id' - feature_names: 'campaign_id' - feature_names: 'customer' - feature_names: 'brand' - feature_names: 'price' - feature_names: 'pid' - wide_deep: WIDE - } - feature_groups: { - group_name: 'deep' - feature_names: 'user_id' - feature_names: 'cms_segid' - feature_names: 'cms_group_id' - feature_names: 'age_level' - feature_names: 'pvalue_level' - feature_names: 'shopping_level' - feature_names: 'occupation' - feature_names: 'new_user_class_level' - feature_names: 'adgroup_id' - feature_names: 'cate_id' - feature_names: 'campaign_id' - feature_names: 'customer' - feature_names: 'brand' - feature_names: 'price' - feature_names: 'pid' - wide_deep: DEEP - sequence_features: { - group_name: "seq_fea" - tf_summary: false - allow_key_transform:true - seq_att_map: { - key: "brand" - key: "cate_id" - hist_seq: "tag_brand_list" - aux_hist_seq: "tag_category_list" - } - } - } - deepfm { - dnn { - hidden_units: [256, 256, 256] - } - l2_regularization: 1e-4 - } - embedding_regularization: 1e-5 -} - -export_config { -} diff --git a/samples/model_config/deepfm_on_sequence_feature_taobao.config b/samples/model_config/deepfm_on_sequence_feature_taobao.config deleted file mode 100644 index 059e33d7b..000000000 --- a/samples/model_config/deepfm_on_sequence_feature_taobao.config +++ /dev/null @@ -1,291 +0,0 @@ -train_input_path: 
"data/test/tb_data/taobao_train_data" -eval_input_path: "data/test/tb_data/taobao_test_data" -model_dir: "experiments/deepfm_on_taobao_ckpt" - -train_config { - log_step_count_steps: 100 - optimizer_config: { - adam_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.001 - decay_steps: 1000 - decay_factor: 0.5 - min_learning_rate: 0.00001 - } - } - } - use_moving_average: false - } - save_checkpoints_steps: 100 - sync_replicas: True - num_steps: 2500 -} - -eval_config { - metrics_set: { - auc {} - } -} - -data_config { - input_fields { - input_name:'clk' - input_type: INT32 - } - input_fields { - input_name:'buy' - input_type: INT32 - } - input_fields { - input_name: 'pid' - input_type: STRING - } - input_fields { - input_name: 'adgroup_id' - input_type: STRING - } - input_fields { - input_name: 'cate_id' - input_type: STRING - } - input_fields { - input_name: 'campaign_id' - input_type: STRING - } - input_fields { - input_name: 'customer' - input_type: STRING - } - input_fields { - input_name: 'brand' - input_type: STRING - } - input_fields { - input_name: 'user_id' - input_type: STRING - } - input_fields { - input_name: 'cms_segid' - input_type: STRING - } - input_fields { - input_name: 'cms_group_id' - input_type: STRING - } - input_fields { - input_name: 'final_gender_code' - input_type: STRING - } - input_fields { - input_name: 'age_level' - input_type: STRING - } - input_fields { - input_name: 'pvalue_level' - input_type: STRING - } - input_fields { - input_name: 'shopping_level' - input_type: STRING - } - input_fields { - input_name: 'occupation' - input_type: STRING - } - input_fields { - input_name: 'new_user_class_level' - input_type: STRING - } - input_fields { - input_name: 'tag_category_list' - input_type: STRING - } - input_fields { - input_name: 'tag_brand_list' - input_type: STRING - } - input_fields { - input_name: 'price' - input_type: INT32 - } - - label_fields: 'clk' - batch_size: 4096 - num_epochs: 
10000 - prefetch_size: 32 - input_type: CSVInput -} - -feature_configs : { - input_names: 'pid' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'adgroup_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'cate_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10000 -} -feature_configs : { - input_names: 'campaign_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'customer' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'brand' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'user_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'cms_segid' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100 -} -feature_configs : { - input_names: 'cms_group_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100 -} -feature_configs : { - input_names: 'final_gender_code' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'age_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'pvalue_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'shopping_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'occupation' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'new_user_class_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'tag_category_list' - feature_type: SequenceFeature - separator: 
'|' - hash_bucket_size: 10000 - embedding_dim: 16 -} -feature_configs : { - input_names: 'tag_brand_list' - feature_type: SequenceFeature - separator: '|' - hash_bucket_size: 100000 - embedding_dim: 16 -} -feature_configs : { - input_names: 'price' - feature_type: IdFeature - embedding_dim: 16 - num_buckets: 50 -} - -model_config: { - model_class: 'DeepFM' - feature_groups: { - group_name: 'wide' - feature_names: 'user_id' - feature_names: 'cms_segid' - feature_names: 'cms_group_id' - feature_names: 'age_level' - feature_names: 'pvalue_level' - feature_names: 'shopping_level' - feature_names: 'occupation' - feature_names: 'new_user_class_level' - feature_names: 'adgroup_id' - feature_names: 'cate_id' - feature_names: 'campaign_id' - feature_names: 'customer' - feature_names: 'brand' - feature_names: 'price' - feature_names: 'pid' - wide_deep: WIDE - } - feature_groups: { - group_name: 'deep' - feature_names: 'user_id' - feature_names: 'cms_segid' - feature_names: 'cms_group_id' - feature_names: 'age_level' - feature_names: 'pvalue_level' - feature_names: 'shopping_level' - feature_names: 'occupation' - feature_names: 'new_user_class_level' - feature_names: 'adgroup_id' - feature_names: 'cate_id' - feature_names: 'campaign_id' - feature_names: 'customer' - feature_names: 'brand' - feature_names: 'price' - feature_names: 'pid' - wide_deep: DEEP - sequence_features: { - group_name: "seq_fea" - tf_summary: false - seq_att_map: { - key: "brand" - key: "cate_id" - hist_seq: "tag_brand_list" - hist_seq: "tag_category_list" - } - } - } - deepfm { - dnn { - hidden_units: [256, 256, 256] - } - l2_regularization: 1e-4 - } - embedding_regularization: 1e-5 -} - -export_config { -} diff --git a/samples/model_config/fm_on_sequence_feature_taobao.config b/samples/model_config/fm_on_sequence_feature_taobao.config deleted file mode 100644 index eb6096acb..000000000 --- a/samples/model_config/fm_on_sequence_feature_taobao.config +++ /dev/null @@ -1,288 +0,0 @@ -train_input_path: 
"data/test/tb_data/taobao_train_data" -eval_input_path: "data/test/tb_data/taobao_test_data" -model_dir: "experiments/fm_taobao_ckpt" - -train_config { - log_step_count_steps: 100 - optimizer_config: { - adam_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.001 - decay_steps: 1000 - decay_factor: 0.5 - min_learning_rate: 0.00001 - } - } - } - use_moving_average: false - } - save_checkpoints_steps: 100 - sync_replicas: True - num_steps: 2500 -} - -eval_config { - metrics_set: { - auc {} - } -} - -data_config { - input_fields { - input_name:'clk' - input_type: INT32 - } - input_fields { - input_name:'buy' - input_type: INT32 - } - input_fields { - input_name: 'pid' - input_type: STRING - } - input_fields { - input_name: 'adgroup_id' - input_type: STRING - } - input_fields { - input_name: 'cate_id' - input_type: STRING - } - input_fields { - input_name: 'campaign_id' - input_type: STRING - } - input_fields { - input_name: 'customer' - input_type: STRING - } - input_fields { - input_name: 'brand' - input_type: STRING - } - input_fields { - input_name: 'user_id' - input_type: STRING - } - input_fields { - input_name: 'cms_segid' - input_type: STRING - } - input_fields { - input_name: 'cms_group_id' - input_type: STRING - } - input_fields { - input_name: 'final_gender_code' - input_type: STRING - } - input_fields { - input_name: 'age_level' - input_type: STRING - } - input_fields { - input_name: 'pvalue_level' - input_type: STRING - } - input_fields { - input_name: 'shopping_level' - input_type: STRING - } - input_fields { - input_name: 'occupation' - input_type: STRING - } - input_fields { - input_name: 'new_user_class_level' - input_type: STRING - } - input_fields { - input_name: 'tag_category_list' - input_type: STRING - } - input_fields { - input_name: 'tag_brand_list' - input_type: STRING - } - input_fields { - input_name: 'price' - input_type: INT32 - } - - label_fields: 'clk' - batch_size: 4096 - num_epochs: 10000 - 
prefetch_size: 32 - input_type: CSVInput -} - -feature_configs : { - input_names: 'pid' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'adgroup_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'cate_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10000 -} -feature_configs : { - input_names: 'campaign_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'customer' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'brand' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'user_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100000 -} -feature_configs : { - input_names: 'cms_segid' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100 -} -feature_configs : { - input_names: 'cms_group_id' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 100 -} -feature_configs : { - input_names: 'final_gender_code' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'age_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'pvalue_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'shopping_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'occupation' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'new_user_class_level' - feature_type: IdFeature - embedding_dim: 16 - hash_bucket_size: 10 -} -feature_configs : { - input_names: 'tag_category_list' - feature_type: SequenceFeature - separator: '|' - 
hash_bucket_size: 10000 - embedding_dim: 16 -} -feature_configs : { - input_names: 'tag_brand_list' - feature_type: SequenceFeature - separator: '|' - hash_bucket_size: 100000 - embedding_dim: 16 -} -feature_configs : { - input_names: 'price' - feature_type: IdFeature - embedding_dim: 16 - num_buckets: 50 -} - -model_config: { - model_class: 'FM' - feature_groups: { - group_name: 'wide' - feature_names: 'user_id' - feature_names: 'cms_segid' - feature_names: 'cms_group_id' - feature_names: 'age_level' - feature_names: 'pvalue_level' - feature_names: 'shopping_level' - feature_names: 'occupation' - feature_names: 'new_user_class_level' - feature_names: 'adgroup_id' - feature_names: 'cate_id' - feature_names: 'campaign_id' - feature_names: 'customer' - feature_names: 'brand' - feature_names: 'price' - feature_names: 'pid' - wide_deep: WIDE - } - feature_groups: { - group_name: 'deep' - feature_names: 'user_id' - feature_names: 'cms_segid' - feature_names: 'cms_group_id' - feature_names: 'age_level' - feature_names: 'pvalue_level' - feature_names: 'shopping_level' - feature_names: 'occupation' - feature_names: 'new_user_class_level' - feature_names: 'adgroup_id' - feature_names: 'cate_id' - feature_names: 'campaign_id' - feature_names: 'customer' - feature_names: 'brand' - feature_names: 'price' - feature_names: 'pid' - wide_deep: DEEP - sequence_features: { - group_name: "seq_fea" - tf_summary: false - allow_key_search:true - seq_att_map: { - key: "brand" - key: "cate_id" - hist_seq: "tag_brand_list" - hist_seq: "tag_category_list" - } - } - } - fm { - } - embedding_regularization: 1e-5 -} - -export_config { -} From 5813c0e0e8c7154d8a9f53745c6adc0cd4b4df50 Mon Sep 17 00:00:00 2001 From: weisu Date: Thu, 22 Jun 2023 19:40:17 +0800 Subject: [PATCH 44/54] modify --- easy_rec/python/layers/utils.py | 18 +++-- easy_rec/python/test/train_eval_test.py | 1 + easy_rec/python/utils/config_util.py | 91 ++++++++++++++++--------- examples/readme.md | 2 +- 4 files changed, 73 
insertions(+), 39 deletions(-) diff --git a/easy_rec/python/layers/utils.py b/easy_rec/python/layers/utils.py index 1ba585e07..2af9b855f 100644 --- a/easy_rec/python/layers/utils.py +++ b/easy_rec/python/layers/utils.py @@ -19,6 +19,7 @@ import json +from google.protobuf import struct_pb2 from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import variables @@ -185,18 +186,25 @@ def l2_regularizer(self, value): def __getattr__(self, key): if self.is_struct: - return self.params[key] + value = self.params[key] + if type(value) == struct_pb2.Struct: + return Parameter(value, True, self._l2_reg) + else: + return value return getattr(self.params, key) def __getitem__(self, key): - if self.is_struct: - return self.params[key] - return getattr(self.params, key) + return self.__getattr__(key) def get_or_default(self, key, def_val): if self.is_struct: if key in self.params: - return self.params[key] + if def_val is None: + return self.params[key] + value = self.params[key] + if type(value) == float: + return type(def_val)(value) + return value return def_val else: # pb message return getattr(self.params, key) diff --git a/easy_rec/python/test/train_eval_test.py b/easy_rec/python/test/train_eval_test.py index 8f0f25aa1..5680cadb3 100644 --- a/easy_rec/python/test/train_eval_test.py +++ b/easy_rec/python/test/train_eval_test.py @@ -960,6 +960,7 @@ def test_distribute_eval_deepfm_multi_cls(self): def test_distribute_eval_deepfm_single_cls(self): cur_eval_path = 'data/test/distribute_eval_test/dwd_distribute_eval_avazu_out_test_combo' + #cur_eval_path = '/Users/weisu.yxd/Code/EasyRec/experiments/distribute_eval_test/dwd_distribute_eval_avazu_out_test_combo' self._success = test_utils.test_distributed_eval( 'samples/model_config/deepfm_distribute_eval_combo_on_avazu_ctr.config', cur_eval_path, self._test_dir) diff --git a/easy_rec/python/utils/config_util.py b/easy_rec/python/utils/config_util.py index 
72c050775..9f272919d 100644 --- a/easy_rec/python/utils/config_util.py +++ b/easy_rec/python/utils/config_util.py @@ -609,7 +609,7 @@ def process_multi_file_input_path(sampler_config_input_path): def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): - """Reads config from a file containing pipeline_pb2.EasyRecConfig. + """Change the embedding dimension of the features in groups. Args: pipeline_config_path: Path to pipeline_pb2.EasyRecConfig text @@ -622,22 +622,7 @@ def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): `train_input_config`, `eval_config`, `eval_input_config`. Value are the corresponding config objects. """ - if isinstance(pipeline_config_path, pipeline_pb2.EasyRecConfig): - return pipeline_config_path - - assert tf.gfile.Exists( - pipeline_config_path - ), 'pipeline_config_path [%s] not exists' % pipeline_config_path - - pipeline_config = pipeline_pb2.EasyRecConfig() - with tf.gfile.GFile(pipeline_config_path, 'r') as f: - config_str = f.read() - if pipeline_config_path.endswith('.config'): - text_format.Merge(config_str, pipeline_config) - elif pipeline_config_path.endswith('.json'): - json_format.Parse(config_str, pipeline_config) - else: - assert False, 'invalid file format(%s), currently support formats: .config(prototxt) .json' % pipeline_config_path + pipeline_config = get_configs_from_pipeline_file(pipeline_config_path, False) target_groups = set(groups.split(',')) features = set() @@ -658,13 +643,50 @@ def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): return pipeline_config +def remove_redundant_config(pipeline_config_path): + """Remove redundant configs from a file containing pipeline_pb2.EasyRecConfig. + + Args: + pipeline_config_path: Path to pipeline_pb2.EasyRecConfig text + proto. + + Returns: + Dictionary of configuration objects. Keys are `model`, `train_config`, + `train_input_config`, `eval_config`, `eval_input_config`. 
Value are the + corresponding config objects. + """ + pipeline_config = get_configs_from_pipeline_file(pipeline_config_path, False) + + features = set() + conf = pipeline_config.model_config + for group in conf.feature_groups: + for feature in group.feature_names: + features.add(feature) + + feature_configs = get_compatible_feature_configs(pipeline_config) + for fea_conf in feature_configs: + fea_name = fea_conf.input_names[0] + if fea_conf.HasField('feature_name'): + fea_name = fea_conf.feature_name + if fea_name not in features: + logging.info("redundant feature:" + fea_name) + fea_conf.Clear() + return pipeline_config + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( - '--pipeline_config_path', + '--cmd', + type=str, + choices=['format', 'set_emb_dim', 'rm_redundancy'], + required=True, + help='Path to pipeline config file.') + parser.add_argument( + '-c', '--pipeline_config_path', type=str, default=None, + required=True, help='Path to pipeline config file.') parser.add_argument( '--feature_groups', @@ -677,23 +699,26 @@ def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): default=None, help='The embedding dim to be changed to.') parser.add_argument( - '--save_config_path', + '-o', '--save_config_path', type=str, default=None, + required=True, help='Path to save changed config.') args, extra_args = parser.parse_known_args() - if args.pipeline_config_path is None: - raise ValueError('--pipeline_config_path must be set') - if args.save_config_path is None: - raise ValueError('--save_config_path must be set') - if args.feature_groups is None: - raise ValueError('--feature_groups must be set') - if args.embedding_dim is None: - raise ValueError('--embedding_dim must be set') - - # 传入一个不存在的feature group,可以起到format配置文件的效果 - config = change_configured_embedding_dim(args.pipeline_config_path, - args.feature_groups, - args.embedding_dim) - save_message(config, args.save_config_path) + if args.cmd == 'format': + 
config = get_configs_from_pipeline_file(args.pipeline_config_path) + save_message(config, args.save_config_path) + elif args.cmd == 'set_emb_dim': + if args.feature_groups is None: + raise ValueError('--feature_groups must be set') + if args.embedding_dim is None: + raise ValueError('--embedding_dim must be set') + + config = change_configured_embedding_dim(args.pipeline_config_path, + args.feature_groups, + args.embedding_dim) + save_message(config, args.save_config_path) + elif args.cmd == 'rm_redundancy': + config = remove_redundant_config(args.pipeline_config_path) + save_message(config, args.save_config_path) diff --git a/examples/readme.md b/examples/readme.md index cbf9be600..f2c337431 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -217,7 +217,7 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee | DeepFM | 1 | 0.8867 | | DeepFM(Backbone) | 1 | 0.8872 | | DCN | 1 | 0.8576 | - | DCN (Backbone) | 1 | 0.8770 | + | DCN_v2 | 1 | 0.8770 | | AutoInt | 1 | 0.8513 | | MaskNet | 1 | 0.8872 | | FibiNet | 1 | 0.8893 | From 0c85dd21ccbec24d7bc7c07ca7fedeacae6046f2 Mon Sep 17 00:00:00 2001 From: weisu Date: Sat, 24 Jun 2023 16:12:08 +0800 Subject: [PATCH 45/54] add gate layer --- easy_rec/python/layers/keras/__init__.py | 1 + easy_rec/python/layers/keras/blocks.py | 22 +++++++++++++ easy_rec/python/layers/keras/mask_net.py | 39 +++++++++++++++++++++++- easy_rec/python/layers/utils.py | 4 ++- easy_rec/python/protos/layer.proto | 1 + 5 files changed, 65 insertions(+), 2 deletions(-) diff --git a/easy_rec/python/layers/keras/__init__.py b/easy_rec/python/layers/keras/__init__.py index 24f62ffb3..39d7c8be8 100644 --- a/easy_rec/python/layers/keras/__init__.py +++ b/easy_rec/python/layers/keras/__init__.py @@ -1,4 +1,5 @@ from .blocks import MLP +from .blocks import Gate from .blocks import Highway from .bst import BST from .din import DIN diff --git a/easy_rec/python/layers/keras/blocks.py b/easy_rec/python/layers/keras/blocks.py 
index 62063e451..38b47abfa 100644 --- a/easy_rec/python/layers/keras/blocks.py +++ b/easy_rec/python/layers/keras/blocks.py @@ -134,3 +134,25 @@ def call(self, inputs, training=None, **kwargs): activation=self.activation, num_layers=self.num_layers, dropout=self.dropout_rate if training else 0.0) + + +class Gate(tf.keras.layers.Layer): + """Weighted sum gate.""" + + def __init__(self, params, name='gate', **kwargs): + super(Gate, self).__init__(name, **kwargs) + self.weight_index = params.get_or_default("weight_index", 0) + + def call(self, inputs, **kwargs): + assert len(inputs) > 1, 'input of Gate layer must be a list containing at least 2 elements' + weights = inputs[self.weight_index] + j = 0 + for i, x in enumerate(inputs): + if i == self.weight_index: + continue + if j == 0: + output = weights[:, j] * x + else: + output += weights[:, j] * x + j += 1 + return output diff --git a/easy_rec/python/layers/keras/mask_net.py b/easy_rec/python/layers/keras/mask_net.py index ca939bb7e..fa1503b11 100644 --- a/easy_rec/python/layers/keras/mask_net.py +++ b/easy_rec/python/layers/keras/mask_net.py @@ -8,10 +8,24 @@ class MaskBlock(tf.keras.layers.Layer): + """MaskBlock use in MaskNet. + + Args: + projection_dim: project dimension to reduce the computational cost. + Default is `None` such that a full (`input_dim` by `aggregation_size`) matrix + W is used. If enabled, a low-rank matrix W = U*V will be used, where U + is of size `input_dim` by `projection_dim` and V is of size + `projection_dim` by `aggregation_size`. `projection_dim` need to be smaller + than `aggregation_size`/2 to improve the model efficiency. In practice, we've + observed that `projection_dim` = d/4 consistently preserved the + accuracy of a full-rank version. 
+ """ def __init__(self, params, name='mask_block', reuse=None, **kwargs): super(MaskBlock, self).__init__(name, **kwargs) self.config = params.get_pb_config() + self.l2_reg = params.l2_regularizer + self._projection_dim = params.get_or_default('projection_dim', None) self.reuse = reuse def call(self, inputs, **kwargs): @@ -31,13 +45,33 @@ def call(self, inputs, **kwargs): # initializer = tf.initializers.variance_scaling() initializer = tf.glorot_uniform_initializer() - mask = tf.layers.dense( + + if self._projection_dim is None: + mask = tf.layers.dense( mask_input, aggregation_size, activation=tf.nn.relu, kernel_initializer=initializer, + kernel_regularizer=self.l2_reg, name='%s/hidden' % self.name, reuse=self.reuse) + else: + u = tf.layers.dense( + mask_input, + self._projection_dim, + kernel_initializer=initializer, + kernel_regularizer=self.l2_reg, + use_bias=False, + name='%s/prj_u' % self.name, + reuse=self.reuse) + mask = tf.layers.dense( + u, + aggregation_size, + activation=tf.nn.relu, + kernel_initializer=initializer, + kernel_regularizer=self.l2_reg, + name='%s/prj_v' % self.name, + reuse=self.reuse) mask = tf.layers.dense( mask, net.shape[-1], name='%s/mask' % self.name, reuse=self.reuse) masked_net = net * mask @@ -62,6 +96,7 @@ class MaskNet(tf.keras.layers.Layer): def __init__(self, params, name='mask_net', **kwargs): super(MaskNet, self).__init__(name, **kwargs) + self.params = params self.config = params.get_pb_config() if self.config.HasField('mlp'): p = Parameter.make_from_pb(self.config.mlp) @@ -75,6 +110,7 @@ def call(self, inputs, training=None, **kwargs): mask_outputs = [] for i, block_conf in enumerate(self.config.mask_blocks): params = Parameter.make_from_pb(block_conf) + params.l2_regularizer = self.params.l2_regularizer mask_layer = MaskBlock(params, name='%s/block_%d' % (self.name, i)) mask_outputs.append(mask_layer((inputs, inputs))) all_mask_outputs = tf.concat(mask_outputs, axis=1) @@ -88,6 +124,7 @@ def call(self, inputs, 
training=None, **kwargs): net = inputs for i, block_conf in enumerate(self.config.mask_blocks): params = Parameter.make_from_pb(block_conf) + params.l2_regularizer = self.params.l2_regularizer mask_layer = MaskBlock(params, name='%s/block_%d' % (self.name, i)) net = mask_layer((net, inputs)) diff --git a/easy_rec/python/layers/utils.py b/easy_rec/python/layers/utils.py index 2af9b855f..705b1be90 100644 --- a/easy_rec/python/layers/utils.py +++ b/easy_rec/python/layers/utils.py @@ -207,7 +207,9 @@ def get_or_default(self, key, def_val): return value return def_val else: # pb message - return getattr(self.params, key) + if self.params.HasField(key): + return getattr(self.params, key) + return def_val def check_required(self, keys): if not self.is_struct: diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index 9a1e40acb..c7349c2ac 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -52,6 +52,7 @@ message MaskBlock { required uint32 output_size = 2; optional uint32 aggregation_size = 3; optional bool input_layer_norm = 4 [default = true]; + optional uint32 projection_dim = 5; } message MaskNet { From 3ed293a0a2662312c5d94cbbbc4564d6c9acfc6a Mon Sep 17 00:00:00 2001 From: weisu Date: Sat, 24 Jun 2023 19:56:02 +0800 Subject: [PATCH 46/54] add gate layer --- easy_rec/python/input/input.py | 8 ++++-- easy_rec/python/layers/input_layer.py | 9 +++++-- easy_rec/python/layers/keras/blocks.py | 4 +-- easy_rec/python/layers/utils.py | 4 +-- easy_rec/python/utils/config_util.py | 35 ++++++++++++++++++++++---- pai_jobs/run.py | 7 +++++- 6 files changed, 52 insertions(+), 15 deletions(-) diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index 5cdaa1dd1..9b8c4b3b0 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -1,9 +1,11 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
import logging +import os from abc import abstractmethod from collections import OrderedDict +from easy_rec.python.utils import conditional import six import tensorflow as tf from tensorflow.python.framework import ops @@ -1012,12 +1014,14 @@ def _input_fn(mode=None, params=None, config=None): dataset = self._build(mode, params) return dataset elif mode is None: # serving_input_receiver_fn for export SavedModel + place_on_cpu = os.getenv('place_embedding_on_cpu') + place_on_cpu = eval(place_on_cpu) if place_on_cpu else False if export_config.multi_placeholder: - with ops.device('/CPU:0'): + with conditional(place_on_cpu, ops.device('/CPU:0')): inputs, features = self.create_multi_placeholders(export_config) return tf.estimator.export.ServingInputReceiver(features, inputs) else: - with ops.device('/CPU:0'): + with conditional(place_on_cpu, ops.device('/CPU:0')): inputs, features = self.create_placeholders(export_config) print('built feature placeholders. features: {}'.format( features.keys())) diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index df1a17b25..4c36811fa 100644 --- a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -1,6 +1,7 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
import logging +import os from collections import OrderedDict import tensorflow as tf @@ -96,7 +97,9 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): feature_name_to_output_tensors = {} negative_sampler = self._feature_groups[group_name]._config.negative_sampler if is_combine: - with conditional(self._is_predicting, ops.device('/CPU:0')): + place_on_cpu = os.getenv('place_embedding_on_cpu') + place_on_cpu = eval(place_on_cpu) if place_on_cpu else False + with conditional(self._is_predicting and place_on_cpu, ops.device('/CPU:0')): concat_features, group_features = self.single_call_input_layer( features, group_name, feature_name_to_output_tensors) if group_name in self._group_name_to_seq_features: @@ -194,7 +197,9 @@ def single_call_input_layer(self, for column in sorted(group_seq_columns, key=lambda x: x.name): with variable_scope.variable_scope( None, default_name=column._var_scope_name): - with conditional(self._is_predicting, ops.device('/CPU:0')): + place_on_cpu = os.getenv('place_embedding_on_cpu') + place_on_cpu = eval(place_on_cpu) if place_on_cpu else False + with conditional(self._is_predicting and place_on_cpu, ops.device('/CPU:0')): seq_feature, seq_len = column._get_sequence_dense_tensor(builder) embedding_reg_lst.append(seq_feature) diff --git a/easy_rec/python/layers/keras/blocks.py b/easy_rec/python/layers/keras/blocks.py index 38b47abfa..1a6715a8e 100644 --- a/easy_rec/python/layers/keras/blocks.py +++ b/easy_rec/python/layers/keras/blocks.py @@ -151,8 +151,8 @@ def call(self, inputs, **kwargs): if i == self.weight_index: continue if j == 0: - output = weights[:, j] * x + output = weights[:, j, None] * x else: - output += weights[:, j] * x + output += weights[:, j, None] * x j += 1 return output diff --git a/easy_rec/python/layers/utils.py b/easy_rec/python/layers/utils.py index 705b1be90..2af9b855f 100644 --- a/easy_rec/python/layers/utils.py +++ b/easy_rec/python/layers/utils.py @@ -207,9 +207,7 @@ def 
get_or_default(self, key, def_val): return value return def_val else: # pb message - if self.params.HasField(key): - return getattr(self.params, key) - return def_val + return getattr(self.params, key) def check_required(self, keys): if not self.is_struct: diff --git a/easy_rec/python/utils/config_util.py b/easy_rec/python/utils/config_util.py index 9f272919d..e35175be9 100644 --- a/easy_rec/python/utils/config_util.py +++ b/easy_rec/python/utils/config_util.py @@ -643,12 +643,14 @@ def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): return pipeline_config -def remove_redundant_config(pipeline_config_path): + +def remove_redundant_config(pipeline_config_path, remove_input=False): """Remove redundant configs from a file containing pipeline_pb2.EasyRecConfig. Args: pipeline_config_path: Path to pipeline_pb2.EasyRecConfig text proto. + remove_input: whether to remove input configs Returns: Dictionary of configuration objects. Keys are `model`, `train_config`, @@ -657,6 +659,7 @@ def remove_redundant_config(pipeline_config_path): """ pipeline_config = get_configs_from_pipeline_file(pipeline_config_path, False) + inputs = set() features = set() conf = pipeline_config.model_config for group in conf.feature_groups: @@ -664,13 +667,30 @@ def remove_redundant_config(pipeline_config_path): features.add(feature) feature_configs = get_compatible_feature_configs(pipeline_config) - for fea_conf in feature_configs: + offset = 0 + for i in range(len(feature_configs)): + fea_conf = feature_configs[i - offset] fea_name = fea_conf.input_names[0] if fea_conf.HasField('feature_name'): fea_name = fea_conf.feature_name if fea_name not in features: logging.info("redundant feature:" + fea_name) - fea_conf.Clear() + del feature_configs[i - offset] + offset += 1 + elif remove_input: + for input_name in fea_conf.input_names: + inputs.add(input_name) + + if remove_input: + for label in pipeline_config.data_config.label_fields: + inputs.add(label) + input_fields = 
pipeline_config.data_config.input_fields + offset = 0 + for i in range(len(input_fields)): + field = input_fields[i - offset] + if field.input_name not in inputs: + del input_fields[i - offset] + offset += 1 return pipeline_config @@ -689,12 +709,17 @@ def remove_redundant_config(pipeline_config_path): required=True, help='Path to pipeline config file.') parser.add_argument( - '--feature_groups', + '-g', '--feature_groups', type=str, default=None, help='The name of feature group to be changed.') parser.add_argument( - '--embedding_dim', + '--rm_input', + type=bool, + default=False, + help='Whether to remove redundancy input.') + parser.add_argument( + '-d', '--embedding_dim', type=int, default=None, help='The embedding dim to be changed to.') diff --git a/pai_jobs/run.py b/pai_jobs/run.py index 41c61ad31..986731d36 100644 --- a/pai_jobs/run.py +++ b/pai_jobs/run.py @@ -166,6 +166,8 @@ tf.app.flags.DEFINE_string('oss_embedding_version', '', 'oss embedding version') tf.app.flags.DEFINE_bool('verbose', False, 'print more debug information') +tf.app.flags.DEFINE_bool('place_embedding_on_cpu', False, + 'whether to place embedding variables on cpu') # for automl hyper parameter tuning tf.app.flags.DEFINE_string('model_dir', None, 'model directory') @@ -434,7 +436,10 @@ def main(argv): elif FLAGS.cmd == 'export': check_param('export_dir') check_param('config') - + if FLAGS.place_embedding_on_cpu: + os.environ['place_embedding_on_cpu'] = 'True' + else: + os.environ['place_embedding_on_cpu'] = 'False' redis_params = {} if FLAGS.redis_url: redis_params['redis_url'] = FLAGS.redis_url From a9aff757ed9c220c2ea2d83c246629564d0b3064 Mon Sep 17 00:00:00 2001 From: weisu Date: Sun, 25 Jun 2023 21:48:39 +0800 Subject: [PATCH 47/54] add gate layer --- easy_rec/python/layers/common_layers.py | 18 ------------------ easy_rec/python/layers/utils.py | 8 +++++++- easy_rec/python/protos/feature_config.proto | 1 - 3 files changed, 7 insertions(+), 20 deletions(-) diff --git 
a/easy_rec/python/layers/common_layers.py b/easy_rec/python/layers/common_layers.py index dd39d8259..fae4fe3fc 100644 --- a/easy_rec/python/layers/common_layers.py +++ b/easy_rec/python/layers/common_layers.py @@ -144,21 +144,3 @@ def call(self, group, is_training): if self._config.output_2d_tensor_and_feature_list: return features, feature_list return features - - -class Concatenate(object): - - def __init__(self, config): - self.config = config - - def __call__(self, inputs, *args, **kwargs): - if self.config.HasField('expand_dim_before'): - dim = self.config.expand_dim_before - output = tf.stack(inputs, axis=dim) - else: - output = tf.concat(inputs, axis=self.config.axis) - - if self.config.HasField('expand_dim_after'): - dim = self.config.expand_dim_after - output = tf.expand_dims(output, dim) - return output diff --git a/easy_rec/python/layers/utils.py b/easy_rec/python/layers/utils.py index 2af9b855f..b95eef2fe 100644 --- a/easy_rec/python/layers/utils.py +++ b/easy_rec/python/layers/utils.py @@ -207,7 +207,13 @@ def get_or_default(self, key, def_val): return value return def_val else: # pb message - return getattr(self.params, key) + value = getattr(self.params, key) + if hasattr(value, '__len__'): + if len(value) > 0: + return value + elif self.params.HasField(key): + return value + return def_val def check_required(self, keys): if not self.is_struct: diff --git a/easy_rec/python/protos/feature_config.proto b/easy_rec/python/protos/feature_config.proto index ee245b0e9..75d49a15c 100644 --- a/easy_rec/python/protos/feature_config.proto +++ b/easy_rec/python/protos/feature_config.proto @@ -3,7 +3,6 @@ package protos; import "easy_rec/python/protos/hyperparams.proto"; import "easy_rec/python/protos/dnn.proto"; -import "easy_rec/python/protos/seq_encoder.proto"; enum WideOrDeep { DEEP = 0; WIDE = 1; From 28473476c60cdbd6ea82eb280781f37c4f8e3eec Mon Sep 17 00:00:00 2001 From: weisu Date: Wed, 28 Jun 2023 15:33:08 +0800 Subject: [PATCH 48/54] add block package 
for reuse sub network --- easy_rec/python/layers/backbone.py | 153 ++++++++---- easy_rec/python/protos/backbone.proto | 30 ++- easy_rec/python/protos/easy_rec_model.proto | 1 + .../configs/dcn_backbone_on_movielens.config | 5 +- .../configs/deepfm_backbone_on_criteo.config | 10 +- ...pfm_backbone_on_criteo_with_autodis.config | 16 +- ...fm_backbone_on_criteo_with_periodic.config | 16 +- .../deepfm_backbone_on_movielens.config | 16 +- .../configs/dlrm_backbone_on_criteo.config | 12 +- .../dlrm_on_criteo_with_autodis.config | 14 +- .../dlrm_on_criteo_with_periodic.config | 14 +- .../configs/dlrm_standard_on_criteo.config | 10 +- examples/configs/fibinet_on_movielens.config | 6 +- examples/configs/masknet_on_movielens.config | 3 +- examples/configs/mlp_on_movielens.config | 2 +- .../configs/multi_tower_on_movielens.config | 223 ++++++++++++++++++ ...wide_and_deep_backbone_on_movielens.config | 9 +- 17 files changed, 438 insertions(+), 102 deletions(-) create mode 100644 examples/configs/multi_tower_on_movielens.config diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index b673a209a..414c667fb 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -17,32 +17,9 @@ tf = tf.compat.v1 -def block_input(config, block_outputs): - inputs = [] - for input_node in config.inputs: - input_name = input_node.name - if input_name in block_outputs: - input_feature = block_outputs[input_name] - else: - raise KeyError('input name `%s` does not exists' % input_name) - if input_node.HasField('input_fn'): - fn = eval(input_node.input_fn) - input_feature = fn(input_feature) - inputs.append(input_feature) - - if config.merge_inputs_into_list: - output = inputs - else: - output = concat_inputs(inputs, config.input_concat_axis, config.name) - - if config.HasField('extra_input_fn'): - fn = eval(config.extra_input_fn) - output = fn(output) - return output - - -class Backbone(object): - """Configurable Backbone Network.""" 
+class Package(object): + """A sub DAG of tf ops for reuse.""" + __packages = {} def __init__(self, config, features, input_layer, l2_reg=None): self._config = config @@ -54,22 +31,27 @@ def __init__(self, config, features, input_layer, l2_reg=None): self.loss_dict = {} input_feature_groups = set() for block in config.blocks: + if len(block.inputs) == 0: + raise ValueError('block takes at least one input: %s' % block.name) self._dag.add_node(block.name) self._name_to_blocks[block.name] = block layer = block.WhichOneof('layer') if layer == 'input_layer': - if len(block.inputs) != 0: - raise ValueError('no input allowed for input_layer: ' + block.name) - input_name = block.name - if not input_layer.has_group(input_name): + if len(block.inputs) != 1: + raise ValueError('input layer `%s` takes only one input' % block.name) + one_input = block.inputs[0] + name = one_input.WhichOneof('name') + if name != 'feature_group_name': raise KeyError( - 'input_layer\'s name must be one of feature group, invalid: ' + - input_name) + '`feature_group_name` should be set for input layer: ' + + block.name) + input_name = one_input.feature_group_name + if not input_layer.has_group(input_name): + raise KeyError('invalid feature group name: ' + input_name) if input_name in input_feature_groups: - raise ValueError('input `%s` already exists in other block' % - input_name) - else: - input_feature_groups.add(input_name) + logging.warning('input `%s` already exists in other block' % + input_name) + input_feature_groups.add(input_name) num_groups = len(input_feature_groups) num_blocks = len(self._name_to_blocks) - num_groups @@ -82,10 +64,8 @@ def __init__(self, config, features, input_layer, l2_reg=None): if block.name in input_feature_groups: raise KeyError('block name can not be one of feature groups:' + block.name) - assert len(block.inputs) > 0, 'no input for block: %s' % block.name - for input_node in block.inputs: - input_name = input_node.name + input_name = getattr(input_node, 
input_node.WhichOneof('name')) if input_name in self._name_to_blocks: assert input_name != block.name, 'input name can not equal to block name:' + input_name self._dag.add_edge(input_name, block.name) @@ -94,19 +74,58 @@ def __init__(self, config, features, input_layer, l2_reg=None): logging.info('adding an input_layer block: ' + input_name) new_block = backbone_pb2.Block() new_block.name = input_name + input_cfg = backbone_pb2.Input() + input_cfg.feature_group_name = input_name + new_block.inputs.append(input_cfg) new_block.input_layer.CopyFrom(backbone_pb2.InputLayer()) self._name_to_blocks[input_name] = new_block self._dag.add_node(input_name) self._dag.add_edge(input_name, block.name) - input_feature_groups.add(block.name) + input_feature_groups.add(input_name) else: raise KeyError( - 'invalid input name `%s`, must be the name of either a feature group or an another block' - % input_name) + 'invalid input name `%s`, must be the name of either a feature group or an another block' + % input_name) num_groups = len(input_feature_groups) assert num_groups > 0, 'there must be at least one input layer' + Package.__packages[self._config.name] = self + + def block_input(self, config, block_outputs, training=None): + inputs = [] + for input_node in config.inputs: + input_type = input_node.WhichOneof('name') + input_name = getattr(input_node, input_type) + if input_type == 'package_name': + if input_name not in Package.__packages: + raise KeyError('package name `%s` does not exists' % input_name) + package = Package.__packages[input_name] + input_feature = package(training) + if len(package.loss_dict) > 0: + self.loss_dict.update(package.loss_dict) + elif input_name in block_outputs: + input_feature = block_outputs[input_name] + else: + raise KeyError('input name `%s` does not exists' % input_name) + if input_node.HasField('input_fn'): + fn = eval(input_node.input_fn) + input_feature = fn(input_feature) + inputs.append(input_feature) + + if config.merge_inputs_into_list: 
+ output = inputs + else: + output = concat_inputs(inputs, config.input_concat_axis, config.name) + + if config.HasField('extra_input_fn'): + fn = eval(config.extra_input_fn) + output = fn(output) + return output def __call__(self, is_training, **kwargs): + with tf.variable_scope(self._config.name, reuse=tf.AUTO_REUSE): + return self.call(is_training) + + def call(self, is_training): block_outputs = {} blocks = self._dag.topological_sort() logging.info('backbone topological order: ' + ','.join(blocks)) @@ -115,7 +134,7 @@ def __call__(self, is_training, **kwargs): config = self._name_to_blocks[block] if config.layers: # sequential layers logging.info('call sequential %d layers' % len(config.layers)) - output = block_input(config, block_outputs) + output = self.block_input(config, block_outputs, is_training) for layer in config.layers: output = self.call_layer(output, layer, block, is_training) block_outputs[block] = output @@ -123,14 +142,14 @@ def __call__(self, is_training, **kwargs): # just one of layer layer = config.WhichOneof('layer') if layer is None: # identity layer - block_outputs[block] = block_input(config, block_outputs) + block_outputs[block] = self.block_input(config, block_outputs, is_training) elif layer == 'input_layer': conf = config.input_layer input_fn = EnhancedInputLayer(conf, self._input_layer, self._features) output = input_fn(block, is_training) block_outputs[block] = output else: - inputs = block_input(config, block_outputs) + inputs = self.block_input(config, block_outputs, is_training) output = self.call_layer(inputs, config, block, is_training) block_outputs[block] = output @@ -146,11 +165,11 @@ def __call__(self, is_training, **kwargs): raise ValueError('No output `%s` of backbone to be concat' % output) output = concat_inputs(outputs, msg='backbone') - if self._config.HasField('top_mlp'): - params = Parameter.make_from_pb(self._config.top_mlp) - params.l2_regularizer = self._l2_reg - final_mlp = MLP(params, name='backbone_top_mlp') - 
output = final_mlp(output, training=is_training) + # if self._config.HasField('top_mlp'): + # params = Parameter.make_from_pb(self._config.top_mlp) + # params.l2_regularizer = self._l2_reg + # final_mlp = MLP(params, name='backbone_top_mlp') + # output = final_mlp(output, training=is_training) return output def call_keras_layer(self, layer_conf, inputs, name, training): @@ -185,7 +204,10 @@ def call_keras_layer(self, layer_conf, inputs, name, training): logging.info('try to call %s layer with params %r' % (layer_conf.class_name, args)) layer = layer_cls(*args, name=name) - return layer(inputs, training=training) + try: + return layer(inputs, training=training) + except TypeError: + return layer(inputs) def call_layer(self, inputs, config, name, training): layer_name = config.WhichOneof('layer') @@ -243,6 +265,35 @@ def call_layer(self, inputs, config, name, training): raise NotImplementedError('Unsupported backbone layer:' + layer_name) +class Backbone(object): + """Configurable Backbone Network.""" + + def __init__(self, config, features, input_layer, l2_reg=None): + self._config = config + self._l2_reg = l2_reg + self.loss_dict = {} + for pkg in config.packages: + Package(pkg, features, input_layer, l2_reg) + + main_pkg = backbone_pb2.BlockPackage() + main_pkg.name = 'backbone' + main_pkg.blocks.MergeFrom(config.blocks) + main_pkg.concat_blocks.extend(config.concat_blocks) + self._main_pkg = Package(main_pkg, features, input_layer, l2_reg) + + def __call__(self, is_training, **kwargs): + output = self._main_pkg(is_training, **kwargs) + if len(self._main_pkg.loss_dict) > 0: + self.loss_dict = self._main_pkg.loss_dict + + if self._config.HasField('top_mlp'): + params = Parameter.make_from_pb(self._config.top_mlp) + params.l2_regularizer = self._l2_reg + final_mlp = MLP(params, name='backbone_top_mlp') + output = final_mlp(output, training=is_training) + return output + + def concat_inputs(inputs, axis=-1, msg=''): if len(inputs) > 1: if all(map(lambda x: type(x) == 
list, inputs)): diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto index 5e2262707..d73799707 100644 --- a/easy_rec/python/protos/backbone.proto +++ b/easy_rec/python/protos/backbone.proto @@ -20,8 +20,12 @@ message Lambda { } message Input { - required string name = 1; - optional string input_fn = 2; + optional string input_fn = 1; + oneof name { + string feature_group_name = 2; + string block_name = 3; + string package_name = 4; + } } message RecurrentLayer { @@ -56,6 +60,7 @@ message Block { // sequential layers repeated Layer layers = 6; + // only take effect when there are no layers oneof layer { InputLayer input_layer = 101; @@ -66,8 +71,23 @@ message Block { } } +// a package of blocks for reuse; e.g. call in a contrastive learning manner +message BlockPackage { + // package name + required string name = 1; + // a few blocks generating a DAG + repeated Block blocks = 2; + // the names of output blocks + repeated string concat_blocks = 3; +} + message BackboneTower { - repeated Block blocks = 1; - repeated string concat_blocks = 2; - optional MLP top_mlp = 3; + // a few sub DAGs + repeated BlockPackage packages = 1; + // a few blocks generating a DAG + repeated Block blocks = 2; + // the names of output blocks + repeated string concat_blocks = 3; + // optional top mlp layer + optional MLP top_mlp = 4; } diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 2bb801847..21ac685d3 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -55,6 +55,7 @@ message KD { } message EasyRecModel { + required string model_name = 99; required string model_class = 1; // actually input layers, each layer produce a group of feature diff --git a/examples/configs/dcn_backbone_on_movielens.config b/examples/configs/dcn_backbone_on_movielens.config index 9c84794dd..3376db96f 100644 --- 
a/examples/configs/dcn_backbone_on_movielens.config +++ b/examples/configs/dcn_backbone_on_movielens.config @@ -148,6 +148,7 @@ feature_config: { } } model_config: { + model_name: 'DCN v2' model_class: 'RankModel' feature_groups: { group_name: 'all' @@ -164,7 +165,7 @@ model_config: { blocks { name: "deep" inputs { - name: 'all' + feature_group_name: 'all' } keras_layer { class_name: 'MLP' @@ -176,7 +177,7 @@ model_config: { blocks { name: "dcn" inputs { - name: 'all' + feature_group_name: 'all' input_fn: 'lambda x: [x, x]' } recurrent { diff --git a/examples/configs/deepfm_backbone_on_criteo.config b/examples/configs/deepfm_backbone_on_criteo.config index 9cba3fb82..06c60f966 100644 --- a/examples/configs/deepfm_backbone_on_criteo.config +++ b/examples/configs/deepfm_backbone_on_criteo.config @@ -486,6 +486,7 @@ feature_config: { } } model_config: { + model_name: 'DeepFM' model_class: 'RankModel' feature_groups: { group_name: "deep_features" @@ -577,7 +578,7 @@ model_config: { blocks { name: 'wide_logit' inputs { - name: 'wide_features' + feature_group_name: 'wide_features' } lambda { expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' @@ -585,6 +586,9 @@ model_config: { } blocks { name: 'deep_features' + inputs { + feature_group_name: 'deep_features' + } input_layer { output_2d_tensor_and_feature_list: true } @@ -592,7 +596,7 @@ model_config: { blocks { name: 'fm' inputs { - name: 'deep_features' + block_name: 'deep_features' input_fn: 'lambda x: x[1]' } keras_layer { @@ -608,7 +612,7 @@ model_config: { blocks { name: 'deep' inputs { - name: 'deep_features' + block_name: 'deep_features' input_fn: 'lambda x: x[0]' } keras_layer { diff --git a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config index 49fcf8e38..9d1856cae 100644 --- a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config +++ b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config @@ -577,6 
+577,7 @@ feature_config: { } } model_config: { + model_name: 'DeepFM with AutoDis' model_class: 'RankModel' feature_groups: { group_name: "numerical_features" @@ -672,7 +673,7 @@ model_config: { blocks { name: 'wide_logit' inputs { - name: 'wide_features' + feature_group_name: 'wide_features' } lambda { expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' @@ -681,7 +682,7 @@ model_config: { blocks { name: 'num_emb' inputs { - name: 'numerical_features' + feature_group_name: 'numerical_features' } keras_layer { class_name: 'AutoDisEmbedding' @@ -695,6 +696,9 @@ model_config: { } blocks { name: 'categorical_features' + inputs { + feature_group_name: 'categorical_features' + } input_layer { output_2d_tensor_and_feature_list: true } @@ -702,11 +706,11 @@ model_config: { blocks { name: 'fm' inputs { - name: 'categorical_features' + block_name: 'categorical_features' input_fn: 'lambda x: x[1]' } inputs { - name: 'num_emb' + block_name: 'num_emb' input_fn: 'lambda x: x[1]' } keras_layer { @@ -719,11 +723,11 @@ model_config: { blocks { name: 'deep' inputs { - name: 'categorical_features' + block_name: 'categorical_features' input_fn: 'lambda x: x[0]' } inputs { - name: 'num_emb' + block_name: 'num_emb' input_fn: 'lambda x: x[0]' } keras_layer { diff --git a/examples/configs/deepfm_backbone_on_criteo_with_periodic.config b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config index 2f2f8435b..3ce65c8bf 100644 --- a/examples/configs/deepfm_backbone_on_criteo_with_periodic.config +++ b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config @@ -577,6 +577,7 @@ feature_config: { } } model_config: { + model_name: 'DeepFM with Periodic' model_class: 'RankModel' feature_groups: { group_name: "numerical_features" @@ -672,7 +673,7 @@ model_config: { blocks { name: 'wide_logit' inputs { - name: 'wide_features' + feature_group_name: 'wide_features' } lambda { expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' @@ -681,7 +682,7 @@ 
model_config: { blocks { name: 'num_emb' inputs { - name: 'numerical_features' + feature_group_name: 'numerical_features' } keras_layer { class_name: 'PeriodicEmbedding' @@ -694,6 +695,9 @@ model_config: { } blocks { name: 'categorical_features' + inputs { + feature_group_name: 'categorical_features' + } input_layer { output_2d_tensor_and_feature_list: true } @@ -701,11 +705,11 @@ model_config: { blocks { name: 'fm' inputs { - name: 'categorical_features' + block_name: 'categorical_features' input_fn: 'lambda x: x[1]' } inputs { - name: 'num_emb' + block_name: 'num_emb' input_fn: 'lambda x: x[1]' } keras_layer { @@ -718,11 +722,11 @@ model_config: { blocks { name: 'deep' inputs { - name: 'categorical_features' + block_name: 'categorical_features' input_fn: 'lambda x: x[0]' } inputs { - name: 'num_emb' + block_name: 'num_emb' input_fn: 'lambda x: x[0]' } keras_layer { diff --git a/examples/configs/deepfm_backbone_on_movielens.config b/examples/configs/deepfm_backbone_on_movielens.config index c6bf82151..36ef7ace3 100644 --- a/examples/configs/deepfm_backbone_on_movielens.config +++ b/examples/configs/deepfm_backbone_on_movielens.config @@ -148,6 +148,7 @@ feature_config: { } } model_config: { + model_name: 'DeepFM' model_class: 'RankModel' feature_groups: { group_name: 'wide' @@ -176,7 +177,7 @@ model_config: { blocks { name: 'wide_logit' inputs { - name: 'wide' + feature_group_name: 'wide' } lambda { expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' @@ -184,6 +185,9 @@ model_config: { } blocks { name: 'features' + inputs { + feature_group_name: 'features' + } input_layer { output_2d_tensor_and_feature_list: true } @@ -191,7 +195,7 @@ model_config: { blocks { name: 'fm' inputs { - name: 'features' + block_name: 'features' input_fn: 'lambda x: x[1]' } keras_layer { @@ -201,7 +205,7 @@ model_config: { blocks { name: 'deep' inputs { - name: 'features' + block_name: 'features' input_fn: 'lambda x: x[0]' } keras_layer { @@ -216,13 +220,13 @@ model_config: { 
blocks { name: 'add' inputs { - name: 'wide_logit' + block_name: 'wide_logit' } inputs { - name: 'fm' + block_name: 'fm' } inputs { - name: 'deep' + block_name: 'deep' } merge_inputs_into_list: true keras_layer { diff --git a/examples/configs/dlrm_backbone_on_criteo.config b/examples/configs/dlrm_backbone_on_criteo.config index afdc0f784..6dc5dd41e 100644 --- a/examples/configs/dlrm_backbone_on_criteo.config +++ b/examples/configs/dlrm_backbone_on_criteo.config @@ -474,6 +474,7 @@ feature_config: { } } model_config: { + model_name: 'DLRM' model_class: 'RankModel' feature_groups: { group_name: "dense" @@ -526,7 +527,7 @@ model_config: { blocks { name: 'bottom_mlp' inputs { - name: 'dense' + feature_group_name: 'dense' } keras_layer { class_name: 'MLP' @@ -537,6 +538,9 @@ model_config: { } blocks { name: 'sparse' + inputs { + feature_group_name: 'sparse' + } input_layer { output_2d_tensor_and_feature_list: true } @@ -544,11 +548,11 @@ model_config: { blocks { name: 'dot' inputs { - name: 'bottom_mlp' + block_name: 'bottom_mlp' input_fn: 'lambda x: [x]' } inputs { - name: 'sparse' + block_name: 'sparse' input_fn: 'lambda x: x[1]' } keras_layer { @@ -558,7 +562,7 @@ model_config: { blocks { name: 'sparse_2d' inputs { - name: 'sparse' + block_name: 'sparse' input_fn: 'lambda x: x[0]' } } diff --git a/examples/configs/dlrm_on_criteo_with_autodis.config b/examples/configs/dlrm_on_criteo_with_autodis.config index 151bb4424..c6f522f95 100644 --- a/examples/configs/dlrm_on_criteo_with_autodis.config +++ b/examples/configs/dlrm_on_criteo_with_autodis.config @@ -473,6 +473,7 @@ feature_config: { } } model_config: { + model_name: 'DLRM with autodis' model_class: 'RankModel' feature_groups: { group_name: "dense" @@ -525,7 +526,7 @@ model_config: { blocks { name: 'num_emb' inputs { - name: 'dense' + feature_group_name: 'dense' } keras_layer { class_name: 'AutoDisEmbedding' @@ -539,6 +540,9 @@ model_config: { } blocks { name: 'sparse' + inputs { + feature_group_name: 'sparse' + } 
input_layer { output_2d_tensor_and_feature_list: true } @@ -546,11 +550,11 @@ model_config: { blocks { name: 'dot' inputs { - name: 'num_emb' + block_name: 'num_emb' input_fn: 'lambda x: x[1]' } inputs { - name: 'sparse' + block_name: 'sparse' input_fn: 'lambda x: x[1]' } keras_layer { @@ -560,14 +564,14 @@ model_config: { blocks { name: 'sparse_2d' inputs { - name: 'sparse' + block_name: 'sparse' input_fn: 'lambda x: x[0]' } } blocks { name: 'num_emb_2d' inputs { - name: 'num_emb' + block_name: 'num_emb' input_fn: 'lambda x: x[0]' } } diff --git a/examples/configs/dlrm_on_criteo_with_periodic.config b/examples/configs/dlrm_on_criteo_with_periodic.config index 81d0db1b3..c42e8252b 100644 --- a/examples/configs/dlrm_on_criteo_with_periodic.config +++ b/examples/configs/dlrm_on_criteo_with_periodic.config @@ -473,6 +473,7 @@ feature_config: { } } model_config: { + model_name: 'dlrm with periodic' model_class: 'RankModel' feature_groups: { group_name: "dense" @@ -525,7 +526,7 @@ model_config: { blocks { name: 'num_emb' inputs { - name: 'dense' + feature_group_name: 'dense' } keras_layer { class_name: 'PeriodicEmbedding' @@ -547,6 +548,9 @@ model_config: { } blocks { name: 'sparse' + inputs { + feature_group_name: 'sparse' + } input_layer { output_2d_tensor_and_feature_list: true } @@ -554,11 +558,11 @@ model_config: { blocks { name: 'dot' inputs { - name: 'num_emb' + block_name: 'num_emb' input_fn: 'lambda x: x[1]' } inputs { - name: 'sparse' + block_name: 'sparse' input_fn: 'lambda x: x[1]' } keras_layer { @@ -568,14 +572,14 @@ model_config: { blocks { name: 'sparse_2d' inputs { - name: 'sparse' + block_name: 'sparse' input_fn: 'lambda x: x[0]' } } blocks { name: 'num_emb_2d' inputs { - name: 'num_emb' + block_name: 'num_emb' input_fn: 'lambda x: x[0]' } } diff --git a/examples/configs/dlrm_standard_on_criteo.config b/examples/configs/dlrm_standard_on_criteo.config index 03e3df7bc..df82e7990 100644 --- a/examples/configs/dlrm_standard_on_criteo.config +++ 
b/examples/configs/dlrm_standard_on_criteo.config @@ -473,6 +473,7 @@ feature_config: { } } model_config: { + model_name: 'Stardard DLRM' model_class: 'RankModel' feature_groups: { group_name: "dense" @@ -525,7 +526,7 @@ model_config: { blocks { name: 'bottom_mlp' inputs { - name: 'dense' + feature_group_name: 'dense' } keras_layer { class_name: 'MLP' @@ -536,6 +537,9 @@ model_config: { } blocks { name: 'sparse' + inputs { + feature_group_name: 'sparse' + } input_layer { only_output_feature_list: true } @@ -543,11 +547,11 @@ model_config: { blocks { name: 'dot' inputs { - name: 'bottom_mlp' + block_name: 'bottom_mlp' input_fn: 'lambda x: [x]' } inputs { - name: 'sparse' + block_name: 'sparse' } keras_layer { class_name: 'DotInteraction' diff --git a/examples/configs/fibinet_on_movielens.config b/examples/configs/fibinet_on_movielens.config index aa6bef7f0..1fe36aac3 100644 --- a/examples/configs/fibinet_on_movielens.config +++ b/examples/configs/fibinet_on_movielens.config @@ -148,6 +148,7 @@ feature_config: { } } model_config: { + model_name: 'FiBiNet' model_class: 'RankModel' feature_groups: { group_name: 'all' @@ -163,6 +164,9 @@ model_config: { backbone { blocks { name: "all" + inputs { + feature_group_name: "all" + } input_layer { do_batch_norm: true only_output_feature_list: true @@ -171,7 +175,7 @@ model_config: { blocks { name: "fibinet" inputs { - name: "all" + block_name: "all" } keras_layer { class_name: 'FiBiNet' diff --git a/examples/configs/masknet_on_movielens.config b/examples/configs/masknet_on_movielens.config index dccbbb13e..fd3dc1342 100644 --- a/examples/configs/masknet_on_movielens.config +++ b/examples/configs/masknet_on_movielens.config @@ -148,6 +148,7 @@ feature_config: { } } model_config: { + model_name: 'MaskNet' model_class: 'RankModel' feature_groups: { group_name: 'all' @@ -164,7 +165,7 @@ model_config: { blocks { name: "mask_net" inputs { - name: "all" + feature_group_name: "all" } keras_layer { class_name: 'MaskNet' diff --git 
a/examples/configs/mlp_on_movielens.config b/examples/configs/mlp_on_movielens.config index 392f392ef..038b02a51 100644 --- a/examples/configs/mlp_on_movielens.config +++ b/examples/configs/mlp_on_movielens.config @@ -164,7 +164,7 @@ model_config: { blocks { name: 'mlp' inputs { - name: 'features' + feature_group_name: 'features' } layers { keras_layer { diff --git a/examples/configs/multi_tower_on_movielens.config b/examples/configs/multi_tower_on_movielens.config new file mode 100644 index 000000000..472443cfa --- /dev/null +++ b/examples/configs/multi_tower_on_movielens.config @@ -0,0 +1,223 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/multi_tower_movieslen" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + 
label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_class: "RankModel" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'movie_id' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + packages { + name: 'user' + blocks { + name: 'mlp' + inputs { + feature_group_name: 'user' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128] + } + } + } + concat_blocks: 'mlp' + } + packages { + name: 'item' + blocks { + name: 'mlp' + inputs { + feature_group_name: 'item' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128] + } + } + } + concat_blocks: 'mlp' + } + blocks { + name: 'top_mlp' + inputs { + package_name: 'user' + } 
+ inputs { + package_name: 'item' + } + layers { + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [128, 64] + } + } + } + } + concat_blocks: 'top_mlp' + } + rank_model { + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} diff --git a/examples/configs/wide_and_deep_backbone_on_movielens.config b/examples/configs/wide_and_deep_backbone_on_movielens.config index dddc91888..0f13a0511 100644 --- a/examples/configs/wide_and_deep_backbone_on_movielens.config +++ b/examples/configs/wide_and_deep_backbone_on_movielens.config @@ -174,6 +174,9 @@ model_config: { backbone { blocks { name: 'wide' + inputs { + feature_group_name: 'wide' + } input_layer { only_output_feature_list: true } @@ -181,7 +184,7 @@ model_config: { blocks { name: 'deep_logit' inputs { - name: 'deep' + feature_group_name: 'deep' } keras_layer { class_name: 'MLP' @@ -195,11 +198,11 @@ model_config: { blocks { name: 'final_logit' inputs { - name: 'wide' + block_name: 'wide' input_fn: 'lambda x: tf.add_n(x)' } inputs { - name: 'deep_logit' + block_name: 'deep_logit' } merge_inputs_into_list: true keras_layer { From ee49dbe9446d4a715982f636400c44c1a5271345 Mon Sep 17 00:00:00 2001 From: weisu Date: Wed, 28 Jun 2023 15:47:54 +0800 Subject: [PATCH 49/54] add block package for reuse sub network --- examples/configs/multi_tower_on_movielens.config | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/configs/multi_tower_on_movielens.config b/examples/configs/multi_tower_on_movielens.config index 472443cfa..a502922ae 100644 --- a/examples/configs/multi_tower_on_movielens.config +++ b/examples/configs/multi_tower_on_movielens.config @@ -148,6 +148,7 @@ feature_config: { } } model_config: { + model_name: "multi tower" model_class: "RankModel" feature_groups: { group_name: 'user' From 2c591fc523fabd048e1245180f7554ae8b1add98 Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 30 Jun 2023 10:51:38 +0800 Subject: [PATCH 50/54] fix a bug --- easy_rec/python/layers/backbone.py | 68 
++++++++++++++++----------- easy_rec/python/protos/backbone.proto | 10 ++-- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 414c667fb..92705e3d4 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -43,8 +43,8 @@ def __init__(self, config, features, input_layer, l2_reg=None): name = one_input.WhichOneof('name') if name != 'feature_group_name': raise KeyError( - '`feature_group_name` should be set for input layer: ' + - block.name) + '`feature_group_name` should be set for input layer: ' + + block.name) input_name = one_input.feature_group_name if not input_layer.has_group(input_name): raise KeyError('invalid feature group name: ' + input_name) @@ -57,6 +57,7 @@ def __init__(self, config, features, input_layer, l2_reg=None): num_blocks = len(self._name_to_blocks) - num_groups assert num_blocks > 0, 'there must be at least one block in backbone' + num_pkg_input = 0 for block in config.blocks: layer = block.WhichOneof('layer') if layer == 'input_layer': @@ -65,7 +66,11 @@ def __init__(self, config, features, input_layer, l2_reg=None): raise KeyError('block name can not be one of feature groups:' + block.name) for input_node in block.inputs: - input_name = getattr(input_node, input_node.WhichOneof('name')) + input_type = input_node.WhichOneof('name') + if input_type == 'package_name': + num_pkg_input += 1 + continue + input_name = getattr(input_node, input_type) if input_name in self._name_to_blocks: assert input_name != block.name, 'input name can not equal to block name:' + input_name self._dag.add_edge(input_name, block.name) @@ -84,10 +89,16 @@ def __init__(self, config, features, input_layer, l2_reg=None): input_feature_groups.add(input_name) else: raise KeyError( - 'invalid input name `%s`, must be the name of either a feature group or an another block' - % input_name) + 'invalid input name `%s`, must be the name of either a feature 
group or an another block' + % input_name) num_groups = len(input_feature_groups) - assert num_groups > 0, 'there must be at least one input layer' + assert num_pkg_input > 0 or num_groups > 0, 'there must be at least one input layer/feature group' + + if len(config.concat_blocks) == 0: + leaf = self._dag.all_leaves() + logging.warning("%s has no `concat_blocks`, try to use all leaf blocks: %s" % (config.name, ','.join(leaf))) + self._config.concat_blocks.extend(leaf) + Package.__packages[self._config.name] = self def block_input(self, config, block_outputs, training=None): @@ -106,6 +117,10 @@ def block_input(self, config, block_outputs, training=None): input_feature = block_outputs[input_name] else: raise KeyError('input name `%s` does not exists' % input_name) + + if input_node.HasField('input_slice'): + fn = 'lambda x: x' + input_node.input_slice.strip() + input_feature = fn(input_feature) if input_node.HasField('input_fn'): fn = eval(input_node.input_fn) input_feature = fn(input_feature) @@ -114,7 +129,7 @@ def block_input(self, config, block_outputs, training=None): if config.merge_inputs_into_list: output = inputs else: - output = concat_inputs(inputs, config.input_concat_axis, config.name) + output = merge_inputs(inputs, config.input_concat_axis, config.name) if config.HasField('extra_input_fn'): fn = eval(config.extra_input_fn) @@ -142,7 +157,8 @@ def call(self, is_training): # just one of layer layer = config.WhichOneof('layer') if layer is None: # identity layer - block_outputs[block] = self.block_input(config, block_outputs, is_training) + block_outputs[block] = self.block_input(config, block_outputs, + is_training) elif layer == 'input_layer': conf = config.input_layer input_fn = EnhancedInputLayer(conf, self._input_layer, self._features) @@ -163,13 +179,7 @@ def call(self, is_training): outputs.append(temp) else: raise ValueError('No output `%s` of backbone to be concat' % output) - output = concat_inputs(outputs, msg='backbone') - - # if 
self._config.HasField('top_mlp'): - # params = Parameter.make_from_pb(self._config.top_mlp) - # params.l2_regularizer = self._l2_reg - # final_mlp = MLP(params, name='backbone_top_mlp') - # output = final_mlp(output, training=is_training) + output = merge_inputs(outputs, msg='backbone') return output def call_keras_layer(self, layer_conf, inputs, name, training): @@ -294,20 +304,24 @@ def __call__(self, is_training, **kwargs): return output -def concat_inputs(inputs, axis=-1, msg=''): - if len(inputs) > 1: - if all(map(lambda x: type(x) == list, inputs)): - # merge multiple lists into a list - from functools import reduce - return reduce(lambda x, y: x + y, inputs) - - if axis != -1: - logging.info('concat inputs %s axis=%d' % (msg, axis)) - return tf.concat(inputs, axis=axis) - +def merge_inputs(inputs, axis=-1, msg=''): + if len(inputs) == 0: + raise ValueError('no inputs to be concat:' + msg) if len(inputs) == 1: return inputs[0] - raise ValueError('no inputs to be concat:' + msg) + + from functools import reduce + if all(map(lambda x: type(x) == list, inputs)): + # merge multiple lists into a list + return reduce(lambda x, y: x + y, inputs) + + if any(map(lambda x: type(x) == list, inputs)): + logging.warning('%s: try to merge inputs into list' % msg) + return reduce(lambda x, y: x + y, [e if type(e) == list else [e] for e in inputs]) + + if axis != -1: + logging.info('concat inputs %s axis=%d' % (msg, axis)) + return tf.concat(inputs, axis=axis) def format_value(value): diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto index d73799707..67b230c04 100644 --- a/easy_rec/python/protos/backbone.proto +++ b/easy_rec/python/protos/backbone.proto @@ -20,12 +20,13 @@ message Lambda { } message Input { - optional string input_fn = 1; oneof name { - string feature_group_name = 2; - string block_name = 3; - string package_name = 4; + string feature_group_name = 1; + string block_name = 2; + string package_name = 3; } + optional 
string input_fn = 11; + optional string input_slice = 12; } message RecurrentLayer { @@ -47,6 +48,7 @@ message Layer { KerasLayer keras_layer = 2; RecurrentLayer recurrent = 3; RepeatLayer repeat = 4; + InputLayer input_layer = 5; } } From a1f0b8af0527a0cc7fc6df626ec9eb1e1a699d8b Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 30 Jun 2023 11:32:54 +0800 Subject: [PATCH 51/54] fix bug of input layer block --- easy_rec/python/layers/backbone.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 92705e3d4..b5782fb94 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -143,8 +143,8 @@ def __call__(self, is_training, **kwargs): def call(self, is_training): block_outputs = {} blocks = self._dag.topological_sort() - logging.info('backbone topological order: ' + ','.join(blocks)) - print('backbone topological order: ' + ','.join(blocks)) + logging.info(self._config.name + ' topological order: ' + ','.join(blocks)) + print(self._config.name + ' topological order: ' + ','.join(blocks)) for block in blocks: config = self._name_to_blocks[block] if config.layers: # sequential layers @@ -162,7 +162,7 @@ def call(self, is_training): elif layer == 'input_layer': conf = config.input_layer input_fn = EnhancedInputLayer(conf, self._input_layer, self._features) - output = input_fn(block, is_training) + output = input_fn(config.feature_group_name, is_training) block_outputs[block] = output else: inputs = self.block_input(config, block_outputs, is_training) From ef5f9cd1fb1fd937be4adbabfdb5c26f7a28e62e Mon Sep 17 00:00:00 2001 From: weisu Date: Fri, 30 Jun 2023 11:37:06 +0800 Subject: [PATCH 52/54] fix bug of input layer block --- easy_rec/python/layers/backbone.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index b5782fb94..0b16d92a1 100644 --- 
a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -162,7 +162,8 @@ def call(self, is_training): elif layer == 'input_layer': conf = config.input_layer input_fn = EnhancedInputLayer(conf, self._input_layer, self._features) - output = input_fn(config.feature_group_name, is_training) + feature_group = config.inputs[0].feature_group_name + output = input_fn(feature_group, is_training) block_outputs[block] = output else: inputs = self.block_input(config, block_outputs, is_training) From a3944f9b452d675db4bbdfa2121b5ba1df7168ca Mon Sep 17 00:00:00 2001 From: weisu Date: Wed, 12 Jul 2023 10:32:24 +0800 Subject: [PATCH 53/54] fix bug of input layer block --- easy_rec/python/layers/backbone.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/easy_rec/python/layers/backbone.py b/easy_rec/python/layers/backbone.py index 0b16d92a1..3093d9f8e 100644 --- a/easy_rec/python/layers/backbone.py +++ b/easy_rec/python/layers/backbone.py @@ -96,7 +96,9 @@ def __init__(self, config, features, input_layer, l2_reg=None): if len(config.concat_blocks) == 0: leaf = self._dag.all_leaves() - logging.warning("%s has no `concat_blocks`, try to use all leaf blocks: %s" % (config.name, ','.join(leaf))) + logging.warning( + '%s has no `concat_blocks`, try to use all leaf blocks: %s' % + (config.name, ','.join(leaf))) self._config.concat_blocks.extend(leaf) Package.__packages[self._config.name] = self @@ -119,7 +121,7 @@ def block_input(self, config, block_outputs, training=None): raise KeyError('input name `%s` does not exists' % input_name) if input_node.HasField('input_slice'): - fn = 'lambda x: x' + input_node.input_slice.strip() + fn = eval('lambda x: x' + input_node.input_slice.strip()) input_feature = fn(input_feature) if input_node.HasField('input_fn'): fn = eval(input_node.input_fn) @@ -318,7 +320,8 @@ def merge_inputs(inputs, axis=-1, msg=''): if any(map(lambda x: type(x) == list, inputs)): logging.warning('%s: try to merge inputs 
into list' % msg) - return reduce(lambda x, y: x + y, [e if type(e) == list else [e] for e in inputs]) + return reduce(lambda x, y: x + y, + [e if type(e) == list else [e] for e in inputs]) if axis != -1: logging.info('concat inputs %s axis=%d' % (msg, axis)) From faf8ddfce0c8730287ae9ca571dad39fa955e49e Mon Sep 17 00:00:00 2001 From: weisu Date: Wed, 12 Jul 2023 11:27:25 +0800 Subject: [PATCH 54/54] upgrade to new version --- easy_rec/python/model/dbmtl.py | 29 -------- easy_rec/python/model/easy_rec_model.py | 49 -------------- easy_rec/python/model/multi_task_model.py | 66 +++++++++++++++++++ easy_rec/python/protos/dbmtl.proto | 5 -- easy_rec/python/protos/easy_rec_model.proto | 12 ++-- easy_rec/python/protos/feature_config.proto | 1 - easy_rec/python/protos/keras_layer.proto | 1 + easy_rec/python/protos/layer.proto | 9 +++ easy_rec/python/protos/seq_encoder.proto | 15 ----- .../python/protos/variational_dropout.proto | 2 +- 10 files changed, 84 insertions(+), 105 deletions(-) diff --git a/easy_rec/python/model/dbmtl.py b/easy_rec/python/model/dbmtl.py index a1ebbf14b..e87ee9ae7 100644 --- a/easy_rec/python/model/dbmtl.py +++ b/easy_rec/python/model/dbmtl.py @@ -43,35 +43,6 @@ def __init__(self, self._init_towers(self._model_config.task_towers) def build_predict_graph(self): - # if self._model_config.use_self_supervised_learning: - # bern = tf.distributions.Bernoulli(probs=0.5) - # num_features = len(self._feature_list) - # mask = bern.sample(num_features) - # left_features, right_features = [], [] - # for i in range(num_features): - # fea = self._feature_list[i] - # zero = tf.zeros_like(fea) - # left, right = tf.cond( - # tf.equal(mask[i], 1), lambda: (fea, zero), lambda: (zero, fea)) - # left_features.append(left) - # right_features.append(right) - # left_feature = tf.concat(left_features, axis=-1) - # right_feature = tf.concat(right_features, axis=-1) - # if self._model_config.HasField('bottom_mask_net'): - # left_encoding = self._mask_net_layer( - # 
left_feature, self._is_training, l2_reg=self._l2_reg) - # right_encoding = self._mask_net_layer( - # right_feature, self._is_training, l2_reg=self._l2_reg) - # else: - # raise ValueError( - # 'Unsupported bottom layer when use self supervised learning') - # - # loss = info_nce_loss( - # left_encoding, - # right_encoding, - # temperature=self._model_config.ssl_loss_temperature) - # self._loss_dict['ssl_loss'] = loss * self._model_config.ssl_loss_weight - bottom_fea = self.backbone if bottom_fea is None: if self._model_config.HasField('bottom_cmbf'): diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index e385ac9a2..522d3632e 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -62,10 +62,6 @@ def __init__(self, if constant.SAMPLE_WEIGHT in features: self._sample_weight = features[constant.SAMPLE_WEIGHT] - # self._sequence_encoder = SequenceEncoder(self._input_layer, feature_configs, - # model_config.feature_groups, - # self._l2_reg) - # self._sequence_encoding_by_group_name = {} self._backbone_output = None if model_config.HasField('backbone'): self._backbone = Backbone( @@ -138,51 +134,6 @@ def build_input_layer(self, model_config, feature_configs): is_training=self._is_training, is_predicting=self._is_predicting) - # def get_sequence_encoding(self, group_name=None, is_training=True): - # if group_name is not None: - # if group_name in self._sequence_encoding_by_group_name: - # return self._sequence_encoding_by_group_name[group_name] - # encoding = self._sequence_encoder( - # self._feature_dict, - # group_name, - # is_training, - # loss_dict=self._loss_dict) - # self._sequence_encoding_by_group_name[group_name] = encoding - # return encoding - # - # seq_encoding = [] - # for group in self.feature_groups: - # if len(group.sequence_encoders) == 0: - # continue - # group_name = group.group_name - # if group_name in self._sequence_encoding_by_group_name: - # encoding = 
self._sequence_encoding_by_group_name[group_name] - # else: - # encoding = self._sequence_encoder( - # self._feature_dict, - # group_name, - # is_training, - # loss_dict=self._loss_dict) - # self._sequence_encoding_by_group_name[group_name] = encoding - # if encoding is not None: - # seq_encoding.append(encoding) - # - # if len(seq_encoding) > 1: - # encoding = tf.concat(seq_encoding, axis=-1) - # elif len(seq_encoding) == 1: - # encoding = seq_encoding[0] - # else: - # return None - # - # # if self._base_model_config.HasField('sequence_dnn'): - # # sequence_dnn = dnn.DNN( - # # self._base_model_config.sequence_dnn, - # # self._l2_reg, - # # name='sequence_dnn', - # # is_training=self._is_training) - # # encoding = sequence_dnn(encoding) - # return encoding - @abstractmethod def build_predict_graph(self): pass diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index 06dc53f8a..21e8f2c55 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -5,6 +5,7 @@ import tensorflow as tf from easy_rec.python.builders import loss_builder +from easy_rec.python.layers.dnn import DNN from easy_rec.python.model.rank_model import RankModel from easy_rec.python.protos import tower_pb2 from easy_rec.python.protos.loss_pb2 import LossType @@ -27,6 +28,71 @@ def __init__(self, self._task_num = None self._label_name_dict = {} + def build_predict_graph(self): + if not self.has_backbone: + raise NotImplementedError( + 'method `build_predict_graph` must be implemented when backbone network do not exits' + ) + model = self._model_config.WhichOneof('model') + assert model == 'model_params', '`model_params` must be configured' + config = self._model_config.model_params + + self._init_towers(config.task_towers) + + backbone = self.backbone + if type(backbone) in (list, tuple): + if len(backbone) != len(config.task_towers): + raise ValueError( + 'The number of backbone outputs and task towers must 
be equal') + task_input_list = backbone + else: + task_input_list = [backbone] * len(config.task_towers) + + tower_features = {} + for i, task_tower_cfg in enumerate(config.task_towers): + tower_name = task_tower_cfg.tower_name + if task_tower_cfg.HasField('dnn'): + tower_dnn = DNN( + task_tower_cfg.dnn, + self._l2_reg, + name=tower_name, + is_training=self._is_training) + tower_output = tower_dnn(task_input_list[i]) + else: + tower_output = task_input_list[i] + tower_features[tower_name] = tower_output + + tower_outputs = {} + relation_features = {} + # bayes network + for task_tower_cfg in config.task_towers: + tower_name = task_tower_cfg.tower_name + if task_tower_cfg.HasField('relation_dnn'): + relation_dnn = DNN( + task_tower_cfg.relation_dnn, + self._l2_reg, + name=tower_name + '/relation_dnn', + is_training=self._is_training) + tower_inputs = [tower_features[tower_name]] + for relation_tower_name in task_tower_cfg.relation_tower_names: + tower_inputs.append(relation_features[relation_tower_name]) + relation_input = tf.concat( + tower_inputs, axis=-1, name=tower_name + '/relation_input') + relation_fea = relation_dnn(relation_input) + relation_features[tower_name] = relation_fea + else: + relation_fea = tower_features[tower_name] + + output_logits = tf.layers.dense( + relation_fea, + task_tower_cfg.num_class, + kernel_regularizer=self._l2_reg, + name=tower_name + '/output') + tower_outputs[tower_name] = output_logits + + self._add_to_prediction_dict(tower_outputs) + return self._prediction_dict + def _init_towers(self, task_tower_configs): """Init task towers.""" self._task_towers = task_tower_configs diff --git a/easy_rec/python/protos/dbmtl.proto b/easy_rec/python/protos/dbmtl.proto index a9c4a2e74..9adff1f62 100644 --- a/easy_rec/python/protos/dbmtl.proto +++ b/easy_rec/python/protos/dbmtl.proto @@ -21,9 +21,4 @@ message DBMTL { repeated BayesTaskTower task_towers = 4; // l2 regularization optional float l2_regularization = 5 [default = 1e-4]; - - // 
Whether to use self supervised learning - required bool use_self_supervised_learning = 9 [default = false]; - optional float ssl_loss_weight = 10 [default = 1.0]; - optional float ssl_loss_temperature = 11 [default = 0.1]; } diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 21ac685d3..1e926c368 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -25,16 +25,17 @@ import "easy_rec/python/protos/loss.proto"; import "easy_rec/python/protos/rocket_launching.proto"; import "easy_rec/python/protos/variational_dropout.proto"; import "easy_rec/python/protos/multi_tower_recall.proto"; +import "easy_rec/python/protos/tower.proto"; // for input performance test message DummyModel { } -// configure backbone network in a free style way -message RankModel { +// configure backbone network common parameters +message ModelParams { optional float l2_regularization = 1; optional uint32 wide_output_dim = 2; - // optional bool add_head_logits_layer = 3 [default=true]; + repeated BayesTaskTower task_towers = 3; } // for knowledge distillation @@ -55,15 +56,16 @@ message KD { } message EasyRecModel { - required string model_name = 99; required string model_class = 1; + // just a name for backbone config + optional string model_name = 99; // actually input layers, each layer produce a group of feature repeated FeatureGroupConfig feature_groups = 2; // model parameters oneof model { - RankModel rank_model = 100; + ModelParams model_params = 100; DummyModel dummy = 101; WideAndDeep wide_and_deep = 102; DeepFM deepfm = 103; diff --git a/easy_rec/python/protos/feature_config.proto b/easy_rec/python/protos/feature_config.proto index 75d49a15c..e05e73753 100644 --- a/easy_rec/python/protos/feature_config.proto +++ b/easy_rec/python/protos/feature_config.proto @@ -144,7 +144,6 @@ message FeatureGroupConfig { optional WideOrDeep wide_deep = 3 [default = DEEP]; repeated 
SeqAttGroupConfig sequence_features = 4; optional bool negative_sampler = 5 [default = false]; - // repeated SequenceEncoder sequence_encoders = 6; } message SeqAttMap { diff --git a/easy_rec/python/protos/keras_layer.proto b/easy_rec/python/protos/keras_layer.proto index 94a3ba801..2798260d3 100644 --- a/easy_rec/python/protos/keras_layer.proto +++ b/easy_rec/python/protos/keras_layer.proto @@ -22,5 +22,6 @@ message KerasLayer { MLP mlp = 11; DINEncoder din = 12; BSTEncoder bst = 13; + MMoELayer mmoe = 14; } } diff --git a/easy_rec/python/protos/layer.proto b/easy_rec/python/protos/layer.proto index c7349c2ac..52a1cbf30 100644 --- a/easy_rec/python/protos/layer.proto +++ b/easy_rec/python/protos/layer.proto @@ -60,3 +60,12 @@ message MaskNet { required bool use_parallel = 2 [default = true]; optional MLP mlp = 3; } + +message MMoELayer { + // number of tasks + required uint32 num_task = 1; + // mmoe expert mlp layer definition + optional MLP expert_mlp = 2; + // number of mmoe experts + optional uint32 num_expert = 3; +} diff --git a/easy_rec/python/protos/seq_encoder.proto b/easy_rec/python/protos/seq_encoder.proto index f02490238..2b845a429 100644 --- a/easy_rec/python/protos/seq_encoder.proto +++ b/easy_rec/python/protos/seq_encoder.proto @@ -4,15 +4,6 @@ package protos; import "easy_rec/python/protos/dnn.proto"; -message SequenceEncoder { - // encoder parameters - oneof encoder { - BSTEncoder bst = 101; - DINEncoder din = 102; - } - required bool force_share_embeddings = 1 [default = true]; -} - message BSTEncoder { // Size of the encoder layers and the pooler layer required uint32 hidden_size = 1; @@ -34,12 +25,6 @@ message BSTEncoder { required bool use_position_embeddings = 9 [default = true]; // The stddev of the truncated_normal_initializer for initializing all weight matrices required float initializer_range = 10 [default = 0.02]; - // need contrastive learning - required bool need_contrastive_learning = 11 [default = false]; - // the weight of 
contrastive learning loss - optional float contrastive_loss_weight = 12 [default = 1.0]; - // whether need auto learn contrastive loss weight - optional bool auto_contrastive_loss_weight = 13 [default = false]; } message DINEncoder { diff --git a/easy_rec/python/protos/variational_dropout.proto b/easy_rec/python/protos/variational_dropout.proto index e76a0fb3b..a1bb39974 100644 --- a/easy_rec/python/protos/variational_dropout.proto +++ b/easy_rec/python/protos/variational_dropout.proto @@ -2,7 +2,7 @@ syntax = "proto2"; package protos; -message VariationalDropoutLayer { +message VariationalDropoutLayer{ // regularization coefficient lambda optional float regularization_lambda = 1 [default = 0.01]; // variational_dropout dimension