DeepFM model

chenxj · chenxj · commit 70d77df8aa09 · 2018-08-13T11:20:53.000+08:00
diff --git a/DeepFM.py b/DeepFM.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+
+"""
+A PyTorch implementation of DeepFM for the rating prediction problem.
+
+Created on Aug 10, 2018
+"""
+
+__author__ = 'Xijun Chen'
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+
+
+class DeepFM(nn.Module):
+    """
+    A DeepFM network with RMSE loss for rates prediction problem.
+
+    There are two parts in the architecture of this network: fm part for low
+    order interactions of features and deep part for higher order. In this 
+    network, we use bachnorm and dropout technology for all hidden layers,
+    and "Adam" method for optimazation.
+
+    You may find more details in this paper:
+    DeepFM: A Factorization-Machine based Neural Network for CTR Prediction,
+    Huifeng Guo, Ruiming Tang, Yunming Yey, Zhenguo Li, Xiuqiang He.
+    """
+
+    def __init__(self, feature_sizes, embedding_size=4,
+                 hidden_dims=[32, 32], num_classes=10, dropout=[0.5, 0.5], 
+                 use_cuda=True, verbose=False):
+        """
+        Initialize a new network
+
+        Inputs:
+        - feature_size: A list of integer giving the size of features for each field.
+        - embedding_size: An integer giving size of feature embedding.
+        - hidden_dims: A list of integer giving the size of each hidden layer.
+        - num_classes: An integer giving the number of classes to predict. For example,
+                    someone may rate 1,2,3,4 or 5 stars to a film.
+        - batch_size: An integer giving size of instances used in each interation.
+        - use_cuda: Bool, Using cuda or not
+        - verbose: Bool
+        """
+        super().__init__()
+        self.field_size = len(feature_sizes)
+        self.feature_sizes = feature_sizes
+        self.embedding_size = embedding_size
+        self.hidden_dims = hidden_dims
+        self.num_classes = num_classes
+        self.bias = torch.nn.Parameter(torch.randn(1))
+        """
+            check if use cuda
+        """
+        if use_cuda and torch.cuda.is_available():
+            self.device = torch.device('cuda')
+        else:
+            self.device = torch.device('cpu')
+        """
+            init fm part
+        """
+        self.fm_first_order_embeddings = nn.ModuleList(
+            [nn.Embedding(feature_size, 1) for feature_size in self.feature_sizes])
+        self.fm_second_order_embeddings = nn.ModuleList(
+            [nn.Embedding(feature_size, self.embedding_size) for feature_size in self.feature_sizes])
+        """
+            init deep part
+        """
+        all_dims = [self.field_size * self.embedding_size] + \
+            self.hidden_dims + [self.num_classes]
+        for i in range(1, len(hidden_dims) + 1):
+            setattr(self, 'linear_'+str(i),
+                    nn.Linear(all_dims[i-1], all_dims[i]))
+            # nn.init.kaiming_normal_(self.fc1.weight)
+            setattr(self, 'batchNorm_' + str(i),
+                    nn.BatchNorm1d(all_dims[i]))
+            setattr(self, 'dropout_'+str(i),
+                    nn.Dropout(dropout[i-1]))
+
+    def forward(self, Xi, Xv):
+        """
+        Forward process of network. 
+
+        Inputs:
+        - Xi: A tensor of input's index, shape of (N, embedding_size, 1)
+        - Xv: A tensor of input's value, shape of (N, embedding_size, 1)
+        """
+        """
+            fm part
+        """
+        fm_first_order_emb_arr = [(torch.sum(emb(Xi[:, i, :]), 1).t() * \
+                                   Xv[:, i]).t() for i, emb in enumerate(self.fm_first_order_embeddings)]
+        fm_first_order = torch.cat(fm_first_order_emb_arr, 1)
+        # use 2xy = (x+y)^2 - x^2 - y^2 reduce calculation
+        fm_second_order_emb_arr = [(torch.sum(emb(Xi[:, i, :]), 1).t() * \
+                                    Xv[:, i]).t() for i, emb in enumerate(self.fm_second_order_embeddings)]
+        fm_sum_second_order_emb = sum(fm_second_order_emb_arr)
+        fm_sum_second_order_emb_square = fm_sum_second_order_emb * \
+            fm_sum_second_order_emb  # (x+y)^2
+        fm_second_order_emb_square = [
+            item*item for item in fm_second_order_emb_arr]
+        fm_second_order_emb_square_sum = sum(
+            fm_second_order_emb_square)  # x^2+y^2
+        fm_second_order = (fm_sum_second_order_emb_square -
+                           fm_second_order_emb_square_sum) * 0.5
+        """
+            deep part
+        """
+        deep_emb = torch.cat(fm_second_order_emb_arr, 1)
+        deep_out = deep_emb
+        for i in range(1, self.hidden_dims + 1):
+            deep_out = getattr(self, 'linear_' + str(i))(deep_out)
+            deep_out = getattr(self, 'batchNorm_' + str(i))(deep_out)
+            deep_out = getattr(self, 'dropout_' + str(i))(deep_out)
+        """
+            sum
+        """
+        total_sum = torch.sum(fm_first_order, 1) + \
+                    torch.sum(fm_second_order, 1) + torch.sum(deep_out, 1) + self.bias
+        return total_sum
+
+
+
+
+