R/Keras/TensorFlowでやる『ディープラーニング(Deep Learning)』のすゝめ【その３】敵対的生成ネットワーク (GAN)による教師なし画像生成(image generation)をやってみた件

はじめに
- 関連パッケージのインストール・ロード
MNISTデータセットをダウンロードする
7のみのデータを抽出して、アレイ形式に変換してノーマライズする
生成器モデル (1)
生成器モデル (2)
参考資料

はじめに

「敵対的生成ネットワーク（GAN: Generative Adversarial Networks）」は、ディープラーニングの生成モデルの一種であり、与えたデータから本物と偽物を見分けるように複数の学習器を用いて学習することで、新たな画像の生成や画風の変更などができる。このGANモデルは、Generator（生成器）とDiscriminator（識別器）のニューラルネットワーク部に大別される。

生成器では、インプットデータ（ex. ノイズ）から本物のデータに近い偽物を作ることを目指して、学習を進めていく。また、識別器では、生成器が作成した偽物を本物のデータと区別するために、学習を行う。

今回、GANモデル（教師なし学習）による手書き文字（7を使用）の画像生成をやってみた。

なお、中間層に全結合ネットワークだけでなく畳み込みニューラルネットワーク（CNN）も使っているので、厳密には DCGAN (Deep Convolutional GAN) に分類されるかもしれない。

MNISTデータセットをダウンロードする

# Data preparation: fetch the MNIST data set via dataset_mnist()
minist <- dataset_mnist()
str(minist)

# Pull the training images and their digit labels out of the nested list
trainx <- minist[["train"]][["x"]]
trainy <- minist[["train"]][["y"]]
# The test split is not used below:
# testx <- minist[["test"]][["x"]]
# testy <- minist[["test"]][["y"]]

# Alternative: the %<-% multi-assignment operator unpacks everything at once
# c(c(trainx, trainy), c(testx, testy)) %<-% minist

str(trainx)
# int [1:60000, 1:28, 1:28] 0 0 0 0 0 0 0 0 0 0 ...

7のみのデータを抽出して、アレイ形式に変換してノーマライズする

# Keep only the images whose label is the digit 7
trainx7 <- trainx[trainy == 7, , ]

# Preview the first 64 sevens as an 8 x 8 grid.
# par() returns the previous settings; restoring them afterwards keeps the
# zero margins from leaking into later plots (resetting mfrow alone would not).
op <- par(mfrow = c(8, 8), mar = rep(0, 4))
for (n in 1:64) {
  plot(as.raster(trainx7[n, , ], max = 255))
}
# quartz.save() copies the current plot of the macOS quartz device to a file
quartz.save("Fig_01.png", type = "png")
par(op)
par(mfrow = c(1, 1))

# Append a trailing channel axis: (samples, 28, 28) -> (samples, 28, 28, 1)
trainX <- array_reshape(trainx7, c(nrow(trainx7), 28, 28, 1))

# Scale pixel intensities from 0-255 down to 0-1
trainX <- trainX / 255
str(trainX)
# num [1:6265, 1:28, 1:28, 1] 0 0 0 0 0 0 0 0 0 0 ...

生成器モデル (1)

Generatorモデル (生成器モデル)の作成(1)

# Length of the random noise vector fed to the generator
l <- 28

# Generator: noise vector of length l -> 28 x 28 x 1 image
input <- layer_input(shape = l)
gen <- input %>%
  # `units` is spelled out in full; `unit` only worked through R's partial
  # argument matching
  layer_dense(units = 32 * 14 * 14) %>%
  layer_activation_leaky_relu() %>%
  # Reinterpret the dense output as a 14 x 14 feature map with 32 channels
  layer_reshape(target_shape = c(14, 14, 32)) %>%
  layer_conv_2d(filters = 32,
                kernel_size = 5,
                padding = "same") %>%
  layer_activation_leaky_relu() %>%
  # Stride-2 transposed convolution upsamples 14 x 14 to 28 x 28
  layer_conv_2d_transpose(filters = 32,
                          kernel_size = 4,
                          strides = 2,
                          padding = "same") %>%
  layer_activation_leaky_relu() %>%
  # Collapse to a single channel; tanh keeps the outputs within -1..1
  layer_conv_2d(filters = 1,
                kernel_size = 5,
                activation = "tanh",
                padding = "same")

# Wrap the layer graph into a model
Generator <- keras_model(input, gen)

# Print the model summary
Generator
#Model
#Model: "model"
#____________________________________________________________________________
#Layer (type)                      Output Shape                  Param #     
#============================================================================
#input_1 (InputLayer)              [(None, 28)]                  0           
#____________________________________________________________________________
#dense (Dense)                     (None, 6272)                  181888      
#____________________________________________________________________________
#leaky_re_lu_2 (LeakyReLU)         (None, 6272)                  0           
#____________________________________________________________________________
#reshape (Reshape)                 (None, 14, 14, 32)            0           
#____________________________________________________________________________
#conv2d_1 (Conv2D)                 (None, 14, 14, 32)            25632       
#____________________________________________________________________________
#leaky_re_lu_1 (LeakyReLU)         (None, 14, 14, 32)            0           
#____________________________________________________________________________
#conv2d_transpose (Conv2DTranspose (None, 28, 28, 32)            16416       
#____________________________________________________________________________
#leaky_re_lu (LeakyReLU)           (None, 28, 28, 32)            0           
#____________________________________________________________________________
#conv2d (Conv2D)                   (None, 28, 28, 1)             801         
#============================================================================
#Total params: 224,737
#Trainable params: 224,737
#Non-trainable params: 0
#____________________________________________________________________________

Discriminatorモデル (識別器モデル)の作成(1)

# Shape of one input image: height, width, channels
shape <- c(28, 28, 1)

# Discriminator: image -> one sigmoid output, assembled one layer at a time
input0 <- layer_input(shape = shape)
dis <- layer_conv_2d(input0, filters = 64, kernel_size = 4)
dis <- layer_activation_leaky_relu(dis)
dis <- layer_flatten(dis)
dis <- layer_dropout(dis, rate = 0.3)
dis <- layer_dense(dis, units = 1, activation = "sigmoid")

# Wrap the layer graph into a model
Discriminator <- keras_model(inputs = input0, outputs = dis)

# Print the model summary
Discriminator
#Model
#Model: "model_1"
#____________________________________________________________________________
#Layer (type)                      Output Shape                  Param #     
#============================================================================
#input_2 (InputLayer)              [(None, 28, 28, 1)]           0           
#____________________________________________________________________________
#conv2d_2 (Conv2D)                 (None, 25, 25, 64)            1088        
#____________________________________________________________________________
#leaky_re_lu_3 (LeakyReLU)         (None, 25, 25, 64)            0           
#____________________________________________________________________________
#flatten (Flatten)                 (None, 40000)                 0           
#____________________________________________________________________________
#dropout (Dropout)                 (None, 40000)                 0           
#____________________________________________________________________________
#dense_1 (Dense)                   (None, 1)                     40001       
#============================================================================
#Total params: 41,089
#Trainable params: 41,089
#Non-trainable params: 0
#____________________________________________________________________________

モデルのコンパイル(1)

# Compile the discriminator first, while its weights are still trainable
compile(Discriminator,
        optimizer = "rmsprop",
        loss = "binary_crossentropy")
Discriminator$trainable
# [1] TRUE

# Then freeze the discriminator weights
freeze_weights(Discriminator)
Discriminator$trainable
# [1] FALSE
# Equivalent alternative:
# Discriminator$trainable = FALSE

# Chain generator and frozen discriminator into the combined GAN model
# (the discriminator is not trained through this model)
input <- layer_input(shape = l)
gan <- Discriminator(Generator(input))
GAN <- keras_model(inputs = input, outputs = gan)

# Compile the combined model
compile(GAN,
        optimizer = "rmsprop",
        loss = "binary_crossentropy")

# Visualise the three models.
# NOTE(review): plot_model_modi() is not defined in this file; presumably a
# helper that was loaded beforehand. Confirm before running.
plot_model_modi(Generator)
plot_model_modi(Discriminator)
plot_model_modi(GAN)

モデルのトレーニング (事前準備 + 実行)

# Batch size: number of noise vectors and of real images used per step
b <- 50

# Number of training steps
steps <- 120

# Directory that receives one image grid per step; create it only if it is
# missing so that re-running the script does not raise a warning
dir <- "gan_img"
if (!dir.exists(dir)) {
  dir.create(dir)
}

# Initial state: first row of the next real batch, and preallocated loss
# records (instead of growing the vectors from NULL inside the loop)
start <- 1
dloss <- numeric(steps)
gloss <- numeric(steps)

#########################################
# Training: `steps` iterations
# Uses Generator, Discriminator, GAN, trainX and l created earlier
#########################################
for (i in seq_len(steps)) {
  # i <- 1  # uncomment to walk through a single iteration by hand

  # Draw b noise vectors of length l from a standard normal distribution
  noise <- matrix(rnorm(b * l), nrow = b, ncol = l)

  # Let the generator turn the noise into fake images
  fake <- predict(Generator, noise)
  # str(fake)
  # num [1:50, 1:28, 1:28, 1] -0.00514 -0.00762 -0.00724 -0.01103 ...

  # Last row of the current real batch
  # (not called `stop`, which would mask base::stop)
  last_row <- start + b - 1

  # Take the real images; drop = FALSE keeps the 4-d (rows, 28, 28, 1) shape,
  # so no reshape is needed and a batch of one image still works
  real <- trainX[start:last_row, , , , drop = FALSE]
  # str(real)
  # num [1:50, 1:28, 1:28, 1] 0 0 0 0 0 0 0 0 0 0 ...

  # Number of real images in this batch
  rows <- nrow(real)

  # Stack fake images (first half) and real images (second half)
  both <- array(0, dim = c(rows * 2, dim(real)[-1]))
  # str(both)
  # num [1:100, 1:28, 1:28, 1] 0 0 0 0 0 0 0 0 0 0 ...
  both[1:rows, , , ] <- fake
  both[(rows + 1):(rows * 2), , , ] <- real

  # Noisy labels in the same order: fake -> 0.9-1, real -> 0-0.1
  Labels <- rbind(
    matrix(runif(b, 0.9, 1), nrow = b, ncol = 1),
    matrix(runif(b, 0, 0.1), nrow = b, ncol = 1)
  )

  # Train the discriminator on the mixed batch of fake and real images
  dloss[i] <- Discriminator %>% train_on_batch(both, Labels)

  # Train the GAN: the fake images get the opposite ("real") labels
  fakeAsReal <- array(runif(b, 0, 0.1), dim = c(b, 1))
  gloss[i] <- GAN %>% train_on_batch(noise, fakeAsReal)

  # Save the fake images of this step as a 7 x 7 grid (at most 49 images).
  # The previous graphics settings are restored afterwards so the zero
  # margins do not leak into later plots.
  old_par <- par(mfrow = c(7, 7), mar = rep(0, 4), omi = c(0, 0, 0.5, 0))
  for (n in seq_len(min(49, b))) {
    f <- fake[n, , , ]
    dim(f) <- c(28, 28)
    # Min-max rescale to 0-1; a constant image is left at 0 rather than
    # dividing by zero
    f <- f - min(f)
    f_max <- max(f)
    if (f_max > 0) {
      f <- f / f_max
    }
    plot(as.raster(f * 255, max = 255))
  }
  mtext(side = 3, line = 1, outer = TRUE, cex = 2,
        text = paste0("i = ", i))
  quartz.save(file.path(dir, paste0("f_", formatC(i, width = 4, flag = "0"), ".png")),
              type = "png")
  par(old_par)

  # Advance to the next real batch; wrap around to the first row when the
  # next batch would run past the end of trainX
  start <- start + b
  if (start + b - 1 > nrow(trainX)) {
    start <- 1
  }
}

ここで、train_on_batch関数は、1バッチ分のサンプルに対して勾配更新を1回だけ行う関数である（同じヘルプページに載っているtest_on_batch関数は、1バッチ分のサンプルでモデル評価を行う）。

画像生成の結果をgifアニメーションとして可視化(1)

# Build the animation from the saved frames (ImageMagick must be installed)
gif_cmd <- paste0("convert -delay 20 -loop 10 ./", dir, "/*.png ./Fig_03_20ms.gif")
system(gif_cmd)

Lossの結果を可視化する

# Plot the discriminator and GAN losses recorded at each training step.
# Margins are set explicitly: the image grids drawn during training use zero
# margins plus an outer top margin, which would otherwise clip the axes here.
par(mfrow = c(1, 1), mar = c(5, 4, 4, 2) + 0.1, omi = rep(0, 4))

# One x position per recorded loss value
x <- seq_along(dloss)
plot(x, dloss, col = "red", type = "l",
     ylim = c(0, 3), xlab = "Iterations",
     ylab = "Loss")
lines(x, gloss, col = "black", type = "l")
legend("topright", legend = c("Discriminator", "GAN Loss"),
       col = c("red", "black"), lwd = 1)
quartz.save("Fig_04.png", type = "png")

GANの改善モデルでの実行

次に、少し改善したモデルで実行してみる。

各モデルに、layer_conv_2dレイヤーを１つずつ追加した。

生成器モデル (2)

Generatorモデル (生成器モデル)の作成(2)

# Length of the random noise vector fed to the generator
l <- 28

# Generator (2): same as the first version plus one extra 64-filter convolution
input <- layer_input(shape = l)
gen <- input %>%
  # `units` is spelled out in full; `unit` only worked through R's partial
  # argument matching
  layer_dense(units = 32 * 14 * 14) %>%
  layer_activation_leaky_relu() %>%
  # Reinterpret the dense output as a 14 x 14 feature map with 32 channels
  layer_reshape(target_shape = c(14, 14, 32)) %>%
  layer_conv_2d(filters = 32,
                kernel_size = 5,
                padding = "same") %>%
  layer_activation_leaky_relu() %>%
  # Stride-2 transposed convolution upsamples 14 x 14 to 28 x 28
  layer_conv_2d_transpose(filters = 32,
                          kernel_size = 4,
                          strides = 2,
                          padding = "same") %>%
  layer_activation_leaky_relu() %>%
  # Additional convolution compared with the first generator
  layer_conv_2d(filters = 64,
                kernel_size = 5,
                padding = "same") %>%
  layer_activation_leaky_relu() %>%
  # Collapse to a single channel; tanh keeps the outputs within -1..1
  layer_conv_2d(filters = 1,
                kernel_size = 5,
                activation = "tanh",
                padding = "same")

# Wrap the layer graph into a model
Generator <- keras_model(input, gen)

# Print the model summary
Generator
#Model
#Model: "model_5"
#____________________________________________________________________________
#Layer (type)                      Output Shape                  Param #     
#============================================================================
#input_6 (InputLayer)              [(None, 28)]                  0           
#____________________________________________________________________________
#dense_4 (Dense)                   (None, 6272)                  181888      
#____________________________________________________________________________
#leaky_re_lu_13 (LeakyReLU)        (None, 6272)                  0           
#____________________________________________________________________________
#reshape_2 (Reshape)               (None, 14, 14, 32)            0           
#____________________________________________________________________________
#conv2d_10 (Conv2D)                (None, 14, 14, 32)            25632       
#____________________________________________________________________________
#leaky_re_lu_12 (LeakyReLU)        (None, 14, 14, 32)            0           
#____________________________________________________________________________
#conv2d_transpose_2 (Conv2DTranspo (None, 28, 28, 32)            16416       
#____________________________________________________________________________
#leaky_re_lu_11 (LeakyReLU)        (None, 28, 28, 32)            0           
#____________________________________________________________________________
#conv2d_9 (Conv2D)                 (None, 28, 28, 64)            51264       
#____________________________________________________________________________
#leaky_re_lu_10 (LeakyReLU)        (None, 28, 28, 64)            0           
#____________________________________________________________________________
#conv2d_8 (Conv2D)                 (None, 28, 28, 1)             1601        
#============================================================================
#Total params: 276,801
#Trainable params: 276,801
#Non-trainable params: 0
#____________________________________________________________________________

Discriminatorモデル (識別器モデル)の作成(2)

# Shape of one input image: height, width, channels
shape <- c(28, 28, 1)

# Discriminator (2): the first version plus a second, stride-2 convolution,
# assembled one layer at a time
input0 <- layer_input(shape = shape)
dis <- layer_conv_2d(input0, filters = 64, kernel_size = 4)
dis <- layer_activation_leaky_relu(dis)
dis <- layer_conv_2d(dis, filters = 64, kernel_size = 4, strides = 2)
dis <- layer_activation_leaky_relu(dis)
dis <- layer_flatten(dis)
dis <- layer_dropout(dis, rate = 0.3)
dis <- layer_dense(dis, units = 1, activation = "sigmoid")

# Wrap the layer graph into a model
Discriminator <- keras_model(inputs = input0, outputs = dis)

# Print the model summary
Discriminator
#Model
#Model: "model_3"
#____________________________________________________________________________
#Layer (type)                      Output Shape                  Param #     
#============================================================================
#input_4 (InputLayer)              [(None, 28, 28, 1)]           0           
#____________________________________________________________________________
#conv2d_4 (Conv2D)                 (None, 25, 25, 64)            1088        
#____________________________________________________________________________
#leaky_re_lu_5 (LeakyReLU)         (None, 25, 25, 64)            0           
#____________________________________________________________________________
#conv2d_3 (Conv2D)                 (None, 11, 11, 64)            65600       
#____________________________________________________________________________
#leaky_re_lu_4 (LeakyReLU)         (None, 11, 11, 64)            0           
#____________________________________________________________________________
#flatten_1 (Flatten)               (None, 7744)                  0           
#____________________________________________________________________________
#dropout_1 (Dropout)               (None, 7744)                  0           
#____________________________________________________________________________
#dense_2 (Dense)                   (None, 1)                     7745        
#============================================================================
#Total params: 74,433
#Trainable params: 74,433
#Non-trainable params: 0
#____________________________________________________________________________

モデルのコンパイル(2)

# Compile the discriminator first, while its weights are still trainable
compile(Discriminator,
        optimizer = "rmsprop",
        loss = "binary_crossentropy")
Discriminator$trainable
# [1] TRUE

# Then freeze the discriminator weights
freeze_weights(Discriminator)
Discriminator$trainable
# [1] FALSE
# Equivalent alternative:
# Discriminator$trainable = FALSE

# Chain generator and frozen discriminator into the combined GAN model
# (the discriminator is not trained through this model)
input <- layer_input(shape = l)
gan <- Discriminator(Generator(input))
GAN <- keras_model(inputs = input, outputs = gan)

# Compile the combined model
compile(GAN,
        optimizer = "rmsprop",
        loss = "binary_crossentropy")

モデルのトレーニング (事前準備 + 再実行)

# Batch size: number of noise vectors and of real images used per step
b <- 50

# Number of training steps
steps <- 120

# Directory that receives one image grid per step; create it only if it is
# missing so that re-running the script does not raise a warning
dir <- "gan_img2"
if (!dir.exists(dir)) {
  dir.create(dir)
}

# Initial state: first row of the next real batch, and preallocated loss
# records (instead of growing the vectors from NULL inside the loop)
start <- 1
dloss <- numeric(steps)
gloss <- numeric(steps)

#########################################
# Training: `steps` iterations
# Uses Generator, Discriminator, GAN, trainX and l created earlier
#########################################
for (i in seq_len(steps)) {
  # i <- 1  # uncomment to walk through a single iteration by hand

  # Draw b noise vectors of length l from a standard normal distribution
  noise <- matrix(rnorm(b * l), nrow = b, ncol = l)

  # Let the generator turn the noise into fake images
  fake <- predict(Generator, noise)
  # str(fake)
  # num [1:50, 1:28, 1:28, 1] -0.00514 -0.00762 -0.00724 -0.01103 ...

  # Last row of the current real batch
  # (not called `stop`, which would mask base::stop)
  last_row <- start + b - 1

  # Take the real images; drop = FALSE keeps the 4-d (rows, 28, 28, 1) shape,
  # so no reshape is needed and a batch of one image still works
  real <- trainX[start:last_row, , , , drop = FALSE]
  # str(real)
  # num [1:50, 1:28, 1:28, 1] 0 0 0 0 0 0 0 0 0 0 ...

  # Number of real images in this batch
  rows <- nrow(real)

  # Stack fake images (first half) and real images (second half)
  both <- array(0, dim = c(rows * 2, dim(real)[-1]))
  # str(both)
  # num [1:100, 1:28, 1:28, 1] 0 0 0 0 0 0 0 0 0 0 ...
  both[1:rows, , , ] <- fake
  both[(rows + 1):(rows * 2), , , ] <- real

  # Noisy labels in the same order: fake -> 0.9-1, real -> 0-0.1
  Labels <- rbind(
    matrix(runif(b, 0.9, 1), nrow = b, ncol = 1),
    matrix(runif(b, 0, 0.1), nrow = b, ncol = 1)
  )

  # Train the discriminator on the mixed batch of fake and real images
  dloss[i] <- Discriminator %>% train_on_batch(both, Labels)

  # Train the GAN: the fake images get the opposite ("real") labels
  fakeAsReal <- array(runif(b, 0, 0.1), dim = c(b, 1))
  gloss[i] <- GAN %>% train_on_batch(noise, fakeAsReal)

  # Save the fake images of this step as a 7 x 7 grid (at most 49 images).
  # The previous graphics settings are restored afterwards so the zero
  # margins do not leak into later plots.
  old_par <- par(mfrow = c(7, 7), mar = rep(0, 4), omi = c(0, 0, 0.5, 0))
  for (n in seq_len(min(49, b))) {
    f <- fake[n, , , ]
    dim(f) <- c(28, 28)
    # Min-max rescale to 0-1; a constant image is left at 0 rather than
    # dividing by zero
    f <- f - min(f)
    f_max <- max(f)
    if (f_max > 0) {
      f <- f / f_max
    }
    plot(as.raster(f * 255, max = 255))
  }
  mtext(side = 3, line = 1, outer = TRUE, cex = 2,
        text = paste0("i = ", i))
  quartz.save(file.path(dir, paste0("f_", formatC(i, width = 4, flag = "0"), ".png")),
              type = "png")
  par(old_par)

  # Advance to the next real batch; wrap around to the first row when the
  # next batch would run past the end of trainX
  start <- start + b
  if (start + b - 1 > nrow(trainX)) {
    start <- 1
  }
}

画像生成の結果をgifアニメーションとして可視化(2)

# Build the animation from the saved frames (ImageMagick must be installed)
gif_cmd <- paste0("convert -delay 20 -loop 10 ./", dir, "/*.png ./Fig_05_20ms.gif")
system(gif_cmd)

Lossの結果を可視化する

# Plot the discriminator and GAN losses recorded at each training step.
# Margins are set explicitly: the image grids drawn during training use zero
# margins plus an outer top margin, which would otherwise clip the axes here.
par(mfrow = c(1, 1), mar = c(5, 4, 4, 2) + 0.1, omi = rep(0, 4))

# One x position per recorded loss value
x <- seq_along(dloss)
plot(x, dloss, col = "red", type = "l",
     ylim = c(0, 3), xlab = "Iterations",
     ylab = "Loss")
lines(x, gloss, col = "black", type = "l")
legend("topright", legend = c("Discriminator", "GAN Loss"),
       col = c("red", "black"), lwd = 1)
quartz.save("Fig_06.png", type = "png")