PyTorchの基本的なところ

・ミニマムプログラムには、MNISTではなく、CIFAR10などを使う
MNISTの[28, 28, 1]は都合が悪い。CIFAR10は[32, 32, 3]。


・シード固定

def fix_seed(seed):
    """Fix all RNG seeds (random / numpy / torch) for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # benchmark/deterministic only affect cuDNN convolution algorithms
    # (see the note below this snippet).
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # use_deterministic_algorithms(True) raises on ops that have no
    # deterministic implementation, so it is usually impractical.
    # torch.use_deterministic_algorithms(True)

benchmarkとdeterministicはconvolutionのみに適用される。
use_deterministic_algorithmsは対応していない処理があるので、基本的に使えない。


・pretrained modelはtorchvisionではなく、timmを使う
self.base_model = timm.create_model("efficientnetv2_rw_s", pretrained=True)
※torchvisionの場合
self.base_model = models.__dict__['efficientnet_v2_s'](pretrained=True).features


・pretrained modelの一部を利用する

self.base_model = nn.Sequential(
efficient_net.conv_stem,
efficient_net.bn1,
efficient_net.act1,
efficient_net.blocks[0],
efficient_net.blocks[1],
efficient_net.blocks[2],
efficient_net.blocks[3],
)



・Accuracy計算式の意味
# [batch_size, 1]の予測ラベルを算出
pred = output.argmax(dim=1, keepdim=True)

# targetのshapeをpredと揃えてから、要素ごとにpredと比較し、Trueをカウント。
correct += pred.eq(target.view_as(pred)).sum().item()

targetのshapeが[batch_size, ]なら、以下のコードでも大丈夫。
pred = output.argmax(dim=1, keepdim=False)
correct += pred.eq(target).sum().item()


・Lossのreductionはデフォルトの'mean'を使う
'sum'だとバッチサイズに合わせて学習率を調整する必要がある。'mean'の場合、
loss_sum += loss.item() * data.shape[0]
でバッチごとにLossの和を足していき、
loss_epoch = loss_sum / len(data_loader.dataset)
で1データ当たりのlossを算出する。

・ネットワーク重みの初期化は基本的に不要
最近はHeの初期値などで初期化されているので、初期化処理は基本的に不要。
特殊なネットワークで精度を追い込みたい時だけ考慮する。


・pytorch lightning
LightningDataModuleは不要。fit関数にDataLoaderを渡せる。
seed_everything関数とTrainer(deterministic=True)で再現性。

PyTorch Lightning

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
from pytorch_lightning import LightningModule, LightningDataModule, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from torchmetrics import Accuracy
from sklearn.model_selection import train_test_split


class MNISTModel(LightningModule):
    """Minimal two-layer MLP classifier for MNIST (28x28 grayscale -> 10 classes)."""

    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(28 * 28, 1024)
        self.l2 = torch.nn.Linear(1024, 10)
        self.accuracy = Accuracy()

    def forward(self, x):
        # Flatten [B, 1, 28, 28] -> [B, 784] before the linear layers.
        x = torch.relu(self.l1(x.view(x.size(0), -1)))
        x = self.l2(x)
        return x

    def training_step(self, batch, batch_idx):
        # FIX: batch_idx added — the original omitted it, which is
        # inconsistent with validation_step and the standard hook signature.
        loss, accuracy = self.step(batch)
        # Logging 'step' as the epoch index makes TensorBoard plot
        # per-epoch curves instead of per-optimizer-step curves.
        self.log_dict({'train_loss': loss,
                       'train_accuracy': accuracy,
                       'step': torch.tensor(self.current_epoch, dtype=torch.float32)},
                      on_step=False, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        loss, accuracy = self.step(batch)
        self.log_dict({'val_loss': loss,
                       'val_accuracy': accuracy,
                       'step': torch.tensor(self.current_epoch, dtype=torch.float32)},
                      on_step=False, on_epoch=True)
        return loss

    def step(self, batch):
        """Shared forward pass: return (cross-entropy loss, accuracy) for a batch."""
        x, y = batch
        output = self.forward(x)
        loss = F.cross_entropy(output, y)
        accuracy = self.accuracy(output, y)
        return loss, accuracy

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)


class MNISTDataModule(LightningDataModule):
    """DataModule providing MNIST train/val/test splits.

    NOTE(review): datasets are built eagerly in __init__ rather than in the
    setup() hook, so the download happens at construction time. Fine for
    single-machine experiments; move to setup() for multi-node training.
    """

    def __init__(self, batch_size, num_workers):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers
        mnist_train_val = MNIST('data', train=True, download=True, transform=transforms.ToTensor())
        self.mnist_test = MNIST('data', train=False, download=True, transform=transforms.ToTensor())
        # Hold out 10% of the official training set for validation.
        self.mnist_train, self.mnist_val = train_test_split(mnist_train_val, test_size=0.1, random_state=0)

    def train_dataloader(self):
        return DataLoader(self.mnist_train, shuffle=True, batch_size=self.batch_size, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.mnist_val, shuffle=False, batch_size=self.batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        return DataLoader(self.mnist_test, shuffle=False, batch_size=self.batch_size, num_workers=self.num_workers)


def train():
    """Train the MNIST model, checkpointing on the best validation loss."""
    data_module = MNISTDataModule(batch_size=32, num_workers=0)
    model = MNISTModel()

    checkpoint_cb = ModelCheckpoint(monitor='val_loss', filename='{epoch:02d}', mode='min')
    trainer = Trainer(gpus=[1], max_epochs=50, callbacks=[checkpoint_cb])
    trainer.fit(model, data_module)


def test():
    """Run inference over the test set with a trained checkpoint."""
    data_module = MNISTDataModule(batch_size=1, num_workers=0)
    data_module.setup(stage='test')
    data_loader = data_module.test_dataloader()

    model = MNISTModel.load_from_checkpoint('lightning_logs/version_17/checkpoints/epoch=06.ckpt')
    model.freeze()  # eval mode, gradients disabled

    for data, target in data_loader:
        out = model(data)


if __name__ == '__main__':
    train()
    # test()

TensorFlow, PyTorchで共通のCUDA, Cudnnを利用する

・PyTorch
PyTorchのCuda対応バージョンを確認。
2022/3現在、10.2と11.3。以下では11.3を例に説明。
https://pytorch.org/


・TensorFlow
「テスト済みのビルド構成」で上記CUDAバージョンを超えない範囲で、最も近いバージョンを選択。
CUDA11.3の例であれば、tensorflow-2.6.0のCUDA11.2が該当。
https://www.tensorflow.org/install/source#common_installation_problems

 

・CUDA
PyTorchに対応しているCUDAをインストール(11.3)
https://developer.nvidia.com/cuda-toolkit-archive


・Cudnn

インストールしたCUDAに対応するCudnnをインストール(8.2.1)
https://developer.nvidia.com/rdp/cudnn-archive

TensorFlow Javaの使い方

Maven Repository
Maven Repository: org.tensorflow » tensorflow-core-platform
※旧版と間違えないよう注意

・Example1
Python学習

import tensorflow as tf
from tensorflow.keras import Model

class MyModel(Model):
    """Toy Keras model: doubles whatever it is called with."""

    def __init__(self):
        super().__init__()

    def call(self, x):
        return x * 2

def main():
    """Trace the model with a sample input, then export it as a SavedModel."""
    model = MyModel()
    model(3)  # one call builds/traces the graph so a signature gets saved
    tf.saved_model.save(model, 'minimum_model')


if __name__ == '__main__':
    main()


Java推論

public class Minimum {
    public static void main(String[] args) {
        // Load the SavedModel exported by the Python side.
        SavedModelBundle model = SavedModelBundle.load("minimum_model");

        // Build a 1-element int32 input tensor holding the value 3.
        IntNdArray input = NdArrays.ofInts(Shape.of(1));
        input.set(NdArrays.vectorOf(3));
        Tensor inputTensor = TInt32.tensorOf(input);

        // Feed/fetch names come from `saved_model_cli show`.
        List<Tensor> outputs = model.session().runner()
                .feed("serving_default_input_1:0", inputTensor)
                .fetch("PartitionedCall:0")
                .run();
        Tensor result = outputs.get(0);
        int scores = result.asRawTensor().data().asInts().getInt(0);
    }
}

※feedとfetchの引数はsaved_model_cliで確認


・Example2
Python学習

import tensorflow as tf
from tensorflow.keras import Model, layers
import numpy as np

class MyModel2(Model):
    """Two-layer dense network: 128-unit ReLU followed by a 10-way softmax."""

    def __init__(self):
        super().__init__()
        self.fc1 = layers.Dense(128, activation='relu')
        self.fc2 = layers.Dense(10, activation='softmax')

    def call(self, x):
        hidden = self.fc1(x)
        return self.fc2(hidden)

def main():
    """Run a forward pass on dummy [2, 3] data and export the traced model."""
    data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

    model = MyModel2()
    out = model(data)  # trace with a concrete input shape
    print(out)
    tf.saved_model.save(model, 'minimum_model2')


if __name__ == '__main__':
    main()


Java推論

public class Minimum2 {
    public static void main(String[] args) {

        SavedModelBundle model = SavedModelBundle.load("minimum_model2");

        // [2, 3] float32 input filled element-by-element with 1..6.
        // Row-wise alternative:
        // input.set(NdArrays.vectorOf(1.0f, 2.0f, 3.0f), 0);
        // input.set(NdArrays.vectorOf(4.0f, 5.0f, 6.0f), 1);
        FloatNdArray input = NdArrays.ofFloats(Shape.of(2, 3));
        float value = 1.0f;
        for (int row = 0; row < 2; row++) {
            for (int col = 0; col < 3; col++) {
                input.setFloat(value, row, col);
                value += 1.0f;
            }
        }

        Tensor inputTensor = TFloat32.tensorOf(input);

        List<Tensor> outputs = model.session().runner()
                .feed("serving_default_input_1:0", inputTensor)
                .fetch("StatefulPartitionedCall:0")
                .run();
        Tensor result = outputs.get(0);
        // Output is [2, 10]; flat index 12 = row 1, class 2.
        float scores = result.asRawTensor().data().asFloats().getFloat(12);
    }
}


・Example3(複数出力)
Python学習

import tensorflow as tf
from tensorflow.keras import Model, layers
import numpy as np

class MyModel3(Model):
    """Dense network with two outputs: softmax scores and the same scores + 0.1."""

    def __init__(self):
        super().__init__()
        self.fc1 = layers.Dense(128, activation='relu')
        self.fc2 = layers.Dense(10, activation='softmax')

    def call(self, x):
        hidden = self.fc1(x)
        scores = self.fc2(hidden)
        return scores, scores + 0.1

def main():
    """Trace the multi-output model on dummy data and export it."""
    data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

    model = MyModel3()
    out = model(data)
    print(out)
    tf.saved_model.save(model, 'minimum_model3')


if __name__ == '__main__':
    main()


Java推論

public class Minimum3 {
    public static void main(String[] args) {

        // FIX: this Example3 snippet was a verbatim copy of Example2 — it
        // duplicated the class name, loaded "minimum_model2", and fetched
        // only one output even though MyModel3 returns two tensors.
        SavedModelBundle model = SavedModelBundle.load("minimum_model3");

        FloatNdArray inputMatrix = NdArrays.ofFloats(Shape.of(2, 3));
        inputMatrix.set(NdArrays.vectorOf(1.0f, 2.0f, 3.0f), 0);
        inputMatrix.set(NdArrays.vectorOf(4.0f, 5.0f, 6.0f), 1);

        Tensor inputTensor = TFloat32.tensorOf(inputMatrix);

        // Fetch both outputs (names confirmed via saved_model_cli).
        List<Tensor> outputsList = model.session().runner()
                .feed("serving_default_input_1:0", inputTensor)
                .fetch("StatefulPartitionedCall:0")
                .fetch("StatefulPartitionedCall:1")
                .run();
        Tensor scores = outputsList.get(0);         // softmax output
        Tensor shiftedScores = outputsList.get(1);  // softmax output + 0.1
        float score = scores.asRawTensor().data().asFloats().getFloat(12);
        float shifted = shiftedScores.asRawTensor().data().asFloats().getFloat(12);
    }
}


・Example4(画像分類)

public class Buffered {
    public static void main(String[] args) {

        try {
            SavedModelBundle model = SavedModelBundle.load("checkpoints");

            BufferedImage img0 = ImageIO.read(new File("data/00000_03_050_025/000000.png"));
            BufferedImage img1 = ImageIO.read(new File("data/00000_04_060_050/000000.png"));
            // Batch of 2 images, NHWC float32 layout.
            FloatNdArray input_matrix = NdArrays.ofFloats(Shape.of(2, img0.getHeight(), img0.getWidth(), 3));
            for (int h = 0; h < img0.getHeight(); h++) {
                for (int w = 0; w < img0.getWidth(); w++) {
                    int rgb0 = img0.getRGB(w, h);
                    int rgb1 = img1.getRGB(w, h);
                    for (int c = 0; c < 3; c++) {
                        // FIX: the original wrote (getRGB(w, h) & 0xFF) for
                        // every c, i.e. the blue channel three times.
                        // getRGB packs ARGB, so shift to pick R (c=0),
                        // G (c=1), B (c=2), then normalize to [0, 1].
                        // NOTE(review): assumes the model expects RGB channel
                        // order — confirm against the Python training code.
                        int shift = 8 * (2 - c);
                        input_matrix.setFloat(((rgb0 >> shift) & 0xFF) / 255.0f, 0, h, w, c);
                        input_matrix.setFloat(((rgb1 >> shift) & 0xFF) / 255.0f, 1, h, w, c);
                    }
                }
            }
            Tensor input_tensor = TFloat32.tensorOf(input_matrix);

            List<Tensor> outputsList = model.session().runner()
                    .feed("serving_default_input_1:0", input_tensor)
                    .fetch("StatefulPartitionedCall:0")
                    .fetch("StatefulPartitionedCall:1")
                    .fetch("StatefulPartitionedCall:2")
                    .run();

            // Three output heads: size, flow, hardness.
            Tensor result0 = outputsList.get(0);
            Tensor result1 = outputsList.get(1);
            Tensor result2 = outputsList.get(2);

            for (int i = 0; i < result0.asRawTensor().size(); i++) {
                System.out.println(result0.asRawTensor().data().asFloats().getFloat(i));
            }
            System.out.println();
            for (int i = 0; i < result1.asRawTensor().size(); i++) {
                System.out.println(result1.asRawTensor().data().asFloats().getFloat(i));
            }
            System.out.println();
            for (int i = 0; i < result2.asRawTensor().size(); i++) {
                System.out.println(result2.asRawTensor().data().asFloats().getFloat(i));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}



kotlin opencvでkmeans

fun main() {

    // Load with alpha preserved (flag -1 = IMREAD_UNCHANGED).
    val img = imread("yutaka.png", -1)
    val planes = MatVector()
    split(img, planes)
    // NOTE(review): the alpha plane is extracted but never used below.
    val alpha = planes[3]

    // Drop alpha so each pixel contributes a 3-value BGR sample.
    val bgr = Mat()
    cvtColor(img, bgr, COLOR_BGRA2BGR)

    // One row per pixel, single channel; kmeans needs float32 input.
    val flattened = bgr.reshape(1, img.rows() * img.cols())
    val samples = Mat()
    flattened.convertTo(samples, CV_32F)

    // Cluster into 5 colors: 10 attempts, stop at eps or 10 iterations.
    val labels = Mat()
    kmeans(samples, 5, labels, TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, 1.0), 10, KMEANS_RANDOM_CENTERS)

    // Print the cluster id assigned to every pixel, row-major.
    val labelBuffer = labels.createBuffer<IntBuffer>()
    for (row in 0 until img.rows()) {
        for (col in 0 until img.cols()) {
            println(labelBuffer[row * img.cols() + col])
        }
    }
}