194 lines
7.4 KiB
Text
194 lines
7.4 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
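"Benchmark: train the same Keras model on CIFAR-10 first on the CPU and then on the GPU, and compare the wall-clock training time.\n",
"\n",
"Based on: [Benchmarking CPU and GPU Performance with TensorFlow](https://www.analyticsvidhya.com/blog/2021/11/benchmarking-cpu-and-gpu-performance-with-tensorflow/)"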
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import tensorflow as tf\n",
|
|
"from tensorflow import keras\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
"# Load the CIFAR-10 train/test splits that ship with keras.datasets.\n",
"cifar10_splits = keras.datasets.cifar10.load_data()\n",
"(X_train, y_train), (X_test, y_test) = cifar10_splits"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Scale image values into the 0-1 range.\n",
"# Cast to float32 *before* dividing: true division of an integer array is\n",
"# promoted to float64 by NumPy, which would double the memory held by the\n",
"# scaled image arrays for no gain in this benchmark.\n",
"X_train_scaled = X_train.astype('float32') / 255.0\n",
"X_test_scaled = X_test.astype('float32') / 255.0\n",
"\n",
"# One-hot encode the class labels (10 classes) to match the\n",
"# categorical cross-entropy loss used by the model.\n",
"y_train_encoded = keras.utils.to_categorical(y_train, num_classes=10, dtype='float32')\n",
"y_test_encoded = keras.utils.to_categorical(y_test, num_classes=10, dtype='float32')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"def get_model():\n",
"    \"\"\"Build and compile a fully connected 10-class image classifier.\n",
"\n",
"    Each 32x32x3 input is flattened and passed through two ReLU hidden\n",
"    layers (3000 and 1000 units) to a 10-unit softmax output layer.\n",
"\n",
"    Returns:\n",
"        A compiled ``keras.Sequential`` model using the SGD optimizer,\n",
"        categorical cross-entropy loss and the accuracy metric.\n",
"    \"\"\"\n",
"    model = keras.Sequential([\n",
"        keras.layers.Flatten(input_shape=(32, 32, 3)),\n",
"        keras.layers.Dense(3000, activation='relu'),\n",
"        keras.layers.Dense(1000, activation='relu'),\n",
"        # Softmax, not sigmoid: the labels are one-hot (mutually exclusive\n",
"        # classes), so categorical cross-entropy expects the outputs to form\n",
"        # a single probability distribution over the 10 classes.\n",
"        keras.layers.Dense(10, activation='softmax'),\n",
"    ])\n",
"    model.compile(\n",
"        optimizer='SGD',\n",
"        loss='categorical_crossentropy',\n",
"        metrics=['accuracy'],\n",
"    )\n",
"    return model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"2021-12-14 08:17:31.149910: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA\n",
|
|
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
|
"2021-12-14 08:17:31.150614: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7676 MB memory: -> device: 0, name: AMD Radeon RX 6600 XT, pci bus id: 0000:08:00.0\n",
|
|
"2021-12-14 08:17:32.735544: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)\n",
|
|
"2021-12-14 08:17:32.765031: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.\n",
|
|
"2021-12-14 08:17:32.768862: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.\n",
|
|
"2021-12-14 08:17:32.770708: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.\n",
|
|
"2021-12-14 08:17:32.894871: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Epoch 1/10\n",
|
|
"1563/1563 [==============================] - 23s 15ms/step - loss: 1.8109 - accuracy: 0.3557\n",
|
|
"Epoch 2/10\n",
|
|
"1563/1563 [==============================] - 24s 15ms/step - loss: 1.6225 - accuracy: 0.4273\n",
|
|
"Epoch 3/10\n",
|
|
"1563/1563 [==============================] - 23s 15ms/step - loss: 1.5435 - accuracy: 0.4561\n",
|
|
"Epoch 4/10\n",
|
|
"1563/1563 [==============================] - 24s 15ms/step - loss: 1.4820 - accuracy: 0.4785\n",
|
|
"Epoch 5/10\n",
|
|
"1563/1563 [==============================] - 23s 15ms/step - loss: 1.4333 - accuracy: 0.4949\n",
|
|
"Epoch 6/10\n",
|
|
"1563/1563 [==============================] - 23s 15ms/step - loss: 1.3896 - accuracy: 0.5116\n",
|
|
"Epoch 7/10\n",
|
|
"1563/1563 [==============================] - 23s 15ms/step - loss: 1.3511 - accuracy: 0.5241\n",
|
|
"Epoch 8/10\n",
|
|
"1563/1563 [==============================] - 23s 15ms/step - loss: 1.3165 - accuracy: 0.5384\n",
|
|
"Epoch 9/10\n",
|
|
"1563/1563 [==============================] - 24s 15ms/step - loss: 1.2830 - accuracy: 0.5497\n",
|
|
"Epoch 10/10\n",
|
|
"1563/1563 [==============================] - 23s 15ms/step - loss: 1.2524 - accuracy: 0.5605\n",
|
|
"3min 55s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"%%timeit -n1 -r1\n",
"# CPU benchmark: -n1 -r1 times exactly one execution of this cell body.\n",
"# Build a fresh model and train it for 10 epochs with ops pinned to the CPU.\n",
"with tf.device('/CPU:0'):\n",
"    model_cpu = get_model()\n",
"    model_cpu.fit(x=X_train_scaled, y=y_train_encoded, epochs=10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"2021-12-14 08:21:53.663198: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.\n",
|
|
"2021-12-14 08:21:53.665473: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.\n",
|
|
"2021-12-14 08:21:53.667073: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.\n",
|
|
"2021-12-14 08:21:53.776133: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Epoch 1/10\n",
|
|
"1563/1563 [==============================] - 21s 3ms/step - loss: 1.8134 - accuracy: 0.3534\n",
|
|
"Epoch 2/10\n",
|
|
"1563/1563 [==============================] - 7s 4ms/step - loss: 1.6232 - accuracy: 0.4246\n",
|
|
"Epoch 3/10\n",
|
|
"1563/1563 [==============================] - 5s 3ms/step - loss: 1.5409 - accuracy: 0.4573\n",
|
|
"Epoch 4/10\n",
|
|
"1563/1563 [==============================] - 5s 3ms/step - loss: 1.4833 - accuracy: 0.4787\n",
|
|
"Epoch 5/10\n",
|
|
"1563/1563 [==============================] - 5s 3ms/step - loss: 1.4319 - accuracy: 0.4979\n",
|
|
"Epoch 6/10\n",
|
|
"1563/1563 [==============================] - 7s 5ms/step - loss: 1.3883 - accuracy: 0.5114\n",
|
|
"Epoch 7/10\n",
|
|
"1563/1563 [==============================] - 5s 3ms/step - loss: 1.3526 - accuracy: 0.5279\n",
|
|
"Epoch 8/10\n",
|
|
"1563/1563 [==============================] - 5s 3ms/step - loss: 1.3171 - accuracy: 0.5377\n",
|
|
"Epoch 9/10\n",
|
|
"1563/1563 [==============================] - 5s 3ms/step - loss: 1.2844 - accuracy: 0.5471\n",
|
|
"Epoch 10/10\n",
|
|
"1563/1563 [==============================] - 7s 5ms/step - loss: 1.2505 - accuracy: 0.5631\n",
|
|
"1min 13s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"%%timeit -n1 -r1\n",
"# GPU benchmark: -n1 -r1 times exactly one execution of this cell body.\n",
"# Build a fresh model and train it for 10 epochs with ops pinned to GPU 0.\n",
"with tf.device('/GPU:0'):\n",
"    model_gpu = get_model()\n",
"    model_gpu.fit(x=X_train_scaled, y=y_train_encoded, epochs=10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"interpreter": {
|
|
"hash": "21ce40c1d60a5dc40f385ae01086c7bb98c0e0e19c4f7e634ba6a67c32ffc01a"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3.9.9 64-bit ('.venv': venv)",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.9"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|