Dump all my ML stuff here
This commit is contained in:
parent
4207b5595c
commit
2063cc116f
File diff suppressed because one or more lines are too long
BIN
cuda_check
Executable file
BIN
cuda_check
Executable file
Binary file not shown.
108
cuda_check.c
Normal file
108
cuda_check.c
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <cuda.h>
|
||||||
|
#include <cuda_runtime_api.h>
|
||||||
|
|
||||||
|
/* Outputs some information on CUDA-enabled devices on your computer,
|
||||||
|
* including compute capability and current memory usage.
|
||||||
|
*
|
||||||
|
* On Linux, compile with: nvcc -o cuda_check cuda_check.c -lcuda
|
||||||
|
* On Windows, compile with: nvcc -o cuda_check.exe cuda_check.c -lcuda
|
||||||
|
*
|
||||||
|
* Authors: Thomas Unterthiner, Jan Schlüter
|
||||||
|
*/
|
||||||
|
|
||||||
|
int ConvertSMVer2Cores(int major, int minor)
{
    // Returns the number of CUDA cores per multiprocessor for a given
    // Compute Capability version, or 0 if the version is not in the table.
    // There is no way to retrieve that via the API, so it needs to be
    // hard-coded.
    // See _ConvertSMVer2Cores in helper_cuda.h in NVIDIA's CUDA Samples.
    //
    // The version is packed as 0xMm (major in the high nibble, minor in the
    // low nibble), so e.g. Compute Capability 8.6 is looked up as 0x86.
    switch ((major << 4) + minor) {
        // Tesla
        case 0x10:
        case 0x11:
        case 0x12:
        case 0x13:
            return 8;

        // Fermi
        case 0x20:
            return 32;
        case 0x21:
            return 48;

        // Kepler
        case 0x30:
        case 0x32:
        case 0x35:
        case 0x37:
            return 192;

        // Maxwell
        case 0x50:
        case 0x52:
        case 0x53:
            return 128;

        // Pascal
        case 0x60:
            return 64;
        case 0x61:
        case 0x62:
            return 128;

        // Volta (0x72 is Xavier) and Turing
        case 0x70:
        case 0x72:
        case 0x75:
            return 64;

        // Ampere
        case 0x80:
            return 64;
        case 0x86:
        case 0x87:
            return 128;

        // Ada Lovelace
        case 0x89:
            return 128;

        // Hopper
        case 0x90:
            return 128;

        // Unknown architecture: the caller treats 0 as "unknown".
        default:
            return 0;
    }
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
int nGpus;
|
||||||
|
int i;
|
||||||
|
char name[100];
|
||||||
|
int cc_major, cc_minor, cores, cuda_cores, threads_per_core, clockrate;
|
||||||
|
size_t freeMem;
|
||||||
|
size_t totalMem;
|
||||||
|
|
||||||
|
CUresult result;
|
||||||
|
CUdevice device;
|
||||||
|
CUcontext context;
|
||||||
|
|
||||||
|
result = cuInit(0);
|
||||||
|
if (result != CUDA_SUCCESS) {
|
||||||
|
printf("cuInit failed with error code %d: %s\n", result, cudaGetErrorString(result));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
result = cuDeviceGetCount(&nGpus);
|
||||||
|
if (result != CUDA_SUCCESS) {
|
||||||
|
printf("cuDeviceGetCount failed with error code %d: %s\n", result, cudaGetErrorString(result));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
printf("Found %d device(s).\n", nGpus);
|
||||||
|
for (i = 0; i < nGpus; i++) {
|
||||||
|
cuDeviceGet(&device, i);
|
||||||
|
printf("Device: %d\n", i);
|
||||||
|
if (cuDeviceGetName(&name[0], sizeof(name), device) == CUDA_SUCCESS) {
|
||||||
|
printf(" Name: %s\n", &name[0]);
|
||||||
|
}
|
||||||
|
if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
|
||||||
|
printf(" Compute Capability: %d.%d\n", cc_major, cc_minor);
|
||||||
|
}
|
||||||
|
if (cuDeviceGetAttribute(&cores, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device) == CUDA_SUCCESS) {
|
||||||
|
printf(" Multiprocessors: %d\n", cores);
|
||||||
|
if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
|
||||||
|
cuda_cores = cores * ConvertSMVer2Cores(cc_major, cc_minor);
|
||||||
|
if (cuda_cores > 0) {
|
||||||
|
printf(" CUDA Cores: %d\n", cuda_cores);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
printf(" CUDA Cores: unknown\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cuDeviceGetAttribute(&threads_per_core, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, device) == CUDA_SUCCESS) {
|
||||||
|
printf(" Concurrent threads: %d\n", cores*threads_per_core);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device) == CUDA_SUCCESS) {
|
||||||
|
printf(" GPU clock: %g MHz\n", clockrate/1000.);
|
||||||
|
}
|
||||||
|
if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device) == CUDA_SUCCESS) {
|
||||||
|
printf(" Memory clock: %g MHz\n", clockrate/1000.);
|
||||||
|
}
|
||||||
|
cuCtxCreate(&context, 0, device);
|
||||||
|
result = cuMemGetInfo(&freeMem, &totalMem);
|
||||||
|
if (result == CUDA_SUCCESS ) {
|
||||||
|
printf(" Total Memory: %ld MiB\n Free Memory: %ld MiB\n", totalMem / ( 1024 * 1024 ), freeMem / ( 1024 * 1024 ));
|
||||||
|
} else {
|
||||||
|
printf(" cMemGetInfo failed with error code %d: %s\n", result, cudaGetErrorString(result));
|
||||||
|
}
|
||||||
|
cuCtxDetach(context);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
30
keras-test.py
Normal file
30
keras-test.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
import keras
|
||||||
|
from keras.models import Sequential
|
||||||
|
from keras.layers import Dense, Dropout, Activation
|
||||||
|
from keras.optimizers import SGD
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
# Synthetic data: 20 random features per sample, one-hot labels over 50 classes.
# NOTE(review): labels are drawn from randint(10), so only classes 0-9 ever
# occur even though the output layer has 50 units — confirm this is intended.
x_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=50)
x_test = np.random.random((100, 20))
y_test = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=50)

model = Sequential()
# Dense(200) is a fully-connected layer with 200 hidden units.
# In the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(Dense(200, activation='relu', input_dim=20))
model.add(Dropout(0.5))
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.5))
# Output layer: one softmax unit per class (matches num_classes=50 above).
model.add(Dense(50, activation='softmax'))

# NOTE(review): newer Keras releases spell `lr` as `learning_rate` and may not
# accept `decay` — verify against the installed Keras version before upgrading.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

model.fit(x_train, y_train,
          epochs=20,
          batch_size=128)

# With metrics=['accuracy'] the result holds the loss followed by the accuracy.
# Print it so the run actually reports its evaluation instead of discarding it.
score = model.evaluate(x_test, y_test, batch_size=128)
print("Test [loss, accuracy]:", score)
|
|
@ -9,5 +9,7 @@ import matplotlib.pyplot as plt
|
||||||
|
|
||||||
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
|
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
|
||||||
|
|
||||||
print(tf.config.list_physical_devices('GPU'))
|
with tf.device("/GPU:0"):
|
||||||
|
a = tf.random.normal([1, 2])
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
import torch
|
import torch
|
||||||
from importlib import reload
|
|
||||||
reload(torch)
|
|
||||||
|
|
||||||
print(torch.cuda.is_available())
|
print(torch.cuda.is_available())
|
||||||
|
|
Loading…
Reference in a new issue