First_try
First Try
讲道理,这个 commit 的名字很能反映当时的心境了 hhh。
我们先来看代码。因为这次的代码不像第一次那么少,所以我们按文件分开来说。
Makefile
#
# Build script for the caffeine shared library.
#
# Every .c and .cpp file under src/caffeine/ is compiled by make's built-in
# implicit rules, and the resulting objects are linked into $(NAME).
#
NAME := caffeine.so
C_SRCS := $(wildcard src/caffeine/*.c)
CXX_SRCS := $(wildcard src/caffeine/*.cpp)
C_OBJS := $(C_SRCS:.c=.o)
CXX_OBJS := $(CXX_SRCS:.cpp=.o)
OBJS := $(C_OBJS) $(CXX_OBJS)

# CUDA toolkit location; can be overridden on the command line, e.g.
# `make CUDA_DIR=/opt/cuda`.
CUDA_DIR = /usr/local/cuda
CUDA_INCLUDE_DIR = $(CUDA_DIR)/include
CUDA_LIB_DIR = $(CUDA_DIR)/lib

INCLUDE_DIRS := $(CUDA_INCLUDE_DIR) src/
LIBRARY_DIRS := $(CUDA_LIB_DIR)
LIBRARIES := cudart cublas
WARNINGS := -Wall

# Objects that go into a shared library must be position independent, and
# warnings only matter while compiling, so both are added to the compile flags
# that the implicit %.o rules use.
CFLAGS += -fPIC $(WARNINGS)
CXXFLAGS += -fPIC $(WARNINGS)
CPPFLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) -shared
# Libraries are kept in LDLIBS so they come after the object files on the link
# line; the linker resolves symbols from left to right.
LDLIBS += $(foreach library,$(LIBRARIES),-l$(library))

LINK = $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)

.PHONY: all clean distclean

all: $(NAME)

$(NAME): $(OBJS)
	$(LINK) $^ $(LDLIBS) -o $@

# $(RM) is `rm -f`, so missing files are not an error.
clean:
	$(RM) $(NAME) $(OBJS)

distclean: clean
src/caffeine/blob.cpp
#include "caffeine/blob.hpp"
#include "caffeine/common.hpp"
#include "caffeine/syncedmem.hpp"
namespace caffeine {
// Records the four blob dimensions, recomputes count_ as their product, and
// replaces data_ and diff_ with new SyncedMemory objects sized to hold count_
// elements of Dtype.
template <typename Dtype>
void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
                          const int width) {
  num_ = num;
  channels_ = channels;
  height_ = height;
  width_ = width;
  count_ = num_ * channels_ * height_ * width_;
  // shared_ptr::reset() takes ownership of a heap pointer, so each buffer
  // object has to be created with new; a temporary cannot be passed here.
  data_.reset(new SyncedMemory(count_ * sizeof(Dtype)));
  diff_.reset(new SyncedMemory(count_ * sizeof(Dtype)));
}
// Returns a read-only, typed view of the pointer produced by
// SyncedMemory::cpu_data() on the data buffer.
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_data() {
  check_data();
  // SyncedMemory returns an untyped const void*; C++ does not convert that to
  // const Dtype* implicitly, so the cast is required.
  return static_cast<const Dtype*>(data_->cpu_data());
}
// Returns a read-only, typed view of the pointer produced by
// SyncedMemory::gpu_data() on the data buffer.
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_data() {
  check_data();
  // SyncedMemory returns an untyped const void*; C++ does not convert that to
  // const Dtype* implicitly, so the cast is required.
  return static_cast<const Dtype*>(data_->gpu_data());
}
// Returns a read-only, typed view of the pointer produced by
// SyncedMemory::cpu_data() on the diff buffer.
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_diff() {
  check_diff();
  // SyncedMemory returns an untyped const void*; C++ does not convert that to
  // const Dtype* implicitly, so the cast is required.
  return static_cast<const Dtype*>(diff_->cpu_data());
}
// Returns a read-only, typed view of the pointer produced by
// SyncedMemory::gpu_data() on the diff buffer.
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_diff() {
  check_diff();
  // SyncedMemory returns an untyped const void*; C++ does not convert that to
  // const Dtype* implicitly, so the cast is required.
  return static_cast<const Dtype*>(diff_->gpu_data());
}
// Returns a writable, typed view of the pointer produced by
// SyncedMemory::mutable_cpu_data() on the data buffer.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_data() {
  check_data();
  // SyncedMemory returns an untyped void*; C++ does not convert that to
  // Dtype* implicitly, so the cast is required.
  return static_cast<Dtype*>(data_->mutable_cpu_data());
}
// Returns a writable, typed view of the pointer produced by
// SyncedMemory::mutable_gpu_data() on the data buffer.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_data() {
  check_data();
  // SyncedMemory returns an untyped void*; C++ does not convert that to
  // Dtype* implicitly, so the cast is required.
  return static_cast<Dtype*>(data_->mutable_gpu_data());
}
// Returns a writable, typed view of the pointer produced by
// SyncedMemory::mutable_cpu_data() on the diff buffer.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_diff() {
  check_diff();
  // SyncedMemory returns an untyped void*; C++ does not convert that to
  // Dtype* implicitly, so the cast is required.
  return static_cast<Dtype*>(diff_->mutable_cpu_data());
}
// Returns a writable, typed view of the pointer produced by
// SyncedMemory::mutable_gpu_data() on the diff buffer.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_diff() {
  check_diff();
  // SyncedMemory returns an untyped void*; C++ does not convert that to
  // Dtype* implicitly, so the cast is required.
  return static_cast<Dtype*>(diff_->mutable_gpu_data());
}
} // namespace caffeine
src/caffeine/blob.hpp
#ifndef CAFFEINE_BLOB_HPP
#define CAFFEINE_BLOB_HPP
#include <memory>
#include "caffeine/common.hpp"
#include "caffeine/syncedmem.hpp"
namespace caffeine {
// A four-dimensional (num x channels x height x width) container of Dtype
// values. It holds two SyncedMemory buffers, data_ and diff_, each exposed
// through read-only and mutable CPU/GPU accessors.
template <typename Dtype>
class Blob {
 public:
  // Creates an empty blob: all dimensions are zero and no buffers are attached.
  // Initializers are listed in member declaration order, which is the order in
  // which they actually run.
  Blob()
      : data_(), diff_(), num_(0), channels_(0), height_(0), width_(0),
        count_(0) {}
  // Creates a blob with the given dimensions by delegating to Reshape(), which
  // assigns every member.
  explicit Blob(const int num, const int channels, const int height,
                const int width) {
    Reshape(num, channels, height, width);
  }
  ~Blob() {}

  // Sets the dimensions and replaces both buffers.
  void Reshape(const int num, const int channels, const int height,
               const int width);

  // Dimension getters; const so they can be used on a const Blob.
  inline int num() const { return num_; }
  inline int channels() const { return channels_; }
  inline int height() const { return height_; }
  inline int width() const { return width_; }

  // Read-only access to the data / diff buffers.
  const Dtype* cpu_data();
  const Dtype* gpu_data();
  const Dtype* cpu_diff();
  const Dtype* gpu_diff();
  // Writable access to the data / diff buffers.
  Dtype* mutable_cpu_data();
  Dtype* mutable_gpu_data();
  Dtype* mutable_cpu_diff();
  Dtype* mutable_gpu_diff();

 private:
  // Called at the top of every data / diff accessor.
  // NOTE(review): no definition of these two is visible alongside this header;
  // presumably they validate that the buffer exists -- confirm and define.
  void check_data();
  void check_diff();

  shared_ptr<SyncedMemory> data_;
  shared_ptr<SyncedMemory> diff_;
  int num_;
  int channels_;
  int height_;
  int width_;
  int count_;  // num_ * channels_ * height_ * width_, set by Reshape()
};  // class Blob
} // namespace caffeine
#endif // CAFFEINE_BLOB_HPP_
src/caffeine/common.hpp
#ifndef CAFFEINE_COMMON_HPP_
#define CAFFEINE_COMMON_HPP_
#include <iostream>
#include <boost/shared_ptr.hpp>
#include "driver_types.h"
namespace caffeine {
// Lets code inside namespace caffeine refer to shared_ptr unqualified.
using boost::shared_ptr;
}
// Output stream constructed with a null stream buffer; used as the sink for
// log output that should be discarded.
static std::ostream nullout(0);
// TODO(Yangqing): make a better logging scheme
// LOG_IF(condition) evaluates the CUDA call `condition` exactly once and
// yields std::cout when it did NOT return cudaSuccess, or nullout when it
// succeeded. The whole conditional is parenthesized so that a following `<<`
// applies to the selected stream rather than only to the last operand.
#define LOG_IF(condition) \
  (((condition) != cudaSuccess) ? std::cout : nullout)
// CUDA_CHECK(call) runs `call` and, if it fails, writes
// "Check failed: <call text> " to std::cout. More text may be appended
// with `<<`.
#define CUDA_CHECK(condition) \
  LOG_IF(condition) << "Check failed: " #condition " "
#endif // CAFFEINE_COMMON_HPP_
src/caffeine/syncedmem.cpp
#include "cuda_runtime.h"
#include "caffeine/common.hpp"
#include "caffeine/syncedmem.hpp"
namespace caffeine {
// Frees whichever of the two buffers has been allocated: the host buffer with
// cudaFreeHost and the device buffer with cudaFree.
SyncedMemory::~SyncedMemory() {
  if (cpu_ptr_ != NULL) {
    CUDA_CHECK(cudaFreeHost(cpu_ptr_));
  }
  if (gpu_ptr_ != NULL) {
    CUDA_CHECK(cudaFree(gpu_ptr_));
  }
}
// Ensures cpu_ptr_ points at host memory holding the current contents.
//   UNINITIALIZED: allocate a zero-filled host buffer; head_ -> HEAD_AT_CPU.
//   HEAD_AT_GPU:   allocate the host buffer if needed, copy device -> host;
//                  head_ -> SYNCED.
//   HEAD_AT_CPU / SYNCED: nothing to do.
inline void SyncedMemory::to_cpu() {
  switch (head_) {
    case UNINITIALIZED:
      CUDA_CHECK(cudaMallocHost(&cpu_ptr_, size_));
      memset(cpu_ptr_, 0, size_);
      head_ = HEAD_AT_CPU;
      break;
    case HEAD_AT_GPU:
      if (cpu_ptr_ == NULL) {
        CUDA_CHECK(cudaMallocHost(&cpu_ptr_, size_));
      }
      // The copy must happen even when the host buffer already exists;
      // otherwise a buffer written on the device after an earlier host access
      // would leave stale bytes in cpu_ptr_.
      CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDeviceToHost));
      head_ = SYNCED;
      break;
    case HEAD_AT_CPU:
    case SYNCED:
      break;
  }
}
// Ensures gpu_ptr_ points at device memory holding the current contents.
//   UNINITIALIZED: allocate a zero-filled device buffer; head_ -> HEAD_AT_GPU.
//   HEAD_AT_CPU:   allocate the device buffer if needed, copy host -> device;
//                  head_ -> SYNCED.
//   HEAD_AT_GPU / SYNCED: nothing to do.
inline void SyncedMemory::to_gpu() {
  switch (head_) {
    case UNINITIALIZED:
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
      CUDA_CHECK(cudaMemset(gpu_ptr_, 0, size_));
      head_ = HEAD_AT_GPU;
      break;
    case HEAD_AT_CPU:
      if (gpu_ptr_ == NULL) {
        CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
      }
      // The copy must happen even when the device buffer already exists;
      // otherwise a buffer written on the host after an earlier device access
      // would leave stale bytes in gpu_ptr_.
      CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyHostToDevice));
      head_ = SYNCED;
      break;
    case HEAD_AT_GPU:
    case SYNCED:
      break;
  }
}
// Brings the host copy up to date via to_cpu() and returns it read-only.
// Not marked inline: this public member is called from other translation
// units (e.g. blob.cpp), so its definition has to be emitted from this file.
const void* SyncedMemory::cpu_data() {
  to_cpu();
  return cpu_ptr_;
}
// Brings the device copy up to date via to_gpu() and returns it read-only.
// Not marked inline: this public member is called from other translation
// units (e.g. blob.cpp), so its definition has to be emitted from this file.
const void* SyncedMemory::gpu_data() {
  to_gpu();
  return gpu_ptr_;
}
// Brings the host copy up to date via to_cpu(), then marks the host side as
// the head because the caller may write through the returned pointer.
// Not marked inline: this public member is called from other translation
// units (e.g. blob.cpp), so its definition has to be emitted from this file.
void* SyncedMemory::mutable_cpu_data() {
  to_cpu();
  head_ = HEAD_AT_CPU;
  return cpu_ptr_;
}
// Brings the device copy up to date via to_gpu(), then marks the device side
// as the head because the caller may write through the returned pointer.
// Not marked inline: this public member is called from other translation
// units (e.g. blob.cpp), so its definition has to be emitted from this file.
void* SyncedMemory::mutable_gpu_data() {
  to_gpu();
  head_ = HEAD_AT_GPU;
  return gpu_ptr_;
}
} // namespace caffeine
src/caffeine/syncedmem.hpp
#ifndef CAFFEINE_SYNCEDMEM_HPP
#define CAFFEINE_SYNCEDMEM_HPP
namespace caffeine {
// A buffer of size_ bytes that can be accessed through a host pointer and a
// device pointer. head_ records which side was touched last so that
// to_cpu() / to_gpu() know when to allocate and when to copy.
class SyncedMemory {
 public:
  // Both constructors start in the UNINITIALIZED state: that is the only state
  // in which to_cpu() / to_gpu() allocate the first buffer. Starting in SYNCED
  // would make every accessor return the initial NULL pointer.
  // Initializers follow member declaration order.
  SyncedMemory()
      : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED) {}
  explicit SyncedMemory(size_t size)
      : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED) {}
  ~SyncedMemory();

  // Read-only access; the requested side is brought up to date first.
  const void* cpu_data();
  const void* gpu_data();
  // Writable access; additionally marks the requested side as the head.
  void* mutable_cpu_data();
  void* mutable_gpu_data();

 private:
  void to_cpu();
  void to_gpu();

  // NOTE(review): the destructor frees both pointers, so copying a
  // SyncedMemory would free them twice -- consider making it non-copyable.
  void* cpu_ptr_;  // host buffer, NULL until first needed
  void* gpu_ptr_;  // device buffer, NULL until first needed
  size_t size_;    // buffer size in bytes
  enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
  SyncedHead head_;
};  // class SyncedMemory
} // namespace caffeine
#endif // CAFFEINE_SYNCEDMEM_HPP_