First_try

Author Avatar
Tianqi Zhang 8月 02, 2018

First Try

讲道理这个commit的名字很是能反映心境了hhh

我们先来看代码。因为这次的代码不像第一次那么少,所以我们按文件分开来说。

Makefile

#
# Builds the shared library caffeine.so from every C and C++ source file found
# directly under src/caffeine/.
#
# Related variables share a naming pattern (C_SRCS / C_OBJS, CXX_SRCS / CXX_OBJS,
# CUDA_*_DIR, ...). Make has no structs, so this convention is what groups the
# attributes that belong together.
#

# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:

NAME := caffeine.so
C_SRCS := $(wildcard src/caffeine/*.c)
CXX_SRCS := $(wildcard src/caffeine/*.cpp)
C_OBJS := $(C_SRCS:.c=.o)
CXX_OBJS := $(CXX_SRCS:.cpp=.o)
OBJS := $(C_OBJS) $(CXX_OBJS)

# Recursive (=) on purpose: `make CUDA_DIR=/opt/cuda` also moves the two
# directories derived from it.
CUDA_DIR = /usr/local/cuda
CUDA_INCLUDE_DIR = $(CUDA_DIR)/include
CUDA_LIB_DIR = $(CUDA_DIR)/lib

INCLUDE_DIRS := $(CUDA_INCLUDE_DIR) src/
LIBRARY_DIRS := $(CUDA_LIB_DIR)
LIBRARIES := cudart cublas
WARNINGS := -Wall

# The objects are compiled by Make's built-in %.o rules, which only look at
# CPPFLAGS, CFLAGS and CXXFLAGS. Code linked with -shared must be
# position-independent, and warnings only matter at compile time, so both are
# added here rather than on the link line.
CFLAGS += -fPIC $(WARNINGS)
CXXFLAGS += -fPIC $(WARNINGS)
CPPFLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) -shared
# Libraries live in LDLIBS so they can be placed after the objects that use them.
LDLIBS += $(foreach library,$(LIBRARIES),-l$(library))

LINK = $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)

.PHONY: all clean distclean

all: $(NAME)

$(NAME): $(OBJS)
	$(LINK) $^ $(LDLIBS) -o $@

# $(RM) is `rm -f`, so missing files are not an error.
clean:
	$(RM) $(NAME) $(OBJS)

distclean: clean

src/caffeine/blob.cpp

#include "caffeine/blob.hpp"
#include "caffeine/common.hpp"
#include "caffeine/syncedmem.hpp"

namespace caffeine {

// Sets the four dimensions of the blob, recomputes the element count as their
// product, and replaces both the data and the diff buffers with fresh
// SyncedMemory objects of count_ * sizeof(Dtype) bytes each.
template <typename Dtype>
void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
    const int width) {
  num_ = num;
  channels_ = channels;
  height_ = height;
  width_ = width;
  count_ = num_ * channels_ * height_ * width_;
  // shared_ptr::reset() takes ownership of a heap-allocated object, so the
  // SyncedMemory must be created with new rather than passed as a temporary.
  data_.reset(new SyncedMemory(count_ * sizeof(Dtype)));
  diff_.reset(new SyncedMemory(count_ * sizeof(Dtype)));
}

// Returns a read-only, typed pointer to the CPU-side data buffer.
// check_data() is declared in the class; its definition is not visible here.
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_data() {
  check_data();
  // SyncedMemory returns const void*; C++ needs an explicit cast to Dtype.
  return static_cast<const Dtype*>(data_->cpu_data());
}

// Returns a read-only, typed pointer to the GPU-side data buffer.
// check_data() is declared in the class; its definition is not visible here.
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_data() {
  check_data();
  // SyncedMemory returns const void*; C++ needs an explicit cast to Dtype.
  return static_cast<const Dtype*>(data_->gpu_data());
}

// Returns a read-only, typed pointer to the CPU-side diff buffer.
// check_diff() is declared in the class; its definition is not visible here.
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_diff() {
  check_diff();
  // SyncedMemory returns const void*; C++ needs an explicit cast to Dtype.
  return static_cast<const Dtype*>(diff_->cpu_data());
}

// Returns a read-only, typed pointer to the GPU-side diff buffer.
// check_diff() is declared in the class; its definition is not visible here.
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_diff() {
  check_diff();
  // SyncedMemory returns const void*; C++ needs an explicit cast to Dtype.
  return static_cast<const Dtype*>(diff_->gpu_data());
}

// Returns a writable, typed pointer to the CPU-side data buffer.
// check_data() is declared in the class; its definition is not visible here.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_data() {
  check_data();
  // SyncedMemory returns void*; C++ needs an explicit cast to Dtype*.
  return static_cast<Dtype*>(data_->mutable_cpu_data());
}

// Returns a writable, typed pointer to the GPU-side data buffer.
// check_data() is declared in the class; its definition is not visible here.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_data() {
  check_data();
  // SyncedMemory returns void*; C++ needs an explicit cast to Dtype*.
  return static_cast<Dtype*>(data_->mutable_gpu_data());
}

// Returns a writable, typed pointer to the CPU-side diff buffer.
// check_diff() is declared in the class; its definition is not visible here.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_diff() {
  check_diff();
  // SyncedMemory returns void*; C++ needs an explicit cast to Dtype*.
  return static_cast<Dtype*>(diff_->mutable_cpu_data());
}

// Returns a writable, typed pointer to the GPU-side diff buffer.
// check_diff() is declared in the class; its definition is not visible here.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_diff() {
  check_diff();
  // SyncedMemory returns void*; C++ needs an explicit cast to Dtype*.
  return static_cast<Dtype*>(diff_->mutable_gpu_data());
}

}  // namespace caffeine

src/caffeine/blob.hpp

#ifndef CAFFEINE_BLOB_HPP
#define CAFFEINE_BLOB_HPP

#include <memory>

#include "caffeine/common.hpp"
#include "caffeine/syncedmem.hpp"

namespace caffeine {

// A container of Dtype elements described by four dimensions (num, channels,
// height, width). It owns two SyncedMemory buffers, one for "data" and one for
// "diff", each reachable through CPU and GPU accessors in read-only and
// mutable flavours.
template <typename Dtype>
class Blob {
 public:
  // Creates an empty blob: all dimensions are zero and no buffers are attached.
  // Members are initialized in declaration order (buffers first, then sizes).
  Blob()
      : data_(), diff_(), num_(0), channels_(0), height_(0), width_(0),
        count_(0) {}
  // Creates a blob with the given dimensions. Members are zero-initialized
  // first so the object is never observed with indeterminate values, then
  // Reshape() sets the dimensions and allocates the buffers.
  explicit Blob(const int num, const int channels, const int height,
      const int width)
      : data_(), diff_(), num_(0), channels_(0), height_(0), width_(0),
        count_(0) {
    Reshape(num, channels, height, width);
  }
  ~Blob() {}
  // Changes the dimensions and replaces the data and diff buffers.
  void Reshape(const int num, const int channels, const int height,
               const int width);
  // Dimension getters; const so they can be called on a const Blob.
  inline int num() const { return num_; }
  inline int channels() const { return channels_; }
  inline int height() const { return height_; }
  inline int width() const { return width_; }

  // Read-only views of the data / diff buffers on the CPU or the GPU.
  const Dtype* cpu_data();
  const Dtype* gpu_data();
  const Dtype* cpu_diff();
  const Dtype* gpu_diff();
  // Writable views of the data / diff buffers on the CPU or the GPU.
  Dtype* mutable_cpu_data();
  Dtype* mutable_gpu_data();
  Dtype* mutable_cpu_diff();
  Dtype* mutable_gpu_diff();
 private:
  // Called by every accessor before touching the matching buffer.
  // NOTE(review): no definition is visible alongside the other members.
  void check_data();
  void check_diff();
  shared_ptr<SyncedMemory> data_;
  shared_ptr<SyncedMemory> diff_;
  int num_;
  int channels_;
  int height_;
  int width_;
  // Product of the four dimensions, i.e. the number of Dtype elements.
  int count_;
};  // class Blob

}  // namespace caffeine

#endif  // CAFFEINE_BLOB_HPP

src/caffeine/common.hpp

#ifndef CAFFEINE_COMMON_HPP_
#define CAFFEINE_COMMON_HPP_

#include <iostream>

#include <boost/shared_ptr.hpp>

#include "driver_types.h"

namespace caffeine {
  using boost::shared_ptr;
}

// Sink stream: it is constructed with a null stream buffer, so insertions into
// it produce no output.
static std::ostream nullout(0);

// TODO(Yangqing): make a better logging scheme
// LOG_IF(condition) selects the stream to write to: std::cout when `condition`
// (a CUDA status value) differs from cudaSuccess, and the silent `nullout`
// when it equals cudaSuccess. The whole conditional is parenthesized so that a
// following `<< ...` chain applies to the selected stream instead of binding
// only to the last operand of ?:.
#define LOG_IF(condition) \
    (((condition) == cudaSuccess) ? nullout : std::cout)

// CUDA_CHECK(call) evaluates `call` once and, if it did not return
// cudaSuccess, starts a "Check failed: <call text> " message on std::cout.
// Further text may be appended with <<.
#define CUDA_CHECK(condition) \
    LOG_IF(condition) << "Check failed: " #condition " "

#endif  // CAFFEINE_COMMON_HPP_

src/caffeine/syncedmem.cpp

#include <cstring>

#include "cuda_runtime.h"

#include "caffeine/common.hpp"
#include "caffeine/syncedmem.hpp"

namespace caffeine {

// Frees whichever of the two buffers has been allocated. The host buffer is
// released with cudaFreeHost and the device buffer with cudaFree; each call's
// status is passed to CUDA_CHECK.
SyncedMemory::~SyncedMemory() {
  if (cpu_ptr_ != NULL) {
    CUDA_CHECK(cudaFreeHost(cpu_ptr_));
  }
  if (gpu_ptr_ != NULL) {
    CUDA_CHECK(cudaFree(gpu_ptr_));
  }
}

// Makes the CPU-side buffer hold the current contents.
//   UNINITIALIZED: allocate the host buffer, zero it, mark HEAD_AT_CPU.
//   HEAD_AT_GPU:   allocate the host buffer if needed, copy device -> host,
//                  mark SYNCED.
//   HEAD_AT_CPU / SYNCED: the host buffer is already current; nothing to do.
void SyncedMemory::to_cpu() {
  switch (head_) {
  case UNINITIALIZED:
    CUDA_CHECK(cudaMallocHost(&cpu_ptr_, size_));
    memset(cpu_ptr_, 0, size_);
    head_ = HEAD_AT_CPU;
    break;
  case HEAD_AT_GPU:
    if (cpu_ptr_ == NULL) {
      CUDA_CHECK(cudaMallocHost(&cpu_ptr_, size_));
    }
    // The copy must run even when the host buffer already exists: in this
    // state the device holds the newest data, so an existing host buffer is
    // stale and may not be declared SYNCED without being refreshed.
    CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDeviceToHost));
    head_ = SYNCED;
    break;
  case HEAD_AT_CPU:
  case SYNCED:
    break;
  }
}

// Makes the GPU-side buffer hold the current contents.
//   UNINITIALIZED: allocate the device buffer, zero it, mark HEAD_AT_GPU.
//   HEAD_AT_CPU:   allocate the device buffer if needed, copy host -> device,
//                  mark SYNCED.
//   HEAD_AT_GPU / SYNCED: the device buffer is already current; nothing to do.
void SyncedMemory::to_gpu() {
  switch (head_) {
  case UNINITIALIZED:
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    CUDA_CHECK(cudaMemset(gpu_ptr_, 0, size_));
    head_ = HEAD_AT_GPU;
    break;
  case HEAD_AT_CPU:
    if (gpu_ptr_ == NULL) {
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    }
    // The copy must run even when the device buffer already exists: in this
    // state the host holds the newest data, so an existing device buffer is
    // stale and may not be declared SYNCED without being refreshed.
    CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyHostToDevice));
    head_ = SYNCED;
    break;
  case HEAD_AT_GPU:
  case SYNCED:
    break;
  }
}


// Brings the CPU-side buffer up to date and returns it read-only.
// Not marked inline: this public member is defined only in this .cpp file but
// called from other translation units, so it needs an out-of-line definition.
const void* SyncedMemory::cpu_data() {
  to_cpu();
  return cpu_ptr_;
}

// Brings the GPU-side buffer up to date and returns it read-only.
// Not marked inline: this public member is defined only in this .cpp file but
// called from other translation units, so it needs an out-of-line definition.
const void* SyncedMemory::gpu_data() {
  to_gpu();
  return gpu_ptr_;
}

// Brings the CPU-side buffer up to date and returns it for writing. The head
// is set to HEAD_AT_CPU because the caller may modify the host copy, which
// makes it the newest one.
// Not marked inline: this public member is defined only in this .cpp file but
// called from other translation units, so it needs an out-of-line definition.
void* SyncedMemory::mutable_cpu_data() {
  to_cpu();
  head_ = HEAD_AT_CPU;
  return cpu_ptr_;
}

// Brings the GPU-side buffer up to date and returns it for writing. The head
// is set to HEAD_AT_GPU because the caller may modify the device copy, which
// makes it the newest one.
// Not marked inline: this public member is defined only in this .cpp file but
// called from other translation units, so it needs an out-of-line definition.
void* SyncedMemory::mutable_gpu_data() {
  to_gpu();
  head_ = HEAD_AT_GPU;
  return gpu_ptr_;
}


}  // namespace caffeine

src/caffeine/syncedmem.hpp

#ifndef CAFFEINE_SYNCEDMEM_HPP
#define CAFFEINE_SYNCEDMEM_HPP

namespace caffeine {

// A block of `size_` bytes that can be accessed through a CPU pointer or a GPU
// pointer. `head_` records which side holds the newest contents.
// NOTE(review): the class declares a destructor but no copy constructor or
// copy assignment, so a copied object would share the same raw pointers.
class SyncedMemory {
 public:
  // Both constructors start in UNINITIALIZED with no buffers attached, so the
  // first to_cpu()/to_gpu() performs the allocation. Starting in SYNCED would
  // make those calls a no-op and the accessors would return NULL.
  SyncedMemory()
      : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED) {}
  // Initializers are listed in member declaration order (size_ before head_).
  explicit SyncedMemory(size_t size)
      : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED) {}
  ~SyncedMemory();
  // Read-only access to the CPU-side / GPU-side buffer.
  const void* cpu_data();
  const void* gpu_data();
  // Writable access to the CPU-side / GPU-side buffer.
  void* mutable_cpu_data();
  void* mutable_gpu_data();
 private:
  // Bring the CPU-side / GPU-side buffer up to date.
  void to_cpu();
  void to_gpu();
  void* cpu_ptr_;
  void* gpu_ptr_;
  size_t size_;
  enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
  SyncedHead head_;
};  // class SyncedMemory

}  // namespace caffeine

#endif  // CAFFEINE_SYNCEDMEM_HPP