Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FSU] Inference FSU with Shared memory #2969

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[FSU] add FSU file offset info
Make NeuralNetwork able to pass the weight file path to the swap_device together with the weight offsets (file offsets),
so that each weight's offset within the weight file can be calculated.

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Co-authored-by: hyeonseok <hs89.lee@samsung.com>
Signed-off-by: Donghak PARK <donghak.park@samsung.com>
  • Loading branch information
DonghakPark and lhs8928 committed Feb 25, 2025
commit ea20170850ba2e2252b6a5b7cc7fdf96b89c7979
3 changes: 2 additions & 1 deletion nntrainer/graph/network_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,8 @@ sharedConstTensors NetworkGraph::forwarding(
for (unsigned int i = 0; i < graph.getNumOutputNodes(); ++i) {
auto const &output_layer_node = LNODE(graph.getOutputNode(i));
for (unsigned int j = 0; j < output_layer_node->getNumOutputs(); ++j) {
out.push_back(MAKE_SHARED_TENSOR(output_layer_node->getOutput(j)));
// @todo we should determine what type to return
out.push_back(MAKE_SHARED_TENSOR(output_layer_node->getOutput(j).clone(TensorDim::DataType::FP32)));
}
}

Expand Down
18 changes: 18 additions & 0 deletions nntrainer/graph/network_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,24 @@ class NetworkGraph {
*/
unsigned int getNumLoadedTensorPoolTensors();

/**
 * @brief Set the weight file path used for FSU (flash storage utilization).
 *
 * Thin delegation: the path is handed straight to the tensor manager,
 * which propagates it down to its weight pool.
 *
 * @param path FSU weight file path
 */
void setFsuWeightPath(std::string path) { tensor_manager->setFsuWeightPath(path); }

/**
 * @brief Register the weight file offsets used for FSU loading.
 *
 * Thin delegation to the tensor manager. The pairs are presumably
 * (offset, size) entries into the weight file — confirm against the
 * SwapDevice consumer.
 *
 * @param offsets weight file offsets
 */
void setWeightOffset(std::vector<std::pair<size_t,size_t>> offsets) {
  tensor_manager->setWeightOffset(offsets);
}

private:
std::map<std::string, std::string> sub_in_out; /** This is map to identify
input and output layer name of subgraph */
Expand Down
42 changes: 38 additions & 4 deletions nntrainer/tensor/cache_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <utility>
#include <vector>

#include <cache_elem.h>
#include <common.h>
#include <memory_pool.h>
#include <swap_device.h>

Expand Down Expand Up @@ -50,6 +51,14 @@ class CachePool : public MemoryPool {
*/
explicit CachePool(const std::string &path, const std::string &name);

/**
 * @brief CachePool constructor with cache path & ExecutionMode
 *
 * @param path directory for the cache (swap) file
 * @param name pool name, used to build the swap file name
 * @param exec_mode execution mode; INFERENCE reuses an existing weight
 *                  file read-style while TRAIN keeps the swap file
 *                  writeable — see setFsuWeightPath()
 */
explicit CachePool(
const std::string &path, const std::string &name,
ml::train::ExecutionMode exec_mode = ml::train::ExecutionMode::TRAIN);

/**
* @brief MemoryPool destructor
*
Expand Down Expand Up @@ -217,11 +226,36 @@ class CachePool : public MemoryPool {
*/
std::vector<CachePolicy> &getCachePolicy() { return policies; }

private:
std::string name; /**< pool name */
std::shared_ptr<SwapDevice> swap_device; /**< swap device */
CacheElems elems; /**< cache elements */
/**
 * @brief Set the FSU weight file path and re-open the swap device on it.
 *
 * Shuts the swap device down and restarts it so that it maps the given
 * weight file. In INFERENCE mode the file already holds the weights, so it
 * must be opened non-writeable; a writeable start() truncates/extends the
 * file (O_TRUNC in SwapDevice::start), which would destroy them.
 *
 * @param path FSU weight file path (sink parameter; moved into the device)
 */
void setFsuWeightPath(std::string path) override {
  swap_device->setFsuWeightPath(std::move(path));
  // finish() closes the fd; start() early-returns while fd > 0, so the
  // new path only takes effect after this restart cycle.
  swap_device->finish();
  swap_device->start(size(),
                     execution_mode_ != ml::train::ExecutionMode::INFERENCE);
}

/**
 * @brief Set the weight file offsets used for FSU loading.
 *
 * @param offsets weight file offsets — presumably one (offset, size) pair
 *                per weight; confirm against the SwapDevice consumer.
 *                Sink parameter; moved into the swap device.
 */
void
setWeightOffset(std::vector<std::pair<size_t, size_t>> offsets) override {
  swap_device->setWeightOffset(std::move(offsets));
}

private:
std::string name; /**< pool name */
std::shared_ptr<SwapDevice> swap_device; /**< swap device */
CacheElems elems; /**< cache elements */
ml::train::ExecutionMode execution_mode_; /**< execution mode */
std::list<std::shared_ptr<CacheElem>> actives;
std::vector<CachePolicy> policies;
std::map<unsigned int, ExecIds> exec_ids;
Expand Down
22 changes: 20 additions & 2 deletions nntrainer/tensor/manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ class Manager {
unsigned int lookahead = 0, const std::string tensor_format_ = "NCHW",
const std::string tensor_dtype_ = "FP32-FP32",
ExecutionMode exec_mode_ = ExecutionMode::TRAIN) :
weight_pool(enable_swap_, swap_path, "weight_pool"),
weight_pool(enable_swap_, swap_path, "weight_pool", exec_mode_),
tensor_pool(enable_swap_ && (exec_mode_ == ExecutionMode::TRAIN), swap_path,
"tensor_pool"),
"tensor_pool", exec_mode_),
enable_swap(enable_swap_),
enable_optimizations(true),
swap_lookahead(lookahead),
Expand Down Expand Up @@ -554,6 +554,24 @@ class Manager {
*/
unsigned int getNumLoadedTensorPoolTensors();

/**
 * @brief Set the weight file path used for FSU.
 *
 * Delegates to the weight pool only.
 * NOTE(review): tensor_pool is not updated here — confirm it never needs
 * the FSU path.
 *
 * @param path FSU weight file path
 */
void setFsuWeightPath(std::string path) { weight_pool.setFsuWeightPath(path); }

/**
 * @brief Register the weight file offsets used for FSU loading.
 *
 * Delegates to the weight pool only; tensor_pool is intentionally left
 * untouched (weights live in weight_pool).
 *
 * @param offsets weight file offsets
 */
void setWeightOffset(std::vector<std::pair<size_t, size_t>> offsets) {
  weight_pool.setWeightOffset(offsets);
}

private:
/** @todo: merge this list to one */
std::vector<std::unique_ptr<Weight>> weights_v2; /**< weights for the layers
Expand Down
1 change: 1 addition & 0 deletions nntrainer/tensor/memory_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ void MemoryPool::clear() {
memory_size.clear();
memory_validity.clear();
memory_offset.clear();
file_offset.clear();
memory_is_wgrad.clear();

pool_size = 0;
Expand Down
20 changes: 20 additions & 0 deletions nntrainer/tensor/memory_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,31 @@ class MemoryPool {
*/
void *getMemoryPoolAddress() { return mem_pool; }

/**
 * @brief set FSU weight path
 *
 * Default implementation is a no-op; swap-backed pools (e.g. CachePool)
 * override this to redirect their swap device at the weight file.
 *
 * @param path FSU weight file path (ignored by the base pool)
 */
virtual void setFsuWeightPath(std::string /*path*/) {}

/**
 * @brief set weight file offset for FSU loading
 *
 * Default implementation is a no-op; overridden by swap-backed pools.
 *
 * @param offsets weight file offsets (ignored by the base pool)
 */
virtual void setWeightOffset(std::vector<std::pair<size_t, size_t>> /*offsets*/) {}

protected:
/**
 * @brief Get memory offset
 *
 * @return mutable reference to the offsets of each requested memory
 *         chunk inside the pool (parallel to the other per-request vectors)
 */
std::vector<size_t> &getMemoryOffset() { return memory_offset; }

/**
 * @brief Get file offset
 *
 * @return mutable reference to the per-request offsets into the weight
 *         (bin) file, used for FSU loading
 */
std::vector<size_t> &getFileOffset() { return file_offset; }

/**
* @brief Get memory size
*/
Expand Down Expand Up @@ -213,6 +232,7 @@ class MemoryPool {
std::vector<std::pair<unsigned int, unsigned int>>
memory_validity; /**< validity intervals for each requested memory */
std::vector<size_t> memory_offset; /**< offsets for the memory requested */
std::vector<size_t> file_offset; /**< offsets for the bin file */
std::vector<std::vector<unsigned int>>
memory_exec_order; /**< execution order for the requested memory */

Expand Down
21 changes: 14 additions & 7 deletions nntrainer/tensor/swap_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,16 @@

namespace nntrainer {

void SwapDevice::start(size_t size) {
void SwapDevice::start(size_t size, bool writeable) {
if (fd > 0)
return;

fd = open(dev_path.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0666UL);
if (writeable) {
fd =
open(dev_path.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_SYNC, (mode_t)0666);
} else {
fd = open(dev_path.c_str(), O_RDWR | O_CREAT, (mode_t)0666);
}
NNTR_THROW_IF(fd < 0, std::runtime_error)
<< "SwapDevice: open file: " << dev_path;

Expand All @@ -39,11 +44,12 @@ void SwapDevice::start(size_t size) {
NNTR_THROW_IF(off < 0, std::runtime_error)
<< "SwapDevice: seek file: " << dev_path;

ssize_t len;
len = write(fd, "", 1);
NNTR_THROW_IF(len != 1, std::runtime_error)
<< "SwapDevice: write file: " << dev_path;

if (writeable) {
ssize_t len;
len = write(fd, "", 1);
NNTR_THROW_IF(len != 1, std::runtime_error)
<< "SwapDevice: write file: " << dev_path;
}
off = lseek(fd, 0, SEEK_SET);
NNTR_THROW_IF(off < 0, std::runtime_error)
<< "SwapDevice: seek file: " << dev_path;
Expand All @@ -62,6 +68,7 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, void *memory_ptr,

char *ptr = static_cast<char *>(
mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, off));

const size_t error_buflen = 100;
char error_buf[error_buflen];
NNTR_THROW_IF(ptr == (void *)-1, std::runtime_error)
Expand Down
36 changes: 30 additions & 6 deletions nntrainer/tensor/swap_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,20 @@ class SwapDevice {
*
*/
explicit SwapDevice(const std::string &name) :
dev_path(swap_device_default_path + name), fd(-1), num_loaded_tensors(0) {}
dev_path(swap_device_default_path + name),
fd(-1),
num_loaded_tensors(0),
offset_index(0) {}

/**
 * @brief SwapDevice constructor taking an explicit directory path
 *
 * @param path directory the swap/weight file lives in
 * @param name file name, appended to path with a '/' separator
 */
explicit SwapDevice(const std::string &path, const std::string &name) :
dev_path(path + "/" + name), fd(-1), num_loaded_tensors(0) {}
dev_path(path + "/" + name),
fd(-1),
num_loaded_tensors(0),
offset_index(0) {}

/**
* @brief SwapDevice destructor
Expand All @@ -74,9 +80,10 @@ class SwapDevice {
* @brief Start device
*
* @param size The size of requested swap device space
* @param writeable Writeable flag
*
*/
void start(size_t size);
void start(size_t size, bool writeable = true);

/**
* @brief Allocate and get data
Expand Down Expand Up @@ -153,11 +160,28 @@ class SwapDevice {
is_unmapped[address] = true;
}

private:
const std::string dev_path; /**< device path */
int fd; /**< device file description */
/**
 * @brief set FSU weight path
 *
 * Redirects this device at an existing weight file. Note: start() returns
 * early while the device is already open (fd > 0), so the new path only
 * takes effect after a finish() + start() cycle (CachePool does this).
 *
 * @param file_path FSU weight file path
 */
void setFsuWeightPath(std::string file_path) { dev_path = file_path; }

/**
 * @brief set weight file offset for FSU loading
 *
 * Stores per-weight offsets into the weight file; presumably
 * (offset, size) pairs — confirm against the producer in NetworkGraph.
 *
 * @param offsets weight file offset
 */
void
setWeightOffset(std::vector<std::pair<size_t, size_t>> offsets) {
weight_offset = offsets;
}

private:
std::string dev_path; /**< device path */
int fd; /**< device file description */
std::vector<std::pair<size_t, size_t>> weight_offset;
unsigned int num_loaded_tensors;
int offset_index;
#ifdef USE_MMAP
std::map<void *, std::tuple<void *, size_t, off_t, ssize_t>>
mapped; /**< <pointer, <orig_pointer, size, offset, original size>> */
Expand Down
32 changes: 29 additions & 3 deletions nntrainer/tensor/tensor_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <utility>

#include <cache_loader.h>
#include <cache_pool.h>
#include <common.h>
#include <tensor.h>
#include <tensor_wrap_specs.h>

Expand All @@ -48,10 +49,13 @@ class TensorPool {
/**
* @brief Constructor of TensorPool
*/
TensorPool(bool enable_swap, const std::string &swap_path = "",
const std::string &swap_name = "") {
TensorPool(
bool enable_swap, const std::string &swap_path = "",
const std::string &swap_name = "",
ml::train::ExecutionMode execution_mode = ml::train::ExecutionMode::TRAIN) {
if (enable_swap) {
auto cache_pool = std::make_shared<CachePool>(swap_path, swap_name);
auto cache_pool =
std::make_shared<CachePool>(swap_path, swap_name, exec_mode_);
cache_loader = std::make_unique<CacheLoader>(cache_pool);
mem_pool = cache_pool;
} else {
Expand Down Expand Up @@ -311,6 +315,28 @@ class TensorPool {
*/
unsigned int getNumLoadedTensors();

/**
 * @brief Set the FSU weight file path on the underlying pool.
 *
 * Forwarded to the memory pool when one exists; the base MemoryPool
 * implementation ignores the path, so this only has an effect for
 * swap-backed (CachePool) pools.
 *
 * @param path FSU weight file path (sink parameter; moved into the pool)
 */
void setFsuWeightPath(std::string path) {
  if (mem_pool) {
    mem_pool->setFsuWeightPath(std::move(path));
  }
}

/**
 * @brief Set the weight file offsets used for FSU loading.
 *
 * Forwarded to the memory pool when one exists; no-op otherwise.
 *
 * @param offsets weight file offsets (sink parameter; moved into the pool)
 */
void setWeightOffset(std::vector<std::pair<size_t, size_t>> offsets) {
  if (mem_pool) {
    mem_pool->setWeightOffset(std::move(offsets));
  }
}

private:
/**
* @brief Source tensor detailed specification
Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy