[FG] Update FG for latest paddle and cuda (#6812)

* fix FG for new paddle and cuda env
PaddlePaddle · Sep 5, 2023 · 0283a6b · 0283a6b
1 parent 3a9d41b
commit 0283a6b
Show file tree

Hide file tree

Showing 31 changed files with 1,331 additions and 73 deletions.
diff --git a/paddlenlp/ops/CMakeLists.txt b/paddlenlp/ops/CMakeLists.txt
@@ -24,7 +24,7 @@ endif()
 
 INCLUDE(ExternalProject)
 
-set(CXX_STD "14" CACHE STRING "C++ standard")
+set(CXX_STD "17" CACHE STRING "C++ standard")
 
 option(ON_INFER         "Compiled with inference. "                                 OFF)
 option(WITH_GPU         "Compiled with GPU/CPU, default use CPU."                   ON)
@@ -248,8 +248,8 @@ set(CMAKE_C_FLAGS_DEBUG    "${CMAKE_C_FLAGS_DEBUG}    -Wall -O0")
 set(CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG}  -Wall -O0")
 set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -O0 -G -Xcompiler -Wall")
 
-set(CMAKE_CXX_STANDARD "${CXX_STD}")
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX17_STANDARD_COMPILE_OPTION "-std=c++${CXX_STD}")  # ${CXX_STD} must be expanded; a bare {CXX_STD} is passed through literally
+set(CMAKE_CXX17_EXTENSION_COMPILE_OPTION "-std=gnu++${CXX_STD}")  # GNU-extensions variant of the same standard flag
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --std=c++${CXX_STD}")
@@ -387,6 +387,8 @@ file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/${THIRD_PATH}/source/${THIRD_PARTY_NAME}
 set(MUTE_COMMAND grep -rl "printf(\"\\[WARNING\\]" ${CMAKE_BINARY_DIR}/${THIRD_PATH}/source/${THIRD_PARTY_NAME}/ | xargs -i{} sed -i "s/printf(\"\\WWARNING\\W decoding[^)]\\{1,\\})/ /" {})
 set(OPEN_ATTENTION_MUTE_COMMAND grep -rl "printf(\"\\[WARNING\\]" ${CMAKE_BINARY_DIR}/${THIRD_PATH}/source/${THIRD_PARTY_NAME}/ | xargs -i{} sed -i "s/printf(\"\\WWARNING\\W\\WOpenMultiHeadAttention\\W[^)]\\{1,\\})/ /" {})
 
+set(RM_OLD_CUB_COMMAND rm -rf ${CMAKE_BINARY_DIR}/${THIRD_PATH}/source/${THIRD_PARTY_NAME}/fastertransformer/cuda/cub)
+
 set(FT_PATCH_COMMAND
   printf \\n\\n > blank_lines
   && cp ${allocator_src} ${allocator_dst}
@@ -428,6 +430,7 @@ set(FT_PATCH_COMMAND
   && rm blank_lines
   && ${MUTE_COMMAND}
   && ${OPEN_ATTENTION_MUTE_COMMAND}
+  && ${RM_OLD_CUB_COMMAND}
 )
 
 # TODO(guosheng): Use UPDATE_COMMAND instead of PATCH_COMMAND to make cmake
@@ -438,14 +441,14 @@ set(FT_UPDATE_COMMAND git checkout nccl_dependent_refine && git checkout . && ${
 
 ExternalProject_Add(
   extern_${THIRD_PARTY_NAME}
-  GIT_REPOSITORY    https://gitee.com/paddlepaddle/FasterTransformer.git
+  GIT_REPOSITORY    https://github.com/NVIDIA/FasterTransformer.git
   GIT_TAG           nccl_dependent_refine
   PREFIX            ${THIRD_PATH}
   SOURCE_DIR        ${THIRD_PATH}/source/${THIRD_PARTY_NAME}
   UPDATE_COMMAND    ${FT_UPDATE_COMMAND}  # PATCH_COMMAND     ${FT_PATCH_COMMAND}
   BINARY_DIR        ${THIRD_PATH}/build/${THIRD_PARTY_NAME}
   INSTALL_COMMAND   ""
-  CMAKE_ARGS        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DSM=${SM} -DBUILD_PD=ON -DBUILD_ENCODER=${WITH_ENCODER} -DPY_CMD=${PY_CMD} -DON_INFER=${ON_INFER} -DPADDLE_LIB=${PADDLE_LIB} -DWITH_MKL=${WITH_MKL} -DWITH_STATIC_LIB=${WITH_STATIC_LIB} -DBUILD_GPT=${WITH_PARALLEL} -DWITH_ONNXRUNTIME=${WITH_ONNXRUNTIME}
+  CMAKE_ARGS        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DSM=${SM} -DBUILD_PD=ON -DBUILD_ENCODER=${WITH_ENCODER} -DPY_CMD=${PY_CMD} -DON_INFER=${ON_INFER} -DPADDLE_LIB=${PADDLE_LIB} -DWITH_MKL=${WITH_MKL} -DWITH_STATIC_LIB=${WITH_STATIC_LIB} -DBUILD_GPT=${WITH_PARALLEL} -DWITH_ONNXRUNTIME=${WITH_ONNXRUNTIME}
 )
 # -DBUILD_GPT=${WITH_GPT} 
 ExternalProject_Get_property(extern_${THIRD_PARTY_NAME} BINARY_DIR)

diff --git a/paddlenlp/ops/ext_utils.py b/paddlenlp/ops/ext_utils.py
@@ -17,6 +17,7 @@
 import os
 import subprocess
 import sys
+import sysconfig
 import textwrap
 from pathlib import Path
 
@@ -144,6 +145,8 @@ def build_with_command(self, ext_builder):
                 # CMake 3.12+ only.
                 build_args += ["-j{}".format(ext_builder.parallel)]
 
+        build_args += ["-j14"]
+
         if not os.path.exists(ext_builder.build_temp):
             os.makedirs(ext_builder.build_temp)
 
@@ -198,9 +201,17 @@ def build_with_command(self, ext_builder):
         # `GetCUDAComputeCapability` is not exposed yet, and detect CUDA/GPU
         # version in cmake file.
         # self.cmake_args += [f"-DSM={self.sm}"] if self.sm is not None else []
-        self.cmake_args += ["-DWITH_GPT=ON"]
+        self.cmake_args += "-DWITH_GPT=ON -DON_INFER=OFF -DWITH_MKL=ON -DWITH_ONNXRUNTIME=ON".split()
+
+        self.cmake_args += ["-DCMAKE_C_COMPILER={}".format(os.popen("which gcc").read().replace("\n", ""))]
+        self.cmake_args += ["-DCMAKE_CXX_COMPILER={}".format(os.popen("which g++").read().replace("\n", ""))]
+
+        self.cmake_args += ["-DPYTHON_LIBRARY={}".format(sysconfig.get_config_var("LIBDIR"))]
+        self.cmake_args += ["-DPYTHON_INCLUDE_DIR={}".format(sysconfig.get_config_var("INCLUDEPY"))]
+
         if self.need_parallel:
             self.cmake_args += ["-DWITH_PARALLEL=ON"]
+
         try:
             super(FasterTransformerExtension, self).build_with_command(ext_builder)
             # FastGeneration cmake file resets `CMAKE_LIBRARY_OUTPUT_DIRECTORY`

diff --git a/paddlenlp/ops/fast_transformer/src/CMakeLists.txt b/paddlenlp/ops/fast_transformer/src/CMakeLists.txt
@@ -48,7 +48,7 @@ if(ON_INFER)
     message(FATAL_ERROR "please set DEMO with -DDEMO=demo_name")
   endif()
 
-  include_directories("${PADDLE_LIB}/")
+  include_directories("${PADDLE_LIB}/paddle/include")
   set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/")
   include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
   include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
@@ -89,7 +89,7 @@ if(ON_INFER)
     if(WITH_MKL)
       set(FLAG_OPENMP "-fopenmp")
     endif()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 ${FLAG_OPENMP}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_OPENMP}")
   endif()
 
   if (USE_TENSORRT AND WITH_GPU)
@@ -124,7 +124,7 @@ if(ON_INFER)
       if(WIN32)
         set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
       else(WIN32)
-        set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+        set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libdnnl.so.3)
       endif(WIN32)
     endif()
   else()

diff --git a/paddlenlp/ops/fast_transformer/src/demo/gpt.cc b/paddlenlp/ops/fast_transformer/src/demo/gpt.cc
@@ -15,7 +15,7 @@
 #include <pthread.h>
 #include <algorithm>
 #include <atomic>
-#include <codecvt>
+#include <string>
 #include <cstring>
 #include <fstream>
 #include <iostream>
@@ -30,6 +30,7 @@
 #endif
 
 #include "helper.h"
+#include "utf8.h"
 
 #include <sys/time.h>
 #include <unistd.h>
@@ -58,7 +59,6 @@ const int BOS_IDX = 50256;
 const int EOS_IDX = 50256;
 const int PAD_IDX = 50256;
 const int MAX_LENGTH = 256;
-std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert;
 
 int batch_size = 1;
 int gpu_id = 0;
@@ -186,7 +186,7 @@ class DataReader {
     std::string line;
     int k = 0;
     while (std::getline(fin, line)) {
-      std::u32string tmp = convert.from_bytes(line);
+      std::u32string tmp = utf8::utf8to32(line);
       word2num_dict[tmp] = k;
       num2word_dict[k] = tmp;
       k += 1;
@@ -314,8 +314,8 @@ int main(int argc, char** argv) {
 
   paddle::inference::Main(batch_size,
                           gpu_id,
-                          convert.from_bytes(FLAGS_start_token),
-                          convert.from_bytes(FLAGS_end_token));
+                          utf8::utf8to32(FLAGS_start_token),
+                          utf8::utf8to32(FLAGS_end_token));
 
   return 0;
 }
diff --git a/paddlenlp/ops/fast_transformer/src/demo/helper.h b/paddlenlp/ops/fast_transformer/src/demo/helper.h
@@ -21,7 +21,7 @@ limitations under the License. */
 #include <sstream>
 #include <string>
 #include <vector>
-#include "paddle/include/paddle_inference_api.h"
+#include "paddle_inference_api.h"
 
 namespace paddle {
 namespace inference {

diff --git a/paddlenlp/ops/fast_transformer/src/demo/utf8.h b/paddlenlp/ops/fast_transformer/src/demo/utf8.h
@@ -0,0 +1,34 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "utf8/checked.h"
+#include "utf8/unchecked.h"
+
+#endif // header guard