Use single precision in gain calculation, use pointers instead of span. #8051

RAMitchell · 2022-07-05T15:11:24Z

Removing spans in favour of raw pointers decreases register pressure in the GPU kernel significantly.

Use of double/single precision in gain calculations is inconsistent. Prefer single precision.

trivialfis · 2022-07-06T09:40:17Z

Do you have any accuracy benchmarks for small datasets? Preferably with the CPU implementation as well.

RAMitchell · 2022-07-09T18:42:06Z

Here are two gbm-bench runs covering both CPU and GPU. For each implementation, the accuracy metrics are identical before and after the PR.

Before PR

{
"airline": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.8434307346593626,
      "Accuracy": 0.7182532274977227,
      "Log_Loss": 0.5297073384219716,
      "Precision": 0.6536453477079318,
      "Recall": 0.8642811257864607
    },
    "train_time": 966.4464671728201,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.8431610435789277,
      "Accuracy": 0.718068859889482,
      "Log_Loss": 0.5300302877835903,
      "Precision": 0.6534910973662341,
      "Recall": 0.8641185139771076
    },
    "train_time": 91.14509701775387,
    "train_time_std": 0.0
  }
},
"bosch": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.6902547809021118,
      "Accuracy": 0.955129883843717,
      "Log_Loss": 0.24913303252016256,
      "Precision": 0.04193227091633466,
      "Recall": 0.29543859649122806
    },
    "train_time": 57.79425460193306,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.6911762239977932,
      "Accuracy": 0.9580316789862724,
      "Log_Loss": 0.24111224289242209,
      "Precision": 0.04423262289814715,
      "Recall": 0.28982456140350876
    },
    "train_time": 12.88072164868936,
    "train_time_std": 0.0
  }
},
"covtype": {
  "xgb-cpu": {
    "accuracy": {
      "Accuracy": 0.9398466476769103,
      "F1": 0.9397222738018455,
      "Precision": 0.9398793000566292,
      "Recall": 0.9398466476769103
    },
    "train_time": 39.06878037704155,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "Accuracy": 0.9379620147500495,
      "F1": 0.9378270737037167,
      "Precision": 0.9380157400229653,
      "Recall": 0.9379620147500495
    },
    "train_time": 18.243315340019763,
    "train_time_std": 0.0
  }
},
"epsilon": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.9477093412445664,
      "Accuracy": 0.87034,
      "Log_Loss": 0.3009770990117234,
      "Precision": 0.8434001151196687,
      "Recall": 0.9092783505154639
    },
    "train_time": 1550.9631590410136,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.9481207911778409,
      "Accuracy": 0.87118,
      "Log_Loss": 0.2998701572805944,
      "Precision": 0.8448715324936278,
      "Recall": 0.9090381343208888
    },
    "train_time": 46.500129331834614,
    "train_time_std": 0.0
  }
},
"fraud": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.9654261008004777,
      "Accuracy": 0.9995611109160493,
      "Log_Loss": 0.003519674551653179,
      "Precision": 0.9506172839506173,
      "Recall": 0.7857142857142857
    },
    "train_time": 2.6741700717248023,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.965037059421405,
      "Accuracy": 0.9995611109160493,
      "Log_Loss": 0.00356269826075694,
      "Precision": 0.9397590361445783,
      "Recall": 0.7959183673469388
    },
    "train_time": 1.332988286856562,
    "train_time_std": 0.0
  }
},
"higgs": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.8399291373085628,
      "Accuracy": 0.7342790909090909,
      "Log_Loss": 0.5217814797614634,
      "Precision": 0.6912109500847503,
      "Recall": 0.9014453459278047
    },
    "train_time": 112.69107929291204,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.8393949180683775,
      "Accuracy": 0.7335804545454545,
      "Log_Loss": 0.5225674019070321,
      "Precision": 0.6904788642120271,
      "Recall": 0.9015628188704419
    },
    "train_time": 17.52672170335427,
    "train_time_std": 0.0
  }
},
"year": {
  "xgb-cpu": {
    "accuracy": {
      "MeanAbsError": 6.220576286315918,
      "MeanSquaredError": 79.77742004394531,
      "MedianAbsError": 4.271484375
    },
    "train_time": 16.023101320955902,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "MeanAbsError": 6.22576379776001,
      "MeanSquaredError": 79.91011810302734,
      "MedianAbsError": 4.2696533203125
    },
    "train_time": 6.9804930170066655,
    "train_time_std": 0.0
  }
}
}

After PR

{
"airline": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.8434307346593626,
      "Accuracy": 0.7182532274977227,
      "Log_Loss": 0.5297073384219716,
      "Precision": 0.6536453477079318,
      "Recall": 0.8642811257864607
    },
    "train_time": 994.9771266668104,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.8431610435789277,
      "Accuracy": 0.718068859889482,
      "Log_Loss": 0.5300302877835903,
      "Precision": 0.6534910973662341,
      "Recall": 0.8641185139771076
    },
    "train_time": 89.69093904690817,
    "train_time_std": 0.0
  }
},
"bosch": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.6902547809021118,
      "Accuracy": 0.955129883843717,
      "Log_Loss": 0.24913303252016256,
      "Precision": 0.04193227091633466,
      "Recall": 0.29543859649122806
    },
    "train_time": 55.15471205022186,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.6911762239977932,
      "Accuracy": 0.9580316789862724,
      "Log_Loss": 0.24111224289242209,
      "Precision": 0.04423262289814715,
      "Recall": 0.28982456140350876
    },
    "train_time": 12.608841334935278,
    "train_time_std": 0.0
  }
},
"covtype": {
  "xgb-cpu": {
    "accuracy": {
      "Accuracy": 0.9398466476769103,
      "F1": 0.9397222738018455,
      "Precision": 0.9398793000566292,
      "Recall": 0.9398466476769103
    },
    "train_time": 40.34531668201089,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "Accuracy": 0.9379620147500495,
      "F1": 0.9378270737037167,
      "Precision": 0.9380157400229653,
      "Recall": 0.9379620147500495
    },
    "train_time": 17.709761895705014,
    "train_time_std": 0.0
  }
},
"epsilon": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.9477093412445664,
      "Accuracy": 0.87034,
      "Log_Loss": 0.3009770990117234,
      "Precision": 0.8434001151196687,
      "Recall": 0.9092783505154639
    },
    "train_time": 1647.445112537127,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.9481207911778409,
      "Accuracy": 0.87118,
      "Log_Loss": 0.2998701572805944,
      "Precision": 0.8448715324936278,
      "Recall": 0.9090381343208888
    },
    "train_time": 45.50282173091546,
    "train_time_std": 0.0
  }
},
"fraud": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.9654261008004777,
      "Accuracy": 0.9995611109160493,
      "Log_Loss": 0.003519674551653179,
      "Precision": 0.9506172839506173,
      "Recall": 0.7857142857142857
    },
    "train_time": 2.592053124215454,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.965037059421405,
      "Accuracy": 0.9995611109160493,
      "Log_Loss": 0.00356269826075694,
      "Precision": 0.9397590361445783,
      "Recall": 0.7959183673469388
    },
    "train_time": 1.2642242829315364,
    "train_time_std": 0.0
  }
},
"higgs": {
  "xgb-cpu": {
    "accuracy": {
      "AUC": 0.8399291373085628,
      "Accuracy": 0.7342790909090909,
      "Log_Loss": 0.5217814797614634,
      "Precision": 0.6912109500847503,
      "Recall": 0.9014453459278047
    },
    "train_time": 109.59541429579258,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "AUC": 0.8393949180683775,
      "Accuracy": 0.7335804545454545,
      "Log_Loss": 0.5225674019070321,
      "Precision": 0.6904788642120271,
      "Recall": 0.9015628188704419
    },
    "train_time": 17.26801946386695,
    "train_time_std": 0.0
  }
},
"year": {
  "xgb-cpu": {
    "accuracy": {
      "MeanAbsError": 6.220576286315918,
      "MeanSquaredError": 79.77742004394531,
      "MedianAbsError": 4.271484375
    },
    "train_time": 17.15528222732246,
    "train_time_std": 0.0
  },
  "xgb-gpu": {
    "accuracy": {
      "MeanAbsError": 6.22576379776001,
      "MeanSquaredError": 79.91011810302734,
      "MedianAbsError": 4.2696533203125
    },
    "train_time": 6.902157460339367,
    "train_time_std": 0.0
  }
}
}

trivialfis · 2022-07-11T18:12:24Z

It's weird that the CPU implementation seems to be slower after the PR.

RAMitchell · 2022-07-12T09:02:11Z

I wouldn't read too much into it. The CPU runs have very high variance on my dual-socket machine.

Use single precision, use pointers instead of span.

197ce75

trivialfis approved these changes Jul 12, 2022

View reviewed changes

RAMitchell merged commit 0bdaca2 into dmlc:master Jul 12, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Use single precision in gain calculation, use pointers instead of span. #8051

Use single precision in gain calculation, use pointers instead of span. #8051

RAMitchell commented Jul 5, 2022

trivialfis commented Jul 6, 2022

RAMitchell commented Jul 9, 2022

trivialfis commented Jul 11, 2022

RAMitchell commented Jul 12, 2022

Use single precision in gain calculation, use pointers instead of span. #8051

Use single precision in gain calculation, use pointers instead of span. #8051

Conversation

RAMitchell commented Jul 5, 2022

trivialfis commented Jul 6, 2022

RAMitchell commented Jul 9, 2022

trivialfis commented Jul 11, 2022

RAMitchell commented Jul 12, 2022