Skip to content

Latest commit

 

History

History
282 lines (218 loc) · 10.5 KB

telco.org

File metadata and controls

282 lines (218 loc) · 10.5 KB

电信用户流失分析

说明

这次文章的参考来自 https://zhuanlan.zhihu.com/p/68397317

提出问题

关于用户留存有这样一个观点,如果将用户流失率降低5%,公司利润将提升25%-85% 如今高居不下的获客成本让电信运营商遭遇“天花板”,甚至陷入获客难的窘境 随着市场饱和度上升,电信运营商亟待解决增加用户黏性,延长用户生命周期的问题 因此,电信用户流失分析与预测至关重要。

数据集来自 kaggle

理解数据

字段名数据类型字段描述
customerIDString顾客ID
genderString客户性别
SeniorCitizenInteger客户是否为老年人
PartnerString客户是否有合作伙伴
DependentsString客户是否有家属
tenureInteger客户在公司停留的月数
PhoneServiceString客户是否有电话服务
MultipleLinesString客户是否有多条路线
InternetServiceString客户的互联网服务提供商
OnlineSecurityString客户是否具有在线安全性
OnlineBackupString客户是否有在线备份
DeviceProtectionString客户是否有设备保护
TechSupportString客户是否有技术支持
StreamingTVString客户是否有流媒体电视
StreamingMoviesString客户是否与流媒体电影
ContractString客户的合同期限
PaperlessBillingString客户是否有无纸化账单
PaymentMethodString客户的支付方式
MonthlyChargesInteger每月向客户收取的金额
TotalChargesInteger向客户收取的总金额
ChurnString客户是否流失

数据清洗

首先导入数据

origindata = CSV.read("data/telco-customer-churn/data.csv", DataFrame)

查看数据类型,发现 TotalCharges 为字符串,把他改为浮点型数据,但是发现他有几个空的数据,是 ” ” 我们定义处理函数

function transformTotalCharges(dataframe::DataFrame)
  indexs = dataframe[!, :TotalCharges] .== " "
  dataframe[!, :TotalCharges][indexs] .= string.(dataframe[!, :MonthlyCharges][indexs])

  dataframe[!, :TotalCharges] = map(x -> parse(Float64, x), dataframe[!, :TotalCharges])
  dataframe[!, :tenure][indexs] .= 1
  return dataframe
end

解释下最后几行代码, TotalCharges 为空的行中,其 tenure 入网时长为 0 个月,这样我们推测是当月新入用户 根据一般禁烟,用户即使在注册的当月流失,也需缴纳当月费用,因此将这种用户入网市场改为1,将总消费额填充为月消费额

可视化分析

首先我们查看流失用户和占比

function plotChurn(dataframe::DataFrame)
  counts = countmap(dataframe[!, :Churn])

  yescount = counts["Yes"]
  nocount = counts["No"]
  total = yescount + nocount

  xs = ["Yes", "No"]
  ys1 = [yescount / total, nocount / total]
  ys2 = [yescount, nocount]
  pie(xs, ys1, aspect_ratio = :equal) |> display

  bar(xs, ys2, size = figuresize) |> display 
end

plotChurn(origindata)

images/电信用户流失分析/2022-07-30_21-13-22_screenshot.png

images/电信用户流失分析/2022-07-30_21-13-42_screenshot.png

属于不平衡数据集,流失用户占比达 26.54%

用户属性分析

function plotPercentages(dataframe::DataFrame, feature::Symbol, ymatrix::Matrix{Float64})
  columns = [feature, :Churn]
  groupDataframe = groupby(select(dataframe, columns), feature)
  xs = []

  let
    index = 1
    for _dataframe in groupDataframe
      x = first(_dataframe[!, feature])
      push!(xs, x)

      yescount = count(isequal("Yes"), _dataframe[!, :Churn])
      nocount = count(isequal("No"), _dataframe[!, :Churn])
      total = yescount + nocount

      ymatrix[index, :] = [yescount / total, nocount / total]
      index += 1
    end
  end
  groupedbar(ymatrix, xticks = (1:length(xs), xs), label = ["Yes" "No"]) |> display
end
plotPercentages(origindata, :SeniorCitizen, ones((2, 2)))

images/电信用户流失分析/2022-07-30_21-16-00_screenshot.png

plotPercentages(origindata, :gender, ones((2, 2)))

images/电信用户流失分析/2022-07-30_21-16-23_screenshot.png

plotPercentages(origindata, :Partner, ones((2, 2)))

images/电信用户流失分析/2022-07-30_21-16-39_screenshot.png

plotPercentages(origindata, :Dependents, ones((2, 2)))

images/电信用户流失分析/2022-07-30_21-16-55_screenshot.png

density(origindata.tenure, group = origindata.Churn, size = figuresize) |> display

images/电信用户流失分析/2022-07-30_21-17-38_screenshot.png

可以看出

  • 用户流失与性别基本无关
  • 年老用户流失占比显著高于年轻用户

服务属性分析

plotPercentages(origindata, :MultipleLines, ones((3, 2)))
plotPercentages(origindata, :InternetService, ones((3, 2)))

images/电信用户流失分析/2022-07-30_21-20-52_screenshot.png

images/电信用户流失分析/2022-07-30_21-21-11_screenshot.png

function plotPaperlessBillingChurn(dataframe::DataFrame)
  columns = [:PaperlessBilling, :Contract, :Churn]
  groupDataframe = groupby(select(dataframe, columns), :PaperlessBilling)
  array = unique(dataframe[!, :Contract])
  for _dataframe in groupDataframe
    _dataframe = filter(row -> row.Churn == "Yes", _dataframe)
    paperlessbilling = first(_dataframe[!, :PaperlessBilling])

    churn1 = count(isequal(array[1]), _dataframe[!, :Contract])
    churn2 = count(isequal(array[2]), _dataframe[!, :Contract])
    churn3 = count(isequal(array[3]), _dataframe[!, :Contract])

    total = churn1 + churn2 + churn3
    ys = [churn1 / total, churn2 / total, churn3 / total]
    bar(array, ys, title = "PaperlessBilling = $paperlessbilling") |> display
  end

end

plotPaperlessBillingChurn(origindata)

images/电信用户流失分析/2022-07-30_21-21-56_screenshot.png

function plotNumberOfCustomer(dataframe::DataFrame)
  columns = ["PhoneService", "MultipleLines", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies"]
  ymatrix = ones((length(columns), 3))

  index = 1
  for column in columns
    _dataframe = select(filter(row -> row.InternetService != "No", dataframe), [column, "Churn"])

    count1 = count(isequal("Yes"), _dataframe[!, column])
    count2  = count(isequal("No"),  _dataframe[!, column])
    if column != "MultipleLines"
      ymatrix[index, :] = [count1, count2, 0]
    else
      ymatrix[index, :] = [count1, count2, count(isequal("No phone service"), _dataframe[!, column])]
    end
    index += 1

  end

  groupedbar(ymatrix, xticks = (1:length(columns), columns), label = ["Has Service" "No Service" "No Service"], size = figuresize) |> display
end

plotNumberOfCustomer(origindata)

images/电信用户流失分析/2022-07-30_21-22-28_screenshot.png

function plotNumberOfChurnCustomer(dataframe::DataFrame)
  columns = ["PhoneService", "MultipleLines", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies"]
  ymatrix = ones((length(columns), 2))

  index = 1
  for column in columns
    _dataframe = select(filter(row -> row.InternetService != "No" && row.Churn == "Yes", dataframe), [column, "Churn"])
    # has service but churn
    yescount = count(isequal("Yes"), _dataframe[!, column])
    # has no service but churn
    nocount = count(isequal("No"), _dataframe[!, column])
    ymatrix[index, :] = [yescount, nocount]
    index += 1
  end

  groupedbar(ymatrix, xticks = (1:length(columns), columns), label = ["Has Service" "No Service"], size = figuresize) |> display
end

plotNumberOfChurnCustomer(origindata)

images/电信用户流失分析/2022-07-30_21-23-00_screenshot.png

可以看出

  • 电话服务整体对用户流失影响较小
  • 单光纤用户的流失占比较高
  • 光纤用户绑定了安全,备份,保护,技术支持服务的流失率较低
  • 光纤用户附加流媒体电视,电影服务的流失率占比较高

合同属性分析

plotPercentages(origindata, :PaymentMethod, ones((4, 2)))

images/电信用户流失分析/2022-07-30_21-23-49_screenshot.png

density(origindata.MonthlyCharges, group = origindata.Churn, size = figuresize) |> display

images/电信用户流失分析/2022-07-30_21-24-21_screenshot.png

density(origindata.TotalCharges, group = origindata.Churn, size = figuresize) |> display

images/电信用户流失分析/2022-07-30_21-24-50_screenshot.png

可以看出

  • 采用电子支票支付的用户流失率最高,推测该方式的使用体验较为一般
  • 签订合同方式对客户流失率影响为 按月签订 > 按一年签订 > 按两年签订,证明长期合同最能保留客户
  • 月消费额大约在 70 - 110 之间用户流失率较高
  • 长期来看,用户总消费越高,流失率月底,符合一般经验