Skip to content

Commit

Permalink
Merge pull request #525 from sinhrks/df_columns
Browse files Browse the repository at this point in the history
Python supports pd.DataFrame with non-str columns
  • Loading branch information
tqchen committed Oct 4, 2015
2 parents 2859c19 + dbcb4c8 commit 3109069
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
2 changes: 1 addition & 1 deletion python-package/xgboost/core.py
Expand Up @@ -153,7 +153,7 @@ def _maybe_from_pandas(data, feature_names, feature_types):
raise ValueError('DataFrame.dtypes must be int, float or bool')

if feature_names is None:
- feature_names = data.columns.tolist()
+ feature_names = data.columns.format()
if feature_types is None:
mapper = {'int64': 'int', 'float64': 'q', 'bool': 'i'}
feature_types = [mapper[dtype.name] for dtype in dtypes]
Expand Down
15 changes: 15 additions & 0 deletions tests/python/test_basic.py
Expand Up @@ -118,6 +118,21 @@ def test_pandas(self):
df = pd.DataFrame([[1, 2., 'x'], [2, 3., 'y']], columns=['a', 'b', 'c'])
self.assertRaises(ValueError, xgb.DMatrix, df)

# numeric columns
df = pd.DataFrame([[1, 2., True], [2, 3., False]])
dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
assert dm.feature_names == ['0', '1', '2']
assert dm.feature_types == ['int', 'q', 'i']
assert dm.num_row() == 2
assert dm.num_col() == 3

df = pd.DataFrame([[1, 2., 1], [2, 3., 1]], columns=[4, 5, 6])
dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
assert dm.feature_names == ['4', '5', '6']
assert dm.feature_types == ['int', 'q', 'int']
assert dm.num_row() == 2
assert dm.num_col() == 3

def test_load_file_invalid(self):

self.assertRaises(ValueError, xgb.Booster,
Expand Down

0 comments on commit 3109069

Please sign in to comment.