diff --git a/src/Microsoft.Data.Analysis/DataFrameColumn.cs b/src/Microsoft.Data.Analysis/DataFrameColumn.cs
index 2085304925..afe3be4282 100644
--- a/src/Microsoft.Data.Analysis/DataFrameColumn.cs
+++ b/src/Microsoft.Data.Analysis/DataFrameColumn.cs
@@ -202,6 +202,13 @@ public void SetName(string newName)
/// The new length of the column
protected internal virtual void Resize(long length) => throw new NotImplementedException();
+ /// <summary>
+ /// Clone column to produce a copy
+ /// </summary>
+ /// <param name="numberOfNullsToAppend">Number of null values the implementation is asked to append after the copied values</param>
+ /// <returns>A new <see cref="DataFrameColumn"/></returns>
+ public DataFrameColumn Clone(long numberOfNullsToAppend = 0) => CloneImplementation(numberOfNullsToAppend);
+
///
/// Clone column to produce a copy potentially changing the order of values by supplying mapIndices and an invert flag
///
@@ -209,7 +216,7 @@ public void SetName(string newName)
///
///
/// A new
- public virtual DataFrameColumn Clone(DataFrameColumn mapIndices = null, bool invertMapIndices = false, long numberOfNullsToAppend = 0) => CloneImplementation(mapIndices, invertMapIndices, numberOfNullsToAppend);
+ public DataFrameColumn Clone(DataFrameColumn mapIndices, bool invertMapIndices = false, long numberOfNullsToAppend = 0) => CloneImplementation(mapIndices, invertMapIndices, numberOfNullsToAppend);
///
/// Clone column to produce a copy potentially changing the order of values by supplying mapIndices and an invert flag
@@ -218,7 +225,9 @@ public void SetName(string newName)
///
///
/// A new
- protected virtual DataFrameColumn CloneImplementation(DataFrameColumn mapIndices, bool invertMapIndices, long numberOfNullsToAppend) => throw new NotImplementedException();
+ protected abstract DataFrameColumn CloneImplementation(DataFrameColumn mapIndices, bool invertMapIndices, long numberOfNullsToAppend);
+
+ protected abstract DataFrameColumn CloneImplementation(long numberOfNullsToAppend = 0);
///
/// Returns a copy of this column sorted by its values
diff --git a/src/Microsoft.Data.Analysis/DataFrameColumns/ArrowStringDataFrameColumn.cs b/src/Microsoft.Data.Analysis/DataFrameColumns/ArrowStringDataFrameColumn.cs
index 49f88eab6b..ac14541549 100644
--- a/src/Microsoft.Data.Analysis/DataFrameColumns/ArrowStringDataFrameColumn.cs
+++ b/src/Microsoft.Data.Analysis/DataFrameColumns/ArrowStringDataFrameColumn.cs
@@ -62,7 +62,6 @@ public ArrowStringDataFrameColumn(string name, ReadOnlyMemory values, Read
_nullBitMapBuffers.Add(nullBitMapBuffer);
_nullCount = nullCount;
-
}
private long _nullCount;
@@ -371,8 +370,32 @@ protected internal override Apache.Arrow.Array ToArrowArray(long startIndex, int
///
public override DataFrameColumn Sort(bool ascending = true) => throw new NotSupportedException();
+ /// <summary>Typed overload of Clone: hides the base method to return an <see cref="ArrowStringDataFrameColumn"/>.</summary>
+ /// <param name="numberOfNullsToAppend">Forwarded unchanged to CloneImplementation.</param>
+ public new ArrowStringDataFrameColumn Clone(long numberOfNullsToAppend = 0)
+ => (ArrowStringDataFrameColumn)CloneImplementation(numberOfNullsToAppend);
+
+ /// <summary>Typed overload of the index-mapped Clone, returning an <see cref="ArrowStringDataFrameColumn"/>.</summary>
+ /// <remarks>All three arguments are forwarded unchanged to CloneImplementation.</remarks>
+ public new ArrowStringDataFrameColumn Clone(DataFrameColumn mapIndices, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
+ => (ArrowStringDataFrameColumn)CloneImplementation(mapIndices, invertMapIndices, numberOfNullsToAppend);
+
///
- public override DataFrameColumn Clone(DataFrameColumn mapIndices = null, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
+ protected override DataFrameColumn CloneImplementation(long numberOfNullsToAppend)
+ {
+ // Rebuild the column value by value; slots that are not valid are re-added as a default span.
+ var copy = new ArrowStringDataFrameColumn(Name);
+ for (long row = 0; row < Length; row++)
+ if (IsValid(row)) copy.Append(GetBytes(row));
+ else copy.Append(default);
+
+ for (long pending = numberOfNullsToAppend; pending > 0; pending--)
+ copy.Append(default);
+ return copy;
+ }
+
+ ///
+ protected override DataFrameColumn CloneImplementation(DataFrameColumn mapIndices, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
{
ArrowStringDataFrameColumn clone;
if (!(mapIndices is null))
@@ -381,27 +404,28 @@ public override DataFrameColumn Clone(DataFrameColumn mapIndices = null, bool in
if (dataType != typeof(long) && dataType != typeof(int) && dataType != typeof(bool))
throw new ArgumentException(String.Format(Strings.MultipleMismatchedValueType, typeof(long), typeof(int), typeof(bool)), nameof(mapIndices));
if (mapIndices.DataType == typeof(long))
- clone = Clone(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
else if (dataType == typeof(int))
- clone = Clone(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
else
- clone = Clone(mapIndices as PrimitiveDataFrameColumn);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn);
+
+ for (long i = 0; i < numberOfNullsToAppend; i++)
+ clone.Append(default);
}
else
{
- clone = Clone();
- }
- for (long i = 0; i < numberOfNullsToAppend; i++)
- {
- clone.Append(default);
+ clone = Clone(numberOfNullsToAppend);
}
+
return clone;
}
- private ArrowStringDataFrameColumn Clone(PrimitiveDataFrameColumn boolColumn)
+ private ArrowStringDataFrameColumn CloneImplementation(PrimitiveDataFrameColumn boolColumn)
{
if (boolColumn.Length > Length)
throw new ArgumentException(Strings.MapIndicesExceedsColumnLength, nameof(boolColumn));
+
ArrowStringDataFrameColumn ret = new ArrowStringDataFrameColumn(Name);
for (long i = 0; i < boolColumn.Length; i++)
{
@@ -412,10 +436,11 @@ private ArrowStringDataFrameColumn Clone(PrimitiveDataFrameColumn boolColu
return ret;
}
- private ArrowStringDataFrameColumn CloneImplementation(PrimitiveDataFrameColumn mapIndices, bool invertMapIndices = false)
+ private ArrowStringDataFrameColumn CloneImplementation(PrimitiveDataFrameColumn mapIndices, bool invertMapIndices)
where U : unmanaged
{
ArrowStringDataFrameColumn ret = new ArrowStringDataFrameColumn(Name);
+
mapIndices.ApplyElementwise((U? mapIndex, long rowIndex) =>
{
if (mapIndex == null)
@@ -423,38 +448,14 @@ private ArrowStringDataFrameColumn CloneImplementation(PrimitiveDataFrameColu
ret.Append(default);
return mapIndex;
}
- if (invertMapIndices)
- {
- long index = mapIndices.Length - 1 - rowIndex;
- ret.Append(IsValid(index) ? GetBytes(index) : default(ReadOnlySpan));
- }
- else
- {
- ret.Append(IsValid(rowIndex) ? GetBytes(rowIndex) : default(ReadOnlySpan));
- }
+
+ long index = invertMapIndices ? mapIndices.Length - 1 - rowIndex : rowIndex;
+ ret.Append(IsValid(index) ? GetBytes(index) : default(ReadOnlySpan));
+
return mapIndex;
});
- return ret;
- }
- private ArrowStringDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices = null, bool invertMapIndex = false)
- {
- if (mapIndices is null)
- {
- ArrowStringDataFrameColumn ret = new ArrowStringDataFrameColumn(Name);
- for (long i = 0; i < Length; i++)
- {
- ret.Append(IsValid(i) ? GetBytes(i) : default(ReadOnlySpan));
- }
- return ret;
- }
- else
- return CloneImplementation(mapIndices, invertMapIndex);
- }
-
- private ArrowStringDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices, bool invertMapIndex = false)
- {
- return CloneImplementation(mapIndices, invertMapIndex);
+ return ret;
}
///
diff --git a/src/Microsoft.Data.Analysis/DataFrameColumns/StringDataFrameColumn.cs b/src/Microsoft.Data.Analysis/DataFrameColumns/StringDataFrameColumn.cs
index 59ded9765b..e77a71e6f0 100644
--- a/src/Microsoft.Data.Analysis/DataFrameColumns/StringDataFrameColumn.cs
+++ b/src/Microsoft.Data.Analysis/DataFrameColumns/StringDataFrameColumn.cs
@@ -250,6 +250,28 @@ private PrimitiveDataFrameColumn GetSortIndices(Comparer comparer,
}
public new StringDataFrameColumn Clone(DataFrameColumn mapIndices, bool invertMapIndices, long numberOfNullsToAppend)
+ {
+ return (StringDataFrameColumn)CloneImplementation(mapIndices, invertMapIndices, numberOfNullsToAppend);
+ }
+
+ /// <summary>Typed overload of Clone: hides the base method to return a <see cref="StringDataFrameColumn"/>.</summary>
+ /// <param name="numberOfNullsToAppend">Forwarded unchanged to CloneImplementation.</param>
+ public new StringDataFrameColumn Clone(long numberOfNullsToAppend = 0)
+ => (StringDataFrameColumn)CloneImplementation(numberOfNullsToAppend);
+
+ protected override DataFrameColumn CloneImplementation(long numberOfNullsToAppend)
+ {
+ // Copy every value into a column pre-sized to Length, then pad with the requested nulls.
+ var copy = new StringDataFrameColumn(Name, Length);
+ for (long row = 0; row < Length; row++)
+ copy[row] = this[row];
+
+ for (long pending = numberOfNullsToAppend; pending > 0; pending--)
+ copy.Append(null);
+ return copy;
+ }
+
+ protected override DataFrameColumn CloneImplementation(DataFrameColumn mapIndices, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
{
StringDataFrameColumn clone;
if (!(mapIndices is null))
@@ -258,29 +280,24 @@ public new StringDataFrameColumn Clone(DataFrameColumn mapIndices, bool invertMa
if (dataType != typeof(long) && dataType != typeof(int) && dataType != typeof(bool))
throw new ArgumentException(String.Format(Strings.MultipleMismatchedValueType, typeof(long), typeof(int), typeof(bool)), nameof(mapIndices));
if (mapIndices.DataType == typeof(long))
- clone = Clone(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
else if (dataType == typeof(int))
- clone = Clone(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
else
- clone = Clone(mapIndices as PrimitiveDataFrameColumn);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn);
+
+ for (long i = 0; i < numberOfNullsToAppend; i++)
+ clone.Append(null);
}
else
{
- clone = Clone();
+ clone = Clone(numberOfNullsToAppend);
}
- for (long i = 0; i < numberOfNullsToAppend; i++)
- {
- clone.Append(null);
- }
- return clone;
- }
- protected override DataFrameColumn CloneImplementation(DataFrameColumn mapIndices = null, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
- {
- return Clone(mapIndices, invertMapIndices, numberOfNullsToAppend);
+ return clone;
}
- private StringDataFrameColumn Clone(PrimitiveDataFrameColumn boolColumn)
+ private StringDataFrameColumn CloneImplementation(PrimitiveDataFrameColumn boolColumn)
{
if (boolColumn.Length > Length)
throw new ArgumentException(Strings.MapIndicesExceedsColumnLength, nameof(boolColumn));
@@ -375,28 +392,6 @@ private StringDataFrameColumn CloneImplementation(PrimitiveDataFrameColumn
return ret;
}
- private StringDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices = null, bool invertMapIndex = false)
- {
- if (mapIndices is null)
- {
- StringDataFrameColumn ret = new StringDataFrameColumn(Name, Length);
- for (long i = 0; i < Length; i++)
- {
- ret[i] = this[i];
- }
- return ret;
- }
- else
- {
- return CloneImplementation(mapIndices, invertMapIndex);
- }
- }
-
- private StringDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices, bool invertMapIndex = false)
- {
- return CloneImplementation(mapIndices, invertMapIndex);
- }
-
internal static DataFrame ValueCountsImplementation(Dictionary> groupedValues)
{
StringDataFrameColumn keys = new StringDataFrameColumn("Values", 0);
diff --git a/src/Microsoft.Data.Analysis/DataFrameColumns/VBufferDataFrameColumn.cs b/src/Microsoft.Data.Analysis/DataFrameColumns/VBufferDataFrameColumn.cs
index fab98ee005..4c190d5cbe 100644
--- a/src/Microsoft.Data.Analysis/DataFrameColumns/VBufferDataFrameColumn.cs
+++ b/src/Microsoft.Data.Analysis/DataFrameColumns/VBufferDataFrameColumn.cs
@@ -5,7 +5,6 @@
using System;
using System.Collections;
using System.Collections.Generic;
-using System.Data;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using Microsoft.ML;
@@ -210,7 +209,7 @@ protected internal override void AddValueUsingCursor(DataViewRowCursor cursor, D
}
}
- private VBufferDataFrameColumn Clone(PrimitiveDataFrameColumn boolColumn)
+ private VBufferDataFrameColumn CloneImplementation(PrimitiveDataFrameColumn boolColumn)
{
if (boolColumn.Length > Length)
throw new ArgumentException(Strings.MapIndicesExceedsColumnLength, nameof(boolColumn));
@@ -224,28 +223,6 @@ private VBufferDataFrameColumn Clone(PrimitiveDataFrameColumn boolColum
return ret;
}
- private VBufferDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices = null, bool invertMapIndex = false)
- {
- if (mapIndices is null)
- {
- VBufferDataFrameColumn ret = new VBufferDataFrameColumn(Name, Length);
- for (long i = 0; i < Length; i++)
- {
- ret[i] = this[i];
- }
- return ret;
- }
- else
- {
- return CloneImplementation(mapIndices, invertMapIndex);
- }
- }
-
- private VBufferDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices, bool invertMapIndex = false)
- {
- return CloneImplementation(mapIndices, invertMapIndex);
- }
-
private VBufferDataFrameColumn CloneImplementation(PrimitiveDataFrameColumn mapIndices, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
where U : unmanaged
{
@@ -314,6 +291,16 @@ private VBufferDataFrameColumn CloneImplementation(PrimitiveDataFrameColum
}
public new VBufferDataFrameColumn Clone(DataFrameColumn mapIndices, bool invertMapIndices, long numberOfNullsToAppend)
+ {
+ return (VBufferDataFrameColumn)CloneImplementation(mapIndices, invertMapIndices, numberOfNullsToAppend);
+ }
+
+ /// <summary>Typed overload of Clone: hides the base method to return a VBufferDataFrameColumn.</summary>
+ /// <param name="numberOfNullsToAppend">Forwarded unchanged to CloneImplementation.</param>
+ public new VBufferDataFrameColumn Clone(long numberOfNullsToAppend = 0)
+ => (VBufferDataFrameColumn)CloneImplementation(numberOfNullsToAppend);
+
+ protected override DataFrameColumn CloneImplementation(DataFrameColumn mapIndices, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
{
VBufferDataFrameColumn clone;
if (!(mapIndices is null))
@@ -322,11 +309,11 @@ public new VBufferDataFrameColumn Clone(DataFrameColumn mapIndices, bool inve
if (dataType != typeof(long) && dataType != typeof(int) && dataType != typeof(bool))
throw new ArgumentException(String.Format(Strings.MultipleMismatchedValueType, typeof(long), typeof(int), typeof(bool)), nameof(mapIndices));
if (mapIndices.DataType == typeof(long))
- clone = Clone(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
else if (dataType == typeof(int))
- clone = Clone(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
else
- clone = Clone(mapIndices as PrimitiveDataFrameColumn);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn);
}
else
{
@@ -336,9 +323,14 @@ public new VBufferDataFrameColumn Clone(DataFrameColumn mapIndices, bool inve
return clone;
}
- protected override DataFrameColumn CloneImplementation(DataFrameColumn mapIndices = null, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
+ protected override DataFrameColumn CloneImplementation(long numberOfNullsToAppend)
{
- return Clone(mapIndices, invertMapIndices, numberOfNullsToAppend);
+ var ret = new VBufferDataFrameColumn(Name, Length);
+ // NOTE(review): numberOfNullsToAppend is never read here, unlike the String/Arrow/Primitive overrides in this change, which append that many nulls — confirm this is intended.
+ for (long i = 0; i < Length; i++)
+ ret[i] = this[i];
+
+ return ret;
}
private static VectorDataViewType GetDataViewType()
diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs
index f29f4963d7..3d3f740318 100644
--- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs
+++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs
@@ -63,8 +63,7 @@ public IEnumerable> GetReadOnlyDataBuffers()
{
for (int i = 0; i < _columnContainer.Buffers.Count; i++)
{
- ReadOnlyDataFrameBuffer buffer = _columnContainer.Buffers[i];
- yield return buffer.ReadOnlyMemory;
+ yield return _columnContainer.Buffers[i].ReadOnlyMemory;
}
}
@@ -364,13 +363,29 @@ public override bool HasDescription()
}
///
- /// Returns a clone of this column
+ /// Returns a clone of this column.
+ ///
+ ///
+ ///
+ public new PrimitiveDataFrameColumn Clone(long numberOfNullsToAppend = 0)
+ // Typed wrapper: hides the base Clone so callers get a PrimitiveDataFrameColumn back.
+ // The copy itself is produced by the CloneImplementation(long) override.
+ => (PrimitiveDataFrameColumn)CloneImplementation(numberOfNullsToAppend);
+
+ ///
+ /// Returns a clone of this column.
///
/// A column who values are used as indices
///
///
///
- public new PrimitiveDataFrameColumn Clone(DataFrameColumn mapIndices, bool invertMapIndices, long numberOfNullsToAppend)
+ public new PrimitiveDataFrameColumn Clone(DataFrameColumn mapIndices, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
+ // Typed wrapper over the index-mapped clone: all three arguments are forwarded unchanged
+ // to CloneImplementation and the result is cast back to PrimitiveDataFrameColumn.
+ => (PrimitiveDataFrameColumn)CloneImplementation(mapIndices, invertMapIndices, numberOfNullsToAppend);
+
+ /// <inheritdoc/>
+ protected override DataFrameColumn CloneImplementation(DataFrameColumn mapIndices, bool invertMapIndices, long numberOfNullsToAppend)
{
PrimitiveDataFrameColumn clone;
if (!(mapIndices is null))
@@ -383,24 +398,31 @@ public new PrimitiveDataFrameColumn Clone(DataFrameColumn mapIndices, bool in
else if (dataType == typeof(int))
clone = Clone(mapIndices as PrimitiveDataFrameColumn, invertMapIndices);
else
- clone = Clone(mapIndices as PrimitiveDataFrameColumn);
+ clone = CloneImplementation(mapIndices as PrimitiveDataFrameColumn);
+ // Append the requested trailing nulls to the mapped clone.
+ if (numberOfNullsToAppend != 0)
+ clone.AppendMany(null, numberOfNullsToAppend);
}
else
{
- clone = Clone();
+ // Forward the null count: nothing after this branch appends nulls any more.
+ clone = Clone(numberOfNullsToAppend);
}
- Debug.Assert(!ReferenceEquals(clone, null));
- clone.AppendMany(null, numberOfNullsToAppend);
return clone;
}
- ///
- protected override DataFrameColumn CloneImplementation(DataFrameColumn mapIndices, bool invertMapIndices, long numberOfNullsToAppend)
+ protected override DataFrameColumn CloneImplementation(long numberOfNullsToAppend)
{
- return Clone(mapIndices, invertMapIndices, numberOfNullsToAppend);
+ var newColumnContainer = _columnContainer.Clone();
+ var clone = CreateNewColumn(Name, newColumnContainer);
+ // The clone wraps a cloned container; AppendMany is only called when padding was actually requested.
+ if (numberOfNullsToAppend != 0)
+ clone.AppendMany(null, numberOfNullsToAppend);
+
+ return clone;
}
- private PrimitiveDataFrameColumn Clone(PrimitiveDataFrameColumn boolColumn)
+ private PrimitiveDataFrameColumn CloneImplementation(PrimitiveDataFrameColumn boolColumn)
{
if (boolColumn.Length > Length)
throw new ArgumentException(Strings.MapIndicesExceedsColumnLength, nameof(boolColumn));
@@ -436,21 +458,19 @@ private PrimitiveDataFrameColumn CloneImplementation(PrimitiveDataFrameCol
return ret;
}
- public PrimitiveDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices = null, bool invertMapIndices = false)
+ public PrimitiveDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices, bool invertMapIndices = false)
{
if (mapIndices is null)
- {
- PrimitiveColumnContainer newColumnContainer = _columnContainer.Clone();
- return CreateNewColumn(Name, newColumnContainer);
- }
- else
- {
- return CloneImplementation(mapIndices, invertMapIndices);
- }
+ return Clone();
+
+ return CloneImplementation(mapIndices, invertMapIndices);
}
public PrimitiveDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndices, bool invertMapIndices = false)
{
+ if (mapIndices is null)
+ return Clone();
+
return CloneImplementation(mapIndices, invertMapIndices);
}