From 90c1dcf0b031186407c7e7428d6fa62ba48bd77b Mon Sep 17 00:00:00 2001
From: Piyush Kanti Chanda
Date: Sun, 5 Apr 2026 10:39:55 +0000
Subject: [PATCH] [FEAT] Add Dtypes method to DataFrame

Add Dtypes() to return column names paired with their data type
strings, mirroring PySpark's dtypes property.

Addresses apache/spark-connect-go#58.

Co-authored-by: Isaac
---
 spark/sql/dataframe.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/spark/sql/dataframe.go b/spark/sql/dataframe.go
index 4c7a563..367b7f4 100644
--- a/spark/sql/dataframe.go
+++ b/spark/sql/dataframe.go
@@ -102,6 +102,8 @@ type DataFrame interface {
 	Describe(ctx context.Context, cols ...string) DataFrame
 	// Distinct returns a new DataFrame containing the distinct rows in this DataFrame.
 	Distinct(ctx context.Context) DataFrame
+	// Dtypes returns one [name, type] pair per schema field, in field order, where type is the field's DataType.TypeName() string; it returns an error if the schema cannot be retrieved.
+	Dtypes(ctx context.Context) ([][2]string, error)
 	// Drop returns a new DataFrame that drops the specified list of columns.
 	Drop(ctx context.Context, columns ...column.Convertible) (DataFrame, error)
 	// DropByName returns a new DataFrame that drops the specified list of columns by name.
@@ -312,6 +314,18 @@ func (df *dataFrameImpl) Columns(ctx context.Context) ([]string, error) {
 	return columns, nil
 }
 
+func (df *dataFrameImpl) Dtypes(ctx context.Context) ([][2]string, error) {
+	schema, err := df.Schema(ctx)
+	if err != nil {
+		return nil, err
+	}
+	dtypes := make([][2]string, len(schema.Fields))
+	for i, field := range schema.Fields {
+		dtypes[i] = [2]string{field.Name, field.DataType.TypeName()}
+	}
+	return dtypes, nil
+}
+
 func (df *dataFrameImpl) Corr(ctx context.Context, col1, col2 string) (float64, error) {
 	return df.CorrWithMethod(ctx, col1, col2, "pearson")
 }