11package co .clflushopt .glint ;
22
33import java .io .FileNotFoundException ;
4- import java .util .Arrays ;
5- import java .util .Iterator ;
6- import java .util .List ;
7- import java .util .Optional ;
84
9- import org .apache .arrow .vector .types .pojo .ArrowType ;
10-
11- import co .clflushopt .glint .core .CsvReaderOptions ;
12- import co .clflushopt .glint .core .ExecutionContext ;
13- import co .clflushopt .glint .dataframe .DataFrame ;
14- import co .clflushopt .glint .query .logical .expr .AggregateExpr ;
15- import co .clflushopt .glint .query .logical .expr .CastExpr ;
16- import co .clflushopt .glint .query .logical .expr .ColumnExpr ;
17- import co .clflushopt .glint .query .logical .expr .LogicalExpr ;
18- import co .clflushopt .glint .query .logical .plan .LogicalPlan ;
19- import co .clflushopt .glint .query .optimizer .QueryOptimizer ;
20- import co .clflushopt .glint .types .ArrowTypes ;
21- import co .clflushopt .glint .types .Field ;
22- import co .clflushopt .glint .types .RecordBatch ;
23- import co .clflushopt .glint .types .Schema ;
5+ import co .clflushopt .glint .examples .NYCYellowTrips ;
246
257/**
268 * Hello world!
@@ -30,78 +12,10 @@ public class App {
3012 public static void main (String [] args ) {
3113 System .out .println ("Welcome to the Glint query compiler" );
3214 try {
33- nycTripsBenchmark ( args );
15+ NYCYellowTrips . runParquetExample ( );
3416 } catch (FileNotFoundException e ) {
3517 e .printStackTrace ();
3618 }
3719 }
3820
39- public static void nycTripsBenchmark (String [] args ) throws FileNotFoundException {
40- // Create execution context
41- ExecutionContext ctx = ExecutionContext .create ().build ();
42-
43- long startTime = System .currentTimeMillis ();
44- try {
45- // Define the schema for NYC Taxi dataset
46- Schema schema = new Schema (Arrays .asList (new Field ("VendorID" , ArrowTypes .Int32Type ),
47- new Field ("tpep_pickup_datetime" , ArrowTypes .StringType ), // Could be Timestamp
48- new Field ("tpep_dropoff_datetime" , ArrowTypes .StringType ), // Could be Timestamp
49- new Field ("passenger_count" , ArrowTypes .Int32Type ),
50- new Field ("trip_distance" , ArrowTypes .DoubleType ),
51- new Field ("pickup_longitude" , ArrowTypes .DoubleType ),
52- new Field ("pickup_latitude" , ArrowTypes .DoubleType ),
53- new Field ("RatecodeID" , ArrowTypes .Int32Type ),
54- new Field ("store_and_fwd_flag" , ArrowTypes .StringType ),
55- new Field ("dropoff_longitude" , ArrowTypes .DoubleType ),
56- new Field ("dropoff_latitude" , ArrowTypes .DoubleType ),
57- new Field ("payment_type" , ArrowTypes .Int32Type ),
58- new Field ("fare_amount" , ArrowTypes .DoubleType ),
59- new Field ("extra" , ArrowTypes .DoubleType ),
60- new Field ("mta_tax" , ArrowTypes .DoubleType ),
61- new Field ("tip_amount" , ArrowTypes .DoubleType ),
62- new Field ("tolls_amount" , ArrowTypes .DoubleType ),
63- new Field ("improvement_surcharge" , ArrowTypes .DoubleType ),
64- new Field ("total_amount" , ArrowTypes .DoubleType )));
65- // Create DataFrame and apply transformations
66- DataFrame df = ctx
67- .readCsv ("./datasets/yellow_tripdata_example.csv" , Optional .of (schema ),
68- CsvReaderOptions .builder ().delimiter (',' ).hasHeader (true ).build ())
69- .aggregate (List .of (col ("passenger_count" )),
70- List .of (max (cast (col ("fare_amount" ), ArrowTypes .FloatType ))));
71-
72- System .out .println ("Logical Plan:\t " + LogicalPlan .format (df .getLogicalPlan ()));
73- System .out .println ("Schema:\t " + df .getSchema ());
74-
75- // Optimize and execute the plan
76- LogicalPlan optimizedPlan = QueryOptimizer .optimize (df .getLogicalPlan ());
77- System .out .println ("Optimized Plan:\t " + LogicalPlan .format (optimizedPlan ));
78-
79- // Execute and print results
80- Iterator <RecordBatch > results = ctx .execute (optimizedPlan );
81-
82- while (results .hasNext ()) {
83- RecordBatch batch = results .next ();
84- System .out .println (batch .getSchema ());
85- System .out .println (batch .toCsv ());
86-
87- }
88-
89- } finally {
90- long endTime = System .currentTimeMillis ();
91- System .out .println ("Query took " + (endTime - startTime ) + " ms" );
92- }
93- }
94-
95- // Helper methods for creating expressions
96- private static LogicalExpr col (String name ) {
97- return new ColumnExpr (name );
98- }
99-
100- private static LogicalExpr cast (LogicalExpr expr , ArrowType targetType ) {
101- return new CastExpr (expr , targetType );
102- }
103-
104- private static AggregateExpr max (LogicalExpr expr ) {
105- return new AggregateExpr .Max (expr );
106- }
10721}
0 commit comments