diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md index 5cbb4e70f..5745ae3e3 100644 --- a/02_activities/assignments/Assignment2.md +++ b/02_activities/assignments/Assignment2.md @@ -53,8 +53,106 @@ The store wants to keep customer addresses. Propose two architectures for the CU **HINT:** search type 1 vs type 2 slowly changing dimensions. +Overwrite Model is Type 1 and Retain Changes is Type 2 +The Overwrite Model (Type 1) replaces the customer's existing address in place, so no history is kept; the Retain Changes model (Type 2) keeps each old address and adds a new row for every change, so the address history is preserved. + ``` -Your answer... +Model for a Small Bookstore + +Employee +employee_id +first_name +last_name +position +hire_date +salary + +Customer +customer_id +first_name +last_name +email +phone + +Book +book_id +title +author +ISBN +genre +price +publication_date + +Order +order_id (PK) +customer_id (FK → Customer.customer_id) +employee_id (FK → Employee.employee_id) +order_date (FK → Date.date_id) +total_amount + +Order_Detail +order_detail_id (PK) +order_id (FK → Order.order_id) +book_id (FK → Book.book_id) +quantity +unit_price + +Sales +sale_id (PK) +order_id (FK → Order.order_id) +payment_date (FK → Date.date_id) +payment_type +amount + +Date +date_id (PK) +calendar_date +year +month +quarter +weekday +is_holiday + +Relationships: +One customer can place many orders. +One order can have many order_details. +One book can appear in many order_details. +One employee processes many orders. +Date connects to both orders and sales. 
+
+Adding Employee Shifts with Morning & Evening Shifts:
+Shift
+shift_id (PK)
+shift_name
+start_time
+end_time
+
+Employee_Shift
+employee_id (FK → Employee.employee_id)
+shift_id (FK → Shift.shift_id)
+date_id (FK → Date.date_id)
+
+Overwrite Model
+customer_id (FK → Customer.customer_id)
+street
+city
+province
+postal_code
+country
+
+History Retention Model
+customer_address_id (PK)
+customer_id (FK → Customer.customer_id)
+street
+city
+province
+postal_code
+country
+effective_date
+end_date
+is_current
+
+Overwrite Model is Type 1 and Retain Changes is Type 2
 ```
 ***
@@ -76,9 +174,7 @@ Steps to complete this part of the assignment:
 Using the following syntax you create our super cool and not at all needy manager a list:
 ```
-SELECT
-product_name || ', ' || product_size|| ' (' || product_qty_type || ')'
-FROM product
+
 ```
 But wait! The product table has some bad data (a few NULL values).
diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql
index 5ad40748a..a651e36aa 100644
--- a/02_activities/assignments/assignment2.sql
+++ b/02_activities/assignments/assignment2.sql
@@ -20,6 +20,9 @@ The `||` values concatenate the columns into strings.
 Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
 All the other rows will remain the same.) */
+SELECT
+    COALESCE(product_name, '') || ', ' || COALESCE(product_size, '') || ' (' || COALESCE(product_qty_type, 'unit') || ')' AS product_details -- a NULL operand makes the whole || result NULL, so each column gets a fallback
+FROM product;
 --Windowed Functions
@@ -32,17 +35,37 @@ each new market date for each customer, or select only the unique market dates p
 (without purchase details) and number those visits.
 HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */
+SELECT
+    customer_id,
+    market_date,
+    DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number -- all purchase rows are shown; rows on the same market_date share one visit number, so the counter only changes on a new date
+FROM customer_purchases;
 /* 2. 
Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
 then write another query that uses this one as a subquery (or temp table) and filters the results to
 only the customer’s most recent visit. */
+SELECT
+    latest.customer_id,
+    latest.market_date,
+    latest.rev_visit_number
+FROM ( -- number each customer's purchase rows from the newest market_date backwards
+    SELECT customer_id, market_date,
+        ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS rev_visit_number
+    FROM customer_purchases
+) AS latest
+WHERE latest.rev_visit_number = 1;
 /* 3. Using a COUNT() window function, include a value along with each row of the
 customer_purchases table that indicates how many different times that customer has purchased that product_id. */
+SELECT
+    cp.customer_id,
+    cp.product_id,
+    COUNT(*) OVER (PARTITION BY cp.customer_id, cp.product_id) AS product_purchase_count -- rows per (customer, product) pair, repeated on every row of that pair
+FROM customer_purchases AS cp;
 -- String manipulations
@@ -57,10 +80,18 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
 Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */
+SELECT
+    p.product_name,
+    TRIM(SUBSTR(p.product_name, INSTR(p.product_name, '-') + 1)) AS description -- text after the first hyphen, surrounding whitespace removed
+FROM product AS p
+WHERE INSTR(p.product_name, '-') >= 1; -- only names that contain a hyphen
 /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */
+SELECT p.*
+FROM product AS p
+WHERE p.product_size REGEXP '[0-9]'; -- keeps sizes containing at least one digit
 -- UNION
@@ -73,6 +104,33 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
 3) Query the second temp table twice, once for the best day, once for the worst day,
 with a UNION binding them. 
*/
+WITH sales_per_day AS ( -- total sales value for each market date
+    SELECT
+        market_date,
+        SUM(quantity * cost_to_customer_per_qty) AS total_sales
+    FROM customer_purchases
+    GROUP BY market_date
+),
+ranked AS ( -- rank every market date from both ends; RANK() lets tied days share rank 1
+    SELECT
+        market_date,
+        total_sales,
+        RANK() OVER (ORDER BY total_sales DESC) AS best_rank,
+        RANK() OVER (ORDER BY total_sales ASC) AS worst_rank
+    FROM sales_per_day
+)
+-- Best Day
+SELECT market_date, total_sales, 'Best Day' AS label
+FROM ranked
+WHERE best_rank = 1
+
+UNION
+
+-- Worst Day
+SELECT market_date, total_sales, 'Worst Day' AS label
+FROM ranked
+WHERE worst_rank = 1;
+
@@ -89,6 +147,31 @@ Think a bit about the row counts: how many distinct vendors, product names are t
 How many customers are there (y).
 Before your final group by you should have the product of those two queries (x*y). */
+WITH customer_count AS ( -- y: number of customers on record (always exactly one row)
+    SELECT COUNT(*) AS num_customers
+    FROM customer
+)
+, vendor_products AS ( -- x: one row per distinct vendor / product / original_price found in vendor_inventory
+    SELECT
+        v.vendor_id,
+        v.vendor_name,
+        p.product_id,
+        p.product_name,
+        vi.original_price
+    FROM vendor_inventory AS vi
+    INNER JOIN vendor AS v ON vi.vendor_id = v.vendor_id
+    INNER JOIN product AS p ON vi.product_id = p.product_id
+    GROUP BY v.vendor_id, v.vendor_name, p.product_id, p.product_name, vi.original_price -- used only to de-duplicate the repeated inventory rows
+)
+
+SELECT
+    vp.vendor_name,
+    vp.product_name,
+    SUM(5 * vp.original_price * cc.num_customers) AS total_revenue_per_product -- 5 units sold to every customer; cc is a single row so the join alone multiplies nothing, the count must be applied here
+FROM vendor_products AS vp
+CROSS JOIN customer_count AS cc
+GROUP BY vp.vendor_name, vp.product_name
+ORDER BY vp.vendor_name, vp.product_name;
 -- INSERT
@@ -97,11 +180,22 @@ This table will contain only products where the `product_qty_type = 'unit'`.
 It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
 Name the timestamp column `snapshot_timestamp`. */
+DROP TABLE IF EXISTS product_units; -- makes the script re-runnable
+
+CREATE TABLE product_units AS
+SELECT p.*,
+    CURRENT_TIMESTAMP AS snapshot_timestamp -- time the snapshot was taken
+FROM product AS p
+WHERE p.product_qty_type = 'unit';
 /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp). 
This can be any product you desire (e.g. add another record for Apple Pie). */
+INSERT INTO product_units -- positional insert: expects product's columns followed by snapshot_timestamp, so it must run before current_quantity is added below
+SELECT *, CURRENT_TIMESTAMP
+FROM product
+WHERE product_name = 'Apple Pie';
 -- DELETE
@@ -109,6 +203,9 @@ This can be any product you desire (e.g. add another record for Apple Pie). */
 HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
+DELETE FROM product_units
+WHERE rowid = (SELECT rowid FROM product_units WHERE product_name = 'Apple Pie'
+               ORDER BY snapshot_timestamp, rowid LIMIT 1); -- removes exactly one row, the oldest; rowid breaks the tie when both rows share the same CURRENT_TIMESTAMP second
 -- UPDATE
@@ -128,6 +225,21 @@ Finally, make sure you have a WHERE statement to update the right row,
 you'll need to use product_units.product_id to refer to the correct row within the product_units table.
 When you have all of these components, you can run the update statement. */
+ALTER TABLE product_units ADD COLUMN current_quantity INTEGER DEFAULT 0;
+
+UPDATE product_units
+SET current_quantity = ( -- quantity from the most recent vendor_inventory row for this product, NULL treated as 0
+    SELECT COALESCE(vi.quantity, 0)
+    FROM vendor_inventory AS vi
+    WHERE vi.product_id = product_units.product_id
+    ORDER BY vi.market_date DESC
+    LIMIT 1
+)
+WHERE EXISTS ( -- leave products with no inventory rows at the column default instead of overwriting them with NULL
+    SELECT 1
+    FROM vendor_inventory AS vi
+    WHERE vi.product_id = product_units.product_id
+);
diff --git a/02_activities/assignments/farmersmarket.sqbpro b/02_activities/assignments/farmersmarket.sqbpro
index 923c3d8f4..04ba1db23 100644
--- a/02_activities/assignments/farmersmarket.sqbpro
+++ b/02_activities/assignments/farmersmarket.sqbpro
@@ -1 +1 @@
-
-- Reference to file "C:/Users/Asad/dsi_day2/SQL1/02_activities/assignments/assignment1.sql" (not supported by this version) --
+
-- Reference to file "C:/Users/Asad/dsi_day2/SQL1/02_activities/assignments/assignment1.sql" (not supported by this version) ---- Reference to file "C:/Users/Asad/dsi_day2/SQL1/02_activities/assignments/assignment2.sql" (not supported by this version) --