Awesome
Sales app
Demo Sales application for Data Wareshouses and Multi-Dimensional Analysis presentation at RailsConf 2015.
Queries
Get total sales amount in California in 2014 Q1 by product families
OrderItem.joins(:order => :customer).
where("customers.country" => "USA", "customers.state_province" => "CA").
where("extract(year from orders.order_date) = ?", 2014).
where("extract(quarter from orders.order_date) = ?", 1).
joins(:product => :product_class).
group("product_classes.product_family").
sum("order_items.amount")
(335.0ms)
SELECT SUM(order_items.amount) AS sum_order_items_amount,
product_classes.product_family AS product_classes_product_family
FROM "order_items"
INNER JOIN "orders" ON "orders"."id" = "order_items"."order_id"
INNER JOIN "customers" ON "customers"."id" = "orders"."customer_id"
INNER JOIN "products" ON "products"."id" = "order_items"."product_id"
INNER JOIN "product_classes" ON "product_classes"."id" = "products"."product_class_id"
WHERE "customers"."country" = 'USA'
AND "customers"."state_province" = 'CA'
AND (extract(YEAR FROM orders.order_date) = 2014)
AND (extract(quarter FROM orders.order_date) = 1)
GROUP BY product_classes.product_family
{
"Food" => 61137.2,
"Drink" => 7510.16,
"Non-Consumable" => 16173.46
}
OrderItem.joins(:order => :customer).
where("customers.country" => "USA", "customers.state_province" => "CA").
where("extract(year from orders.order_date) = ?", 2014).
where("extract(quarter from orders.order_date) = ?", 1).
joins(:product => :product_class).
group("product_classes.product_family").
select("product_classes.product_family,"+
"SUM(order_items.amount) AS sales_amount,"+
"SUM(order_items.cost) AS sales_cost,"+
"COUNT(DISTINCT customers.id) AS customers_count").
map{|i| i.attributes.compact}
$ rails console
>> OrderItem.count
(677.0ms) SELECT COUNT(*) FROM "order_items"
=> 6218022
>> Order.count
(126.0ms) SELECT COUNT(*) FROM "orders"
=> 642362
>> OrderItem.joins(:order => :customer).
joins(:product => :product_class).
group("product_classes.product_family").
select("product_classes.product_family,"+
"SUM(order_items.amount) AS sales_amount,"+
"SUM(order_items.cost) AS sales_cost,"+
"COUNT(DISTINCT customers.id) AS customers_count").
map{|i| i.attributes.compact}
OrderItem Load (25437.0ms) ...
OrderItem.joins(:order => :customer).joins(:product => :product_class).
where("customers.country" => "USA").
group("product_classes.product_family").
sum("order_items.amount")
(5485.0ms)
SELECT SUM(order_items.amount) AS sum_order_items_amount,
product_classes.product_family AS product_classes_product_family
FROM "order_items"
INNER JOIN "orders" ON "orders"."id" = "order_items"."order_id"
INNER JOIN "customers" ON "customers"."id" = "orders"."customer_id"
INNER JOIN "products" ON "products"."id" = "order_items"."product_id"
INNER JOIN "product_classes" ON "product_classes"."id" = "products"."product_class_id"
WHERE "customers"."country" = 'USA'
GROUP BY product_classes.product_family
DWH dimension and fact tables
Dwh::SalesFact.
joins(:customer).joins(:product => :product_class).joins(:time).
where("d_customers.country" => "USA", "d_customers.state_province" => "CA").
where("d_time.year" => 2014, "d_time.quarter" => 1).
group("d_product_classes.product_family").
sum("sales_amount")
SELECT SUM("dwh"."f_sales"."sales_amount") AS sum_sales_amount,
d_product_classes.product_family AS d_product_classes_product_family
FROM "dwh"."f_sales"
INNER JOIN "dwh"."d_customers" ON "dwh"."d_customers"."id" = "dwh"."f_sales"."customer_id"
INNER JOIN "dwh"."d_products" ON "dwh"."d_products"."id" = "dwh"."f_sales"."product_id"
INNER JOIN "dwh"."d_product_classes" ON "dwh"."d_product_classes"."id" = "dwh"."d_products"."product_class_id"
INNER JOIN "dwh"."d_time" ON "dwh"."d_time"."id" = "dwh"."f_sales"."time_id"
WHERE "d_customers"."country" = 'USA'
AND "d_customers"."state_province" = 'CA'
AND "d_time"."year" = 2014
AND "d_time"."quarter" = 1
GROUP BY d_product_classes.product_family
Dwh::SalesFact.
joins(:product => :product_class).
group("d_product_classes.product_family").
select("d_product_classes.product_family,"+
"SUM(f_sales.sales_amount) AS sales_amount,"+
"SUM(f_sales.sales_cost) AS sales_cost,"+
"COUNT(DISTINCT f_sales.customer_id) AS customers_count").
map{|i| i.attributes.compact}
(19079.0ms)
SELECT d_product_classes.product_family,
SUM(f_sales.sales_amount) AS sales_amount,
SUM(f_sales.sales_cost) AS sales_cost,
COUNT(DISTINCT f_sales.customer_id) AS customers_count
FROM "dwh"."f_sales"
INNER JOIN "dwh"."d_products" ON "dwh"."d_products"."id" = "dwh"."f_sales"."product_id"
INNER JOIN "dwh"."d_product_classes" ON "dwh"."d_product_classes"."id" = "dwh"."d_products"."product_class_id"
GROUP BY d_product_classes.product_family
Mondrian OLAP queries
Dwh.benchmark {
olap.from("Sales").
columns("[Measures].[Sales Amount]").
rows("[Product].[Product Family].Members").
where("[Customer].[USA].[CA]", "[Time].[Quarter].[Q1 2014]")
}
SELECT {[Measures].[Sales Amount]} ON COLUMNS,
[Product].[Product Family].Members ON ROWS
FROM [Sales]
WHERE ([Customer].[USA].[CA], [Time].[Quarter].[Q1 2014])
Dwh.benchmark {
olap.from("Sales").
columns("[Measures].[Sales Amount]").
rows("[Product].[Product Family].Members").
where("[Customer].[USA]")
}
Dwh.benchmark {
olap.from("Sales").
columns("[Measures].[Sales Amount]",
"[Measures].[Sales Cost]","[Measures].[Customers Count]").
rows("[Product].[Product Family].Members")
}
SELECT {[Measures].[Sales Amount], [Measures].[Sales Cost], [Measures].[Customers Count]} ON COLUMNS,
[Product].[Product Family].Members ON ROWS
FROM [Sales] (21713.0ms)
SELECT {[Measures].[Sales Amount], [Measures].[Sales Cost], [Measures].[Customers Count]} ON COLUMNS,
[Product].[Product Family].Members ON ROWS
FROM [Sales] (10.0ms)
Dwh.benchmark {
olap.from("Sales").
columns("[Measures].[Sales Amount]").
rows("[Gender].[Gender].Members").
where("[Customer].[USA].[CA]", "[Time].[Quarter].[Q1 2014]")
}
Dwh.benchmark(:html) {
olap.from("Sales").
columns("[Measures].[Sales Amount]").
rows("[Age interval].[Age interval].Members").
where("[Customer].[USA].[CA]", "[Time].[Quarter].[Q1 2014]")
}
[Age interval].[<20 years]
[Age interval].[20-30 years]
[Age interval].[30-40 years]
[Age interval].[40-50 years]
[Age interval].[50+ years]
Dwh.benchmark(:html) {
olap.from("Sales").
columns("[Measures].[Profit]", "[Measures].[Margin %]").
rows("[Product].[Product Family].Members").
where("[Customer].[USA].[CA]", "[Time].[Quarter].[Q1 2014]")
}
Multi-threaded ETL
Dwh::TimeDimension.load! (5236.0ms)
Dwh::TimeDimension.parallel_load!(2) (3450.0ms)
Dwh::TimeDimension.parallel_load!(4) (2142.0ms)
Dwh::TimeDimension.parallel_load!(6) (2361.0ms)
Dwh::TimeDimension.parallel_load!(8) (2826.0ms)
Analytical Columnar Databases
SELECT d_product_classes.product_family,
SUM(f_sales.sales_amount) AS sales_amount,
SUM(f_sales.sales_cost) AS sales_cost,
COUNT(DISTINCT f_sales.customer_id) AS customers_count
FROM "dwh"."f_sales"
INNER JOIN "dwh"."d_products" ON "dwh"."d_products"."id" = "dwh"."f_sales"."product_id"
INNER JOIN "dwh"."d_product_classes" ON "dwh"."d_product_classes"."id" = "dwh"."d_products"."product_class_id"
GROUP BY d_product_classes.product_family
PostgreSQL always ~18.5 seconds
HP Vertica first ~9 seconds
next ~1.5 seconds