■21/2/21 1:00AM
BigQuery
///BigQuery
■nullが最小か調べる(BigQueryではnullが最小値)select * from bangboo_data.x_data order by value
■NULLの件数を事前に数えておく(nullを0に変換してカウントした)
SELECT * FROM (SELECT 'value', count(COALESCE(value,0)) FROM bangboo_data.x_data WHERE value IS NULL) union all (SELECT 'value2', count(COALESCE(value2,0)) FROM bangboo_data.x_data WHERE value2 IS NULL) union all (SELECT 'v_no_null', count(COALESCE(v_no_null,0)) FROM bangboo_data.x_data WHERE v_no_null IS NULL)->4件、4件、0件で正常なのでcount(COALESCE(value,0))と条件はis nullを使う
横並びなら、単一テーブルなのでFromはCross joinでOK、SQLは0件のカウントをGroup byできないので↑が安全かもSELECT count_null_1, count_null_2, count_null_3 FROM (SELECT count(COALESCE(value,0)) as count_null_1 FROM bangboo_data.x_data WHERE value IS NULL) as t1, (SELECT count(COALESCE(value2,0)) as count_null_2 FROM bangboo_data.x_data WHERE value2 IS NULL) as t2, (SELECT count(COALESCE(v_no_null,0)) as count_null_3 FROM bangboo_data.x_data WHERE v_no_null IS NULL) as t3
SQLは0件のカウントをGroup byできない(下記SQL駄目、URLを参照し対策を)→カウント以外のカラムを出すには副問い合わせが必要だったりと、、、
SELECT v_no_null, count(COALESCE(v_no_null,0)) as count_null_3 FROM bangboo_data.x_data WHERE v_no_null IS NULL GROUP BY v_no_null
https://www.ksakae1216.com/entry/2017/02/17/063000
■nullと0と''の違いselect * from bangboo_data.x_data where v_no_null = 0 ->4行でるselect * from bangboo_data.x_data where v_no_null is null ->0行==>0はnullでなくwhere a = 0で検索せよ
select * from bangboo_data.x_purchase where item = '' ->2行でるselect * from bangboo_data.x_purchase where item is null ->0行==>''はnullでなくwhere a = ''で検索せよ
■全削除delete from bangboo_data.x_employee where true
truncate table bangboo_data.x_employee
■テーブル削除
drop table bangboo_data.x_xxx
■DDLとデータ入力CREATE TABLE IF NOT EXISTS `bangboo_data.x_data` ( `no` INT64 NOT NULL, `value` INT64, `value2` INT64, `v_no_null` INT64 NOT NULL,)
INSERT INTO `bangboo_data.x_data` (`no`, `value`, `value2`, `v_no_null`) VALUES(1, 100, NULL, 0),(2, NULL, 200, 0),(3, NULL, 200, 5),(4, NULL, 100, 10),(5, NULL, 300, 0),(6, 200, NULL, 0),(7, 200, NULL, 5),(8, 100, NULL, 10);
CREATE TABLE IF NOT EXISTS bangboo_data.x_bill ( `no` INT64 NOT NULL, `user_no` INT64 NOT NULL, `total_bill` INT64 NOT NULL, `date` DATETIME,)
INSERT INTO `bangboo_data.x_bill` (`no`, `user_no`, `total_bill`, `date`) VALUES(1, 1, 1000, '2021-01-01 00:00:00'),(2, 1, 1500, '2021-01-02 00:00:00'),(3, 1, 500, '2021-01-03 00:00:00'),(4, 2, 1100, '2021-01-01 00:00:00'),(5, 2, 1700, '2021-01-02 00:00:00'),(6, 2, 800, '2021-01-03 00:00:00'),(7, 3, 1100, '2021-01-01 00:00:00'),(8, 3, 1700, '2021-01-02 00:00:00'),(9, 3, 800, '2021-01-03 00:00:00'),(10, 3, 800, '2021-01-04 00:00:00');
CREATE TABLE IF NOT EXISTS `bangboo_data.x_compensation` ( `no` INT64 NOT NULL, `allow` STRING)
INSERT INTO `bangboo_data.x_compensation` (`no`, `allow`) VALUES(1, '住宅'),(1, '交通'),(2, '住宅');
CREATE TABLE IF NOT EXISTS `bangboo_data.x_employee` ( `no` INT64 NOT NULL, `name` STRING)
INSERT INTO `bangboo_data.x_employee` (`no`, `name`) VALUES(1, '田中'),(2, '鈴木'),(3, '佐藤');
CREATE TABLE IF NOT EXISTS `bangboo_data.x_joblevel` ( `no` INT64 NOT NULL, `res` STRING)
INSERT INTO `bangboo_data.x_joblevel` (`no`, `res`) VALUES(1, '部長'),(1, '課長'),(2, '平');
CREATE TABLE IF NOT EXISTS `bangboo_data.x_purchase` ( `no` INT64 NOT NULL, `p_no` INT64 NOT NULL, `item` STRING, `price` INT64 NOT NULL, `unit` INT64 NOT NULL,)
INSERT INTO `bangboo_data.x_purchase` (`no`, `p_no`, `item`, `price`, `unit`) VALUES(1, 1, 'aaa', 100, 10),(2, 2, 'aaa', 100, 10),(3, 2, '', 100, 5),(4, 3, 'bbb', 100, 5),(5, 4, 'ccc', 300, 3),(6, 4, 'aaa', 100, 2),(7, 5, 'ccc', 300, 5),(8, 5, 'bbb', 100, 2),(9, 6, 'aaa', 100, 3),(10, 6, 'bbb', 100, 2),(11, 6, 'ccc', 300, 1),(12, 7, 'aaa', 100, 11),(13, 8, 'bbb', 100, 17),(14, 9, 'ccc', 300, 2),(15, 9, 'aaa', 100, 2),(16, 10, 'ccc', 300, 2),(17, 10, '', 100, 2);
■view
CREATE VIEW myProject.myDataset.myView AS SELECT * FROM anotherDataset.myTable;
ちなみにdeclareが入っているSQLはviewにできない
■簡易RFM(recency frequenctry monetary)SELECT name, MAX(date), count(DISTINCT x_bill.no), SUM(price * unit) FROM bangboo_data.x_employee RIGHT JOIN bangboo_data.x_bill ON x_employee.no = x_bill.user_no RIGHT JOIN bangboo_data.x_purchase ON x_bill.no = x_purchase.p_no GROUP BY name ORDER BY SUM(price * unit) DESC
■RFM
SELECT RANK() OVER (ORDER BY rank_recent+rank_frequent+rank_money) as rank, rank_recent+rank_frequent+rank_money as least_score_wins, t1_r.no as user_no, t1_r.name, recent, frequent, money, rank_recent, rank_frequent, rank_moneyFROM ( SELECT RANK() OVER (ORDER BY recent DESC) as rank_recent, t1.no, t1.name, recent FROM (SELECT x_employee.no, x_employee.name, MAX(date) as recent FROM bangboo_data.x_employee RIGHT JOIN bangboo_data.x_bill ON x_employee.no = x_bill.user_no RIGHT JOIN bangboo_data.x_purchase ON x_bill.no = x_purchase.p_no GROUP BY x_employee.no, x_employee.name ORDER BY MAX(date) DESC) as t1) as t1_rLEFT OUTER JOIN ( SELECT RANK() OVER (ORDER BY frequent DESC) as rank_frequent, t2.user_no, t2.name, frequent FROM (SELECT x_bill.user_no, x_employee.name, count(distinct x_bill.no) as frequent FROM bangboo_data.x_employee RIGHT JOIN bangboo_data.x_bill ON x_employee.no = x_bill.user_no RIGHT JOIN bangboo_data.x_purchase ON x_bill.no = x_purchase.p_no GROUP BY x_bill.user_no, x_employee.name ORDER BY count(distinct x_bill.no) DESC) as t2) as t2_fON t1_r.no = t2_f.user_noLEFT OUTER JOIN ( SELECT RANK() OVER (ORDER BY money DESC) as rank_money, t3.no, t3.name, money FROM (SELECT x_employee.no, x_employee.name, SUM(price * unit) as money FROM bangboo_data.x_employee RIGHT JOIN bangboo_data.x_bill ON x_employee.no = x_bill.user_no RIGHT JOIN bangboo_data.x_purchase ON x_bill.no = x_purchase.p_no GROUP BY x_employee.no, x_employee.name ORDER BY SUM(price * unit) DESC) as t3) as t3_mON t2_f.user_no = t3_m.noORDER BY least_score_wins asc
■SQLのルールは重要なので下記要チェック
/// BANGBOO BLOG /// - Ora Ora Ora Ora Ora
@/// BANGBOO BLOG ///