所有,
我一直试图通过BigQuery获取一些更大的分析代码,但我仍然遇到许多状态和存在的数据量的问题.我们正在谈论多年的数据.可能是我的查询不符合要求,但尝试根据特定组获取总和是我需要的.
我需要在查询中更改哪些内容以防止资源超出错误?
SELECT COMPANY_NAME, RATING_CLASS, COMPANY_KEY, -- State Info & Calculations -- Over is used as a WINDOW function to SUM ALL results within the given query SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE, -- ZIP3 Info & Calculations ZIP3, ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, ZIP3_TOTAL_RESULT_APPEARANCE, ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST, ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST, ZIP3_AVG_RANK AS ZIP3_AVG_RANK, ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5, ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10, ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF, ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST, ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK, ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5, ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10, ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, -- ZIP5 Info & Calculations ZIP5, ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, ZIP5_TOTAL_RESULT_APPEARANCE, ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST, ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST, ZIP5_AVG_RANK AS ZIP5_AVG_RANK, ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5, ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10, ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF, ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST, ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK, ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5, ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10, ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, FROM ( SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, -- ZIP3 ZIP3, COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE, SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT, AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST, AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK, SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT, SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT, AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF, SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT, AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK, SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT, SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT, AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, -- ZIP5 ZIP5, COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE, SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT, AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST, AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK, SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT, SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT, AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF, SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT, AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK, SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT, SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT, AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, FROM [csgapi:qh_med_supp_tool.v2_TX] WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') AND user_key NOT IN ("6522869941010432", "6277136540237824") GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5, LOWEST, RATIO_TO_MIN, RATE_ORDER, TOP5, TOP10, CENT_DIFF, DISCOUNTED_LOWEST, DISCOUNTED_RATIO_TO_MIN, DISCOUNTED_RATE_ORDER, DISCOUNTED_TOP5, DISCOUNTED_TOP10, DISCOUNTED_CENT_DIFF, LOGGING_KEY ) GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, -- ZIP3 General ZIP3, ZIP3_MED_SUPP_TOOL_NUM_QUOTE, ZIP3_TOTAL_RESULT_APPEARANCE, ZIP3_LOWEST, ZIP3_AVG_RATIO_TO_LOWEST, ZIP3_AVG_RANK, ZIP3_TOP5, ZIP3_TOP10, ZIP3_AVG_CENT_DIFF, ZIP3_DISCOUNTED_LOWEST, ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, ZIP3_DISCOUNTED_AVG_RANK, ZIP3_DISCOUNTED_TOP5, ZIP3_DISCOUNTED_TOP10, ZIP3_DISCOUNTED_AVG_CENT_DIFF, -- ZIP5 General ZIP5, ZIP5_MED_SUPP_TOOL_NUM_QUOTE, ZIP5_TOTAL_RESULT_APPEARANCE, ZIP5_LOWEST, ZIP5_AVG_RATIO_TO_LOWEST, ZIP5_AVG_RANK, ZIP5_TOP5, ZIP5_TOP10, ZIP5_AVG_CENT_DIFF, ZIP5_DISCOUNTED_LOWEST, ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, ZIP5_DISCOUNTED_AVG_RANK, ZIP5_DISCOUNTED_TOP5, ZIP5_DISCOUNTED_TOP10, ZIP5_DISCOUNTED_AVG_CENT_DIFF
使用建议的修复更新了查询:
SELECT main.COMPANY_NAME AS COMPANY_NAME, main.COMPANY_KEY AS COMPANY_KEY, main.RATING_CLASS AS RATING_CLASS, state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE, -- ZIP3 main.ZIP3 AS ZIP3, ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE, ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST, ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5, ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST, ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10, ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK, ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF, ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5, ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST, ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10, ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK, ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, -- ZIP5 main.ZIP5 AS ZIP5, ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE, ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST, ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5, ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST, ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10, ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK, ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF, ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5, ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST, ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10, ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK, ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, FROM ( SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5, STATE, FROM [csgapi:qh_med_supp_tool.v2_TX] WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') AND user_key NOT IN ("6522869941010432", "6277136540237824", "4872666167115776", "6396348765044736", "6139303562313728", "4988973881491456") AND portal_key NOT IN ("5878607637381120") GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5, STATE, ) AS MAIN LEFT JOIN ( SELECT ZIP3, COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE, COMPANY_KEY, RATING_CLASS, AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST, SUM(CASE WHEN TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT, SUM(CASE WHEN LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT, SUM(CASE WHEN TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT, AVG(discounted_rate_order) AS ZIP3_AVG_RANK, AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF, AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, SUM(CASE WHEN DISCOUNTED_TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT, SUM(CASE WHEN DISCOUNTED_LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT, SUM(CASE WHEN DISCOUNTED_TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT, AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK, AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, FROM [csgapi:qh_med_supp_tool.v2_TX] WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') AND user_key NOT IN ("6522869941010432", "6277136540237824") GROUP BY ZIP3, COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ) AS ZIP3_SUB ON main.ZIP3 = ZIP3_SUB.ZIP3 AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS LEFT JOIN ( SELECT ZIP3, EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE FROM [csgapi:qh_med_supp_tool.v2_TX] WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') AND user_key NOT IN ("6522869941010432", "6277136540237824") GROUP BY ZIP3 ) AS ZIP3_COUNT ON main.ZIP3 = ZIP3_COUNT.ZIP3 LEFT JOIN ( SELECT ZIP5, COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE, COMPANY_KEY, RATING_CLASS, AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST, SUM(CASE WHEN TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT, SUM(CASE WHEN LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT, SUM(CASE WHEN TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT, AVG(discounted_rate_order) AS ZIP5_AVG_RANK, AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF, AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, SUM(CASE WHEN DISCOUNTED_TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT, SUM(CASE WHEN DISCOUNTED_LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT, SUM(CASE WHEN DISCOUNTED_TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT, AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK, AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, FROM [csgapi:qh_med_supp_tool.v2_TX] WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') AND user_key NOT IN ("6522869941010432", "6277136540237824") GROUP BY ZIP5, COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ) AS ZIP5_SUB ON main.ZIP5 = ZIP5_SUB.ZIP5 AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS LEFT JOIN ( SELECT ZIP5, EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE FROM [csgapi:qh_med_supp_tool.v2_TX] WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') AND user_key NOT IN ("6522869941010432", "6277136540237824") GROUP BY ZIP5 ) AS ZIP5_COUNT ON main.ZIP5 = ZIP5_COUNT.ZIP5 LEFT JOIN ( SELECT STATE, EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE FROM [csgapi:qh_med_supp_tool.v2_TX] WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') AND user_key NOT IN ("6522869941010432", "6277136540237824") GROUP BY STATE ) AS STATE_COUNT ON main.STATE = STATE_COUNT.STATE
说明:由于BigQuery是一个共享资源,BigQuery会近似查询所需的资源量并相应地分配资源.通过将查询拆分为同一个表上的多个联接,可以通过比最初预期更多的资源来执行计算.有关限制查询的原因的更多技术解释,请参阅Jordan Tigani关于堆栈溢出响应的信息.