From aec8d1933058b994d521f1b374ef586c90791a3a Mon Sep 17 00:00:00 2001
From: Eric Wolfe
Date: Tue, 17 Dec 2019 16:06:07 -0800
Subject: [PATCH 1/3] Fix wikidata subclass query

---
Review notes (text between the three-dash line and the first diff header
is not part of the commit message):

* Adds `a.type = 'item'` to the inner query and `b.type = 'item'` to the
  join condition, so both the row being expanded and the row it joins to
  must have type 'item'.
* NOTE(review): line breaks, blank context lines and indentation in this
  series were reconstructed from a copy whose whitespace had been
  collapsed. Hunk line counts match the @@ headers, but the exact
  whitespace of context lines is a best guess -- verify against the
  target file before applying.

 wikidata/2019_subclassof_tree.sql | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/wikidata/2019_subclassof_tree.sql b/wikidata/2019_subclassof_tree.sql
index 8316a63..0e7a0c1 100644
--- a/wikidata/2019_subclassof_tree.sql
+++ b/wikidata/2019_subclassof_tree.sql
@@ -5,9 +5,10 @@ SELECT a.*, b.en_label subclass_of_label, 1 level
 FROM (
   SELECT a.numeric_id, en_label, b.numeric_id subclass_of_numeric_id
   FROM `fh-bigquery.wikidata.wikidata_latest_20190822` a, UNNEST(a.subclass_of) b
+  WHERE a.type = 'item'
 ) a
 JOIN `fh-bigquery.wikidata.wikidata_latest_20190822` b
-ON a.subclass_of_numeric_id=b.numeric_id
+ON a.subclass_of_numeric_id = b.numeric_id AND b.type = 'item'
 ;
 
 LOOP

From 2f20c4b5690c342cb144ffe2f67ae17cedee818e Mon Sep 17 00:00:00 2001
From: Eric Wolfe
Date: Tue, 17 Dec 2019 16:06:56 -0800
Subject: [PATCH 2/3] Use newer wikidata table for subclasses

---
Review notes:

* Mechanical rename only. The source table
  `fh-bigquery.wikidata.wikidata_latest_20190822` becomes
  `fh-bigquery.wikidata.wikidata_2019`, and the output table
  `wikidata.subclasses_20190822` becomes `wikidata.subclasses_2019`,
  on each of the 9 lines that reference them. No query logic changes.

 wikidata/2019_subclassof_tree.sql | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/wikidata/2019_subclassof_tree.sql b/wikidata/2019_subclassof_tree.sql
index 0e7a0c1..1fdbae6 100644
--- a/wikidata/2019_subclassof_tree.sql
+++ b/wikidata/2019_subclassof_tree.sql
@@ -1,13 +1,13 @@
-CREATE OR REPLACE TABLE `wikidata.subclasses_20190822`
+CREATE OR REPLACE TABLE `wikidata.subclasses_2019`
 AS
 
 SELECT a.*, b.en_label subclass_of_label, 1 level
 FROM (
   SELECT a.numeric_id, en_label, b.numeric_id subclass_of_numeric_id
-  FROM `fh-bigquery.wikidata.wikidata_latest_20190822` a, UNNEST(a.subclass_of) b
+  FROM `fh-bigquery.wikidata.wikidata_2019` a, UNNEST(a.subclass_of) b
   WHERE a.type = 'item'
 ) a
-JOIN `fh-bigquery.wikidata.wikidata_latest_20190822` b
+JOIN `fh-bigquery.wikidata.wikidata_2019` b
 ON a.subclass_of_numeric_id = b.numeric_id AND b.type = 'item'
 ;
 
@@ -15,21 +15,21 @@ LOOP
   BEGIN
     DECLARE row_count INT64;
     DECLARE row_diff INT64;
-    SET row_count = (SELECT COUNT(*) FROM `wikidata.subclasses_20190822`)
+    SET row_count = (SELECT COUNT(*) FROM `wikidata.subclasses_2019`)
     ;
 
 
-    INSERT INTO `wikidata.subclasses_20190822`
+    INSERT INTO `wikidata.subclasses_2019`
 
     SELECT a.numeric_id, a.en_label, b.subclass_of_numeric_id, b.subclass_of_label, MIN(b.level+1) level
-    FROM `wikidata.subclasses_20190822` a
-    JOIN `wikidata.subclasses_20190822` b
+    FROM `wikidata.subclasses_2019` a
+    JOIN `wikidata.subclasses_2019` b
     ON a.subclass_of_numeric_id = b.numeric_id
-    WHERE STRUCT(a.numeric_id,b.subclass_of_numeric_id) NOT IN (SELECT STRUCT(numeric_id,subclass_of_numeric_id) FROM `wikidata.subclasses_20190822`)
+    WHERE STRUCT(a.numeric_id,b.subclass_of_numeric_id) NOT IN (SELECT STRUCT(numeric_id,subclass_of_numeric_id) FROM `wikidata.subclasses_2019`)
     GROUP BY 1,2,3,4
     ;
 
-    SET row_diff = (SELECT COUNT(*) FROM `wikidata.subclasses_20190822`) - row_count;
+    SET row_diff = (SELECT COUNT(*) FROM `wikidata.subclasses_2019`) - row_count;
     IF row_diff = 0 THEN
       LEAVE;
     END IF;

From 27d1d6cac4e8d7bac99c7d51ff6b71e9db626fec Mon Sep 17 00:00:00 2001
From: Eric Wolfe
Date: Tue, 17 Dec 2019 18:12:14 -0800
Subject: [PATCH 3/3] Wikidata subclass levels should be summed

---
Review notes:

* The INSERT joins the table to itself on
  `a.subclass_of_numeric_id = b.numeric_id` and emits the pair
  (a.numeric_id, b.subclass_of_numeric_id), i.e. row a followed by row b.
  The level of that combined pair is therefore `a.level + b.level`.
  The previous `b.level + 1` equals that only when `a.level = 1`.
* MIN(...) keeps the smallest sum within each GROUP BY 1,2,3,4 group.

 wikidata/2019_subclassof_tree.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wikidata/2019_subclassof_tree.sql b/wikidata/2019_subclassof_tree.sql
index 1fdbae6..ff5b8b6 100644
--- a/wikidata/2019_subclassof_tree.sql
+++ b/wikidata/2019_subclassof_tree.sql
@@ -21,7 +21,7 @@ LOOP
 
     INSERT INTO `wikidata.subclasses_2019`
 
-    SELECT a.numeric_id, a.en_label, b.subclass_of_numeric_id, b.subclass_of_label, MIN(b.level+1) level
+    SELECT a.numeric_id, a.en_label, b.subclass_of_numeric_id, b.subclass_of_label, MIN(a.level + b.level) level
     FROM `wikidata.subclasses_2019` a
     JOIN `wikidata.subclasses_2019` b
     ON a.subclass_of_numeric_id = b.numeric_id