Skip to content

Associating osmType MdClass by common ancestor

Peter edited this page Aug 8, 2018 · 3 revisions

Basic counting of elements with wikidata-tag:

osm_type feature_type elements
relation boundary 5332
relation multipolygon 432
relation route 203
relation waterway 117
way waterway 62
relation public 56
node enforcement 10
way boundary 8
relation site 3
select osm_type, feature_type, 
       count(*) as n, (array_agg_mult(wd_ids))[1:10] 
from wdosm.li_raw2 
group by 1,2 order by 3 desc,1,2;
 w        |                  | 69961 | {9639950,9639950,9639950,5377591,8210223,6321998,1050668,632566,2983699,10351316}
 r        | boundary         |  5332 | {43233,41428,155,320487,1804338,772569,2008789,2008521,2329733,2009222}
 n        |                  |  4166 | {842713,330208,174,841231,184403,967648,171617,4035,330175,275109}
 r        | multipolygon     |   432 | {876280,10315775,10315703,1349833,22036284,804681,3021931,614720,10315670,10388380}
 r        | route            |   186 | {3415363,3415324,2877432,2877444,2396644,3444776,2877428,578757,2877445,2333617}
 r        | waterway         |   117 | {142604,6121136,916349,117615,118251,941329,7885257,118771,14290,1165761}
 w        | waterway         |    62 | {386331,10362727,16928354,10362727,16928354,16928354,386331,16928354,386331,386331}
 r        | public_transport |    56 | {10276070,7195875,10275638,10275651,10276226,3293864,5435213,5692522,7396901,4736839}
 r        | route_master     |    17 | {3296728,2333639,7284876,10318641,5430612,1537708,6553130,10388583,10318644,30926895}
 n        | enforcement      |    10 | {}
 w        | boundary         |     8 | {10395956,2011046}
 r        | site             |     3 | {465960,2628446,465960}
 r        | watershed        |     2 | {582321}

Using some better format

CREATE FUNCTION wdosm.wd_id_sparql(  bigint[] ) RETURNS text AS $f$
  SELECT 'wd:Q'||array_to_string(  array_agg(x) ,  ', wd:Q'  )
  FROM unnest($1) t(x)
$f$ language SQL IMMUTABLE;

SELECT osm_type, feature_type, 
       count(*) as n, wdosm.wd_id_sparql((array_agg_mult(wd_ids))[1:100]) 
FROM wdosm.li_raw2 
GROUP BY 1,2 
order by 3 desc,1,2;

Check common ancestor

# 1. Show the most frequents parent (in "item instance of parent")
SELECT ?of ?ofLabel (COUNT(?of) AS ?count)
WHERE 
{
  ?item wdt:P31 ?of.
  FILTER ( ?item IN (
    wd:Q43233, wd:Q41428, wd:Q155, wd:Q320487, wd:Q632789, wd:Q1804813, 
    wd:Q2008271, wd:Q389536, wd:Q2008249, wd:Q1008242, wd:Q2008293, 
    wd:Q1967407, wd:Q2011913, wd:Q1804899, wd:Q2008596, wd:Q955985, 
    wd:Q655734, wd:Q2009474, wd:Q2008337, wd:Q2008573, wd:Q2008277, 
    wd:Q651613, wd:Q1805166
  ) )
  SERVICE wikibase:label { 
     bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
  }
}
GROUP BY ?of ?ofLabel 
ORDER BY DESC(COUNT(?of)) ?of

# 2. Check the "commom ancestor" of the most frequent cases... supposing Q1048835

# 3. Check if all are indirect instances of the supposed superclass 
SELECT (COUNT(DISTINCT ?item) AS ?count) 
WHERE {
  ?item (wdt:P31|wdt:P279)* wd:Q1048835 .
  FILTER ( ?item IN (
    wd:Q43233, wd:Q41428, wd:Q155, wd:Q320487, wd:Q632789, wd:Q1804813, 
    wd:Q2008271, wd:Q389536, wd:Q2008249, wd:Q1008242, wd:Q2008293, 
    wd:Q1967407, wd:Q2011913, wd:Q1804899, wd:Q2008596, wd:Q955985, 
    wd:Q655734, wd:Q2009474, wd:Q2008337, wd:Q2008573, wd:Q2008277, 
    wd:Q651613, wd:Q1805166
  ) )
  SERVICE wikibase:label { 
     bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
  }
}

# PS: you can also list all possible sub-classes, counting each one
SELECT ?x ?xLabel (COUNT(?x) AS ?count)
WHERE 
{
  ?x (wdt:P31|wdt:P279)* wd:Q1048835 .
  ?item wdt:P31 ?x .
  FILTER ( ?item IN (
    wd:Q43233, wd:Q41428, wd:Q155, wd:Q320487, wd:Q632789, wd:Q1804813, 
    wd:Q2008271, wd:Q389536, wd:Q2008249, wd:Q1008242, wd:Q2008293, 
    wd:Q1967407, wd:Q2011913, wd:Q1804899, wd:Q2008596, wd:Q955985, 
    wd:Q655734, wd:Q2009474, wd:Q2008337, wd:Q2008573, wd:Q2008277, 
    wd:Q651613, wd:Q1805166
  ) )
  SERVICE wikibase:label { 
     bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
  }
} GROUP BY ?x ?xLabel