Skip to content

feat: add Land Cover Thematic Accuracy indicator #893

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 40 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
c5a397e
feat: add dummy for CORINE comparison indicator
matthiasschaub Apr 29, 2025
9323271
success init of corine-comparison indicator
Gigaszi Apr 29, 2025
3e3a556
first draft of preprocess function
Gigaszi Apr 29, 2025
663bec0
fix: add template.yaml to corine comparison indicator
Gigaszi Apr 29, 2025
030a00a
make sql query postgres compatible
matthiasschaub Apr 29, 2025
e0a423a
fix: error in query and string formatting
Gigaszi Apr 29, 2025
393117a
add pytest-asyncio as dev dep
matthiasschaub Apr 29, 2025
0bab6c9
add fetch func to geodatabase module
matthiasschaub Apr 29, 2025
9ce2264
feat: get clc_classes from database
Gigaszi Apr 29, 2025
cb5d871
corine-comparison: calculate f1-score
matthiasschaub Apr 29, 2025
3548d19
feat(corine-comparison): request model and approval tests
matthiasschaub Apr 30, 2025
b8ce5ea
refactor: rename corine comparison to land cover thematic accuracy
matthiasschaub Apr 30, 2025
618a9c5
add land cover thematic accuracy request model and api tests
matthiasschaub Apr 30, 2025
3703e54
test: ignore pytest-asyncio deprecation warning
matthiasschaub May 2, 2025
de4ee2b
test(approvaltests): add postfix parameter to PytestNamer
matthiasschaub May 2, 2025
11d43a1
build: add pygments as dev dep to make pytest pretty
matthiasschaub May 5, 2025
cbbbac6
test(lc-thematic-accuracy): verify result description
matthiasschaub May 5, 2025
49579cc
test: do not test invalid result field
matthiasschaub May 5, 2025
8e4243e
test: add request context mock and CotrineEnum class
Gigaszi May 5, 2025
346bf80
feat(api): add land-cover-thematic-accuracy endpoint
matthiasschaub May 6, 2025
e1ad922
test(lc-thematic-accuracy): approve description and report
matthiasschaub May 6, 2025
52584d4
test(approvaltests reporter): create png if not exists
matthiasschaub May 6, 2025
02a2c34
test: add dummy test for figure
matthiasschaub May 6, 2025
0e7d760
feat: first draft of result figure
matthiasschaub May 6, 2025
34e4f50
feat: add functionality to request single corine class
Gigaszi May 6, 2025
c0f38bf
tests: add integration tests for single class functionalities
Gigaszi May 6, 2025
ccb61ce
feat: add query for single clc class
Gigaszi May 6, 2025
e08c0de
fix: add missing fixture data for single class request mock
Gigaszi May 6, 2025
f27e8d0
feat: new draft for indicator figure
Gigaszi May 6, 2025
f6c7d6e
use new table name for corine table
matthiasschaub May 7, 2025
465e858
fix: use absolute import statements
matthiasschaub May 8, 2025
c012952
fix: add bkg project to lulc topic
matthiasschaub May 8, 2025
0d1be7f
refactor: clearer distinction between multi and single class workflow
matthiasschaub May 8, 2025
fa89e41
test: rename tests and re-approve
matthiasschaub May 8, 2025
e2e1e1e
improve plotly figure
matthiasschaub May 8, 2025
d13063d
refactor: show bar chart for multi class
matthiasschaub May 8, 2025
60939cc
feat: improve multiclass figure
matthiasschaub May 8, 2025
103584a
feat: add new binary heatmap for single class
Gigaszi May 8, 2025
77dd108
feat: add result descriptions
Gigaszi May 8, 2025
c803cc2
feat: improve multiclass figure
matthiasschaub May 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions approvaltests_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"directory": "tests/approvals"
}
25 changes: 25 additions & 0 deletions ohsome_quality_api/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
AttributeCompletenessKeyRequest,
IndicatorDataRequest,
IndicatorRequest,
LandCoverThematicAccuracyRequest,
)
from ohsome_quality_api.api.response_models import (
AttributeMetadataResponse,
Expand Down Expand Up @@ -267,6 +268,30 @@ async def post_attribute_completeness(
return await _post_indicator(request, "attribute-completeness", parameters)


# Dedicated POST endpoint for the Land Cover Thematic Accuracy indicator.
# It is registered on a fixed path (no `{key}` path parameter) and accepts the
# indicator specific request model `LandCoverThematicAccuracyRequest`.
# NOTE(review): this route is declared ahead of the generic `/indicators/{key}`
# route; presumably this ordering is required so that the fixed path is matched
# first -- confirm before moving it.
@app.post(
    "/indicators/land-cover-thematic-accuracy",
    tags=["indicator"],
    response_model=Union[IndicatorJSONResponse, IndicatorGeoJSONResponse],
    # Document both supported response media types for HTTP 200.
    responses={
        200: {
            "content": {
                "application/json": {
                    "schema": {"$ref": "#/components/schemas/IndicatorJSONResponse"}
                },
                "application/geo+json": {
                    "schema": {"$ref": "#/components/schemas/IndicatorGeoJSONResponse"}
                },
            },
        },
    },
)
async def post_land_cover_thematic_accuracy(
    request: Request, parameters: LandCoverThematicAccuracyRequest
) -> Any:
    """Request the Land Cover Thematic Accuracy indicator for your area of interest."""
    # The indicator key is hard-coded because it cannot be taken from the path.
    return await _post_indicator(request, "land-cover-thematic-accuracy", parameters)


@app.post(
"/indicators/{key}",
tags=["indicator"],
Expand Down
55 changes: 55 additions & 0 deletions ohsome_quality_api/api/request_models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from enum import Enum
from typing import Dict, List, Self

import geojson
Expand Down Expand Up @@ -172,6 +173,60 @@ def validate_indicator_topic_combination(self) -> Self:
return self


class CorineLandCoverClassLevel1(Enum):
    """Corine Land Cover Class Level 1."""

    # The value is the first digit of the two-digit level 2 class codes
    # defined in `CorineLandCoverClass` (e.g. 1 -> 11, 12, 13, 14).
    ARTIFICIAL_AREAS = 1
    AGRICULTURAL_AREAS = 2
    FOREST_AND_SEMINATURAL_AREAS = 3
    WETLANDS = 4
    WATER_BODIES = 5


class CorineLandCoverClass(Enum):
    """Corine Land Cover Class Level 2."""

    # The first digit of each value is the level 1 class
    # (see `CorineLandCoverClassLevel1`), the second digit the level 2 class.
    # TODO: Use more descriptive names
    ARTIFICIAL_AREAS_1 = 11  # Urban fabric
    ARTIFICIAL_AREAS_2 = 12  # Industrial, commercial and transport units
    ARTIFICIAL_AREAS_3 = 13  # Mine, dump and construction sites
    ARTIFICIAL_AREAS_4 = 14  # Artificial non-agricultural vegetated areas
    AGRICULTURAL_AREAS_1 = 21  # Arable land
    AGRICULTURAL_AREAS_2 = 22  # Permanent crops
    AGRICULTURAL_AREAS_3 = 23  # Pastures
    AGRICULTURAL_AREAS_4 = 24  # Heterogeneous agricultural areas
    FOREST_AND_SEMINATURAL_AREAS_1 = 31  # Forest
    FOREST_AND_SEMINATURAL_AREAS_2 = 32  # Shrubs and/or herbaceous vegetation
    FOREST_AND_SEMINATURAL_AREAS_3 = 33  # Open spaces with little or no vegetation
    WETLANDS_1 = 41  # Inland wetlands
    WETLANDS_2 = 42  # Coastal wetlands
    WATER_BODIES_1 = 51  # Inland waters
    WATER_BODIES_2 = 52  # Marine waters


class LandCoverThematicAccuracyRequest(IndicatorRequest):
    """Model for the `/indicators/land-cover-thematic-accuracy` endpoint."""

    # Optional CORINE Land Cover class (level 2). Defaults to `None`.
    corine_class: CorineLandCoverClass | None = Field(
        default=None,
        title="CORINE Land Cover class",
        # NOTE: the parentheses only group the implicitly concatenated string
        # parts. Do not add a trailing comma: that would turn the description
        # into a one-element tuple instead of a string.
        description=(
            "CORINE Land Cover is a pan-European land cover inventory with "
            "thematic classes"
        ),
    )

    @model_validator(mode="after")
    def validate_indicator_topic_combination(self) -> Self:
        """Check that the requested topic is valid for this indicator.

        Raises:
            ValueError: If `land-cover-thematic-accuracy` is not among the
                valid indicators of the requested topic.
        """
        # NOTE: overrides parent validator. That is because endpoint of
        # indicator/land-cover-thematic-accuracy is fixed and therefore path
        # parameters of request context empty
        valid_indicators = get_valid_indicators(self.topic.key)
        if "land-cover-thematic-accuracy" not in valid_indicators:
            raise ValueError(
                "Invalid combination of indicator and topic: {} and {}".format(
                    "land-cover-thematic-accuracy",
                    self.topic.key,
                )
            )
        return self


class IndicatorDataRequest(BaseBpolys):
"""Model for the `/indicators/mapping-saturation/data` endpoint.

Expand Down
5 changes: 5 additions & 0 deletions ohsome_quality_api/geodatabase/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ async def get_connection():
await conn.close()


async def fetch(query: str, *args) -> list:
    """Execute `query` on a connection from `get_connection` and return the rows.

    Args:
        query: SQL statement to execute.
        *args: Positional query arguments, forwarded unchanged to the
            connection's `fetch` method.

    Returns:
        The result of the connection's `fetch` call (presumably a list of
        records -- confirm against the database driver).
    """
    async with get_connection() as connection:
        rows = await connection.fetch(query, *args)
    return rows


async def get_shdi(bpoly: Feature | FeatureCollection) -> list[Record]:
"""Get Subnational Human Development Index (SHDI) for a bounding polygon.

Expand Down
7 changes: 7 additions & 0 deletions ohsome_quality_api/indicators/indicators.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,10 @@ attribute-completeness:
Derive the ratio of OSM features compared to features which
match additional expected tags (e.g. amenity=hospital vs
amenity=hospital and wheelchair=yes).
land-cover-thematic-accuracy:
name: Land Cover Thematic Accuracy
projects:
- bkg
quality_dimension: thematic-accuracy
description: >-
Thematic Accuracy of Land Cover in OpenStreetMap compared to reference data.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import logging
from datetime import datetime, timezone
from pathlib import Path
from string import Template

import plotly.graph_objects as pgo
from geojson import Feature
from sklearn.metrics import classification_report, confusion_matrix, f1_score

from ohsome_quality_api.api.request_models import (
CorineLandCoverClass,
CorineLandCoverClassLevel1,
)
from ohsome_quality_api.geodatabase import client
from ohsome_quality_api.indicators.base import BaseIndicator
from ohsome_quality_api.topics.models import BaseTopic as Topic

# Source: https://land.copernicus.eu/content/corine-land-cover-nomenclature-guidelines/docs/pdf/CLC2018_Nomenclature_illustrated_guide_20190510.pdf
# Human readable names of the CORINE Land Cover level 2 classes.
# The names are shown to users, e.g. as trace names in the result figure.
corine_classes = {
    CorineLandCoverClass(11): "Urban fabric",
    CorineLandCoverClass(12): "Industrial, commercial and transport units",
    CorineLandCoverClass(13): "Mine, dump and construction sites",
    CorineLandCoverClass(14): "Artificial non-agricultural vegetated areas",
    CorineLandCoverClass(21): "Arable land",
    CorineLandCoverClass(22): "Permanent crops",
    CorineLandCoverClass(23): "Pastures",
    CorineLandCoverClass(24): "Heterogeneous agricultural areas",
    CorineLandCoverClass(31): "Forest",
    CorineLandCoverClass(32): "Shrubs and/or herbaceous vegetation associations",
    CorineLandCoverClass(33): "Open spaces with little or no vegetation",
    CorineLandCoverClass(41): "Inland wetlands",
    CorineLandCoverClass(42): "Coastal wetlands",
    CorineLandCoverClass(51): "Inland waters",
    CorineLandCoverClass(52): "Marine waters",
}
# Human readable names of the CORINE Land Cover level 1 classes, keyed by the
# first digit of the level 2 class code.
corine_top_level_class = {
    1: "Artificial areas",
    2: "Agricultural areas",
    3: "Forest and semi-natural areas",
    4: "Wetlands",
    5: "Water bodies",
}


class LandCoverThematicAccuracy(BaseIndicator):
    """Thematic accuracy of OSM land cover compared to CORINE Land Cover.

    The land cover class derived from OSM is compared with the CORINE class
    of the same area. Agreement is expressed as an area-weighted F1 score,
    either over all classes or, if `corine_class` is given, for that single
    class against all other classes.

    Only shows class for which OSM has data.

    Complements the Corine Completeness Indicator.

    TODO: Complete description.
    """

    def __init__(
        self,
        topic: Topic,
        feature: Feature,
        corine_class: CorineLandCoverClass | int | None = None,
    ) -> None:
        """Initialize the indicator.

        Args:
            topic: Topic of the indicator.
            feature: Area of interest.
            corine_class: Optional CORINE Land Cover class (level 2). If given
                the single class workflow is used, otherwise all classes are
                evaluated.
        """
        super().__init__(topic=topic, feature=feature)
        self.corine_class = corine_class

    def _corine_class_code(self) -> int:
        """Return the numeric code of the requested CORINE class.

        Works for an enum member as well as for its plain value.

        Raises:
            ValueError: If `corine_class` is not a valid CORINE class.
        """
        return CorineLandCoverClass(self.corine_class).value

    async def preprocess(self) -> None:
        """Fetch CORINE and OSM classes with their areas from the geodatabase."""
        query_dir = Path(__file__).parent
        geometry = str(self.feature["geometry"])
        if self.corine_class is not None:
            query = (query_dir / "query-single-class.sql").read_text(
                encoding="utf-8"
            )
            # Pass the plain class code: an enum member is not a valid query
            # argument.
            # NOTE(review): assumes the class columns hold the numeric class
            # codes, as `calculate` compares them with the enum value.
            results = await client.fetch(query, geometry, self._corine_class_code())
        else:
            query = (query_dir / "query-all-classes.sql").read_text(
                encoding="utf-8"
            )
            results = await client.fetch(query, geometry)
        self.clc_classes_corine = [r["clc_class_corine"] for r in results]
        self.clc_classes_osm = [r["clc_class_osm"] for r in results]
        self.areas = [r["area"] / 1_000_000 for r in results]  # sqkm
        # TODO: take real timestamps from data
        self.result.timestamp_osm = datetime.now(timezone.utc)
        self.timestamp_corine = datetime.now(timezone.utc)

    def calculate(self) -> None:
        """Calculate the area-weighted F1 score and derive the result class."""
        matrix_labels = None
        if self.corine_class is not None:
            # Single class workflow: reduce to a binary problem
            # (1: requested class, 0: any other class).
            target = self._corine_class_code()
            self.clc_classes_osm = [
                1 if clc_class == target else 0 for clc_class in self.clc_classes_osm
            ]
            self.clc_classes_corine = [
                1 if clc_class == target else 0
                for clc_class in self.clc_classes_corine
            ]
            # Always produce a 2x2 matrix, even if one of the two labels does
            # not occur. The single class figure relies on exactly two labels.
            matrix_labels = [0, 1]

        # CORINE is treated as ground truth, OSM as prediction.
        self.f1_score = f1_score(
            self.clc_classes_corine,
            self.clc_classes_osm,
            average="weighted",
            sample_weight=self.areas,
            labels=list(set(self.clc_classes_corine)),
        )
        self.confusion_matrix = confusion_matrix(
            self.clc_classes_corine,
            self.clc_classes_osm,
            labels=matrix_labels,
            sample_weight=self.areas,
            normalize="all",
        )
        self.result.value = self.f1_score
        if self.f1_score > 0.8:
            self.result.class_ = 5
        elif self.f1_score > 0.5:
            self.result.class_ = 3
        else:
            self.result.class_ = 1

        template = Template(self.templates.result_description)
        description = template.substitute(
            score=round(self.f1_score * 100, 2),
        )
        self.result.description = " ".join(
            (description, self.templates.label_description[self.result.label])
        )

        # TODO: UndefinedMetricWarning
        # Recall is ill-defined and being set to 0.0 in labels with no
        # true samples. Use `zero_division` parameter to control this
        # behavior.
        self.report = classification_report(
            self.clc_classes_corine,
            self.clc_classes_osm,
            sample_weight=self.areas,
        )

    def create_figure(self) -> None:
        """Create the result figure unless the result is undefined."""
        if self.result.label == "undefined":
            logging.info("Result is undefined. Skipping figure creation.")
            return

        if self.corine_class is not None:
            self._create_figure_single_class()
        else:
            self._create_figure_multi_class()

    def _create_figure_multi_class(self):
        """Create a bar chart with one bar (F1 score in percent) per class."""
        # Compute the label order once so that scores, names and x values are
        # guaranteed to line up. Sorting yields bars in ascending class order.
        labels = sorted(set(self.clc_classes_corine))
        self.f1_scores = f1_score(
            self.clc_classes_corine,
            self.clc_classes_osm,
            average=None,  # for each
            sample_weight=self.areas,
            labels=labels,
        )

        bars = []
        for clc_class, score in zip(labels, self.f1_scores):
            # The first digit of the class code is the level 1 class.
            name_level_1 = (
                CorineLandCoverClassLevel1(int(str(clc_class)[0]))
                .name.replace("_", " ")
                .capitalize()
            )
            bars.append(
                pgo.Bar(
                    name=corine_classes[CorineLandCoverClass(clc_class)],
                    x=[str(clc_class)],
                    y=[score * 100],
                    legendgroup=name_level_1,
                    legendgrouptitle_text=name_level_1,
                )
            )
        fig = pgo.Figure(
            data=bars,
            layout=pgo.Layout(
                {
                    "yaxis_range": [0, 100],
                    "xaxis_dtick": 1,
                    "autotypenumbers": "strict",
                    "legend": {
                        "yanchor": "top",
                        "x": 0,
                        "y": -0.1,
                        "orientation": "h",
                    },
                },
                showlegend=True,
            ),
            # updatemenus=[
            #     {
            #         "type": "buttons",
            #         "buttons": [
            #             {
            #                 "label": "≡",
            #                 "method": "relayout",
            #                 "args": ["showlegend", False],
            #                 "args2": ["showlegend", True],
            #             }
            #         ],
            #     }
            # ],
        )

        raw = fig.to_dict()
        raw["layout"].pop("template", None)  # remove boilerplate
        self.result.figure = raw

    def _create_figure_single_class(self):
        """Create a heatmap of the binary confusion matrix."""
        # Order matches the binary labels 0 and 1 set in `calculate`.
        class_labels = [
            "Other classes",
            CorineLandCoverClass(self.corine_class).name,
        ]
        fig = pgo.Figure(
            data=pgo.Heatmap(
                z=self.confusion_matrix,
                x=class_labels,
                y=class_labels,
                text=self.confusion_matrix,
                texttemplate="%{text:.2f}",
            ),
            # layout=pgo.Layout(title={"subtitle": {"text": ", ".join(class_labels)}}),
        )
        # Rows of the confusion matrix (y axis) are the true classes (CORINE),
        # columns (x axis) are the predicted classes (OSM).
        fig.update_yaxes(title_text="Corine Land Cover Class (actual)")
        fig.update_xaxes(title_text="Corine Land Cover Class in OSM")
        # TODO add legend with corine land cover classes mapped to meaningful titles?

        raw = fig.to_dict()
        raw["layout"].pop("template", None)  # remove boilerplate
        self.result.figure = raw
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Select CORINE class, OSM class and area of all polygons of the OSM/CORINE
-- intersection table which intersect the area of interest.
-- $1: geometry of the area of interest as GeoJSON (SRID is set to 4326)
WITH bpoly AS (
    -- split multipolygon into list of polygons for more efficient processing
    SELECT
        (ST_Dump(ST_SetSRID(ST_GeomFromGeoJSON($1), 4326))).geom AS geometry
)
SELECT
    o.CLC_class AS clc_class_corine,
    o.osm_CLC_class AS clc_class_osm,
    CASE
        -- polygon lies completely within the area of interest: use stored area
        WHEN ST_Within(o.geometry, b.geometry) THEN o.area
        -- otherwise compute the area of the part inside the area of interest
        ELSE ST_Area(ST_Intersection(o.geometry, b.geometry)::geography)
    END AS area
FROM
    osm_corine_2021_deu_intersection AS o
    INNER JOIN bpoly AS b ON ST_Intersects(o.geometry, b.geometry);
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Select CORINE class, OSM class and area of all polygons of the OSM/CORINE
-- intersection table which intersect the area of interest and which have the
-- requested class in either CORINE or OSM.
-- $1: geometry of the area of interest as GeoJSON (SRID is set to 4326)
-- $2: requested land cover class
-- TODO: will index on classes make this query faster?
WITH bpoly AS (
    -- split multipolygon into list of polygons
    SELECT
        (ST_Dump(ST_SetSRID(ST_GeomFromGeoJSON($1), 4326))).geom AS geometry
)
SELECT
    o.CLC_class AS clc_class_corine,
    o.osm_CLC_class AS clc_class_osm,
    CASE
        -- polygon lies completely within the area of interest: use stored area
        WHEN ST_Within(o.geometry, b.geometry) THEN o.area
        -- otherwise compute the area of the part inside the area of interest
        ELSE ST_Area(ST_Intersection(o.geometry, b.geometry)::geography)
    END AS area
FROM
    osm_corine_2021_deu_intersection AS o
    INNER JOIN bpoly AS b ON ST_Intersects(o.geometry, b.geometry)
WHERE
    o.CLC_class = $2
    OR o.osm_CLC_class = $2;
Loading