Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- AddHCATMixin assures hcat-extension validity and csv-based data-conversion if required
- EuroCropsConverterMixin is a BaseClass for EuroCrops-provided datasets
- EuroLandBaseConverter is a BaseClass for Euroland-provided datasets
- Add some converters that contain splits for ML use cases
- Avoid base property schema override
- Add Converter for Bulgaria
- Remove unintended CommonMark formatting (indentation) from descriptions in converters
Expand Down
4 changes: 2 additions & 2 deletions fiboa_cli/datasets/ai4sf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ..conversion.fiboa_converter import FiboaBaseConverter


class Converter(FiboaBaseConverter):
class Ai4SfConverter(FiboaBaseConverter):
sources = {
# Cambodia
"https://phys-techsciences.datastations.nl/api/access/datafile/100634?gbrecs=true": "2_cambodia_areas.gpkg",
Expand Down Expand Up @@ -72,7 +72,7 @@ class Converter(FiboaBaseConverter):
}

id = "ai4sf"
short_name = "Cambodia/Vietnam (AI4SmallFarms)"
short_name = "Cambodia/Vietnam (AI4SF)"
title = "Field boundaries for Cambodia and Vietnam (AI4SmallFarms)"
# from https://research.tudelft.nl/en/publications/ai4smallfarms-a-dataset-for-crop-field-delineation-in-southeast-a
description = """
Expand Down
20 changes: 20 additions & 0 deletions fiboa_cli/datasets/ai4sf_ml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from .ai4sf import Ai4SfConverter
from .commons.ml_splits import MlSplitsMixin


class Ai4SfMlConverter(MlSplitsMixin, Ai4SfConverter):
    """AI4SF converter variant that attaches an ML ``split`` value to each row."""

    def migrate(self, gdf):
        """Join the per-tile split assignment onto ``gdf`` before migrating.

        The tile file ``tiles_asia.gpkg`` is fetched through
        ``self.download_files`` (using the cache folder remembered in
        ``self.cache``) and loaded with ``self.read_data``. Its ``split``
        column is left-joined onto ``gdf`` via the ``id`` and ``country``
        columns, after which the label ``"validate"`` is renamed to ``"val"``.

        :param gdf: table with ``id`` and ``country`` columns
            (presumably a GeoDataFrame -- confirm against the base converter).
        :return: whatever the parent class's ``migrate`` returns for the
            table with the added ``split`` column.
        """
        join_keys = ["id", "country"]

        # Fetch and load the tile file that carries the split assignment
        tile_sources = {
            "https://phys-techsciences.datastations.nl/api/access/datafile/100418?gbrecs=true": "tiles_asia.gpkg",
        }
        local_paths = self.download_files(tile_sources, self.cache)
        tile_frame = self.read_data(local_paths, **self.open_options)

        # Keep one split per (id, country) so the left join cannot multiply rows
        split_lookup = tile_frame[[*join_keys, "split"]].drop_duplicates(subset=join_keys)
        with_splits = gdf.merge(split_lookup, how="left", on=join_keys)

        # Rename the source's "validate" label to "val"
        with_splits["split"] = with_splits["split"].replace({"validate": "val"})

        return super().migrate(with_splits)
26 changes: 26 additions & 0 deletions fiboa_cli/datasets/commons/ml_splits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
class MlSplitsMixin:
    """Converter mixin that adds a required ML ``split`` column.

    Intended to be listed before a converter class in the bases, so that the
    cooperative ``super()`` calls below reach that converter. On construction
    it derives ``id``, ``title`` and ``short_name`` from the converter's
    values, registers a ``split`` column and declares it in
    ``missing_schemas`` as a required string limited to
    ``train`` / ``val`` / ``test``.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the wrapped converter, then extend its metadata.

        All positional and keyword arguments are forwarded unchanged to the
        next ``__init__`` in the MRO.
        """
        super().__init__(*args, **kwargs)

        # Cache folder last passed to download_files(); None until it is called.
        self.cache = None

        self.id = self.id + "_ml"
        self.title += " with splits"
        self.short_name += " with splits"

        # Rebind fresh containers instead of mutating the inherited ones in
        # place: ``columns`` / ``missing_schemas`` may be class-level (or only
        # shallow-copied) objects shared with the base converter, and in-place
        # edits would leak "split" into it and pile up on re-instantiation.
        self.columns = {**self.columns, "split": "split"}

        schemas = dict(self.missing_schemas)
        required = list(schemas.get("required", []))
        if "split" not in required:
            required.append("split")
        schemas["required"] = required
        schemas["properties"] = {
            **schemas.get("properties", {}),
            "split": {
                "type": "string",
                "enum": ["train", "val", "test"],
            },
        }
        self.missing_schemas = schemas

    def download_files(self, uris, cache_folder=None, **kwargs):
        """Remember ``cache_folder`` and delegate to the parent implementation.

        :param uris: download sources, passed through unchanged.
        :param cache_folder: cache location, stored on ``self.cache`` so that
            later downloads (e.g. from ``migrate``) can reuse it.
        :param kwargs: extra options forwarded to the parent ``download_files``.
        :return: the parent ``download_files`` result.
        """
        # Store cache folder for later use in migrate
        self.cache = cache_folder
        return super().download_files(uris, cache_folder, **kwargs)
Loading