From 03bb7ca8c06946a49a0304b454c4ad53655c0e29 Mon Sep 17 00:00:00 2001 From: danlooo Date: Thu, 8 Jan 2026 16:36:37 +0100 Subject: [PATCH 1/6] Add tables _traits and _traits_paths --- pathtraits/db.py | 58 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/pathtraits/db.py b/pathtraits/db.py index 2b5457d..35982a5 100644 --- a/pathtraits/db.py +++ b/pathtraits/db.py @@ -90,7 +90,7 @@ def __init__(self, db_path): self.cursor.row_factory = TraitsDB.row_factory init_path_table_query = """ - CREATE TABLE IF NOT EXISTS path ( + CREATE TABLE IF NOT EXISTS _paths ( id INTEGER PRIMARY KEY AUTOINCREMENT, path text NOT NULL UNIQUE ); @@ -99,9 +99,29 @@ def __init__(self, db_path): init_path_index_query = """ CREATE INDEX IF NOT EXISTS idx_path_path - ON path(path); + ON _paths(path); """ self.execute(init_path_index_query) + + init_traits_table_query = """ + CREATE TABLE IF NOT EXISTS _traits ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + trait text NOT NULL UNIQUE + ); + """ + self.execute(init_traits_table_query) + + init_traits_paths_table_query = """ + CREATE TABLE IF NOT EXISTS _traits_paths ( + trait INTEGER, + path INTEGER, + FOREIGN KEY(trait) REFERENCES _traits(id), + FOREIGN KEY(path) REFERENCES path(id), + UNIQUE(trait, path) + ); + """ + self.execute(init_traits_paths_table_query) + self.update_traits() # pylint: disable=R1710 @@ -159,13 +179,13 @@ def put_path_id(self, path): :param path: path to put to the data base :returns: the id of that path """ - get_row_query = f"SELECT id FROM path WHERE path = '{path}' LIMIT 1;" + get_row_query = f"SELECT id FROM _paths WHERE path = '{path}' LIMIT 1;" res = self.execute(get_row_query).fetchone() if res: return res["id"] # create - self.put("path", path=path) - path_id = self.get("path", path=path, cols="id")["id"] + self.put("_paths", path=path) + path_id = self.get("_paths", path=path, cols="id")["id"] return path_id @staticmethod @@ -234,7 +254,7 @@ def 
put_data_view(self): if self.traits: join_query = " ".join( [ - f"LEFT JOIN [{x}] ON [{x}].path = path.id \n" + f"LEFT JOIN [{x}] ON [{x}].path = _paths.id \n" for x in self.traits if x != "path" ] @@ -242,15 +262,15 @@ def put_data_view(self): create_view_query = f""" CREATE VIEW data AS - SELECT path.path, [{'], ['.join(self.traits)}] - FROM path + SELECT _paths.path, [{'], ['.join(self.traits)}] + FROM _paths {join_query}; """ else: create_view_query = """ CREATE VIEW data AS - SELECT path.path - FROM path; + SELECT _paths.path + FROM _paths; """ self.execute(create_view_query) @@ -259,14 +279,15 @@ def update_traits(self): Get all traits from the database """ get_traits_query = """ - SELECT name - FROM sqlite_master - WHERE type='table' - AND name NOT LIKE 'sqlite_%' - AND name != 'path' - ORDER BY name; + SELECT trait + FROM _traits + ORDER BY trait; """ - traits = self.execute(get_traits_query).fetchall() + traits = self.execute(get_traits_query) + if traits is not None: + traits = traits.fetchall() + else: + traits = [] self.traits = [list(x.values())[0] for x in traits] self.put_data_view() @@ -295,6 +316,7 @@ def create_trait_table(self, trait_name, value_type): ); """ self.execute(add_table_query) + self.put("_traits", trait=trait_name) self.update_traits() def put_trait(self, path_id, trait_name, value, update=True): @@ -307,6 +329,8 @@ def put_trait(self, path_id, trait_name, value, update=True): :param value: trait value """ kwargs = {"path": path_id, trait_name: value} + trait_id = self.get("_traits", trait=trait_name)["id"] + self.put("_traits_paths", trait=trait_id, path=path_id) self.put(trait_name, condition=f"path = {path_id}", update=update, **kwargs) def add_pathpair(self, pair: PathPair): From e26f22212804dea13997f80d5ce12afd17101d80 Mon Sep 17 00:00:00 2001 From: danlooo Date: Fri, 9 Jan 2026 10:04:18 +0100 Subject: [PATCH 2/6] Refactor column names --- pathtraits/db.py | 74 ++++++++++++++++++++++++------------------------ 1 file changed, 37
insertions(+), 37 deletions(-) diff --git a/pathtraits/db.py b/pathtraits/db.py index 35982a5..4014c6c 100644 --- a/pathtraits/db.py +++ b/pathtraits/db.py @@ -90,8 +90,8 @@ def __init__(self, db_path): self.cursor.row_factory = TraitsDB.row_factory init_path_table_query = """ - CREATE TABLE IF NOT EXISTS _paths ( - id INTEGER PRIMARY KEY AUTOINCREMENT, + CREATE TABLE IF NOT EXISTS _path ( + path_id INTEGER PRIMARY KEY AUTOINCREMENT, path text NOT NULL UNIQUE ); """ @@ -99,30 +99,30 @@ def __init__(self, db_path): init_path_index_query = """ CREATE INDEX IF NOT EXISTS idx_path_path - ON _paths(path); + ON _path(path); """ self.execute(init_path_index_query) - init_traits_table_query = """ - CREATE TABLE IF NOT EXISTS _traits ( - id INTEGER PRIMARY KEY AUTOINCREMENT, + init_trait_table_query = """ + CREATE TABLE IF NOT EXISTS _trait ( + trait_id INTEGER PRIMARY KEY AUTOINCREMENT, trait text NOT NULL UNIQUE ); """ - self.execute(init_traits_table_query) + self.execute(init_trait_table_query) - init_traits_paths_table_query = """ - CREATE TABLE IF NOT EXISTS _traits_paths ( - trait INTEGER, - path INTEGER, - FOREIGN KEY(trait) REFERENCES _traits(id), - FOREIGN KEY(path) REFERENCES path(id), - UNIQUE(trait, path) + init_trait_path_table_query = """ + CREATE TABLE IF NOT EXISTS _trait_path ( + trait_id INTEGER, + path_id INTEGER, + FOREIGN KEY(trait_id) REFERENCES _trait(trait_id), + FOREIGN KEY(path_id) REFERENCES path(path_id), + UNIQUE(trait_id, path_id) ); """ - self.execute(init_traits_paths_table_query) + self.execute(init_trait_path_table_query) - self.update_traits() + self.update_trait() # pylint: disable=R1710 def execute(self, query, ignore_error=True): @@ -179,13 +179,13 @@ def put_path_id(self, path): :param path: path to put to the data base :returns: the id of that path """ - get_row_query = f"SELECT id FROM _paths WHERE path = '{path}' LIMIT 1;" + get_row_query = f"SELECT path_id FROM _path WHERE path = '{path}' LIMIT 1;" res = 
self.execute(get_row_query).fetchone() if res: - return res["id"] + return res["path_id"] # create - self.put("_paths", path=path) - path_id = self.get("_paths", path=path, cols="id")["id"] + self.put("_path", path=path) + path_id = self.get("_path", path=path, cols="path_id")["path_id"] return path_id @staticmethod @@ -254,7 +254,7 @@ def put_data_view(self): if self.traits: join_query = " ".join( [ - f"LEFT JOIN [{x}] ON [{x}].path = _paths.id \n" + f"LEFT JOIN [{x}] ON [{x}].path = _path.id \n" for x in self.traits if x != "path" ] @@ -262,28 +262,28 @@ def put_data_view(self): create_view_query = f""" CREATE VIEW data AS - SELECT _paths.path, [{'], ['.join(self.traits)}] - FROM _paths + SELECT _path.path, [{'], ['.join(self.traits)}] + FROM _path {join_query}; """ else: create_view_query = """ CREATE VIEW data AS - SELECT _paths.path - FROM _paths; + SELECT _path.path + FROM _path; """ self.execute(create_view_query) - def update_traits(self): + def update_trait(self): """ Get all traits from the database """ - get_traits_query = """ + get_trait_query = """ SELECT trait - FROM _traits + FROM _trait ORDER BY trait; """ - traits = self.execute(get_traits_query) + traits = self.execute(get_trait_query) if traits is not None: traits = traits.fetchall() else: @@ -310,14 +310,14 @@ def create_trait_table(self, trait_name, value_type): sql_type = TraitsDB.sql_type(value_type) add_table_query = f""" CREATE TABLE [{trait_name}] ( - path INTEGER, + path_id INTEGER, [{trait_name}] {sql_type}, - FOREIGN KEY(path) REFERENCES path(id) + FOREIGN KEY(path_id) REFERENCES path(path_id) ); """ self.execute(add_table_query) - self.put("_traits", trait=trait_name) - self.update_traits() + self.put("_trait", trait=trait_name) + self.update_trait() def put_trait(self, path_id, trait_name, value, update=True): """ @@ -328,10 +328,10 @@ def put_trait(self, path_id, trait_name, value, update=True): :param key: trait name :param value: trait value """ - kwargs = {"path": path_id, 
trait_name: value} - trait_id = self.get("_traits", trait=trait_name)["id"] - self.put("_traits_paths", trait=trait_id, path=path_id) - self.put(trait_name, condition=f"path = {path_id}", update=update, **kwargs) + kwargs = {"path_id": path_id, trait_name: value} + trait_id = self.get("_trait", trait=trait_name)["trait_id"] + self.put("_trait_path", trait_id=trait_id, path_id=path_id) + self.put(trait_name, condition=f"path_id = {path_id}", update=update, **kwargs) def add_pathpair(self, pair: PathPair): """ From 904fc1f9600116d38e6fa88928cdd58219a2268f Mon Sep 17 00:00:00 2001 From: danlooo Date: Fri, 9 Jan 2026 11:11:18 +0100 Subject: [PATCH 3/6] Improve readability of trait names --- pathtraits/access.py | 4 +-- pathtraits/db.py | 62 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/pathtraits/access.py b/pathtraits/access.py index 13acdf8..d040768 100644 --- a/pathtraits/access.py +++ b/pathtraits/access.py @@ -50,12 +50,12 @@ def get_dict(db, path): # get traits from path and its parents dirs_data = [] - data = db.get("data", path=abs_path) + data = db.get_pathtraits(abs_path) if data: dirs_data.append(data) for i in reversed(range(0, len(dirs))): cur_path = "/".join(dirs[0 : i + 1]) - data = db.get("data", path=cur_path) + data = db.get_pathtraits(cur_path) if data: dirs_data.append(data) diff --git a/pathtraits/db.py b/pathtraits/db.py index 4014c6c..3b54d06 100644 --- a/pathtraits/db.py +++ b/pathtraits/db.py @@ -35,12 +35,12 @@ def row_factory(cursor, row): if v is None: continue # sqlite don't know bool - if k.endswith("/BOOL"): + if k.endswith("_BOOL"): v = v > 0 if isinstance(v, float): v_int = int(v) v = v_int if v_int == v else v - k = k.removesuffix("/TEXT").removesuffix("/REAL").removesuffix("/BOOL") + k = k.removesuffix("_TEXT").removesuffix("_REAL").removesuffix("_BOOL") res[k] = v return res @@ -171,6 +171,60 @@ def get(self, table, cols="*", condition=None, **kwargs): return res + def 
get_path_id(self, path: str): + """ + Docstring for get_path_id + + :param self: Description + :param path: Description + :type path: str + """ + res = self.get("_path", path=path, cols="path_id") + if res == []: + return None + + return res["path_id"] + + def get_traits(self, path_id: int): + """ + Get traits of a given path + """ + query = f""" + SELECT DISTINCT trait + FROM _trait + INNER JOIN _trait_path + WHERE path_id = '{path_id}' + """ + response = self.execute(query) + + if response is None: + return None + + res = response.fetchall() + if len(res) == 1: + return res[0] + return [x["trait"] for x in res] + + def get_pathtraits(self, path: str): + """ + Docstring for get_pathtraits + + :param self: Description + :param path: Description + :type path: str + """ + path_id = self.get_path_id(path) + traits = self.get_traits(path_id) + res = {} + for trait in traits: + pathtraits = self.get(trait, path_id=path_id) + if isinstance(pathtraits, dict): + pathtraits.pop("path_id") + for k, v in pathtraits.items(): + res[k] = v + print("###1 ", path, res) + return res + def put_path_id(self, path): """ Docstring for put_path_id @@ -185,7 +239,7 @@ def put_path_id(self, path): return res["path_id"] # create self.put("_path", path=path) - path_id = self.get("_path", path=path, cols="path_id")["path_id"] + path_id = self.get_path_id(path) return path_id @staticmethod @@ -363,7 +417,7 @@ def add_pathpair(self, pair: PathPair): # get element type for list # add: handle lists with mixed element type t = type(v[0]) if isinstance(v, list) and len(v) > 0 else type(v) - k = f"{k}/{TraitsDB.sql_type(t)}" + k = f"{k}_{TraitsDB.sql_type(t)}" if k not in self.traits: self.create_trait_table(k, t) if k in self.traits: From 39c17c3f7381cc3d70b4b5f94d47b0e3a45a4a78 Mon Sep 17 00:00:00 2001 From: danlooo Date: Fri, 9 Jan 2026 11:17:59 +0100 Subject: [PATCH 4/6] Remove data view --- pathtraits/db.py | 31 ------------------------------- test/test.py | 15 --------------- 2 files changed, 
46 deletions(-) diff --git a/pathtraits/db.py b/pathtraits/db.py index 3b54d06..af8ea0f 100644 --- a/pathtraits/db.py +++ b/pathtraits/db.py @@ -222,7 +222,6 @@ def get_pathtraits(self, path: str): pathtraits.pop("path_id") for k, v in pathtraits.items(): res[k] = v - print("###1 ", path, res) return res def put_path_id(self, path): @@ -299,35 +298,6 @@ def put(self, table, condition=None, update=True, **kwargs): insert_query = f"INSERT INTO [{table}] ({keys}) VALUES ({values});" self.execute(insert_query) - def put_data_view(self): - """ - Creates a SQL View with all denormalized traits - """ - self.execute("DROP VIEW IF EXISTS DATA;") - - if self.traits: - join_query = " ".join( - [ - f"LEFT JOIN [{x}] ON [{x}].path = _path.id \n" - for x in self.traits - if x != "path" - ] - ) - - create_view_query = f""" - CREATE VIEW data AS - SELECT _path.path, [{'], ['.join(self.traits)}] - FROM _path - {join_query}; - """ - else: - create_view_query = """ - CREATE VIEW data AS - SELECT _path.path - FROM _path; - """ - self.execute(create_view_query) - def update_trait(self): """ Get all traits from the database @@ -343,7 +313,6 @@ def update_trait(self): else: traits = [] self.traits = [list(x.values())[0] for x in traits] - self.put_data_view() def create_trait_table(self, trait_name, value_type): """ diff --git a/test/test.py b/test/test.py index 197c7aa..3c0db6f 100644 --- a/test/test.py +++ b/test/test.py @@ -70,21 +70,6 @@ def test_missing_north_america(self): for k, v in target.items(): self.assertEqual(source[k], v) - def test_data_view(self): - source = len(self.db.execute("SELECT * FROM data;").fetchall()) - target = 8 - self.assertEqual(source, target) - - def test_data_query(self): - source = len(pathtraits.access.get_paths(self.db, "[score/REAL] >= 5")) - target = 1 - self.assertEqual(source, target) - - traits = pathtraits.access.get_paths_values(self.db, "TRUE") - self.assertEqual(len(traits), 3) - for v in traits.values(): - self.assertTrue("path" not in 
v.keys()) - if __name__ == "__main__": unittest.main() From 5a0933bc08ae273a7fbf4ecc3d12049855244802 Mon Sep 17 00:00:00 2001 From: danlooo Date: Fri, 9 Jan 2026 14:25:02 +0100 Subject: [PATCH 5/6] Update query --- pathtraits/access.py | 63 +++++--------------------------------------- pathtraits/cli.py | 9 +++---- pathtraits/db.py | 34 +++++++++++++++++++++++- test/test.py | 9 +++++++ 4 files changed, 51 insertions(+), 64 deletions(-) diff --git a/pathtraits/access.py b/pathtraits/access.py index d040768..8d9a916 100644 --- a/pathtraits/access.py +++ b/pathtraits/access.py @@ -70,40 +70,6 @@ def get_dict(db, path): return res -def get_paths(db, query_str): - """ - Docstring for get_paths - - :param db: Description - :param query_str: Description - """ - query_str = f"SELECT DISTINCT path FROM data where {query_str};" - res = db.execute(query_str, ignore_error=False).fetchall() - res = [x["path"] for x in res] - return res - - -def get_paths_values(db, query_str): - """ - Docstring for get_paths_values - - :param db: Description - :param query_str: Description - """ - query_str = f"SELECT * FROM data where {query_str};" - response = db.execute(query_str, ignore_error=False).fetchall() - res = {} - for r in response: - path = r["path"] - # ensure distinct paths - # pylint: disable=C0201 - if path not in res.keys(): - r = nest_dict(r) - r.pop("path") - res[path] = r - return res - - def get(path, db_path, verbose): """ Docstring for get @@ -124,7 +90,7 @@ def get(path, db_path, verbose): sys.exit(1) -def query(query_str, db_path, show_values): +def query(query_str, db_path): """ Docstring for query @@ -132,25 +98,8 @@ def query(query_str, db_path, show_values): :param db_path: Description """ db = TraitsDB(db_path) - if show_values: - res = get_paths_values(db, query_str) - if len(res) > 0: - print(yaml.safe_dump(res)) - else: - logger.error( - "No paths found for traits matching %s in database %s", - query_str, - db_path, - ) - sys.exit(1) - else: - res = 
get_paths(db, query_str) - if len(res) > 0: - for r in res: - print(r) - else: - logger.error( - "No paths found for traits matching %s in database %s", - query_str, - db_path, - ) + paths = db.get_paths(query_str) + if paths == []: + sys.exit(f"No paths matching query '{query_str}'") + for path in paths: + print(path) diff --git a/pathtraits/cli.py b/pathtraits/cli.py index 04cb373..a9c7131 100644 --- a/pathtraits/cli.py +++ b/pathtraits/cli.py @@ -75,17 +75,14 @@ def get(path, db_path, verbose): default=DB_PATH, type=click.Path(file_okay=True, dir_okay=False), ) -@click.option( - "--show-values", flag_value=True, default=False, help="Also show their trait values" -) -def query(query_str, db_path, show_values): +def query(query_str, db_path): """ Get paths of given traits Enter QUERY_STR in SQLite3 where statement format, - e.g. "[score/REAL]>1" to get all paths having a score >1. + e.g. "score_REAL>1" to get all paths having a numerical score >1. """ - access.query(query_str, db_path, show_values) + access.query(query_str, db_path) if __name__ == "__main__": diff --git a/pathtraits/db.py b/pathtraits/db.py index af8ea0f..7ec5c62 100644 --- a/pathtraits/db.py +++ b/pathtraits/db.py @@ -21,6 +21,17 @@ class TraitsDB: cursor = None traits = [] + @staticmethod + def remove_type_suffixes(s: str): + """ + Docstring for remove_type_suffixes + + :param s: Description + :type s: str + """ + s = s.removesuffix("_TEXT").removesuffix("_REAL").removesuffix("_BOOL") + return s + @staticmethod def row_factory(cursor, row): """ @@ -40,7 +51,7 @@ def row_factory(cursor, row): if isinstance(v, float): v_int = int(v) v = v_int if v_int == v else v - k = k.removesuffix("_TEXT").removesuffix("_REAL").removesuffix("_BOOL") + k = TraitsDB.remove_type_suffixes(k) res[k] = v return res @@ -224,6 +235,27 @@ def get_pathtraits(self, path: str): res[k] = v return res + def get_paths(self, query_str): + """ + Get paths matching pathtraits + + :param self: Description + :param kwargs: 
pathtraits to match + """ + traits = filter(lambda x: x in query_str, self.traits) + query = "SELECT DISTINCT path FROM _path" + for trait in traits: + query += f" NATURAL JOIN {trait}" + query += f" WHERE {query_str};" + + response = self.execute(query) + if response is None: + return None + else: + res = response.fetchall() + res = [x["path"] for x in res] + return res + def put_path_id(self, path): """ Docstring for put_path_id diff --git a/test/test.py b/test/test.py index 3c0db6f..d1d2490 100644 --- a/test/test.py +++ b/test/test.py @@ -70,6 +70,15 @@ def test_missing_north_america(self): for k, v in target.items(): self.assertEqual(source[k], v) + def test_query(self): + q1 = self.db.get_paths("score_REAL > 3") + self.assertEqual(len(q1), 2) + + q2 = self.db.get_paths( + "score_TEXT = 'zero' AND description_TEXT LIKE '%Germany%'" + ) + self.assertEqual(len(q2), 1) + if __name__ == "__main__": unittest.main() From 125e3b7b48223aa306742e3af4bdb634148c166b Mon Sep 17 00:00:00 2001 From: danlooo Date: Fri, 9 Jan 2026 14:33:16 +0100 Subject: [PATCH 6/6] Fix lint --- pathtraits/db.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pathtraits/db.py b/pathtraits/db.py index 7ec5c62..ee9f042 100644 --- a/pathtraits/db.py +++ b/pathtraits/db.py @@ -245,16 +245,17 @@ def get_paths(self, query_str): traits = filter(lambda x: x in query_str, self.traits) query = "SELECT DISTINCT path FROM _path" for trait in traits: + # pylint: disable=R1713 query += f" NATURAL JOIN {trait}" query += f" WHERE {query_str};" response = self.execute(query) if response is None: return None - else: - res = response.fetchall() - res = [x["path"] for x in res] - return res + + res = response.fetchall() + res = [x["path"] for x in res] + return res def put_path_id(self, path): """