-
Notifications
You must be signed in to change notification settings - Fork 33
Add feature to make studies from embedded images #538
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,7 +69,8 @@ | |
| import sys | ||
| import os | ||
| import shutil | ||
| import stat | ||
| import stat | ||
| import tempfile | ||
| import copy_with_port_portname | ||
| import numpy as np | ||
| import shlex # Added for POSIX shell escaping | ||
|
|
@@ -144,6 +145,35 @@ def _resolve_concore_path(): | |
|
|
||
| ORIGINAL_CWD = os.getcwd() | ||
| GRAPHML_FILE = os.path.abspath(sys.argv[1]) | ||
|
|
||
# --- Image input support: extract embedded GraphML from PNG/JPG ---
# When the input path has an image extension, the GraphML embedded in the image
# is extracted with tools/extract_graphml.py (loaded by file path, relative to
# this script) and written to a temporary .graphml file. GRAPHML_FILE is then
# repointed at that temporary file, which is removed at interpreter exit.
# NOTE(review): GRAPHML_FILE then names a temp file -- confirm nothing later in
# the script derives study names from its basename.
_IMAGE_EXTS = {".png", ".jpg", ".jpeg"}
_tmp_graphml_file = None  # closed handle of the temporary GraphML file, if one was created
if os.path.splitext(GRAPHML_FILE)[1].lower() in _IMAGE_EXTS:
    try:
        import atexit as _atexit
        import importlib.util

        def _remove_tmp_graphml(_path):
            # Best-effort cleanup: the file may already have been removed.
            try:
                os.unlink(_path)
            except FileNotFoundError:
                pass

        _tool_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tools", "extract_graphml.py")
        _spec = importlib.util.spec_from_file_location("extract_graphml", _tool_path)
        if _spec is None or _spec.loader is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ImportError(f"cannot load extraction tool '{_tool_path}'")
        _extract_graphml_module = importlib.util.module_from_spec(_spec)
        _spec.loader.exec_module(_extract_graphml_module)
        _extract_graphml = _extract_graphml_module.extract_graphml
        _graphml_str = _extract_graphml(GRAPHML_FILE)
        if _graphml_str is None:
            print(f"No embedded GraphML found in '{GRAPHML_FILE}'.")
            sys.exit(1)  # SystemExit is not an Exception, so it passes the handler below
        # newline="" writes the extracted text verbatim (no "\n" -> os.linesep
        # translation, which would turn an embedded "\r\n" into "\r\r\n" on Windows).
        _tmp_graphml_file = tempfile.NamedTemporaryFile(
            mode="w", suffix=".graphml", delete=False, encoding="utf-8", newline=""
        )
        # Register cleanup before writing so a failed write cannot leak the file.
        _atexit.register(_remove_tmp_graphml, _tmp_graphml_file.name)
        with _tmp_graphml_file:
            _tmp_graphml_file.write(_graphml_str)
        GRAPHML_FILE = _tmp_graphml_file.name
    except Exception as _e:
        print(f"Failed to extract GraphML from image: {_e}")
        sys.exit(1)
# ------------------------------------------------------------------
|
|
||
| TRIMMED_LOGS = True | ||
| CONCOREPATH = _resolve_concore_path() | ||
| CPPWIN = os.environ.get("CONCORE_CPPWIN", "g++") #Windows C++ 6/22/21 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,189 @@ | ||
| from typing import Optional | ||
| import struct | ||
| import zlib | ||
| import sys | ||
| import os | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # PNG extraction | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"

# Chunk types that can carry keyword/text pairs.
_PNG_TEXT_CHUNK_TYPES = (b"tEXt", b"zTXt", b"iTXt")

# Errors that mean "this text chunk is malformed or undecodable": a missing
# separator (ValueError from bytes.index), bad UTF-8 (UnicodeDecodeError, a
# ValueError subclass), a missing header byte (IndexError) or bad deflate data.
_PNG_TEXT_ERRORS = (ValueError, IndexError, zlib.error)


def _decode_png_text_payload(chunk_type: bytes, payload: bytes) -> str:
    """Decode the part of a text chunk that follows the keyword's null byte.

    Raises one of ``_PNG_TEXT_ERRORS`` when the payload is malformed.
    """
    if chunk_type == b"tEXt":
        return payload.decode("latin-1")

    if chunk_type == b"zTXt":
        # payload[0] is the compression-method byte; deflate data follows it.
        return zlib.decompress(payload[1:]).decode("utf-8")

    # iTXt: compression flag, compression method, language tag, null,
    # translated keyword, null, text (compressed when the flag is 1).
    comp_flag = payload[0]
    rest = payload[2:]
    rest = rest[rest.index(b"\x00") + 1 :]  # skip language tag
    text_bytes = rest[rest.index(b"\x00") + 1 :]  # skip translated keyword
    if comp_flag == 1:
        text_bytes = zlib.decompress(text_bytes)
    return text_bytes.decode("utf-8")


def _extract_from_png(data: bytes) -> Optional[str]:
    """Return the GraphML string embedded in a PNG file, or None.

    Walks the chunk list and returns the text of the first tEXt, zTXt or iTXt
    chunk whose keyword equals "graphml" (case-insensitive) and whose text can
    be decoded. Text chunks that are malformed are skipped. Scanning stops at
    IEND, at the end of the data, or at a chunk whose declared length runs
    past the end of the data. Chunk CRCs are not verified.

    Args:
        data: Complete contents of the image file.

    Returns:
        The embedded text, or None when *data* lacks the PNG signature or no
        usable "graphml" text chunk is found.
    """
    if data[:8] != _PNG_SIGNATURE:
        return None

    total = len(data)
    pos = 8
    # Each chunk is: 4-byte length, 4-byte type, <length> data bytes, 4-byte CRC.
    while pos + 12 <= total:
        length, chunk_type = struct.unpack_from(">I4s", data, pos)
        body_start = pos + 8
        body_end = body_start + length
        if body_end > total:
            # Truncated chunk: do not hand back partial text.
            break
        if chunk_type == b"IEND":
            break

        if chunk_type in _PNG_TEXT_CHUNK_TYPES:
            keyword, separator, payload = data[body_start:body_end].partition(b"\x00")
            if separator and keyword.decode("latin-1").lower() == "graphml":
                try:
                    return _decode_png_text_payload(chunk_type, payload)
                except _PNG_TEXT_ERRORS:
                    # Unusable chunk; a later one may still hold the GraphML.
                    pass

        pos = body_end + 4  # step over the CRC

    return None
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # JPEG extraction | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
_JPEG_SOI = b"\xff\xd8"


def _extract_from_jpeg(data: bytes) -> Optional[str]:
    """Return the GraphML string embedded in a JPEG file, or None.

    Scans the raw bytes for a ``<graphml ... </graphml>`` span. The search is
    anchored on the root element rather than on the first ``<?xml`` in the
    file, so an unrelated XML declaration elsewhere in the image cannot drag
    binary data into the result. The nearest preceding ``<?xml`` declaration
    is included only when no 0x00 or 0xFF byte lies between it and the root
    element, i.e. when both sit in the same run of text.

    Args:
        data: Complete contents of the image file.

    Returns:
        The span decoded as UTF-8, falling back to Latin-1, or None when
        *data* lacks the JPEG start-of-image marker or no complete span exists.
    """
    if data[:2] != _JPEG_SOI:
        return None

    closing_tag = b"</graphml>"
    root_start = data.find(b"<graphml")
    if root_start == -1:
        return None
    root_end = data.find(closing_tag, root_start)
    if root_end == -1:
        return None

    span_start = root_start
    decl_start = data.rfind(b"<?xml", 0, root_start)
    if decl_start != -1:
        prolog = data[decl_start:root_start]
        # 0xFF introduces every JPEG marker and never occurs in UTF-8 text;
        # either byte here means the declaration is not part of this document.
        if b"\x00" not in prolog and b"\xff" not in prolog:
            span_start = decl_start

    xml_bytes = data[span_start : root_end + len(closing_tag)]
    try:
        return xml_bytes.decode("utf-8")
    except UnicodeDecodeError:
        # Latin-1 maps every byte value, so this fallback cannot fail.
        return xml_bytes.decode("latin-1")
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Public API | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
def extract_graphml(image_path: str) -> Optional[str]:
    """Read *image_path* and return the GraphML text embedded in it.

    The container format is chosen from the file's leading magic bytes, not
    from its extension. Returns None (after reporting on stderr) when the
    file cannot be read or is neither PNG nor JPEG; also returns None when
    the format-specific extractor finds nothing.
    """
    try:
        with open(image_path, "rb") as image:
            raw = image.read()
    except OSError as exc:
        print(f"[extract_graphml] Cannot open '{image_path}': {exc}", file=sys.stderr)
        return None

    # Dispatch on the signature at the start of the file.
    if raw.startswith(_PNG_SIGNATURE):
        return _extract_from_png(raw)
    if raw.startswith(_JPEG_SOI):
        return _extract_from_jpeg(raw)

    print(
        f"[extract_graphml] '{image_path}' is neither PNG nor JPEG.",
        file=sys.stderr,
    )
    return None
|
|
||
|
|
||
def extract_graphml_to_file(image_path: str, output_path: Optional[str] = None) -> Optional[str]:
    """Extract the GraphML embedded in *image_path* and write it to a file.

    Args:
        image_path: Path of the PNG/JPEG image to read.
        output_path: Destination file. When None, it is derived from
            *image_path* by replacing the image extension with ".graphml"
            ("foo.png" -> "foo.graphml", "foo.graphml.png" -> "foo.graphml").

    Returns:
        The path written, or None when no GraphML was found or the file could
        not be written (a message is printed to stderr in both cases).
    """
    graphml = extract_graphml(image_path)
    if graphml is None:
        print(
            f"[extract_graphml] No embedded GraphML found in '{image_path}'.",
            file=sys.stderr,
        )
        return None

    if output_path is None:
        base, _ = os.path.splitext(image_path)
        # Handle double-extension names like "foo.graphml.png" -> "foo.graphml"
        output_path = base if base.endswith(".graphml") else base + ".graphml"

    try:
        # newline="" writes the text verbatim; the default text mode would
        # translate "\n" to os.linesep and turn an embedded "\r\n" into
        # "\r\r\n" on Windows.
        with open(output_path, "w", encoding="utf-8", newline="") as fh:
            fh.write(graphml)
    except OSError as exc:
        print(
            f"[extract_graphml] Cannot write to '{output_path}': {exc}",
            file=sys.stderr,
        )
        return None

    print(f"[extract_graphml] Extracted GraphML written to '{output_path}'.")
    return output_path
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # CLI entry point | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
if __name__ == "__main__":
    # Command-line use: the image path is required, the output path is optional.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python extract_graphml.py <image.png|jpg|jpeg> [output.graphml]")
        sys.exit(1)

    source_image = cli_args[0]
    destination = cli_args[1] if len(cli_args) > 1 else None

    # A None result means extraction or writing failed; signal it via the exit code.
    if extract_graphml_to_file(source_image, destination) is None:
        sys.exit(1)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that image inputs are supported, the script's `usage: py mkconcore.py file.graphml ...` message (earlier in the file) is misleading. Consider updating the usage/help text to mention `.png`/`.jpg`/`.jpeg` so users don't assume only `.graphml` works.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will document this properly later in guide.md.