diff --git a/README.md b/README.md
index 27c1457..97213c9 100644
--- a/README.md
+++ b/README.md
@@ -362,6 +362,33 @@ Save this to `dmesg_custom_config.json` and run:
node-scraper --plugin-configs=dmesg_custom_config.json run-plugins DmesgPlugin
```
+### Regex helper and `RegexSearchPlugin`
+
+A small utility module of common regex patterns is available at `nodescraper.regex_patterns`
+to help build analyzer-friendly `error_regex` dicts. This is useful when composing configs
+for `RegexSearchPlugin` or other analyzers that accept `error_regex` lists.
+
+Python example (programmatic usage):
+
+```py
+from nodescraper import regex_patterns
+from nodescraper.plugins.regex_search.regex_search_analyzer import RegexSearchAnalyzer
+from nodescraper.plugins.regex_search.regex_search_data import RegexSearchData
+
+# build error_regex list from named common patterns
+rules = regex_patterns.build_error_regex_dicts(["ipv4", "email"], message_template="Found {name}")
+
+# prepare data and args
+data = RegexSearchData(content="2026-05-01T12:00:00,000+00:00 connect from 192.0.2.1")
+args = {"error_regex": rules}
+
+analyzer = RegexSearchAnalyzer(system_info=None)
+result = analyzer.analyze_data(data, args)
+print(result.events)
+```
+
+CLI note: when invoked from the CLI or via a plugin config JSON, `RegexSearchPlugin` accepts a `--data` option pointing to a file or directory, plus one or more `--error-regex` entries for the patterns to search.
+
#### **'compare-runs' subcommand**
The `compare-runs` subcommand compares datamodels from two run log directories (e.g. two
`nodescraper_log_*` folders). By default, all plugins with data in both runs are compared.
diff --git a/docs/PLUGIN_DOC.md b/docs/PLUGIN_DOC.md
index 5e84641..4339096 100644
--- a/docs/PLUGIN_DOC.md
+++ b/docs/PLUGIN_DOC.md
@@ -4,6 +4,7 @@
| Plugin | Collection | Analyzer Args | Collection Args | DataModel | Collector | Analyzer |
| --- | --- | --- | --- | --- | --- | --- |
+| RegexHelper | - | **Utility:** common regex patterns and `build_error_regex_dicts()` to produce analyzer-friendly `error_regex` dicts; useful when composing `RegexSearchPlugin` configs programmatically. | - | - | - | - |
| AmdSmiPlugin | bad-pages
firmware --json
list --json
metric -g all
partition --json
process --json
ras --cper --folder={folder}
ras --afid --cper-file {cper_file}
static -g all --json
static -g {gpu_id} --json
topology
version --json
xgmi -l
xgmi -m | **Analyzer Args:**
- `check_static_data`: bool — If True, run static data checks (e.g. driver version, partition mode).
- `expected_gpu_processes`: Optional[int] — Expected number of GPU processes.
- `expected_max_power`: Optional[int] — Expected maximum power value (e.g. watts).
- `expected_driver_version`: Optional[str] — Expected AMD driver version string.
- `expected_memory_partition_mode`: Optional[str] — Expected memory partition mode (e.g. sp3, dp).
- `expected_compute_partition_mode`: Optional[str] — Expected compute partition mode.
- `expected_firmware_versions`: Optional[dict[str, str]] — Expected firmware versions keyed by amd-smi fw_id (e.g. PLDM_BUNDLE).
- `l0_to_recovery_count_error_threshold`: Optional[int] — L0-to-recovery count above which an error is raised.
- `l0_to_recovery_count_warning_threshold`: Optional[int] — L0-to-recovery count above which a warning is raised.
- `vendorid_ep`: Optional[str] — Expected endpoint vendor ID (e.g. for PCIe).
- `vendorid_ep_vf`: Optional[str] — Expected endpoint VF vendor ID.
- `devid_ep`: Optional[str] — Expected endpoint device ID.
- `devid_ep_vf`: Optional[str] — Expected endpoint VF device ID.
- `sku_name`: Optional[str] — Expected SKU name string for GPU.
- `expected_xgmi_speed`: Optional[list[float]] — Expected xGMI speed value(s) (e.g. link rate).
- `analysis_range_start`: Optional[datetime.datetime] — Start of time range for time-windowed analysis.
- `analysis_range_end`: Optional[datetime.datetime] — End of time range for time-windowed analysis. | **Collection Args:**
- `analysis_firmware_ids`: Optional[list[str]] — amd-smi fw_id values to record in analysis_ref.firmware_versions
- `cper_file_path`: Optional[str] — Path to CPER folder or file for RAS AFID collection (ras --afid --cper-file). | [AmdSmiDataModel](#AmdSmiDataModel-Model) | [AmdSmiCollector](#Collector-Class-AmdSmiCollector) | [AmdSmiAnalyzer](#Data-Analyzer-Class-AmdSmiAnalyzer) |
| BiosPlugin | sh -c 'cat /sys/devices/virtual/dmi/id/bios_version'
wmic bios get SMBIOSBIOSVersion /Value | **Analyzer Args:**
- `exp_bios_version`: list[str] — Expected BIOS version(s) to match against collected value (str or list).
- `regex_match`: bool — If True, match exp_bios_version as regex; otherwise exact match. | - | [BiosDataModel](#BiosDataModel-Model) | [BiosCollector](#Collector-Class-BiosCollector) | [BiosAnalyzer](#Data-Analyzer-Class-BiosAnalyzer) |
| CmdlinePlugin | cat /proc/cmdline | **Analyzer Args:**
- `required_cmdline`: Union[str, List] — Command-line parameters that must be present (e.g. 'pci=bfsort').
- `banned_cmdline`: Union[str, List] — Command-line parameters that must not be present.
- `os_overrides`: Dict[str, nodescraper.plugins.inband.cmdline.cmdlineconfig.OverrideConfig] — Per-OS overrides for required_cmdline and banned_cmdline (keyed by OS identifier).
- `platform_overrides`: Dict[str, nodescraper.plugins.inband.cmdline.cmdlineconfig.OverrideConfig] — Per-platform overrides for required_cmdline and banned_cmdline (keyed by platform). | - | [CmdlineDataModel](#CmdlineDataModel-Model) | [CmdlineCollector](#Collector-Class-CmdlineCollector) | [CmdlineAnalyzer](#Data-Analyzer-Class-CmdlineAnalyzer) |
diff --git a/nodescraper/regex_patterns.py b/nodescraper/regex_patterns.py
new file mode 100644
index 0000000..2920e27
--- /dev/null
+++ b/nodescraper/regex_patterns.py
@@ -0,0 +1,52 @@
+"""Common regex patterns and helpers for building analyzer error rules.
+
+Keep these lightweight and dependency-free so other modules can import them
+without circular imports.
+"""
+from typing import Iterable, List
+
+COMMON_PATTERNS: dict[str, str] = {
+ "ipv4": r"\b(?:25[0-5]|2[0-4]\d|1?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|1?\d?\d)){3}\b",
+ "mac": r"\b(?:[0-9A-Fa-f]{2}[:-]){5}[0-9A-Fa-f]{2}\b",
+ "uuid": r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b",
+ "iso8601_ts": r"\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?\b",
+ "email": r"\b[\w.+-]+@[\w-]+(?:\.[\w-]+)+\b",
+}
+
+
+def get_pattern(name: str) -> str:
+    """Return the raw regex string for the named entry in COMMON_PATTERNS.
+
+    Raises KeyError if *name* is not a key of COMMON_PATTERNS.
+    """
+    return COMMON_PATTERNS[name]
+
+
+def build_error_regex_dicts(
+    names: Iterable[str],
+    message_template: str = "{name} matched",
+    event_category: str = "UNKNOWN",
+    event_priority: str = "ERROR",
+) -> List[dict]:
+    """Create list of dicts compatible with RegexAnalyzer._convert_and_extend_error_regex.
+
+    Each dict contains keys: 'regex' (string), 'message', 'event_category', 'event_priority'.
+    The analyzer will compile the regex strings into patterns.
+    """
+    out: List[dict] = []
+    for name in names:
+        # Membership test (not truthiness) so a falsy pattern value is never misreported.
+        if name not in COMMON_PATTERNS:
+            raise KeyError(f"Unknown pattern name: {name}")
+        out.append(
+            {
+                "regex": COMMON_PATTERNS[name],
+                "message": message_template.format(name=name),
+                "event_category": event_category,
+                "event_priority": event_priority,
+            }
+        )
+    return out
+
+
+__all__ = ["COMMON_PATTERNS", "get_pattern", "build_error_regex_dicts"]
diff --git a/test/unit/test_regex_patterns.py b/test/unit/test_regex_patterns.py
new file mode 100644
index 0000000..98d0a11
--- /dev/null
+++ b/test/unit/test_regex_patterns.py
@@ -0,0 +1,22 @@
+import re
+
+from nodescraper import regex_patterns
+
+
+def test_ipv4_pattern_matches():
+ pat = regex_patterns.get_pattern("ipv4")
+ compiled = re.compile(pat)
+ assert compiled.search("address 192.0.2.1")
+
+
+def test_mac_and_uuid_patterns_match():
+ mac = regex_patterns.get_pattern("mac")
+ uuid = regex_patterns.get_pattern("uuid")
+ assert re.search(mac, "found MAC 00:1A:2B:3C:4D:5E")
+ assert re.search(uuid, "id: 123e4567-e89b-12d3-a456-426655440000")
+
+
+def test_build_error_regex_dicts_works():
+ rules = regex_patterns.build_error_regex_dicts(["ipv4", "email"], message_template="got {name}")
+ assert isinstance(rules, list) and len(rules) == 2
+ assert all("regex" in r and "message" in r for r in rules)
diff --git a/test/unit/test_regex_search_analyzer_extra.py b/test/unit/test_regex_search_analyzer_extra.py
new file mode 100644
index 0000000..69eeadf
--- /dev/null
+++ b/test/unit/test_regex_search_analyzer_extra.py
@@ -0,0 +1,28 @@
+from nodescraper.models.systeminfo import SystemInfo
+from nodescraper.plugins.regex_search.regex_search_analyzer import RegexSearchAnalyzer
+from nodescraper.plugins.regex_search.regex_search_data import RegexSearchData
+
+from nodescraper import regex_patterns
+
+
+def test_regex_search_analyzer_detects_ipv4():
+    system_info = SystemInfo()
+    analyzer = RegexSearchAnalyzer(system_info=system_info)
+
+    # Content includes an ISO-like timestamp and an IPv4 address
+    content = "2026-05-01T12:00:00,000+00:00 Something happened at 192.0.2.123\n"
+    data = RegexSearchData(content=content, data_root="regex_search")
+
+    args = {
+        "error_regex": regex_patterns.build_error_regex_dicts(["ipv4"], message_template="Found {name}"),
+        "num_timestamps": 2,
+        "interval_to_collapse_event": 60,
+    }
+
+    result = analyzer.analyze_data(data, args)
+
+    assert result is not None
+    assert len(result.events) >= 1
+    ev = result.events[0]
+    # matched content should include the IPv4
+    assert "192.0.2.123" in str(ev.data.get("match_content", ""))