Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,15 @@
import java.io.StringWriter;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HexFormat;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.XMLConstants;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
Expand All @@ -44,6 +50,7 @@
*/
public class SchematronTransformer {
private static final Logger LOG = LoggerFactory.getLogger(SchematronTransformer.class);
private final Map<String, Templates> cachedTemplates = new ConcurrentHashMap<>();

/**
* Constructor.
Expand Down Expand Up @@ -97,23 +104,69 @@ public Transformer transform(Source source) throws TransformerException {
*
* @throws TransformerException If an error occurred during the transform process.
*/
// Not cached — Source objects are not stably comparable/hashable.
// In practice LabelValidator only calls the String-based overload (line 622),
// so this uncached path does not affect bundle-validation performance.
Comment on lines +108 to +109
Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new inline comment references specific line numbers in LabelValidator (e.g., “line 622”). These line numbers will drift over time and make the comment misleading. Prefer referencing the method name/call site (e.g., LabelValidator.parseAndValidate() uses the String overload) without hard-coded line numbers.

Suggested change
// In practice LabelValidator only calls the String-based overload (line 622),
// so this uncached path does not affect bundle-validation performance.
// In practice, LabelValidator.parseAndValidate() uses the String-based
// overload, so this uncached path does not affect bundle-validation
// performance.

Copilot uses AI. Check for mistakes.
public Transformer transform(Source source, ProblemHandler handler) throws TransformerException {
Transformer isoTransformer = this.buildIsoTransformer();
return compileSchematron(source, handler).newTransformer();
}

private Templates compileSchematron(Source source, ProblemHandler handler) throws TransformerException {
Transformer isoTransformer = this.buildIsoTransformer();
if (handler != null) {
isoTransformer.setErrorListener(new TransformerErrorListener(handler));
}
StringWriter schematronStyleSheet = new StringWriter();
isoTransformer.transform(source, new StreamResult(schematronStyleSheet));
return TransformerFactory.newInstance()
.newTransformer(new StreamSource(new StringReader(schematronStyleSheet.toString())));
.newTemplates(new StreamSource(new StringReader(schematronStyleSheet.toString())));
}
Comment on lines 121 to 123
Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

compileSchematron() creates a new TransformerFactory for compiling the generated XSLT into Templates but does not apply the same secure-processing / external access restrictions used in buildIsoTransformer(). This can allow external stylesheet resolution during compilation if the generated XSLT contains imports/includes. Suggest configuring this factory similarly (secure-processing + ACCESS_EXTERNAL_* attributes and, if needed, a URIResolver) before calling newTemplates().

Copilot uses AI. Check for mistakes.

public Transformer transform(String source) throws TransformerException {
return this.transform(source, null);
}
public Transformer transform(String source, ProblemHandler handler) throws TransformerException {
return transform (new StreamSource(new StringReader(source)), handler);
String key = sha256(source);
// Intentional check-then-act: a concurrent miss may compile twice, but
// both results are equivalent and the race is self-healing after warmup.
// computeIfAbsent is avoided because it holds a lock during compilation.
// handler is intentionally unused on cache hit — compilation errors
// would only occur during the first call (cache miss), not on reuse.
Templates templates = cachedTemplates.get(key);
if (templates == null) {
LOG.debug("transform: cache miss, compiling schematron (length={})", source.length());
templates = compileSchematron(new StreamSource(new StringReader(source)), handler);
cachedTemplates.put(key, templates);
Comment on lines +129 to +139
Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

transform(String, ProblemHandler) recomputes a SHA-256 hash of the entire schematron text on every call. In LabelValidator.parseAndValidate() the same schematron String instance is reused for many labels, so this adds avoidable O(n) work per label and reduces the benefit of caching. Consider using the source String itself as the cache key (String.hashCode is memoized per instance) or caching a precomputed key alongside the schematron text so you don’t re-hash for every validation run.

Copilot uses AI. Check for mistakes.
} else {
LOG.debug("transform: cache hit, reusing compiled schematron (length={})", source.length());
}
return templates.newTransformer();
}

// Package-private: cache lifetime is tied to the SchematronTransformer instance.
// LabelValidator.clear() creates a new instance (line 219), which naturally
// starts with an empty cache. Exposed for testing.
void clearCache() {
cachedTemplates.clear();
Comment on lines +146 to +150
Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR description states clearCache() was added “for explicit lifecycle management”, but the implementation is package-private and the comment says it’s “Exposed for testing”. Either make the method part of the public API (if callers should manage cache lifetime) or update the PR description / comment to reflect that it’s test-only and not intended for external use.

Copilot uses AI. Check for mistakes.
}

/** Returns the number of cached templates entries. Package-private for testing. */
int cacheSize() {
return cachedTemplates.size();
}

private static final ThreadLocal<MessageDigest> SHA256 = ThreadLocal.withInitial(() -> {
try {
return MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
}
});

private static String sha256(String input) {
MessageDigest digest = SHA256.get();
digest.reset();
return HexFormat.of().formatHex(digest.digest(input.getBytes(StandardCharsets.UTF_8)));
}
/**
* Transform the given schematron.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package gov.nasa.pds.tools.label;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNotSame;

import javax.xml.transform.Transformer;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

/**
* Tests for {@link SchematronTransformer} cache behavior.
*
* <p>These tests exercise the full ISO schematron → XSLT compilation pipeline and
* therefore require Saxon-HE on the classpath. They are slower than pure unit tests
* on first run due to XSLT compilation overhead.
*/
@Tag("integration")
public class SchematronTransformerTest {

/** Minimal valid ISO Schematron document. */
private static final String MINIMAL_SCHEMATRON =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ "<sch:schema xmlns:sch=\"http://purl.oclc.org/dsdl/schematron\" queryBinding=\"xslt2\">\n"
+ " <sch:title>Minimal test schematron</sch:title>\n"
+ " <sch:pattern>\n"
+ " <sch:rule context=\"/root\">\n"
+ " <sch:assert test=\"true()\">Always passes.</sch:assert>\n"
+ " </sch:rule>\n"
+ " </sch:pattern>\n"
+ "</sch:schema>";

/** A different schematron to verify distinct cache entries. */
private static final String OTHER_SCHEMATRON =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ "<sch:schema xmlns:sch=\"http://purl.oclc.org/dsdl/schematron\" queryBinding=\"xslt2\">\n"
+ " <sch:title>Other test schematron</sch:title>\n"
+ " <sch:pattern>\n"
+ " <sch:rule context=\"/other\">\n"
+ " <sch:assert test=\"true()\">Also passes.</sch:assert>\n"
+ " </sch:rule>\n"
+ " </sch:pattern>\n"
+ "</sch:schema>";

private SchematronTransformer transformer;

@BeforeEach
void setUp() throws Exception {
transformer = new SchematronTransformer();
}

@Test
void testCachePopulatedOnFirstCall() throws Exception {
assertEquals(0, transformer.cacheSize(), "cache should start empty");

Transformer t = transformer.transform(MINIMAL_SCHEMATRON);
assertNotNull(t);
assertEquals(1, transformer.cacheSize(), "cache should have one entry after first call");
}

@Test
void testCacheHitOnRepeatedCall() throws Exception {
Transformer t1 = transformer.transform(MINIMAL_SCHEMATRON);
assertEquals(1, transformer.cacheSize());

Transformer t2 = transformer.transform(MINIMAL_SCHEMATRON);
assertEquals(1, transformer.cacheSize(), "cache size should remain 1 after repeated call");

// Each call should return a distinct Transformer (Transformer is not thread-safe)
assertNotSame(t1, t2, "each call should produce a new Transformer instance");
}

@Test
void testDistinctSchematronsGetSeparateCacheEntries() throws Exception {
transformer.transform(MINIMAL_SCHEMATRON);
assertEquals(1, transformer.cacheSize());

transformer.transform(OTHER_SCHEMATRON);
assertEquals(2, transformer.cacheSize(), "distinct schematrons should produce separate cache entries");
}

@Test
void testClearCacheRemovesAllEntries() throws Exception {
transformer.transform(MINIMAL_SCHEMATRON);
transformer.transform(OTHER_SCHEMATRON);
assertEquals(2, transformer.cacheSize());

transformer.clearCache();
assertEquals(0, transformer.cacheSize(), "cache should be empty after clearCache()");
}

@Test
void testCacheMissAfterClear() throws Exception {
transformer.transform(MINIMAL_SCHEMATRON);
assertEquals(1, transformer.cacheSize());

transformer.clearCache();
assertEquals(0, transformer.cacheSize());

// Re-compile should repopulate the cache
Transformer t = transformer.transform(MINIMAL_SCHEMATRON);
assertNotNull(t);
assertEquals(1, transformer.cacheSize(), "cache should repopulate after clear");
}
}