diff --git a/http-tests/admin/model/ontology-import-upload-no-deadlock.sh b/http-tests/admin/model/ontology-import-upload-no-deadlock.sh
new file mode 100755
index 000000000..939da9687
--- /dev/null
+++ b/http-tests/admin/model/ontology-import-upload-no-deadlock.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Test that ontology imports of uploaded files do not cause a deadlock.
+# This verifies the fix for the circular dependency that occurs when:
+# 1. A request arrives for /uploads/xyz
+# 2. OntologyFilter intercepts it and loads the ontology
+# 3. The ontology has an owl:imports pointing at /uploads/xyz
+# 4. Jena's FileManager makes an HTTP request to /uploads/xyz
+# 5. Without the fix, this would cause an infinite loop/deadlock
+
+initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
+initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
+purge_cache "$END_USER_VARNISH_SERVICE"
+purge_cache "$ADMIN_VARNISH_SERVICE"
+purge_cache "$FRONTEND_VARNISH_SERVICE"
+
+pwd=$(realpath "$PWD")
+
+# add agent to the writers group so they can upload files
+
+add-agent-to-group.sh \
+  -f "$OWNER_CERT_FILE" \
+  -p "$OWNER_CERT_PWD" \
+  --agent "$AGENT_URI" \
+  "${ADMIN_BASE_URL}acl/groups/writers/"
+
+# Step 1: Upload an RDF file
+
+file_content_type="text/turtle"
+
+file_doc=$(create-file.sh \
+  -f "$AGENT_CERT_FILE" \
+  -p "$AGENT_CERT_PWD" \
+  -b "$END_USER_BASE_URL" \
+  --title "Test ontology for upload import" \
+  --file "$pwd/test-ontology-import.ttl" \
+  --file-content-type "${file_content_type}")
+
+# Step 2: Extract the uploaded file URI (content-addressed)
+
+file_doc_ntriples=$(get.sh \
+  -f "$AGENT_CERT_FILE" \
+  -p "$AGENT_CERT_PWD" \
+  --accept 'application/n-triples' \
+  "$file_doc")
+
+upload_uri=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_doc//\//\\/}> <http:\/\/xmlns\.com\/foaf\/0\.1\/primaryTopic> <(.*)> \./\1/p")
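+# An N-Triples line matched by the pattern above would look roughly like this
+# (illustrative values; the linking property is assumed to be foaf:primaryTopic):
+#   <.../files/{slug}/> <http://xmlns.com/foaf/0.1/primaryTopic> <.../uploads/{sha1}> .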
+
+# Verify the uploaded file is accessible before we add it as an import
+curl -k -f -s \
+  -E "$AGENT_CERT_FILE":"$AGENT_CERT_PWD" \
+  -H "Accept: ${file_content_type}" \
+  "$upload_uri" > /dev/null
+
+# Step 3: Add the uploaded file as an owl:imports to the namespace ontology
+
+namespace_doc="${END_USER_BASE_URL}ns"
+namespace="${namespace_doc}#"
+ontology_doc="${ADMIN_BASE_URL}ontologies/namespace/"
+
+add-ontology-import.sh \
+  -f "$OWNER_CERT_FILE" \
+  -p "$OWNER_CERT_PWD" \
+  --import "$upload_uri" \
+  "$ontology_doc"
+
+# Step 4: Clear the namespace ontology from memory to force a reload on the next request
+
+clear-ontology.sh \
+  -f "$OWNER_CERT_FILE" \
+  -p "$OWNER_CERT_PWD" \
+  -b "$ADMIN_BASE_URL" \
+  --ontology "$namespace"
+
+# Step 5: Verify the import is present in the loaded ontology
+# This request also triggers ontology loading and would expose the deadlock
+
+curl -k -f -s \
+  -H "Accept: application/n-triples" \
+  "$namespace_doc" \
+| grep "<${namespace}> <http://www.w3.org/2002/07/owl#imports> <${upload_uri}>" > /dev/null
+
+# Step 6: Verify the uploaded file is still accessible after ontology loading
+
+curl -k -f -s \
+  -E "$AGENT_CERT_FILE":"$AGENT_CERT_PWD" \
+  -H "Accept: ${file_content_type}" \
+  "$upload_uri" > /dev/null
+
+# Step 7: Verify that the imported ontology content is accessible via the namespace document
+# This confirms the import was actually loaded (not just skipped)
+
+curl -k -f -s \
+  -G \
+  -E "$OWNER_CERT_FILE":"$OWNER_CERT_PWD" \
+  -H 'Accept: application/sparql-results+xml' \
+  --data-urlencode "query=SELECT * { <http://example.com/test-ontology#TestClass> ?p ?o }" \
+  "$namespace_doc" \
+| grep 'Test Class' > /dev/null
diff --git a/http-tests/admin/model/test-ontology-import.ttl b/http-tests/admin/model/test-ontology-import.ttl
new file mode 100644
index 000000000..24361b035
--- /dev/null
+++ b/http-tests/admin/model/test-ontology-import.ttl
@@ -0,0 +1,17 @@
+@prefix :     <http://example.com/test-ontology#> .
+@prefix owl:  <http://www.w3.org/2002/07/owl#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .
+
+: a owl:Ontology ;
+    rdfs:label "Test ontology for upload import" ;
+    rdfs:comment "This ontology is uploaded to test that ontology imports of uploaded files do not cause a deadlock" .
+
+:TestClass a owl:Class ;
+    rdfs:label "Test Class" ;
+    rdfs:comment "A test class to verify the ontology was loaded" .
+
+:testProperty a owl:DatatypeProperty ;
+    rdfs:label "Test Property" ;
+    rdfs:domain :TestClass ;
+    rdfs:range xsd:string .
diff --git a/http-tests/imports/PUT-file-format-explicit.sh b/http-tests/imports/PUT-file-format-explicit.sh
new file mode 100755
index 000000000..3c9dffd8b
--- /dev/null
+++ b/http-tests/imports/PUT-file-format-explicit.sh
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
+initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
+purge_cache "$END_USER_VARNISH_SERVICE"
+purge_cache "$ADMIN_VARNISH_SERVICE"
+purge_cache "$FRONTEND_VARNISH_SERVICE"
+
+pwd=$(realpath "$PWD")
+
+# add agent to the writers group
+
+add-agent-to-group.sh \
+  -f "$OWNER_CERT_FILE" \
+  -p "$OWNER_CERT_PWD" \
+  --agent "$AGENT_URI" \
+  "${ADMIN_BASE_URL}acl/groups/writers/"
+
+# create test file with sample content
+
+test_file=$(mktemp)
+echo "test,data,sample" > "$test_file"
+echo "1,2,3" >> "$test_file"
+echo "4,5,6" >> "$test_file"
+
+# generate slug for the file document
+
+slug=$(uuidgen | tr '[:upper:]' '[:lower:]')
+
+# upload file with explicit media type: text/plain
+
+file_doc=$(create-file.sh \
+  -f "$AGENT_CERT_FILE" \
+  -p "$AGENT_CERT_PWD" \
+  -b "$END_USER_BASE_URL" \
+  --title "Test File for Media Type Update" \
+  --slug "$slug" \
+  --file "$test_file" \
+  --file-content-type "text/plain")
+
+# get the file resource URI and initial dct:format
+
+file_doc_ntriples=$(get.sh \
+  -f "$AGENT_CERT_FILE" \
+  -p "$AGENT_CERT_PWD" \
+  --accept 'application/n-triples' \
+  "$file_doc")
+
+file_uri=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_doc//\//\\/}> <http:\/\/xmlns\.com\/foaf\/0\.1\/primaryTopic> <(.*)> \./\1/p")
+
+# get initial SHA1 hash
+initial_sha1=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/xmlns\.com\/foaf\/0\.1\/sha1> \"(.*)\" \./\1/p")
+
+# get initial dct:format
+initial_format=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/purl\.org\/dc\/terms\/format> <(.*)> \./\1/p")
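+# An illustrative matching line (the object URI that represents the media type
+# may vary by deployment; only the "text/plain" substring is asserted below):
+#   <.../uploads/{sha1}> <http://purl.org/dc/terms/format> <.../text/plain> .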
"$initial_format" =~ text/plain ]]; then + echo "ERROR: Initial format should contain text/plain but got: $initial_format" + exit 1 +fi + +# re-upload the same file with same slug but different explicit media type: text/csv +# this simulates editing the file document through the UI and uploading a new file + +create-file.sh \ + -f "$AGENT_CERT_FILE" \ + -p "$AGENT_CERT_PWD" \ + -b "$END_USER_BASE_URL" \ + --title "Test File for Media Type Update" \ + --slug "$slug" \ + --file "$test_file" \ + --file-content-type "text/csv" \ + > /dev/null + +# get updated document + +updated_ntriples=$(get.sh \ + -f "$AGENT_CERT_FILE" \ + -p "$AGENT_CERT_PWD" \ + --accept 'application/n-triples' \ + "$file_doc") + +# get updated SHA1 hash (should be same as initial) +updated_sha1=$(echo "$updated_ntriples" | sed -rn "s/<${file_uri//\//\\/}> \"(.*)\" \./\1/p") + +# get updated dct:format (should be text/csv) +updated_format=$(echo "$updated_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <(.*)> \./\1/p") + +# verify SHA1 is unchanged (same file content) +if [ "$initial_sha1" != "$updated_sha1" ]; then + echo "ERROR: SHA1 hash changed! Initial: $initial_sha1, Updated: $updated_sha1" + exit 1 +fi + +# verify dct:format was updated to text/csv +if [[ ! "$updated_format" =~ text/csv ]]; then + echo "ERROR: Format should have been updated to text/csv but got: $updated_format" + exit 1 +fi + +# cleanup +rm -f "$test_file" diff --git a/http-tests/imports/PUT-file-format.sh b/http-tests/imports/PUT-file-format.sh new file mode 100755 index 000000000..4a30ad9d6 --- /dev/null +++ b/http-tests/imports/PUT-file-format.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +set -euo pipefail + +initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL" +initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL" +purge_cache "$END_USER_VARNISH_SERVICE" +purge_cache "$ADMIN_VARNISH_SERVICE" +purge_cache "$FRONTEND_VARNISH_SERVICE" + +pwd=$(realpath "$PWD") + +# add agent to the writers group + +add-agent-to-group.sh \ + -f "$OWNER_CERT_FILE" \ + -p "$OWNER_CERT_PWD" \ + --agent "$AGENT_URI" \ + "${ADMIN_BASE_URL}acl/groups/writers/" + +# create test file with sample content + +test_file=$(mktemp) +echo "test,data,sample" > "$test_file" +echo "1,2,3" >> "$test_file" +echo "4,5,6" >> "$test_file" + +# generate slug for the file document + +slug=$(uuidgen | tr '[:upper:]' '[:lower:]') + +# upload file WITHOUT explicit media type (rely on browser detection via `file -b --mime-type`) + +file_doc=$(create-file.sh \ + -f "$AGENT_CERT_FILE" \ + -p "$AGENT_CERT_PWD" \ + -b "$END_USER_BASE_URL" \ + --title "Test File for Browser Media Type" \ + --slug "$slug" \ + --file "$test_file") + +# get the file resource URI and initial dct:format + +file_doc_ntriples=$(get.sh \ + -f "$AGENT_CERT_FILE" \ + -p "$AGENT_CERT_PWD" \ + --accept 'application/n-triples' \ + "$file_doc") + +file_uri=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_doc//\//\\/}> <(.*)> \./\1/p") + +# get initial SHA1 hash +initial_sha1=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_uri//\//\\/}> \"(.*)\" \./\1/p") + +# get initial dct:format (should be browser-detected) +initial_format=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <(.*)> \./\1/p") + +# re-upload the same file with same slug but WITH explicit media type: text/csv +# this simulates editing and uploading with a corrected format after browser auto-detection was wrong + +create-file.sh \ + -f "$AGENT_CERT_FILE" \ + -p "$AGENT_CERT_PWD" \ + -b 
"$END_USER_BASE_URL" \ + --title "Test File for Browser Media Type" \ + --slug "$slug" \ + --file "$test_file" \ + --file-content-type "text/csv" \ + > /dev/null + +# get updated document + +updated_ntriples=$(get.sh \ + -f "$AGENT_CERT_FILE" \ + -p "$AGENT_CERT_PWD" \ + --accept 'application/n-triples' \ + "$file_doc") + +# get updated SHA1 hash (should be same as initial) +updated_sha1=$(echo "$updated_ntriples" | sed -rn "s/<${file_uri//\//\\/}> \"(.*)\" \./\1/p") + +# get updated dct:format (should be text/csv) +updated_format=$(echo "$updated_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <(.*)> \./\1/p") + +# verify SHA1 is unchanged (same file content) +if [ "$initial_sha1" != "$updated_sha1" ]; then + echo "ERROR: SHA1 hash changed! Initial: $initial_sha1, Updated: $updated_sha1" + exit 1 +fi + +# verify dct:format was updated to text/csv +if [[ ! "$updated_format" =~ text/csv ]]; then + echo "ERROR: Format should have been updated to text/csv but got: $updated_format" + echo "Initial format was: $initial_format" + exit 1 +fi + +# cleanup +rm -f "$test_file" diff --git a/pom.xml b/pom.xml index 32e9246e3..6f440bfff 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ com.atomgraph linkeddatahub - 5.2.1 + 5.2.2-SNAPSHOT ${packaging.type} AtomGraph LinkedDataHub @@ -46,7 +46,7 @@ https://github.com/AtomGraph/LinkedDataHub scm:git:git://github.com/AtomGraph/LinkedDataHub.git scm:git:git@github.com:AtomGraph/LinkedDataHub.git - linkeddatahub-5.2.1 + linkeddatahub-2.1.1 diff --git a/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/OntologyFilter.java b/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/OntologyFilter.java index c996d5214..0390a989b 100644 --- a/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/OntologyFilter.java +++ b/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/OntologyFilter.java @@ -54,12 +54,43 @@ public class OntologyFilter implements ContainerRequestFilter private static final Logger log = LoggerFactory.getLogger(OntologyFilter.class); + /** + * Paths that should not trigger ontology loading to avoid circular dependencies. + * + * When an ontology contains owl:imports pointing to URIs within these paths, + * loading the ontology would trigger HTTP requests to those URIs. If those requests + * are intercepted by this filter, it creates a circular dependency: + * + * 1. Request arrives for /uploads/xyz + * 2. OntologyFilter intercepts it and loads ontology + * 3. Ontology has owl:imports for /uploads/xyz + * 4. Jena FileManager makes HTTP request to /uploads/xyz + * 5. OntologyFilter intercepts it again → infinite loop/deadlock + * + * Additionally, uploaded files are binary/RDF content that don't require + * ontology context for their serving logic. 
+
+        // Skip ontology loading for paths that may be referenced in owl:imports
+        // to prevent circular dependency deadlocks during ontology resolution
+        if (IGNORED_PATH_PREFIXES.stream().anyMatch(path::startsWith))
+        {
+            if (log.isTraceEnabled()) log.trace("Skipping ontology loading for path: {}", path);
+            crc.setProperty(OWL.Ontology.getURI(), Optional.empty());
+            return;
+        }
+
         crc.setProperty(OWL.Ontology.getURI(), getOntology(crc));
     }
diff --git a/src/main/java/com/atomgraph/linkeddatahub/vocabulary/ACL.java b/src/main/java/com/atomgraph/linkeddatahub/vocabulary/ACL.java
index 1a528e187..259d34a0e 100644
--- a/src/main/java/com/atomgraph/linkeddatahub/vocabulary/ACL.java
+++ b/src/main/java/com/atomgraph/linkeddatahub/vocabulary/ACL.java
@@ -60,6 +60,9 @@ public static String getURI()
     /** acl:Append access mode */
     public static final OntClass Append = m_model.createClass( NS + "Append" );
 
+    /** acl:Control access mode */
+    public static final OntClass Control = m_model.createClass( NS + "Control" );
+
     /** acl:AuthenticatedAgent class */
     public static final OntClass AuthenticatedAgent = m_model.createClass( NS + "AuthenticatedAgent" );