Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions http-tests/admin/model/ontology-import-upload-no-deadlock.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/usr/bin/env bash
set -euo pipefail

# Test that ontology imports of uploaded files do not cause deadlock
# This verifies the fix for circular dependency when:
# 1. Request arrives for /uploads/xyz
# 2. OntologyFilter intercepts it and loads ontology
# 3. Ontology has owl:imports for /uploads/xyz
# 4. Jena FileManager makes HTTP request to /uploads/xyz
# 5. Would cause infinite loop/deadlock without the fix
#
# Requires the standard http-tests environment: base/endpoint URLs, dataset
# files, Varnish service names, and owner/agent client certificates.

initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
purge_cache "$END_USER_VARNISH_SERVICE"
purge_cache "$ADMIN_VARNISH_SERVICE"
purge_cache "$FRONTEND_VARNISH_SERVICE"

pwd=$(realpath "$PWD")

# add agent to the writers group so they can upload files

add-agent-to-group.sh \
    -f "$OWNER_CERT_FILE" \
    -p "$OWNER_CERT_PWD" \
    --agent "$AGENT_URI" \
    "${ADMIN_BASE_URL}acl/groups/writers/"

# Step 1: Upload an RDF file

file_content_type="text/turtle"

file_doc=$(create-file.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    -b "$END_USER_BASE_URL" \
    --title "Test ontology for upload import" \
    --file "$pwd/test-ontology-import.ttl" \
    --file-content-type "${file_content_type}")

# Step 2: Extract the uploaded file URI (content-addressed) from the
# foaf:primaryTopic triple of the file document

file_doc_ntriples=$(get.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    --accept 'application/n-triples' \
    "$file_doc")

# NOTE(review): only slashes are escaped here; dots in the URI act as regex
# wildcards, which only widens the match and is acceptable for this extraction
upload_uri=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_doc//\//\\/}> <http:\/\/xmlns.com\/foaf\/0.1\/primaryTopic> <(.*)> \./\1/p")

# fail fast with a diagnostic instead of passing an empty URI downstream
if [ -z "$upload_uri" ]; then
    echo "ERROR: could not extract upload URI from document $file_doc" >&2
    exit 1
fi

# Verify the uploaded file is accessible before we add it as an import
curl -k -f -s \
    -E "$AGENT_CERT_FILE":"$AGENT_CERT_PWD" \
    -H "Accept: ${file_content_type}" \
    "$upload_uri" > /dev/null

# Step 3: Add the uploaded file as an owl:import to the namespace ontology

namespace_doc="${END_USER_BASE_URL}ns"
namespace="${namespace_doc}#"
ontology_doc="${ADMIN_BASE_URL}ontologies/namespace/"

add-ontology-import.sh \
    -f "$OWNER_CERT_FILE" \
    -p "$OWNER_CERT_PWD" \
    --import "$upload_uri" \
    "$ontology_doc"

# Step 4: Clear the namespace ontology from memory to force reload on next request

clear-ontology.sh \
    -f "$OWNER_CERT_FILE" \
    -p "$OWNER_CERT_PWD" \
    -b "$ADMIN_BASE_URL" \
    --ontology "$namespace"

# Step 5: Verify the import is present in the loaded ontology
# This request also triggers ontology loading and would detect deadlock.
# grep -F matches the triple literally — the URIs contain regex
# metacharacters (dots, '#') that a regex grep would misinterpret.

curl -k -f -s \
    -H "Accept: application/n-triples" \
    "$namespace_doc" \
    | grep -F "<${namespace}> <http://www.w3.org/2002/07/owl#imports> <${upload_uri}>" > /dev/null

# Step 6: Verify the uploaded file is still accessible after ontology loading

curl -k -f -s \
    -E "$AGENT_CERT_FILE":"$AGENT_CERT_PWD" \
    -H "Accept: ${file_content_type}" \
    "$upload_uri" > /dev/null

# Step 7: Verify that the imported ontology content is accessible via the namespace document
# This confirms the import was actually loaded (not just skipped)

curl -k -f -s \
    -G \
    -E "$OWNER_CERT_FILE":"$OWNER_CERT_PWD" \
    -H 'Accept: application/sparql-results+xml' \
    --data-urlencode "query=SELECT * { <https://example.org/test#TestClass> ?p ?o }" \
    "$namespace_doc" \
    | grep -F '<literal>Test Class</literal>' > /dev/null
17 changes: 17 additions & 0 deletions http-tests/admin/model/test-ontology-import.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Test fixture: a minimal ontology uploaded by the no-deadlock import test.
# It defines one class and one datatype property so the test can verify the
# import was actually loaded by querying for the "Test Class" label.
@prefix : <https://example.org/test#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

# the ontology header resource (the empty-prefix IRI <https://example.org/test#>)
: a owl:Ontology ;
rdfs:label "Test ontology for upload import" ;
rdfs:comment "This ontology is uploaded to test that ontology imports of uploaded files do not cause deadlock" .

# class whose rdfs:label is asserted via SPARQL after the import loads
:TestClass a owl:Class ;
rdfs:label "Test Class" ;
rdfs:comment "A test class to verify ontology was loaded" .

# datatype property attached to the test class
:testProperty a owl:DatatypeProperty ;
rdfs:label "Test Property" ;
rdfs:domain :TestClass ;
rdfs:range xsd:string .
104 changes: 104 additions & 0 deletions http-tests/imports/PUT-file-format-explicit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env bash
set -euo pipefail

# Test that re-uploading a file with the same slug but a different explicit
# media type updates dct:format while the file content (foaf:sha1) stays
# unchanged. Requires the standard http-tests environment variables.

initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
purge_cache "$END_USER_VARNISH_SERVICE"
purge_cache "$ADMIN_VARNISH_SERVICE"
purge_cache "$FRONTEND_VARNISH_SERVICE"

# add agent to the writers group

add-agent-to-group.sh \
    -f "$OWNER_CERT_FILE" \
    -p "$OWNER_CERT_PWD" \
    --agent "$AGENT_URI" \
    "${ADMIN_BASE_URL}acl/groups/writers/"

# create test file with sample content; the EXIT trap guarantees cleanup even
# when one of the assertions below exits non-zero (a trailing `rm -f` alone
# would be skipped on failure and leak the temp file)

test_file=$(mktemp)
trap 'rm -f -- "$test_file"' EXIT
printf '%s\n' "test,data,sample" "1,2,3" "4,5,6" > "$test_file"

# generate slug for the file document

slug=$(uuidgen | tr '[:upper:]' '[:lower:]')

# upload file with explicit media type: text/plain

file_doc=$(create-file.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    -b "$END_USER_BASE_URL" \
    --title "Test File for Media Type Update" \
    --slug "$slug" \
    --file "$test_file" \
    --file-content-type "text/plain")

# get the file resource URI and initial dct:format

file_doc_ntriples=$(get.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    --accept 'application/n-triples' \
    "$file_doc")

# NOTE(review): only slashes are escaped; dots in the URI act as regex
# wildcards, which only widens the match and is acceptable here
file_uri=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_doc//\//\\/}> <http:\/\/xmlns.com\/foaf\/0.1\/primaryTopic> <(.*)> \./\1/p")

# fail fast if the file resource URI could not be extracted
if [ -z "$file_uri" ]; then
    echo "ERROR: could not extract file URI from document $file_doc" >&2
    exit 1
fi

# get initial SHA1 hash
initial_sha1=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/xmlns.com\/foaf\/0.1\/sha1> \"(.*)\" \./\1/p")

# get initial dct:format
initial_format=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/purl.org\/dc\/terms\/format> <(.*)> \./\1/p")

# verify initial format is text/plain
if [[ ! "$initial_format" =~ text/plain ]]; then
    echo "ERROR: Initial format should contain text/plain but got: $initial_format" >&2
    exit 1
fi

# re-upload the same file with same slug but different explicit media type: text/csv
# this simulates editing the file document through the UI and uploading a new file

create-file.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    -b "$END_USER_BASE_URL" \
    --title "Test File for Media Type Update" \
    --slug "$slug" \
    --file "$test_file" \
    --file-content-type "text/csv" \
    > /dev/null

# get updated document

updated_ntriples=$(get.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    --accept 'application/n-triples' \
    "$file_doc")

# get updated SHA1 hash (should be same as initial)
updated_sha1=$(echo "$updated_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/xmlns.com\/foaf\/0.1\/sha1> \"(.*)\" \./\1/p")

# get updated dct:format (should be text/csv)
updated_format=$(echo "$updated_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/purl.org\/dc\/terms\/format> <(.*)> \./\1/p")

# verify SHA1 is unchanged (same file content)
if [ "$initial_sha1" != "$updated_sha1" ]; then
    echo "ERROR: SHA1 hash changed! Initial: $initial_sha1, Updated: $updated_sha1" >&2
    exit 1
fi

# verify dct:format was updated to text/csv
if [[ ! "$updated_format" =~ text/csv ]]; then
    echo "ERROR: Format should have been updated to text/csv but got: $updated_format" >&2
    exit 1
fi

# temp file cleanup is handled by the EXIT trap
98 changes: 98 additions & 0 deletions http-tests/imports/PUT-file-format.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash
set -euo pipefail

# Test that a file first uploaded WITHOUT an explicit media type (format is
# auto-detected server-side) can be re-uploaded with an explicit media type,
# which must update dct:format while the content (foaf:sha1) stays unchanged.
# Requires the standard http-tests environment variables.

initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
purge_cache "$END_USER_VARNISH_SERVICE"
purge_cache "$ADMIN_VARNISH_SERVICE"
purge_cache "$FRONTEND_VARNISH_SERVICE"

# add agent to the writers group

add-agent-to-group.sh \
    -f "$OWNER_CERT_FILE" \
    -p "$OWNER_CERT_PWD" \
    --agent "$AGENT_URI" \
    "${ADMIN_BASE_URL}acl/groups/writers/"

# create test file with sample content; the EXIT trap guarantees cleanup even
# when one of the assertions below exits non-zero (a trailing `rm -f` alone
# would be skipped on failure and leak the temp file)

test_file=$(mktemp)
trap 'rm -f -- "$test_file"' EXIT
printf '%s\n' "test,data,sample" "1,2,3" "4,5,6" > "$test_file"

# generate slug for the file document

slug=$(uuidgen | tr '[:upper:]' '[:lower:]')

# upload file WITHOUT explicit media type (rely on browser detection via `file -b --mime-type`)

file_doc=$(create-file.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    -b "$END_USER_BASE_URL" \
    --title "Test File for Browser Media Type" \
    --slug "$slug" \
    --file "$test_file")

# get the file resource URI and initial dct:format

file_doc_ntriples=$(get.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    --accept 'application/n-triples' \
    "$file_doc")

# NOTE(review): only slashes are escaped; dots in the URI act as regex
# wildcards, which only widens the match and is acceptable here
file_uri=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_doc//\//\\/}> <http:\/\/xmlns.com\/foaf\/0.1\/primaryTopic> <(.*)> \./\1/p")

# fail fast if the file resource URI could not be extracted
if [ -z "$file_uri" ]; then
    echo "ERROR: could not extract file URI from document $file_doc" >&2
    exit 1
fi

# get initial SHA1 hash
initial_sha1=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/xmlns.com\/foaf\/0.1\/sha1> \"(.*)\" \./\1/p")

# get initial dct:format (should be browser-detected)
initial_format=$(echo "$file_doc_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/purl.org\/dc\/terms\/format> <(.*)> \./\1/p")

# re-upload the same file with same slug but WITH explicit media type: text/csv
# this simulates editing and uploading with a corrected format after browser auto-detection was wrong

create-file.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    -b "$END_USER_BASE_URL" \
    --title "Test File for Browser Media Type" \
    --slug "$slug" \
    --file "$test_file" \
    --file-content-type "text/csv" \
    > /dev/null

# get updated document

updated_ntriples=$(get.sh \
    -f "$AGENT_CERT_FILE" \
    -p "$AGENT_CERT_PWD" \
    --accept 'application/n-triples' \
    "$file_doc")

# get updated SHA1 hash (should be same as initial)
updated_sha1=$(echo "$updated_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/xmlns.com\/foaf\/0.1\/sha1> \"(.*)\" \./\1/p")

# get updated dct:format (should be text/csv)
updated_format=$(echo "$updated_ntriples" | sed -rn "s/<${file_uri//\//\\/}> <http:\/\/purl.org\/dc\/terms\/format> <(.*)> \./\1/p")

# verify SHA1 is unchanged (same file content)
if [ "$initial_sha1" != "$updated_sha1" ]; then
    echo "ERROR: SHA1 hash changed! Initial: $initial_sha1, Updated: $updated_sha1" >&2
    exit 1
fi

# verify dct:format was updated to text/csv
if [[ ! "$updated_format" =~ text/csv ]]; then
    echo "ERROR: Format should have been updated to text/csv but got: $updated_format" >&2
    echo "Initial format was: $initial_format" >&2
    exit 1
fi

# temp file cleanup is handled by the EXIT trap
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

<groupId>com.atomgraph</groupId>
<artifactId>linkeddatahub</artifactId>
<version>5.2.1</version>
<version>5.2.2-SNAPSHOT</version>
<packaging>${packaging.type}</packaging>

<name>AtomGraph LinkedDataHub</name>
Expand Down Expand Up @@ -46,7 +46,7 @@
<url>https://github.com/AtomGraph/LinkedDataHub</url>
<connection>scm:git:git://github.com/AtomGraph/LinkedDataHub.git</connection>
<developerConnection>scm:git:git@github.com:AtomGraph/LinkedDataHub.git</developerConnection>
<tag>linkeddatahub-5.2.1</tag>
<tag>linkeddatahub-5.2.1</tag>
</scm>

<repositories>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,43 @@ public class OntologyFilter implements ContainerRequestFilter

private static final Logger log = LoggerFactory.getLogger(OntologyFilter.class);

/**
* Paths that should not trigger ontology loading to avoid circular dependencies.
*
* When an ontology contains owl:imports pointing to URIs within these paths,
* loading the ontology would trigger HTTP requests to those URIs. If those requests
* are intercepted by this filter, it creates a circular dependency:
*
* 1. Request arrives for /uploads/xyz
* 2. OntologyFilter intercepts it and loads ontology
* 3. Ontology has owl:imports for /uploads/xyz
* 4. Jena FileManager makes HTTP request to /uploads/xyz
* 5. OntologyFilter intercepts it again → infinite loop/deadlock
*
* Additionally, uploaded files are binary/RDF content that don't require
* ontology context for their serving logic.
*/
// NOTE(review): prefixes are compared with String#startsWith against
// UriInfo#getPath(), which is relative to the base URI (no leading slash)
private static final java.util.Set<String> IGNORED_PATH_PREFIXES = java.util.Set.of(
"uploads/"
);

/** Injected system application, used by this filter's ontology resolution. */
@Inject com.atomgraph.linkeddatahub.Application system;


@Override
public void filter(ContainerRequestContext crc) throws IOException
{
    final String requestPath = crc.getUriInfo().getPath();

    // Requests under an ignored prefix (e.g. "uploads/") must bypass ontology
    // loading entirely: loading could issue an HTTP request back to the same
    // path and deadlock (see IGNORED_PATH_PREFIXES for the full scenario).
    // An empty Optional is stored so downstream code still finds the property.
    for (String ignoredPrefix : IGNORED_PATH_PREFIXES)
    {
        if (requestPath.startsWith(ignoredPrefix))
        {
            if (log.isTraceEnabled()) log.trace("Skipping ontology loading for path: {}", requestPath);
            crc.setProperty(OWL.Ontology.getURI(), Optional.empty());
            return;
        }
    }

    // normal case: resolve the request's ontology and expose it as a request property
    crc.setProperty(OWL.Ontology.getURI(), getOntology(crc));
}

Expand Down
3 changes: 3 additions & 0 deletions src/main/java/com/atomgraph/linkeddatahub/vocabulary/ACL.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ public static String getURI()
/** <code>acl:Append</code> access mode */
public static final OntClass Append = m_model.createClass( NS + "Append" );

/** <code>acl:Control</code> access mode (per the W3C WAC vocabulary this grants permission to modify a resource's ACL — confirm against usage in authorization checks) */
public static final OntClass Control = m_model.createClass( NS + "Control" );

/** <code>acl:AuthenticatedAgent</code> class */
public static final OntClass AuthenticatedAgent = m_model.createClass( NS + "AuthenticatedAgent" );

Expand Down