diff --git a/files/galaxy/tools/alphafind.xml b/files/galaxy/tools/alphafind.xml
new file mode 100644
index 00000000..3d504b88
--- /dev/null
+++ b/files/galaxy/tools/alphafind.xml
@@ -0,0 +1,220 @@
+<!-- Tool XML markup stripped during extraction; surviving fragments: -->
+<!--   description: Search AlphaFind API for structurally similar proteins -->
+<!--   requirements: python, requests -->
+<!--   version_command tail: ... /dev/null || echo "1.0.0" ]]> -->
+<!-- The command, inputs, outputs, and tests sections are not recoverable. -->
+<!-- The help section (CDATA) resumes below, mid-list: -->
+* TM-score > 0.5 indicates significant structural similarity
+* TM-score > 0.8 indicates very high structural similarity
+
+-----
+
+**Examples**
+
+1. **Basic search**: Query protein P0A6F5 with default settings
+
+2. **Find similar proteins in specific organism**:
+
+ - Query: Q8Y547
+ - Organism: Mycobacterium tuberculosis
+
+3. **Domain-level search**:
+
+ - Query: F8U1Q0
+ - Index: domains
+ - CATH annotation: 1.10.8.10
+
+4. **High-confidence results**:
+
+ - Query: P9WGR1
+ - Sort by: TM-Score
+ - Sort order: Descending
+
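+Under the hood, the tool invokes the bundled ``alphafind_search.py`` client;
+example 2 above corresponds roughly to this command (illustrative)::
+
+  python3 alphafind_search.py --query Q8Y547 --filter-organism 'Mycobacterium tuberculosis' --output results.csv
+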
+-----
+
+**References**
+
+* AlphaFind API: https://alphafind.ics.muni.cz
+* AlphaFold DB: https://alphafold.ebi.ac.uk
+* TM-score paper: Zhang & Skolnick, 2004
+
+-----
+
+**Troubleshooting**
+
+* **"Protein not found"**: The query ID may not exist in AlphaFold DB or lacks embedding vectors. Verify the UniProt ID.
+* **Timeout exceeded**: Increase the timeout value in Search Options.
+* **Empty results**: Your filters may be too restrictive, or no similar proteins exist with the specified criteria.
+* **"Search failed"**: The API encountered an error. Try again or check the API status.
+ ]]>
+
+<!-- citations markup stripped in extraction; surviving DOI: 10.1038/nature14539 -->
+
diff --git a/files/galaxy/tools/alphafind_search.py b/files/galaxy/tools/alphafind_search.py
new file mode 100755
index 00000000..487e24a6
--- /dev/null
+++ b/files/galaxy/tools/alphafind_search.py
@@ -0,0 +1,899 @@
+#!/usr/bin/env python3
+"""
+AlphaFind API Client
+
+Search the AlphaFold protein structure database for structurally similar proteins via the AlphaFind API.
+
+Usage:
+ python3 alphafind_search.py --query P0A6F5 --output results.csv
+
+Search Features:
+ - Query by UniProt protein ID
+ - Multiple index types: chains, chains_90, chains_80, chains_70, domains
+ - Filters: organism, taxonomy ID, gene name, CATH annotation (domains)
+ - Pagination and sorting support
+ - Asynchronous computation with progress tracking
+
+Output:
+ CSV file with structural similarity results including TM-scores and metadata
+
+API Limitations:
+ - Maximum 5,000 results per query (hard limit)
+ - Page size limited to 100 results by the API
+ - TM-score calculations can take time for large result sets
+
+Common Use Cases:
+ 1. Basic search for similar structures: --query P0A6F5
+ 2. Filter by organism: --filters '{"organism": "Mycobacterium tuberculosis"}'
+ 3. Sort by TM-score: --sort-by tm_score --sort-order desc
+ 4. Get more results: --options '{"size": 5000}' (max API limit)
+
+For queries with >5000 results, split by organism or tax_id and combine (see the sketch after this docstring).
+
+Version: 1.0.0
+"""
+
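+# Illustrative sketch of the split-and-combine workflow noted above (the tax
+# IDs and filenames here are hypothetical):
+#
+#   python3 alphafind_search.py --query P69905 --filter-tax-id 9606 --output human.csv
+#   python3 alphafind_search.py --query P69905 --filter-tax-id 10090 --output mouse.csv
+#
+# Each run writes the same CSV_COLUMNS header, so the files can be combined
+# by keeping only the first file's header row.
+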
+__version__ = "1.0.0"
+
+import argparse
+import csv
+import json
+import logging
+import sys
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+
+import requests
+
+# ============================================================================
+# Constants
+# ============================================================================
+
+# Exit codes
+EXIT_SUCCESS = 0
+EXIT_SEARCH_FAILED = 1
+EXIT_TIMEOUT = 2
+EXIT_HTTP_ERROR = 3
+EXIT_INTERRUPTED = 4
+EXIT_UNEXPECTED_ERROR = 5
+
+# API defaults
+DEFAULT_BASE_URL = "https://alphafind.ics.muni.cz"
+DEFAULT_PAGE_SIZE = 100
+DEFAULT_TIMEOUT = 600
+DEFAULT_POLL_INTERVAL = 5
+
+# Pagination: API max is 100 per page
+MAX_PAGE_SIZE = 100
+
+# Result limits
+MAX_RESULTS_PER_QUERY = 5000
+
+# Sort options
+DEFAULT_SORT_BY = 'knn'
+DEFAULT_SORT_ORDER = 'desc'
+VALID_SORT_BY = ['knn', 'tm_score']
+VALID_SORT_ORDER = ['asc', 'desc']
+
+# Index types
+VALID_INDEXES = ['chains', 'chains_90', 'chains_80', 'chains_70', 'domains']
+
+# CSV output columns
+CSV_COLUMNS = [
+ 'query_id', 'index_type', 'page_number',
+ 'target_id', 'score', 'organism', 'tax_id',
+ 'gene_name', 'protein_name', 'avg_plddt',
+ 'tm_score_query', 'tm_score_target', 'rmsd',
+ 'sequential_identity', 'aligned_residues',
+ 'status', 'created_at', 'completed_at',
+ 'has_experimental_structure', 'pdb_ids',
+ 'chopping', 'tar_index'
+]
+
+# Status constants
+STATUS_PENDING = 'pending'
+STATUS_SCORING = 'scoring'
+STATUS_COMPLETED = 'completed'
+STATUS_FAILED = 'failed'
+STATUS_FINAL = (STATUS_COMPLETED, STATUS_FAILED)
+
+
+# ============================================================================
+# Data Classes
+# ============================================================================
+
+@dataclass
+class SearchConfig:
+ """Configuration for a search operation."""
+ query: str
+ index: Optional[List[str]] # None = use server default
+ filters: Dict[str, Any]
+ options: Dict[str, Any]
+ output_file: str
+ base_url: str
+ poll_interval: int
+ timeout: int
+ page_size: int
+ sort_by: str = DEFAULT_SORT_BY
+ sort_order: str = DEFAULT_SORT_ORDER
+ verbose: bool = False
+ dry_run: bool = False
+ quiet: bool = False
+
+
+# ============================================================================
+# API Client
+# ============================================================================
+
+class AlphaFindClient:
+ """Client for interacting with the AlphaFind API."""
+
+ def __init__(self, base_url: str, timeout: int = 30):
+ """Initialize the AlphaFind client.
+
+ Args:
+ base_url: Base URL of the AlphaFind API
+ timeout: Request timeout in seconds
+ """
+ self.base_url = base_url.rstrip('/')
+ self.timeout = timeout
+ self.session = requests.Session()
+ self.session.headers.update({'Content-Type': 'application/json'})
+
+ def health_check(self) -> bool:
+ """Check if the API is accessible.
+
+ Returns:
+ True if API is healthy, False otherwise
+ """
+ try:
+ response = self.session.get(
+ f"{self.base_url}/api/health",
+ timeout=self.timeout
+ )
+ response.raise_for_status()
+ return True
+ except requests.RequestException as e:
+ logging.warning(f"Health check failed: {e}")
+ return False
+
+ def submit_search(
+ self,
+ query: str,
+ index: Optional[List[str]],
+ filters: Dict[str, Any],
+ options: Dict[str, Any]
+    ) -> Tuple[str, str, str]:
+ """Submit a search query to the API.
+
+ Args:
+ query: Protein ID to search for
+ index: Index types to search (None for server default)
+ filters: Filter criteria
+ options: Search options
+
+ Returns:
+ Tuple of (query_id, index_type, status)
+
+ Raises:
+ requests.HTTPError: If the API request fails
+ """
+ payload = {
+ "query": query,
+ "filters": filters,
+ "options": options
+ }
+
+ # Only include index if specified
+ if index:
+ payload["index"] = index
+
+ logging.info(f"Submitting search for query: {query}")
+ logging.info(f"API URL: {self.base_url}/api/search")
+ logging.info(f"API Payload: {json.dumps(payload)}")
+
+ response = self.session.post(
+ f"{self.base_url}/api/search",
+ json=payload,
+ timeout=self.timeout
+ )
+ response.raise_for_status()
+
+ data = response.json()
+ query_id = data['id']
+ index_type = data['index_type'][0]
+ status = data['status']
+
+ logging.info(f"Search submitted: query_id={query_id}, "
+ f"index_type={index_type}, status={status}")
+ logging.debug(f"Response: {json.dumps(data, indent=2)}")
+
+ return query_id, index_type, status
+
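+    # Illustrative /api/search response, inferred from the fields read in
+    # submit_search above (values are hypothetical, not an exhaustive schema):
+    #   {"id": "8f3c2a...", "index_type": ["chains"], "status": "pending"}
+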
+ def get_results(
+ self,
+ query_id: str,
+ index_type: str,
+ page: int = 1,
+ page_size: int = DEFAULT_PAGE_SIZE,
+ sort_by: str = DEFAULT_SORT_BY,
+ sort_order: str = DEFAULT_SORT_ORDER
+ ) -> Dict[str, Any]:
+ """Retrieve paginated search results.
+
+ Args:
+ query_id: Query ID from search submission
+ index_type: Index type
+ page: Page number (1-indexed)
+ page_size: Number of results per page (max 100)
+ sort_by: Sort field ('knn' or 'tm_score')
+ sort_order: Sort order ('desc' or 'asc')
+
+ Returns:
+ Dictionary containing results and metadata
+
+ Raises:
+ requests.HTTPError: If the API request fails
+ """
+ params = {
+ "page": page,
+ "page_size": page_size,
+ "sort_by": sort_by,
+ "sort_order": sort_order
+ }
+
+ logging.debug(f"Fetching results: page={page}, page_size={page_size}")
+
+ response = self.session.get(
+ f"{self.base_url}/api/search/{query_id}/{index_type}/results",
+ params=params,
+ timeout=self.timeout
+ )
+
+ # Handle 404 responses that contain error messages
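+        # (assumed failure body, e.g.: {"status": "failed", "message": "Protein not found"})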
+ if response.status_code == 404:
+ try:
+ error_data = response.json()
+ if error_data.get('status') == STATUS_FAILED:
+ error_message = error_data.get('message', 'Search query not found')
+ sys.stderr.write(f"ERROR: {error_message}\n")
+ sys.stderr.flush()
+ raise RuntimeError(f"Search failed: {error_message}")
+ except (ValueError, KeyError):
+ # If response is not JSON or doesn't have expected structure
+ sys.stderr.write(f"ERROR: Search query not found for query_id: {query_id}, index_type: {index_type}\n")
+ sys.stderr.flush()
+ raise RuntimeError(f"Search query not found: query_id={query_id}, index_type={index_type}")
+
+ response.raise_for_status()
+
+ return response.json()
+
+ def wait_for_completion(
+ self,
+ query_id: str,
+ index_type: str,
+ poll_interval: int = DEFAULT_POLL_INTERVAL,
+ timeout: int = DEFAULT_TIMEOUT,
+ show_progress: bool = True
+ ) -> str:
+ """Poll the API until the search is completed or failed.
+
+ Args:
+ query_id: Query ID to monitor
+ index_type: Index type
+ poll_interval: Seconds between polls
+ timeout: Maximum time to wait in seconds
+ show_progress: Whether to show progress indicator
+
+ Returns:
+ Final status ('completed' or 'failed')
+
+ Raises:
+ TimeoutError: If timeout is exceeded
+ """
+ start_time = time.time()
+ status = STATUS_PENDING
+
+ logging.info(f"Waiting for completion (timeout={timeout}s, "
+ f"poll_interval={poll_interval}s)")
+
+ while time.time() - start_time < timeout:
+ try:
+ response = self.get_results(query_id, index_type, page=1, page_size=1)
+ status = response.get('status', STATUS_PENDING)
+ total_results = response.get('total_results', 0)
+
+ if show_progress:
+ sys.stdout.write(f"\r Status: {status} | "
+ f"Elapsed: {int(time.time() - start_time)}s | "
+ f"Results: {total_results}")
+ sys.stdout.flush()
+
+ if status in STATUS_FINAL:
+ if show_progress:
+ sys.stdout.write('\n')
+ logging.info(f"Search finished with status: {status}")
+ return status
+
+ time.sleep(poll_interval)
+
+ except requests.RequestException as e:
+ logging.warning(f"Polling error (will retry): {e}")
+ time.sleep(poll_interval)
+
+ if show_progress:
+ sys.stdout.write('\n')
+ raise TimeoutError(
+ f"Search did not complete within {timeout} seconds. "
+ f"Last status: {status}"
+ )
+
+ def get_all_results(
+ self,
+ query_id: str,
+ index_type: str,
+ page_size: int = DEFAULT_PAGE_SIZE,
+ sort_by: str = DEFAULT_SORT_BY,
+ sort_order: str = DEFAULT_SORT_ORDER,
+ show_progress: bool = True
+ ) -> List[Dict[str, Any]]:
+ """Fetch all paginated results.
+
+ Args:
+ query_id: Query ID
+ index_type: Index type
+ page_size: Number of results per page
+ sort_by: Sort field
+ sort_order: Sort order
+ show_progress: Whether to show progress
+
+ Returns:
+ List of all result dictionaries
+ """
+ all_results: List[Dict[str, Any]] = []
+ page = 1
+ total_pages = 1
+
+ logging.info(f"Fetching all results with page_size={page_size}")
+
+ while page <= total_pages:
+ try:
+ response = self.get_results(
+ query_id, index_type,
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order
+ )
+
+ results = response.get('results', [])
+ all_results.extend(results)
+
+ if page == 1:
+ total_pages = response.get('total_pages', 1)
+
+ if show_progress:
+ sys.stdout.write(f"\r Fetching page {page}/{total_pages}")
+ sys.stdout.flush()
+
+ page += 1
+
+ except requests.RequestException as e:
+ logging.error(f"Error fetching page {page}: {e}")
+ break
+
+ if show_progress:
+ sys.stdout.write('\n')
+ logging.info(f"Fetched {len(all_results)} results")
+
+ return all_results
+
+
+# ============================================================================
+# Data Processing
+# ============================================================================
+
+def flatten_result(
+ result: Dict[str, Any],
+ query_id: str,
+ index_type: str,
+ page: int
+) -> Dict[str, Any]:
+ """Flatten nested result dictionary for CSV export.
+
+ Args:
+ result: Raw result from API
+ query_id: Query ID for this search
+ index_type: Index type for this search
+ page: Page number this result came from
+
+ Returns:
+ Flattened dictionary ready for CSV export
+ """
+ flattened = {
+ 'query_id': query_id,
+ 'index_type': index_type,
+ 'page_number': page,
+ 'target_id': result.get('target_id'),
+ 'score': result.get('score'),
+ 'organism': result.get('organism'),
+ 'tax_id': result.get('tax_id'),
+ 'gene_name': result.get('gene_name'),
+ 'protein_name': result.get('protein_name'),
+ 'avg_plddt': result.get('avg_plddt'),
+ 'tm_score_query': result.get('tm_score_query'),
+ 'tm_score_target': result.get('tm_score_target'),
+ 'rmsd': result.get('rmsd'),
+ 'sequential_identity': result.get('sequential_identity'),
+ 'aligned_residues': result.get('aligned_residues'),
+ }
+
+ # Flatten metadata
+ metadata = result.get('metadata', {})
+ flattened['status'] = metadata.get('status')
+ flattened['created_at'] = metadata.get('created_at')
+ flattened['completed_at'] = metadata.get('completed_at')
+
+ # Flatten experimental_structure (can be None)
+ exp_structure = result.get('experimental_structure')
+ if exp_structure:
+ flattened['has_experimental_structure'] = exp_structure.get('has_experimental_structure')
+ pdb_ids = exp_structure.get('pdb_ids')
+ flattened['pdb_ids'] = ';'.join(pdb_ids) if pdb_ids else ''
+ else:
+ flattened['has_experimental_structure'] = None
+ flattened['pdb_ids'] = ''
+
+ # Handle optional fields
+ if 'tar_index' in result:
+ flattened['tar_index'] = str(result['tar_index'])
+
+ flattened['chopping'] = result.get('chopping', '')
+
+ return flattened
+
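+# Illustrative flatten_result input/output (values are hypothetical):
+#   raw:  {"target_id": "AF-P0A6F5-F1", "score": 0.92,
+#          "metadata": {"status": "completed"},
+#          "experimental_structure": {"has_experimental_structure": True,
+#                                     "pdb_ids": ["1ABC", "2DEF"]}}
+#   flat: adds query_id/index_type/page_number and joins pdb_ids as "1ABC;2DEF"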
+
+def results_to_csv(
+ results: List[Dict[str, Any]],
+ query_id: str,
+ index_type: str,
+ filename: str,
+ page_size: int
+) -> int:
+ """Write flattened results to CSV file.
+
+ Args:
+ results: List of raw result dictionaries
+ query_id: Query ID for this search
+ index_type: Index type for this search
+ filename: Output CSV filename
+ page_size: Page size used (for page number assignment)
+
+ Returns:
+ Number of results written
+ """
+ if not results:
+ logging.warning("No results to write to CSV")
+ return 0
+
+ # Calculate page numbers and flatten all results
+ flattened_results = []
+ for i, result in enumerate(results):
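+        # e.g. with page_size=100: indices 0-99 -> page 1, 100-199 -> page 2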
+ page = (i // page_size) + 1
+ flattened_results.append(
+ flatten_result(result, query_id, index_type, page)
+ )
+
+ # Write to CSV
+ try:
+ with open(filename, 'w', newline='', encoding='utf-8') as f:
+ writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS, extrasaction='ignore')
+ writer.writeheader()
+ writer.writerows(flattened_results)
+
+ logging.info(f"Wrote {len(flattened_results)} results to {filename}")
+ return len(flattened_results)
+
+ except IOError as e:
+ logging.error(f"Error writing CSV file: {e}")
+ raise
+
+
+# ============================================================================
+# Argument Parsing
+# ============================================================================
+
+def parse_json_args(arg_string: str) -> Dict[str, Any]:
+ """Parse JSON string argument.
+
+ Args:
+ arg_string: JSON string to parse
+
+ Returns:
+ Parsed dictionary
+
+ Raises:
+ argparse.ArgumentTypeError: If JSON is invalid
+ """
+ if not arg_string:
+ return {}
+
+ try:
+ return json.loads(arg_string)
+ except json.JSONDecodeError as e:
+ raise argparse.ArgumentTypeError(f"Invalid JSON: {e}")
+
+
+def parse_arguments() -> SearchConfig:
+ """Parse command-line arguments.
+
+ Returns:
+ SearchConfig with all parameters
+ """
+ parser = argparse.ArgumentParser(
+ description='AlphaFind API Client - Search for protein structures',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog="""
+Examples:
+ # Basic search
+ python3 alphafind_search.py --query P0A6F5
+
+ # Search with organism filter
+ python3 alphafind_search.py --query Q8Y547 --filters '{"organism": "Mycobacterium tuberculosis"}'
+
+ # Search with sorting
+ python3 alphafind_search.py --query Q9SBL1 --sort-by tm_score --sort-order desc
+
+ # Request maximum results (API limit is 5000 per query)
+ python3 alphafind_search.py --query P69905 --options '{"size": 5000}' --timeout 1800
+ """
+ )
+
+ # Required: query
+ parser.add_argument(
+ '--query',
+ help='UniProt protein ID to search (e.g., "P0A6F5")'
+ )
+
+ # Optional: index selection
+ parser.add_argument(
+ '--index',
+ nargs='+',
+ help=f'Index types to search (uses server default if not specified). '
+ f'Valid: {", ".join(VALID_INDEXES)}'
+ )
+
+ # Optional: filters
+ parser.add_argument(
+ '--filters',
+ type=parse_json_args,
+ default={},
+ help='Filter criteria as JSON string (e.g., \'{"organism": "Mycobacterium tuberculosis"}\')'
+ )
+
+ # Optional: search options
+ parser.add_argument(
+ '--options',
+ type=parse_json_args,
+ default={},
+ help='Search options as JSON string (e.g., \'{"size": 5000}\')'
+ )
+
+ # Optional: individual filters (for Galaxy compatibility)
+ parser.add_argument(
+ '--filter-organism',
+ help='Filter by organism name (e.g., "Mycobacterium tuberculosis")'
+ )
+ parser.add_argument(
+ '--filter-tax-id',
+ type=int,
+ help='Filter by NCBI Taxonomy ID (numeric)'
+ )
+ parser.add_argument(
+ '--filter-gene-name',
+ help='Filter by gene name'
+ )
+ parser.add_argument(
+ '--filter-cath-annotation',
+ help='Filter by CATH annotation (only for domains index)'
+ )
+
+ # Optional: individual options (for Galaxy compatibility)
+ parser.add_argument(
+ '--option-k',
+ type=int,
+ help='Number of similar proteins to return (k parameter)'
+ )
+
+ # Optional: output
+ parser.add_argument(
+ '--output',
+ default='results.csv',
+ help='Output CSV filename (default: results.csv)'
+ )
+
+ # Optional: API configuration
+ parser.add_argument(
+ '--base-url',
+ default=DEFAULT_BASE_URL,
+ help=f'AlphaFind API base URL (default: {DEFAULT_BASE_URL})'
+ )
+
+ # Optional: timeouts and polling
+ parser.add_argument(
+ '--poll-interval',
+ type=int,
+ default=DEFAULT_POLL_INTERVAL,
+ help=f'Polling interval in seconds (default: {DEFAULT_POLL_INTERVAL})'
+ )
+
+ parser.add_argument(
+ '--timeout',
+ type=int,
+ default=DEFAULT_TIMEOUT,
+ help=f'Maximum wait time in seconds (default: {DEFAULT_TIMEOUT})'
+ )
+
+ # Optional: pagination
+ parser.add_argument(
+ '--page-size',
+ type=int,
+ default=DEFAULT_PAGE_SIZE,
+ choices=range(1, MAX_PAGE_SIZE + 1),
+ metavar=f'[1-{MAX_PAGE_SIZE}]',
+ help=f'Results per page (default: {DEFAULT_PAGE_SIZE}, max: {MAX_PAGE_SIZE})'
+ )
+
+ # Optional: sorting
+ parser.add_argument(
+ '--sort-by',
+ choices=VALID_SORT_BY,
+ default=DEFAULT_SORT_BY,
+ help=f'Sort results by (default: {DEFAULT_SORT_BY})'
+ )
+
+ parser.add_argument(
+ '--sort-order',
+ choices=VALID_SORT_ORDER,
+ default=DEFAULT_SORT_ORDER,
+ help=f'Sort order (default: {DEFAULT_SORT_ORDER})'
+ )
+
+ # Mode flags
+ parser.add_argument('-v', '--verbose', action='store_true',
+ help='Enable verbose logging')
+ parser.add_argument('-q', '--quiet', action='store_true',
+ help='Suppress informational output (recommended for Galaxy automation)')
+ parser.add_argument('--dry-run', action='store_true',
+ help='Show what would be done without executing')
+ parser.add_argument('--version', action='store_true',
+ help='Show version and exit')
+
+ args = parser.parse_args()
+
+ # Handle version flag
+ if args.version:
+ print(__version__)
+ sys.exit(EXIT_SUCCESS)
+
+ # Validate required arguments
+ if not args.query and not args.dry_run:
+ parser.error('--query is required')
+
+ # Build filters from both JSON and individual params
+ filters = dict(args.filters)
+ if args.filter_organism:
+ filters['organism'] = args.filter_organism
+ if args.filter_tax_id:
+ filters['tax_id'] = args.filter_tax_id
+ if args.filter_gene_name:
+ filters['gene_name'] = args.filter_gene_name
+ if args.filter_cath_annotation:
+ filters['cath_annotation'] = args.filter_cath_annotation
+
+ # Build options from both JSON and individual params
+ options = dict(args.options)
+ if args.option_k:
+ options['k'] = args.option_k
+
+ return SearchConfig(
+ query=args.query,
+ index=args.index,
+ filters=filters,
+ options=options,
+ output_file=args.output,
+ base_url=args.base_url,
+ poll_interval=args.poll_interval,
+ timeout=args.timeout,
+ page_size=args.page_size,
+ sort_by=args.sort_by,
+ sort_order=args.sort_order,
+ verbose=args.verbose,
+ dry_run=args.dry_run,
+ quiet=args.quiet
+ )
+
+
+# ============================================================================
+# Logging
+# ============================================================================
+
+def setup_logging(verbose: bool, quiet: bool = False) -> None:
+ """Configure logging based on verbosity and quiet mode.
+
+ Args:
+ verbose: Whether to enable verbose logging
+ quiet: Whether to suppress all informational logging (for Galaxy)
+ """
+ if quiet:
+ logging.basicConfig(
+ level=logging.ERROR,
+ format='%(message)s',
+ stream=sys.stderr
+ )
+ else:
+ level = logging.DEBUG if verbose else logging.INFO
+ logging.basicConfig(
+ level=level,
+ format='%(asctime)s - %(levelname)s - %(message)s',
+ datefmt='%Y-%m-%d %H:%M:%S'
+ )
+
+
+def error_exit(message: str, exit_code: int = 1) -> None:
+ """Print error to stderr and exit.
+
+ Args:
+ message: Error message to display
+ exit_code: Exit code to return
+ """
+ sys.stderr.write(f"ERROR: {message}\n")
+ sys.stderr.flush()
+ sys.exit(exit_code)
+
+
+# ============================================================================
+# Main Application
+# ============================================================================
+
+def run_search(client: AlphaFindClient, config: SearchConfig) -> int:
+ """Execute the search workflow.
+
+ Args:
+ client: AlphaFind API client
+ config: Search configuration
+
+ Returns:
+ Exit code (0 for success, non-zero for failure)
+ """
+ show_progress = not config.quiet
+
+ # Submit search
+ query_id, index_type, status = client.submit_search(
+ query=config.query,
+ index=config.index,
+ filters=config.filters,
+ options=config.options
+ )
+
+ # Wait for completion if needed
+ if status != STATUS_COMPLETED:
+ if not config.quiet:
+ logging.info("Waiting for search to complete...")
+ status = client.wait_for_completion(
+ query_id=query_id,
+ index_type=index_type,
+ poll_interval=config.poll_interval,
+ timeout=config.timeout,
+ show_progress=show_progress
+ )
+
+ # Check final status
+ if status == STATUS_FAILED:
+ error_msg = "Search failed. No results to retrieve."
+ logging.error(error_msg)
+ if config.quiet:
+ error_exit(error_msg, EXIT_SEARCH_FAILED)
+ return EXIT_SEARCH_FAILED
+
+ # Get all results
+ results = client.get_all_results(
+ query_id=query_id,
+ index_type=index_type,
+ page_size=config.page_size,
+ sort_by=config.sort_by,
+ sort_order=config.sort_order,
+ show_progress=show_progress
+ )
+
+ # Save to CSV
+ if not results:
+ if not config.quiet:
+ logging.warning("No results found")
+ return EXIT_SUCCESS
+
+ count = results_to_csv(
+ results=results,
+ query_id=query_id,
+ index_type=index_type,
+ filename=config.output_file,
+ page_size=config.page_size
+ )
+
+ if not config.quiet:
+ logging.info(f"Successfully completed. {count} results saved to {config.output_file}")
+
+ return EXIT_SUCCESS
+
+
+def main() -> int:
+ """Main entry point.
+
+ Returns:
+ Exit code (0 for success, non-zero for failure)
+ """
+ config = parse_arguments()
+ setup_logging(config.verbose, config.quiet)
+
+ # Log configuration if not in quiet mode
+ if not config.quiet:
+ logging.info("=" * 60)
+ logging.info("AlphaFind API Client")
+ logging.info("=" * 60)
+ logging.info(f"Query: {config.query}")
+ logging.info(f"Index: {config.index or 'Server default'}")
+ logging.info(f"Filters: {config.filters}")
+ logging.info(f"Options: {config.options}")
+ logging.info(f"Output: {config.output_file}")
+ logging.info(f"Base URL: {config.base_url}")
+ logging.info(f"Timeout: {config.timeout}s, Poll interval: {config.poll_interval}s")
+ logging.info(f"Page size: {config.page_size}, Sort by: {config.sort_by} {config.sort_order}")
+ logging.info("=" * 60)
+
+ if config.dry_run:
+ logging.info("DRY RUN - No API calls will be made")
+ logging.info(f"Query: {config.query}")
+ logging.info(f"Results would be saved to: {config.output_file}")
+ return EXIT_SUCCESS
+
+ try:
+ client = AlphaFindClient(config.base_url)
+
+ # Health check (optional, just warns if fails)
+ if not client.health_check() and not config.quiet:
+ logging.warning("Health check failed. Continuing anyway...")
+
+ return run_search(client, config)
+
+ except TimeoutError as e:
+ error_msg = str(e)
+ logging.error(error_msg)
+ if config.quiet:
+ error_exit(error_msg, EXIT_TIMEOUT)
+ return EXIT_TIMEOUT
+
+ except requests.HTTPError as e:
+        response_text = e.response.text if e.response is not None else 'No response'
+ error_msg = f"HTTP error: {e}\nResponse: {response_text}"
+ logging.error(error_msg)
+ if config.quiet:
+ error_exit(error_msg, EXIT_HTTP_ERROR)
+ return EXIT_HTTP_ERROR
+
+ except KeyboardInterrupt:
+ if not config.quiet:
+ logging.info("\nInterrupted by user")
+ return EXIT_INTERRUPTED
+
+ except Exception as e:
+ error_msg = f"Unexpected error: {e}"
+ logging.error(error_msg, exc_info=config.verbose)
+ if config.quiet:
+ error_exit(error_msg, EXIT_UNEXPECTED_ERROR)
+ return EXIT_UNEXPECTED_ERROR
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/group_vars/galaxyservers.yml b/group_vars/galaxyservers.yml
index a8f3ff20..ae5c7fea 100644
--- a/group_vars/galaxyservers.yml
+++ b/group_vars/galaxyservers.yml
@@ -295,6 +295,8 @@ galaxy_local_tools:
- testing.xml
- testing_pbs.xml
- testing_html.xml
+ - alphafind.xml
+ - alphafind_search.py
galaxy_extra_dirs:
- "{{ galaxy_mutable_data_dir }}"
diff --git a/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml b/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml
index 2e842eb6..cf3cac32 100644
--- a/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml
+++ b/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml
@@ -1,9 +1,8 @@
-galaxy_commit_id: release_25.1
galaxy_build_client: false
galaxy_client_make_target: client-production
galaxy_repo: https://github.com/CESNET/galaxy.git
-galaxy_commit_id: cesnet_invenio
+galaxy_commit_id: cesnet_invenio_26.0
csnt_brand: "QA1-TEST-{{ galaxy_commit_id }}"
csnt_log_level: DEBUG
diff --git a/templates/galaxy-qa1.galaxy.cloud.e-infra.cz/config/local_tool_conf.xml.j2 b/templates/galaxy-qa1.galaxy.cloud.e-infra.cz/config/local_tool_conf.xml.j2
index 7f54acdd..d248419c 100644
--- a/templates/galaxy-qa1.galaxy.cloud.e-infra.cz/config/local_tool_conf.xml.j2
+++ b/templates/galaxy-qa1.galaxy.cloud.e-infra.cz/config/local_tool_conf.xml.j2
@@ -3,4 +3,5 @@
+  <!-- added line stripped in extraction; presumably a <tool file="alphafind.xml" /> entry -->
diff --git a/templates/galaxy/config/tpv_rules_meta.yml.j2 b/templates/galaxy/config/tpv_rules_meta.yml.j2
index 326a40ab..565bf241 100644
--- a/templates/galaxy/config/tpv_rules_meta.yml.j2
+++ b/templates/galaxy/config/tpv_rules_meta.yml.j2
@@ -253,11 +253,13 @@ tools:
     scheduling:
       require:
         - alphafold
+  alphafind_search*:
+    inherits: local_running_tools
   toolshed.g2.bx.psu.edu/repos/bgruening/llm_hub/llm_hub/.*:
     inherits: local_running_tools
     env:
       LITELLM_CONFIG_FILE: "{{ galaxy_config_dir }}/llm_hub_config.yaml"
-
+
 roles:
   training.*: