diff --git a/changelog/unreleased/PR#4149-distributed-luke b/changelog/unreleased/PR#4149-distributed-luke new file mode 100644 index 00000000000..aae5ac517b5 --- /dev/null +++ b/changelog/unreleased/PR#4149-distributed-luke @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Distributed Luke +type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Luke Kot-Zaniewski +links: + - name: PR#4149 + url: https://github.com/apache/solr/pull/4149 diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index b5879024d10..63b8c7b7e80 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -19,6 +19,8 @@ import static org.apache.lucene.index.IndexOptions.DOCS; import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS; import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; +import static org.apache.solr.common.params.CommonParams.DISTRIB; +import static org.apache.solr.common.params.CommonParams.PATH; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -33,6 +35,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -67,15 +70,25 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.client.api.model.CoreStatusResponse; +import org.apache.solr.client.api.util.SolrVersion; +import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.luke.FieldFlag; import 
org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.SolrCore; import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.api.V2ApiUtils; +import org.apache.solr.handler.component.ResponseBuilder; +import org.apache.solr.handler.component.ShardHandler; +import org.apache.solr.handler.component.ShardHandlerFactory; +import org.apache.solr.handler.component.ShardRequest; +import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.CopyField; @@ -84,7 +97,9 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.security.AuthorizationContext; +import org.apache.solr.servlet.HttpSolrCall; import org.apache.solr.update.SolrIndexWriter; +import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -97,7 +112,7 @@ * @see SegmentsInfoRequestHandler * @since solr 1.2 */ -public class LukeRequestHandler extends RequestHandlerBase { +public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAware { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String NUMTERMS = "numTerms"; @@ -108,6 +123,35 @@ public class LukeRequestHandler extends RequestHandlerBase { static final int HIST_ARRAY_SIZE = 33; + // Response section keys + private static final String RSP_INDEX = "index"; + private static final String RSP_FIELDS = "fields"; + private static final String RSP_SCHEMA = "schema"; + private static final String RSP_INFO = "info"; + private static final String RSP_DOC 
= "doc"; + private static final String RSP_SHARDS = "shards"; + + // Field-level keys + private static final String KEY_NUM_DOCS = "numDocs"; + private static final String KEY_MAX_DOC = "maxDoc"; + private static final String KEY_DELETED_DOCS = "deletedDocs"; + private static final String KEY_SEGMENT_COUNT = "segmentCount"; + private static final String KEY_TYPE = "type"; + private static final String KEY_SCHEMA_FLAGS = "schema"; + private static final String KEY_DOCS = "docs"; + private static final String KEY_DISTINCT = "distinct"; + private static final String KEY_TOP_TERMS = "topTerms"; + private static final String KEY_DYNAMIC_BASE = "dynamicBase"; + private static final String KEY_INDEX_FLAGS = "index"; + private static final String KEY_HISTOGRAM = "histogram"; + + private ShardHandlerFactory shardHandlerFactory; + + @Override + public void inform(SolrCore core) { + this.shardHandlerFactory = core.getCoreContainer().getShardHandlerFactory(); + } + @Override public Name getPermissionName(AuthorizationContext request) { return Name.READ_PERM; @@ -123,7 +167,7 @@ public static ShowStyle get(String v) { if (v == null) return null; if ("schema".equalsIgnoreCase(v)) return SCHEMA; if ("index".equalsIgnoreCase(v)) return INDEX; - if ("doc".equalsIgnoreCase(v)) return DOC; + if (RSP_DOC.equalsIgnoreCase(v)) return DOC; if ("all".equalsIgnoreCase(v)) return ALL; throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: " + v); } @@ -131,16 +175,26 @@ public static ShowStyle get(String v) { @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { + SolrParams params = req.getParams(); + + boolean isDistrib = params.getBool(DISTRIB, req.getCoreContainer().isZooKeeperAware()); + if (!isDistrib) { + String shards = params.get(ShardParams.SHARDS); + isDistrib = shards != null && shards.indexOf('/') > 0; + } + if (isDistrib && handleDistributed(req, rsp)) { + return; + } + IndexSchema schema = req.getSchema(); 
SolrIndexSearcher searcher = req.getSearcher(); DirectoryReader reader = searcher.getIndexReader(); - SolrParams params = req.getParams(); ShowStyle style = ShowStyle.get(params.get("show")); // If no doc is given, show all fields and top terms final var indexVals = new SimpleOrderedMap<>(); V2ApiUtils.squashIntoNamedList(indexVals, getIndexInfo(reader)); - rsp.add("index", indexVals); + rsp.add(RSP_INDEX, indexVals); if (ShowStyle.INDEX == style) { return; // that's all we need @@ -153,14 +207,10 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw String v = uniqueKey.getType().toInternal(params.get(ID)); Term t = new Term(uniqueKey.getName(), v); docId = searcher.getFirstMatch(t); - if (docId < 0) { - throw new SolrException( - SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID)); - } } // Read the document from the index - if (docId != null) { + if (docId != null && docId > -1) { if (style != null && style != ShowStyle.DOC) { throw new SolrException(ErrorCode.BAD_REQUEST, "missing doc param for doc style"); } @@ -179,11 +229,11 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw docinfo.add("docId", docId); docinfo.add("lucene", info); docinfo.add("solr", doc); - rsp.add("doc", docinfo); + rsp.add(RSP_DOC, docinfo); } else if (ShowStyle.SCHEMA == style) { - rsp.add("schema", getSchemaInfo(req.getSchema())); + rsp.add(RSP_SCHEMA, getSchemaInfo(req.getSchema())); } else { - rsp.add("fields", getIndexedFieldsInfo(req)); + rsp.add(RSP_FIELDS, getIndexedFieldsInfo(req)); } // Add some generally helpful information @@ -192,10 +242,385 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw info.add( "NOTE", "Document Frequency (df) is not updated when a document is marked for deletion. 
df values include deleted documents."); - rsp.add("info", info); + rsp.add(RSP_INFO, info); rsp.setHttpCaching(false); } + /** Per-field accumulation state across shards: aggregated response data and field validation. */ + private static class AggregatedFieldData { + // keyed by individual field info properties, i.e. type, schema, etc. + final SimpleOrderedMap aggregated = new SimpleOrderedMap<>(); + final String originalShardAddr; + final LukeResponse.FieldInfo originalFieldInfo; + private Object indexFlags; + private String indexFlagsShardAddr; + + AggregatedFieldData(String shardAddr, LukeResponse.FieldInfo fieldInfo) { + this.originalShardAddr = shardAddr; + this.originalFieldInfo = fieldInfo; + Object flags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS); + if (flags != null) { + this.indexFlags = flags; + this.indexFlagsShardAddr = shardAddr; + } + } + } + + private static class ShardData { + final String shardAddr; // key in "shards" response map + final Map shardFieldInfo; // keyed by field name + private NamedList indexInfo; // value for "index" key in per-shard entry + private SimpleOrderedMap detailedFields; // keyed by field name + + ShardData(String shardAddr, Map shardFieldInfo) { + this.shardAddr = shardAddr; + this.shardFieldInfo = shardFieldInfo; + } + + void setIndexInfo(NamedList indexInfo) { + this.indexInfo = indexInfo; + } + + void addDetailedFieldInfo(String fieldName, SimpleOrderedMap fieldStats) { + if (detailedFields == null) { + detailedFields = new SimpleOrderedMap<>(); + } + detailedFields.add(fieldName, fieldStats); + } + + SimpleOrderedMap toResponseEntry() { + SimpleOrderedMap entry = new SimpleOrderedMap<>(); + if (indexInfo != null) { + entry.add(RSP_INDEX, indexInfo); + } + if (detailedFields != null) { + entry.add(RSP_FIELDS, detailedFields); + } + return entry; + } + } + + /** + * @return true if the request was handled in distributed mode, false if prepDistributed + * short-circuited (e.g. 
single-shard collection) and the caller should fall through to local + * logic. + */ + private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) { + SolrParams reqParams = req.getParams(); + + // docId is a Lucene-internal integer, not meaningful across shards + if (reqParams.getInt(DOC_ID) != null) { + throw new SolrException( + ErrorCode.BAD_REQUEST, + "docId parameter is not supported in distributed mode." + + " Use the id parameter to look up documents by their Solr unique key."); + } + + ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); + ResponseBuilder rb = new ResponseBuilder(req, rsp, List.of()); + shardHandler.prepDistributed(rb); + + String[] shards = rb.shards; + if (shards == null || shards.length == 0) { + return false; + } + + ShardRequest sreq = new ShardRequest(); + sreq.shards = shards; + sreq.actualShards = shards; + sreq.responses = new ArrayList<>(shards.length); + + String reqPath = (String) req.getContext().get(PATH); + + for (String shard : shards) { + ModifiableSolrParams params = new ModifiableSolrParams(reqParams); + params.set(CommonParams.QT, reqPath); + ShardHandler.setShardAttributesToParams(params, sreq.purpose); + shardHandler.submit(sreq, shard, params); + } + + // takeCompletedOrError() yields one completed response at a time and appends it to + // sreq.responses; call it once per submitted request so every shard is collected + // before aggregating (a single call would silently drop all but one shard). + for (int pending = shards.length; pending > 0; pending--) { + ShardResponse srsp = shardHandler.takeCompletedOrError(); + if (srsp == null) { + throw new SolrException(ErrorCode.SERVER_ERROR, "No responses received from shards"); + } + if (srsp.getException() != null) { + shardHandler.cancelAll(); + if (srsp.getException() instanceof SolrException) { + throw (SolrException) srsp.getException(); + } + throw new SolrException(ErrorCode.SERVER_ERROR, srsp.getException()); + } + } + List responses = sreq.responses; + + aggregateDistributedResponses(req, rsp, responses); + rsp.setHttpCaching(false); + return true; + } + + private static String shardAddress(ShardResponse srsp) { + return srsp.getShardAddress() != null ? 
srsp.getShardAddress() : srsp.getShard(); + } + + private void aggregateDistributedResponses( + SolrQueryRequest req, SolrQueryResponse rsp, List responses) { + + if (!responses.isEmpty()) { + ShardResponse firstRsp = responses.getFirst(); + NamedList firstShardRsp = firstRsp.getSolrResponse().getResponse(); + if (firstShardRsp == null) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Unexpected empty response from shard: " + shardAddress(firstRsp)); + } + Object schema = firstShardRsp.get(RSP_SCHEMA); + if (schema != null) { + rsp.add(RSP_SCHEMA, schema); + } + Object info = firstShardRsp.get(RSP_INFO); + if (info != null) { + rsp.add(RSP_INFO, info); + } + } + + long totalNumDocs = 0; + int totalMaxDoc = 0; + long totalDeletedDocs = 0; + int totalSegmentCount = 0; + Map aggregatedFields = new HashMap<>(); + String firstDocShard = null; + Object firstDoc = null; + List shardDataList = new ArrayList<>(); + + for (ShardResponse srsp : responses) { + NamedList shardRsp = srsp.getSolrResponse().getResponse(); + LukeResponse lukeRsp = new LukeResponse(); + lukeRsp.setResponse(shardRsp); + // Only process field info if the shard explicitly included it in its response. + // LukeResponse.getFieldInfo() falls back to schema.fields which has incomplete data. + Map fieldInfo = + shardRsp.get(RSP_FIELDS) != null ? lukeRsp.getFieldInfo() : null; + ShardData shardData = new ShardData(shardAddress(srsp), fieldInfo); + + NamedList shardIndex = lukeRsp.getIndexInfo(); + if (shardIndex != null) { + totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocs()).orElse(0L); + totalMaxDoc = Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDoc()).orElse(0)); + totalDeletedDocs += Optional.ofNullable(lukeRsp.getDeletedDocs()).orElse(0L); + Number segCount = (Number) shardIndex.get(KEY_SEGMENT_COUNT); + totalSegmentCount += segCount != null ? 
segCount.intValue() : 0; + + shardData.setIndexInfo(shardIndex); + } + + processShardFields(shardData, aggregatedFields); + Object doc = shardRsp.get(RSP_DOC); + if (doc != null) { + if (firstDoc != null) { + throw new SolrException( + ErrorCode.SERVER_ERROR, + "Solr Id of document " + + firstDoc + + " found on multiple shards (" + + firstDocShard + + " and " + + shardAddress(srsp) + + "). The index is corrupt: unique key constraint violated."); + } + firstDoc = doc; + firstDocShard = shardAddress(srsp); + } + shardDataList.add(shardData); + } + + SimpleOrderedMap shardsInfo = new SimpleOrderedMap<>(); + for (ShardData sd : shardDataList) { + SimpleOrderedMap entry = sd.toResponseEntry(); + if (!entry.isEmpty()) { + shardsInfo.add(sd.shardAddr, entry); + } + } + + SimpleOrderedMap aggregatedIndex = new SimpleOrderedMap<>(); + aggregatedIndex.add(KEY_NUM_DOCS, totalNumDocs); + aggregatedIndex.add(KEY_MAX_DOC, totalMaxDoc); + aggregatedIndex.add(KEY_DELETED_DOCS, totalDeletedDocs); + aggregatedIndex.add(KEY_SEGMENT_COUNT, totalSegmentCount); + rsp.add(RSP_INDEX, aggregatedIndex); + + if (firstDoc != null) { + rsp.add(RSP_DOC, firstDoc); + } + if (shouldNarrowLongsForOldClient(req)) { + narrowLongToInt(aggregatedIndex, KEY_NUM_DOCS); + narrowLongToInt(aggregatedIndex, KEY_DELETED_DOCS); + for (AggregatedFieldData fd : aggregatedFields.values()) { + narrowLongToInt(fd.aggregated, KEY_DOCS); + } + } + if (!aggregatedFields.isEmpty()) { + SimpleOrderedMap aggregatedFieldsNL = new SimpleOrderedMap<>(); + for (Map.Entry entry : aggregatedFields.entrySet()) { + aggregatedFieldsNL.add(entry.getKey(), entry.getValue().aggregated); + } + rsp.add(RSP_FIELDS, aggregatedFieldsNL); + } + + rsp.add(RSP_SHARDS, shardsInfo); + } + + private void processShardFields( + ShardData shardData, Map aggregatedFields) { + if (shardData.shardFieldInfo == null) { + return; + } + for (Map.Entry entry : shardData.shardFieldInfo.entrySet()) { + String fieldName = entry.getKey(); + 
LukeResponse.FieldInfo fi = entry.getValue(); + + aggregateShardField(shardData.shardAddr, fi, aggregatedFields); + + // Detailed stats — kept per-shard, not aggregated + NamedList topTerms = fi.getTopTerms(); + if (topTerms != null) { + SimpleOrderedMap detailedFieldInfo = new SimpleOrderedMap<>(); + detailedFieldInfo.add(KEY_TOP_TERMS, topTerms); + detailedFieldInfo.add(KEY_HISTOGRAM, fi.getExtras().get(KEY_HISTOGRAM)); + detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct()); + shardData.addDetailedFieldInfo(fieldName, detailedFieldInfo); + } + } + } + + private void aggregateShardField( + String shardAddr, + LukeResponse.FieldInfo fi, + Map aggregatedFields) { + + String fieldName = fi.getName(); + + AggregatedFieldData fieldData = aggregatedFields.get(fieldName); + if (fieldData == null) { + fieldData = new AggregatedFieldData(shardAddr, fi); + aggregatedFields.put(fieldName, fieldData); + + // First shard to report this field — populate response keys: + // "type" → field type name (e.g. "string", "text_general") + // "schema" → schema flags string (e.g. "I-S-M-----OF-----l") + // "dynamicBase" → dynamic field glob if this is a dynamic field (e.g. 
"*_s") + // "index" → index-derived flags from the first shard that has them + fieldData.aggregated.add(KEY_TYPE, fi.getType()); + fieldData.aggregated.add(KEY_SCHEMA_FLAGS, fi.getSchema()); + Object dynBase = fi.getExtras().get(KEY_DYNAMIC_BASE); + if (dynBase != null) { + fieldData.aggregated.add(KEY_DYNAMIC_BASE, dynBase); + } + if (fieldData.indexFlags != null) { + fieldData.aggregated.add(KEY_INDEX_FLAGS, fieldData.indexFlags); + } + } else { + // Subsequent shards: validate that "type", "schema", and "dynamicBase" match + validateFieldAttr( + fieldName, + KEY_TYPE, + fi.getType(), + fieldData.originalFieldInfo.getType(), + shardAddr, + fieldData.originalShardAddr); + validateFieldAttr( + fieldName, + KEY_SCHEMA_FLAGS, + fi.getSchema(), + fieldData.originalFieldInfo.getSchema(), + shardAddr, + fieldData.originalShardAddr); + validateFieldAttr( + fieldName, + KEY_DYNAMIC_BASE, + fi.getExtras().get(KEY_DYNAMIC_BASE), + fieldData.originalFieldInfo.getExtras().get(KEY_DYNAMIC_BASE), + shardAddr, + fieldData.originalShardAddr); + + Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS); + if (indexFlags != null) { + if (fieldData.indexFlags == null) { + fieldData.indexFlags = indexFlags; + fieldData.indexFlagsShardAddr = shardAddr; + fieldData.aggregated.add(KEY_INDEX_FLAGS, indexFlags); + } else { + validateFieldAttr( + fieldName, + KEY_INDEX_FLAGS, + indexFlags, + fieldData.indexFlags, + shardAddr, + fieldData.indexFlagsShardAddr); + } + } + } + + // "docs" → sum of per-shard doc counts (number of documents containing this field) + fieldData.aggregated.merge(KEY_DOCS, fi.getDocs(), (a, b) -> Long.sum((Long) a, (Long) b)); + } + + /** + * Minimum client version that understands Long values in distributed Luke responses. Distributed + * Luke aggregates counts across shards, which can overflow Integer. Older clients cast these + * values to Integer and would fail with a ClassCastException. 
+ */ + private static final SolrVersion DISTRIB_LONG_COUNTS_MIN_VERSION = + SolrVersion.forIntegers(10, 2, 0); + + private static boolean shouldNarrowLongsForOldClient(SolrQueryRequest req) { + HttpSolrCall call = req.getHttpSolrCall(); + if (call == null) return false; + SolrVersion clientVersion = call.getUserAgentSolrVersion(); + return clientVersion != null && clientVersion.lessThan(DISTRIB_LONG_COUNTS_MIN_VERSION); + } + + /** Narrows a Long value to Integer if it fits, for javabin backward compatibility. */ + private static void narrowLongToInt(NamedList nl, String key) { + int idx = nl.indexOf(key, 0); + if (idx >= 0) { + Object val = nl.getVal(idx); + if (val instanceof Long l && l >= Integer.MIN_VALUE && l <= Integer.MAX_VALUE) { + nl.setVal(idx, l.intValue()); + } + } + } + + /** Validates that a field attribute value is identical across shards. */ + private void validateFieldAttr( + String fieldName, + String attrName, + Object currentVal, + Object expectedVal, + String currentShardAddr, + String expectedShardAddr) { + String currentStr = currentVal != null ? currentVal.toString() : null; + String expectedStr = expectedVal != null ? expectedVal.toString() : null; + if (!Objects.equals(currentStr, expectedStr)) { + String error = + "FIELD CONFIGURATION MISMATCH! Field '" + + fieldName + + "' has inconsistent '" + + attrName + + "' across shards: '" + + expectedStr + + "' (from " + + expectedShardAddr + + ") vs '" + + currentStr + + "' (from " + + currentShardAddr + + "). Use distrib=false to query individual shards and compare field configurations."; + log.error(error); + throw new SolrException(ErrorCode.SERVER_ERROR, error); + } + } + /** * @return a string representing a IndexableField's flags. */ @@ -329,8 +754,8 @@ private static SimpleOrderedMap getDocumentFieldsInfo( SchemaField sfield = schema.getFieldOrNull(field.name()); FieldType ftype = (sfield == null) ? null : sfield.getType(); - f.add("type", (ftype == null) ? 
null : ftype.getTypeName()); - f.add("schema", getFieldFlags(sfield)); + f.add(KEY_TYPE, (ftype == null) ? null : ftype.getTypeName()); + f.add(KEY_SCHEMA_FLAGS, getFieldFlags(sfield)); f.add("flags", getFieldFlags(field)); f.add("value", (ftype == null) ? null : ftype.toExternal(field)); @@ -417,12 +842,12 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re SchemaField sfield = schema.getFieldOrNull(fieldName); FieldType ftype = (sfield == null) ? null : sfield.getType(); - fieldMap.add("type", (ftype == null) ? null : ftype.getTypeName()); - fieldMap.add("schema", getFieldFlags(sfield)); + fieldMap.add(KEY_TYPE, (ftype == null) ? null : ftype.getTypeName()); + fieldMap.add(KEY_SCHEMA_FLAGS, getFieldFlags(sfield)); if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) { - fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName())); + fieldMap.add(KEY_DYNAMIC_BASE, schema.getDynamicPattern(sfield.getName())); } Terms terms = reader.terms(fieldName); // Not indexed, so we need to report what we can (it made it through the fl param if @@ -441,17 +866,16 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re try { IndexableField fld = doc.getField(fieldName); if (fld != null) { - fieldMap.add("index", getFieldFlags(fld)); + fieldMap.add(KEY_INDEX_FLAGS, getFieldFlags(fld)); } else { - // it is a non-stored field... 
- fieldMap.add("index", "(unstored field)"); + fieldMap.add(KEY_INDEX_FLAGS, "(unstored field)"); } } catch (Exception ex) { log.warn("error reading field: {}", fieldName); } } } - fieldMap.add("docs", terms.getDocCount()); + fieldMap.add(KEY_DOCS, terms.getDocCount()); } if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) { getDetailedFieldInfo(req, fieldName, fieldMap); @@ -728,13 +1152,13 @@ private static void getDetailedFieldInfo( } } tiq.histogram.add(buckets); - fieldMap.add("distinct", tiq.distinctTerms); + fieldMap.add(KEY_DISTINCT, tiq.distinctTerms); // Include top terms - fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema())); + fieldMap.add(KEY_TOP_TERMS, tiq.toNamedList(req.getSearcher().getSchema())); // Add a histogram - fieldMap.add("histogram", tiq.histogram.toNamedList()); + fieldMap.add(KEY_HISTOGRAM, tiq.histogram.toNamedList()); } private static List toListOfStrings(SchemaField[] raw) { diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java new file mode 100644 index 00000000000..770175bb324 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.admin; + +import static org.apache.solr.common.params.CommonParams.DISTRIB; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.schema.SchemaRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.SolrParams; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** Cloud-specific Luke tests that require SolrCloud features like managed schema and Schema API. */ +public class LukeHandlerCloudTest extends SolrCloudTestCase { + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2).addConfig("managed", configset("cloud-managed")).configure(); + } + + @AfterClass + public static void afterClass() throws Exception { + shutdownCluster(); + } + + private void requestLuke(String collection, SolrParams extra) throws Exception { + QueryRequest req = new QueryRequest(SolrParams.wrapDefaults(extra, params("numTerms", "0"))); + req.setPath("/admin/luke"); + cluster.getSolrClient().request(req, collection); + } + + /** + * Verifies that distributed Luke detects inconsistent index flags across shards.
Uses Schema API + * to change a field's {@code stored} property between indexing on different shards, producing + * different Lucene FieldInfo (and thus different index flags strings) on each shard. + */ + @Test + public void testInconsistentIndexFlagsAcrossShards() throws Exception { + String collection = "lukeInconsistentFlags"; + System.setProperty("managed.schema.mutable", "true"); + CollectionAdminRequest.createCollection(collection, "managed", 2, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + + cluster.waitForActiveCollection(collection, 2, 2); + + try { + // Add a field with stored=true, indexed=true + Map fieldAttrs = new LinkedHashMap<>(); + fieldAttrs.put("name", "test_flag_s"); + fieldAttrs.put("type", "string"); + fieldAttrs.put("stored", true); + fieldAttrs.put("indexed", true); + new SchemaRequest.AddField(fieldAttrs).process(cluster.getSolrClient(), collection); + + // Index a target doc WITH the field, plus seed docs without it + SolrInputDocument targetDoc = new SolrInputDocument(); + targetDoc.addField("id", "target"); + targetDoc.addField("test_flag_s", "has_indexed"); + cluster.getSolrClient().add(collection, targetDoc); + + List seedDocs = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "seed_" + i); + seedDocs.add(doc); + } + cluster.getSolrClient().add(collection, seedDocs); + cluster.getSolrClient().commit(collection); + + // Find which shard has the target doc by querying each replica directly. + // Must use distrib=false — SolrCloud defaults distrib to true even on direct replica queries. 
+ DocCollection docColl = getCollectionState(collection); + String targetSliceName = null; + for (Slice slice : docColl.getSlices()) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("id:target"); + q.set(DISTRIB, "false"); + QueryResponse qr = client.query(q); + if (qr.getResults().getNumFound() > 0) { + targetSliceName = slice.getName(); + } + } + } + assertNotNull("target doc should exist on a shard", targetSliceName); + + // Find a seed doc on the other shard + String otherDocId = null; + for (Slice slice : docColl.getSlices()) { + if (!slice.getName().equals(targetSliceName)) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("*:*"); + q.setRows(1); + q.set(DISTRIB, "false"); + QueryResponse qr = client.query(q); + assertTrue("other shard should have seed docs", qr.getResults().getNumFound() > 0); + otherDocId = (String) qr.getResults().getFirst().getFieldValue("id"); + } + break; + } + } + assertNotNull("should find a seed doc on the other shard", otherDocId); + + // Change the field to stored=false via Schema API + fieldAttrs.put("stored", false); + new SchemaRequest.ReplaceField(fieldAttrs).process(cluster.getSolrClient(), collection); + + // Reload collection to pick up schema change + CollectionAdminRequest.reloadCollection(collection).process(cluster.getSolrClient()); + + // Update the other-shard doc to include the field (now unstored in the new segment) + SolrInputDocument updateDoc = new SolrInputDocument(); + updateDoc.addField("id", otherDocId); + updateDoc.addField("test_flag_s", "not_indexed"); + cluster.getSolrClient().add(collection, updateDoc); + cluster.getSolrClient().commit(collection); + + // Distributed Luke should detect inconsistent index flags between the two shards. + // One shard has stored=true segments, the other has stored=false segments for test_flag_s. 
+ // No need to set distrib=true — ZK-aware nodes default to distributed mode. + Exception ex = + expectThrows(Exception.class, () -> requestLuke(collection, params("fl", "test_flag_s"))); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception chain should mention inconsistent index flags: " + fullMessage, + fullMessage.contains("inconsistent")); + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } +} diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java new file mode 100644 index 00000000000..cbbd9466516 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -0,0 +1,511 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.handler.admin; + +import java.util.Map; +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.InputStreamResponseParser; +import org.apache.solr.client.solrj.response.LukeResponse; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.CommitUpdateCommand; +import org.apache.solr.util.BaseTestHarness; +import org.junit.Test; + +public class LukeRequestHandlerDistribTest extends BaseDistributedSearchTestCase { + + private static final Long NUM_DOCS = 20L; + + public LukeRequestHandlerDistribTest() { + fixShardCount(2); + } + + private LukeResponse requestLuke() throws Exception { + return requestLuke(new ModifiableSolrParams()); + } + + private LukeResponse requestLuke(ModifiableSolrParams extra) throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("shards", shards); + params.add(extra); + QueryRequest req = new QueryRequest(params); + NamedList raw = clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + return rsp; + } + + private void assertLukeXPath(ModifiableSolrParams extra, String... 
xpaths) throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("wt", "xml"); + params.set("shards", shards); + params.add(extra); + QueryRequest req = new QueryRequest(params); + req.setResponseParser(new InputStreamResponseParser("xml")); + NamedList raw = clients.get(0).request(req); + String xml = InputStreamResponseParser.consumeResponseToString(raw); + String failedXpath = BaseTestHarness.validateXPath(xml, xpaths); + assertNull("XPath validation failed: " + failedXpath + "\nResponse:\n" + xml, failedXpath); + } + + private void indexTestData() throws Exception { + for (int i = 0; i < NUM_DOCS; i++) { + index("id", String.valueOf(i), "name", "name_" + i, "subject", "subject value " + (i % 5)); + } + commit(); + } + + @Test + @ShardsFixed(num = 2) + public void testDistributedAggregate() throws Exception { + indexTestData(); + + LukeResponse rsp = requestLuke(); + + assertEquals("aggregated numDocs should equal total docs", NUM_DOCS, rsp.getNumDocs()); + assertTrue("aggregated maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertNotNull("deletedDocs should be present", rsp.getDeletedDocs()); + + Map shardResponses = rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); + assertEquals("should have 2 shard entries", 2, shardResponses.size()); + + Long sumShardDocs = 0L; + for (Map.Entry entry : shardResponses.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); + assertNotNull("each shard should have maxDoc", shardLuke.getMaxDoc()); + sumShardDocs += shardLuke.getNumDocs(); + } + assertEquals( + "sum of per-shard numDocs should equal aggregated numDocs", rsp.getNumDocs(), sumShardDocs); + } + + @Test + @ShardsFixed(num = 2) + public void testDistributedFieldsAggregate() throws Exception { + indexTestData(); + + LukeResponse rsp = 
requestLuke(); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + LukeResponse.FieldInfo nameField = fields.get("name"); + assertNotNull("'name' field should be present", nameField); + assertNotNull("field type should be present", nameField.getType()); + assertNotNull("schema flags should be present", nameField.getSchema()); + assertEquals( + "aggregated docs count for 'name' should equal total docs", + (long) NUM_DOCS, + nameField.getDocs()); + + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id field type should be string", "string", idField.getType()); + + assertLukeXPath( + new ModifiableSolrParams(), + "//lst[@name='index']/long[@name='numDocs'][.='20']", + "count(//lst[@name='shards']/lst)=2", + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema']", + "//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='20']", + "//lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='id']/long[@name='docs'][.='20']"); + } + + @Test + @ShardsFixed(num = 2) + public void testDetailedFieldStatsPerShard() throws Exception { + indexTestData(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("fl", "name"); + params.set("numTerms", "5"); + + LukeResponse rsp = requestLuke(params); + + // Top-level fields should NOT have topTerms, distinct, histogram + LukeResponse.FieldInfo nameField = rsp.getFieldInfo().get("name"); + assertNotNull("'name' field should be present", nameField); + assertNull("topTerms should NOT be in top-level fields", nameField.getTopTerms()); + assertEquals("distinct should NOT be in top-level fields", 0, nameField.getDistinct()); + + // Per-shard entries should have detailed stats + Map shardResponses = 
rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); + + ModifiableSolrParams detailedParams = new ModifiableSolrParams(); + detailedParams.set("fl", "name"); + detailedParams.set("numTerms", "5"); + assertLukeXPath( + detailedParams, + "/response/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "/response/lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='20']", + "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms'])", + "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='histogram'])", + "not(/response/lst[@name='fields']/lst[@name='name']/int[@name='distinct'])", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms']", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='histogram']/int[@name='1']", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/int[@name='distinct']"); + } + + @Test + @ShardsFixed(num = 2) + public void testLocalModeDefault() throws Exception { + indexTestData(); + + // Query a single client without the shards param — local mode + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + QueryRequest req = new QueryRequest(params); + NamedList raw = clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertNull("shards should NOT be present in local mode", rsp.getShardResponses()); + } + + @Test + @ShardsFixed(num = 2) + public void testExplicitDistribFalse() throws Exception { + indexTestData(); + + // Query a single client with distrib=false — no shards param + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("distrib", "false"); + QueryRequest req = new QueryRequest(params); + NamedList raw 
= clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertNull("shards should NOT be present with distrib=false", rsp.getShardResponses()); + } + + @Test + @ShardsFixed(num = 12) + public void testSparseShards() throws Exception { + // Index a single doc on shard 0 + index_specific( + 0, "id", "100", "name", "sparse test", "subject", "subject value", "cat_s", "category"); + commit(); + + LukeResponse rsp = requestLuke(); + + // Index-level stats + assertEquals("numDocs should be 1", 1, (long) rsp.getNumDocs()); + assertTrue("maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertEquals("deletedDocs should be 0", 0L, (long) rsp.getDeletedDocs()); + + Map shardResponses = rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); + assertEquals("should have 12 shard entries", 12, shardResponses.size()); + + long sumShardDocs = 0; + for (Map.Entry entry : shardResponses.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); + sumShardDocs += shardLuke.getNumDocs(); + } + assertEquals("sum of per-shard numDocs should be 1", 1, sumShardDocs); + + // Field-level checks + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id type", "string", idField.getType()); + assertNotNull("id schema flags", idField.getSchema()); + + LukeResponse.FieldInfo nameField = fields.get("name"); + assertNotNull("'name' field should be present", nameField); + assertNotNull("name type", nameField.getType()); + assertNotNull("name schema flags", nameField.getSchema()); + assertEquals("name docs should be 1", 1, nameField.getDocs()); + + // Dynamic field — should have dynamicBase in extras + 
LukeResponse.FieldInfo catField = fields.get("cat_s"); + assertNotNull("'cat_s' field should be present", catField); + assertNotNull("cat_s type", catField.getType()); + assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); + + assertLukeXPath( + new ModifiableSolrParams(), + "//lst[@name='index']/long[@name='numDocs'][.='1']", + "//lst[@name='index']/long[@name='deletedDocs'][.='0']", + "count(//lst[@name='shards']/lst)=12", + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema']", + "//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='1']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='cat_s']/long[@name='docs'][.='1']"); + } + + @Test + @ShardsFixed(num = 2) + public void testDistribShowSchema() throws Exception { + indexTestData(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("show", "schema"); + + assertLukeXPath( + params, + "//lst[@name='schema']/lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='schema']/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='schema']/lst[@name='dynamicFields']/lst[@name='*_s']", + "//lst[@name='schema']/str[@name='uniqueKeyField'][.='id']", + "//lst[@name='schema']/lst[@name='types']/lst[@name='string']", + "//lst[@name='schema']/lst[@name='types']/lst[@name='nametext']", + "//lst[@name='schema']/lst[@name='similarity']", + "not(/response/lst[@name='fields'])", + "count(//lst[@name='shards']/lst)=2"); + } + + @Test + @ShardsFixed(num = 16) + public void testDeferredIndexFlags() throws Exception { + // Index docs with the target field across shards, plus anchor docs without it. 
+ // Use numeric IDs (the default test schema copies id to integer fields). + // Target docs get even IDs starting at 1000, anchor docs get odd IDs. + for (int i = 0; i < 16 * 4; i++) { + index("id", String.valueOf(1000 + i * 2), "flag_target_s", "value_" + i); + index("id", String.valueOf(1001 + i * 2), "name", "anchor"); + } + commit(); + + // Delete all target docs except the first one, using per-shard deletes. + // Then optimize to force segment merge — expunges soft-deleted docs so + // Terms.getDocCount() (which backs docs) reflects only live docs. + for (int i = 0; i < clients.size(); i++) { + clients.get(i).deleteByQuery("flag_target_s:* AND -id:1000"); + clients.get(i).optimize(); + } + controlClient.deleteByQuery("flag_target_s:* AND -id:1000"); + controlClient.optimize(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("fl", "flag_target_s"); + + LukeResponse rsp = requestLuke(params); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + LukeResponse.FieldInfo targetField = fields.get("flag_target_s"); + assertNotNull("'flag_target_s' field should be present", targetField); + + ModifiableSolrParams xpathParams = new ModifiableSolrParams(); + xpathParams.set("fl", "flag_target_s"); + assertLukeXPath( + xpathParams, + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='index']", + "//lst[@name='fields']/lst[@name='flag_target_s']/long[@name='docs'][.='1']"); + } + + @Test + @ShardsFixed(num = 2) + public void testDistributedShardError() throws Exception { + indexTestData(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("id", "0"); + params.set("show", "schema"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(params)); + String fullMessage = 
SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention doc style mismatch: " + fullMessage, + fullMessage.contains("missing doc param for doc style")); + } + + @Test + @ShardsFixed(num = 2) + public void testDistributedDocIdRejected() throws Exception { + indexTestData(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("docId", "0"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(params)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention docId not supported: " + fullMessage, + fullMessage.contains("docId parameter is not supported in distributed mode")); + } + + @Test + @ShardsFixed(num = 2) + public void testDistributedDocLookupFound() throws Exception { + indexTestData(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("id", "0"); + + assertLukeXPath( + params, + "//lst[@name='doc']/int[@name='docId']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='value'][.='0']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='name']/str[@name='value'][.='name_0']", + "//lst[@name='doc']/arr[@name='solr']/str[.='0']", + "//lst[@name='doc']/arr[@name='solr']/str[.='name_0']", + "//lst[@name='index']", + "//lst[@name='info']"); + } + + @Test + @ShardsFixed(num = 2) + public void testDistributedDocLookupNotFound() throws Exception { + indexTestData(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("id", "999888777"); + + LukeResponse rsp = requestLuke(params); + + NamedList raw = rsp.getResponse(); + assertNull("doc section should NOT be present for missing ID", raw.get("doc")); + + assertLukeXPath(params, "not(//lst[@name='doc'])"); + } + + @Test + @ShardsFixed(num = 2) + public 
void testDistributedDocLookupDuplicateId() throws Exception { + String dupId = "99999"; + + // Write the same document directly to two shard cores via UpdateHandler, + // completely bypassing the distributed update processor chain. + for (int i = 0; i < 2; i++) { + try (SolrCore core = jettys.get(i).getCoreContainer().getCore("collection1")) { + SolrInputDocument solrDoc = new SolrInputDocument(); + solrDoc.addField("id", dupId); + solrDoc.addField("name", "dup_copy_" + i); + + AddUpdateCommand addCmd = + new AddUpdateCommand(new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}); + addCmd.solrDoc = solrDoc; + core.getUpdateHandler().addDoc(addCmd); + + CommitUpdateCommand commitCmd = + new CommitUpdateCommand( + new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}, false); + commitCmd.waitSearcher = true; + core.getUpdateHandler().commit(commitCmd); + } + } + + // Distributed Luke doc lookup should detect the corruption + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("id", dupId); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(params)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention duplicate/corrupt index: " + fullMessage, + fullMessage.contains("found on multiple shards")); + } + + @Test + @ShardsFixed(num = 2) + public void testShardsParamRoutesToSpecificShard() throws Exception { + // Index a doc with a dynamic field only to shard 0 + index_specific(0, "id", "700", "name", "shard0_only", "only_on_shard0_s", "present"); + // Index a plain doc to shard 1 (no dynamic field) + index_specific(1, "id", "701", "name", "shard1_only"); + commit(); + + // Query with shards= pointing only at shard 1 — the dynamic field should NOT appear. + // This also tests that a single remote shard is correctly fanned out to rather than + // falling through to local-mode on the coordinating node. 
+ ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("shards", shardsArr[1]); + QueryRequest req = new QueryRequest(params); + NamedList raw = clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + assertNull( + "only_on_shard0_s should NOT be present when querying only shard 1", + fields.get("only_on_shard0_s")); + assertNotNull("'name' field should still be present", fields.get("name")); + + // Now query with shards= pointing only at shard 0 — the dynamic field SHOULD appear + params.set("shards", shardsArr[0]); + req = new QueryRequest(params); + raw = clients.get(0).request(req); + rsp = new LukeResponse(); + rsp.setResponse(raw); + + fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + assertNotNull( + "only_on_shard0_s SHOULD be present when querying shard 0", fields.get("only_on_shard0_s")); + } + + @Test + @ShardsFixed(num = 1) + public void testSingleShardViaParamStillDistributes() throws Exception { + index("id", "500", "name", "test_name"); + commit(); + + // Pass the shards param with a single shard — should still fan out to it + // rather than incorrectly falling through to local mode + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("shards", shards); + QueryRequest req = new QueryRequest(params); + NamedList raw = clients.get(0).request(req); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(raw); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertEquals("should see the 1 doc we indexed", 1, (long) rsp.getNumDocs()); + assertNotNull( + "shards section should be present when targeting a shard via shards param", + rsp.getShardResponses()); + assertEquals("should have 1 shard entry", 1, 
rsp.getShardResponses().size()); + } +} diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index fb795f62cc1..5ae7b216e1b 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -83,6 +83,18 @@ The number of top terms for each field. Choose whether `/luke` should return the index-flags for each field. Fetching and returning the index-flags for each field in the index has non-zero cost, and can slow down requests to `/luke`. +`distrib`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `true` in SolrCloud mode, `false` otherwise +|=== ++ +When set to `true` in SolrCloud mode, the handler aggregates results from all shards in the collection. +Additive index metrics (`numDocs`, `deletedDocs`, `segmentCount`) are summed across shards; `maxDoc` is the maximum across shards. +Field types and schema flags are validated for consistency across shards. +Per-shard index details and per-field detailed statistics are returned under a `shards` key. + == LukeRequestHandler Examples All of the examples in this section assume you are running the "techproducts" Solr example: @@ -118,3 +130,42 @@ Alternatively, to work through the Lucene native id: http://localhost:8983/solr/techproducts/admin/luke?fl=manu&docId=0 From SolrJ, you can access /luke using the {solr-javadocs}/solrj/org/apache/solr/client/solrj/request/LukeRequest.html[`LukeRequest`] object. + +== Distributed Mode (multiple shards) + +When running in SolrCloud, the Luke handler automatically distributes requests across all shards in the collection, the same as search requests. +To inspect only the local shard's index, set `distrib=false`. 
+ +To get a collection-wide view: + +[source,text] +http://localhost:8983/solr/techproducts/admin/luke + +To get detailed field statistics across all shards for a specific field: + +[source,text] +http://localhost:8983/solr/techproducts/admin/luke?fl=manu + +=== Response Structure + +In distributed mode, the response contains: + +* `index` -- Aggregated metrics across all shards: `numDocs`, `deletedDocs`, `segmentCount` are summed; `maxDoc` is the maximum across shards. +* `fields` -- Aggregated field metadata. For each field: `type`, `schema` flags, and `dynamicBase` are validated to be consistent across shards; `index` flags use the first non-null value. The `docs` count is summed. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are _not_ included at this level. +* `doc` -- Present when `id` is specified. Contains the document from whichever shard owns it, including a `lucene` section (per-field analysis with shard-local `docFreq` values) and a `solr` section (stored fields). Only `id` is supported for distributed doc lookup; `docId` is rejected because Lucene document IDs are shard-local. +* `schema` -- Schema information from the first responding shard (identical across shards sharing the same configset). +* `info` -- Static info from the first responding shard. +* `shards` -- Per-shard details in response-completion order. Each entry contains: +** `index` -- Full index info for that shard (including `directory`, `segmentsFile`, `version`, `current`, `hasDeletions`, `lastModified`, `userData`). +** `fields` -- Only present when `fl` triggers detailed statistics. Contains per-field `topTerms`, `distinct`, and `histogram` from that shard. + +=== Aggregation Semantics + +Field `type`, `schema` flags, and `dynamicBase` are validated for consistency across shards. +If a mismatch is detected, the handler returns an error identifying the field, the conflicting values, and the shard addresses involved. 
+You can use `distrib=false` to query individual shards and compare their field configurations when troubleshooting mismatches. +The `index` flags are index-derived (not schema-derived) and may be absent on shards where the field has no indexed data; the first non-null value is used, and any subsequent non-null values are validated for consistency. + +Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not aggregated across shards. +These statistics are shard-local and appear in each shard's entry under the `shards` key. +For collection-wide term frequencies or cardinality estimates, Solr's xref:query-guide:faceting.adoc[faceting API] may cover some of these use cases. diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index 265a40dfb3e..f56c85f743b 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -20,10 +20,12 @@ import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; /** * This is an incomplete representation of the data returned from Luke @@ -113,11 +115,12 @@ public static class FieldInfo implements Serializable { String name; String type; String schema; - int docs; + long docs; int distinct; EnumSet flags; boolean cacheableFaceting; NamedList topTerms; + Map extras = new HashMap<>(); public FieldInfo(String n) { name = n; @@ -128,19 +131,20 @@ public void read(NamedList nl) { for (Map.Entry entry : nl) { if ("type".equals(entry.getKey())) { type = (String) entry.getValue(); - } - if ("flags".equals(entry.getKey())) { + } else if ("flags".equals(entry.getKey())) 
{ flags = parseFlags((String) entry.getValue()); } else if ("schema".equals(entry.getKey())) { schema = (String) entry.getValue(); } else if ("docs".equals(entry.getKey())) { - docs = (Integer) entry.getValue(); + docs = ((Number) entry.getValue()).longValue(); } else if ("distinct".equals(entry.getKey())) { distinct = (Integer) entry.getValue(); } else if ("cacheableFaceting".equals(entry.getKey())) { cacheableFaceting = (Boolean) entry.getValue(); } else if ("topTerms".equals(entry.getKey())) { topTerms = (NamedList) entry.getValue(); + } else { + extras.put(entry.getKey(), entry.getValue()); } } } @@ -173,7 +177,7 @@ public int getDistinct() { return distinct; } - public int getDocs() { + public long getDocs() { return docs; } @@ -192,12 +196,17 @@ public EnumSet getSchemaFlags() { public NamedList getTopTerms() { return topTerms; } + + public Map getExtras() { + return extras; + } } private NamedList indexInfo; private Map fieldInfo; private Map dynamicFieldInfo; private Map fieldTypeInfo; + private Map shardResponses; @Override @SuppressWarnings("unchecked") @@ -246,6 +255,18 @@ public void setResponse(NamedList res) { } } } + + // Parse shards section (present in distributed responses) + SimpleOrderedMap> shardsNL = + (SimpleOrderedMap>) res.get("shards"); + if (shardsNL != null) { + shardResponses = new LinkedHashMap<>(); + for (Map.Entry> entry : shardsNL) { + LukeResponse shardRsp = new LukeResponse(); + shardRsp.setResponse(entry.getValue()); + shardResponses.put(entry.getKey(), shardRsp); + } + } } // ---------------------------------------------------------------- @@ -256,9 +277,14 @@ public String getIndexDirectory() { return (String) indexInfo.get("directory"); } - public Integer getNumDocs() { + private Long getIndexLong(String key) { if (indexInfo == null) return null; - return (Integer) indexInfo.get("numDocs"); + Number n = (Number) indexInfo.get(key); + return n != null ? 
n.longValue() : null; + } + + public Long getNumDocs() { + return getIndexLong("numDocs"); } public Integer getMaxDoc() { @@ -266,6 +292,10 @@ public Integer getMaxDoc() { return (Integer) indexInfo.get("maxDoc"); } + public Long getDeletedDocs() { + return getIndexLong("deletedDocs"); + } + public Integer getNumTerms() { if (indexInfo == null) return null; return (Integer) indexInfo.get("numTerms"); @@ -299,5 +329,9 @@ public FieldInfo getDynamicFieldInfo(String f) { return dynamicFieldInfo.get(f); } + public Map getShardResponses() { + return shardResponses; + } + // ---------------------------------------------------------------- }