Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion src/Elastic.Markdown/DocumentationGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System.IO.Abstractions;
using System.Text.Json;
using System.Text.RegularExpressions;
using Elastic.Documentation;
using Elastic.Documentation.Configuration;
using Elastic.Documentation.Configuration.LegacyUrlMappings;
Expand Down Expand Up @@ -37,7 +38,7 @@ public record GenerationResult
public IReadOnlyDictionary<string, LinkRedirect> Redirects { get; set; } = new Dictionary<string, LinkRedirect>();
}

public class DocumentationGenerator
public partial class DocumentationGenerator
{
private readonly IDocumentationFileOutputProvider? _documentationFileOutputProvider;
private readonly IConversionCollector? _conversionCollector;
Expand Down Expand Up @@ -231,6 +232,29 @@ private async Task ExtractEmbeddedStaticResources(Cancel ctx)
}
}

/// <summary>
/// Source-generated regex applied to a whole relative path.
/// The path may consist of lowercase ASCII letters, digits, whitespace, hyphens, underscores,
/// dots, forward slashes and backslashes; the character immediately before the final dot must be
/// a lowercase letter, digit, underscore or hyphen; and the path must end in a dot followed by an
/// extension made of one or more lowercase letters (captured as group 1).
/// </summary>
[GeneratedRegex(@"^[a-z0-9\s\-_\.\/\\]*[a-z0-9_\-]\.([a-z]+)$")]
private static partial Regex FilePathRegex();

/// <summary>
/// Source-generated regex applied to a file name.
/// The name must start with a lowercase ASCII letter, digit or underscore, may continue with
/// lowercase letters, digits, underscores, hyphens, whitespace and dots, and must end in a dot
/// followed by an extension made of one or more lowercase letters (captured as group 1).
/// </summary>
[GeneratedRegex(@"^[a-z0-9_][a-z0-9_\-\s\.]*?\.([a-z]+)$")]
private static partial Regex FileNameRegex();

/// <summary>
/// Determines whether <paramref name="strToCheck"/> is an acceptable output file path.
/// </summary>
/// <remarks>
/// A path is accepted when it falls under one of the grandfathered exemptions (two snippet
/// folders, the <c>.svg</c>/<c>.gif</c>/<c>.png</c> extensions, and four explicitly listed files),
/// or when the whole path matches <c>FilePathRegex</c> and its file name matches
/// <c>FileNameRegex</c>.
/// All prefix and suffix checks are ordinal, so the result does not depend on the current culture
/// and ignorable Unicode characters cannot make a path match an exemption by accident.
/// </remarks>
/// <param name="strToCheck">The path to validate, using forward slashes for the exemption checks.</param>
/// <returns><c>true</c> when the path is exempt or matches both regular expressions; otherwise <c>false</c>.</returns>
public static bool IsValidFileName(string strToCheck) =>
    strToCheck switch
    {
        // prior art: folders that predate this validation
        _ when strToCheck.StartsWith("release-notes/elastic-agent/_snippets/", StringComparison.Ordinal) => true,
        _ when strToCheck.StartsWith("reference/query-languages/esql/_snippets/", StringComparison.Ordinal) => true,
        // image assets are exempt from the naming rules (the extension check itself is case-sensitive)
        _ when strToCheck.EndsWith(".svg", StringComparison.Ordinal) => true,
        _ when strToCheck.EndsWith(".gif", StringComparison.Ordinal) => true,
        _ when strToCheck.EndsWith(".png", StringComparison.Ordinal) => true,
        // prior art: individual files that predate this validation
        "reference/security/prebuilt-rules/audit_policies/windows/README.md" => true,
        "extend/integrations/developer-workflow-fleet-UI.md" => true,
        "reference/elasticsearch/clients/ruby/Helpers.md" => true,
        "explore-analyze/ai-features/llm-guides/connect-to-vLLM.md" => true,
        // everything else must satisfy both the full-path and the file-name pattern
        _ => FilePathRegex().IsMatch(strToCheck) && FileNameRegex().IsMatch(Path.GetFileName(strToCheck))
    };

private async Task ProcessFile(HashSet<string> offendingFiles, DocumentationFile file, DateTimeOffset outputSeenChanges, Cancel ctx)
{
if (!Context.Force)
Expand All @@ -243,8 +267,16 @@ private async Task ProcessFile(HashSet<string> offendingFiles, DocumentationFile

_logger.LogTrace("--> {FileFullPath}", file.SourceFile.FullName);
var outputFile = OutputFile(file.RelativePath);

if (outputFile is not null)
{
var relative = Path.GetRelativePath(Context.OutputDirectory.FullName, outputFile.FullName);
if (!IsValidFileName(relative))
{
Context.Collector.EmitError(file.SourceFile.FullName, $"File name {relative} is not valid needs to be lowercase and contain only alphanumeric characters, spaces, dashes, dots and underscores");
return;
}

var context = new ProcessingFileContext
{
BuildContext = Context,
Expand Down Expand Up @@ -368,4 +400,5 @@ public async Task<RenderResult> RenderLayout(MarkdownFile markdown, Cancel ctx)
await DocumentationSet.ResolveDirectoryTree(ctx);
return await HtmlWriter.RenderLayout(markdown, ctx);
}

}
270 changes: 270 additions & 0 deletions tests/Elastic.Markdown.Tests/OutputDirectoryTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,276 @@ public async Task CreatesDefaultOutputDirectory()
await collector.StopAsync(TestContext.Current.CancellationToken);

fileSystem.Directory.Exists(".artifacts").Should().BeTrue();
}

/// <summary>
/// Every entry of <see cref="ValidFileNames"/> must be accepted by
/// <c>DocumentationGenerator.IsValidFileName</c>.
/// </summary>
[Theory]
[MemberData(nameof(ValidFileNames))]
public void OutputFileValidationValidNames(string fileName) =>
    DocumentationGenerator
        .IsValidFileName(fileName)
        .Should()
        .BeTrue($"'{fileName}' should be a valid filename");

/// <summary>
/// Every entry of <see cref="InvalidFileNames"/> must be rejected by
/// <c>DocumentationGenerator.IsValidFileName</c>.
/// </summary>
[Theory]
[MemberData(nameof(InvalidFileNames))]
public void OutputFileValidationInvalidNames(string fileName)
{
    // Act
    bool accepted = DocumentationGenerator.IsValidFileName(fileName);

    // Assert
    accepted.Should().BeFalse($"'{fileName}' should be an invalid filename");
}

/// <summary>
/// Paths that <c>DocumentationGenerator.IsValidFileName</c> is expected to accept: names that
/// satisfy the lowercase naming rules plus every grandfathered exemption.
/// </summary>
public static TheoryData<string> ValidFileNames =>
[
    "test.md",
    "file.txt",
    "index.html",
    "readme.rst",

    // With numbers
    "test123.md",
    "123test.md",
    "file2.md",
    "99bottles.md",

    // With underscores
    "test_file.md",
    "my_long_file_name.md",
    "_leading_underscore.md",
    "trailing_underscore_.md",

    // With hyphens
    "test-file.md",
    "my-long-file-name.md",
    "trailing-hyphen-.md",

    // Combined underscores and hyphens
    "test_file-name.md",
    "my-file_name.md",

    // With dots in filename (before extension)
    "test.config.md",
    "file.test.backup.md",
    "v1.0.0.md",

    // With spaces (allowed per regex)
    "test file.md",
    "my document.md",

    // Paths with all lowercase directories
    "path/to/file.md",
    "deep/nested/path/to/file.md",
    "folder/subfolder/document.md",

    // Paths with numbers
    "path123/file.md",
    "v1/docs/guide.md",

    // Paths with underscores and hyphens
    "my_folder/file.md",
    "my-folder/file.md",
    "path_to/sub-folder/file.md",

    // SVG files exception (even with uppercase - per the .EndsWith checks)
    "image.svg",
    "Icon.svg",
    "LOGO.svg",
    "path/to/Image.svg",

    // PNG files exception
    "image.png",
    "Screenshot.png",
    "IMAGE.png",
    "path/to/Logo.png",

    // GIF files exception
    "animation.gif",
    "Loading.gif",
    "SPINNER.gif",

    // ESQL snippets exception (prior art)
    "reference/query-languages/esql/_snippets/functions/examples/cbrt.md",
    "reference/query-languages/esql/_snippets/anything/here/File.md",
    "reference/query-languages/esql/_snippets/UPPERCASE.md",

    // Elastic Agent release-notes snippets exception (prior art)
    "release-notes/elastic-agent/_snippets/known-issue.md",
    "release-notes/elastic-agent/_snippets/nested/Known-Issue.md",
    "release-notes/elastic-agent/_snippets/UPPERCASE.md",

    // Hardcoded exceptions
    "reference/security/prebuilt-rules/audit_policies/windows/README.md",
    "extend/integrations/developer-workflow-fleet-UI.md",
    "reference/elasticsearch/clients/ruby/Helpers.md",
    "explore-analyze/ai-features/llm-guides/connect-to-vLLM.md"
];

/// <summary>
/// Paths that <c>DocumentationGenerator.IsValidFileName</c> is expected to reject: uppercase
/// characters, disallowed leading characters, special or non-ASCII characters, and missing or
/// non-alphabetic extensions.
/// </summary>
public static TheoryData<string> InvalidFileNames =>
[
    "Test.md",
    "FILE.md",
    "MyFile.md",
    "testFile.md",
    "README.md",

    // Uppercase in extension
    "test.MD",
    "test.Md",
    "file.TXT",
    "document.Html",

    // Uppercase in directory path
    "Path/file.md",
    "path/To/file.md",
    "FOLDER/file.md",
    "docs/MyFolder/file.md",

    // Filenames starting with invalid characters (must start with [a-z0-9_])
    "-leading-hyphen.md",
    "-file.md",
    ".hidden.md",
    " leading-space.md",
    "path/to/-invalid.md",
    "path/to/.hidden.md",
    "path/to/ space.md",

    // Special characters - parentheses
    "test(1).md",
    "file (copy).md",
    "document(v2).md",

    // Special characters - square brackets
    "test[1].md",
    "file[copy].md",

    // Special characters - curly braces
    "test{1}.md",

    // Special characters - exclamation mark
    "test!.md",
    "important!file.md",

    // Special characters - at sign
    "test@file.md",
    "user@domain.md",

    // Special characters - hash
    "test#1.md",
    "file#.md",

    // Special characters - dollar sign
    "test$file.md",
    "price$.md",

    // Special characters - percent
    "test%file.md",
    "100%done.md",

    // Special characters - caret
    "test^file.md",

    // Special characters - ampersand
    "test&file.md",
    "this&that.md",

    // Special characters - asterisk
    "test*file.md",
    "*.md",

    // Special characters - plus sign
    "test+file.md",
    "c++.md",

    // Special characters - equals sign
    "test=file.md",

    // Special characters - pipe
    "test|file.md",

    // Special characters - less than / greater than
    "test<file>.md",

    // Special characters - colon
    "test:file.md",

    // Special characters - semicolon
    "test;file.md",

    // Special characters - single quote
    "test'file.md",
    "it's.md",

    // Special characters - double quote
    "test\"file.md",

    // Special characters - backtick
    "test`file.md",

    // Special characters - tilde
    "test~file.md",
    "~temp.md",

    // Special characters - comma
    "test,file.md",
    "a,b,c.md",

    // Special characters - question mark
    "test?.md",
    "what?.md",

    // No extension
    "testfile",
    "README",
    "Makefile",

    // Just extension
    ".md",
    ".txt",

    // Empty extension
    "test.",

    // Double extension edge cases with uppercase
    "test.Config.md",
    "file.Test.md",

    // Non-ASCII characters - accented
    "tëst.md",
    "café.md",
    "naïve.md",
    "résumé.md",

    // Non-ASCII characters - other alphabets
    "тест.md",
    "测试.md",
    "テスト.md",

    // Non-ASCII characters - symbols
    "test™.md",
    "file©.md",

    // Empty string
    "",

    // Whitespace only
    " ",

    // Extension only variations
    "..md",

    // Numbers in extension (if we expect only letters)
    "test.md5",
    "file.mp3",
    "video.mp4",

    // CamelCase variations
    "camelCase.md",
    "PascalCase.md",
    "mixedCASE.md",

    // Acronyms
    "API.md",
    "HTTP.md",
    "XMLParser.md",

    // Common problematic filenames
    "CHANGELOG.md",
    "LICENSE.md",
    "CONTRIBUTING.md",
    "TODO.md"
];
}
Loading