Skip to content

Commit

Permalink
[CI] Script updates in support of semconv spec processing and testing (
Browse files Browse the repository at this point in the history
  • Loading branch information
chalin authored Jan 27, 2025
1 parent 1dca592 commit 8256a75
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 93 deletions.
10 changes: 5 additions & 5 deletions scripts/content-modules/adjust-pages.pl
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ ($$$)

return 0 if $patchMsgCount{$key};

if (($vers = $versions{$specName}) ne $targetVers) {
print STDOUT "INFO: remove obsolete patch '$patchID' now that spec '$specName' is at v$vers, not v$targetVers - $0\n";
} elsif (($vers = $versFromSubmod{$specName}) ne $targetVers) {
print STDOUT "INFO [$patchID]: skipping patch '$patchID' since spec '$specName' submodule is at v$vers not v$targetVers - $0\n";
if (($vers = $versions{$specName}) gt $targetVers) {
print STDOUT "INFO: remove obsolete patch '$patchID' now that spec '$specName' is at v$vers > v$targetVers - $0\n";
} elsif (($vers = $versFromSubmod{$specName}) gt $targetVers) {
print STDOUT "INFO [$patchID]: skipping patch '$patchID' since spec '$specName' submodule is at v$vers > v$targetVers - $0\n";
} else {
return 'Apply the patch';
}
Expand All @@ -103,7 +103,7 @@ ()

sub patchSemConv1_30_0() {
return unless $ARGV =~ /^tmp\/semconv\/docs\//
&& applyPatchOrPrintMsgIf('2025-01-24-emit-an-event', 'semconv', '1.30.0');
&& applyPatchOrPrintMsgIf('2025-01-24-emit-an-event', 'semconv', '1.30.0-3-g');

s|Emit Event API|Log API|;
s|(docs/specs/otel/logs/api.md#emit-a)n-event|$1-logrecord|;
Expand Down
217 changes: 129 additions & 88 deletions scripts/content-modules/normalize-titles.pl
Original file line number Diff line number Diff line change
Expand Up @@ -20,161 +20,202 @@
my $semconvSpecRepoUrl = 'https://github.com/open-telemetry/semantic-conventions';
my $semConvRef = "$otelSpecRepoUrl/blob/main/semantic_conventions/README.md";
my $specBasePath = '/docs/specs';
my $path_base_for_github_subdir = "content/en$specBasePath";
my %versions = qw(
spec: 1.22.0
otlp: 1.0.0
);
my $otelSpecVers = $versions{'spec:'};
my $otlpSpecVers = $versions{'otlp:'};
my $seenFirstNonBlankLineBeforeTitle;
my $beforeTitle = '';

# TODO: remove once OpAMP spec has been updated
my $opampFrontMatter = << "EOS";
title: Open Agent Management Protocol
linkTitle: OpAMP
body_class: otel-docs-spec
github_repo: &repo $opAmpSpecRepoUrl
github_project_repo: *repo
path_base_for_github_subdir:
from: content/en/docs/specs/opamp/index.md
to: specification.md
EOS

# TODO: remove once Semconv spec has been updated
my $semconvFrontMatter = << "EOS";
linkTitle: Semantic Conventions
# no_list: true
cascade:
body_class: otel-docs-spec
github_repo: &repo $semconvSpecRepoUrl
github_subdir: docs
path_base_for_github_subdir: content/en/docs/specs/semconv/
github_project_repo: *repo
EOS

# Adjust semconv title capitalization
sub toTitleCase($) {
my $str = shift;
my @specialCaseWords = qw(
CloudEvents
CouchDB
DynamoDB
FaaS
GraphQL
gRPC
HBase
MongoDB
OpenTelemetry
RabbitMQ
RocketMQ
);
my %specialCases = map { lc($_) => $_ } @specialCaseWords;
while ($str =~ /(\b[A-Z]+\b)/g) {
$specialCases{lc $1} = $1;

my @mixedCaseWords; # mixed-case or ALLCAPS
while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) {
push @mixedCaseWords, $1;
}

$str =~ s/(\w+)/\u\L$1/g;
while (my ($key, $value) = each %specialCases) {
$str =~ s/\b\u\L$key\b/$value/g;

foreach my $word (@mixedCaseWords) {
my $lc_word = lc($word);
$str =~ s/\b$lc_word\b/$word/ig;
}
$str =~ s/\b(A|And|As|For|In|On)\b/\L$1/g;
$str =~ s/\b(A|And|As|By|For|In|On|\.Js)\b/\L$1/g;
return $str;
}

sub printTitleAndFrontMatter() {
# Words whose capitalization must always be restored by toSentenceCase(), in
# addition to the mixed-case/ALLCAPS words it detects in its input.
my @specialWords = qw(Core); # for .NET

# Convert a string to sentence case.
#
# The whole string is lowercased, after which:
#   - every word listed in @specialWords, and every word of the input that is
#     ALLCAPS (optionally preceded by one lowercase letter, e.g. "gRPC") or
#     that contains at least two uppercase letters (e.g. "CloudEvents"), is
#     restored to its original spelling;
#   - the first word character of the string is uppercased, provided that it
#     is preceded by nothing but optional whitespace.
#
# Parameter: the string to convert.
# Returns:   the converted string; the argument itself is not modified.
sub toSentenceCase($) {
  my $str = shift;

  # Collect the words to preserve *before* lowercasing destroys their case.
  # In list context, //g returns the capture of every match.
  my @mixedCaseWords = @specialWords; # mixed-case or ALLCAPS
  push @mixedCaseWords,
    $str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g;

  $str = lc $str;

  # Replace words with their mixed-case or ALL CAPS versions. \Q...\E keeps
  # any regex metacharacter in a word from being interpreted as a pattern.
  foreach my $word (@mixedCaseWords) {
    my $lc_word = lc($word);
    $str =~ s/\b\Q$lc_word\E\b/$word/g;
  }

  # Capitalize the first letter of the string. \u only affects the single
  # character that follows it, so leading whitespace is captured separately;
  # otherwise \u would be spent on a whitespace character and the letter
  # would stay lowercase.
  $str =~ s/^(\s*)(\w)/$1\u$2/;

  return $str;
}

sub computeTitleAndFrontMatter() {
my $frontMatter = '';
my $originalTitle = $title;
if ($frontMatterFromFile) {
# printf STDOUT "> $file has front matter:\n$frontMatterFromFile\n"; # if $gD;
$frontMatterFromFile = '' unless $ARGV =~ /\/system\/[^R]/;
$frontMatterFromFile = '' unless $frontMatterFromFile =~ /aliases|cSpell|cascade/i;
# printf STDOUT "> $file\n" if $ARGV =~ /\/system\b/;
}
if ($title eq 'OpenTelemetry Semantic Conventions') {
$frontMatterFromFile = $semconvFrontMatter unless $frontMatterFromFile;
} elsif ($ARGV =~ /json-rpc/) {
$title = 'Semantic Conventions for JSON-RPC';
}
$title = toTitleCase($title);
my $titleMaybeQuoted = ($title =~ ':') ? "\"$title\"" : $title;
# $frontMatter .= "title: $titleMaybeQuoted\n" if $frontMatterFromFile !~ /title: /;
$linkTitle = $title;

if ($title =~ /^OpenTelemetry (Protocol )?(.*)/) {
$linkTitle = $2;
} elsif ($title =~ /^(.*?) Semantic Conventions?$/i) {
$linkTitle = toTitleCase($1);
} elsif ($title =~ /^Semantic Conventions? for (.*)$/i) {
} elsif ($title =~ /^.*? for (.*)$/i) {
$linkTitle = toTitleCase($1);
}
if ($linkTitle =~ /^Function.as.a.Service$/i) {
$linkTitle = 'FaaS';
}
$linkTitle = 'Database' if $title =~ /Database Calls and Systems$/i;
if ($linkTitle =~ /^Database (.*)$/i) {
$linkTitle = "$1";
} elsif ($linkTitle =~ /^FaaS (.*)$/i) {
$linkTitle = "$1";
} elsif ($linkTitle =~ /^HTTP (.*)$/i) {
$linkTitle = "$1";
} elsif ($linkTitle =~ /^Microsoft (.*)$/i) {
$linkTitle = "$1";
if ($linkTitle =~ /^(?:FaaS|HTTP) (.*)$/i && $ARGV !~ /dotnet|migration/) {
$linkTitle = $1;
} elsif ($linkTitle =~ /^Microsoft (?:Azure)? (.*)$/i) {
$linkTitle = $1;
} elsif ($linkTitle =~ /^RPC (.*)$/i) {
$linkTitle = "$1";
$linkTitle = $1;
} elsif ($linkTitle =~ /^(Exceptions|Feature Flags) .. (.*)$/i) {
$linkTitle = "$2";
$linkTitle = $2;
}
if ($linkTitle =~ /^(.*) Attributes$/i && $title ne 'General Attributes') {
$linkTitle = "$1";
$linkTitle = $1;
}

$linkTitle = 'Attributes' if $title eq 'General Attributes';
$linkTitle = 'Events' if $linkTitle eq 'Event';
$linkTitle = 'Logs' if $title =~ /Logs Attributes$/;
$linkTitle = 'Connect' if $title =~ /Connect RPC$/;
$linkTitle = 'SQL' if $title =~ /SQL Databases$/;
$title = 'Semantic Conventions for Function-as-a-Service' if $title eq 'Semantic Conventions for FaaS';
$linkTitle = 'Tracing Compatibility' if $linkTitle eq 'Tracing Compatibility Components';
if ($title =~ /Semantic Convention\b/) {
$title =~ s/Semantic Convention\b/$&s/g;
printf STDOUT "> $title -> $linkTitle\n";
$linkTitle = 'Events' if $linkTitle =~ /Mobile Events/;
$linkTitle = 'Connect' if $title =~ /Connect RPC$/i;
$linkTitle = 'HTTP' if $linkTitle =~ /^HTTP Client and Server/i;
$linkTitle = 'SQL' if $title =~ /SQL Databases$/i;
$linkTitle = 'System use cases' if $title =~ /System .*?General Use Cases/i;

# Missing an `s` in "Semantic Convention"?
if ($title =~ /^Semantic Convention\b/i and $title !~ /Groups$/i) {
$title =~ s/Semantic Convention\b/$&s/ig;
printf STDOUT "> $title -> $linkTitle - added 's' to 'Conventions'\n";
}
$linkTitle =~ s/^Database Client //;
if ($ARGV =~ /docs\/azure/) {
$linkTitle =~ s/ Resource Logs?//i;
$linkTitle =~ s/Azure //i;
} elsif ($ARGV =~ /docs\/messaging\/[^R]/) {
$linkTitle =~ s/( messaging|messaging )//i;
}

$linkTitle =~ s/^General //i; # if $ARGV =~ /docs\/general/
$linkTitle =~ s/( (runtime|(web )?server))? metrics( emitted by .*)?$//i
unless $ARGV =~ /gen-ai-metrics/;
$linkTitle =~ s/ (components|guide|queries|supplementary information|systems|platform)$//i;
$linkTitle =~ s/ \(command line interface\)//i;

$linkTitle = '.NET' if $linkTitle =~ /.net common language runtime/i;
$linkTitle = 'CLI' if $linkTitle =~ /\(command line interface\) programs/i;

if ($ARGV =~ /non-normative/) {
$linkTitle =~ s/Semantic Conventions? Stability //i;
}

if ($linkTitle and $linkTitle ne $title) {
$linkTitle = toSentenceCase($linkTitle) unless $linkTitle =~ /^gRPC/;
if ($frontMatterFromFile =~ /linkTitle: /) {
$frontMatterFromFile =~ s/^(linkTitle: ).*$/$1$linkTitle/m;
} else {
$frontMatter .= "linkTitle: $linkTitle\n"
}
}

$frontMatter .= "linkTitle: $linkTitle\n" if $linkTitle and $frontMatterFromFile !~ /linkTitle: /;
$frontMatter .= $frontMatterFromFile if $frontMatterFromFile;

if ($ARGV =~ /docs\/(.*?)(README|_index).md$/) {
$frontMatter .= "path_base_for_github_subdir:\n";
$frontMatter .= " from: $path_base_for_github_subdir/semconv/$1_index.md\n";
$frontMatter .= " from: tmp/semconv/docs/$1_index.md\n";
$frontMatter .= " to: $1README.md\n";
}
$frontMatter .= "weight: -1\n" if $title eq 'General Semantic Conventions';

return $frontMatter;
}

# Print the header of the current page to the currently selected output
# handle: an optional Hugo front-matter comment, any text collected in
# $beforeTitle, and the page title as a level-1 Markdown heading.
#
# Reads the file-level variables $frontMatterFromFile, $beforeTitle and
# $title; $title is overwritten with its title-cased form.
sub printTitleAndFrontMatter() {
my $frontMatter;


# Debugging aid, currently disabled:
# if ($ARGV =~ /docs\/(README|_index)/) {
# print STDOUT "> $ARGV\n > frontMatterFromFile: $frontMatterFromFile\n";
# print STDOUT " > title: $title\n";
# print STDOUT " > linkTitle: $linkTitle\n";
# }

# Front matter read from the file that contains `auto_gen: false` is used
# verbatim; otherwise the front matter is computed.
if ($frontMatterFromFile && $frontMatterFromFile =~ /auto_gen:\s*false/) {
$frontMatter = $frontMatterFromFile;
} else {
$frontMatter = computeTitleAndFrontMatter();
}

# Non-empty front matter is wrapped in an HTML comment and followed by a
# blank line.
if ($frontMatter) {
$frontMatter = "<!--- Hugo front matter used to generate the website version of this page:\n" . $frontMatter;
$frontMatter .= "--->\n";
print "$frontMatter\n";
}
# Text accumulated in $beforeTitle is emitted ahead of the heading.
print $beforeTitle if $beforeTitle;
$title = toTitleCase($title);
print "# $title\n"
}

# main

my $titleRegexStr = '^#\s+(.*)';

while(<>) {
# printf STDOUT "$ARGV Got: $_" if $gD;

if ($file ne $ARGV) {
$file = $ARGV;
# printf STDOUT "> $file\n"; # if $gD;
$seenFirstNonBlankLineBeforeTitle = 0;
$frontMatterFromFile = '';
$title = '';
if (/^<!---? Hugo/) {
$beforeTitle = '';
$linkTitle = '';
if (/^<!--- Hugo/) {
while(<>) {
last if /^-?-->/;
last if /^--->/;
$frontMatterFromFile .= $_;
}
next;
}
}
if(! $title) {
($title) = /^#\s+(.*)/;
$linkTitle = '';
printTitleAndFrontMatter() if $title;
next;
}

print;
if ($title) {
print;
} elsif (/^\s*$/ && !$seenFirstNonBlankLineBeforeTitle) {
next; # Drop blank lines until we see a title
} elsif (($title) = /$titleRegexStr/) {
printTitleAndFrontMatter();
} else {
$seenFirstNonBlankLineBeforeTitle = 1;
$beforeTitle .= $_;
}
}

0 comments on commit 8256a75

Please sign in to comment.