parse HTML spec to get types and keywords

2023-09-11 20:53:42 +02:00 · 2023-09-11 20:53:42 +02:00 · 7fc1e38ea1
commit 7fc1e38ea1
parent 4bd6c60b99
7 changed files with 2328 additions and 47 deletions
--- a/samples/glsl/basic.vert
+++ b/samples/glsl/basic.vert
@ -1,2 +1,3 @@
 int main() {
+
 }
--- a/spec/gen_spec.py
+++ b/spec/gen_spec.py
@ -5,13 +5,82 @@ import os
 from glob import iglob
 import json
 from bs4 import BeautifulSoup
-import progressbar
 import tokenize
 import re

+keywords = []
+types = []
+operators = []
 variables = []
 functions = []

+def process_glsl_html_spec(path):
+    """Extract keywords, basic types and operators from the GLSL HTML spec.
+
+    Parses the HTML file at ``path`` with BeautifulSoup and appends one dict
+    per entry to the module-level ``keywords``, ``types`` and ``operators``
+    lists.
+
+    Raises:
+        ValueError: if the operator table does not start with the expected
+            column headers.
+    """
+    # Read raw bytes and let BeautifulSoup detect the document encoding,
+    # instead of decoding with the platform's locale default.
+    with open(path, 'rb') as f:
+        soup = BeautifulSoup(f, 'html.parser')
+
+    # The kind is decided purely by the position of the <dl> list inside
+    # the keywords section; any list other than the 2nd or 3rd is 'glsl'.
+    keyword_kinds = {1: 'vulkan', 2: 'reserved'}
+    keywords_section = soup.find(id='keywords').parent
+    for index, dl in enumerate(keywords_section.find_all('dl')):
+        kind = keyword_kinds.get(index, 'glsl')
+        for strong in dl.find_all('strong'):
+            keywords.append({
+                'name': strong.getText(),
+                'kind': kind,
+            })
+
+    basic_types_section = soup.find(id='basic-types').parent
+    for table in basic_types_section.find_all('table'):
+        # Only "Type | Meaning" tables describe types; skip everything
+        # else, including tables with fewer than two header cells.
+        header_texts = [th.getText() for th in table.find_all('th')]
+        if header_texts[:2] != ['Type', 'Meaning']: continue
+
+        for row in table.find('tbody').find_all('tr'):
+            name_cell, meaning_cell = row.find_all('td')
+            # Collapse all runs of whitespace in the description.
+            meaning = ' '.join(meaning_cell.getText().split())
+
+            # A single cell may list several type names, one per line.
+            for line in name_cell.getText().splitlines():
+                name = line.strip()
+                if not name: continue
+                types.append({
+                    'name': name,
+                    'description': [meaning],
+                })
+
+    operators_section = soup.find(id='operators').parent
+    operator_table = operators_section.find('table')
+    expected_headers = ['Precedence', 'Operator Class', 'Operators', 'Associativity']
+    actual_headers = [th.getText() for th in operator_table.find_all('th')][:4]
+    # Explicit check instead of `assert`, which is removed under `python -O`.
+    if actual_headers != expected_headers:
+        raise ValueError(f'unexpected operator table headers: {actual_headers}')
+
+    # Grouping and punctuation tokens are not recorded as operators.
+    ignored_operators = {'(', ')', '[', ']', '.', ','}
+    for row in operator_table.find('tbody').find_all('tr'):
+        precedence, operator_class, operator_words, associativity = row.find_all('td')
+
+        # Only the leading token of the precedence cell is the number.
+        precedence_number = int(precedence.getText().split()[0])
+        left_to_right = associativity.getText() == 'Left to Right'
+
+        operator_class = operator_class.getText()
+        kind = 'infix'
+        if 'prefix' in operator_class: kind = 'prefix'
+        if 'post fix' in operator_class: kind = 'postfix'
+
+        for word in operator_words.getText().split():
+            if word in ignored_operators: continue
+            operators.append({
+                'name': word,
+                'precedence': precedence_number,
+                'left_to_right': left_to_right,
+                'kind': kind,
+            })
+
+
 def process_docs_gl_file(path):
    is_variable = os.path.basename(path).startswith('gl_')

@ -50,7 +119,7 @@ def process_docs_gl_file(path):

 def paragraph_to_markdown(paragraph):
    if paragraph.math is not None and paragraph.math.mtable is not None:
-        return '```\n' + expand_math(paragraph.math.mtable) + '\n```\n'
+        return '```\n' + expand_math(paragraph.math.mtable).replace('δ  ', 'δ') + '\n```\n'

    for tag in paragraph.find_all('em'):
        tag.replace_with('_' + tag.getText() + '_')
@ -71,7 +140,7 @@ def math_children(node):
    return children

 def escape_math(node):
-    return ' '.join(expand_math(node).split(" \t\r")).replace('δ ', 'δ')
+    return ' '.join(expand_math(node).split(" \t\r")).replace('δ  ', 'δ')

 def expand_math(node):
    if node.name is None or node.name in ['mi', 'mn', 'mo']:
@ -506,18 +575,22 @@ def escape_code(text):

 output = sys.argv[1]

-scriptdir = os.path.dirname(sys.argv[0])
+scriptdir = os.path.dirname(sys.argv[0]) or '.'
 extension_files = [f for f in iglob(f'{scriptdir}/GLSL/extensions/*/*.txt')]
 docs_files = [f for f in iglob(f'{scriptdir}/docs.gl/sl4/*.xhtml')]
+glsl_html_spec = f'{scriptdir}/GLSLangSpec.4.60.html'

 work = 0
-total_work = len(extension_files) + len(docs_files)
+total_work = len(extension_files) + len(docs_files) + 1

 def progress(info):
    global work
    print(f'{work}/{total_work}: {info}')
    work += 1

+progress(glsl_html_spec)
+process_glsl_html_spec(glsl_html_spec)
+
 for i, path in enumerate(docs_files):
    progress(path)
    process_docs_gl_file(path)
@ -553,14 +626,20 @@ variables.append({
 })


+keywords.sort(key=lambda x: x['name'])
+operators.sort(key=lambda x: x['name'])
+types.sort(key=lambda x: x['name'])
 variables.sort(key=lambda x: x['name'])
 functions.sort(key=lambda x: x['name'])

 with open(output, 'w') as f:
    f.write(json.dumps({
        'comment': 'generated from docs.gl',
-        'variables':variables,
-        'functions':functions,
+        'keywords': keywords,
+        'operators': operators,
+        'types': types,
+        'variables': variables,
+        'functions': functions,
    }, indent=4, ensure_ascii=False))

 progress('done')
--- a/spec/justfile
+++ b/spec/justfile
@ -1,6 +1,6 @@

 generate-spec:
-    ./gen_spec.py spec.json
+    `which pypy3 || which python3` ./gen_spec.py spec.json

 watch:
    watchexec -e py -c -- just generate-spec
--- a/spec/requirements.txt
+++ b/spec/requirements.txt
@ -0,0 +1,2 @@
+beautifulsoup4
+
--- a/spec/spec.json
+++ b/spec/spec.json
--- a/src/Spec.zig
+++ b/src/Spec.zig
@ -2,9 +2,31 @@ const std = @import("std");
 const util = @import("util.zig");

 comment: []const u8 = "",
+keywords: []const Keyword,
+operators: []const Operator,
+types: []const Type,
 variables: []const Variable,
 functions: []const Function,

+pub const Keyword = struct { // One entry of the spec's `keywords` array.
+    name: []const u8,
+    kind: Kind, // Selects the documentation text shown for the keyword completion.
+    pub const Kind = enum { glsl, vulkan, reserved }; // Standard GLSL, Vulkan-only, or reserved for future use.
+};
+
+pub const Operator = struct { // One entry of the spec's `operators` array.
+    name: []const u8,
+    precedence: u8, // Leading number of the "Precedence" column in the spec's operator table.
+    left_to_right: bool, // True when the "Associativity" column reads "Left to Right".
+    kind: Kind, // Derived from the "Operator Class" column; infix unless it mentions prefix/post fix.
+    pub const Kind = enum { prefix, infix, postfix };
+};
+
+pub const Type = struct { // One entry of the spec's `types` array.
+    name: []const u8,
+    description: []const []const u8, // Paragraphs; joined with "\n\n" for completion documentation.
+};
+
 pub const Variable = struct {
    modifiers: Modifiers = .{ .in = true },
    type: []const u8,
--- a/src/main.zig
+++ b/src/main.zig
@ -536,37 +536,40 @@ pub const Dispatch = struct {
 fn builtinCompletions(arena: std.mem.Allocator, spec: *const Spec) ![]lsp.CompletionItem {
    var completions = std.ArrayList(lsp.CompletionItem).init(arena);

-    const types = [_][]const u8{
-        "void",
-        "bool",
-        "int",
-        "uint",
-        "float",
-        "double",
-        "vec2",
-        "vec3",
-        "vec4",
-        "ivec2",
-        "ivec3",
-        "ivec4",
-        "uvec2",
-        "uvec3",
-        "uvec4",
-        "bvec2",
-        "bvec3",
-        "bvec4",
-        "dvec2",
-        "dvec3",
-        "dvec4",
-        "mat2",
-        "mat3",
-        "mat4",
-    };
+    try completions.ensureUnusedCapacity(
+        spec.types.len + spec.variables.len + spec.functions.len,
+    );

-    try completions.ensureUnusedCapacity(types.len + spec.variables.len + spec.functions.len);
+    for (spec.types) |typ| {
+        try completions.append(.{
+            .label = typ.name,
+            .kind = .class,
+            .documentation = .{
+                .kind = .markdown,
+                .value = try std.mem.join(arena, "\n\n", typ.description),
+            },
+        });
+    }

-    for (types) |name| {
-        try completions.append(.{ .label = name, .kind = .class });
+    keywords: for (spec.keywords) |keyword| {
+        for (spec.types) |typ| {
+            if (std.mem.eql(u8, keyword.name, typ.name)) {
+                continue :keywords;
+            }
+        }
+
+        try completions.append(.{
+            .label = keyword.name,
+            .kind = .keyword,
+            .documentation = .{
+                .kind = .markdown,
+                .value = switch (keyword.kind) {
+                    .glsl => "Available in standard GLSL.",
+                    .vulkan => "Only available when targeting Vulkan.",
+                    .reserved => "Reserved for future use.",
+                },
+            },
+        });
    }

    for (spec.variables) |variable| {