1
Fork 0

parse HTML spec to get types and keywords

This commit is contained in:
Christofer Nolander 2023-09-11 20:53:42 +02:00
commit 7fc1e38ea1
7 changed files with 2328 additions and 47 deletions

View file

@ -1,2 +1,3 @@
int main() {
}

View file

@ -5,13 +5,82 @@ import os
from glob import iglob
import json
from bs4 import BeautifulSoup
import progressbar
import tokenize
import re
keywords = []
types = []
operators = []
variables = []
functions = []
def process_glsl_html_spec(path):
    """Extract keywords, basic types and operators from the GLSL HTML spec.

    Parses the HTML document at ``path`` and appends the extracted entries to
    the module-level ``keywords``, ``types`` and ``operators`` lists.

    Args:
        path: Filesystem path of the GLSL specification HTML file.

    Raises:
        AssertionError: If the operator table does not start with the
            expected column headers.
    """
    # Open in binary mode so BeautifulSoup detects the document encoding
    # itself instead of relying on the platform's default locale encoding.
    with open(path, 'rb') as f:
        soup = BeautifulSoup(f, 'html.parser')

    # --- Keywords -----------------------------------------------------------
    # The keyword section holds several <dl> lists; the list at index 1 is
    # tagged 'vulkan', the one at index 2 'reserved', and any other 'glsl'.
    kind_by_index = {1: 'vulkan', 2: 'reserved'}
    keywords_section = soup.find(id='keywords').parent
    for index, dl in enumerate(keywords_section.find_all('dl')):
        kind = kind_by_index.get(index, 'glsl')
        for strong in dl.find_all('strong'):
            keywords.append({
                'name': strong.getText(),
                'kind': kind,
            })

    # --- Basic types --------------------------------------------------------
    basic_types_section = soup.find(id='basic-types').parent
    for table in basic_types_section.find_all('table'):
        header_texts = [th.getText() for th in table.find_all('th')]
        # Only tables whose first two columns are "Type" and "Meaning" are
        # processed. Comparing a slice also skips tables with fewer than two
        # header cells instead of raising IndexError.
        if header_texts[:2] != ['Type', 'Meaning']:
            continue

        for row in table.find('tbody').find_all('tr'):
            name_cell, meaning_cell = row.find_all('td')
            # Collapse all whitespace in the description to single spaces.
            meaning = ' '.join(meaning_cell.getText().split())
            # A cell may list several type names, one per line.
            for line in name_cell.getText().splitlines():
                name = line.strip()
                if not name:
                    # Blank lines in the cell are not type names.
                    continue
                types.append({
                    'name': name,
                    'description': [meaning],
                })

    # --- Operators ----------------------------------------------------------
    operators_section = soup.find(id='operators').parent
    operator_table = operators_section.find('table')
    headers = operator_table.find_all('th')
    assert headers[0].getText() == 'Precedence'
    assert headers[1].getText() == 'Operator Class'
    assert headers[2].getText() == 'Operators'
    assert headers[3].getText() == 'Associativity'

    # Punctuation tokens that appear in the table but are not recorded.
    ignored_operators = ['(', ')', '[', ']', '.', ',']
    for row in operator_table.find('tbody').find_all('tr'):
        precedence, operator_class, operator_words, associativity = row.find_all('td')
        # The precedence cell starts with the number; any trailing words in
        # the cell are ignored.
        precedence_number = int(precedence.getText().split()[0])
        # Strip so surrounding whitespace in the cell cannot flip the result.
        left_to_right = associativity.getText().strip() == 'Left to Right'

        operator_class = operator_class.getText()
        kind = 'infix'
        if 'prefix' in operator_class:
            kind = 'prefix'
        if 'post fix' in operator_class:
            kind = 'postfix'

        for word in operator_words.getText().split():
            if word in ignored_operators:
                continue
            operators.append({
                'name': word,
                'precedence': precedence_number,
                'left_to_right': left_to_right,
                'kind': kind,
            })
def process_docs_gl_file(path):
is_variable = os.path.basename(path).startswith('gl_')
@ -50,7 +119,7 @@ def process_docs_gl_file(path):
def paragraph_to_markdown(paragraph):
if paragraph.math is not None and paragraph.math.mtable is not None:
return '```\n' + expand_math(paragraph.math.mtable) + '\n```\n'
return '```\n' + expand_math(paragraph.math.mtable).replace('δ ', 'δ') + '\n```\n'
for tag in paragraph.find_all('em'):
tag.replace_with('_' + tag.getText() + '_')
@ -71,7 +140,7 @@ def math_children(node):
return children
def escape_math(node):
    """Flatten a math node into text with collapsed horizontal whitespace.

    Expands ``node`` via ``expand_math``, replaces every run of spaces, tabs
    and carriage returns with a single space, and removes the space that
    follows a ``δ``.
    """
    # str.split(" \t\r") splits on that exact three-character sequence rather
    # than on each of the characters, so it never collapsed anything. Match
    # the character class instead; newlines are deliberately left alone, as
    # they were not part of the original separator.
    collapsed = re.sub(r'[ \t\r]+', ' ', expand_math(node))
    return collapsed.replace('δ ', 'δ')
def expand_math(node):
if node.name is None or node.name in ['mi', 'mn', 'mo']:
@ -506,18 +575,22 @@ def escape_code(text):
output = sys.argv[1]
scriptdir = os.path.dirname(sys.argv[0])
scriptdir = os.path.dirname(sys.argv[0]) or '.'
extension_files = [f for f in iglob(f'{scriptdir}/GLSL/extensions/*/*.txt')]
docs_files = [f for f in iglob(f'{scriptdir}/docs.gl/sl4/*.xhtml')]
glsl_html_spec = f'{scriptdir}/GLSLangSpec.4.60.html'
work = 0
total_work = len(extension_files) + len(docs_files)
total_work = len(extension_files) + len(docs_files) + 1
def progress(info):
    """Print one progress line and advance the work counter.

    The line has the form ``<work>/<total_work>: <info>``, using the
    module-level ``work`` and ``total_work`` values. ``work`` is incremented
    after printing.

    Args:
        info: Text describing the item being processed.
    """
    global work
    print(f'{work}/{total_work}: {info}')
    work = work + 1
progress(glsl_html_spec)
process_glsl_html_spec(glsl_html_spec)
for i, path in enumerate(docs_files):
progress(path)
process_docs_gl_file(path)
@ -553,14 +626,20 @@ variables.append({
})
keywords.sort(key=lambda x: x['name'])
operators.sort(key=lambda x: x['name'])
types.sort(key=lambda x: x['name'])
variables.sort(key=lambda x: x['name'])
functions.sort(key=lambda x: x['name'])
with open(output, 'w') as f:
f.write(json.dumps({
'comment': 'generated from docs.gl',
'variables':variables,
'functions':functions,
'keywords': keywords,
'operators': operators,
'types': types,
'variables': variables,
'functions': functions,
}, indent=4, ensure_ascii=False))
progress('done')

View file

@ -1,6 +1,6 @@
generate-spec:
./gen_spec.py spec.json
`which pypy3 || which python3` ./gen_spec.py spec.json
watch:
watchexec -e py -c -- just generate-spec

2
spec/requirements.txt Normal file
View file

@ -0,0 +1,2 @@
beautifulsoup4

File diff suppressed because it is too large Load diff

View file

@ -2,9 +2,31 @@ const std = @import("std");
const util = @import("util.zig");
comment: []const u8 = "",
keywords: []const Keyword,
operators: []const Operator,
types: []const Type,
variables: []const Variable,
functions: []const Function,
/// A keyword entry of the spec.
pub const Keyword = struct {
    /// The keyword text.
    name: []const u8,
    /// Which category the keyword belongs to.
    kind: Kind,
    /// Keyword category: `glsl` is available in standard GLSL, `vulkan` is
    /// only available when targeting Vulkan, and `reserved` is reserved for
    /// future use.
    pub const Kind = enum { glsl, vulkan, reserved };
};
/// An operator entry of the spec.
pub const Operator = struct {
    /// The operator token.
    name: []const u8,
    /// Precedence number of the operator.
    /// NOTE(review): presumably a lower number binds tighter — confirm
    /// against the specification's operator table.
    precedence: u8,
    /// Whether the operator associates left to right.
    left_to_right: bool,
    /// Position of the operator relative to its operand(s).
    kind: Kind,
    /// Operator position: before the operand, between operands, or after
    /// the operand.
    pub const Kind = enum { prefix, infix, postfix };
};
/// A built-in type entry of the spec.
pub const Type = struct {
    /// The type name.
    name: []const u8,
    /// Description paragraphs for the type; consumers may join them with
    /// blank lines to form markdown documentation.
    description: []const []const u8,
};
pub const Variable = struct {
modifiers: Modifiers = .{ .in = true },
type: []const u8,

View file

@ -536,37 +536,40 @@ pub const Dispatch = struct {
fn builtinCompletions(arena: std.mem.Allocator, spec: *const Spec) ![]lsp.CompletionItem {
var completions = std.ArrayList(lsp.CompletionItem).init(arena);
const types = [_][]const u8{
"void",
"bool",
"int",
"uint",
"float",
"double",
"vec2",
"vec3",
"vec4",
"ivec2",
"ivec3",
"ivec4",
"uvec2",
"uvec3",
"uvec4",
"bvec2",
"bvec3",
"bvec4",
"dvec2",
"dvec3",
"dvec4",
"mat2",
"mat3",
"mat4",
};
try completions.ensureUnusedCapacity(
spec.types.len + spec.variables.len + spec.functions.len,
);
try completions.ensureUnusedCapacity(types.len + spec.variables.len + spec.functions.len);
for (spec.types) |typ| {
try completions.append(.{
.label = typ.name,
.kind = .class,
.documentation = .{
.kind = .markdown,
.value = try std.mem.join(arena, "\n\n", typ.description),
},
});
}
for (types) |name| {
try completions.append(.{ .label = name, .kind = .class });
keywords: for (spec.keywords) |keyword| {
for (spec.types) |typ| {
if (std.mem.eql(u8, keyword.name, typ.name)) {
continue :keywords;
}
}
try completions.append(.{
.label = keyword.name,
.kind = .keyword,
.documentation = .{
.kind = .markdown,
.value = switch (keyword.kind) {
.glsl => "Available in standard GLSL.",
.vulkan => "Only available when targeting Vulkan.",
.reserved => "Reserved for future use.",
},
},
});
}
for (spec.variables) |variable| {