Skip to content

Commit 6189a32

Browse files
authored
Merge pull request #96 from ultraleap/libelement/feature/speedup
Libelement/feature/speedup
2 parents 8d7f1ac + 50651c6 commit 6189a32

File tree

8 files changed

+191
-86
lines changed

8 files changed

+191
-86
lines changed

libelement/src/instruction_tree/cache.hpp

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace element
1919
: cache()
2020
{}
2121

22-
explicit instruction_cache(const instruction_const_shared_ptr& instruction)
22+
explicit instruction_cache(const instruction* instruction)
2323
: cache()
2424
{
2525
initialise(instruction);
@@ -37,25 +37,61 @@ namespace element
3737
* Get a pointer to the value associated with the instruction in the cache
3838
* If no cache entry exists with that instruction, return nullptr
3939
*/
40-
instruction_cache_value* find(const instruction_const_shared_ptr& instruction)
40+
instruction_cache_value* find(const instruction* instruction)
4141
{
42-
auto found = cache.find(instruction);
42+
const auto found = cache.find(instruction);
4343
if (found != cache.end())
4444
{
4545
return &(found->second);
4646
}
4747
return nullptr;
4848
}
4949

50+
[[nodiscard]] std::string to_string() const
51+
{
52+
int present_entry_count = 0;
53+
for (const auto& [key, value] : cache)
54+
{
55+
if (value.present)
56+
present_entry_count++;
57+
}
58+
59+
std::string as_string = fmt::format(
60+
"the cache contains {} entries, {} of which are present\n",
61+
cache.size(),
62+
present_entry_count);
63+
64+
for (const auto& [key, value] : cache)
65+
{
66+
if (value.present)
67+
{
68+
as_string += fmt::format("{} = {}\n{}\n\n",
69+
fmt::ptr(key),
70+
value.value,
71+
instruction_to_string(*key));
72+
}
73+
}
74+
75+
return as_string;
76+
}
77+
5078
private:
51-
std::unordered_map<instruction_const_shared_ptr, instruction_cache_value> cache;
79+
std::unordered_map<const instruction*, instruction_cache_value> cache;
5280

53-
void initialise(const instruction_const_shared_ptr& instruction)
81+
void initialise(const instruction* instruction)
5482
{
55-
cache.emplace(std::make_pair(instruction, instruction_cache_value{ 0, false }));
83+
const bool skip_caching =
84+
instruction->is<instruction_constant>() ||
85+
instruction->is<instruction_input>() ||
86+
instruction->is<instruction_serialised_structure>() ||
87+
instruction->is<instruction_for>();
88+
89+
if (!skip_caching)
90+
cache.emplace(instruction, instruction_cache_value{ 0, false });
91+
5692
for (const auto& dep : instruction->dependents())
5793
{
58-
initialise(dep);
94+
initialise(dep.get());
5995
}
6096
}
6197
};

libelement/src/instruction_tree/evaluator.cpp

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@ static void add_to_cache(instruction_cache_value* cache_value, element_value val
2222
static element_result do_evaluate(element_evaluator_ctx& context, const element::instruction_const_shared_ptr& expr,
2323
instruction_cache* cache, element_value* outputs, size_t outputs_count, size_t& outputs_written)
2424
{
25+
// Don't cache constants, faster to grab the value
26+
if (const auto* ec = expr->as<element::instruction_constant>())
27+
{
28+
assert(outputs_count > outputs_written);
29+
outputs[outputs_written++] = ec->value();
30+
return ELEMENT_OK;
31+
}
32+
2533
// Don't check the cache for multi-valued objects, just check their individual values.
2634
if (const auto* es = expr->as<element::instruction_serialised_structure>())
2735
{
@@ -57,6 +65,23 @@ static element_result do_evaluate(element_evaluator_ctx& context, const element:
5765
return ELEMENT_OK;
5866
}
5967

68+
// Don't cache inputs, faster to grab the value
69+
if (const auto* ei = expr->as<element::instruction_input>())
70+
{
71+
if (context.boundaries.size() <= ei->scope()
72+
|| context.boundaries[ei->scope()].inputs_count <= ei->index()
73+
|| outputs_count <= outputs_written)
74+
{
75+
//occurs during constant folding to check if it can be evaluated
76+
outputs_written = 0;
77+
return ELEMENT_ERROR_UNKNOWN;
78+
}
79+
80+
assert(outputs_count > outputs_written);
81+
outputs[outputs_written++] = context.boundaries[ei->scope()].inputs[ei->index()];
82+
return ELEMENT_OK;
83+
}
84+
6085
if (const auto* sel = expr->as<element::instruction_select>())
6186
{
6287
assert(outputs_count > outputs_written);
@@ -71,44 +96,14 @@ static element_result do_evaluate(element_evaluator_ctx& context, const element:
7196
}
7297

7398
// Everything below this point only returns a single value
74-
instruction_cache_value* cache_entry = cache ? cache->find(expr) : nullptr;
75-
99+
instruction_cache_value* cache_entry = cache ? cache->find(expr.get()) : nullptr;
76100
if (cache_entry && cache_entry->present)
77101
{
78102
// Value is in the cache, use it!
79103
outputs[outputs_written++] = cache_entry->value;
80104
return ELEMENT_OK;
81105
}
82106

83-
if (const auto* ec = expr->as<element::instruction_constant>())
84-
{
85-
assert(outputs_count > outputs_written);
86-
element_value value = ec->value();
87-
add_to_cache(cache_entry, value);
88-
outputs[outputs_written++] = value;
89-
return ELEMENT_OK;
90-
}
91-
92-
if (const auto* ei = expr->as<element::instruction_input>())
93-
{
94-
if (context.boundaries.size() <= ei->scope()
95-
|| context.boundaries[ei->scope()].inputs_count <= ei->index()
96-
|| outputs_count <= outputs_written)
97-
{
98-
//occurs during constant folding to check if it can be evaluated
99-
outputs_written = 0;
100-
return ELEMENT_ERROR_UNKNOWN;
101-
}
102-
103-
assert(outputs_count > outputs_written);
104-
105-
element_value value = context.boundaries[ei->scope()].inputs[ei->index()];
106-
add_to_cache(cache_entry, value);
107-
outputs[outputs_written++] = value;
108-
return ELEMENT_OK;
109-
}
110-
111-
112107
if (const auto* eu = expr->as<element::instruction_nullary>())
113108
{
114109
assert(outputs_count > outputs_written);
@@ -172,7 +167,6 @@ static element_result do_evaluate(element_evaluator_ctx& context, const element:
172167
return ELEMENT_OK;
173168
}
174169

175-
176170
if (const auto* eb = expr->as<element::instruction_indexer>())
177171
{
178172
assert(outputs_count > outputs_written);

libelement/src/instruction_tree/instructions.hpp

Lines changed: 73 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -287,37 +287,76 @@ namespace element
287287
[[nodiscard]] size_t options_count() const { return m_dependents.size() - 1; };
288288
[[nodiscard]] const instruction_const_shared_ptr& selector() const { return m_dependents[0]; };
289289
};
290-
} // namespace element
291-
292-
////
293-
//// instruction groups
294-
////
295-
//struct instruction_group : public instruction
296-
//{
297-
// DECLARE_TYPE_ID();
298-
//
299-
// //todo: do we still need instruction groups?
300-
//protected:
301-
// instruction_group()
302-
// : instruction(type_id)
303-
// {
304-
// }
305-
//
306-
// // virtual size_t group_size() const = 0;
307-
//};
308-
//
309-
//struct instruction_unbound_arg : public instruction
310-
//{
311-
// DECLARE_TYPE_ID();
312-
//
313-
// instruction_unbound_arg(size_t idx)
314-
// : instruction(type_id)
315-
// , m_index(idx)
316-
// {
317-
// }
318-
//
319-
// size_t index() const { return m_index; }
320-
//
321-
//protected:
322-
// size_t m_index;
323-
//};
290+
291+
//do some additional peephole optimisations based on known operations and operands
292+
static instruction_const_shared_ptr optimise_binary(const instruction_binary& binary)
293+
{
294+
const auto* input1_as_const = binary.input1()->as<const instruction_constant>();
295+
const auto* input2_as_const = binary.input2()->as<const instruction_constant>();
296+
297+
//if it's a numerical op and one of the operands is NaN, then the result is NaN
298+
//todo: can we also optimise for +/- Inf?
299+
if (binary.operation() < element_binary_op::and_)
300+
{
301+
if (input1_as_const && std::isnan(input1_as_const->value()))
302+
return binary.input1();
303+
304+
if (input2_as_const && std::isnan(input2_as_const->value()))
305+
return binary.input2();
306+
}
307+
308+
switch (binary.operation())
309+
{
310+
case element_binary_op::add:
311+
{
312+
if (input1_as_const && input1_as_const->value() == 0.0f)
313+
return binary.input2();
314+
315+
if (input2_as_const && input2_as_const->value() == 0.0f)
316+
return binary.input1();
317+
318+
//todo: could transform identical adds to mul(input, 2) if that's faster?
319+
//probably machine architecture dependent, should be an optimisation done by the target (e.g. LMNT)
320+
321+
break;
322+
}
323+
324+
case element_binary_op::sub:
325+
{
326+
if (input2_as_const && input2_as_const->value() == 0.0f)
327+
return binary.input1();
328+
329+
break;
330+
}
331+
332+
case element_binary_op::mul:
333+
{
334+
if (input1_as_const && input1_as_const->value() == 1.0f)
335+
return binary.input2();
336+
337+
if (input2_as_const && input2_as_const->value() == 1.0f)
338+
return binary.input1();
339+
340+
// NaN or Inf * 0 = NaN, and since that is valid user input, we can't do that optimisation
341+
342+
break;
343+
}
344+
345+
case element_binary_op::div:
346+
{
347+
if (input2_as_const && input2_as_const->value() == 1.0f)
348+
return binary.input1();
349+
350+
// We can't optimise for division by 0
351+
352+
// todo: could transform divs to muls if that's faster
353+
354+
break;
355+
}
356+
357+
//todo: optimise other operators
358+
}
359+
360+
return nullptr;
361+
}
362+
} // namespace element

libelement/src/interpreter.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ element_result element_interpreter_compile_declaration(
340340
return ELEMENT_ERROR_UNKNOWN;
341341
}
342342

343-
element::instruction_cache cache(instr);
343+
element::instruction_cache cache(instr.get());
344344
*instruction = new element_instruction{ std::move(instr), std::move(cache) };
345345
return ELEMENT_OK;
346346
}
@@ -367,6 +367,14 @@ element_result element_interpreter_evaluate_instruction(
367367
if (!outputs)
368368
return ELEMENT_ERROR_API_OUTPUT_IS_NULL;
369369

370+
//if it's just a constant then handle it quickly.
371+
if (const auto* ic = instruction->instruction->as<element::instruction_constant>())
372+
{
373+
outputs->count = 1;
374+
outputs->values[0] = ic->value();
375+
return ELEMENT_OK;
376+
}
377+
370378
if (instruction->instruction->is_error())
371379
return instruction->instruction->log_any_error(interpreter->logger.get());
372380

@@ -470,7 +478,7 @@ element_result element_interpreter_compile_expression(
470478
return ELEMENT_ERROR_UNKNOWN;
471479
}
472480

473-
element::instruction_cache cache(instr);
481+
element::instruction_cache cache(instr.get());
474482
*instruction = new element_instruction{ std::move(instr), std::move(cache) };
475483
element_object_delete(&object_ptr);
476484
return ELEMENT_OK;

libelement/src/object.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ element_result element_object_to_instruction(const element_object* object, eleme
196196
return ELEMENT_ERROR_SERIALISATION;
197197
}
198198

199-
(*output)->cache = element::instruction_cache(instr);
199+
(*output)->cache = element::instruction_cache(instr.get());
200200
(*output)->instruction = std::move(instr);
201201

202202
return ELEMENT_OK;

libelement/src/object_model/intermediaries/list_wrapper.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,20 @@ object_const_shared_ptr list_wrapper::create_or_optimise(const object_const_shar
6060
}
6161
}
6262

63-
//note: do not serialize to an expression
64-
bool list_elements_are_expressions = true;
63+
//note: do not serialize to an instruction
64+
bool list_elements_are_instructions = true;
6565
for (const auto& element : option_objects)
6666
{
6767
if (!dynamic_cast<const instruction*>(element.get()))
68-
list_elements_are_expressions = false;
68+
list_elements_are_instructions = false;
6969
}
7070

71-
if (list_elements_are_expressions)
71+
if (list_elements_are_instructions)
7272
{
73+
//if the list only contains one instruction then we can optimise it to be that instruction
74+
if (option_objects.size() == 1)
75+
return option_objects[0];
76+
7377
std::vector<instruction_const_shared_ptr> options;
7478
options.reserve(option_objects.size());
7579
for (const auto& option : option_objects)

libelement/src/object_model/intrinsics/intrinsic.cpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,33 @@ std::shared_ptr<const instruction> element::evaluate(const compilation_context&
147147

148148
element_evaluator_ctx evaluator;
149149
const auto result = element_evaluate(evaluator, expr, nullptr, nullptr, 0, &output, output_count);
150-
if (result != ELEMENT_OK)
151-
return expr;
152150

153-
auto new_expr = std::make_shared<const instruction_constant>(output);
154-
new_expr->actual_type = expr->actual_type;
155-
return new_expr;
151+
//the tree was fully evaluated, so it has been constant folded
152+
if (result == ELEMENT_OK)
153+
{
154+
auto new_expr = std::make_shared<const instruction_constant>(output);
155+
new_expr->actual_type = expr->actual_type;
156+
return new_expr;
157+
}
158+
159+
//we failed to fully evaluate the tree, likely due to boundary inputs (whose values are not known), so try to optimise it differently
160+
161+
if (const auto* binary = expr->as<const instruction_binary>())
162+
{
163+
auto optimised = optimise_binary(*binary);
164+
if (optimised)
165+
return optimised;
166+
}
167+
168+
if (const auto* selector = expr->as<const instruction_select>())
169+
{
170+
//if there's only one option to pick from then we're guaranteed to pick it, so we can just treat it as that option
171+
if (selector->options_count() == 1)
172+
return selector->options_at(0);
173+
}
174+
175+
//todo: optimise other instructions
176+
177+
//we couldn't optimise it, so just return the original tree
178+
return expr;
156179
}

libelement/src/typeutil.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ struct rtti_type
1919
template <typename TD>
2020
bool is() const
2121
{
22-
return std::is_base_of<T, TD>::value && (subtype() & TD::type_id) != 0;
22+
static_assert(std::is_base_of_v<T, TD>, "This type does not derive from the base type");
23+
return (subtype() & TD::type_id) != 0;
2324
}
2425

2526
template <typename TD>

0 commit comments

Comments
 (0)