Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .proj.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ has-cpu-only-benchmarks = false
has-cuda-tests = false
has-cuda-benchmarks = false

# [targets.local-execution]
# type = "lib"
# has-cpu-only-tests = true
# has-cpu-only-benchmarks = false
# has-cuda-tests = true
# has-cuda-benchmarks = false
[targets.local-execution]
type = "lib"
has-cpu-only-tests = true
has-cpu-only-benchmarks = false
has-cuda-tests = true
has-cuda-benchmarks = false

# [targets.local-pcg-execution]
# type = "lib"
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1 +1,14 @@
The primary external-facing interface of local-execution
The primary external-facing interface of local-execution.

Flow:

* input (from compiler): `ComputationGraph`
* `create_computation_graph_instance()` => `ComputationGraphInstance`
* `initialize_computation_graph_instance()` => `InitializedComputationGraphInstance`
* execute (TBD)

Details:

* `ComputationGraph` is the unexpanded form of the graph: no passes, no parallelism, etc.
* `create_computation_graph_instance()` takes the `ComputationGraph` and expands it into a `DynamicOpenDataflowGraph`. This form has passes and updates but no allocations and no parallelism. (Note: because this is the *local* executor, there will never be any parallelism.) This version gets stored in the `ComputationGraphInstance`.
* `initialize_computation_graph_instance()` takes the `ComputationGraphInstance`, along with user-provided input tensors. It allocates any remaining (not-user-provided) tensors and performs initialization (cuBLAS handles, etc.). These get stored in a new `DynamicOpenDataflowGraph` which gets wrapped in `InitializedComputationGraphInstance`. (The old `DynamicOpenDataflowGraph` is treated as immutable and is not modified.) This form is fully specified and ready for (single-device) execution.
Original file line number Diff line number Diff line change
@@ -1,45 +1,49 @@
#ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_COMPUTATION_GRAPH_INSTANCE_H
#define _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_COMPUTATION_GRAPH_INSTANCE_H

#include "kernels/accessor.h"
#include "local-execution/computation_graph_training_tensor_ref_t.dtg.h"
#include "local-execution/local_task_registry.dtg.h"
#include "local-execution/local_tensor_backing.dtg.h"
#include "kernels/device_handle_t.dtg.h"
#include "kernels/profiling_settings.dtg.h"
#include "pcg/computation_graph.dtg.h"
#include "pcg/layer_guid_t.dtg.h"
#include "task-spec/symbolic/training_symbolic_computation_graph_from_cg_conversion.dtg.h"
#include "pcg/optimizer_attrs.dtg.h"
#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h"
#include "task-spec/ff_iteration_config.dtg.h"
#include "utils/units/milliseconds_t.h"
#include <unordered_map>

namespace FlexFlow {

// Holder for the state of one computation graph being run by the local
// executor.
//
// NOTE(review): this text comes from a rendered diff whose +/- markers are
// not visible, so declarations from two revisions appear side by side (one
// built around TrainingSymbolicComputationGraphFromCgConversion /
// LocalTensorBacking / LocalTaskRegistry, one built around
// DynamicOpenDataflowGraph / Allocator). Confirm which set is current before
// relying on any single member.
struct ComputationGraphInstance {
public:
// Default construction is disabled; an instance must be built from its parts.
ComputationGraphInstance() = delete;

// Builds an instance from a symbolic training graph conversion, a tensor
// backing, and a task registry (all taken by const reference).
explicit ComputationGraphInstance(
TrainingSymbolicComputationGraphFromCgConversion const &,
LocalTensorBacking const &,
LocalTaskRegistry const &);

public:
// Read-only accessors; each returns a const reference.
TrainingSymbolicComputationGraphFromCgConversion const &
get_symbolic_training_graph_for_cg() const;
LocalTensorBacking const &get_tensor_backing() const;
LocalTaskRegistry const &get_task_registry() const;
// Builds an instance from a dataflow graph (taken by value) and an allocator
// (taken by reference). Not marked `explicit`, unlike the constructor above.
ComputationGraphInstance(DynamicOpenDataflowGraph, Allocator &);
// Read-only access to the stored dataflow graph.
DynamicOpenDataflowGraph const &get_dynamic_dataflow_graph() const;
// Returns a non-const Allocator reference even though the method is const.
Allocator &get_allocator() const;

private:
TrainingSymbolicComputationGraphFromCgConversion
symbolic_training_graph_for_cg;
LocalTensorBacking tensor_backing;
LocalTaskRegistry task_registry;
DynamicOpenDataflowGraph initialized_dataflow_graph;
// Reference member: the implicitly-declared assignment operators are deleted
// for this type. Presumably the caller must keep the Allocator alive for as
// long as this instance exists -- TODO confirm the intended lifetime.
Allocator &allocator;
};

// Creates a ComputationGraphInstance from a ComputationGraph.
//
// The second argument is a bidict pairing each
// computation_graph_training_tensor_ref_t with either a
// GenericTensorAccessorW or a GenericTensorAccessorR.
// NOTE(review): presumably these are the tensors supplied up front by the
// caller -- confirm against the implementation.
ComputationGraphInstance create_computation_graph_instance(
ComputationGraph const &,
bidict<computation_graph_training_tensor_ref_t,
std::variant<GenericTensorAccessorW, GenericTensorAccessorR>> const
&);
// Produces a ComputationGraphInstance from a computation graph, optimizer
// attributes, a map from DynamicValueAttrs to DynamicTensorAccessor, an
// allocator, and device/profiling/iteration configuration.
//
// NOTE(review): the README accompanying this change describes
// initialize_computation_graph_instance() as taking a ComputationGraphInstance
// and yielding an InitializedComputationGraphInstance, whereas this prototype
// takes a ComputationGraph and returns ComputationGraphInstance -- confirm
// which is intended.
// NOTE(review): most parameters are unnamed; in particular the meaning of the
// trailing size_t cannot be determined from this declaration -- TODO name it.
ComputationGraphInstance initialize_computation_graph_instance(
ComputationGraph const &cg,
OptimizerAttrs const &optimizer,
std::unordered_map<DynamicValueAttrs, DynamicTensorAccessor> const &,
Allocator &,
ProfilingSettings const &,
device_handle_t const &,
DeviceType,
FFIterationConfig const &,
size_t);

// Forward pass over the given instance (taken by const reference).
//
// Returns a map keyed by layer_guid_t whose values are optional
// milliseconds_t.
// NOTE(review): presumably the per-layer elapsed time, absent when no timing
// was recorded -- confirm against the implementation.
std::unordered_map<layer_guid_t, std::optional<milliseconds_t>>
perform_forward_pass_for_computation_graph_instance(
ComputationGraphInstance const &);

// Backward pass over the given instance (taken by const reference).
//
// Returns a map keyed by layer_guid_t whose values are optional
// milliseconds_t.
// NOTE(review): presumably the per-layer elapsed time, absent when no timing
// was recorded -- confirm against the implementation.
std::unordered_map<layer_guid_t, std::optional<milliseconds_t>>
perform_backward_pass_for_computation_graph_instance(
ComputationGraphInstance const &);

// Update pass over the given instance; returns nothing.
// NOTE(review): the instance is taken by const reference, so any state this
// pass changes must live outside the instance's own const-visible members
// (e.g. behind the Allocator reference) -- confirm against the implementation.
void perform_update_pass_for_computation_graph_instance(
ComputationGraphInstance const &);

} // namespace FlexFlow

Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#if 0 // FIXME (Elliott): fix cost estimator

#ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_COST_ESTIMATOR_LOCAL_COST_ESTIMATOR_H
#define _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_COST_ESTIMATOR_LOCAL_COST_ESTIMATOR_H

Expand Down Expand Up @@ -33,3 +35,5 @@ CostEstimator get_local_cost_estimator(RuntimeArgConfig const &);
} // namespace FlexFlow

#endif

#endif
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#if 0 // FIXME (Elliott): fix cost estimator

#ifndef _FLEXFLOW_LOCAL_EXECUTION_TRACKED_ALLOCATOR_H
#define _FLEXFLOW_LOCAL_EXECUTION_TRACKED_ALLOCATOR_H

Expand Down Expand Up @@ -33,3 +35,5 @@ size_t get_tracked_memory_usage(Allocator &wrapped_allocator);
} // namespace FlexFlow

#endif

#endif
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
#if 0 // FIXME (Elliott): fix execute task

#ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_EXECUTE_TASK_FOR_LAYER_H
#define _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_EXECUTE_TASK_FOR_LAYER_H

#include "local-execution/local_atomic_tensor_backing.dtg.h"
#include "local-execution/local_ready_to_launch_task.dtg.h"
#include "local-execution/local_concrete_task_invocation.dtg.h"
#include "local-execution/local_task_registry.dtg.h"
#include "local-execution/local_tensor_backing.dtg.h"
#include "pcg/layer_guid_t.dtg.h"
#include "task-spec/runtime_task_invocation/runtime_arg_config.dtg.h"
#include "task-spec/runtime_task_invocation/runtime_task_invocation.dtg.h"
#include "task-spec/symbolic/symbolic_cg_op_attrs_and_training_signature_with_shapes.dtg.h"
#include "task-spec/symbolic/training_symbolic_computation_graph.dtg.h"
#include "task-spec/symbolic/training_symbolic_computation_graph_from_cg_conversion.dtg.h"
#include "utils/units/milliseconds_t.h"

namespace FlexFlow {
Expand All @@ -31,24 +26,6 @@ std::optional<DeviceSpecificPerDeviceOpState> execute_init_for_layer(
LocalTaskRegistry const &,
RuntimeArgConfig const &);

std::optional<milliseconds_t> execute_forward_for_layer(
symbolic_layer_guid_t,
SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &,
LocalTensorBacking const &,
LocalAtomicTensorBacking const &,
Allocator &,
LocalTaskRegistry const &,
RuntimeArgConfig const &);

std::optional<milliseconds_t> execute_backward_for_layer(
symbolic_layer_guid_t,
SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &,
LocalTensorBacking const &,
LocalAtomicTensorBacking const &,
Allocator &,
LocalTaskRegistry const &,
RuntimeArgConfig const &);

void execute_compute_loss(TrainingSymbolicComputationGraph const &,
LocalTensorBacking const &,
LocalAtomicTensorBacking const &,
Expand Down Expand Up @@ -85,3 +62,5 @@ std::unordered_map<layer_guid_t, std::optional<milliseconds_t>>
} // namespace FlexFlow

#endif

#endif

This file was deleted.

This file was deleted.

This file was deleted.

Loading
Loading