SMAUG
Simulating Machine Learning Applications on gem5-Aladdin
smaug.cpp
1 #include <fstream>
2 #include <string>
3 
4 #include <boost/program_options.hpp>
5 
6 #include "core/backend.h"
7 #include "core/globals.h"
8 #include "core/scheduler.h"
9 #include "core/network_builder.h"
10 #include "operators/common.h"
11 #include "utility/debug_stream.h"
12 #include "utility/utils.h"
13 #include "utility/thread_pool.h"
14 
15 namespace po = boost::program_options;
16 
17 using namespace smaug;
18 
19 int main(int argc, char* argv[]) {
20  std::string modelTopo;
21  std::string modelParams;
22  int debugLevel = -1;
23  std::string lastOutputFile;
24  bool dumpGraph = false;
25  runningInSimulation = false;
26  SamplingInfo sampling;
27  std::string samplingLevel = "no";
28  sampling.num_sample_iterations = 1;
30  int numThreads = -1;
32  po::options_description options(
33  "SMAUG Usage: ./smaug model_topo.pbtxt model_params.pb [options]");
34  // clang-format off
35  options.add_options()
36  ("help,h", "Display this help message")
37  ("debug-level", po::value(&debugLevel)->implicit_value(0),
38  "Set the debugging output level. If omitted, all debugging output "
39  "is ignored. If specified without a value, the debug level is set "
40  "to zero.")
41  ("dump-graph", po::value(&dumpGraph)->implicit_value(true),
42  "Dump the network in GraphViz format.")
43  ("gem5", po::value(&runningInSimulation)->implicit_value(true),
44  "Run the network in gem5 simulation.")
45  ("print-last-output,p",
46  po::value(&lastOutputFile)->implicit_value("stdout"),
47  "Dump the output of the last layer to this file. If specified with "
48  "'proto', the output tensor is serialized to a output.pb file. By "
49  "default, it is printed to stdout.")
50  ("sample-level",
51  po::value(&samplingLevel)->implicit_value("no"),
52  "Set the sampling level. By default, SMAUG doesn't do any sampling. "
53  "There are five options of sampling: no, low, medium, high and "
54  "very_high. With more sampling, the simulation speed can be greatly "
55  "improved at the expense of accuracy loss.")
56  ("sample-num",
57  po::value(&(sampling.num_sample_iterations))->implicit_value(1),
58  "Set the number of sample iterations used by every sampling enabled "
59  "entity. By default, the global sample number is set to 1. Larger "
60  "sample number means less sampling.")
61  ("num-accels",
62  po::value(&numAcceleratorsAvailable)->implicit_value(1),
63  "The number of accelerators that the backend has. As far as "
64  "simulation goes, if there are multiple accelerators available, "
65  "SMAUG requires the accelerator IDs (configured in the gem5 "
66  "configuration file) to be monotonically incremented by 1.")
67  ("num-threads",
68  po::value(&numThreads)->implicit_value(1),
69  "Number of threads in the thread pool.")
70  ("use-systolic-array",
71  po::value(&useSystolicArrayWhenAvailable)->implicit_value(true),
72  "If the backend contains a systolic array, use it whenever possible.");
73  // clang-format on
74 
75  po::options_description hidden;
76  hidden.add_options()("model-topo-file", po::value(&modelTopo),
77  "Model topology protobuf file");
78  hidden.add_options()("model-params-file", po::value(&modelParams),
79  "Model parameters protobuf file");
80  po::options_description all, visible;
81  all.add(options).add(hidden);
82  visible.add(options);
83 
84  po::positional_options_description p;
85  p.add("model-topo-file", 1);
86  p.add("model-params-file", 1);
87  po::variables_map vm;
88  po::store(po::command_line_parser(argc, argv)
89  .options(all)
90  .positional(p)
91  .run(),
92  vm);
93  try {
94  po::notify(vm);
95  } catch (po::error& e) {
96  std::cout << "ERROR: " << e.what() << "\n";
97  exit(1);
98  }
99 
100  if (vm.count("help")) {
101  std::cout << visible << "\n";
102  return 1;
103  }
104  if (modelTopo.empty() || modelParams.empty()) {
105  std::cout << "The model protobuf files must be specified!\n";
106  exit(1);
107  }
108  initDebugStream(debugLevel);
109 
110  std::cout << "Model topology file: " << modelTopo << "\n";
111  std::cout << "Model parameters file: " << modelParams << "\n";
112 
113  if (samplingLevel == "no") {
114  sampling.level = NoSampling;
115  } else if (samplingLevel == "low") {
116  sampling.level = Low;
117  } else if (samplingLevel == "medium") {
118  sampling.level = Medium;
119  } else if (samplingLevel == "high") {
120  sampling.level = High;
121  } else if (samplingLevel == "very_high") {
122  sampling.level = VeryHigh;
123  } else {
124  std::cout << "Doesn't support the specified sampling option: "
125  << samplingLevel << "\n";
126  exit(1);
127  }
128  if (sampling.level > NoSampling) {
129  std::cout << "Sampling level: " << samplingLevel
130  << ", number of sample iterations: "
131  << sampling.num_sample_iterations << "\n";
132  }
133 
135  std::cout << "The number of accelerators exceeds the max number!\n";
136  exit(1);
137  }
138  std::cout << "Number of accelerators: " << numAcceleratorsAvailable << "\n";
140  std::cout << "SMAUG requires the accelerator IDs (configured in the "
141  "gem5 configuration file) to be monotonically incremented "
142  "by 1.\n";
143  }
144 
145  if (numThreads != -1) {
146  std::cout << "Using a thread pool, size: " << numThreads << ".\n";
147  threadPool = new ThreadPool(numThreads);
148  }
149 
150  Workspace* workspace = new Workspace();
151  Network* network =
152  buildNetwork(modelTopo, modelParams, sampling, workspace);
153  ReferenceBackend::initGlobals();
154  SmvBackend::initGlobals();
155 
156  if (dumpGraph)
157  network->dumpDataflowGraph();
158 
159  if (!network->validate())
160  return -1;
161 
162  Scheduler scheduler(network, workspace);
163  Tensor* output = scheduler.runNetwork();
164 
165  if (!lastOutputFile.empty()) {
166  if (lastOutputFile == "stdout") {
167  std::cout << "Final network output:\n" << *output << "\n";
168  } else if (lastOutputFile == "proto") {
169  // Serialize the output tensor into a proto buffer.
170  std::fstream outfile("output.pb", std::ios::out | std::ios::trunc |
171  std::ios::binary);
172  TensorProto* tensorProto = output->asTensorProto();
173  if (!tensorProto->SerializeToOstream(&outfile)) {
174  std::cerr << "Failed to serialize the output tensor and write "
175  "it to the given C++ ostream! Did you run out of "
176  "disk space?\n";
177  return 1;
178  }
179  delete tensorProto;
180  } else {
181  std::ofstream outfile(lastOutputFile);
182  outfile << "Final network output:\n" << *output << "\n";
183  }
184  }
185 
186  if (threadPool)
187  delete threadPool;
188 
189  delete network;
190  delete workspace;
191  ReferenceBackend::freeGlobals();
192  SmvBackend::freeGlobals();
193 
194  return 0;
195 }
smaug::Tensor
Tensor represents a single multi-dimensional array of data.
Definition: tensor.h:344
smaug::Tensor::asTensorProto
TensorProto * asTensorProto()
Serializes this Tensor to a TensorProto.
Definition: tensor.cpp:16
smaug::threadPool
ThreadPool * threadPool
The user-space thread pool used by SMAUG to run multithreaded tasks.
Definition: globals.cpp:7
smaug::numAcceleratorsAvailable
int numAcceleratorsAvailable
The actual number of accelerator complexes currently in use.
Definition: globals.cpp:6
smaug::Network
Network encapsulates all of the information SMAUG will use during execution: the overall computation ...
Definition: network.h:39
_SamplingInfo
Simulation sampling information maintained by the Operator and passed to the accelerated kernel.
Definition: common.h:262
smaug::useSystolicArrayWhenAvailable
bool useSystolicArrayWhenAvailable
If true, uses the systolic array for applicable operators when backend support exists.
Definition: globals.cpp:8
smaug::buildNetwork
Network * buildNetwork(const std::string &modelTopoFile, const std::string &modelParamsFile, SamplingInfo &sampling, Workspace *workspace)
buildNetwork reads the specified model topology and parameters protobufs and simulation sampling dire...
Definition: network_builder.cpp:370
smaug::Workspace
Workspace is the container and owner of all Tensors and Operators in the Network.
Definition: workspace.h:17
smaug::maxNumAccelerators
constexpr const int maxNumAccelerators
The maximum number of accelerators an operator's work can be split across.
Definition: globals.h:25
_SamplingInfo::num_sample_iterations
int num_sample_iterations
The requested number of iterations to run a sampled loop.
Definition: common.h:269
smaug::runningInSimulation
bool runningInSimulation
This is true if the user chooses to run the network in gem5 simulation.
Definition: globals.cpp:4
globals.h
SMAUG Global variables.
_SamplingInfo::level
SamplingLevel level
Qualitative level of sampling.
Definition: common.h:264
smaug::initDebugStream
void initDebugStream(int debugLevel)
Initializes the global debug stream for the given debug level.
Definition: debug_stream.cpp:10
smaug
The smaug namespace is the parent namespace of all C++ code in SMAUG.
Definition: backend.cpp:38
common.h
Utilities for writing and invoking Aladdin kernels from Operators.
smaug::Scheduler
Scheduler is responsible for running the Network.
Definition: scheduler.h:12
smaug::ThreadPool
A user-space cooperative thread pool implementation designed for gem5 in SE mode.
Definition: thread_pool.h:23