Skip to content

Commit 05f4246

Browse files
committed
more submitter stats
1 parent d9845d1 commit 05f4246

File tree

2 files changed

+37
-19
lines changed

2 files changed

+37
-19
lines changed

src/mine/stats.cpp

Lines changed: 33 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,12 @@
1616
#include "sys/setup.hpp"
1717

1818
const std::string Stats::CALL_GRAPH_HEADER("caller,callee");
19-
const std::string Stats::PROGRAMS_HEADER("id,length,usages,inc_eval,log_eval");
19+
const std::string Stats::PROGRAMS_HEADER(
20+
"id,submitter,length,usages,inc_eval,log_eval");
2021
const std::string Stats::STEPS_HEADER("total,min,max,runs");
2122
const std::string Stats::SUMMARY_HEADER(
2223
"num_sequences,num_programs,num_formulas");
23-
const std::string SUBMITTERS_HEADER = "submitter,id,count";
24+
const std::string SUBMITTERS_HEADER = "submitter,ref_id,num_programs";
2425

2526
void checkHeader(std::istream &in, const std::string &header,
2627
const std::string &file) {
@@ -42,7 +43,7 @@ void Stats::load(std::string path) {
4243
auto start_time = std::chrono::steady_clock::now();
4344

4445
const std::string sep(",");
45-
std::string full, line, k, l, m, v, w;
46+
std::string full, line, k, l, m, v, w, u;
4647
Parser parser;
4748
Operation op;
4849
Operand count;
@@ -152,13 +153,15 @@ void Stats::load(std::string path) {
152153
while (std::getline(programs, line)) {
153154
std::stringstream s(line);
154155
std::getline(s, k, ',');
156+
std::getline(s, u, ',');
155157
std::getline(s, l, ',');
156158
std::getline(s, m, ',');
157159
std::getline(s, v, ',');
158160
std::getline(s, w);
159161
UID id(k);
160162
largest_id = std::max<int64_t>(largest_id, id.number());
161163
all_program_ids.insert(id);
164+
program_submitter[id] = std::stoll(u);
162165
program_lengths[id] = std::stoll(l);
163166
program_usages[id] = std::stoll(m);
164167
if (std::stoll(v)) {
@@ -219,17 +222,18 @@ void Stats::load(std::string path) {
219222
Log::get().debug("Loading " + full);
220223
std::ifstream submitters(full);
221224
num_programs_per_submitter.clear();
222-
if (std::getline(submitters, line)) {
223-
if (line != SUBMITTERS_HEADER) {
224-
throw std::runtime_error("unexpected header in " + full);
225-
}
226-
}
225+
checkHeader(submitters, SUBMITTERS_HEADER, full);
227226
while (std::getline(submitters, line)) {
228227
std::stringstream s(line);
229228
std::getline(s, k, ',');
230229
std::getline(s, v, ',');
231230
std::getline(s, w);
232-
num_programs_per_submitter[k] = std::stoll(w);
231+
auto ref_id = std::stoll(v);
232+
submitter_ref_ids[k] = ref_id;
233+
if (ref_id >= static_cast<int64_t>(num_programs_per_submitter.size())) {
234+
num_programs_per_submitter.resize(ref_id + 1);
235+
}
236+
num_programs_per_submitter[ref_id] = std::stoll(w);
233237
}
234238
submitters.close();
235239
}
@@ -265,8 +269,9 @@ void Stats::save(std::string path) {
265269
for (auto id : all_program_ids) {
266270
const auto inceval = supports_inceval.exists(id);
267271
const auto logeval = supports_logeval.exists(id);
268-
programs << id.string() << sep << program_lengths[id] << sep
269-
<< program_usages[id] << sep << inceval << sep << logeval << "\n";
272+
programs << id.string() << sep << program_submitter[id] << sep
273+
<< program_lengths[id] << sep << program_usages[id] << sep
274+
<< inceval << sep << logeval << "\n";
270275
}
271276
programs.close();
272277

@@ -341,9 +346,9 @@ void Stats::save(std::string path) {
341346

342347
std::ofstream submitters(path + "submitters.csv");
343348
submitters << SUBMITTERS_HEADER << "\n";
344-
int64_t index = 1;
345-
for (const auto &e : num_programs_per_submitter) {
346-
submitters << e.first << sep << (index++) << sep << e.second << "\n";
349+
for (const auto &e : submitter_ref_ids) {
350+
submitters << e.first << sep << e.second << sep
351+
<< num_programs_per_submitter[e.second] << "\n";
347352
}
348353
submitters.close();
349354

@@ -357,16 +362,27 @@ std::string Stats::getMainStatsFile(std::string path) const {
357362
}
358363

359364
void Stats::updateProgramStats(UID id, const Program &program,
360-
const std::string &submitter) {
365+
std::string submitter) {
361366
const size_t num_ops = ProgramUtil::numOps(program, false);
362367
program_lengths[id] = num_ops;
363368
if (num_ops >= num_programs_per_length.size()) {
364369
num_programs_per_length.resize(num_ops + 1);
365370
}
366371
num_programs_per_length[num_ops]++;
367-
if (!submitter.empty()) {
368-
num_programs_per_submitter[submitter]++;
372+
int64_t ref_id;
373+
replaceAll(submitter, ",", "_");
374+
auto it = submitter_ref_ids.find(submitter);
375+
if (it != submitter_ref_ids.end()) {
376+
ref_id = it->second;
377+
} else {
378+
ref_id = submitter_ref_ids.size() + 1;
379+
submitter_ref_ids[submitter] = ref_id;
380+
if (ref_id >= static_cast<int64_t>(num_programs_per_submitter.size())) {
381+
num_programs_per_submitter.resize(ref_id + 1, 0);
382+
}
369383
}
384+
num_programs_per_submitter[ref_id]++;
385+
program_submitter[id] = ref_id;
370386
OpPos o;
371387
o.len = program.ops.size();
372388
o.pos = 0;

src/mine/stats.hpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ class Stats {
4444
std::string getMainStatsFile(std::string path) const;
4545

4646
void updateProgramStats(UID id, const Program &program,
47-
const std::string &submitter);
47+
std::string submitter);
4848

4949
void updateSequenceStats(UID id, bool program_found, bool formula_found);
5050

@@ -61,12 +61,14 @@ class Stats {
6161
std::map<Number, int64_t> num_constants;
6262
std::map<Operation, int64_t> num_operations;
6363
std::map<OpPos, int64_t> num_operation_positions;
64-
std::map<std::string, int64_t> num_programs_per_submitter;
64+
std::map<std::string, int64_t> submitter_ref_ids;
6565
std::multimap<UID, UID> call_graph;
66+
std::vector<int64_t> num_programs_per_submitter;
6667
std::vector<int64_t> num_programs_per_length;
6768
std::vector<int64_t> num_ops_per_type;
6869
std::unordered_map<UID, int64_t> program_lengths;
6970
std::unordered_map<UID, int64_t> program_usages;
71+
std::unordered_map<UID, int64_t> program_submitter;
7072
UIDSet all_program_ids;
7173
UIDSet latest_program_ids;
7274
UIDSet supports_inceval;

0 commit comments

Comments
 (0)